diff --git "a/all_data_checkpoint/trainer_state.json" "b/all_data_checkpoint/trainer_state.json" new file mode 100644--- /dev/null +++ "b/all_data_checkpoint/trainer_state.json" @@ -0,0 +1,126033 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.17038839087096866, + "eval_steps": 1000, + "global_step": 18000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 9.466021715053814e-06, + "grad_norm": 1294.9527587890625, + "learning_rate": 6.309148264984227e-10, + "loss": 172.0, + "step": 1 + }, + { + "epoch": 1.893204343010763e-05, + "grad_norm": 1150.3070068359375, + "learning_rate": 1.2618296529968454e-09, + "loss": 125.1875, + "step": 2 + }, + { + "epoch": 2.8398065145161443e-05, + "grad_norm": 2059.051513671875, + "learning_rate": 1.892744479495268e-09, + "loss": 132.1875, + "step": 3 + }, + { + "epoch": 3.786408686021526e-05, + "grad_norm": 3.2430968284606934, + "learning_rate": 2.523659305993691e-09, + "loss": 0.9551, + "step": 4 + }, + { + "epoch": 4.733010857526907e-05, + "grad_norm": 2595.162109375, + "learning_rate": 3.1545741324921134e-09, + "loss": 178.25, + "step": 5 + }, + { + "epoch": 5.6796130290322886e-05, + "grad_norm": 2045.851318359375, + "learning_rate": 3.785488958990536e-09, + "loss": 124.25, + "step": 6 + }, + { + "epoch": 6.626215200537671e-05, + "grad_norm": 1506.7730712890625, + "learning_rate": 4.4164037854889584e-09, + "loss": 196.5, + "step": 7 + }, + { + "epoch": 7.572817372043051e-05, + "grad_norm": 3.4059131145477295, + "learning_rate": 5.047318611987382e-09, + "loss": 0.8076, + "step": 8 + }, + { + "epoch": 8.519419543548434e-05, + "grad_norm": 2373.244140625, + "learning_rate": 5.678233438485804e-09, + "loss": 171.25, + "step": 9 + }, + { + "epoch": 9.466021715053814e-05, + "grad_norm": 2.5038576126098633, + "learning_rate": 6.309148264984227e-09, + "loss": 0.7388, + "step": 10 + }, + { + "epoch": 0.00010412623886559196, + "grad_norm": 6842.2763671875, + "learning_rate": 6.94006309148265e-09, + "loss": 133.3125, + "step": 11 + }, + { + "epoch": 0.00011359226058064577, + "grad_norm": 1099.3135986328125, + "learning_rate": 7.570977917981072e-09, + "loss": 126.0625, + "step": 12 + }, + { + "epoch": 0.0001230582822956996, + "grad_norm": 1913.849609375, + "learning_rate": 8.201892744479495e-09, + "loss": 176.5625, + "step": 13 + }, + { + "epoch": 0.00013252430401075341, + "grad_norm": 1012.0535278320312, + "learning_rate": 8.832807570977917e-09, + "loss": 126.9375, + "step": 14 + }, + { + "epoch": 0.0001419903257258072, + "grad_norm": 3769.14453125, + "learning_rate": 9.46372239747634e-09, + "loss": 127.5625, + "step": 15 + }, + { + "epoch": 0.00015145634744086103, + "grad_norm": 1569.410888671875, + "learning_rate": 1.0094637223974764e-08, + "loss": 129.6875, + "step": 16 + }, + { + "epoch": 0.00016092236915591485, + "grad_norm": 2181.215087890625, + "learning_rate": 1.0725552050473185e-08, + "loss": 175.0, + "step": 17 + }, + { + "epoch": 0.00017038839087096867, + "grad_norm": 1300.0615234375, + "learning_rate": 1.1356466876971609e-08, + "loss": 133.0, + "step": 18 + }, + { + "epoch": 0.00017985441258602247, + "grad_norm": 1421.8985595703125, + "learning_rate": 1.1987381703470032e-08, + "loss": 115.375, + "step": 19 + }, + { + "epoch": 0.00018932043430107629, + "grad_norm": 1994.2186279296875, + "learning_rate": 1.2618296529968454e-08, + "loss": 202.0, + "step": 20 + }, + { + "epoch": 0.0001987864560161301, + "grad_norm": 7314.115234375, + "learning_rate": 1.3249211356466877e-08, + "loss": 136.75, + "step": 21 + }, + { + "epoch": 0.00020825247773118393, + "grad_norm": 3190.08642578125, + "learning_rate": 1.38801261829653e-08, + "loss": 126.9375, + "step": 22 + }, + { + "epoch": 0.00021771849944623772, + "grad_norm": 1441.97119140625, + "learning_rate": 1.4511041009463722e-08, + "loss": 176.875, + "step": 23 + }, + { + "epoch": 0.00022718452116129154, + "grad_norm": 2326.063232421875, + "learning_rate": 1.5141955835962144e-08, + "loss": 153.5, + "step": 24 + }, + { + "epoch": 0.00023665054287634536, + "grad_norm": 3041.300048828125, + "learning_rate": 1.5772870662460567e-08, + "loss": 183.8125, + "step": 25 + }, + { + "epoch": 0.0002461165645913992, + "grad_norm": 1888.8056640625, + "learning_rate": 1.640378548895899e-08, + "loss": 138.5625, + "step": 26 + }, + { + "epoch": 0.000255582586306453, + "grad_norm": 1124.2860107421875, + "learning_rate": 1.7034700315457414e-08, + "loss": 130.75, + "step": 27 + }, + { + "epoch": 0.00026504860802150683, + "grad_norm": 4134.025390625, + "learning_rate": 1.7665615141955834e-08, + "loss": 158.4375, + "step": 28 + }, + { + "epoch": 0.0002745146297365606, + "grad_norm": 1149.139892578125, + "learning_rate": 1.829652996845426e-08, + "loss": 118.9375, + "step": 29 + }, + { + "epoch": 0.0002839806514516144, + "grad_norm": 980.4683837890625, + "learning_rate": 1.892744479495268e-08, + "loss": 122.6875, + "step": 30 + }, + { + "epoch": 0.00029344667316666824, + "grad_norm": 2778.408447265625, + "learning_rate": 1.9558359621451104e-08, + "loss": 146.5625, + "step": 31 + }, + { + "epoch": 0.00030291269488172206, + "grad_norm": 1037.8314208984375, + "learning_rate": 2.0189274447949527e-08, + "loss": 125.8125, + "step": 32 + }, + { + "epoch": 0.0003123787165967759, + "grad_norm": 2846.587158203125, + "learning_rate": 2.082018927444795e-08, + "loss": 198.5, + "step": 33 + }, + { + "epoch": 0.0003218447383118297, + "grad_norm": 1630.4580078125, + "learning_rate": 2.145110410094637e-08, + "loss": 195.875, + "step": 34 + }, + { + "epoch": 0.0003313107600268835, + "grad_norm": 1266.4034423828125, + "learning_rate": 2.2082018927444794e-08, + "loss": 140.8125, + "step": 35 + }, + { + "epoch": 0.00034077678174193734, + "grad_norm": 4139.9658203125, + "learning_rate": 2.2712933753943217e-08, + "loss": 132.875, + "step": 36 + }, + { + "epoch": 0.0003502428034569911, + "grad_norm": 1141.658203125, + "learning_rate": 2.3343848580441637e-08, + "loss": 125.5625, + "step": 37 + }, + { + "epoch": 0.00035970882517204493, + "grad_norm": 6168.24853515625, + "learning_rate": 2.3974763406940064e-08, + "loss": 123.375, + "step": 38 + }, + { + "epoch": 0.00036917484688709875, + "grad_norm": 973.4960327148438, + "learning_rate": 2.4605678233438484e-08, + "loss": 121.25, + "step": 39 + }, + { + "epoch": 0.00037864086860215257, + "grad_norm": 1568.877197265625, + "learning_rate": 2.5236593059936907e-08, + "loss": 140.125, + "step": 40 + }, + { + "epoch": 0.0003881068903172064, + "grad_norm": 1471.1729736328125, + "learning_rate": 2.586750788643533e-08, + "loss": 180.125, + "step": 41 + }, + { + "epoch": 0.0003975729120322602, + "grad_norm": 4013.028564453125, + "learning_rate": 2.6498422712933754e-08, + "loss": 175.5625, + "step": 42 + }, + { + "epoch": 0.00040703893374731404, + "grad_norm": 1544.47216796875, + "learning_rate": 2.7129337539432174e-08, + "loss": 134.75, + "step": 43 + }, + { + "epoch": 0.00041650495546236786, + "grad_norm": 1712.1092529296875, + "learning_rate": 2.77602523659306e-08, + "loss": 204.75, + "step": 44 + }, + { + "epoch": 0.0004259709771774216, + "grad_norm": 1394.689697265625, + "learning_rate": 2.839116719242902e-08, + "loss": 150.875, + "step": 45 + }, + { + "epoch": 0.00043543699889247544, + "grad_norm": 15979.23046875, + "learning_rate": 2.9022082018927444e-08, + "loss": 133.0, + "step": 46 + }, + { + "epoch": 0.00044490302060752927, + "grad_norm": 1244.7691650390625, + "learning_rate": 2.9652996845425867e-08, + "loss": 137.5, + "step": 47 + }, + { + "epoch": 0.0004543690423225831, + "grad_norm": 1501.9390869140625, + "learning_rate": 3.028391167192429e-08, + "loss": 123.8125, + "step": 48 + }, + { + "epoch": 0.0004638350640376369, + "grad_norm": 1997.481201171875, + "learning_rate": 3.0914826498422714e-08, + "loss": 197.25, + "step": 49 + }, + { + "epoch": 0.00047330108575269073, + "grad_norm": 1502.2030029296875, + "learning_rate": 3.1545741324921134e-08, + "loss": 128.3125, + "step": 50 + }, + { + "epoch": 0.00048276710746774455, + "grad_norm": 1686.3980712890625, + "learning_rate": 3.2176656151419554e-08, + "loss": 123.1875, + "step": 51 + }, + { + "epoch": 0.0004922331291827984, + "grad_norm": 1059.7200927734375, + "learning_rate": 3.280757097791798e-08, + "loss": 127.875, + "step": 52 + }, + { + "epoch": 0.0005016991508978521, + "grad_norm": 1624.57666015625, + "learning_rate": 3.34384858044164e-08, + "loss": 181.0, + "step": 53 + }, + { + "epoch": 0.000511165172612906, + "grad_norm": 5570.09228515625, + "learning_rate": 3.406940063091483e-08, + "loss": 139.1875, + "step": 54 + }, + { + "epoch": 0.0005206311943279598, + "grad_norm": 3897.96240234375, + "learning_rate": 3.470031545741325e-08, + "loss": 162.6875, + "step": 55 + }, + { + "epoch": 0.0005300972160430137, + "grad_norm": 2696.365234375, + "learning_rate": 3.533123028391167e-08, + "loss": 124.1875, + "step": 56 + }, + { + "epoch": 0.0005395632377580674, + "grad_norm": 1353.2523193359375, + "learning_rate": 3.596214511041009e-08, + "loss": 123.875, + "step": 57 + }, + { + "epoch": 0.0005490292594731212, + "grad_norm": 2294.989990234375, + "learning_rate": 3.659305993690852e-08, + "loss": 129.3125, + "step": 58 + }, + { + "epoch": 0.0005584952811881751, + "grad_norm": 1437.30908203125, + "learning_rate": 3.722397476340694e-08, + "loss": 131.1875, + "step": 59 + }, + { + "epoch": 0.0005679613029032288, + "grad_norm": 2410.67724609375, + "learning_rate": 3.785488958990536e-08, + "loss": 139.1875, + "step": 60 + }, + { + "epoch": 0.0005774273246182827, + "grad_norm": 3.6927695274353027, + "learning_rate": 3.848580441640378e-08, + "loss": 0.8262, + "step": 61 + }, + { + "epoch": 0.0005868933463333365, + "grad_norm": 1964.7734375, + "learning_rate": 3.911671924290221e-08, + "loss": 225.375, + "step": 62 + }, + { + "epoch": 0.0005963593680483903, + "grad_norm": 1374.345947265625, + "learning_rate": 3.974763406940063e-08, + "loss": 129.6875, + "step": 63 + }, + { + "epoch": 0.0006058253897634441, + "grad_norm": 3316.80078125, + "learning_rate": 4.0378548895899054e-08, + "loss": 124.375, + "step": 64 + }, + { + "epoch": 0.0006152914114784979, + "grad_norm": 5896.333984375, + "learning_rate": 4.1009463722397474e-08, + "loss": 181.75, + "step": 65 + }, + { + "epoch": 0.0006247574331935518, + "grad_norm": 1705.070068359375, + "learning_rate": 4.16403785488959e-08, + "loss": 136.875, + "step": 66 + }, + { + "epoch": 0.0006342234549086055, + "grad_norm": 2275.2607421875, + "learning_rate": 4.227129337539432e-08, + "loss": 197.375, + "step": 67 + }, + { + "epoch": 0.0006436894766236594, + "grad_norm": 3376.725830078125, + "learning_rate": 4.290220820189274e-08, + "loss": 137.9375, + "step": 68 + }, + { + "epoch": 0.0006531554983387132, + "grad_norm": 2853.64501953125, + "learning_rate": 4.353312302839116e-08, + "loss": 134.5, + "step": 69 + }, + { + "epoch": 0.000662621520053767, + "grad_norm": 2388.16845703125, + "learning_rate": 4.416403785488959e-08, + "loss": 211.375, + "step": 70 + }, + { + "epoch": 0.0006720875417688208, + "grad_norm": 3.4987003803253174, + "learning_rate": 4.4794952681388014e-08, + "loss": 0.8208, + "step": 71 + }, + { + "epoch": 0.0006815535634838747, + "grad_norm": 1166.2076416015625, + "learning_rate": 4.5425867507886434e-08, + "loss": 125.9375, + "step": 72 + }, + { + "epoch": 0.0006910195851989285, + "grad_norm": 2768.10693359375, + "learning_rate": 4.6056782334384854e-08, + "loss": 143.375, + "step": 73 + }, + { + "epoch": 0.0007004856069139822, + "grad_norm": 12647.724609375, + "learning_rate": 4.6687697160883274e-08, + "loss": 117.25, + "step": 74 + }, + { + "epoch": 0.0007099516286290361, + "grad_norm": 1172.857177734375, + "learning_rate": 4.73186119873817e-08, + "loss": 126.6875, + "step": 75 + }, + { + "epoch": 0.0007194176503440899, + "grad_norm": 3843.252685546875, + "learning_rate": 4.794952681388013e-08, + "loss": 190.0625, + "step": 76 + }, + { + "epoch": 0.0007288836720591437, + "grad_norm": 1368.9404296875, + "learning_rate": 4.858044164037855e-08, + "loss": 123.875, + "step": 77 + }, + { + "epoch": 0.0007383496937741975, + "grad_norm": 1601.910400390625, + "learning_rate": 4.921135646687697e-08, + "loss": 140.25, + "step": 78 + }, + { + "epoch": 0.0007478157154892514, + "grad_norm": 1746.275634765625, + "learning_rate": 4.9842271293375394e-08, + "loss": 204.0, + "step": 79 + }, + { + "epoch": 0.0007572817372043051, + "grad_norm": 1086.4010009765625, + "learning_rate": 5.0473186119873814e-08, + "loss": 126.375, + "step": 80 + }, + { + "epoch": 0.0007667477589193589, + "grad_norm": 1508.918701171875, + "learning_rate": 5.1104100946372234e-08, + "loss": 194.75, + "step": 81 + }, + { + "epoch": 0.0007762137806344128, + "grad_norm": 1799.4818115234375, + "learning_rate": 5.173501577287066e-08, + "loss": 163.125, + "step": 82 + }, + { + "epoch": 0.0007856798023494666, + "grad_norm": 4171.857421875, + "learning_rate": 5.236593059936909e-08, + "loss": 168.8125, + "step": 83 + }, + { + "epoch": 0.0007951458240645204, + "grad_norm": 2.688009023666382, + "learning_rate": 5.299684542586751e-08, + "loss": 0.8501, + "step": 84 + }, + { + "epoch": 0.0008046118457795742, + "grad_norm": 943.6417846679688, + "learning_rate": 5.362776025236593e-08, + "loss": 123.0625, + "step": 85 + }, + { + "epoch": 0.0008140778674946281, + "grad_norm": 7223.08984375, + "learning_rate": 5.425867507886435e-08, + "loss": 134.6875, + "step": 86 + }, + { + "epoch": 0.0008235438892096818, + "grad_norm": 1965.09521484375, + "learning_rate": 5.488958990536277e-08, + "loss": 149.3125, + "step": 87 + }, + { + "epoch": 0.0008330099109247357, + "grad_norm": 1159.4515380859375, + "learning_rate": 5.55205047318612e-08, + "loss": 181.0, + "step": 88 + }, + { + "epoch": 0.0008424759326397895, + "grad_norm": 1556.3966064453125, + "learning_rate": 5.615141955835962e-08, + "loss": 122.4375, + "step": 89 + }, + { + "epoch": 0.0008519419543548432, + "grad_norm": 1305.815673828125, + "learning_rate": 5.678233438485804e-08, + "loss": 136.8125, + "step": 90 + }, + { + "epoch": 0.0008614079760698971, + "grad_norm": 1126.86572265625, + "learning_rate": 5.741324921135646e-08, + "loss": 140.5625, + "step": 91 + }, + { + "epoch": 0.0008708739977849509, + "grad_norm": 1148.200927734375, + "learning_rate": 5.804416403785489e-08, + "loss": 128.0, + "step": 92 + }, + { + "epoch": 0.0008803400195000048, + "grad_norm": 1909.9720458984375, + "learning_rate": 5.867507886435331e-08, + "loss": 119.625, + "step": 93 + }, + { + "epoch": 0.0008898060412150585, + "grad_norm": 1243.28759765625, + "learning_rate": 5.9305993690851735e-08, + "loss": 121.625, + "step": 94 + }, + { + "epoch": 0.0008992720629301124, + "grad_norm": 1465.4775390625, + "learning_rate": 5.993690851735016e-08, + "loss": 126.375, + "step": 95 + }, + { + "epoch": 0.0009087380846451662, + "grad_norm": 1299.260498046875, + "learning_rate": 6.056782334384857e-08, + "loss": 126.875, + "step": 96 + }, + { + "epoch": 0.0009182041063602199, + "grad_norm": 2510.9443359375, + "learning_rate": 6.1198738170347e-08, + "loss": 178.625, + "step": 97 + }, + { + "epoch": 0.0009276701280752738, + "grad_norm": 1369.9324951171875, + "learning_rate": 6.182965299684543e-08, + "loss": 171.0, + "step": 98 + }, + { + "epoch": 0.0009371361497903276, + "grad_norm": 2049.669189453125, + "learning_rate": 6.246056782334384e-08, + "loss": 190.75, + "step": 99 + }, + { + "epoch": 0.0009466021715053815, + "grad_norm": 930.5064697265625, + "learning_rate": 6.309148264984227e-08, + "loss": 129.8125, + "step": 100 + }, + { + "epoch": 0.0009560681932204352, + "grad_norm": 3215.50439453125, + "learning_rate": 6.372239747634068e-08, + "loss": 129.25, + "step": 101 + }, + { + "epoch": 0.0009655342149354891, + "grad_norm": 3203.063232421875, + "learning_rate": 6.435331230283911e-08, + "loss": 131.75, + "step": 102 + }, + { + "epoch": 0.0009750002366505429, + "grad_norm": 1127.2569580078125, + "learning_rate": 6.498422712933755e-08, + "loss": 128.4375, + "step": 103 + }, + { + "epoch": 0.0009844662583655967, + "grad_norm": 1534.4344482421875, + "learning_rate": 6.561514195583596e-08, + "loss": 124.9375, + "step": 104 + }, + { + "epoch": 0.0009939322800806504, + "grad_norm": 1447.319091796875, + "learning_rate": 6.624605678233439e-08, + "loss": 134.875, + "step": 105 + }, + { + "epoch": 0.0010033983017957043, + "grad_norm": 2187.002197265625, + "learning_rate": 6.68769716088328e-08, + "loss": 169.0, + "step": 106 + }, + { + "epoch": 0.0010128643235107582, + "grad_norm": 1135.5521240234375, + "learning_rate": 6.750788643533123e-08, + "loss": 151.125, + "step": 107 + }, + { + "epoch": 0.001022330345225812, + "grad_norm": 1968.72998046875, + "learning_rate": 6.813880126182965e-08, + "loss": 207.4375, + "step": 108 + }, + { + "epoch": 0.0010317963669408657, + "grad_norm": 1163.6510009765625, + "learning_rate": 6.876971608832807e-08, + "loss": 172.875, + "step": 109 + }, + { + "epoch": 0.0010412623886559196, + "grad_norm": 985.3278198242188, + "learning_rate": 6.94006309148265e-08, + "loss": 126.25, + "step": 110 + }, + { + "epoch": 0.0010507284103709734, + "grad_norm": 1956.7113037109375, + "learning_rate": 7.003154574132492e-08, + "loss": 120.3125, + "step": 111 + }, + { + "epoch": 0.0010601944320860273, + "grad_norm": 2200.677734375, + "learning_rate": 7.066246056782333e-08, + "loss": 239.625, + "step": 112 + }, + { + "epoch": 0.001069660453801081, + "grad_norm": 1477.730712890625, + "learning_rate": 7.129337539432176e-08, + "loss": 136.9375, + "step": 113 + }, + { + "epoch": 0.0010791264755161348, + "grad_norm": 2043.1361083984375, + "learning_rate": 7.192429022082017e-08, + "loss": 172.5625, + "step": 114 + }, + { + "epoch": 0.0010885924972311887, + "grad_norm": 1399.99365234375, + "learning_rate": 7.255520504731861e-08, + "loss": 196.375, + "step": 115 + }, + { + "epoch": 0.0010980585189462424, + "grad_norm": 2666.517333984375, + "learning_rate": 7.318611987381704e-08, + "loss": 121.0, + "step": 116 + }, + { + "epoch": 0.0011075245406612963, + "grad_norm": 2776.783447265625, + "learning_rate": 7.381703470031545e-08, + "loss": 166.1875, + "step": 117 + }, + { + "epoch": 0.0011169905623763501, + "grad_norm": 3128.35595703125, + "learning_rate": 7.444794952681388e-08, + "loss": 193.5, + "step": 118 + }, + { + "epoch": 0.001126456584091404, + "grad_norm": 3239.6884765625, + "learning_rate": 7.507886435331231e-08, + "loss": 126.5, + "step": 119 + }, + { + "epoch": 0.0011359226058064577, + "grad_norm": 1262.8270263671875, + "learning_rate": 7.570977917981072e-08, + "loss": 122.9375, + "step": 120 + }, + { + "epoch": 0.0011453886275215115, + "grad_norm": 1134.215087890625, + "learning_rate": 7.634069400630915e-08, + "loss": 135.375, + "step": 121 + }, + { + "epoch": 0.0011548546492365654, + "grad_norm": 1192.8763427734375, + "learning_rate": 7.697160883280756e-08, + "loss": 132.5, + "step": 122 + }, + { + "epoch": 0.001164320670951619, + "grad_norm": 1062.813232421875, + "learning_rate": 7.760252365930599e-08, + "loss": 128.0625, + "step": 123 + }, + { + "epoch": 0.001173786692666673, + "grad_norm": 1235.7606201171875, + "learning_rate": 7.823343848580441e-08, + "loss": 135.875, + "step": 124 + }, + { + "epoch": 0.0011832527143817268, + "grad_norm": 1610.4808349609375, + "learning_rate": 7.886435331230283e-08, + "loss": 185.75, + "step": 125 + }, + { + "epoch": 0.0011927187360967807, + "grad_norm": 1205.70751953125, + "learning_rate": 7.949526813880126e-08, + "loss": 121.5, + "step": 126 + }, + { + "epoch": 0.0012021847578118344, + "grad_norm": 1320.756103515625, + "learning_rate": 8.012618296529967e-08, + "loss": 128.375, + "step": 127 + }, + { + "epoch": 0.0012116507795268882, + "grad_norm": 1072.24365234375, + "learning_rate": 8.075709779179811e-08, + "loss": 122.4375, + "step": 128 + }, + { + "epoch": 0.001221116801241942, + "grad_norm": 1348.376220703125, + "learning_rate": 8.138801261829654e-08, + "loss": 154.125, + "step": 129 + }, + { + "epoch": 0.0012305828229569958, + "grad_norm": 1366.898193359375, + "learning_rate": 8.201892744479495e-08, + "loss": 127.0, + "step": 130 + }, + { + "epoch": 0.0012400488446720496, + "grad_norm": 1560.385009765625, + "learning_rate": 8.264984227129338e-08, + "loss": 151.3125, + "step": 131 + }, + { + "epoch": 0.0012495148663871035, + "grad_norm": 1252.733154296875, + "learning_rate": 8.32807570977918e-08, + "loss": 130.5625, + "step": 132 + }, + { + "epoch": 0.0012589808881021574, + "grad_norm": 1223.7322998046875, + "learning_rate": 8.391167192429022e-08, + "loss": 125.1875, + "step": 133 + }, + { + "epoch": 0.001268446909817211, + "grad_norm": 1506.0888671875, + "learning_rate": 8.454258675078864e-08, + "loss": 198.375, + "step": 134 + }, + { + "epoch": 0.001277912931532265, + "grad_norm": 1833.3076171875, + "learning_rate": 8.517350157728706e-08, + "loss": 125.75, + "step": 135 + }, + { + "epoch": 0.0012873789532473188, + "grad_norm": 998.892333984375, + "learning_rate": 8.580441640378548e-08, + "loss": 129.0625, + "step": 136 + }, + { + "epoch": 0.0012968449749623725, + "grad_norm": 1755.04443359375, + "learning_rate": 8.643533123028391e-08, + "loss": 129.1875, + "step": 137 + }, + { + "epoch": 0.0013063109966774263, + "grad_norm": 8819.3583984375, + "learning_rate": 8.706624605678232e-08, + "loss": 133.25, + "step": 138 + }, + { + "epoch": 0.0013157770183924802, + "grad_norm": 3.8019556999206543, + "learning_rate": 8.769716088328075e-08, + "loss": 0.8831, + "step": 139 + }, + { + "epoch": 0.001325243040107534, + "grad_norm": 1276.834228515625, + "learning_rate": 8.832807570977918e-08, + "loss": 198.5, + "step": 140 + }, + { + "epoch": 0.0013347090618225877, + "grad_norm": 1510.272705078125, + "learning_rate": 8.89589905362776e-08, + "loss": 182.75, + "step": 141 + }, + { + "epoch": 0.0013441750835376416, + "grad_norm": 1264.444580078125, + "learning_rate": 8.958990536277603e-08, + "loss": 123.9375, + "step": 142 + }, + { + "epoch": 0.0013536411052526955, + "grad_norm": 1861.7518310546875, + "learning_rate": 9.022082018927444e-08, + "loss": 128.1875, + "step": 143 + }, + { + "epoch": 0.0013631071269677494, + "grad_norm": 2205.286376953125, + "learning_rate": 9.085173501577287e-08, + "loss": 178.5, + "step": 144 + }, + { + "epoch": 0.001372573148682803, + "grad_norm": 971.6563720703125, + "learning_rate": 9.14826498422713e-08, + "loss": 156.4375, + "step": 145 + }, + { + "epoch": 0.001382039170397857, + "grad_norm": 1339.9732666015625, + "learning_rate": 9.211356466876971e-08, + "loss": 126.125, + "step": 146 + }, + { + "epoch": 0.0013915051921129108, + "grad_norm": 1140.3096923828125, + "learning_rate": 9.274447949526814e-08, + "loss": 139.0625, + "step": 147 + }, + { + "epoch": 0.0014009712138279644, + "grad_norm": 3349.97412109375, + "learning_rate": 9.337539432176655e-08, + "loss": 146.875, + "step": 148 + }, + { + "epoch": 0.0014104372355430183, + "grad_norm": 1194.341064453125, + "learning_rate": 9.400630914826498e-08, + "loss": 140.75, + "step": 149 + }, + { + "epoch": 0.0014199032572580722, + "grad_norm": 1417.7950439453125, + "learning_rate": 9.46372239747634e-08, + "loss": 168.625, + "step": 150 + }, + { + "epoch": 0.001429369278973126, + "grad_norm": 1442.929443359375, + "learning_rate": 9.526813880126182e-08, + "loss": 216.625, + "step": 151 + }, + { + "epoch": 0.0014388353006881797, + "grad_norm": 1922.5452880859375, + "learning_rate": 9.589905362776026e-08, + "loss": 125.0, + "step": 152 + }, + { + "epoch": 0.0014483013224032336, + "grad_norm": 1191.654052734375, + "learning_rate": 9.652996845425868e-08, + "loss": 162.5625, + "step": 153 + }, + { + "epoch": 0.0014577673441182875, + "grad_norm": 1889.33544921875, + "learning_rate": 9.71608832807571e-08, + "loss": 187.75, + "step": 154 + }, + { + "epoch": 0.0014672333658333411, + "grad_norm": 2.8472564220428467, + "learning_rate": 9.779179810725552e-08, + "loss": 0.9492, + "step": 155 + }, + { + "epoch": 0.001476699387548395, + "grad_norm": 4.297066688537598, + "learning_rate": 9.842271293375394e-08, + "loss": 0.8535, + "step": 156 + }, + { + "epoch": 0.0014861654092634489, + "grad_norm": 1217.4825439453125, + "learning_rate": 9.905362776025236e-08, + "loss": 180.875, + "step": 157 + }, + { + "epoch": 0.0014956314309785028, + "grad_norm": 6719.46435546875, + "learning_rate": 9.968454258675079e-08, + "loss": 154.125, + "step": 158 + }, + { + "epoch": 0.0015050974526935564, + "grad_norm": 6595.26904296875, + "learning_rate": 1.003154574132492e-07, + "loss": 136.75, + "step": 159 + }, + { + "epoch": 0.0015145634744086103, + "grad_norm": 1852.66650390625, + "learning_rate": 1.0094637223974763e-07, + "loss": 211.75, + "step": 160 + }, + { + "epoch": 0.0015240294961236642, + "grad_norm": 1454.290771484375, + "learning_rate": 1.0157728706624604e-07, + "loss": 195.3125, + "step": 161 + }, + { + "epoch": 0.0015334955178387178, + "grad_norm": 1459.72119140625, + "learning_rate": 1.0220820189274447e-07, + "loss": 117.875, + "step": 162 + }, + { + "epoch": 0.0015429615395537717, + "grad_norm": 1639.9857177734375, + "learning_rate": 1.028391167192429e-07, + "loss": 196.25, + "step": 163 + }, + { + "epoch": 0.0015524275612688256, + "grad_norm": 1589.5120849609375, + "learning_rate": 1.0347003154574132e-07, + "loss": 182.75, + "step": 164 + }, + { + "epoch": 0.0015618935829838794, + "grad_norm": 1731.669189453125, + "learning_rate": 1.0410094637223975e-07, + "loss": 212.125, + "step": 165 + }, + { + "epoch": 0.001571359604698933, + "grad_norm": 1750.7200927734375, + "learning_rate": 1.0473186119873818e-07, + "loss": 177.3125, + "step": 166 + }, + { + "epoch": 0.001580825626413987, + "grad_norm": 1236.463623046875, + "learning_rate": 1.0536277602523659e-07, + "loss": 126.3125, + "step": 167 + }, + { + "epoch": 0.0015902916481290409, + "grad_norm": 2817.97802734375, + "learning_rate": 1.0599369085173502e-07, + "loss": 119.0625, + "step": 168 + }, + { + "epoch": 0.0015997576698440945, + "grad_norm": 1407.7593994140625, + "learning_rate": 1.0662460567823343e-07, + "loss": 131.625, + "step": 169 + }, + { + "epoch": 0.0016092236915591484, + "grad_norm": 1887.056884765625, + "learning_rate": 1.0725552050473186e-07, + "loss": 130.0625, + "step": 170 + }, + { + "epoch": 0.0016186897132742023, + "grad_norm": 967.4691162109375, + "learning_rate": 1.0788643533123028e-07, + "loss": 122.5625, + "step": 171 + }, + { + "epoch": 0.0016281557349892561, + "grad_norm": 1067.241943359375, + "learning_rate": 1.085173501577287e-07, + "loss": 133.875, + "step": 172 + }, + { + "epoch": 0.0016376217567043098, + "grad_norm": 1179.1019287109375, + "learning_rate": 1.0914826498422712e-07, + "loss": 126.3125, + "step": 173 + }, + { + "epoch": 0.0016470877784193637, + "grad_norm": 1104.7216796875, + "learning_rate": 1.0977917981072554e-07, + "loss": 169.5625, + "step": 174 + }, + { + "epoch": 0.0016565538001344176, + "grad_norm": 2763.822998046875, + "learning_rate": 1.1041009463722396e-07, + "loss": 119.125, + "step": 175 + }, + { + "epoch": 0.0016660198218494714, + "grad_norm": 1609.243408203125, + "learning_rate": 1.110410094637224e-07, + "loss": 180.875, + "step": 176 + }, + { + "epoch": 0.001675485843564525, + "grad_norm": 1183.4676513671875, + "learning_rate": 1.1167192429022082e-07, + "loss": 160.9375, + "step": 177 + }, + { + "epoch": 0.001684951865279579, + "grad_norm": 3891.11181640625, + "learning_rate": 1.1230283911671924e-07, + "loss": 145.0625, + "step": 178 + }, + { + "epoch": 0.0016944178869946328, + "grad_norm": 1784.876220703125, + "learning_rate": 1.1293375394321767e-07, + "loss": 120.1875, + "step": 179 + }, + { + "epoch": 0.0017038839087096865, + "grad_norm": 1114.09375, + "learning_rate": 1.1356466876971608e-07, + "loss": 121.0625, + "step": 180 + }, + { + "epoch": 0.0017133499304247404, + "grad_norm": 1295.1387939453125, + "learning_rate": 1.1419558359621451e-07, + "loss": 192.6875, + "step": 181 + }, + { + "epoch": 0.0017228159521397942, + "grad_norm": 1200.2640380859375, + "learning_rate": 1.1482649842271292e-07, + "loss": 135.6875, + "step": 182 + }, + { + "epoch": 0.0017322819738548481, + "grad_norm": 1294.312255859375, + "learning_rate": 1.1545741324921135e-07, + "loss": 177.375, + "step": 183 + }, + { + "epoch": 0.0017417479955699018, + "grad_norm": 3060.49853515625, + "learning_rate": 1.1608832807570978e-07, + "loss": 123.125, + "step": 184 + }, + { + "epoch": 0.0017512140172849557, + "grad_norm": 1267.408935546875, + "learning_rate": 1.1671924290220819e-07, + "loss": 117.125, + "step": 185 + }, + { + "epoch": 0.0017606800390000095, + "grad_norm": 1071.64892578125, + "learning_rate": 1.1735015772870662e-07, + "loss": 116.75, + "step": 186 + }, + { + "epoch": 0.0017701460607150632, + "grad_norm": 1181.3748779296875, + "learning_rate": 1.1798107255520503e-07, + "loss": 170.0, + "step": 187 + }, + { + "epoch": 0.001779612082430117, + "grad_norm": 1739.52294921875, + "learning_rate": 1.1861198738170347e-07, + "loss": 164.625, + "step": 188 + }, + { + "epoch": 0.001789078104145171, + "grad_norm": 1264.677978515625, + "learning_rate": 1.1924290220820188e-07, + "loss": 113.75, + "step": 189 + }, + { + "epoch": 0.0017985441258602248, + "grad_norm": 1270.1224365234375, + "learning_rate": 1.1987381703470032e-07, + "loss": 117.5625, + "step": 190 + }, + { + "epoch": 0.0018080101475752785, + "grad_norm": 1423.93701171875, + "learning_rate": 1.2050473186119874e-07, + "loss": 153.125, + "step": 191 + }, + { + "epoch": 0.0018174761692903323, + "grad_norm": 1510.7283935546875, + "learning_rate": 1.2113564668769715e-07, + "loss": 175.8125, + "step": 192 + }, + { + "epoch": 0.0018269421910053862, + "grad_norm": 1264.1031494140625, + "learning_rate": 1.217665615141956e-07, + "loss": 135.75, + "step": 193 + }, + { + "epoch": 0.0018364082127204399, + "grad_norm": 3.618422508239746, + "learning_rate": 1.22397476340694e-07, + "loss": 0.9531, + "step": 194 + }, + { + "epoch": 0.0018458742344354938, + "grad_norm": 1803.1566162109375, + "learning_rate": 1.2302839116719242e-07, + "loss": 156.75, + "step": 195 + }, + { + "epoch": 0.0018553402561505476, + "grad_norm": 3.9544999599456787, + "learning_rate": 1.2365930599369086e-07, + "loss": 1.0532, + "step": 196 + }, + { + "epoch": 0.0018648062778656015, + "grad_norm": 1403.880126953125, + "learning_rate": 1.2429022082018927e-07, + "loss": 132.125, + "step": 197 + }, + { + "epoch": 0.0018742722995806552, + "grad_norm": 1507.85595703125, + "learning_rate": 1.2492113564668768e-07, + "loss": 136.0, + "step": 198 + }, + { + "epoch": 0.001883738321295709, + "grad_norm": 1431.7271728515625, + "learning_rate": 1.2555205047318612e-07, + "loss": 114.1875, + "step": 199 + }, + { + "epoch": 0.001893204343010763, + "grad_norm": 1307.0106201171875, + "learning_rate": 1.2618296529968454e-07, + "loss": 110.75, + "step": 200 + }, + { + "epoch": 0.0019026703647258166, + "grad_norm": 2603.2822265625, + "learning_rate": 1.2681388012618298e-07, + "loss": 182.75, + "step": 201 + }, + { + "epoch": 0.0019121363864408704, + "grad_norm": 1452.68359375, + "learning_rate": 1.2744479495268136e-07, + "loss": 120.5625, + "step": 202 + }, + { + "epoch": 0.0019216024081559243, + "grad_norm": 1412.5966796875, + "learning_rate": 1.280757097791798e-07, + "loss": 124.0, + "step": 203 + }, + { + "epoch": 0.0019310684298709782, + "grad_norm": 1154.302734375, + "learning_rate": 1.2870662460567822e-07, + "loss": 124.25, + "step": 204 + }, + { + "epoch": 0.0019405344515860319, + "grad_norm": 1089.109619140625, + "learning_rate": 1.2933753943217666e-07, + "loss": 109.6875, + "step": 205 + }, + { + "epoch": 0.0019500004733010857, + "grad_norm": 2484.768798828125, + "learning_rate": 1.299684542586751e-07, + "loss": 129.3125, + "step": 206 + }, + { + "epoch": 0.0019594664950161396, + "grad_norm": 1709.1727294921875, + "learning_rate": 1.3059936908517348e-07, + "loss": 123.625, + "step": 207 + }, + { + "epoch": 0.0019689325167311935, + "grad_norm": 1218.867431640625, + "learning_rate": 1.3123028391167192e-07, + "loss": 135.0, + "step": 208 + }, + { + "epoch": 0.0019783985384462474, + "grad_norm": 2.9123761653900146, + "learning_rate": 1.3186119873817034e-07, + "loss": 0.9297, + "step": 209 + }, + { + "epoch": 0.001987864560161301, + "grad_norm": 3058.292724609375, + "learning_rate": 1.3249211356466878e-07, + "loss": 166.5, + "step": 210 + }, + { + "epoch": 0.0019973305818763547, + "grad_norm": 1505.275634765625, + "learning_rate": 1.331230283911672e-07, + "loss": 116.5625, + "step": 211 + }, + { + "epoch": 0.0020067966035914086, + "grad_norm": 1970.2532958984375, + "learning_rate": 1.337539432176656e-07, + "loss": 173.25, + "step": 212 + }, + { + "epoch": 0.0020162626253064624, + "grad_norm": 1390.5252685546875, + "learning_rate": 1.3438485804416402e-07, + "loss": 108.625, + "step": 213 + }, + { + "epoch": 0.0020257286470215163, + "grad_norm": 1182.1207275390625, + "learning_rate": 1.3501577287066246e-07, + "loss": 174.625, + "step": 214 + }, + { + "epoch": 0.00203519466873657, + "grad_norm": 1183.169677734375, + "learning_rate": 1.3564668769716087e-07, + "loss": 156.25, + "step": 215 + }, + { + "epoch": 0.002044660690451624, + "grad_norm": 2129.21875, + "learning_rate": 1.362776025236593e-07, + "loss": 108.375, + "step": 216 + }, + { + "epoch": 0.0020541267121666775, + "grad_norm": 1442.5628662109375, + "learning_rate": 1.369085173501577e-07, + "loss": 186.6875, + "step": 217 + }, + { + "epoch": 0.0020635927338817314, + "grad_norm": 1893.0543212890625, + "learning_rate": 1.3753943217665614e-07, + "loss": 111.4375, + "step": 218 + }, + { + "epoch": 0.0020730587555967852, + "grad_norm": 1824.6494140625, + "learning_rate": 1.3817034700315458e-07, + "loss": 163.8125, + "step": 219 + }, + { + "epoch": 0.002082524777311839, + "grad_norm": 1242.5086669921875, + "learning_rate": 1.38801261829653e-07, + "loss": 117.625, + "step": 220 + }, + { + "epoch": 0.002091990799026893, + "grad_norm": 1262.830810546875, + "learning_rate": 1.3943217665615143e-07, + "loss": 116.8125, + "step": 221 + }, + { + "epoch": 0.002101456820741947, + "grad_norm": 2042.3402099609375, + "learning_rate": 1.4006309148264984e-07, + "loss": 119.0625, + "step": 222 + }, + { + "epoch": 0.0021109228424570007, + "grad_norm": 2098.223876953125, + "learning_rate": 1.4069400630914826e-07, + "loss": 121.25, + "step": 223 + }, + { + "epoch": 0.0021203888641720546, + "grad_norm": 1642.031494140625, + "learning_rate": 1.4132492113564667e-07, + "loss": 149.8125, + "step": 224 + }, + { + "epoch": 0.002129854885887108, + "grad_norm": 1537.3067626953125, + "learning_rate": 1.419558359621451e-07, + "loss": 114.75, + "step": 225 + }, + { + "epoch": 0.002139320907602162, + "grad_norm": 1660.147705078125, + "learning_rate": 1.4258675078864352e-07, + "loss": 166.5625, + "step": 226 + }, + { + "epoch": 0.002148786929317216, + "grad_norm": 1099.3323974609375, + "learning_rate": 1.4321766561514196e-07, + "loss": 158.5625, + "step": 227 + }, + { + "epoch": 0.0021582529510322697, + "grad_norm": 1468.4925537109375, + "learning_rate": 1.4384858044164035e-07, + "loss": 108.9375, + "step": 228 + }, + { + "epoch": 0.0021677189727473236, + "grad_norm": 2.995248556137085, + "learning_rate": 1.444794952681388e-07, + "loss": 0.7837, + "step": 229 + }, + { + "epoch": 0.0021771849944623774, + "grad_norm": 1293.233154296875, + "learning_rate": 1.4511041009463723e-07, + "loss": 121.0, + "step": 230 + }, + { + "epoch": 0.0021866510161774313, + "grad_norm": 1048.2811279296875, + "learning_rate": 1.4574132492113564e-07, + "loss": 146.25, + "step": 231 + }, + { + "epoch": 0.0021961170378924848, + "grad_norm": 1430.478515625, + "learning_rate": 1.4637223974763408e-07, + "loss": 188.125, + "step": 232 + }, + { + "epoch": 0.0022055830596075386, + "grad_norm": 2140.932861328125, + "learning_rate": 1.4700315457413247e-07, + "loss": 159.25, + "step": 233 + }, + { + "epoch": 0.0022150490813225925, + "grad_norm": 1756.155517578125, + "learning_rate": 1.476340694006309e-07, + "loss": 113.6875, + "step": 234 + }, + { + "epoch": 0.0022245151030376464, + "grad_norm": 1116.3638916015625, + "learning_rate": 1.4826498422712932e-07, + "loss": 129.0625, + "step": 235 + }, + { + "epoch": 0.0022339811247527003, + "grad_norm": 2416.242919921875, + "learning_rate": 1.4889589905362776e-07, + "loss": 204.8125, + "step": 236 + }, + { + "epoch": 0.002243447146467754, + "grad_norm": 3.380565643310547, + "learning_rate": 1.4952681388012618e-07, + "loss": 0.8159, + "step": 237 + }, + { + "epoch": 0.002252913168182808, + "grad_norm": 2446.087158203125, + "learning_rate": 1.5015772870662462e-07, + "loss": 131.9375, + "step": 238 + }, + { + "epoch": 0.0022623791898978615, + "grad_norm": 1765.4295654296875, + "learning_rate": 1.50788643533123e-07, + "loss": 111.1875, + "step": 239 + }, + { + "epoch": 0.0022718452116129153, + "grad_norm": 2165.679443359375, + "learning_rate": 1.5141955835962144e-07, + "loss": 152.5, + "step": 240 + }, + { + "epoch": 0.002281311233327969, + "grad_norm": 2481.47509765625, + "learning_rate": 1.5205047318611986e-07, + "loss": 197.5, + "step": 241 + }, + { + "epoch": 0.002290777255043023, + "grad_norm": 1630.9371337890625, + "learning_rate": 1.526813880126183e-07, + "loss": 99.6875, + "step": 242 + }, + { + "epoch": 0.002300243276758077, + "grad_norm": 1309.2384033203125, + "learning_rate": 1.5331230283911674e-07, + "loss": 108.625, + "step": 243 + }, + { + "epoch": 0.002309709298473131, + "grad_norm": 2.55615496635437, + "learning_rate": 1.5394321766561512e-07, + "loss": 0.7717, + "step": 244 + }, + { + "epoch": 0.0023191753201881847, + "grad_norm": 1510.5743408203125, + "learning_rate": 1.5457413249211356e-07, + "loss": 181.5, + "step": 245 + }, + { + "epoch": 0.002328641341903238, + "grad_norm": 3.5962634086608887, + "learning_rate": 1.5520504731861198e-07, + "loss": 0.8721, + "step": 246 + }, + { + "epoch": 0.002338107363618292, + "grad_norm": 1075.9527587890625, + "learning_rate": 1.5583596214511042e-07, + "loss": 117.5625, + "step": 247 + }, + { + "epoch": 0.002347573385333346, + "grad_norm": 1590.4312744140625, + "learning_rate": 1.5646687697160883e-07, + "loss": 107.0625, + "step": 248 + }, + { + "epoch": 0.0023570394070483998, + "grad_norm": 1869.1075439453125, + "learning_rate": 1.5709779179810724e-07, + "loss": 117.125, + "step": 249 + }, + { + "epoch": 0.0023665054287634536, + "grad_norm": 1214.6466064453125, + "learning_rate": 1.5772870662460566e-07, + "loss": 111.5625, + "step": 250 + }, + { + "epoch": 0.0023759714504785075, + "grad_norm": 1314.9755859375, + "learning_rate": 1.583596214511041e-07, + "loss": 167.1875, + "step": 251 + }, + { + "epoch": 0.0023854374721935614, + "grad_norm": 1649.4871826171875, + "learning_rate": 1.589905362776025e-07, + "loss": 167.375, + "step": 252 + }, + { + "epoch": 0.002394903493908615, + "grad_norm": 990.2603149414062, + "learning_rate": 1.5962145110410095e-07, + "loss": 114.875, + "step": 253 + }, + { + "epoch": 0.0024043695156236687, + "grad_norm": 2154.75244140625, + "learning_rate": 1.6025236593059934e-07, + "loss": 179.375, + "step": 254 + }, + { + "epoch": 0.0024138355373387226, + "grad_norm": 1608.8109130859375, + "learning_rate": 1.6088328075709778e-07, + "loss": 158.625, + "step": 255 + }, + { + "epoch": 0.0024233015590537765, + "grad_norm": 1513.909423828125, + "learning_rate": 1.6151419558359622e-07, + "loss": 111.5625, + "step": 256 + }, + { + "epoch": 0.0024327675807688303, + "grad_norm": 1748.3988037109375, + "learning_rate": 1.6214511041009463e-07, + "loss": 197.5, + "step": 257 + }, + { + "epoch": 0.002442233602483884, + "grad_norm": 1518.42724609375, + "learning_rate": 1.6277602523659307e-07, + "loss": 149.875, + "step": 258 + }, + { + "epoch": 0.002451699624198938, + "grad_norm": 1262.2388916015625, + "learning_rate": 1.6340694006309146e-07, + "loss": 165.5625, + "step": 259 + }, + { + "epoch": 0.0024611656459139915, + "grad_norm": 1460.620849609375, + "learning_rate": 1.640378548895899e-07, + "loss": 102.875, + "step": 260 + }, + { + "epoch": 0.0024706316676290454, + "grad_norm": 1272.4871826171875, + "learning_rate": 1.646687697160883e-07, + "loss": 100.8125, + "step": 261 + }, + { + "epoch": 0.0024800976893440993, + "grad_norm": 1024.879638671875, + "learning_rate": 1.6529968454258675e-07, + "loss": 94.625, + "step": 262 + }, + { + "epoch": 0.002489563711059153, + "grad_norm": 1157.3880615234375, + "learning_rate": 1.6593059936908516e-07, + "loss": 103.75, + "step": 263 + }, + { + "epoch": 0.002499029732774207, + "grad_norm": 1094.3538818359375, + "learning_rate": 1.665615141955836e-07, + "loss": 105.4375, + "step": 264 + }, + { + "epoch": 0.002508495754489261, + "grad_norm": 1282.859375, + "learning_rate": 1.67192429022082e-07, + "loss": 103.0, + "step": 265 + }, + { + "epoch": 0.0025179617762043148, + "grad_norm": 999.6652221679688, + "learning_rate": 1.6782334384858043e-07, + "loss": 121.375, + "step": 266 + }, + { + "epoch": 0.0025274277979193682, + "grad_norm": 1098.42041015625, + "learning_rate": 1.6845425867507887e-07, + "loss": 105.6875, + "step": 267 + }, + { + "epoch": 0.002536893819634422, + "grad_norm": 1749.0947265625, + "learning_rate": 1.6908517350157728e-07, + "loss": 106.6875, + "step": 268 + }, + { + "epoch": 0.002546359841349476, + "grad_norm": 2.4961538314819336, + "learning_rate": 1.6971608832807572e-07, + "loss": 0.9307, + "step": 269 + }, + { + "epoch": 0.00255582586306453, + "grad_norm": 1996.1824951171875, + "learning_rate": 1.703470031545741e-07, + "loss": 97.0, + "step": 270 + }, + { + "epoch": 0.0025652918847795837, + "grad_norm": 1302.5595703125, + "learning_rate": 1.7097791798107255e-07, + "loss": 179.8125, + "step": 271 + }, + { + "epoch": 0.0025747579064946376, + "grad_norm": 1315.5894775390625, + "learning_rate": 1.7160883280757096e-07, + "loss": 95.0625, + "step": 272 + }, + { + "epoch": 0.0025842239282096915, + "grad_norm": 1143.633056640625, + "learning_rate": 1.722397476340694e-07, + "loss": 115.25, + "step": 273 + }, + { + "epoch": 0.002593689949924745, + "grad_norm": 2092.668701171875, + "learning_rate": 1.7287066246056782e-07, + "loss": 200.0, + "step": 274 + }, + { + "epoch": 0.002603155971639799, + "grad_norm": 1296.0836181640625, + "learning_rate": 1.7350157728706623e-07, + "loss": 110.6875, + "step": 275 + }, + { + "epoch": 0.0026126219933548527, + "grad_norm": 2300.943603515625, + "learning_rate": 1.7413249211356464e-07, + "loss": 140.375, + "step": 276 + }, + { + "epoch": 0.0026220880150699065, + "grad_norm": 1423.821044921875, + "learning_rate": 1.7476340694006308e-07, + "loss": 97.75, + "step": 277 + }, + { + "epoch": 0.0026315540367849604, + "grad_norm": 1114.5367431640625, + "learning_rate": 1.753943217665615e-07, + "loss": 157.8125, + "step": 278 + }, + { + "epoch": 0.0026410200585000143, + "grad_norm": 1634.1494140625, + "learning_rate": 1.7602523659305994e-07, + "loss": 103.6875, + "step": 279 + }, + { + "epoch": 0.002650486080215068, + "grad_norm": 1019.2512817382812, + "learning_rate": 1.7665615141955835e-07, + "loss": 117.125, + "step": 280 + }, + { + "epoch": 0.0026599521019301216, + "grad_norm": 1276.6824951171875, + "learning_rate": 1.7728706624605676e-07, + "loss": 97.4375, + "step": 281 + }, + { + "epoch": 0.0026694181236451755, + "grad_norm": 945.8740844726562, + "learning_rate": 1.779179810725552e-07, + "loss": 99.25, + "step": 282 + }, + { + "epoch": 0.0026788841453602294, + "grad_norm": 1480.15380859375, + "learning_rate": 1.7854889589905362e-07, + "loss": 95.0625, + "step": 283 + }, + { + "epoch": 0.0026883501670752832, + "grad_norm": 1357.3424072265625, + "learning_rate": 1.7917981072555206e-07, + "loss": 156.6875, + "step": 284 + }, + { + "epoch": 0.002697816188790337, + "grad_norm": 1161.6617431640625, + "learning_rate": 1.7981072555205047e-07, + "loss": 108.8125, + "step": 285 + }, + { + "epoch": 0.002707282210505391, + "grad_norm": 5818.056640625, + "learning_rate": 1.8044164037854888e-07, + "loss": 134.0, + "step": 286 + }, + { + "epoch": 0.002716748232220445, + "grad_norm": 1151.5064697265625, + "learning_rate": 1.810725552050473e-07, + "loss": 93.125, + "step": 287 + }, + { + "epoch": 0.0027262142539354987, + "grad_norm": 3.9509263038635254, + "learning_rate": 1.8170347003154574e-07, + "loss": 0.9551, + "step": 288 + }, + { + "epoch": 0.002735680275650552, + "grad_norm": 4691.18017578125, + "learning_rate": 1.8233438485804415e-07, + "loss": 109.5, + "step": 289 + }, + { + "epoch": 0.002745146297365606, + "grad_norm": 1262.510498046875, + "learning_rate": 1.829652996845426e-07, + "loss": 128.1875, + "step": 290 + }, + { + "epoch": 0.00275461231908066, + "grad_norm": 2.6075966358184814, + "learning_rate": 1.83596214511041e-07, + "loss": 0.7854, + "step": 291 + }, + { + "epoch": 0.002764078340795714, + "grad_norm": 1320.4666748046875, + "learning_rate": 1.8422712933753942e-07, + "loss": 103.875, + "step": 292 + }, + { + "epoch": 0.0027735443625107677, + "grad_norm": 1305.63427734375, + "learning_rate": 1.8485804416403786e-07, + "loss": 168.2812, + "step": 293 + }, + { + "epoch": 0.0027830103842258216, + "grad_norm": 999.3013916015625, + "learning_rate": 1.8548895899053627e-07, + "loss": 117.8125, + "step": 294 + }, + { + "epoch": 0.0027924764059408754, + "grad_norm": 1395.64453125, + "learning_rate": 1.861198738170347e-07, + "loss": 155.3125, + "step": 295 + }, + { + "epoch": 0.002801942427655929, + "grad_norm": 1065.287841796875, + "learning_rate": 1.867507886435331e-07, + "loss": 107.25, + "step": 296 + }, + { + "epoch": 0.0028114084493709827, + "grad_norm": 1983.1998291015625, + "learning_rate": 1.8738170347003154e-07, + "loss": 195.75, + "step": 297 + }, + { + "epoch": 0.0028208744710860366, + "grad_norm": 1196.0794677734375, + "learning_rate": 1.8801261829652995e-07, + "loss": 100.5, + "step": 298 + }, + { + "epoch": 0.0028303404928010905, + "grad_norm": 2351.440185546875, + "learning_rate": 1.886435331230284e-07, + "loss": 158.75, + "step": 299 + }, + { + "epoch": 0.0028398065145161444, + "grad_norm": 2.9501140117645264, + "learning_rate": 1.892744479495268e-07, + "loss": 0.7939, + "step": 300 + }, + { + "epoch": 0.0028492725362311982, + "grad_norm": 1298.6029052734375, + "learning_rate": 1.8990536277602522e-07, + "loss": 184.5, + "step": 301 + }, + { + "epoch": 0.002858738557946252, + "grad_norm": 1429.816162109375, + "learning_rate": 1.9053627760252363e-07, + "loss": 167.875, + "step": 302 + }, + { + "epoch": 0.0028682045796613056, + "grad_norm": 1341.813232421875, + "learning_rate": 1.9116719242902207e-07, + "loss": 170.9375, + "step": 303 + }, + { + "epoch": 0.0028776706013763594, + "grad_norm": 1841.7606201171875, + "learning_rate": 1.917981072555205e-07, + "loss": 104.75, + "step": 304 + }, + { + "epoch": 0.0028871366230914133, + "grad_norm": 5674.31103515625, + "learning_rate": 1.9242902208201892e-07, + "loss": 146.875, + "step": 305 + }, + { + "epoch": 0.002896602644806467, + "grad_norm": 1118.888916015625, + "learning_rate": 1.9305993690851736e-07, + "loss": 168.9375, + "step": 306 + }, + { + "epoch": 0.002906068666521521, + "grad_norm": 2.272533416748047, + "learning_rate": 1.9369085173501575e-07, + "loss": 0.7769, + "step": 307 + }, + { + "epoch": 0.002915534688236575, + "grad_norm": 1808.4525146484375, + "learning_rate": 1.943217665615142e-07, + "loss": 134.875, + "step": 308 + }, + { + "epoch": 0.002925000709951629, + "grad_norm": 1195.366943359375, + "learning_rate": 1.949526813880126e-07, + "loss": 105.5, + "step": 309 + }, + { + "epoch": 0.0029344667316666823, + "grad_norm": 960.0848388671875, + "learning_rate": 1.9558359621451104e-07, + "loss": 99.5, + "step": 310 + }, + { + "epoch": 0.002943932753381736, + "grad_norm": 1156.8896484375, + "learning_rate": 1.9621451104100946e-07, + "loss": 94.75, + "step": 311 + }, + { + "epoch": 0.00295339877509679, + "grad_norm": 1298.240234375, + "learning_rate": 1.9684542586750787e-07, + "loss": 105.0, + "step": 312 + }, + { + "epoch": 0.002962864796811844, + "grad_norm": 1073.2266845703125, + "learning_rate": 1.9747634069400628e-07, + "loss": 185.5625, + "step": 313 + }, + { + "epoch": 0.0029723308185268978, + "grad_norm": 1210.400146484375, + "learning_rate": 1.9810725552050472e-07, + "loss": 107.0625, + "step": 314 + }, + { + "epoch": 0.0029817968402419516, + "grad_norm": 1416.227783203125, + "learning_rate": 1.9873817034700316e-07, + "loss": 156.25, + "step": 315 + }, + { + "epoch": 0.0029912628619570055, + "grad_norm": 3768.859130859375, + "learning_rate": 1.9936908517350158e-07, + "loss": 166.875, + "step": 316 + }, + { + "epoch": 0.003000728883672059, + "grad_norm": 1410.2275390625, + "learning_rate": 2e-07, + "loss": 92.0625, + "step": 317 + }, + { + "epoch": 0.003010194905387113, + "grad_norm": 1235.7955322265625, + "learning_rate": 2.006309148264984e-07, + "loss": 85.25, + "step": 318 + }, + { + "epoch": 0.0030196609271021667, + "grad_norm": 1803.3367919921875, + "learning_rate": 2.0126182965299684e-07, + "loss": 89.5, + "step": 319 + }, + { + "epoch": 0.0030291269488172206, + "grad_norm": 1493.832275390625, + "learning_rate": 2.0189274447949526e-07, + "loss": 181.3125, + "step": 320 + }, + { + "epoch": 0.0030385929705322745, + "grad_norm": 2.9281601905822754, + "learning_rate": 2.025236593059937e-07, + "loss": 1.0322, + "step": 321 + }, + { + "epoch": 0.0030480589922473283, + "grad_norm": 1227.860595703125, + "learning_rate": 2.0315457413249208e-07, + "loss": 165.625, + "step": 322 + }, + { + "epoch": 0.003057525013962382, + "grad_norm": 1107.1368408203125, + "learning_rate": 2.0378548895899052e-07, + "loss": 83.3125, + "step": 323 + }, + { + "epoch": 0.0030669910356774356, + "grad_norm": 3.111886978149414, + "learning_rate": 2.0441640378548894e-07, + "loss": 0.7703, + "step": 324 + }, + { + "epoch": 0.0030764570573924895, + "grad_norm": 1125.5733642578125, + "learning_rate": 2.0504731861198738e-07, + "loss": 94.625, + "step": 325 + }, + { + "epoch": 0.0030859230791075434, + "grad_norm": 1059.1480712890625, + "learning_rate": 2.056782334384858e-07, + "loss": 106.375, + "step": 326 + }, + { + "epoch": 0.0030953891008225973, + "grad_norm": 898.36376953125, + "learning_rate": 2.063091482649842e-07, + "loss": 93.25, + "step": 327 + }, + { + "epoch": 0.003104855122537651, + "grad_norm": 1085.2352294921875, + "learning_rate": 2.0694006309148264e-07, + "loss": 86.25, + "step": 328 + }, + { + "epoch": 0.003114321144252705, + "grad_norm": 1363.849853515625, + "learning_rate": 2.0757097791798106e-07, + "loss": 141.375, + "step": 329 + }, + { + "epoch": 0.003123787165967759, + "grad_norm": 1281.955078125, + "learning_rate": 2.082018927444795e-07, + "loss": 87.125, + "step": 330 + }, + { + "epoch": 0.0031332531876828123, + "grad_norm": 3.2870306968688965, + "learning_rate": 2.088328075709779e-07, + "loss": 1.0161, + "step": 331 + }, + { + "epoch": 0.003142719209397866, + "grad_norm": 1306.583984375, + "learning_rate": 2.0946372239747635e-07, + "loss": 195.875, + "step": 332 + }, + { + "epoch": 0.00315218523111292, + "grad_norm": 1065.841552734375, + "learning_rate": 2.1009463722397474e-07, + "loss": 160.375, + "step": 333 + }, + { + "epoch": 0.003161651252827974, + "grad_norm": 959.3275756835938, + "learning_rate": 2.1072555205047318e-07, + "loss": 95.125, + "step": 334 + }, + { + "epoch": 0.003171117274543028, + "grad_norm": 1120.622802734375, + "learning_rate": 2.113564668769716e-07, + "loss": 106.1875, + "step": 335 + }, + { + "epoch": 0.0031805832962580817, + "grad_norm": 1932.004150390625, + "learning_rate": 2.1198738170347003e-07, + "loss": 132.5, + "step": 336 + }, + { + "epoch": 0.0031900493179731356, + "grad_norm": 1096.031494140625, + "learning_rate": 2.1261829652996844e-07, + "loss": 104.625, + "step": 337 + }, + { + "epoch": 0.003199515339688189, + "grad_norm": 1013.9035034179688, + "learning_rate": 2.1324921135646686e-07, + "loss": 134.625, + "step": 338 + }, + { + "epoch": 0.003208981361403243, + "grad_norm": 1022.3290405273438, + "learning_rate": 2.138801261829653e-07, + "loss": 178.5, + "step": 339 + }, + { + "epoch": 0.003218447383118297, + "grad_norm": 1113.2945556640625, + "learning_rate": 2.145110410094637e-07, + "loss": 108.25, + "step": 340 + }, + { + "epoch": 0.0032279134048333507, + "grad_norm": 959.1475219726562, + "learning_rate": 2.1514195583596215e-07, + "loss": 85.125, + "step": 341 + }, + { + "epoch": 0.0032373794265484045, + "grad_norm": 1559.513916015625, + "learning_rate": 2.1577287066246056e-07, + "loss": 140.9375, + "step": 342 + }, + { + "epoch": 0.0032468454482634584, + "grad_norm": 1166.2724609375, + "learning_rate": 2.1640378548895898e-07, + "loss": 108.25, + "step": 343 + }, + { + "epoch": 0.0032563114699785123, + "grad_norm": 912.3612670898438, + "learning_rate": 2.170347003154574e-07, + "loss": 87.125, + "step": 344 + }, + { + "epoch": 0.003265777491693566, + "grad_norm": 3.2482919692993164, + "learning_rate": 2.1766561514195583e-07, + "loss": 0.8613, + "step": 345 + }, + { + "epoch": 0.0032752435134086196, + "grad_norm": 765.2449951171875, + "learning_rate": 2.1829652996845424e-07, + "loss": 87.1875, + "step": 346 + }, + { + "epoch": 0.0032847095351236735, + "grad_norm": 1313.075439453125, + "learning_rate": 2.1892744479495268e-07, + "loss": 145.625, + "step": 347 + }, + { + "epoch": 0.0032941755568387274, + "grad_norm": 1417.8218994140625, + "learning_rate": 2.1955835962145107e-07, + "loss": 181.3125, + "step": 348 + }, + { + "epoch": 0.0033036415785537812, + "grad_norm": 1712.029541015625, + "learning_rate": 2.201892744479495e-07, + "loss": 197.5, + "step": 349 + }, + { + "epoch": 0.003313107600268835, + "grad_norm": 1993.977783203125, + "learning_rate": 2.2082018927444792e-07, + "loss": 166.75, + "step": 350 + }, + { + "epoch": 0.003322573621983889, + "grad_norm": 1887.4539794921875, + "learning_rate": 2.2145110410094636e-07, + "loss": 156.8125, + "step": 351 + }, + { + "epoch": 0.003332039643698943, + "grad_norm": 988.2648315429688, + "learning_rate": 2.220820189274448e-07, + "loss": 145.625, + "step": 352 + }, + { + "epoch": 0.0033415056654139963, + "grad_norm": 1204.1593017578125, + "learning_rate": 2.2271293375394322e-07, + "loss": 88.4062, + "step": 353 + }, + { + "epoch": 0.00335097168712905, + "grad_norm": 1393.5394287109375, + "learning_rate": 2.2334384858044163e-07, + "loss": 126.5, + "step": 354 + }, + { + "epoch": 0.003360437708844104, + "grad_norm": 1125.91259765625, + "learning_rate": 2.2397476340694004e-07, + "loss": 145.75, + "step": 355 + }, + { + "epoch": 0.003369903730559158, + "grad_norm": 1464.4053955078125, + "learning_rate": 2.2460567823343848e-07, + "loss": 85.0625, + "step": 356 + }, + { + "epoch": 0.003379369752274212, + "grad_norm": 1030.506103515625, + "learning_rate": 2.252365930599369e-07, + "loss": 95.4375, + "step": 357 + }, + { + "epoch": 0.0033888357739892657, + "grad_norm": 1144.7415771484375, + "learning_rate": 2.2586750788643534e-07, + "loss": 100.9375, + "step": 358 + }, + { + "epoch": 0.0033983017957043195, + "grad_norm": 961.9559326171875, + "learning_rate": 2.2649842271293372e-07, + "loss": 78.5625, + "step": 359 + }, + { + "epoch": 0.003407767817419373, + "grad_norm": 1093.2755126953125, + "learning_rate": 2.2712933753943216e-07, + "loss": 143.625, + "step": 360 + }, + { + "epoch": 0.003417233839134427, + "grad_norm": 1560.0389404296875, + "learning_rate": 2.2776025236593058e-07, + "loss": 172.375, + "step": 361 + }, + { + "epoch": 0.0034266998608494807, + "grad_norm": 985.8718872070312, + "learning_rate": 2.2839116719242902e-07, + "loss": 71.4688, + "step": 362 + }, + { + "epoch": 0.0034361658825645346, + "grad_norm": 2063.0419921875, + "learning_rate": 2.2902208201892746e-07, + "loss": 191.375, + "step": 363 + }, + { + "epoch": 0.0034456319042795885, + "grad_norm": 1101.048583984375, + "learning_rate": 2.2965299684542585e-07, + "loss": 98.4375, + "step": 364 + }, + { + "epoch": 0.0034550979259946424, + "grad_norm": 1168.1688232421875, + "learning_rate": 2.3028391167192428e-07, + "loss": 94.3438, + "step": 365 + }, + { + "epoch": 0.0034645639477096962, + "grad_norm": 1210.393798828125, + "learning_rate": 2.309148264984227e-07, + "loss": 163.75, + "step": 366 + }, + { + "epoch": 0.0034740299694247497, + "grad_norm": 1479.927734375, + "learning_rate": 2.3154574132492114e-07, + "loss": 187.9375, + "step": 367 + }, + { + "epoch": 0.0034834959911398036, + "grad_norm": 1318.9559326171875, + "learning_rate": 2.3217665615141955e-07, + "loss": 157.3438, + "step": 368 + }, + { + "epoch": 0.0034929620128548574, + "grad_norm": 1536.8331298828125, + "learning_rate": 2.3280757097791797e-07, + "loss": 113.3438, + "step": 369 + }, + { + "epoch": 0.0035024280345699113, + "grad_norm": 1066.8603515625, + "learning_rate": 2.3343848580441638e-07, + "loss": 142.8438, + "step": 370 + }, + { + "epoch": 0.003511894056284965, + "grad_norm": 752.5729370117188, + "learning_rate": 2.3406940063091482e-07, + "loss": 94.1875, + "step": 371 + }, + { + "epoch": 0.003521360078000019, + "grad_norm": 1427.775634765625, + "learning_rate": 2.3470031545741323e-07, + "loss": 92.4375, + "step": 372 + }, + { + "epoch": 0.003530826099715073, + "grad_norm": 896.3651123046875, + "learning_rate": 2.3533123028391167e-07, + "loss": 120.6875, + "step": 373 + }, + { + "epoch": 0.0035402921214301264, + "grad_norm": 886.4716796875, + "learning_rate": 2.3596214511041006e-07, + "loss": 84.0625, + "step": 374 + }, + { + "epoch": 0.0035497581431451803, + "grad_norm": 1201.2655029296875, + "learning_rate": 2.365930599369085e-07, + "loss": 169.0625, + "step": 375 + }, + { + "epoch": 0.003559224164860234, + "grad_norm": 1064.5067138671875, + "learning_rate": 2.3722397476340694e-07, + "loss": 88.375, + "step": 376 + }, + { + "epoch": 0.003568690186575288, + "grad_norm": 829.5634155273438, + "learning_rate": 2.3785488958990535e-07, + "loss": 133.875, + "step": 377 + }, + { + "epoch": 0.003578156208290342, + "grad_norm": 1119.84765625, + "learning_rate": 2.3848580441640377e-07, + "loss": 65.9375, + "step": 378 + }, + { + "epoch": 0.0035876222300053958, + "grad_norm": 1257.77099609375, + "learning_rate": 2.391167192429022e-07, + "loss": 73.375, + "step": 379 + }, + { + "epoch": 0.0035970882517204496, + "grad_norm": 960.9338989257812, + "learning_rate": 2.3974763406940064e-07, + "loss": 77.0625, + "step": 380 + }, + { + "epoch": 0.003606554273435503, + "grad_norm": 1820.6767578125, + "learning_rate": 2.4037854889589903e-07, + "loss": 173.375, + "step": 381 + }, + { + "epoch": 0.003616020295150557, + "grad_norm": 1180.593017578125, + "learning_rate": 2.4100946372239747e-07, + "loss": 85.5938, + "step": 382 + }, + { + "epoch": 0.003625486316865611, + "grad_norm": 784.6696166992188, + "learning_rate": 2.4164037854889586e-07, + "loss": 68.0312, + "step": 383 + }, + { + "epoch": 0.0036349523385806647, + "grad_norm": 1201.55810546875, + "learning_rate": 2.422712933753943e-07, + "loss": 77.6562, + "step": 384 + }, + { + "epoch": 0.0036444183602957186, + "grad_norm": 1909.580810546875, + "learning_rate": 2.4290220820189274e-07, + "loss": 142.375, + "step": 385 + }, + { + "epoch": 0.0036538843820107724, + "grad_norm": 1049.998779296875, + "learning_rate": 2.435331230283912e-07, + "loss": 154.3125, + "step": 386 + }, + { + "epoch": 0.0036633504037258263, + "grad_norm": 1079.589111328125, + "learning_rate": 2.4416403785488957e-07, + "loss": 87.0625, + "step": 387 + }, + { + "epoch": 0.0036728164254408798, + "grad_norm": 1307.552490234375, + "learning_rate": 2.44794952681388e-07, + "loss": 150.0625, + "step": 388 + }, + { + "epoch": 0.0036822824471559336, + "grad_norm": 41300.85546875, + "learning_rate": 2.4542586750788645e-07, + "loss": 349.0, + "step": 389 + }, + { + "epoch": 0.0036917484688709875, + "grad_norm": 1458.7481689453125, + "learning_rate": 2.4605678233438483e-07, + "loss": 115.125, + "step": 390 + }, + { + "epoch": 0.0037012144905860414, + "grad_norm": 1394.8209228515625, + "learning_rate": 2.4668769716088327e-07, + "loss": 121.0938, + "step": 391 + }, + { + "epoch": 0.0037106805123010953, + "grad_norm": 3100.234619140625, + "learning_rate": 2.473186119873817e-07, + "loss": 96.8438, + "step": 392 + }, + { + "epoch": 0.003720146534016149, + "grad_norm": 1114.331298828125, + "learning_rate": 2.479495268138801e-07, + "loss": 81.0625, + "step": 393 + }, + { + "epoch": 0.003729612555731203, + "grad_norm": 1301.73486328125, + "learning_rate": 2.4858044164037854e-07, + "loss": 80.4062, + "step": 394 + }, + { + "epoch": 0.0037390785774462565, + "grad_norm": 1028.352783203125, + "learning_rate": 2.49211356466877e-07, + "loss": 136.875, + "step": 395 + }, + { + "epoch": 0.0037485445991613103, + "grad_norm": 1098.214111328125, + "learning_rate": 2.4984227129337537e-07, + "loss": 65.75, + "step": 396 + }, + { + "epoch": 0.003758010620876364, + "grad_norm": 885.849853515625, + "learning_rate": 2.504731861198738e-07, + "loss": 76.0, + "step": 397 + }, + { + "epoch": 0.003767476642591418, + "grad_norm": 1819.6568603515625, + "learning_rate": 2.5110410094637225e-07, + "loss": 79.625, + "step": 398 + }, + { + "epoch": 0.003776942664306472, + "grad_norm": 1181.97705078125, + "learning_rate": 2.5173501577287063e-07, + "loss": 84.0625, + "step": 399 + }, + { + "epoch": 0.003786408686021526, + "grad_norm": 1351.29443359375, + "learning_rate": 2.5236593059936907e-07, + "loss": 136.3438, + "step": 400 + }, + { + "epoch": 0.0037958747077365797, + "grad_norm": 896.8280639648438, + "learning_rate": 2.529968454258675e-07, + "loss": 74.5625, + "step": 401 + }, + { + "epoch": 0.003805340729451633, + "grad_norm": 904.6307373046875, + "learning_rate": 2.5362776025236595e-07, + "loss": 157.5625, + "step": 402 + }, + { + "epoch": 0.003814806751166687, + "grad_norm": 887.6670532226562, + "learning_rate": 2.5425867507886434e-07, + "loss": 89.9062, + "step": 403 + }, + { + "epoch": 0.003824272772881741, + "grad_norm": 1105.4937744140625, + "learning_rate": 2.548895899053627e-07, + "loss": 128.8125, + "step": 404 + }, + { + "epoch": 0.0038337387945967948, + "grad_norm": 1030.996337890625, + "learning_rate": 2.555205047318612e-07, + "loss": 80.875, + "step": 405 + }, + { + "epoch": 0.0038432048163118487, + "grad_norm": 1993.22705078125, + "learning_rate": 2.561514195583596e-07, + "loss": 154.125, + "step": 406 + }, + { + "epoch": 0.0038526708380269025, + "grad_norm": 930.708984375, + "learning_rate": 2.5678233438485805e-07, + "loss": 99.1562, + "step": 407 + }, + { + "epoch": 0.0038621368597419564, + "grad_norm": 1368.19482421875, + "learning_rate": 2.5741324921135643e-07, + "loss": 153.3125, + "step": 408 + }, + { + "epoch": 0.0038716028814570103, + "grad_norm": 2.4488751888275146, + "learning_rate": 2.5804416403785487e-07, + "loss": 0.7373, + "step": 409 + }, + { + "epoch": 0.0038810689031720637, + "grad_norm": 792.1890869140625, + "learning_rate": 2.586750788643533e-07, + "loss": 64.4688, + "step": 410 + }, + { + "epoch": 0.0038905349248871176, + "grad_norm": 784.4141235351562, + "learning_rate": 2.593059936908517e-07, + "loss": 68.6875, + "step": 411 + }, + { + "epoch": 0.0039000009466021715, + "grad_norm": 661.890869140625, + "learning_rate": 2.599369085173502e-07, + "loss": 68.0625, + "step": 412 + }, + { + "epoch": 0.003909466968317225, + "grad_norm": 865.8956909179688, + "learning_rate": 2.605678233438486e-07, + "loss": 131.3125, + "step": 413 + }, + { + "epoch": 0.003918932990032279, + "grad_norm": 2314.0205078125, + "learning_rate": 2.6119873817034697e-07, + "loss": 127.5312, + "step": 414 + }, + { + "epoch": 0.003928399011747333, + "grad_norm": 920.534912109375, + "learning_rate": 2.618296529968454e-07, + "loss": 151.0625, + "step": 415 + }, + { + "epoch": 0.003937865033462387, + "grad_norm": 1379.917724609375, + "learning_rate": 2.6246056782334385e-07, + "loss": 139.0938, + "step": 416 + }, + { + "epoch": 0.003947331055177441, + "grad_norm": 2.8442399501800537, + "learning_rate": 2.630914826498423e-07, + "loss": 0.8975, + "step": 417 + }, + { + "epoch": 0.003956797076892495, + "grad_norm": 695.2325439453125, + "learning_rate": 2.6372239747634067e-07, + "loss": 65.1562, + "step": 418 + }, + { + "epoch": 0.003966263098607549, + "grad_norm": 967.9423828125, + "learning_rate": 2.6435331230283906e-07, + "loss": 65.375, + "step": 419 + }, + { + "epoch": 0.003975729120322602, + "grad_norm": 859.5576782226562, + "learning_rate": 2.6498422712933755e-07, + "loss": 78.8125, + "step": 420 + }, + { + "epoch": 0.0039851951420376555, + "grad_norm": 3745.72705078125, + "learning_rate": 2.6561514195583594e-07, + "loss": 137.5312, + "step": 421 + }, + { + "epoch": 0.003994661163752709, + "grad_norm": 883.601318359375, + "learning_rate": 2.662460567823344e-07, + "loss": 63.5, + "step": 422 + }, + { + "epoch": 0.004004127185467763, + "grad_norm": 844.1168823242188, + "learning_rate": 2.6687697160883277e-07, + "loss": 80.875, + "step": 423 + }, + { + "epoch": 0.004013593207182817, + "grad_norm": 1464.6785888671875, + "learning_rate": 2.675078864353312e-07, + "loss": 164.1875, + "step": 424 + }, + { + "epoch": 0.004023059228897871, + "grad_norm": 1181.98779296875, + "learning_rate": 2.6813880126182965e-07, + "loss": 121.0312, + "step": 425 + }, + { + "epoch": 0.004032525250612925, + "grad_norm": 1021.4419555664062, + "learning_rate": 2.6876971608832803e-07, + "loss": 97.6875, + "step": 426 + }, + { + "epoch": 0.004041991272327979, + "grad_norm": 1946.1812744140625, + "learning_rate": 2.694006309148265e-07, + "loss": 76.1562, + "step": 427 + }, + { + "epoch": 0.004051457294043033, + "grad_norm": 3.32100510597229, + "learning_rate": 2.700315457413249e-07, + "loss": 1.0259, + "step": 428 + }, + { + "epoch": 0.0040609233157580865, + "grad_norm": 1090.2227783203125, + "learning_rate": 2.706624605678233e-07, + "loss": 98.6875, + "step": 429 + }, + { + "epoch": 0.00407038933747314, + "grad_norm": 989.4611206054688, + "learning_rate": 2.7129337539432174e-07, + "loss": 72.7188, + "step": 430 + }, + { + "epoch": 0.004079855359188194, + "grad_norm": 812.07373046875, + "learning_rate": 2.719242902208202e-07, + "loss": 66.7812, + "step": 431 + }, + { + "epoch": 0.004089321380903248, + "grad_norm": 767.497802734375, + "learning_rate": 2.725552050473186e-07, + "loss": 75.8125, + "step": 432 + }, + { + "epoch": 0.004098787402618302, + "grad_norm": 824.1475219726562, + "learning_rate": 2.73186119873817e-07, + "loss": 66.7812, + "step": 433 + }, + { + "epoch": 0.004108253424333355, + "grad_norm": 913.0955810546875, + "learning_rate": 2.738170347003154e-07, + "loss": 104.0625, + "step": 434 + }, + { + "epoch": 0.004117719446048409, + "grad_norm": 891.0907592773438, + "learning_rate": 2.744479495268139e-07, + "loss": 80.5625, + "step": 435 + }, + { + "epoch": 0.004127185467763463, + "grad_norm": 908.581298828125, + "learning_rate": 2.7507886435331227e-07, + "loss": 71.375, + "step": 436 + }, + { + "epoch": 0.004136651489478517, + "grad_norm": 1147.619384765625, + "learning_rate": 2.757097791798107e-07, + "loss": 93.6875, + "step": 437 + }, + { + "epoch": 0.0041461175111935705, + "grad_norm": 3.2529542446136475, + "learning_rate": 2.7634069400630915e-07, + "loss": 0.8408, + "step": 438 + }, + { + "epoch": 0.004155583532908624, + "grad_norm": 695.8268432617188, + "learning_rate": 2.769716088328076e-07, + "loss": 61.1562, + "step": 439 + }, + { + "epoch": 0.004165049554623678, + "grad_norm": 918.6434936523438, + "learning_rate": 2.77602523659306e-07, + "loss": 85.7188, + "step": 440 + }, + { + "epoch": 0.004174515576338732, + "grad_norm": 969.8341674804688, + "learning_rate": 2.7823343848580437e-07, + "loss": 79.3125, + "step": 441 + }, + { + "epoch": 0.004183981598053786, + "grad_norm": 951.085693359375, + "learning_rate": 2.7886435331230286e-07, + "loss": 71.7188, + "step": 442 + }, + { + "epoch": 0.00419344761976884, + "grad_norm": 1156.0338134765625, + "learning_rate": 2.7949526813880125e-07, + "loss": 191.125, + "step": 443 + }, + { + "epoch": 0.004202913641483894, + "grad_norm": 790.0103759765625, + "learning_rate": 2.801261829652997e-07, + "loss": 131.0625, + "step": 444 + }, + { + "epoch": 0.004212379663198948, + "grad_norm": 931.4601440429688, + "learning_rate": 2.8075709779179807e-07, + "loss": 134.7812, + "step": 445 + }, + { + "epoch": 0.0042218456849140015, + "grad_norm": 738.5828857421875, + "learning_rate": 2.813880126182965e-07, + "loss": 108.1562, + "step": 446 + }, + { + "epoch": 0.004231311706629055, + "grad_norm": 783.3395385742188, + "learning_rate": 2.8201892744479495e-07, + "loss": 124.9375, + "step": 447 + }, + { + "epoch": 0.004240777728344109, + "grad_norm": 784.3565063476562, + "learning_rate": 2.8264984227129334e-07, + "loss": 81.7812, + "step": 448 + }, + { + "epoch": 0.004250243750059162, + "grad_norm": 3.651919364929199, + "learning_rate": 2.8328075709779183e-07, + "loss": 0.8901, + "step": 449 + }, + { + "epoch": 0.004259709771774216, + "grad_norm": 2071.64306640625, + "learning_rate": 2.839116719242902e-07, + "loss": 204.0, + "step": 450 + }, + { + "epoch": 0.00426917579348927, + "grad_norm": 1376.1370849609375, + "learning_rate": 2.845425867507886e-07, + "loss": 77.5938, + "step": 451 + }, + { + "epoch": 0.004278641815204324, + "grad_norm": 872.4217529296875, + "learning_rate": 2.8517350157728705e-07, + "loss": 64.5312, + "step": 452 + }, + { + "epoch": 0.004288107836919378, + "grad_norm": 928.6163330078125, + "learning_rate": 2.858044164037855e-07, + "loss": 77.7812, + "step": 453 + }, + { + "epoch": 0.004297573858634432, + "grad_norm": 872.2057495117188, + "learning_rate": 2.864353312302839e-07, + "loss": 67.3125, + "step": 454 + }, + { + "epoch": 0.0043070398803494855, + "grad_norm": 1324.0980224609375, + "learning_rate": 2.870662460567823e-07, + "loss": 70.0938, + "step": 455 + }, + { + "epoch": 0.004316505902064539, + "grad_norm": 767.0242309570312, + "learning_rate": 2.876971608832807e-07, + "loss": 68.1875, + "step": 456 + }, + { + "epoch": 0.004325971923779593, + "grad_norm": 1290.716552734375, + "learning_rate": 2.883280757097792e-07, + "loss": 91.3125, + "step": 457 + }, + { + "epoch": 0.004335437945494647, + "grad_norm": 3.6982409954071045, + "learning_rate": 2.889589905362776e-07, + "loss": 0.8906, + "step": 458 + }, + { + "epoch": 0.004344903967209701, + "grad_norm": 687.7546997070312, + "learning_rate": 2.89589905362776e-07, + "loss": 66.5312, + "step": 459 + }, + { + "epoch": 0.004354369988924755, + "grad_norm": 726.9271240234375, + "learning_rate": 2.9022082018927446e-07, + "loss": 68.8438, + "step": 460 + }, + { + "epoch": 0.004363836010639809, + "grad_norm": 901.81884765625, + "learning_rate": 2.9085173501577285e-07, + "loss": 82.4062, + "step": 461 + }, + { + "epoch": 0.004373302032354863, + "grad_norm": 1623.676513671875, + "learning_rate": 2.914826498422713e-07, + "loss": 154.5625, + "step": 462 + }, + { + "epoch": 0.004382768054069916, + "grad_norm": 803.6258544921875, + "learning_rate": 2.9211356466876967e-07, + "loss": 117.0625, + "step": 463 + }, + { + "epoch": 0.0043922340757849695, + "grad_norm": 891.427734375, + "learning_rate": 2.9274447949526817e-07, + "loss": 65.1562, + "step": 464 + }, + { + "epoch": 0.004401700097500023, + "grad_norm": 1312.71337890625, + "learning_rate": 2.9337539432176655e-07, + "loss": 153.2812, + "step": 465 + }, + { + "epoch": 0.004411166119215077, + "grad_norm": 2.616243362426758, + "learning_rate": 2.9400630914826494e-07, + "loss": 0.8545, + "step": 466 + }, + { + "epoch": 0.004420632140930131, + "grad_norm": 847.6250610351562, + "learning_rate": 2.946372239747634e-07, + "loss": 69.1875, + "step": 467 + }, + { + "epoch": 0.004430098162645185, + "grad_norm": 725.62060546875, + "learning_rate": 2.952681388012618e-07, + "loss": 64.375, + "step": 468 + }, + { + "epoch": 0.004439564184360239, + "grad_norm": 635.97900390625, + "learning_rate": 2.9589905362776026e-07, + "loss": 114.9375, + "step": 469 + }, + { + "epoch": 0.004449030206075293, + "grad_norm": 948.571044921875, + "learning_rate": 2.9652996845425865e-07, + "loss": 122.625, + "step": 470 + }, + { + "epoch": 0.004458496227790347, + "grad_norm": 738.7330322265625, + "learning_rate": 2.9716088328075703e-07, + "loss": 68.7188, + "step": 471 + }, + { + "epoch": 0.0044679622495054005, + "grad_norm": 2.6619417667388916, + "learning_rate": 2.977917981072555e-07, + "loss": 0.8384, + "step": 472 + }, + { + "epoch": 0.004477428271220454, + "grad_norm": 1052.533935546875, + "learning_rate": 2.984227129337539e-07, + "loss": 62.8438, + "step": 473 + }, + { + "epoch": 0.004486894292935508, + "grad_norm": 809.7598876953125, + "learning_rate": 2.9905362776025235e-07, + "loss": 71.25, + "step": 474 + }, + { + "epoch": 0.004496360314650562, + "grad_norm": 786.0537719726562, + "learning_rate": 2.996845425867508e-07, + "loss": 70.0, + "step": 475 + }, + { + "epoch": 0.004505826336365616, + "grad_norm": 996.31201171875, + "learning_rate": 3.0031545741324923e-07, + "loss": 124.6562, + "step": 476 + }, + { + "epoch": 0.004515292358080669, + "grad_norm": 888.217529296875, + "learning_rate": 3.009463722397476e-07, + "loss": 142.0625, + "step": 477 + }, + { + "epoch": 0.004524758379795723, + "grad_norm": 3515.321533203125, + "learning_rate": 3.01577287066246e-07, + "loss": 64.1562, + "step": 478 + }, + { + "epoch": 0.004534224401510777, + "grad_norm": 970.2354125976562, + "learning_rate": 3.022082018927445e-07, + "loss": 151.875, + "step": 479 + }, + { + "epoch": 0.004543690423225831, + "grad_norm": 841.5230712890625, + "learning_rate": 3.028391167192429e-07, + "loss": 133.375, + "step": 480 + }, + { + "epoch": 0.0045531564449408845, + "grad_norm": 822.075439453125, + "learning_rate": 3.034700315457413e-07, + "loss": 60.8125, + "step": 481 + }, + { + "epoch": 0.004562622466655938, + "grad_norm": 861.5018920898438, + "learning_rate": 3.041009463722397e-07, + "loss": 71.9688, + "step": 482 + }, + { + "epoch": 0.004572088488370992, + "grad_norm": 666.4907836914062, + "learning_rate": 3.0473186119873815e-07, + "loss": 66.9375, + "step": 483 + }, + { + "epoch": 0.004581554510086046, + "grad_norm": 585.662353515625, + "learning_rate": 3.053627760252366e-07, + "loss": 53.9375, + "step": 484 + }, + { + "epoch": 0.0045910205318011, + "grad_norm": 1327.086181640625, + "learning_rate": 3.05993690851735e-07, + "loss": 142.125, + "step": 485 + }, + { + "epoch": 0.004600486553516154, + "grad_norm": 3.1511454582214355, + "learning_rate": 3.0662460567823347e-07, + "loss": 1.0029, + "step": 486 + }, + { + "epoch": 0.004609952575231208, + "grad_norm": 553.4071044921875, + "learning_rate": 3.0725552050473186e-07, + "loss": 65.0938, + "step": 487 + }, + { + "epoch": 0.004619418596946262, + "grad_norm": 767.5245971679688, + "learning_rate": 3.0788643533123025e-07, + "loss": 63.9062, + "step": 488 + }, + { + "epoch": 0.0046288846186613155, + "grad_norm": 902.817626953125, + "learning_rate": 3.085173501577287e-07, + "loss": 67.1562, + "step": 489 + }, + { + "epoch": 0.004638350640376369, + "grad_norm": 1416.4351806640625, + "learning_rate": 3.0914826498422713e-07, + "loss": 158.5625, + "step": 490 + }, + { + "epoch": 0.004647816662091422, + "grad_norm": 780.04833984375, + "learning_rate": 3.0977917981072557e-07, + "loss": 62.0312, + "step": 491 + }, + { + "epoch": 0.004657282683806476, + "grad_norm": 843.4635620117188, + "learning_rate": 3.1041009463722395e-07, + "loss": 71.8438, + "step": 492 + }, + { + "epoch": 0.00466674870552153, + "grad_norm": 651.6881713867188, + "learning_rate": 3.1104100946372234e-07, + "loss": 60.7812, + "step": 493 + }, + { + "epoch": 0.004676214727236584, + "grad_norm": 900.7103881835938, + "learning_rate": 3.1167192429022083e-07, + "loss": 91.2188, + "step": 494 + }, + { + "epoch": 0.004685680748951638, + "grad_norm": 1069.5946044921875, + "learning_rate": 3.123028391167192e-07, + "loss": 100.5, + "step": 495 + }, + { + "epoch": 0.004695146770666692, + "grad_norm": 536.8743286132812, + "learning_rate": 3.1293375394321766e-07, + "loss": 64.8438, + "step": 496 + }, + { + "epoch": 0.004704612792381746, + "grad_norm": 697.8759765625, + "learning_rate": 3.135646687697161e-07, + "loss": 100.9375, + "step": 497 + }, + { + "epoch": 0.0047140788140967995, + "grad_norm": 905.1168212890625, + "learning_rate": 3.141955835962145e-07, + "loss": 69.2188, + "step": 498 + }, + { + "epoch": 0.004723544835811853, + "grad_norm": 1300.9388427734375, + "learning_rate": 3.1482649842271293e-07, + "loss": 114.0312, + "step": 499 + }, + { + "epoch": 0.004733010857526907, + "grad_norm": 594.5653686523438, + "learning_rate": 3.154574132492113e-07, + "loss": 61.9375, + "step": 500 + }, + { + "epoch": 0.004742476879241961, + "grad_norm": 855.7710571289062, + "learning_rate": 3.160883280757098e-07, + "loss": 109.5, + "step": 501 + }, + { + "epoch": 0.004751942900957015, + "grad_norm": 551.5289916992188, + "learning_rate": 3.167192429022082e-07, + "loss": 66.875, + "step": 502 + }, + { + "epoch": 0.004761408922672069, + "grad_norm": 780.4547729492188, + "learning_rate": 3.173501577287066e-07, + "loss": 63.5625, + "step": 503 + }, + { + "epoch": 0.004770874944387123, + "grad_norm": 1032.2275390625, + "learning_rate": 3.17981072555205e-07, + "loss": 64.2188, + "step": 504 + }, + { + "epoch": 0.004780340966102177, + "grad_norm": 3.240508556365967, + "learning_rate": 3.1861198738170346e-07, + "loss": 0.9658, + "step": 505 + }, + { + "epoch": 0.00478980698781723, + "grad_norm": 768.9003295898438, + "learning_rate": 3.192429022082019e-07, + "loss": 161.75, + "step": 506 + }, + { + "epoch": 0.0047992730095322836, + "grad_norm": 944.4700317382812, + "learning_rate": 3.198738170347003e-07, + "loss": 81.7812, + "step": 507 + }, + { + "epoch": 0.004808739031247337, + "grad_norm": 2290.404052734375, + "learning_rate": 3.205047318611987e-07, + "loss": 141.0, + "step": 508 + }, + { + "epoch": 0.004818205052962391, + "grad_norm": 1137.8609619140625, + "learning_rate": 3.2113564668769717e-07, + "loss": 74.375, + "step": 509 + }, + { + "epoch": 0.004827671074677445, + "grad_norm": 887.4998168945312, + "learning_rate": 3.2176656151419555e-07, + "loss": 68.7812, + "step": 510 + }, + { + "epoch": 0.004837137096392499, + "grad_norm": 815.4695434570312, + "learning_rate": 3.22397476340694e-07, + "loss": 63.1562, + "step": 511 + }, + { + "epoch": 0.004846603118107553, + "grad_norm": 1618.4085693359375, + "learning_rate": 3.2302839116719243e-07, + "loss": 61.5312, + "step": 512 + }, + { + "epoch": 0.004856069139822607, + "grad_norm": 1177.1611328125, + "learning_rate": 3.236593059936908e-07, + "loss": 90.5, + "step": 513 + }, + { + "epoch": 0.004865535161537661, + "grad_norm": 922.9889526367188, + "learning_rate": 3.2429022082018926e-07, + "loss": 87.5, + "step": 514 + }, + { + "epoch": 0.0048750011832527146, + "grad_norm": 891.3350219726562, + "learning_rate": 3.2492113564668765e-07, + "loss": 130.2188, + "step": 515 + }, + { + "epoch": 0.004884467204967768, + "grad_norm": 1355.3280029296875, + "learning_rate": 3.2555205047318614e-07, + "loss": 154.4062, + "step": 516 + }, + { + "epoch": 0.004893933226682822, + "grad_norm": 2.927227258682251, + "learning_rate": 3.2618296529968453e-07, + "loss": 0.9053, + "step": 517 + }, + { + "epoch": 0.004903399248397876, + "grad_norm": 684.908935546875, + "learning_rate": 3.268138801261829e-07, + "loss": 61.7812, + "step": 518 + }, + { + "epoch": 0.00491286527011293, + "grad_norm": 1358.9091796875, + "learning_rate": 3.2744479495268135e-07, + "loss": 65.8125, + "step": 519 + }, + { + "epoch": 0.004922331291827983, + "grad_norm": 611.5530395507812, + "learning_rate": 3.280757097791798e-07, + "loss": 62.25, + "step": 520 + }, + { + "epoch": 0.004931797313543037, + "grad_norm": 1594.5277099609375, + "learning_rate": 3.2870662460567823e-07, + "loss": 107.0625, + "step": 521 + }, + { + "epoch": 0.004941263335258091, + "grad_norm": 751.2952880859375, + "learning_rate": 3.293375394321766e-07, + "loss": 71.4688, + "step": 522 + }, + { + "epoch": 0.004950729356973145, + "grad_norm": 901.8346557617188, + "learning_rate": 3.299684542586751e-07, + "loss": 132.0, + "step": 523 + }, + { + "epoch": 0.0049601953786881986, + "grad_norm": 977.2008666992188, + "learning_rate": 3.305993690851735e-07, + "loss": 115.625, + "step": 524 + }, + { + "epoch": 0.0049696614004032524, + "grad_norm": 1012.7542724609375, + "learning_rate": 3.312302839116719e-07, + "loss": 77.25, + "step": 525 + }, + { + "epoch": 0.004979127422118306, + "grad_norm": 996.6942138671875, + "learning_rate": 3.3186119873817033e-07, + "loss": 68.8438, + "step": 526 + }, + { + "epoch": 0.00498859344383336, + "grad_norm": 821.8289184570312, + "learning_rate": 3.3249211356466877e-07, + "loss": 66.3438, + "step": 527 + }, + { + "epoch": 0.004998059465548414, + "grad_norm": 787.1178588867188, + "learning_rate": 3.331230283911672e-07, + "loss": 68.4688, + "step": 528 + }, + { + "epoch": 0.005007525487263468, + "grad_norm": 955.9060668945312, + "learning_rate": 3.337539432176656e-07, + "loss": 70.9375, + "step": 529 + }, + { + "epoch": 0.005016991508978522, + "grad_norm": 2.673048257827759, + "learning_rate": 3.34384858044164e-07, + "loss": 0.8884, + "step": 530 + }, + { + "epoch": 0.005026457530693576, + "grad_norm": 782.0868530273438, + "learning_rate": 3.350157728706625e-07, + "loss": 113.4062, + "step": 531 + }, + { + "epoch": 0.0050359235524086296, + "grad_norm": 1127.822998046875, + "learning_rate": 3.3564668769716086e-07, + "loss": 108.3125, + "step": 532 + }, + { + "epoch": 0.0050453895741236834, + "grad_norm": 752.520263671875, + "learning_rate": 3.362776025236593e-07, + "loss": 60.4062, + "step": 533 + }, + { + "epoch": 0.0050548555958387364, + "grad_norm": 1536.3759765625, + "learning_rate": 3.3690851735015774e-07, + "loss": 155.8125, + "step": 534 + }, + { + "epoch": 0.00506432161755379, + "grad_norm": 1995.37548828125, + "learning_rate": 3.3753943217665613e-07, + "loss": 65.0938, + "step": 535 + }, + { + "epoch": 0.005073787639268844, + "grad_norm": 693.6814575195312, + "learning_rate": 3.3817034700315457e-07, + "loss": 60.9688, + "step": 536 + }, + { + "epoch": 0.005083253660983898, + "grad_norm": 816.6685791015625, + "learning_rate": 3.3880126182965295e-07, + "loss": 59.3438, + "step": 537 + }, + { + "epoch": 0.005092719682698952, + "grad_norm": 4.170598030090332, + "learning_rate": 3.3943217665615145e-07, + "loss": 0.874, + "step": 538 + }, + { + "epoch": 0.005102185704414006, + "grad_norm": 1411.790771484375, + "learning_rate": 3.4006309148264983e-07, + "loss": 125.5312, + "step": 539 + }, + { + "epoch": 0.00511165172612906, + "grad_norm": 6981.935546875, + "learning_rate": 3.406940063091482e-07, + "loss": 148.8125, + "step": 540 + }, + { + "epoch": 0.005121117747844114, + "grad_norm": 809.6035766601562, + "learning_rate": 3.4132492113564666e-07, + "loss": 65.4688, + "step": 541 + }, + { + "epoch": 0.0051305837695591675, + "grad_norm": 810.4950561523438, + "learning_rate": 3.419558359621451e-07, + "loss": 75.5938, + "step": 542 + }, + { + "epoch": 0.005140049791274221, + "grad_norm": 589.7190551757812, + "learning_rate": 3.4258675078864354e-07, + "loss": 61.5625, + "step": 543 + }, + { + "epoch": 0.005149515812989275, + "grad_norm": 3448.148681640625, + "learning_rate": 3.4321766561514193e-07, + "loss": 242.9375, + "step": 544 + }, + { + "epoch": 0.005158981834704329, + "grad_norm": 803.6175537109375, + "learning_rate": 3.4384858044164037e-07, + "loss": 89.0312, + "step": 545 + }, + { + "epoch": 0.005168447856419383, + "grad_norm": 658.2818603515625, + "learning_rate": 3.444794952681388e-07, + "loss": 55.5625, + "step": 546 + }, + { + "epoch": 0.005177913878134437, + "grad_norm": 1006.4055786132812, + "learning_rate": 3.451104100946372e-07, + "loss": 60.8438, + "step": 547 + }, + { + "epoch": 0.00518737989984949, + "grad_norm": 1055.0003662109375, + "learning_rate": 3.4574132492113563e-07, + "loss": 131.125, + "step": 548 + }, + { + "epoch": 0.005196845921564544, + "grad_norm": 904.6844482421875, + "learning_rate": 3.463722397476341e-07, + "loss": 142.3125, + "step": 549 + }, + { + "epoch": 0.005206311943279598, + "grad_norm": 619.63623046875, + "learning_rate": 3.4700315457413246e-07, + "loss": 62.4688, + "step": 550 + }, + { + "epoch": 0.0052157779649946515, + "grad_norm": 709.7120361328125, + "learning_rate": 3.476340694006309e-07, + "loss": 82.3438, + "step": 551 + }, + { + "epoch": 0.005225243986709705, + "grad_norm": 1482.140869140625, + "learning_rate": 3.482649842271293e-07, + "loss": 128.625, + "step": 552 + }, + { + "epoch": 0.005234710008424759, + "grad_norm": 578.4993286132812, + "learning_rate": 3.488958990536278e-07, + "loss": 59.5625, + "step": 553 + }, + { + "epoch": 0.005244176030139813, + "grad_norm": 1448.31640625, + "learning_rate": 3.4952681388012617e-07, + "loss": 61.2812, + "step": 554 + }, + { + "epoch": 0.005253642051854867, + "grad_norm": 1396.4014892578125, + "learning_rate": 3.5015772870662455e-07, + "loss": 94.8438, + "step": 555 + }, + { + "epoch": 0.005263108073569921, + "grad_norm": 1177.962158203125, + "learning_rate": 3.50788643533123e-07, + "loss": 126.0625, + "step": 556 + }, + { + "epoch": 0.005272574095284975, + "grad_norm": 975.5050048828125, + "learning_rate": 3.5141955835962143e-07, + "loss": 74.2188, + "step": 557 + }, + { + "epoch": 0.005282040117000029, + "grad_norm": 782.9436645507812, + "learning_rate": 3.520504731861199e-07, + "loss": 67.0625, + "step": 558 + }, + { + "epoch": 0.0052915061387150825, + "grad_norm": 908.966796875, + "learning_rate": 3.5268138801261826e-07, + "loss": 56.5625, + "step": 559 + }, + { + "epoch": 0.005300972160430136, + "grad_norm": 2119.72705078125, + "learning_rate": 3.533123028391167e-07, + "loss": 84.8438, + "step": 560 + }, + { + "epoch": 0.00531043818214519, + "grad_norm": 659.0459594726562, + "learning_rate": 3.5394321766561514e-07, + "loss": 57.6562, + "step": 561 + }, + { + "epoch": 0.005319904203860243, + "grad_norm": 3.452929973602295, + "learning_rate": 3.5457413249211353e-07, + "loss": 0.8809, + "step": 562 + }, + { + "epoch": 0.005329370225575297, + "grad_norm": 593.4617919921875, + "learning_rate": 3.5520504731861197e-07, + "loss": 57.6875, + "step": 563 + }, + { + "epoch": 0.005338836247290351, + "grad_norm": 695.5679931640625, + "learning_rate": 3.558359621451104e-07, + "loss": 71.6562, + "step": 564 + }, + { + "epoch": 0.005348302269005405, + "grad_norm": 1503.13818359375, + "learning_rate": 3.564668769716088e-07, + "loss": 127.125, + "step": 565 + }, + { + "epoch": 0.005357768290720459, + "grad_norm": 698.4495239257812, + "learning_rate": 3.5709779179810723e-07, + "loss": 81.9062, + "step": 566 + }, + { + "epoch": 0.005367234312435513, + "grad_norm": 2.516832113265991, + "learning_rate": 3.577287066246056e-07, + "loss": 0.9082, + "step": 567 + }, + { + "epoch": 0.0053767003341505665, + "grad_norm": 1684.319091796875, + "learning_rate": 3.583596214511041e-07, + "loss": 98.375, + "step": 568 + }, + { + "epoch": 0.00538616635586562, + "grad_norm": 603.4769287109375, + "learning_rate": 3.589905362776025e-07, + "loss": 57.375, + "step": 569 + }, + { + "epoch": 0.005395632377580674, + "grad_norm": 766.1370239257812, + "learning_rate": 3.5962145110410094e-07, + "loss": 55.6562, + "step": 570 + }, + { + "epoch": 0.005405098399295728, + "grad_norm": 2.693240165710449, + "learning_rate": 3.602523659305994e-07, + "loss": 0.908, + "step": 571 + }, + { + "epoch": 0.005414564421010782, + "grad_norm": 1667.3143310546875, + "learning_rate": 3.6088328075709777e-07, + "loss": 145.375, + "step": 572 + }, + { + "epoch": 0.005424030442725836, + "grad_norm": 1260.6129150390625, + "learning_rate": 3.615141955835962e-07, + "loss": 99.5, + "step": 573 + }, + { + "epoch": 0.00543349646444089, + "grad_norm": 900.5045166015625, + "learning_rate": 3.621451104100946e-07, + "loss": 86.7188, + "step": 574 + }, + { + "epoch": 0.005442962486155944, + "grad_norm": 1474.1209716796875, + "learning_rate": 3.627760252365931e-07, + "loss": 93.8438, + "step": 575 + }, + { + "epoch": 0.0054524285078709975, + "grad_norm": 610.5883178710938, + "learning_rate": 3.634069400630915e-07, + "loss": 62.4688, + "step": 576 + }, + { + "epoch": 0.0054618945295860505, + "grad_norm": 3860.3916015625, + "learning_rate": 3.6403785488958986e-07, + "loss": 125.5938, + "step": 577 + }, + { + "epoch": 0.005471360551301104, + "grad_norm": 1044.5986328125, + "learning_rate": 3.646687697160883e-07, + "loss": 135.9375, + "step": 578 + }, + { + "epoch": 0.005480826573016158, + "grad_norm": 752.4140014648438, + "learning_rate": 3.6529968454258674e-07, + "loss": 57.4062, + "step": 579 + }, + { + "epoch": 0.005490292594731212, + "grad_norm": 624.4622802734375, + "learning_rate": 3.659305993690852e-07, + "loss": 58.8125, + "step": 580 + }, + { + "epoch": 0.005499758616446266, + "grad_norm": 882.2796630859375, + "learning_rate": 3.6656151419558357e-07, + "loss": 64.8438, + "step": 581 + }, + { + "epoch": 0.00550922463816132, + "grad_norm": 1344.762451171875, + "learning_rate": 3.67192429022082e-07, + "loss": 101.9375, + "step": 582 + }, + { + "epoch": 0.005518690659876374, + "grad_norm": 878.3090209960938, + "learning_rate": 3.6782334384858045e-07, + "loss": 61.6094, + "step": 583 + }, + { + "epoch": 0.005528156681591428, + "grad_norm": 731.1142578125, + "learning_rate": 3.6845425867507883e-07, + "loss": 73.5312, + "step": 584 + }, + { + "epoch": 0.0055376227033064815, + "grad_norm": 1131.4705810546875, + "learning_rate": 3.690851735015773e-07, + "loss": 130.625, + "step": 585 + }, + { + "epoch": 0.005547088725021535, + "grad_norm": 1000.5369262695312, + "learning_rate": 3.697160883280757e-07, + "loss": 106.0625, + "step": 586 + }, + { + "epoch": 0.005556554746736589, + "grad_norm": 3.076819658279419, + "learning_rate": 3.703470031545741e-07, + "loss": 0.8169, + "step": 587 + }, + { + "epoch": 0.005566020768451643, + "grad_norm": 2383.037109375, + "learning_rate": 3.7097791798107254e-07, + "loss": 69.6562, + "step": 588 + }, + { + "epoch": 0.005575486790166697, + "grad_norm": 792.3946533203125, + "learning_rate": 3.7160883280757093e-07, + "loss": 66.8438, + "step": 589 + }, + { + "epoch": 0.005584952811881751, + "grad_norm": 2.9350340366363525, + "learning_rate": 3.722397476340694e-07, + "loss": 0.8853, + "step": 590 + }, + { + "epoch": 0.005594418833596804, + "grad_norm": 1533.6539306640625, + "learning_rate": 3.728706624605678e-07, + "loss": 58.875, + "step": 591 + }, + { + "epoch": 0.005603884855311858, + "grad_norm": 1303.335205078125, + "learning_rate": 3.735015772870662e-07, + "loss": 113.625, + "step": 592 + }, + { + "epoch": 0.005613350877026912, + "grad_norm": 602.9747924804688, + "learning_rate": 3.741324921135647e-07, + "loss": 58.375, + "step": 593 + }, + { + "epoch": 0.0056228168987419655, + "grad_norm": 604.57568359375, + "learning_rate": 3.747634069400631e-07, + "loss": 57.625, + "step": 594 + }, + { + "epoch": 0.005632282920457019, + "grad_norm": 865.6827392578125, + "learning_rate": 3.753943217665615e-07, + "loss": 145.875, + "step": 595 + }, + { + "epoch": 0.005641748942172073, + "grad_norm": 3.388984203338623, + "learning_rate": 3.760252365930599e-07, + "loss": 0.8203, + "step": 596 + }, + { + "epoch": 0.005651214963887127, + "grad_norm": 891.7890014648438, + "learning_rate": 3.7665615141955834e-07, + "loss": 95.8125, + "step": 597 + }, + { + "epoch": 0.005660680985602181, + "grad_norm": 2357.702392578125, + "learning_rate": 3.772870662460568e-07, + "loss": 88.0469, + "step": 598 + }, + { + "epoch": 0.005670147007317235, + "grad_norm": 1378.74658203125, + "learning_rate": 3.7791798107255517e-07, + "loss": 138.6875, + "step": 599 + }, + { + "epoch": 0.005679613029032289, + "grad_norm": 866.7297973632812, + "learning_rate": 3.785488958990536e-07, + "loss": 115.9375, + "step": 600 + }, + { + "epoch": 0.005689079050747343, + "grad_norm": 1085.9368896484375, + "learning_rate": 3.7917981072555205e-07, + "loss": 145.8125, + "step": 601 + }, + { + "epoch": 0.0056985450724623965, + "grad_norm": 705.8587646484375, + "learning_rate": 3.7981072555205043e-07, + "loss": 124.5625, + "step": 602 + }, + { + "epoch": 0.00570801109417745, + "grad_norm": 866.24169921875, + "learning_rate": 3.804416403785489e-07, + "loss": 63.75, + "step": 603 + }, + { + "epoch": 0.005717477115892504, + "grad_norm": 682.0865478515625, + "learning_rate": 3.8107255520504726e-07, + "loss": 57.9688, + "step": 604 + }, + { + "epoch": 0.005726943137607557, + "grad_norm": 652.6722412109375, + "learning_rate": 3.8170347003154575e-07, + "loss": 57.0938, + "step": 605 + }, + { + "epoch": 0.005736409159322611, + "grad_norm": 648.8684692382812, + "learning_rate": 3.8233438485804414e-07, + "loss": 64.4062, + "step": 606 + }, + { + "epoch": 0.005745875181037665, + "grad_norm": 3.5317459106445312, + "learning_rate": 3.8296529968454253e-07, + "loss": 0.9404, + "step": 607 + }, + { + "epoch": 0.005755341202752719, + "grad_norm": 903.4336547851562, + "learning_rate": 3.83596214511041e-07, + "loss": 64.8438, + "step": 608 + }, + { + "epoch": 0.005764807224467773, + "grad_norm": 675.7362670898438, + "learning_rate": 3.842271293375394e-07, + "loss": 57.7812, + "step": 609 + }, + { + "epoch": 0.005774273246182827, + "grad_norm": 754.898193359375, + "learning_rate": 3.8485804416403785e-07, + "loss": 80.5625, + "step": 610 + }, + { + "epoch": 0.0057837392678978805, + "grad_norm": 1030.74755859375, + "learning_rate": 3.8548895899053624e-07, + "loss": 113.25, + "step": 611 + }, + { + "epoch": 0.005793205289612934, + "grad_norm": 2.676038980484009, + "learning_rate": 3.8611987381703473e-07, + "loss": 0.7505, + "step": 612 + }, + { + "epoch": 0.005802671311327988, + "grad_norm": 672.6953125, + "learning_rate": 3.867507886435331e-07, + "loss": 56.8125, + "step": 613 + }, + { + "epoch": 0.005812137333043042, + "grad_norm": 1044.2171630859375, + "learning_rate": 3.873817034700315e-07, + "loss": 120.6875, + "step": 614 + }, + { + "epoch": 0.005821603354758096, + "grad_norm": 2.68900465965271, + "learning_rate": 3.8801261829652994e-07, + "loss": 0.8315, + "step": 615 + }, + { + "epoch": 0.00583106937647315, + "grad_norm": 890.1533813476562, + "learning_rate": 3.886435331230284e-07, + "loss": 141.6875, + "step": 616 + }, + { + "epoch": 0.005840535398188204, + "grad_norm": 831.1906127929688, + "learning_rate": 3.892744479495268e-07, + "loss": 61.0625, + "step": 617 + }, + { + "epoch": 0.005850001419903258, + "grad_norm": 766.3878784179688, + "learning_rate": 3.899053627760252e-07, + "loss": 64.125, + "step": 618 + }, + { + "epoch": 0.005859467441618311, + "grad_norm": 928.918212890625, + "learning_rate": 3.9053627760252365e-07, + "loss": 74.7344, + "step": 619 + }, + { + "epoch": 0.0058689334633333645, + "grad_norm": 1462.5673828125, + "learning_rate": 3.911671924290221e-07, + "loss": 113.4375, + "step": 620 + }, + { + "epoch": 0.005878399485048418, + "grad_norm": 654.6067504882812, + "learning_rate": 3.917981072555205e-07, + "loss": 66.0938, + "step": 621 + }, + { + "epoch": 0.005887865506763472, + "grad_norm": 2349.647216796875, + "learning_rate": 3.924290220820189e-07, + "loss": 127.4375, + "step": 622 + }, + { + "epoch": 0.005897331528478526, + "grad_norm": 911.2752075195312, + "learning_rate": 3.9305993690851735e-07, + "loss": 63.125, + "step": 623 + }, + { + "epoch": 0.00590679755019358, + "grad_norm": 1020.8985595703125, + "learning_rate": 3.9369085173501574e-07, + "loss": 147.625, + "step": 624 + }, + { + "epoch": 0.005916263571908634, + "grad_norm": 742.1503295898438, + "learning_rate": 3.943217665615142e-07, + "loss": 54.0312, + "step": 625 + }, + { + "epoch": 0.005925729593623688, + "grad_norm": 527.100341796875, + "learning_rate": 3.9495268138801257e-07, + "loss": 47.9062, + "step": 626 + }, + { + "epoch": 0.005935195615338742, + "grad_norm": 1978.780029296875, + "learning_rate": 3.9558359621451106e-07, + "loss": 90.5312, + "step": 627 + }, + { + "epoch": 0.0059446616370537955, + "grad_norm": 712.3825073242188, + "learning_rate": 3.9621451104100945e-07, + "loss": 104.75, + "step": 628 + }, + { + "epoch": 0.005954127658768849, + "grad_norm": 734.8538208007812, + "learning_rate": 3.9684542586750784e-07, + "loss": 110.125, + "step": 629 + }, + { + "epoch": 0.005963593680483903, + "grad_norm": 665.1245727539062, + "learning_rate": 3.9747634069400633e-07, + "loss": 55.6562, + "step": 630 + }, + { + "epoch": 0.005973059702198957, + "grad_norm": 648.5557861328125, + "learning_rate": 3.981072555205047e-07, + "loss": 89.9062, + "step": 631 + }, + { + "epoch": 0.005982525723914011, + "grad_norm": 992.6857299804688, + "learning_rate": 3.9873817034700316e-07, + "loss": 95.5938, + "step": 632 + }, + { + "epoch": 0.005991991745629065, + "grad_norm": 1588.2218017578125, + "learning_rate": 3.9936908517350154e-07, + "loss": 145.125, + "step": 633 + }, + { + "epoch": 0.006001457767344118, + "grad_norm": 1168.3626708984375, + "learning_rate": 4e-07, + "loss": 137.8125, + "step": 634 + }, + { + "epoch": 0.006010923789059172, + "grad_norm": 714.9613037109375, + "learning_rate": 4.006309148264984e-07, + "loss": 95.7188, + "step": 635 + }, + { + "epoch": 0.006020389810774226, + "grad_norm": 746.7614135742188, + "learning_rate": 4.012618296529968e-07, + "loss": 88.1562, + "step": 636 + }, + { + "epoch": 0.0060298558324892795, + "grad_norm": 706.3826904296875, + "learning_rate": 4.0189274447949525e-07, + "loss": 99.25, + "step": 637 + }, + { + "epoch": 0.006039321854204333, + "grad_norm": 1050.4666748046875, + "learning_rate": 4.025236593059937e-07, + "loss": 64.2812, + "step": 638 + }, + { + "epoch": 0.006048787875919387, + "grad_norm": 672.5553588867188, + "learning_rate": 4.031545741324921e-07, + "loss": 66.4688, + "step": 639 + }, + { + "epoch": 0.006058253897634441, + "grad_norm": 3.886991024017334, + "learning_rate": 4.037854889589905e-07, + "loss": 0.8491, + "step": 640 + }, + { + "epoch": 0.006067719919349495, + "grad_norm": 569.8007202148438, + "learning_rate": 4.044164037854889e-07, + "loss": 50.5625, + "step": 641 + }, + { + "epoch": 0.006077185941064549, + "grad_norm": 1638.690185546875, + "learning_rate": 4.050473186119874e-07, + "loss": 121.1875, + "step": 642 + }, + { + "epoch": 0.006086651962779603, + "grad_norm": 636.4658203125, + "learning_rate": 4.056782334384858e-07, + "loss": 69.4062, + "step": 643 + }, + { + "epoch": 0.006096117984494657, + "grad_norm": 1232.5380859375, + "learning_rate": 4.0630914826498417e-07, + "loss": 127.625, + "step": 644 + }, + { + "epoch": 0.0061055840062097105, + "grad_norm": 1831.5367431640625, + "learning_rate": 4.0694006309148266e-07, + "loss": 123.5938, + "step": 645 + }, + { + "epoch": 0.006115050027924764, + "grad_norm": 706.0447998046875, + "learning_rate": 4.0757097791798105e-07, + "loss": 74.9375, + "step": 646 + }, + { + "epoch": 0.006124516049639818, + "grad_norm": 747.140869140625, + "learning_rate": 4.082018927444795e-07, + "loss": 53.0938, + "step": 647 + }, + { + "epoch": 0.006133982071354871, + "grad_norm": 1252.6072998046875, + "learning_rate": 4.088328075709779e-07, + "loss": 82.4375, + "step": 648 + }, + { + "epoch": 0.006143448093069925, + "grad_norm": 673.9247436523438, + "learning_rate": 4.094637223974763e-07, + "loss": 53.5938, + "step": 649 + }, + { + "epoch": 0.006152914114784979, + "grad_norm": 1363.1844482421875, + "learning_rate": 4.1009463722397476e-07, + "loss": 69.9844, + "step": 650 + }, + { + "epoch": 0.006162380136500033, + "grad_norm": 632.7025756835938, + "learning_rate": 4.1072555205047314e-07, + "loss": 66.375, + "step": 651 + }, + { + "epoch": 0.006171846158215087, + "grad_norm": 697.6436767578125, + "learning_rate": 4.113564668769716e-07, + "loss": 56.9062, + "step": 652 + }, + { + "epoch": 0.006181312179930141, + "grad_norm": 1117.9647216796875, + "learning_rate": 4.1198738170347e-07, + "loss": 115.9844, + "step": 653 + }, + { + "epoch": 0.0061907782016451945, + "grad_norm": 791.1919555664062, + "learning_rate": 4.126182965299684e-07, + "loss": 52.2812, + "step": 654 + }, + { + "epoch": 0.006200244223360248, + "grad_norm": 920.097900390625, + "learning_rate": 4.1324921135646685e-07, + "loss": 88.8125, + "step": 655 + }, + { + "epoch": 0.006209710245075302, + "grad_norm": 859.8260498046875, + "learning_rate": 4.138801261829653e-07, + "loss": 82.9062, + "step": 656 + }, + { + "epoch": 0.006219176266790356, + "grad_norm": 767.6900634765625, + "learning_rate": 4.1451104100946373e-07, + "loss": 103.9375, + "step": 657 + }, + { + "epoch": 0.00622864228850541, + "grad_norm": 1028.5289306640625, + "learning_rate": 4.151419558359621e-07, + "loss": 142.5625, + "step": 658 + }, + { + "epoch": 0.006238108310220464, + "grad_norm": 628.8794555664062, + "learning_rate": 4.157728706624605e-07, + "loss": 52.3125, + "step": 659 + }, + { + "epoch": 0.006247574331935518, + "grad_norm": 703.647705078125, + "learning_rate": 4.16403785488959e-07, + "loss": 53.3438, + "step": 660 + }, + { + "epoch": 0.006257040353650572, + "grad_norm": 852.6135864257812, + "learning_rate": 4.170347003154574e-07, + "loss": 51.7188, + "step": 661 + }, + { + "epoch": 0.006266506375365625, + "grad_norm": 621.4190063476562, + "learning_rate": 4.176656151419558e-07, + "loss": 51.5625, + "step": 662 + }, + { + "epoch": 0.0062759723970806786, + "grad_norm": 682.772705078125, + "learning_rate": 4.182965299684542e-07, + "loss": 55.75, + "step": 663 + }, + { + "epoch": 0.006285438418795732, + "grad_norm": 804.914306640625, + "learning_rate": 4.189274447949527e-07, + "loss": 75.9062, + "step": 664 + }, + { + "epoch": 0.006294904440510786, + "grad_norm": 831.501953125, + "learning_rate": 4.195583596214511e-07, + "loss": 154.25, + "step": 665 + }, + { + "epoch": 0.00630437046222584, + "grad_norm": 1172.2889404296875, + "learning_rate": 4.201892744479495e-07, + "loss": 109.9375, + "step": 666 + }, + { + "epoch": 0.006313836483940894, + "grad_norm": 686.0167236328125, + "learning_rate": 4.2082018927444797e-07, + "loss": 104.0, + "step": 667 + }, + { + "epoch": 0.006323302505655948, + "grad_norm": 733.5798950195312, + "learning_rate": 4.2145110410094636e-07, + "loss": 59.25, + "step": 668 + }, + { + "epoch": 0.006332768527371002, + "grad_norm": 657.3442993164062, + "learning_rate": 4.220820189274448e-07, + "loss": 52.4062, + "step": 669 + }, + { + "epoch": 0.006342234549086056, + "grad_norm": 675.2840576171875, + "learning_rate": 4.227129337539432e-07, + "loss": 86.7812, + "step": 670 + }, + { + "epoch": 0.0063517005708011096, + "grad_norm": 608.5492553710938, + "learning_rate": 4.233438485804416e-07, + "loss": 46.9531, + "step": 671 + }, + { + "epoch": 0.006361166592516163, + "grad_norm": 1130.2987060546875, + "learning_rate": 4.2397476340694006e-07, + "loss": 84.125, + "step": 672 + }, + { + "epoch": 0.006370632614231217, + "grad_norm": 1026.4122314453125, + "learning_rate": 4.2460567823343845e-07, + "loss": 79.125, + "step": 673 + }, + { + "epoch": 0.006380098635946271, + "grad_norm": 1241.5045166015625, + "learning_rate": 4.252365930599369e-07, + "loss": 114.125, + "step": 674 + }, + { + "epoch": 0.006389564657661325, + "grad_norm": 713.6329345703125, + "learning_rate": 4.2586750788643533e-07, + "loss": 57.9062, + "step": 675 + }, + { + "epoch": 0.006399030679376378, + "grad_norm": 712.4388427734375, + "learning_rate": 4.264984227129337e-07, + "loss": 63.3438, + "step": 676 + }, + { + "epoch": 0.006408496701091432, + "grad_norm": 656.9547729492188, + "learning_rate": 4.2712933753943216e-07, + "loss": 60.0625, + "step": 677 + }, + { + "epoch": 0.006417962722806486, + "grad_norm": 525.9691772460938, + "learning_rate": 4.277602523659306e-07, + "loss": 57.25, + "step": 678 + }, + { + "epoch": 0.00642742874452154, + "grad_norm": 914.7711181640625, + "learning_rate": 4.2839116719242904e-07, + "loss": 56.625, + "step": 679 + }, + { + "epoch": 0.006436894766236594, + "grad_norm": 703.0784912109375, + "learning_rate": 4.290220820189274e-07, + "loss": 59.0625, + "step": 680 + }, + { + "epoch": 0.0064463607879516474, + "grad_norm": 620.483154296875, + "learning_rate": 4.296529968454258e-07, + "loss": 60.75, + "step": 681 + }, + { + "epoch": 0.006455826809666701, + "grad_norm": 2.6872291564941406, + "learning_rate": 4.302839116719243e-07, + "loss": 0.9487, + "step": 682 + }, + { + "epoch": 0.006465292831381755, + "grad_norm": 560.2081298828125, + "learning_rate": 4.309148264984227e-07, + "loss": 54.1875, + "step": 683 + }, + { + "epoch": 0.006474758853096809, + "grad_norm": 474.3762512207031, + "learning_rate": 4.3154574132492113e-07, + "loss": 49.5938, + "step": 684 + }, + { + "epoch": 0.006484224874811863, + "grad_norm": 868.765380859375, + "learning_rate": 4.321766561514195e-07, + "loss": 119.0312, + "step": 685 + }, + { + "epoch": 0.006493690896526917, + "grad_norm": 905.2570190429688, + "learning_rate": 4.3280757097791796e-07, + "loss": 58.125, + "step": 686 + }, + { + "epoch": 0.006503156918241971, + "grad_norm": 740.9848022460938, + "learning_rate": 4.334384858044164e-07, + "loss": 108.1875, + "step": 687 + }, + { + "epoch": 0.006512622939957025, + "grad_norm": 932.3906860351562, + "learning_rate": 4.340694006309148e-07, + "loss": 106.2812, + "step": 688 + }, + { + "epoch": 0.0065220889616720784, + "grad_norm": 2.7568440437316895, + "learning_rate": 4.347003154574132e-07, + "loss": 0.7859, + "step": 689 + }, + { + "epoch": 0.006531554983387132, + "grad_norm": 1660.8675537109375, + "learning_rate": 4.3533123028391166e-07, + "loss": 111.875, + "step": 690 + }, + { + "epoch": 0.006541021005102185, + "grad_norm": 1078.038818359375, + "learning_rate": 4.3596214511041005e-07, + "loss": 115.5625, + "step": 691 + }, + { + "epoch": 0.006550487026817239, + "grad_norm": 759.6041259765625, + "learning_rate": 4.365930599369085e-07, + "loss": 86.3125, + "step": 692 + }, + { + "epoch": 0.006559953048532293, + "grad_norm": 667.2096557617188, + "learning_rate": 4.3722397476340693e-07, + "loss": 53.25, + "step": 693 + }, + { + "epoch": 0.006569419070247347, + "grad_norm": 552.6778564453125, + "learning_rate": 4.3785488958990537e-07, + "loss": 51.6875, + "step": 694 + }, + { + "epoch": 0.006578885091962401, + "grad_norm": 650.8267822265625, + "learning_rate": 4.3848580441640376e-07, + "loss": 50.0625, + "step": 695 + }, + { + "epoch": 0.006588351113677455, + "grad_norm": 948.421875, + "learning_rate": 4.3911671924290214e-07, + "loss": 104.7188, + "step": 696 + }, + { + "epoch": 0.006597817135392509, + "grad_norm": 929.8365478515625, + "learning_rate": 4.3974763406940064e-07, + "loss": 89.3438, + "step": 697 + }, + { + "epoch": 0.0066072831571075625, + "grad_norm": 993.6218872070312, + "learning_rate": 4.40378548895899e-07, + "loss": 56.1562, + "step": 698 + }, + { + "epoch": 0.006616749178822616, + "grad_norm": 872.5192260742188, + "learning_rate": 4.4100946372239746e-07, + "loss": 114.125, + "step": 699 + }, + { + "epoch": 0.00662621520053767, + "grad_norm": 711.3560180664062, + "learning_rate": 4.4164037854889585e-07, + "loss": 60.4062, + "step": 700 + }, + { + "epoch": 0.006635681222252724, + "grad_norm": 3.1337811946868896, + "learning_rate": 4.4227129337539434e-07, + "loss": 0.9575, + "step": 701 + }, + { + "epoch": 0.006645147243967778, + "grad_norm": 1135.262939453125, + "learning_rate": 4.4290220820189273e-07, + "loss": 72.25, + "step": 702 + }, + { + "epoch": 0.006654613265682832, + "grad_norm": 1047.9757080078125, + "learning_rate": 4.435331230283911e-07, + "loss": 98.8438, + "step": 703 + }, + { + "epoch": 0.006664079287397886, + "grad_norm": 704.785400390625, + "learning_rate": 4.441640378548896e-07, + "loss": 57.0, + "step": 704 + }, + { + "epoch": 0.006673545309112939, + "grad_norm": 802.1261596679688, + "learning_rate": 4.44794952681388e-07, + "loss": 60.4688, + "step": 705 + }, + { + "epoch": 0.006683011330827993, + "grad_norm": 742.6767578125, + "learning_rate": 4.4542586750788644e-07, + "loss": 59.1875, + "step": 706 + }, + { + "epoch": 0.0066924773525430465, + "grad_norm": 3554.342041015625, + "learning_rate": 4.460567823343848e-07, + "loss": 201.4375, + "step": 707 + }, + { + "epoch": 0.0067019433742581, + "grad_norm": 1034.7484130859375, + "learning_rate": 4.4668769716088326e-07, + "loss": 65.9375, + "step": 708 + }, + { + "epoch": 0.006711409395973154, + "grad_norm": 622.508056640625, + "learning_rate": 4.473186119873817e-07, + "loss": 132.875, + "step": 709 + }, + { + "epoch": 0.006720875417688208, + "grad_norm": 897.9864501953125, + "learning_rate": 4.479495268138801e-07, + "loss": 121.0, + "step": 710 + }, + { + "epoch": 0.006730341439403262, + "grad_norm": 633.8663940429688, + "learning_rate": 4.4858044164037853e-07, + "loss": 56.875, + "step": 711 + }, + { + "epoch": 0.006739807461118316, + "grad_norm": 2.3826282024383545, + "learning_rate": 4.4921135646687697e-07, + "loss": 0.8003, + "step": 712 + }, + { + "epoch": 0.00674927348283337, + "grad_norm": 755.7293701171875, + "learning_rate": 4.4984227129337536e-07, + "loss": 100.2656, + "step": 713 + }, + { + "epoch": 0.006758739504548424, + "grad_norm": 874.4550170898438, + "learning_rate": 4.504731861198738e-07, + "loss": 96.2812, + "step": 714 + }, + { + "epoch": 0.0067682055262634775, + "grad_norm": 857.3119506835938, + "learning_rate": 4.5110410094637224e-07, + "loss": 128.625, + "step": 715 + }, + { + "epoch": 0.006777671547978531, + "grad_norm": 632.5770263671875, + "learning_rate": 4.517350157728707e-07, + "loss": 90.8438, + "step": 716 + }, + { + "epoch": 0.006787137569693585, + "grad_norm": 1686.2781982421875, + "learning_rate": 4.5236593059936906e-07, + "loss": 108.4375, + "step": 717 + }, + { + "epoch": 0.006796603591408639, + "grad_norm": 618.0745849609375, + "learning_rate": 4.5299684542586745e-07, + "loss": 47.5781, + "step": 718 + }, + { + "epoch": 0.006806069613123692, + "grad_norm": 899.2180786132812, + "learning_rate": 4.5362776025236594e-07, + "loss": 90.5312, + "step": 719 + }, + { + "epoch": 0.006815535634838746, + "grad_norm": 921.3800659179688, + "learning_rate": 4.5425867507886433e-07, + "loss": 78.625, + "step": 720 + }, + { + "epoch": 0.0068250016565538, + "grad_norm": 606.9647827148438, + "learning_rate": 4.5488958990536277e-07, + "loss": 47.4062, + "step": 721 + }, + { + "epoch": 0.006834467678268854, + "grad_norm": 1095.116943359375, + "learning_rate": 4.5552050473186116e-07, + "loss": 110.4375, + "step": 722 + }, + { + "epoch": 0.006843933699983908, + "grad_norm": 700.3568115234375, + "learning_rate": 4.561514195583596e-07, + "loss": 58.7344, + "step": 723 + }, + { + "epoch": 0.0068533997216989615, + "grad_norm": 900.1468505859375, + "learning_rate": 4.5678233438485804e-07, + "loss": 128.5, + "step": 724 + }, + { + "epoch": 0.006862865743414015, + "grad_norm": 557.4404907226562, + "learning_rate": 4.574132492113564e-07, + "loss": 61.9688, + "step": 725 + }, + { + "epoch": 0.006872331765129069, + "grad_norm": 775.9334106445312, + "learning_rate": 4.580441640378549e-07, + "loss": 61.9375, + "step": 726 + }, + { + "epoch": 0.006881797786844123, + "grad_norm": 956.7257080078125, + "learning_rate": 4.586750788643533e-07, + "loss": 88.5312, + "step": 727 + }, + { + "epoch": 0.006891263808559177, + "grad_norm": 682.5673828125, + "learning_rate": 4.593059936908517e-07, + "loss": 86.0312, + "step": 728 + }, + { + "epoch": 0.006900729830274231, + "grad_norm": 1588.4832763671875, + "learning_rate": 4.5993690851735013e-07, + "loss": 103.4688, + "step": 729 + }, + { + "epoch": 0.006910195851989285, + "grad_norm": 2223.914306640625, + "learning_rate": 4.6056782334384857e-07, + "loss": 130.2188, + "step": 730 + }, + { + "epoch": 0.006919661873704339, + "grad_norm": 1426.7650146484375, + "learning_rate": 4.61198738170347e-07, + "loss": 82.7812, + "step": 731 + }, + { + "epoch": 0.0069291278954193925, + "grad_norm": 683.0283203125, + "learning_rate": 4.618296529968454e-07, + "loss": 48.25, + "step": 732 + }, + { + "epoch": 0.0069385939171344455, + "grad_norm": 699.4717407226562, + "learning_rate": 4.624605678233438e-07, + "loss": 90.0312, + "step": 733 + }, + { + "epoch": 0.006948059938849499, + "grad_norm": 537.8048095703125, + "learning_rate": 4.630914826498423e-07, + "loss": 46.8438, + "step": 734 + }, + { + "epoch": 0.006957525960564553, + "grad_norm": 519.2514038085938, + "learning_rate": 4.6372239747634066e-07, + "loss": 54.5, + "step": 735 + }, + { + "epoch": 0.006966991982279607, + "grad_norm": 711.2446899414062, + "learning_rate": 4.643533123028391e-07, + "loss": 91.125, + "step": 736 + }, + { + "epoch": 0.006976458003994661, + "grad_norm": 671.8798828125, + "learning_rate": 4.649842271293375e-07, + "loss": 47.5, + "step": 737 + }, + { + "epoch": 0.006985924025709715, + "grad_norm": 1387.1195068359375, + "learning_rate": 4.6561514195583593e-07, + "loss": 103.3125, + "step": 738 + }, + { + "epoch": 0.006995390047424769, + "grad_norm": 1116.261474609375, + "learning_rate": 4.6624605678233437e-07, + "loss": 113.9688, + "step": 739 + }, + { + "epoch": 0.007004856069139823, + "grad_norm": 905.7135009765625, + "learning_rate": 4.6687697160883276e-07, + "loss": 66.1406, + "step": 740 + }, + { + "epoch": 0.0070143220908548765, + "grad_norm": 690.0347290039062, + "learning_rate": 4.6750788643533125e-07, + "loss": 53.1562, + "step": 741 + }, + { + "epoch": 0.00702378811256993, + "grad_norm": 568.18505859375, + "learning_rate": 4.6813880126182964e-07, + "loss": 96.9062, + "step": 742 + }, + { + "epoch": 0.007033254134284984, + "grad_norm": 677.7637329101562, + "learning_rate": 4.68769716088328e-07, + "loss": 70.0938, + "step": 743 + }, + { + "epoch": 0.007042720156000038, + "grad_norm": 654.349853515625, + "learning_rate": 4.6940063091482646e-07, + "loss": 59.4688, + "step": 744 + }, + { + "epoch": 0.007052186177715092, + "grad_norm": 868.0798950195312, + "learning_rate": 4.700315457413249e-07, + "loss": 64.4375, + "step": 745 + }, + { + "epoch": 0.007061652199430146, + "grad_norm": 785.4501953125, + "learning_rate": 4.7066246056782334e-07, + "loss": 78.375, + "step": 746 + }, + { + "epoch": 0.007071118221145199, + "grad_norm": 780.6588745117188, + "learning_rate": 4.7129337539432173e-07, + "loss": 85.125, + "step": 747 + }, + { + "epoch": 0.007080584242860253, + "grad_norm": 666.7556762695312, + "learning_rate": 4.719242902208201e-07, + "loss": 57.3438, + "step": 748 + }, + { + "epoch": 0.007090050264575307, + "grad_norm": 666.6965942382812, + "learning_rate": 4.725552050473186e-07, + "loss": 102.4375, + "step": 749 + }, + { + "epoch": 0.0070995162862903605, + "grad_norm": 1134.0828857421875, + "learning_rate": 4.73186119873817e-07, + "loss": 61.0625, + "step": 750 + }, + { + "epoch": 0.007108982308005414, + "grad_norm": 877.4514770507812, + "learning_rate": 4.7381703470031544e-07, + "loss": 57.75, + "step": 751 + }, + { + "epoch": 0.007118448329720468, + "grad_norm": 567.602294921875, + "learning_rate": 4.744479495268139e-07, + "loss": 50.0625, + "step": 752 + }, + { + "epoch": 0.007127914351435522, + "grad_norm": 901.8771362304688, + "learning_rate": 4.750788643533123e-07, + "loss": 61.1562, + "step": 753 + }, + { + "epoch": 0.007137380373150576, + "grad_norm": 638.4566650390625, + "learning_rate": 4.757097791798107e-07, + "loss": 60.25, + "step": 754 + }, + { + "epoch": 0.00714684639486563, + "grad_norm": 1189.4766845703125, + "learning_rate": 4.763406940063091e-07, + "loss": 94.375, + "step": 755 + }, + { + "epoch": 0.007156312416580684, + "grad_norm": 801.3098754882812, + "learning_rate": 4.769716088328075e-07, + "loss": 54.0, + "step": 756 + }, + { + "epoch": 0.007165778438295738, + "grad_norm": 783.3068237304688, + "learning_rate": 4.77602523659306e-07, + "loss": 116.5625, + "step": 757 + }, + { + "epoch": 0.0071752444600107915, + "grad_norm": 511.90802001953125, + "learning_rate": 4.782334384858044e-07, + "loss": 45.5, + "step": 758 + }, + { + "epoch": 0.007184710481725845, + "grad_norm": 478.81121826171875, + "learning_rate": 4.788643533123028e-07, + "loss": 49.3125, + "step": 759 + }, + { + "epoch": 0.007194176503440899, + "grad_norm": 669.1679077148438, + "learning_rate": 4.794952681388013e-07, + "loss": 47.0938, + "step": 760 + }, + { + "epoch": 0.007203642525155953, + "grad_norm": 774.081787109375, + "learning_rate": 4.801261829652997e-07, + "loss": 55.8438, + "step": 761 + }, + { + "epoch": 0.007213108546871006, + "grad_norm": 507.76708984375, + "learning_rate": 4.807570977917981e-07, + "loss": 50.0938, + "step": 762 + }, + { + "epoch": 0.00722257456858606, + "grad_norm": 1447.4273681640625, + "learning_rate": 4.813880126182966e-07, + "loss": 84.6094, + "step": 763 + }, + { + "epoch": 0.007232040590301114, + "grad_norm": 506.8823547363281, + "learning_rate": 4.820189274447949e-07, + "loss": 51.125, + "step": 764 + }, + { + "epoch": 0.007241506612016168, + "grad_norm": 939.7547607421875, + "learning_rate": 4.826498422712933e-07, + "loss": 123.3125, + "step": 765 + }, + { + "epoch": 0.007250972633731222, + "grad_norm": 765.6340942382812, + "learning_rate": 4.832807570977917e-07, + "loss": 94.3125, + "step": 766 + }, + { + "epoch": 0.0072604386554462755, + "grad_norm": 926.777587890625, + "learning_rate": 4.839116719242902e-07, + "loss": 123.6719, + "step": 767 + }, + { + "epoch": 0.007269904677161329, + "grad_norm": 537.46875, + "learning_rate": 4.845425867507886e-07, + "loss": 64.9062, + "step": 768 + }, + { + "epoch": 0.007279370698876383, + "grad_norm": 818.5309448242188, + "learning_rate": 4.85173501577287e-07, + "loss": 116.125, + "step": 769 + }, + { + "epoch": 0.007288836720591437, + "grad_norm": 906.6332397460938, + "learning_rate": 4.858044164037855e-07, + "loss": 100.9375, + "step": 770 + }, + { + "epoch": 0.007298302742306491, + "grad_norm": 703.8358764648438, + "learning_rate": 4.864353312302839e-07, + "loss": 57.8438, + "step": 771 + }, + { + "epoch": 0.007307768764021545, + "grad_norm": 562.2327270507812, + "learning_rate": 4.870662460567824e-07, + "loss": 76.8438, + "step": 772 + }, + { + "epoch": 0.007317234785736599, + "grad_norm": 784.0732421875, + "learning_rate": 4.876971608832807e-07, + "loss": 48.2188, + "step": 773 + }, + { + "epoch": 0.007326700807451653, + "grad_norm": 542.60546875, + "learning_rate": 4.883280757097791e-07, + "loss": 51.2188, + "step": 774 + }, + { + "epoch": 0.0073361668291667065, + "grad_norm": 1071.7242431640625, + "learning_rate": 4.889589905362776e-07, + "loss": 54.8125, + "step": 775 + }, + { + "epoch": 0.0073456328508817595, + "grad_norm": 860.04638671875, + "learning_rate": 4.89589905362776e-07, + "loss": 45.5938, + "step": 776 + }, + { + "epoch": 0.007355098872596813, + "grad_norm": 739.45947265625, + "learning_rate": 4.902208201892744e-07, + "loss": 57.3125, + "step": 777 + }, + { + "epoch": 0.007364564894311867, + "grad_norm": 1081.9669189453125, + "learning_rate": 4.908517350157729e-07, + "loss": 105.5625, + "step": 778 + }, + { + "epoch": 0.007374030916026921, + "grad_norm": 666.6940307617188, + "learning_rate": 4.914826498422713e-07, + "loss": 57.0938, + "step": 779 + }, + { + "epoch": 0.007383496937741975, + "grad_norm": 1013.7083129882812, + "learning_rate": 4.921135646687697e-07, + "loss": 113.8438, + "step": 780 + }, + { + "epoch": 0.007392962959457029, + "grad_norm": 736.87353515625, + "learning_rate": 4.92744479495268e-07, + "loss": 97.4062, + "step": 781 + }, + { + "epoch": 0.007402428981172083, + "grad_norm": 3.03008770942688, + "learning_rate": 4.933753943217665e-07, + "loss": 0.978, + "step": 782 + }, + { + "epoch": 0.007411895002887137, + "grad_norm": 764.3482666015625, + "learning_rate": 4.940063091482649e-07, + "loss": 90.0312, + "step": 783 + }, + { + "epoch": 0.0074213610246021905, + "grad_norm": 1034.0223388671875, + "learning_rate": 4.946372239747634e-07, + "loss": 93.25, + "step": 784 + }, + { + "epoch": 0.007430827046317244, + "grad_norm": 4.111385345458984, + "learning_rate": 4.952681388012618e-07, + "loss": 0.9053, + "step": 785 + }, + { + "epoch": 0.007440293068032298, + "grad_norm": 1394.77099609375, + "learning_rate": 4.958990536277602e-07, + "loss": 133.2812, + "step": 786 + }, + { + "epoch": 0.007449759089747352, + "grad_norm": 1801.6568603515625, + "learning_rate": 4.965299684542587e-07, + "loss": 125.7812, + "step": 787 + }, + { + "epoch": 0.007459225111462406, + "grad_norm": 3.2125141620635986, + "learning_rate": 4.971608832807571e-07, + "loss": 0.8711, + "step": 788 + }, + { + "epoch": 0.00746869113317746, + "grad_norm": 534.101318359375, + "learning_rate": 4.977917981072556e-07, + "loss": 47.5625, + "step": 789 + }, + { + "epoch": 0.007478157154892513, + "grad_norm": 1366.0341796875, + "learning_rate": 4.98422712933754e-07, + "loss": 87.2031, + "step": 790 + }, + { + "epoch": 0.007487623176607567, + "grad_norm": 4.03013801574707, + "learning_rate": 4.990536277602523e-07, + "loss": 0.8652, + "step": 791 + }, + { + "epoch": 0.007497089198322621, + "grad_norm": 587.3823852539062, + "learning_rate": 4.996845425867507e-07, + "loss": 54.9688, + "step": 792 + }, + { + "epoch": 0.0075065552200376745, + "grad_norm": 736.0506591796875, + "learning_rate": 5.003154574132492e-07, + "loss": 68.0938, + "step": 793 + }, + { + "epoch": 0.007516021241752728, + "grad_norm": 639.2147827148438, + "learning_rate": 5.009463722397476e-07, + "loss": 70.0312, + "step": 794 + }, + { + "epoch": 0.007525487263467782, + "grad_norm": 3.1425139904022217, + "learning_rate": 5.01577287066246e-07, + "loss": 0.7754, + "step": 795 + }, + { + "epoch": 0.007534953285182836, + "grad_norm": 2.9051320552825928, + "learning_rate": 5.022082018927445e-07, + "loss": 0.96, + "step": 796 + }, + { + "epoch": 0.00754441930689789, + "grad_norm": 1276.66796875, + "learning_rate": 5.028391167192429e-07, + "loss": 122.875, + "step": 797 + }, + { + "epoch": 0.007553885328612944, + "grad_norm": 673.965087890625, + "learning_rate": 5.034700315457413e-07, + "loss": 100.5938, + "step": 798 + }, + { + "epoch": 0.007563351350327998, + "grad_norm": 642.2201538085938, + "learning_rate": 5.041009463722398e-07, + "loss": 37.4219, + "step": 799 + }, + { + "epoch": 0.007572817372043052, + "grad_norm": 934.3065185546875, + "learning_rate": 5.047318611987381e-07, + "loss": 87.5, + "step": 800 + }, + { + "epoch": 0.0075822833937581055, + "grad_norm": 1148.65771484375, + "learning_rate": 5.053627760252365e-07, + "loss": 89.2812, + "step": 801 + }, + { + "epoch": 0.007591749415473159, + "grad_norm": 1826.3553466796875, + "learning_rate": 5.05993690851735e-07, + "loss": 106.8438, + "step": 802 + }, + { + "epoch": 0.007601215437188213, + "grad_norm": 964.21044921875, + "learning_rate": 5.066246056782334e-07, + "loss": 54.8438, + "step": 803 + }, + { + "epoch": 0.007610681458903266, + "grad_norm": 511.60369873046875, + "learning_rate": 5.072555205047319e-07, + "loss": 89.6094, + "step": 804 + }, + { + "epoch": 0.00762014748061832, + "grad_norm": 510.3746337890625, + "learning_rate": 5.078864353312302e-07, + "loss": 57.375, + "step": 805 + }, + { + "epoch": 0.007629613502333374, + "grad_norm": 589.3353881835938, + "learning_rate": 5.085173501577287e-07, + "loss": 42.5938, + "step": 806 + }, + { + "epoch": 0.007639079524048428, + "grad_norm": 839.3663330078125, + "learning_rate": 5.091482649842272e-07, + "loss": 57.1875, + "step": 807 + }, + { + "epoch": 0.007648545545763482, + "grad_norm": 1263.7366943359375, + "learning_rate": 5.097791798107255e-07, + "loss": 99.4688, + "step": 808 + }, + { + "epoch": 0.007658011567478536, + "grad_norm": 1232.43017578125, + "learning_rate": 5.104100946372239e-07, + "loss": 60.2812, + "step": 809 + }, + { + "epoch": 0.0076674775891935896, + "grad_norm": 673.701904296875, + "learning_rate": 5.110410094637224e-07, + "loss": 48.2812, + "step": 810 + }, + { + "epoch": 0.007676943610908643, + "grad_norm": 990.0816650390625, + "learning_rate": 5.116719242902207e-07, + "loss": 71.1562, + "step": 811 + }, + { + "epoch": 0.007686409632623697, + "grad_norm": 829.873046875, + "learning_rate": 5.123028391167192e-07, + "loss": 58.5938, + "step": 812 + }, + { + "epoch": 0.007695875654338751, + "grad_norm": 2400.70849609375, + "learning_rate": 5.129337539432177e-07, + "loss": 125.0, + "step": 813 + }, + { + "epoch": 0.007705341676053805, + "grad_norm": 511.437255859375, + "learning_rate": 5.135646687697161e-07, + "loss": 63.4531, + "step": 814 + }, + { + "epoch": 0.007714807697768859, + "grad_norm": 697.353515625, + "learning_rate": 5.141955835962145e-07, + "loss": 71.9844, + "step": 815 + }, + { + "epoch": 0.007724273719483913, + "grad_norm": 629.1488037109375, + "learning_rate": 5.148264984227129e-07, + "loss": 48.5312, + "step": 816 + }, + { + "epoch": 0.007733739741198967, + "grad_norm": 424.27081298828125, + "learning_rate": 5.154574132492114e-07, + "loss": 50.3125, + "step": 817 + }, + { + "epoch": 0.0077432057629140206, + "grad_norm": 1043.326171875, + "learning_rate": 5.160883280757097e-07, + "loss": 123.7188, + "step": 818 + }, + { + "epoch": 0.0077526717846290736, + "grad_norm": 524.2852172851562, + "learning_rate": 5.167192429022081e-07, + "loss": 49.25, + "step": 819 + }, + { + "epoch": 0.0077621378063441274, + "grad_norm": 565.6692504882812, + "learning_rate": 5.173501577287066e-07, + "loss": 83.125, + "step": 820 + }, + { + "epoch": 0.007771603828059181, + "grad_norm": 849.1063232421875, + "learning_rate": 5.179810725552051e-07, + "loss": 97.7969, + "step": 821 + }, + { + "epoch": 0.007781069849774235, + "grad_norm": 525.5205688476562, + "learning_rate": 5.186119873817034e-07, + "loss": 46.8438, + "step": 822 + }, + { + "epoch": 0.007790535871489289, + "grad_norm": 813.9027099609375, + "learning_rate": 5.192429022082019e-07, + "loss": 62.0, + "step": 823 + }, + { + "epoch": 0.007800001893204343, + "grad_norm": 2.8228847980499268, + "learning_rate": 5.198738170347004e-07, + "loss": 0.874, + "step": 824 + }, + { + "epoch": 0.007809467914919397, + "grad_norm": 428.4258117675781, + "learning_rate": 5.205047318611987e-07, + "loss": 53.2188, + "step": 825 + }, + { + "epoch": 0.00781893393663445, + "grad_norm": 515.7318115234375, + "learning_rate": 5.211356466876972e-07, + "loss": 43.8125, + "step": 826 + }, + { + "epoch": 0.007828399958349504, + "grad_norm": 597.9266967773438, + "learning_rate": 5.217665615141955e-07, + "loss": 50.4062, + "step": 827 + }, + { + "epoch": 0.007837865980064558, + "grad_norm": 567.211181640625, + "learning_rate": 5.223974763406939e-07, + "loss": 52.0625, + "step": 828 + }, + { + "epoch": 0.007847332001779611, + "grad_norm": 1001.0836181640625, + "learning_rate": 5.230283911671924e-07, + "loss": 70.75, + "step": 829 + }, + { + "epoch": 0.007856798023494666, + "grad_norm": 746.6680297851562, + "learning_rate": 5.236593059936908e-07, + "loss": 48.6875, + "step": 830 + }, + { + "epoch": 0.00786626404520972, + "grad_norm": 800.999755859375, + "learning_rate": 5.242902208201893e-07, + "loss": 102.5625, + "step": 831 + }, + { + "epoch": 0.007875730066924774, + "grad_norm": 475.74212646484375, + "learning_rate": 5.249211356466877e-07, + "loss": 45.0938, + "step": 832 + }, + { + "epoch": 0.007885196088639827, + "grad_norm": 2.6574461460113525, + "learning_rate": 5.255520504731861e-07, + "loss": 0.7451, + "step": 833 + }, + { + "epoch": 0.007894662110354882, + "grad_norm": 2.644270420074463, + "learning_rate": 5.261829652996846e-07, + "loss": 0.957, + "step": 834 + }, + { + "epoch": 0.007904128132069935, + "grad_norm": 483.36212158203125, + "learning_rate": 5.26813880126183e-07, + "loss": 48.9688, + "step": 835 + }, + { + "epoch": 0.00791359415378499, + "grad_norm": 985.6268920898438, + "learning_rate": 5.274447949526813e-07, + "loss": 88.4062, + "step": 836 + }, + { + "epoch": 0.007923060175500042, + "grad_norm": 3.3204052448272705, + "learning_rate": 5.280757097791798e-07, + "loss": 0.8538, + "step": 837 + }, + { + "epoch": 0.007932526197215097, + "grad_norm": 977.9467163085938, + "learning_rate": 5.287066246056781e-07, + "loss": 74.0312, + "step": 838 + }, + { + "epoch": 0.00794199221893015, + "grad_norm": 594.3565063476562, + "learning_rate": 5.293375394321766e-07, + "loss": 59.8438, + "step": 839 + }, + { + "epoch": 0.007951458240645203, + "grad_norm": 760.4666137695312, + "learning_rate": 5.299684542586751e-07, + "loss": 55.4062, + "step": 840 + }, + { + "epoch": 0.007960924262360258, + "grad_norm": 579.6248779296875, + "learning_rate": 5.305993690851735e-07, + "loss": 50.1875, + "step": 841 + }, + { + "epoch": 0.007970390284075311, + "grad_norm": 582.2992553710938, + "learning_rate": 5.312302839116719e-07, + "loss": 52.0312, + "step": 842 + }, + { + "epoch": 0.007979856305790366, + "grad_norm": 1976.796875, + "learning_rate": 5.318611987381704e-07, + "loss": 92.0938, + "step": 843 + }, + { + "epoch": 0.007989322327505419, + "grad_norm": 1077.587890625, + "learning_rate": 5.324921135646688e-07, + "loss": 120.4688, + "step": 844 + }, + { + "epoch": 0.007998788349220473, + "grad_norm": 384.5727844238281, + "learning_rate": 5.331230283911671e-07, + "loss": 47.0156, + "step": 845 + }, + { + "epoch": 0.008008254370935526, + "grad_norm": 544.6519775390625, + "learning_rate": 5.337539432176655e-07, + "loss": 51.4375, + "step": 846 + }, + { + "epoch": 0.008017720392650581, + "grad_norm": 1123.7867431640625, + "learning_rate": 5.34384858044164e-07, + "loss": 104.8125, + "step": 847 + }, + { + "epoch": 0.008027186414365634, + "grad_norm": 550.1346435546875, + "learning_rate": 5.350157728706624e-07, + "loss": 86.5312, + "step": 848 + }, + { + "epoch": 0.008036652436080689, + "grad_norm": 557.1901245117188, + "learning_rate": 5.356466876971608e-07, + "loss": 46.5312, + "step": 849 + }, + { + "epoch": 0.008046118457795742, + "grad_norm": 942.27734375, + "learning_rate": 5.362776025236593e-07, + "loss": 97.3125, + "step": 850 + }, + { + "epoch": 0.008055584479510797, + "grad_norm": 530.2299194335938, + "learning_rate": 5.369085173501578e-07, + "loss": 54.5312, + "step": 851 + }, + { + "epoch": 0.00806505050122585, + "grad_norm": 583.9393920898438, + "learning_rate": 5.375394321766561e-07, + "loss": 63.7812, + "step": 852 + }, + { + "epoch": 0.008074516522940904, + "grad_norm": 350.1147155761719, + "learning_rate": 5.381703470031546e-07, + "loss": 39.7031, + "step": 853 + }, + { + "epoch": 0.008083982544655957, + "grad_norm": 589.2879028320312, + "learning_rate": 5.38801261829653e-07, + "loss": 63.9375, + "step": 854 + }, + { + "epoch": 0.00809344856637101, + "grad_norm": 754.3623046875, + "learning_rate": 5.394321766561513e-07, + "loss": 51.0938, + "step": 855 + }, + { + "epoch": 0.008102914588086065, + "grad_norm": 3.411783218383789, + "learning_rate": 5.400630914826498e-07, + "loss": 0.8623, + "step": 856 + }, + { + "epoch": 0.008112380609801118, + "grad_norm": 503.009033203125, + "learning_rate": 5.406940063091482e-07, + "loss": 46.375, + "step": 857 + }, + { + "epoch": 0.008121846631516173, + "grad_norm": 473.1482238769531, + "learning_rate": 5.413249211356466e-07, + "loss": 48.7656, + "step": 858 + }, + { + "epoch": 0.008131312653231226, + "grad_norm": 497.8544006347656, + "learning_rate": 5.419558359621451e-07, + "loss": 53.0, + "step": 859 + }, + { + "epoch": 0.00814077867494628, + "grad_norm": 655.8378295898438, + "learning_rate": 5.425867507886435e-07, + "loss": 48.75, + "step": 860 + }, + { + "epoch": 0.008150244696661334, + "grad_norm": 482.3836669921875, + "learning_rate": 5.43217665615142e-07, + "loss": 34.6094, + "step": 861 + }, + { + "epoch": 0.008159710718376388, + "grad_norm": 655.23095703125, + "learning_rate": 5.438485804416404e-07, + "loss": 47.3125, + "step": 862 + }, + { + "epoch": 0.008169176740091441, + "grad_norm": 561.69970703125, + "learning_rate": 5.444794952681387e-07, + "loss": 49.4688, + "step": 863 + }, + { + "epoch": 0.008178642761806496, + "grad_norm": 468.29144287109375, + "learning_rate": 5.451104100946372e-07, + "loss": 54.0625, + "step": 864 + }, + { + "epoch": 0.00818810878352155, + "grad_norm": 922.1507568359375, + "learning_rate": 5.457413249211356e-07, + "loss": 92.25, + "step": 865 + }, + { + "epoch": 0.008197574805236604, + "grad_norm": 578.6650390625, + "learning_rate": 5.46372239747634e-07, + "loss": 51.2188, + "step": 866 + }, + { + "epoch": 0.008207040826951657, + "grad_norm": 555.2592163085938, + "learning_rate": 5.470031545741325e-07, + "loss": 78.125, + "step": 867 + }, + { + "epoch": 0.00821650684866671, + "grad_norm": 2.4686732292175293, + "learning_rate": 5.476340694006308e-07, + "loss": 0.7295, + "step": 868 + }, + { + "epoch": 0.008225972870381765, + "grad_norm": 713.4580688476562, + "learning_rate": 5.482649842271293e-07, + "loss": 86.9062, + "step": 869 + }, + { + "epoch": 0.008235438892096818, + "grad_norm": 503.7627868652344, + "learning_rate": 5.488958990536278e-07, + "loss": 46.5938, + "step": 870 + }, + { + "epoch": 0.008244904913811872, + "grad_norm": 474.4834899902344, + "learning_rate": 5.495268138801262e-07, + "loss": 101.7188, + "step": 871 + }, + { + "epoch": 0.008254370935526925, + "grad_norm": 918.9407348632812, + "learning_rate": 5.501577287066245e-07, + "loss": 87.5156, + "step": 872 + }, + { + "epoch": 0.00826383695724198, + "grad_norm": 550.7669677734375, + "learning_rate": 5.50788643533123e-07, + "loss": 48.25, + "step": 873 + }, + { + "epoch": 0.008273302978957033, + "grad_norm": 495.71112060546875, + "learning_rate": 5.514195583596214e-07, + "loss": 50.375, + "step": 874 + }, + { + "epoch": 0.008282769000672088, + "grad_norm": 3042.91552734375, + "learning_rate": 5.520504731861198e-07, + "loss": 74.6875, + "step": 875 + }, + { + "epoch": 0.008292235022387141, + "grad_norm": 491.931396484375, + "learning_rate": 5.526813880126183e-07, + "loss": 46.6875, + "step": 876 + }, + { + "epoch": 0.008301701044102196, + "grad_norm": 606.964111328125, + "learning_rate": 5.533123028391167e-07, + "loss": 47.0312, + "step": 877 + }, + { + "epoch": 0.008311167065817249, + "grad_norm": 636.4404907226562, + "learning_rate": 5.539432176656152e-07, + "loss": 48.8438, + "step": 878 + }, + { + "epoch": 0.008320633087532303, + "grad_norm": 735.9544067382812, + "learning_rate": 5.545741324921135e-07, + "loss": 70.9062, + "step": 879 + }, + { + "epoch": 0.008330099109247356, + "grad_norm": 1061.9390869140625, + "learning_rate": 5.55205047318612e-07, + "loss": 63.6562, + "step": 880 + }, + { + "epoch": 0.008339565130962411, + "grad_norm": 535.3318481445312, + "learning_rate": 5.558359621451105e-07, + "loss": 44.4375, + "step": 881 + }, + { + "epoch": 0.008349031152677464, + "grad_norm": 814.4985961914062, + "learning_rate": 5.564668769716087e-07, + "loss": 71.875, + "step": 882 + }, + { + "epoch": 0.008358497174392517, + "grad_norm": 508.14398193359375, + "learning_rate": 5.570977917981072e-07, + "loss": 54.8438, + "step": 883 + }, + { + "epoch": 0.008367963196107572, + "grad_norm": 666.0277099609375, + "learning_rate": 5.577287066246057e-07, + "loss": 64.4688, + "step": 884 + }, + { + "epoch": 0.008377429217822625, + "grad_norm": 540.49951171875, + "learning_rate": 5.58359621451104e-07, + "loss": 51.6562, + "step": 885 + }, + { + "epoch": 0.00838689523953768, + "grad_norm": 467.25811767578125, + "learning_rate": 5.589905362776025e-07, + "loss": 46.3125, + "step": 886 + }, + { + "epoch": 0.008396361261252733, + "grad_norm": 563.1368408203125, + "learning_rate": 5.59621451104101e-07, + "loss": 51.875, + "step": 887 + }, + { + "epoch": 0.008405827282967787, + "grad_norm": 684.3067626953125, + "learning_rate": 5.602523659305994e-07, + "loss": 94.5312, + "step": 888 + }, + { + "epoch": 0.00841529330468284, + "grad_norm": 655.529052734375, + "learning_rate": 5.608832807570978e-07, + "loss": 48.4375, + "step": 889 + }, + { + "epoch": 0.008424759326397895, + "grad_norm": 458.71649169921875, + "learning_rate": 5.615141955835961e-07, + "loss": 49.0312, + "step": 890 + }, + { + "epoch": 0.008434225348112948, + "grad_norm": 1300.093017578125, + "learning_rate": 5.621451104100946e-07, + "loss": 107.0625, + "step": 891 + }, + { + "epoch": 0.008443691369828003, + "grad_norm": 1156.27587890625, + "learning_rate": 5.62776025236593e-07, + "loss": 72.0, + "step": 892 + }, + { + "epoch": 0.008453157391543056, + "grad_norm": 570.901611328125, + "learning_rate": 5.634069400630914e-07, + "loss": 54.2812, + "step": 893 + }, + { + "epoch": 0.00846262341325811, + "grad_norm": 879.7730712890625, + "learning_rate": 5.640378548895899e-07, + "loss": 53.0938, + "step": 894 + }, + { + "epoch": 0.008472089434973164, + "grad_norm": 1132.3984375, + "learning_rate": 5.646687697160883e-07, + "loss": 52.8281, + "step": 895 + }, + { + "epoch": 0.008481555456688218, + "grad_norm": 563.0892333984375, + "learning_rate": 5.652996845425867e-07, + "loss": 49.75, + "step": 896 + }, + { + "epoch": 0.008491021478403271, + "grad_norm": 927.1497802734375, + "learning_rate": 5.659305993690852e-07, + "loss": 69.1406, + "step": 897 + }, + { + "epoch": 0.008500487500118325, + "grad_norm": 746.087158203125, + "learning_rate": 5.665615141955837e-07, + "loss": 103.4062, + "step": 898 + }, + { + "epoch": 0.00850995352183338, + "grad_norm": 516.09912109375, + "learning_rate": 5.671924290220819e-07, + "loss": 53.125, + "step": 899 + }, + { + "epoch": 0.008519419543548432, + "grad_norm": 2.5828592777252197, + "learning_rate": 5.678233438485804e-07, + "loss": 0.8521, + "step": 900 + }, + { + "epoch": 0.008528885565263487, + "grad_norm": 3.1559479236602783, + "learning_rate": 5.684542586750788e-07, + "loss": 0.9404, + "step": 901 + }, + { + "epoch": 0.00853835158697854, + "grad_norm": 661.6016845703125, + "learning_rate": 5.690851735015772e-07, + "loss": 50.3438, + "step": 902 + }, + { + "epoch": 0.008547817608693595, + "grad_norm": 697.524658203125, + "learning_rate": 5.697160883280757e-07, + "loss": 89.8125, + "step": 903 + }, + { + "epoch": 0.008557283630408648, + "grad_norm": 518.76611328125, + "learning_rate": 5.703470031545741e-07, + "loss": 45.2031, + "step": 904 + }, + { + "epoch": 0.008566749652123702, + "grad_norm": 847.5130615234375, + "learning_rate": 5.709779179810725e-07, + "loss": 74.1562, + "step": 905 + }, + { + "epoch": 0.008576215673838756, + "grad_norm": 643.4619140625, + "learning_rate": 5.71608832807571e-07, + "loss": 47.1562, + "step": 906 + }, + { + "epoch": 0.00858568169555381, + "grad_norm": 597.977294921875, + "learning_rate": 5.722397476340694e-07, + "loss": 72.7188, + "step": 907 + }, + { + "epoch": 0.008595147717268863, + "grad_norm": 589.1958618164062, + "learning_rate": 5.728706624605679e-07, + "loss": 41.2969, + "step": 908 + }, + { + "epoch": 0.008604613738983918, + "grad_norm": 540.9647216796875, + "learning_rate": 5.735015772870662e-07, + "loss": 47.0938, + "step": 909 + }, + { + "epoch": 0.008614079760698971, + "grad_norm": 722.0404052734375, + "learning_rate": 5.741324921135646e-07, + "loss": 99.5625, + "step": 910 + }, + { + "epoch": 0.008623545782414024, + "grad_norm": 695.4044189453125, + "learning_rate": 5.747634069400631e-07, + "loss": 49.875, + "step": 911 + }, + { + "epoch": 0.008633011804129079, + "grad_norm": 1577.857421875, + "learning_rate": 5.753943217665614e-07, + "loss": 126.5938, + "step": 912 + }, + { + "epoch": 0.008642477825844132, + "grad_norm": 648.773193359375, + "learning_rate": 5.760252365930599e-07, + "loss": 66.3125, + "step": 913 + }, + { + "epoch": 0.008651943847559187, + "grad_norm": 551.3426513671875, + "learning_rate": 5.766561514195584e-07, + "loss": 51.0312, + "step": 914 + }, + { + "epoch": 0.00866140986927424, + "grad_norm": 997.7872924804688, + "learning_rate": 5.772870662460568e-07, + "loss": 115.5156, + "step": 915 + }, + { + "epoch": 0.008670875890989294, + "grad_norm": 575.0909423828125, + "learning_rate": 5.779179810725552e-07, + "loss": 43.7812, + "step": 916 + }, + { + "epoch": 0.008680341912704347, + "grad_norm": 821.425048828125, + "learning_rate": 5.785488958990537e-07, + "loss": 68.9688, + "step": 917 + }, + { + "epoch": 0.008689807934419402, + "grad_norm": 489.1051330566406, + "learning_rate": 5.79179810725552e-07, + "loss": 43.1562, + "step": 918 + }, + { + "epoch": 0.008699273956134455, + "grad_norm": 413.65753173828125, + "learning_rate": 5.798107255520504e-07, + "loss": 44.5938, + "step": 919 + }, + { + "epoch": 0.00870873997784951, + "grad_norm": 499.95855712890625, + "learning_rate": 5.804416403785489e-07, + "loss": 44.1562, + "step": 920 + }, + { + "epoch": 0.008718205999564563, + "grad_norm": 1091.332763671875, + "learning_rate": 5.810725552050473e-07, + "loss": 57.9688, + "step": 921 + }, + { + "epoch": 0.008727672021279618, + "grad_norm": 863.7023315429688, + "learning_rate": 5.817034700315457e-07, + "loss": 116.75, + "step": 922 + }, + { + "epoch": 0.00873713804299467, + "grad_norm": 2.4957759380340576, + "learning_rate": 5.823343848580441e-07, + "loss": 0.8101, + "step": 923 + }, + { + "epoch": 0.008746604064709725, + "grad_norm": 863.5458374023438, + "learning_rate": 5.829652996845426e-07, + "loss": 106.8906, + "step": 924 + }, + { + "epoch": 0.008756070086424778, + "grad_norm": 572.3380126953125, + "learning_rate": 5.835962145110411e-07, + "loss": 43.4375, + "step": 925 + }, + { + "epoch": 0.008765536108139831, + "grad_norm": 847.18310546875, + "learning_rate": 5.842271293375393e-07, + "loss": 96.6875, + "step": 926 + }, + { + "epoch": 0.008775002129854886, + "grad_norm": 451.4510192871094, + "learning_rate": 5.848580441640378e-07, + "loss": 39.7812, + "step": 927 + }, + { + "epoch": 0.008784468151569939, + "grad_norm": 746.7451171875, + "learning_rate": 5.854889589905363e-07, + "loss": 89.7031, + "step": 928 + }, + { + "epoch": 0.008793934173284994, + "grad_norm": 741.4230346679688, + "learning_rate": 5.861198738170346e-07, + "loss": 75.9688, + "step": 929 + }, + { + "epoch": 0.008803400195000047, + "grad_norm": 886.7265014648438, + "learning_rate": 5.867507886435331e-07, + "loss": 98.6562, + "step": 930 + }, + { + "epoch": 0.008812866216715102, + "grad_norm": 1386.0406494140625, + "learning_rate": 5.873817034700315e-07, + "loss": 121.375, + "step": 931 + }, + { + "epoch": 0.008822332238430155, + "grad_norm": 782.8112182617188, + "learning_rate": 5.880126182965299e-07, + "loss": 45.0625, + "step": 932 + }, + { + "epoch": 0.00883179826014521, + "grad_norm": 834.4720458984375, + "learning_rate": 5.886435331230284e-07, + "loss": 102.625, + "step": 933 + }, + { + "epoch": 0.008841264281860262, + "grad_norm": 562.317626953125, + "learning_rate": 5.892744479495268e-07, + "loss": 47.2188, + "step": 934 + }, + { + "epoch": 0.008850730303575317, + "grad_norm": 570.50390625, + "learning_rate": 5.899053627760253e-07, + "loss": 55.6562, + "step": 935 + }, + { + "epoch": 0.00886019632529037, + "grad_norm": 3540.698974609375, + "learning_rate": 5.905362776025236e-07, + "loss": 100.75, + "step": 936 + }, + { + "epoch": 0.008869662347005425, + "grad_norm": 847.4028930664062, + "learning_rate": 5.91167192429022e-07, + "loss": 120.4375, + "step": 937 + }, + { + "epoch": 0.008879128368720478, + "grad_norm": 480.6512451171875, + "learning_rate": 5.917981072555205e-07, + "loss": 48.5781, + "step": 938 + }, + { + "epoch": 0.008888594390435533, + "grad_norm": 584.4874877929688, + "learning_rate": 5.924290220820189e-07, + "loss": 47.7188, + "step": 939 + }, + { + "epoch": 0.008898060412150586, + "grad_norm": 3.527505874633789, + "learning_rate": 5.930599369085173e-07, + "loss": 0.8545, + "step": 940 + }, + { + "epoch": 0.008907526433865639, + "grad_norm": 511.4902648925781, + "learning_rate": 5.936908517350158e-07, + "loss": 51.1875, + "step": 941 + }, + { + "epoch": 0.008916992455580693, + "grad_norm": 798.3194580078125, + "learning_rate": 5.943217665615141e-07, + "loss": 94.2812, + "step": 942 + }, + { + "epoch": 0.008926458477295746, + "grad_norm": 662.8392333984375, + "learning_rate": 5.949526813880126e-07, + "loss": 83.75, + "step": 943 + }, + { + "epoch": 0.008935924499010801, + "grad_norm": 534.926513671875, + "learning_rate": 5.95583596214511e-07, + "loss": 43.7188, + "step": 944 + }, + { + "epoch": 0.008945390520725854, + "grad_norm": 987.9302368164062, + "learning_rate": 5.962145110410094e-07, + "loss": 68.25, + "step": 945 + }, + { + "epoch": 0.008954856542440909, + "grad_norm": 415.9031982421875, + "learning_rate": 5.968454258675078e-07, + "loss": 49.0938, + "step": 946 + }, + { + "epoch": 0.008964322564155962, + "grad_norm": 812.2520141601562, + "learning_rate": 5.974763406940063e-07, + "loss": 47.4688, + "step": 947 + }, + { + "epoch": 0.008973788585871017, + "grad_norm": 442.0588073730469, + "learning_rate": 5.981072555205047e-07, + "loss": 47.0, + "step": 948 + }, + { + "epoch": 0.00898325460758607, + "grad_norm": 1221.280517578125, + "learning_rate": 5.987381703470031e-07, + "loss": 82.0547, + "step": 949 + }, + { + "epoch": 0.008992720629301124, + "grad_norm": 1087.75439453125, + "learning_rate": 5.993690851735016e-07, + "loss": 47.5938, + "step": 950 + }, + { + "epoch": 0.009002186651016177, + "grad_norm": 514.0580444335938, + "learning_rate": 6e-07, + "loss": 75.5625, + "step": 951 + }, + { + "epoch": 0.009011652672731232, + "grad_norm": 681.3024291992188, + "learning_rate": 6.006309148264985e-07, + "loss": 53.5938, + "step": 952 + }, + { + "epoch": 0.009021118694446285, + "grad_norm": 469.5665283203125, + "learning_rate": 6.012618296529967e-07, + "loss": 44.4375, + "step": 953 + }, + { + "epoch": 0.009030584716161338, + "grad_norm": 648.4255981445312, + "learning_rate": 6.018927444794952e-07, + "loss": 42.375, + "step": 954 + }, + { + "epoch": 0.009040050737876393, + "grad_norm": 1290.115478515625, + "learning_rate": 6.025236593059937e-07, + "loss": 79.6406, + "step": 955 + }, + { + "epoch": 0.009049516759591446, + "grad_norm": 584.070068359375, + "learning_rate": 6.03154574132492e-07, + "loss": 84.875, + "step": 956 + }, + { + "epoch": 0.0090589827813065, + "grad_norm": 1431.1536865234375, + "learning_rate": 6.037854889589905e-07, + "loss": 94.0469, + "step": 957 + }, + { + "epoch": 0.009068448803021554, + "grad_norm": 689.6102905273438, + "learning_rate": 6.04416403785489e-07, + "loss": 114.4062, + "step": 958 + }, + { + "epoch": 0.009077914824736608, + "grad_norm": 832.2657470703125, + "learning_rate": 6.050473186119873e-07, + "loss": 101.1875, + "step": 959 + }, + { + "epoch": 0.009087380846451661, + "grad_norm": 624.6807250976562, + "learning_rate": 6.056782334384858e-07, + "loss": 47.9062, + "step": 960 + }, + { + "epoch": 0.009096846868166716, + "grad_norm": 510.3832092285156, + "learning_rate": 6.063091482649843e-07, + "loss": 50.5938, + "step": 961 + }, + { + "epoch": 0.009106312889881769, + "grad_norm": 1298.28271484375, + "learning_rate": 6.069400630914827e-07, + "loss": 108.5625, + "step": 962 + }, + { + "epoch": 0.009115778911596824, + "grad_norm": 587.4338989257812, + "learning_rate": 6.07570977917981e-07, + "loss": 68.2812, + "step": 963 + }, + { + "epoch": 0.009125244933311877, + "grad_norm": 769.8639526367188, + "learning_rate": 6.082018927444794e-07, + "loss": 48.0, + "step": 964 + }, + { + "epoch": 0.009134710955026932, + "grad_norm": 985.637939453125, + "learning_rate": 6.088328075709779e-07, + "loss": 125.5156, + "step": 965 + }, + { + "epoch": 0.009144176976741985, + "grad_norm": 495.4888916015625, + "learning_rate": 6.094637223974763e-07, + "loss": 52.625, + "step": 966 + }, + { + "epoch": 0.00915364299845704, + "grad_norm": 472.966064453125, + "learning_rate": 6.100946372239747e-07, + "loss": 52.5938, + "step": 967 + }, + { + "epoch": 0.009163109020172092, + "grad_norm": 534.1757202148438, + "learning_rate": 6.107255520504732e-07, + "loss": 64.125, + "step": 968 + }, + { + "epoch": 0.009172575041887145, + "grad_norm": 3.035447597503662, + "learning_rate": 6.113564668769716e-07, + "loss": 0.9189, + "step": 969 + }, + { + "epoch": 0.0091820410636022, + "grad_norm": 850.8175659179688, + "learning_rate": 6.1198738170347e-07, + "loss": 44.0625, + "step": 970 + }, + { + "epoch": 0.009191507085317253, + "grad_norm": 549.1148681640625, + "learning_rate": 6.126182965299685e-07, + "loss": 55.1562, + "step": 971 + }, + { + "epoch": 0.009200973107032308, + "grad_norm": 556.2156372070312, + "learning_rate": 6.132492113564669e-07, + "loss": 44.625, + "step": 972 + }, + { + "epoch": 0.00921043912874736, + "grad_norm": 878.2605590820312, + "learning_rate": 6.138801261829652e-07, + "loss": 106.2188, + "step": 973 + }, + { + "epoch": 0.009219905150462416, + "grad_norm": 731.95263671875, + "learning_rate": 6.145110410094637e-07, + "loss": 99.1562, + "step": 974 + }, + { + "epoch": 0.009229371172177469, + "grad_norm": 2.931413412094116, + "learning_rate": 6.151419558359621e-07, + "loss": 0.9131, + "step": 975 + }, + { + "epoch": 0.009238837193892523, + "grad_norm": 982.4998779296875, + "learning_rate": 6.157728706624605e-07, + "loss": 103.2188, + "step": 976 + }, + { + "epoch": 0.009248303215607576, + "grad_norm": 929.77099609375, + "learning_rate": 6.16403785488959e-07, + "loss": 85.25, + "step": 977 + }, + { + "epoch": 0.009257769237322631, + "grad_norm": 1033.7044677734375, + "learning_rate": 6.170347003154574e-07, + "loss": 47.625, + "step": 978 + }, + { + "epoch": 0.009267235259037684, + "grad_norm": 666.0188598632812, + "learning_rate": 6.176656151419558e-07, + "loss": 50.5, + "step": 979 + }, + { + "epoch": 0.009276701280752739, + "grad_norm": 736.0408935546875, + "learning_rate": 6.182965299684543e-07, + "loss": 65.0469, + "step": 980 + }, + { + "epoch": 0.009286167302467792, + "grad_norm": 823.9521484375, + "learning_rate": 6.189274447949526e-07, + "loss": 59.25, + "step": 981 + }, + { + "epoch": 0.009295633324182845, + "grad_norm": 761.8516845703125, + "learning_rate": 6.195583596214511e-07, + "loss": 47.0938, + "step": 982 + }, + { + "epoch": 0.0093050993458979, + "grad_norm": 554.5966186523438, + "learning_rate": 6.201892744479495e-07, + "loss": 47.6875, + "step": 983 + }, + { + "epoch": 0.009314565367612953, + "grad_norm": 593.66064453125, + "learning_rate": 6.208201892744479e-07, + "loss": 48.8125, + "step": 984 + }, + { + "epoch": 0.009324031389328007, + "grad_norm": 570.67724609375, + "learning_rate": 6.214511041009464e-07, + "loss": 51.25, + "step": 985 + }, + { + "epoch": 0.00933349741104306, + "grad_norm": 453.7633972167969, + "learning_rate": 6.220820189274447e-07, + "loss": 41.4688, + "step": 986 + }, + { + "epoch": 0.009342963432758115, + "grad_norm": 819.5733032226562, + "learning_rate": 6.227129337539432e-07, + "loss": 97.6562, + "step": 987 + }, + { + "epoch": 0.009352429454473168, + "grad_norm": 649.6461791992188, + "learning_rate": 6.233438485804417e-07, + "loss": 46.6562, + "step": 988 + }, + { + "epoch": 0.009361895476188223, + "grad_norm": 448.1849365234375, + "learning_rate": 6.2397476340694e-07, + "loss": 74.6875, + "step": 989 + }, + { + "epoch": 0.009371361497903276, + "grad_norm": 1169.21337890625, + "learning_rate": 6.246056782334384e-07, + "loss": 88.0312, + "step": 990 + }, + { + "epoch": 0.00938082751961833, + "grad_norm": 488.2369384765625, + "learning_rate": 6.252365930599369e-07, + "loss": 49.0625, + "step": 991 + }, + { + "epoch": 0.009390293541333384, + "grad_norm": 577.0293579101562, + "learning_rate": 6.258675078864353e-07, + "loss": 62.4688, + "step": 992 + }, + { + "epoch": 0.009399759563048438, + "grad_norm": 549.228515625, + "learning_rate": 6.264984227129337e-07, + "loss": 49.125, + "step": 993 + }, + { + "epoch": 0.009409225584763491, + "grad_norm": 421.6153564453125, + "learning_rate": 6.271293375394322e-07, + "loss": 44.6094, + "step": 994 + }, + { + "epoch": 0.009418691606478546, + "grad_norm": 583.7643432617188, + "learning_rate": 6.277602523659306e-07, + "loss": 47.8906, + "step": 995 + }, + { + "epoch": 0.009428157628193599, + "grad_norm": 895.9959716796875, + "learning_rate": 6.28391167192429e-07, + "loss": 76.8125, + "step": 996 + }, + { + "epoch": 0.009437623649908652, + "grad_norm": 665.9041748046875, + "learning_rate": 6.290220820189274e-07, + "loss": 45.75, + "step": 997 + }, + { + "epoch": 0.009447089671623707, + "grad_norm": 1264.355224609375, + "learning_rate": 6.296529968454259e-07, + "loss": 43.5625, + "step": 998 + }, + { + "epoch": 0.00945655569333876, + "grad_norm": 999.9818725585938, + "learning_rate": 6.302839116719243e-07, + "loss": 115.0625, + "step": 999 + }, + { + "epoch": 0.009466021715053815, + "grad_norm": 428.024169921875, + "learning_rate": 6.309148264984226e-07, + "loss": 40.1094, + "step": 1000 + }, + { + "epoch": 0.009475487736768868, + "grad_norm": 572.3364868164062, + "learning_rate": 6.315457413249211e-07, + "loss": 67.5312, + "step": 1001 + }, + { + "epoch": 0.009484953758483922, + "grad_norm": 787.1051025390625, + "learning_rate": 6.321766561514196e-07, + "loss": 64.4688, + "step": 1002 + }, + { + "epoch": 0.009494419780198975, + "grad_norm": 640.351318359375, + "learning_rate": 6.328075709779179e-07, + "loss": 62.4375, + "step": 1003 + }, + { + "epoch": 0.00950388580191403, + "grad_norm": 687.2269287109375, + "learning_rate": 6.334384858044164e-07, + "loss": 52.1719, + "step": 1004 + }, + { + "epoch": 0.009513351823629083, + "grad_norm": 819.5322875976562, + "learning_rate": 6.340694006309149e-07, + "loss": 58.9375, + "step": 1005 + }, + { + "epoch": 0.009522817845344138, + "grad_norm": 813.7279052734375, + "learning_rate": 6.347003154574132e-07, + "loss": 64.6094, + "step": 1006 + }, + { + "epoch": 0.00953228386705919, + "grad_norm": 2.957379102706909, + "learning_rate": 6.353312302839117e-07, + "loss": 0.8623, + "step": 1007 + }, + { + "epoch": 0.009541749888774246, + "grad_norm": 562.7893676757812, + "learning_rate": 6.3596214511041e-07, + "loss": 44.9375, + "step": 1008 + }, + { + "epoch": 0.009551215910489299, + "grad_norm": 380.1034851074219, + "learning_rate": 6.365930599369085e-07, + "loss": 38.7969, + "step": 1009 + }, + { + "epoch": 0.009560681932204353, + "grad_norm": 3.1068406105041504, + "learning_rate": 6.372239747634069e-07, + "loss": 1.0342, + "step": 1010 + }, + { + "epoch": 0.009570147953919406, + "grad_norm": 1024.3533935546875, + "learning_rate": 6.378548895899053e-07, + "loss": 102.1562, + "step": 1011 + }, + { + "epoch": 0.00957961397563446, + "grad_norm": 684.3878784179688, + "learning_rate": 6.384858044164038e-07, + "loss": 71.5156, + "step": 1012 + }, + { + "epoch": 0.009589079997349514, + "grad_norm": 2.9818782806396484, + "learning_rate": 6.391167192429022e-07, + "loss": 0.9092, + "step": 1013 + }, + { + "epoch": 0.009598546019064567, + "grad_norm": 596.9166870117188, + "learning_rate": 6.397476340694006e-07, + "loss": 41.9688, + "step": 1014 + }, + { + "epoch": 0.009608012040779622, + "grad_norm": 3.332792043685913, + "learning_rate": 6.403785488958991e-07, + "loss": 0.9287, + "step": 1015 + }, + { + "epoch": 0.009617478062494675, + "grad_norm": 2.767293930053711, + "learning_rate": 6.410094637223973e-07, + "loss": 0.877, + "step": 1016 + }, + { + "epoch": 0.00962694408420973, + "grad_norm": 592.91455078125, + "learning_rate": 6.416403785488958e-07, + "loss": 42.3594, + "step": 1017 + }, + { + "epoch": 0.009636410105924783, + "grad_norm": 734.4127197265625, + "learning_rate": 6.422712933753943e-07, + "loss": 44.2188, + "step": 1018 + }, + { + "epoch": 0.009645876127639837, + "grad_norm": 1181.5130615234375, + "learning_rate": 6.429022082018927e-07, + "loss": 127.9062, + "step": 1019 + }, + { + "epoch": 0.00965534214935489, + "grad_norm": 396.13238525390625, + "learning_rate": 6.435331230283911e-07, + "loss": 42.4062, + "step": 1020 + }, + { + "epoch": 0.009664808171069945, + "grad_norm": 1417.650390625, + "learning_rate": 6.441640378548896e-07, + "loss": 118.2812, + "step": 1021 + }, + { + "epoch": 0.009674274192784998, + "grad_norm": 438.482177734375, + "learning_rate": 6.44794952681388e-07, + "loss": 48.875, + "step": 1022 + }, + { + "epoch": 0.009683740214500053, + "grad_norm": 525.5158081054688, + "learning_rate": 6.454258675078864e-07, + "loss": 44.8594, + "step": 1023 + }, + { + "epoch": 0.009693206236215106, + "grad_norm": 583.6134643554688, + "learning_rate": 6.460567823343849e-07, + "loss": 35.5312, + "step": 1024 + }, + { + "epoch": 0.009702672257930159, + "grad_norm": 447.4403991699219, + "learning_rate": 6.466876971608833e-07, + "loss": 44.3125, + "step": 1025 + }, + { + "epoch": 0.009712138279645214, + "grad_norm": 903.1251831054688, + "learning_rate": 6.473186119873816e-07, + "loss": 83.375, + "step": 1026 + }, + { + "epoch": 0.009721604301360267, + "grad_norm": 806.1966552734375, + "learning_rate": 6.4794952681388e-07, + "loss": 50.0, + "step": 1027 + }, + { + "epoch": 0.009731070323075321, + "grad_norm": 593.6306762695312, + "learning_rate": 6.485804416403785e-07, + "loss": 49.625, + "step": 1028 + }, + { + "epoch": 0.009740536344790374, + "grad_norm": 959.9777221679688, + "learning_rate": 6.49211356466877e-07, + "loss": 42.2656, + "step": 1029 + }, + { + "epoch": 0.009750002366505429, + "grad_norm": 1276.7330322265625, + "learning_rate": 6.498422712933753e-07, + "loss": 124.3438, + "step": 1030 + }, + { + "epoch": 0.009759468388220482, + "grad_norm": 804.0978393554688, + "learning_rate": 6.504731861198738e-07, + "loss": 109.4688, + "step": 1031 + }, + { + "epoch": 0.009768934409935537, + "grad_norm": 447.599853515625, + "learning_rate": 6.511041009463723e-07, + "loss": 48.4062, + "step": 1032 + }, + { + "epoch": 0.00977840043165059, + "grad_norm": 432.9711608886719, + "learning_rate": 6.517350157728706e-07, + "loss": 41.75, + "step": 1033 + }, + { + "epoch": 0.009787866453365645, + "grad_norm": 518.04443359375, + "learning_rate": 6.523659305993691e-07, + "loss": 56.0, + "step": 1034 + }, + { + "epoch": 0.009797332475080698, + "grad_norm": 723.4774169921875, + "learning_rate": 6.529968454258675e-07, + "loss": 46.4219, + "step": 1035 + }, + { + "epoch": 0.009806798496795752, + "grad_norm": 474.6865234375, + "learning_rate": 6.536277602523658e-07, + "loss": 45.0938, + "step": 1036 + }, + { + "epoch": 0.009816264518510805, + "grad_norm": 815.59130859375, + "learning_rate": 6.542586750788643e-07, + "loss": 82.7969, + "step": 1037 + }, + { + "epoch": 0.00982573054022586, + "grad_norm": 906.5792846679688, + "learning_rate": 6.548895899053627e-07, + "loss": 45.0469, + "step": 1038 + }, + { + "epoch": 0.009835196561940913, + "grad_norm": 522.0941162109375, + "learning_rate": 6.555205047318612e-07, + "loss": 42.75, + "step": 1039 + }, + { + "epoch": 0.009844662583655966, + "grad_norm": 695.6386108398438, + "learning_rate": 6.561514195583596e-07, + "loss": 83.3125, + "step": 1040 + }, + { + "epoch": 0.00985412860537102, + "grad_norm": 620.719482421875, + "learning_rate": 6.56782334384858e-07, + "loss": 56.7188, + "step": 1041 + }, + { + "epoch": 0.009863594627086074, + "grad_norm": 441.2978820800781, + "learning_rate": 6.574132492113565e-07, + "loss": 46.1562, + "step": 1042 + }, + { + "epoch": 0.009873060648801129, + "grad_norm": 986.527587890625, + "learning_rate": 6.580441640378549e-07, + "loss": 104.6875, + "step": 1043 + }, + { + "epoch": 0.009882526670516182, + "grad_norm": 2.6964478492736816, + "learning_rate": 6.586750788643532e-07, + "loss": 0.8887, + "step": 1044 + }, + { + "epoch": 0.009891992692231236, + "grad_norm": 652.0984497070312, + "learning_rate": 6.593059936908517e-07, + "loss": 99.0938, + "step": 1045 + }, + { + "epoch": 0.00990145871394629, + "grad_norm": 792.8449096679688, + "learning_rate": 6.599369085173502e-07, + "loss": 46.0, + "step": 1046 + }, + { + "epoch": 0.009910924735661344, + "grad_norm": 765.668701171875, + "learning_rate": 6.605678233438485e-07, + "loss": 71.8281, + "step": 1047 + }, + { + "epoch": 0.009920390757376397, + "grad_norm": 852.4647827148438, + "learning_rate": 6.61198738170347e-07, + "loss": 97.8438, + "step": 1048 + }, + { + "epoch": 0.009929856779091452, + "grad_norm": 835.20849609375, + "learning_rate": 6.618296529968454e-07, + "loss": 52.375, + "step": 1049 + }, + { + "epoch": 0.009939322800806505, + "grad_norm": 740.9287109375, + "learning_rate": 6.624605678233438e-07, + "loss": 74.25, + "step": 1050 + }, + { + "epoch": 0.00994878882252156, + "grad_norm": 5165.1748046875, + "learning_rate": 6.630914826498423e-07, + "loss": 91.4062, + "step": 1051 + }, + { + "epoch": 0.009958254844236613, + "grad_norm": 453.8856506347656, + "learning_rate": 6.637223974763407e-07, + "loss": 45.1875, + "step": 1052 + }, + { + "epoch": 0.009967720865951666, + "grad_norm": 350.9989318847656, + "learning_rate": 6.64353312302839e-07, + "loss": 42.4688, + "step": 1053 + }, + { + "epoch": 0.00997718688766672, + "grad_norm": 557.787353515625, + "learning_rate": 6.649842271293375e-07, + "loss": 46.4062, + "step": 1054 + }, + { + "epoch": 0.009986652909381773, + "grad_norm": 643.2884521484375, + "learning_rate": 6.656151419558359e-07, + "loss": 37.3438, + "step": 1055 + }, + { + "epoch": 0.009996118931096828, + "grad_norm": 954.6444091796875, + "learning_rate": 6.662460567823344e-07, + "loss": 46.625, + "step": 1056 + }, + { + "epoch": 0.010005584952811881, + "grad_norm": 669.3492431640625, + "learning_rate": 6.668769716088328e-07, + "loss": 60.4062, + "step": 1057 + }, + { + "epoch": 0.010015050974526936, + "grad_norm": 579.873291015625, + "learning_rate": 6.675078864353312e-07, + "loss": 54.5625, + "step": 1058 + }, + { + "epoch": 0.010024516996241989, + "grad_norm": 1750.781005859375, + "learning_rate": 6.681388012618297e-07, + "loss": 47.7812, + "step": 1059 + }, + { + "epoch": 0.010033983017957044, + "grad_norm": 1044.7310791015625, + "learning_rate": 6.68769716088328e-07, + "loss": 114.0312, + "step": 1060 + }, + { + "epoch": 0.010043449039672097, + "grad_norm": 433.9689025878906, + "learning_rate": 6.694006309148265e-07, + "loss": 38.7656, + "step": 1061 + }, + { + "epoch": 0.010052915061387151, + "grad_norm": 641.3822631835938, + "learning_rate": 6.70031545741325e-07, + "loss": 111.5, + "step": 1062 + }, + { + "epoch": 0.010062381083102204, + "grad_norm": 369.2008361816406, + "learning_rate": 6.706624605678232e-07, + "loss": 42.9688, + "step": 1063 + }, + { + "epoch": 0.010071847104817259, + "grad_norm": 2.9918320178985596, + "learning_rate": 6.712933753943217e-07, + "loss": 0.9243, + "step": 1064 + }, + { + "epoch": 0.010081313126532312, + "grad_norm": 659.8385009765625, + "learning_rate": 6.719242902208202e-07, + "loss": 121.625, + "step": 1065 + }, + { + "epoch": 0.010090779148247367, + "grad_norm": 725.3938598632812, + "learning_rate": 6.725552050473186e-07, + "loss": 63.4688, + "step": 1066 + }, + { + "epoch": 0.01010024516996242, + "grad_norm": 815.791259765625, + "learning_rate": 6.73186119873817e-07, + "loss": 63.0625, + "step": 1067 + }, + { + "epoch": 0.010109711191677473, + "grad_norm": 1127.51953125, + "learning_rate": 6.738170347003155e-07, + "loss": 69.9688, + "step": 1068 + }, + { + "epoch": 0.010119177213392528, + "grad_norm": 634.3098754882812, + "learning_rate": 6.744479495268139e-07, + "loss": 52.4375, + "step": 1069 + }, + { + "epoch": 0.01012864323510758, + "grad_norm": 472.3537902832031, + "learning_rate": 6.750788643533123e-07, + "loss": 43.25, + "step": 1070 + }, + { + "epoch": 0.010138109256822635, + "grad_norm": 632.0870971679688, + "learning_rate": 6.757097791798106e-07, + "loss": 108.5938, + "step": 1071 + }, + { + "epoch": 0.010147575278537688, + "grad_norm": 658.1650390625, + "learning_rate": 6.763406940063091e-07, + "loss": 73.8125, + "step": 1072 + }, + { + "epoch": 0.010157041300252743, + "grad_norm": 710.2623901367188, + "learning_rate": 6.769716088328075e-07, + "loss": 73.25, + "step": 1073 + }, + { + "epoch": 0.010166507321967796, + "grad_norm": 454.2070007324219, + "learning_rate": 6.776025236593059e-07, + "loss": 38.875, + "step": 1074 + }, + { + "epoch": 0.010175973343682851, + "grad_norm": 1152.6356201171875, + "learning_rate": 6.782334384858044e-07, + "loss": 111.2188, + "step": 1075 + }, + { + "epoch": 0.010185439365397904, + "grad_norm": 490.9013977050781, + "learning_rate": 6.788643533123029e-07, + "loss": 58.125, + "step": 1076 + }, + { + "epoch": 0.010194905387112959, + "grad_norm": 629.836669921875, + "learning_rate": 6.794952681388012e-07, + "loss": 65.4375, + "step": 1077 + }, + { + "epoch": 0.010204371408828012, + "grad_norm": 380.32220458984375, + "learning_rate": 6.801261829652997e-07, + "loss": 43.25, + "step": 1078 + }, + { + "epoch": 0.010213837430543066, + "grad_norm": 964.2496337890625, + "learning_rate": 6.807570977917982e-07, + "loss": 33.5781, + "step": 1079 + }, + { + "epoch": 0.01022330345225812, + "grad_norm": 880.822998046875, + "learning_rate": 6.813880126182964e-07, + "loss": 43.4062, + "step": 1080 + }, + { + "epoch": 0.010232769473973174, + "grad_norm": 682.4675903320312, + "learning_rate": 6.820189274447949e-07, + "loss": 46.0, + "step": 1081 + }, + { + "epoch": 0.010242235495688227, + "grad_norm": 619.7885131835938, + "learning_rate": 6.826498422712933e-07, + "loss": 66.7188, + "step": 1082 + }, + { + "epoch": 0.01025170151740328, + "grad_norm": 450.420654296875, + "learning_rate": 6.832807570977917e-07, + "loss": 39.6875, + "step": 1083 + }, + { + "epoch": 0.010261167539118335, + "grad_norm": 504.1923828125, + "learning_rate": 6.839116719242902e-07, + "loss": 48.6875, + "step": 1084 + }, + { + "epoch": 0.010270633560833388, + "grad_norm": 409.48089599609375, + "learning_rate": 6.845425867507886e-07, + "loss": 42.0156, + "step": 1085 + }, + { + "epoch": 0.010280099582548443, + "grad_norm": 737.5615844726562, + "learning_rate": 6.851735015772871e-07, + "loss": 65.1094, + "step": 1086 + }, + { + "epoch": 0.010289565604263496, + "grad_norm": 463.4221496582031, + "learning_rate": 6.858044164037855e-07, + "loss": 45.7188, + "step": 1087 + }, + { + "epoch": 0.01029903162597855, + "grad_norm": 621.4237670898438, + "learning_rate": 6.864353312302839e-07, + "loss": 84.3438, + "step": 1088 + }, + { + "epoch": 0.010308497647693603, + "grad_norm": 1241.0513916015625, + "learning_rate": 6.870662460567823e-07, + "loss": 76.0156, + "step": 1089 + }, + { + "epoch": 0.010317963669408658, + "grad_norm": 465.3297424316406, + "learning_rate": 6.876971608832807e-07, + "loss": 37.4375, + "step": 1090 + }, + { + "epoch": 0.010327429691123711, + "grad_norm": 506.2282409667969, + "learning_rate": 6.883280757097791e-07, + "loss": 45.9375, + "step": 1091 + }, + { + "epoch": 0.010336895712838766, + "grad_norm": 517.960205078125, + "learning_rate": 6.889589905362776e-07, + "loss": 41.4688, + "step": 1092 + }, + { + "epoch": 0.010346361734553819, + "grad_norm": 538.6528930664062, + "learning_rate": 6.895899053627759e-07, + "loss": 85.8438, + "step": 1093 + }, + { + "epoch": 0.010355827756268874, + "grad_norm": 751.6373901367188, + "learning_rate": 6.902208201892744e-07, + "loss": 56.7656, + "step": 1094 + }, + { + "epoch": 0.010365293777983927, + "grad_norm": 767.9102172851562, + "learning_rate": 6.908517350157729e-07, + "loss": 89.2188, + "step": 1095 + }, + { + "epoch": 0.01037475979969898, + "grad_norm": 627.80517578125, + "learning_rate": 6.914826498422713e-07, + "loss": 41.7812, + "step": 1096 + }, + { + "epoch": 0.010384225821414034, + "grad_norm": 619.3834228515625, + "learning_rate": 6.921135646687697e-07, + "loss": 86.2812, + "step": 1097 + }, + { + "epoch": 0.010393691843129087, + "grad_norm": 1102.9801025390625, + "learning_rate": 6.927444794952681e-07, + "loss": 61.7188, + "step": 1098 + }, + { + "epoch": 0.010403157864844142, + "grad_norm": 776.8458862304688, + "learning_rate": 6.933753943217665e-07, + "loss": 87.7656, + "step": 1099 + }, + { + "epoch": 0.010412623886559195, + "grad_norm": 602.0908813476562, + "learning_rate": 6.940063091482649e-07, + "loss": 44.7812, + "step": 1100 + }, + { + "epoch": 0.01042208990827425, + "grad_norm": 575.8475952148438, + "learning_rate": 6.946372239747633e-07, + "loss": 52.9844, + "step": 1101 + }, + { + "epoch": 0.010431555929989303, + "grad_norm": 616.6809692382812, + "learning_rate": 6.952681388012618e-07, + "loss": 65.9062, + "step": 1102 + }, + { + "epoch": 0.010441021951704358, + "grad_norm": 521.318115234375, + "learning_rate": 6.958990536277603e-07, + "loss": 45.8906, + "step": 1103 + }, + { + "epoch": 0.01045048797341941, + "grad_norm": 612.0499267578125, + "learning_rate": 6.965299684542586e-07, + "loss": 49.9844, + "step": 1104 + }, + { + "epoch": 0.010459953995134465, + "grad_norm": 798.2891845703125, + "learning_rate": 6.971608832807571e-07, + "loss": 92.0625, + "step": 1105 + }, + { + "epoch": 0.010469420016849518, + "grad_norm": 961.2871704101562, + "learning_rate": 6.977917981072556e-07, + "loss": 103.5, + "step": 1106 + }, + { + "epoch": 0.010478886038564573, + "grad_norm": 480.06219482421875, + "learning_rate": 6.984227129337538e-07, + "loss": 42.3438, + "step": 1107 + }, + { + "epoch": 0.010488352060279626, + "grad_norm": 650.8759155273438, + "learning_rate": 6.990536277602523e-07, + "loss": 57.6406, + "step": 1108 + }, + { + "epoch": 0.010497818081994681, + "grad_norm": 764.945068359375, + "learning_rate": 6.996845425867508e-07, + "loss": 33.8125, + "step": 1109 + }, + { + "epoch": 0.010507284103709734, + "grad_norm": 823.4776611328125, + "learning_rate": 7.003154574132491e-07, + "loss": 61.5625, + "step": 1110 + }, + { + "epoch": 0.010516750125424787, + "grad_norm": 829.9736938476562, + "learning_rate": 7.009463722397476e-07, + "loss": 56.9375, + "step": 1111 + }, + { + "epoch": 0.010526216147139842, + "grad_norm": 438.975830078125, + "learning_rate": 7.01577287066246e-07, + "loss": 45.9688, + "step": 1112 + }, + { + "epoch": 0.010535682168854895, + "grad_norm": 858.3340454101562, + "learning_rate": 7.022082018927445e-07, + "loss": 44.6875, + "step": 1113 + }, + { + "epoch": 0.01054514819056995, + "grad_norm": 583.1493530273438, + "learning_rate": 7.028391167192429e-07, + "loss": 72.8125, + "step": 1114 + }, + { + "epoch": 0.010554614212285002, + "grad_norm": 483.8931579589844, + "learning_rate": 7.034700315457413e-07, + "loss": 50.7969, + "step": 1115 + }, + { + "epoch": 0.010564080234000057, + "grad_norm": 623.7691040039062, + "learning_rate": 7.041009463722397e-07, + "loss": 43.1875, + "step": 1116 + }, + { + "epoch": 0.01057354625571511, + "grad_norm": 540.7147827148438, + "learning_rate": 7.047318611987381e-07, + "loss": 102.75, + "step": 1117 + }, + { + "epoch": 0.010583012277430165, + "grad_norm": 378.2498779296875, + "learning_rate": 7.053627760252365e-07, + "loss": 38.1562, + "step": 1118 + }, + { + "epoch": 0.010592478299145218, + "grad_norm": 3.1447324752807617, + "learning_rate": 7.05993690851735e-07, + "loss": 0.8916, + "step": 1119 + }, + { + "epoch": 0.010601944320860273, + "grad_norm": 871.4137573242188, + "learning_rate": 7.066246056782334e-07, + "loss": 122.0625, + "step": 1120 + }, + { + "epoch": 0.010611410342575326, + "grad_norm": 780.8139038085938, + "learning_rate": 7.072555205047318e-07, + "loss": 49.2188, + "step": 1121 + }, + { + "epoch": 0.01062087636429038, + "grad_norm": 704.7964477539062, + "learning_rate": 7.078864353312303e-07, + "loss": 88.9375, + "step": 1122 + }, + { + "epoch": 0.010630342386005433, + "grad_norm": 693.3671264648438, + "learning_rate": 7.085173501577287e-07, + "loss": 44.7188, + "step": 1123 + }, + { + "epoch": 0.010639808407720486, + "grad_norm": 1275.65625, + "learning_rate": 7.091482649842271e-07, + "loss": 98.6562, + "step": 1124 + }, + { + "epoch": 0.010649274429435541, + "grad_norm": 491.3037109375, + "learning_rate": 7.097791798107255e-07, + "loss": 63.9375, + "step": 1125 + }, + { + "epoch": 0.010658740451150594, + "grad_norm": 417.89508056640625, + "learning_rate": 7.104100946372239e-07, + "loss": 44.9062, + "step": 1126 + }, + { + "epoch": 0.010668206472865649, + "grad_norm": 407.1385192871094, + "learning_rate": 7.110410094637223e-07, + "loss": 38.9062, + "step": 1127 + }, + { + "epoch": 0.010677672494580702, + "grad_norm": 1325.8424072265625, + "learning_rate": 7.116719242902208e-07, + "loss": 115.625, + "step": 1128 + }, + { + "epoch": 0.010687138516295757, + "grad_norm": 2.8936243057250977, + "learning_rate": 7.123028391167192e-07, + "loss": 1.0005, + "step": 1129 + }, + { + "epoch": 0.01069660453801081, + "grad_norm": 796.4735717773438, + "learning_rate": 7.129337539432176e-07, + "loss": 48.8125, + "step": 1130 + }, + { + "epoch": 0.010706070559725864, + "grad_norm": 617.4832763671875, + "learning_rate": 7.135646687697161e-07, + "loss": 85.75, + "step": 1131 + }, + { + "epoch": 0.010715536581440917, + "grad_norm": 768.2022094726562, + "learning_rate": 7.141955835962145e-07, + "loss": 40.5156, + "step": 1132 + }, + { + "epoch": 0.010725002603155972, + "grad_norm": 429.1643981933594, + "learning_rate": 7.14826498422713e-07, + "loss": 46.7344, + "step": 1133 + }, + { + "epoch": 0.010734468624871025, + "grad_norm": 627.5374145507812, + "learning_rate": 7.154574132492112e-07, + "loss": 88.75, + "step": 1134 + }, + { + "epoch": 0.01074393464658608, + "grad_norm": 535.9691162109375, + "learning_rate": 7.160883280757097e-07, + "loss": 44.9688, + "step": 1135 + }, + { + "epoch": 0.010753400668301133, + "grad_norm": 2.791865110397339, + "learning_rate": 7.167192429022082e-07, + "loss": 0.9404, + "step": 1136 + }, + { + "epoch": 0.010762866690016188, + "grad_norm": 972.8465576171875, + "learning_rate": 7.173501577287065e-07, + "loss": 91.875, + "step": 1137 + }, + { + "epoch": 0.01077233271173124, + "grad_norm": 998.28271484375, + "learning_rate": 7.17981072555205e-07, + "loss": 130.375, + "step": 1138 + }, + { + "epoch": 0.010781798733446294, + "grad_norm": 687.6041870117188, + "learning_rate": 7.186119873817035e-07, + "loss": 68.375, + "step": 1139 + }, + { + "epoch": 0.010791264755161348, + "grad_norm": 599.0660400390625, + "learning_rate": 7.192429022082019e-07, + "loss": 60.7812, + "step": 1140 + }, + { + "epoch": 0.010800730776876401, + "grad_norm": 440.9027404785156, + "learning_rate": 7.198738170347003e-07, + "loss": 42.1719, + "step": 1141 + }, + { + "epoch": 0.010810196798591456, + "grad_norm": 566.9161376953125, + "learning_rate": 7.205047318611988e-07, + "loss": 58.25, + "step": 1142 + }, + { + "epoch": 0.01081966282030651, + "grad_norm": 738.4607543945312, + "learning_rate": 7.211356466876971e-07, + "loss": 85.1875, + "step": 1143 + }, + { + "epoch": 0.010829128842021564, + "grad_norm": 523.1826782226562, + "learning_rate": 7.217665615141955e-07, + "loss": 55.0625, + "step": 1144 + }, + { + "epoch": 0.010838594863736617, + "grad_norm": 1281.4342041015625, + "learning_rate": 7.223974763406939e-07, + "loss": 97.5, + "step": 1145 + }, + { + "epoch": 0.010848060885451672, + "grad_norm": 3.028278112411499, + "learning_rate": 7.230283911671924e-07, + "loss": 0.8862, + "step": 1146 + }, + { + "epoch": 0.010857526907166725, + "grad_norm": 497.7133483886719, + "learning_rate": 7.236593059936908e-07, + "loss": 43.875, + "step": 1147 + }, + { + "epoch": 0.01086699292888178, + "grad_norm": 605.82080078125, + "learning_rate": 7.242902208201892e-07, + "loss": 43.4062, + "step": 1148 + }, + { + "epoch": 0.010876458950596832, + "grad_norm": 1414.20166015625, + "learning_rate": 7.249211356466877e-07, + "loss": 90.5938, + "step": 1149 + }, + { + "epoch": 0.010885924972311887, + "grad_norm": 2.4926888942718506, + "learning_rate": 7.255520504731862e-07, + "loss": 0.8311, + "step": 1150 + }, + { + "epoch": 0.01089539099402694, + "grad_norm": 844.3794555664062, + "learning_rate": 7.261829652996845e-07, + "loss": 92.9688, + "step": 1151 + }, + { + "epoch": 0.010904857015741995, + "grad_norm": 964.7125244140625, + "learning_rate": 7.26813880126183e-07, + "loss": 66.2188, + "step": 1152 + }, + { + "epoch": 0.010914323037457048, + "grad_norm": 480.13623046875, + "learning_rate": 7.274447949526814e-07, + "loss": 46.0469, + "step": 1153 + }, + { + "epoch": 0.010923789059172101, + "grad_norm": 380.8642578125, + "learning_rate": 7.280757097791797e-07, + "loss": 39.7969, + "step": 1154 + }, + { + "epoch": 0.010933255080887156, + "grad_norm": 587.11962890625, + "learning_rate": 7.287066246056782e-07, + "loss": 55.3594, + "step": 1155 + }, + { + "epoch": 0.010942721102602209, + "grad_norm": 395.0832214355469, + "learning_rate": 7.293375394321766e-07, + "loss": 68.5312, + "step": 1156 + }, + { + "epoch": 0.010952187124317263, + "grad_norm": 361.3826599121094, + "learning_rate": 7.29968454258675e-07, + "loss": 37.6562, + "step": 1157 + }, + { + "epoch": 0.010961653146032316, + "grad_norm": 542.1301879882812, + "learning_rate": 7.305993690851735e-07, + "loss": 67.5625, + "step": 1158 + }, + { + "epoch": 0.010971119167747371, + "grad_norm": 416.7520751953125, + "learning_rate": 7.312302839116719e-07, + "loss": 43.9062, + "step": 1159 + }, + { + "epoch": 0.010980585189462424, + "grad_norm": 668.8477172851562, + "learning_rate": 7.318611987381704e-07, + "loss": 86.1094, + "step": 1160 + }, + { + "epoch": 0.010990051211177479, + "grad_norm": 483.09503173828125, + "learning_rate": 7.324921135646687e-07, + "loss": 47.8125, + "step": 1161 + }, + { + "epoch": 0.010999517232892532, + "grad_norm": 793.0693359375, + "learning_rate": 7.331230283911671e-07, + "loss": 94.75, + "step": 1162 + }, + { + "epoch": 0.011008983254607587, + "grad_norm": 389.770263671875, + "learning_rate": 7.337539432176656e-07, + "loss": 44.7031, + "step": 1163 + }, + { + "epoch": 0.01101844927632264, + "grad_norm": 929.1907958984375, + "learning_rate": 7.34384858044164e-07, + "loss": 69.8438, + "step": 1164 + }, + { + "epoch": 0.011027915298037694, + "grad_norm": 1012.8470458984375, + "learning_rate": 7.350157728706624e-07, + "loss": 43.5625, + "step": 1165 + }, + { + "epoch": 0.011037381319752747, + "grad_norm": 511.25439453125, + "learning_rate": 7.356466876971609e-07, + "loss": 42.7031, + "step": 1166 + }, + { + "epoch": 0.0110468473414678, + "grad_norm": 550.285888671875, + "learning_rate": 7.362776025236592e-07, + "loss": 64.5625, + "step": 1167 + }, + { + "epoch": 0.011056313363182855, + "grad_norm": 1324.77392578125, + "learning_rate": 7.369085173501577e-07, + "loss": 104.7188, + "step": 1168 + }, + { + "epoch": 0.011065779384897908, + "grad_norm": 2.3348450660705566, + "learning_rate": 7.375394321766562e-07, + "loss": 0.7603, + "step": 1169 + }, + { + "epoch": 0.011075245406612963, + "grad_norm": 684.0159301757812, + "learning_rate": 7.381703470031545e-07, + "loss": 42.6719, + "step": 1170 + }, + { + "epoch": 0.011084711428328016, + "grad_norm": 575.6126708984375, + "learning_rate": 7.388012618296529e-07, + "loss": 69.6875, + "step": 1171 + }, + { + "epoch": 0.01109417745004307, + "grad_norm": 617.8461303710938, + "learning_rate": 7.394321766561514e-07, + "loss": 68.5938, + "step": 1172 + }, + { + "epoch": 0.011103643471758124, + "grad_norm": 983.6828002929688, + "learning_rate": 7.400630914826498e-07, + "loss": 100.4375, + "step": 1173 + }, + { + "epoch": 0.011113109493473178, + "grad_norm": 647.299560546875, + "learning_rate": 7.406940063091482e-07, + "loss": 51.1562, + "step": 1174 + }, + { + "epoch": 0.011122575515188231, + "grad_norm": 674.8086547851562, + "learning_rate": 7.413249211356467e-07, + "loss": 58.2812, + "step": 1175 + }, + { + "epoch": 0.011132041536903286, + "grad_norm": 575.8084106445312, + "learning_rate": 7.419558359621451e-07, + "loss": 72.3438, + "step": 1176 + }, + { + "epoch": 0.01114150755861834, + "grad_norm": 361.0384216308594, + "learning_rate": 7.425867507886436e-07, + "loss": 39.9688, + "step": 1177 + }, + { + "epoch": 0.011150973580333394, + "grad_norm": 546.4700927734375, + "learning_rate": 7.432176656151419e-07, + "loss": 49.4219, + "step": 1178 + }, + { + "epoch": 0.011160439602048447, + "grad_norm": 941.0468139648438, + "learning_rate": 7.438485804416403e-07, + "loss": 73.9688, + "step": 1179 + }, + { + "epoch": 0.011169905623763502, + "grad_norm": 547.6505737304688, + "learning_rate": 7.444794952681388e-07, + "loss": 59.125, + "step": 1180 + }, + { + "epoch": 0.011179371645478555, + "grad_norm": 570.5455322265625, + "learning_rate": 7.451104100946371e-07, + "loss": 89.0938, + "step": 1181 + }, + { + "epoch": 0.011188837667193608, + "grad_norm": 1102.265625, + "learning_rate": 7.457413249211356e-07, + "loss": 45.2344, + "step": 1182 + }, + { + "epoch": 0.011198303688908662, + "grad_norm": 761.554443359375, + "learning_rate": 7.463722397476341e-07, + "loss": 42.4688, + "step": 1183 + }, + { + "epoch": 0.011207769710623715, + "grad_norm": 823.8197631835938, + "learning_rate": 7.470031545741324e-07, + "loss": 51.4375, + "step": 1184 + }, + { + "epoch": 0.01121723573233877, + "grad_norm": 791.961181640625, + "learning_rate": 7.476340694006309e-07, + "loss": 101.2969, + "step": 1185 + }, + { + "epoch": 0.011226701754053823, + "grad_norm": 641.5358276367188, + "learning_rate": 7.482649842271294e-07, + "loss": 80.0, + "step": 1186 + }, + { + "epoch": 0.011236167775768878, + "grad_norm": 3.2808167934417725, + "learning_rate": 7.488958990536278e-07, + "loss": 0.9468, + "step": 1187 + }, + { + "epoch": 0.011245633797483931, + "grad_norm": 584.5095825195312, + "learning_rate": 7.495268138801261e-07, + "loss": 83.0625, + "step": 1188 + }, + { + "epoch": 0.011255099819198986, + "grad_norm": 727.7489624023438, + "learning_rate": 7.501577287066245e-07, + "loss": 78.5625, + "step": 1189 + }, + { + "epoch": 0.011264565840914039, + "grad_norm": 525.2669677734375, + "learning_rate": 7.50788643533123e-07, + "loss": 83.0625, + "step": 1190 + }, + { + "epoch": 0.011274031862629093, + "grad_norm": 1470.11328125, + "learning_rate": 7.514195583596214e-07, + "loss": 62.875, + "step": 1191 + }, + { + "epoch": 0.011283497884344146, + "grad_norm": 655.149169921875, + "learning_rate": 7.520504731861198e-07, + "loss": 88.0625, + "step": 1192 + }, + { + "epoch": 0.011292963906059201, + "grad_norm": 701.13525390625, + "learning_rate": 7.526813880126183e-07, + "loss": 75.25, + "step": 1193 + }, + { + "epoch": 0.011302429927774254, + "grad_norm": 634.4474487304688, + "learning_rate": 7.533123028391167e-07, + "loss": 63.9688, + "step": 1194 + }, + { + "epoch": 0.011311895949489309, + "grad_norm": 1587.64111328125, + "learning_rate": 7.539432176656151e-07, + "loss": 74.2812, + "step": 1195 + }, + { + "epoch": 0.011321361971204362, + "grad_norm": 776.825439453125, + "learning_rate": 7.545741324921136e-07, + "loss": 40.4688, + "step": 1196 + }, + { + "epoch": 0.011330827992919415, + "grad_norm": 2.978459119796753, + "learning_rate": 7.55205047318612e-07, + "loss": 0.9155, + "step": 1197 + }, + { + "epoch": 0.01134029401463447, + "grad_norm": 1323.5599365234375, + "learning_rate": 7.558359621451103e-07, + "loss": 106.2969, + "step": 1198 + }, + { + "epoch": 0.011349760036349523, + "grad_norm": 997.7550048828125, + "learning_rate": 7.564668769716088e-07, + "loss": 63.9062, + "step": 1199 + }, + { + "epoch": 0.011359226058064577, + "grad_norm": 925.4991455078125, + "learning_rate": 7.570977917981072e-07, + "loss": 88.2812, + "step": 1200 + }, + { + "epoch": 0.01136869207977963, + "grad_norm": 714.1985473632812, + "learning_rate": 7.577287066246056e-07, + "loss": 46.2969, + "step": 1201 + }, + { + "epoch": 0.011378158101494685, + "grad_norm": 908.1348266601562, + "learning_rate": 7.583596214511041e-07, + "loss": 64.4219, + "step": 1202 + }, + { + "epoch": 0.011387624123209738, + "grad_norm": 503.49932861328125, + "learning_rate": 7.589905362776025e-07, + "loss": 48.9531, + "step": 1203 + }, + { + "epoch": 0.011397090144924793, + "grad_norm": 611.954345703125, + "learning_rate": 7.596214511041009e-07, + "loss": 69.125, + "step": 1204 + }, + { + "epoch": 0.011406556166639846, + "grad_norm": 471.5843505859375, + "learning_rate": 7.602523659305994e-07, + "loss": 45.8438, + "step": 1205 + }, + { + "epoch": 0.0114160221883549, + "grad_norm": 890.114013671875, + "learning_rate": 7.608832807570977e-07, + "loss": 50.8438, + "step": 1206 + }, + { + "epoch": 0.011425488210069954, + "grad_norm": 467.24603271484375, + "learning_rate": 7.615141955835962e-07, + "loss": 42.0, + "step": 1207 + }, + { + "epoch": 0.011434954231785008, + "grad_norm": 3.9368038177490234, + "learning_rate": 7.621451104100945e-07, + "loss": 0.7314, + "step": 1208 + }, + { + "epoch": 0.011444420253500062, + "grad_norm": 1478.703369140625, + "learning_rate": 7.62776025236593e-07, + "loss": 108.2812, + "step": 1209 + }, + { + "epoch": 0.011453886275215115, + "grad_norm": 935.6073608398438, + "learning_rate": 7.634069400630915e-07, + "loss": 111.5781, + "step": 1210 + }, + { + "epoch": 0.01146335229693017, + "grad_norm": 661.17041015625, + "learning_rate": 7.640378548895898e-07, + "loss": 54.5938, + "step": 1211 + }, + { + "epoch": 0.011472818318645222, + "grad_norm": 535.427001953125, + "learning_rate": 7.646687697160883e-07, + "loss": 86.625, + "step": 1212 + }, + { + "epoch": 0.011482284340360277, + "grad_norm": 519.889892578125, + "learning_rate": 7.652996845425868e-07, + "loss": 76.875, + "step": 1213 + }, + { + "epoch": 0.01149175036207533, + "grad_norm": 917.4583740234375, + "learning_rate": 7.659305993690851e-07, + "loss": 86.3125, + "step": 1214 + }, + { + "epoch": 0.011501216383790385, + "grad_norm": 595.05322265625, + "learning_rate": 7.665615141955835e-07, + "loss": 42.0938, + "step": 1215 + }, + { + "epoch": 0.011510682405505438, + "grad_norm": 1580.8245849609375, + "learning_rate": 7.67192429022082e-07, + "loss": 88.6562, + "step": 1216 + }, + { + "epoch": 0.011520148427220493, + "grad_norm": 655.0455322265625, + "learning_rate": 7.678233438485804e-07, + "loss": 64.0, + "step": 1217 + }, + { + "epoch": 0.011529614448935546, + "grad_norm": 361.3646545410156, + "learning_rate": 7.684542586750788e-07, + "loss": 36.5625, + "step": 1218 + }, + { + "epoch": 0.0115390804706506, + "grad_norm": 475.9730529785156, + "learning_rate": 7.690851735015772e-07, + "loss": 47.5781, + "step": 1219 + }, + { + "epoch": 0.011548546492365653, + "grad_norm": 847.6423950195312, + "learning_rate": 7.697160883280757e-07, + "loss": 97.4062, + "step": 1220 + }, + { + "epoch": 0.011558012514080708, + "grad_norm": 2.8572347164154053, + "learning_rate": 7.703470031545741e-07, + "loss": 0.8721, + "step": 1221 + }, + { + "epoch": 0.011567478535795761, + "grad_norm": 628.3573608398438, + "learning_rate": 7.709779179810725e-07, + "loss": 46.9375, + "step": 1222 + }, + { + "epoch": 0.011576944557510816, + "grad_norm": 706.1398315429688, + "learning_rate": 7.71608832807571e-07, + "loss": 61.8125, + "step": 1223 + }, + { + "epoch": 0.011586410579225869, + "grad_norm": 695.4225463867188, + "learning_rate": 7.722397476340695e-07, + "loss": 105.375, + "step": 1224 + }, + { + "epoch": 0.011595876600940922, + "grad_norm": 593.7330932617188, + "learning_rate": 7.728706624605677e-07, + "loss": 78.1562, + "step": 1225 + }, + { + "epoch": 0.011605342622655977, + "grad_norm": 437.4502868652344, + "learning_rate": 7.735015772870662e-07, + "loss": 34.5156, + "step": 1226 + }, + { + "epoch": 0.01161480864437103, + "grad_norm": 506.0677795410156, + "learning_rate": 7.741324921135647e-07, + "loss": 54.5, + "step": 1227 + }, + { + "epoch": 0.011624274666086084, + "grad_norm": 2275.670166015625, + "learning_rate": 7.74763406940063e-07, + "loss": 95.8438, + "step": 1228 + }, + { + "epoch": 0.011633740687801137, + "grad_norm": 1117.0675048828125, + "learning_rate": 7.753943217665615e-07, + "loss": 72.2188, + "step": 1229 + }, + { + "epoch": 0.011643206709516192, + "grad_norm": 492.5444030761719, + "learning_rate": 7.760252365930599e-07, + "loss": 37.5938, + "step": 1230 + }, + { + "epoch": 0.011652672731231245, + "grad_norm": 479.2723693847656, + "learning_rate": 7.766561514195583e-07, + "loss": 42.4531, + "step": 1231 + }, + { + "epoch": 0.0116621387529463, + "grad_norm": 686.6377563476562, + "learning_rate": 7.772870662460568e-07, + "loss": 99.6875, + "step": 1232 + }, + { + "epoch": 0.011671604774661353, + "grad_norm": 597.7495727539062, + "learning_rate": 7.779179810725552e-07, + "loss": 49.8125, + "step": 1233 + }, + { + "epoch": 0.011681070796376408, + "grad_norm": 582.8693237304688, + "learning_rate": 7.785488958990536e-07, + "loss": 41.1875, + "step": 1234 + }, + { + "epoch": 0.01169053681809146, + "grad_norm": 992.7817993164062, + "learning_rate": 7.79179810725552e-07, + "loss": 122.8125, + "step": 1235 + }, + { + "epoch": 0.011700002839806515, + "grad_norm": 617.5075073242188, + "learning_rate": 7.798107255520504e-07, + "loss": 60.4688, + "step": 1236 + }, + { + "epoch": 0.011709468861521568, + "grad_norm": 474.9856262207031, + "learning_rate": 7.804416403785489e-07, + "loss": 50.1719, + "step": 1237 + }, + { + "epoch": 0.011718934883236621, + "grad_norm": 723.9612426757812, + "learning_rate": 7.810725552050473e-07, + "loss": 100.1875, + "step": 1238 + }, + { + "epoch": 0.011728400904951676, + "grad_norm": 916.9580688476562, + "learning_rate": 7.817034700315457e-07, + "loss": 81.7812, + "step": 1239 + }, + { + "epoch": 0.011737866926666729, + "grad_norm": 1040.59326171875, + "learning_rate": 7.823343848580442e-07, + "loss": 76.2344, + "step": 1240 + }, + { + "epoch": 0.011747332948381784, + "grad_norm": 549.6194458007812, + "learning_rate": 7.829652996845425e-07, + "loss": 40.5312, + "step": 1241 + }, + { + "epoch": 0.011756798970096837, + "grad_norm": 1106.5904541015625, + "learning_rate": 7.83596214511041e-07, + "loss": 42.125, + "step": 1242 + }, + { + "epoch": 0.011766264991811892, + "grad_norm": 488.5059509277344, + "learning_rate": 7.842271293375394e-07, + "loss": 47.9062, + "step": 1243 + }, + { + "epoch": 0.011775731013526945, + "grad_norm": 781.1760864257812, + "learning_rate": 7.848580441640378e-07, + "loss": 91.6562, + "step": 1244 + }, + { + "epoch": 0.011785197035242, + "grad_norm": 523.8766479492188, + "learning_rate": 7.854889589905362e-07, + "loss": 54.9062, + "step": 1245 + }, + { + "epoch": 0.011794663056957052, + "grad_norm": 1125.2108154296875, + "learning_rate": 7.861198738170347e-07, + "loss": 87.2188, + "step": 1246 + }, + { + "epoch": 0.011804129078672107, + "grad_norm": 2.768118381500244, + "learning_rate": 7.867507886435331e-07, + "loss": 0.7827, + "step": 1247 + }, + { + "epoch": 0.01181359510038716, + "grad_norm": 487.3508605957031, + "learning_rate": 7.873817034700315e-07, + "loss": 68.125, + "step": 1248 + }, + { + "epoch": 0.011823061122102215, + "grad_norm": 490.568359375, + "learning_rate": 7.8801261829653e-07, + "loss": 38.125, + "step": 1249 + }, + { + "epoch": 0.011832527143817268, + "grad_norm": 578.349365234375, + "learning_rate": 7.886435331230284e-07, + "loss": 49.2812, + "step": 1250 + }, + { + "epoch": 0.011841993165532323, + "grad_norm": 605.2449340820312, + "learning_rate": 7.892744479495268e-07, + "loss": 89.5938, + "step": 1251 + }, + { + "epoch": 0.011851459187247376, + "grad_norm": 977.90380859375, + "learning_rate": 7.899053627760251e-07, + "loss": 69.9844, + "step": 1252 + }, + { + "epoch": 0.011860925208962429, + "grad_norm": 812.7798461914062, + "learning_rate": 7.905362776025236e-07, + "loss": 49.125, + "step": 1253 + }, + { + "epoch": 0.011870391230677483, + "grad_norm": 558.04443359375, + "learning_rate": 7.911671924290221e-07, + "loss": 50.4375, + "step": 1254 + }, + { + "epoch": 0.011879857252392536, + "grad_norm": 642.2891845703125, + "learning_rate": 7.917981072555204e-07, + "loss": 45.0625, + "step": 1255 + }, + { + "epoch": 0.011889323274107591, + "grad_norm": 840.6593627929688, + "learning_rate": 7.924290220820189e-07, + "loss": 42.2188, + "step": 1256 + }, + { + "epoch": 0.011898789295822644, + "grad_norm": 524.13427734375, + "learning_rate": 7.930599369085174e-07, + "loss": 46.8438, + "step": 1257 + }, + { + "epoch": 0.011908255317537699, + "grad_norm": 942.5008544921875, + "learning_rate": 7.936908517350157e-07, + "loss": 49.25, + "step": 1258 + }, + { + "epoch": 0.011917721339252752, + "grad_norm": 468.6698913574219, + "learning_rate": 7.943217665615142e-07, + "loss": 51.7812, + "step": 1259 + }, + { + "epoch": 0.011927187360967807, + "grad_norm": 406.6846923828125, + "learning_rate": 7.949526813880127e-07, + "loss": 37.7031, + "step": 1260 + }, + { + "epoch": 0.01193665338268286, + "grad_norm": 668.6524658203125, + "learning_rate": 7.955835962145109e-07, + "loss": 92.0, + "step": 1261 + }, + { + "epoch": 0.011946119404397914, + "grad_norm": 502.8438415527344, + "learning_rate": 7.962145110410094e-07, + "loss": 90.5938, + "step": 1262 + }, + { + "epoch": 0.011955585426112967, + "grad_norm": 669.2996215820312, + "learning_rate": 7.968454258675078e-07, + "loss": 48.2188, + "step": 1263 + }, + { + "epoch": 0.011965051447828022, + "grad_norm": 1613.38330078125, + "learning_rate": 7.974763406940063e-07, + "loss": 92.8594, + "step": 1264 + }, + { + "epoch": 0.011974517469543075, + "grad_norm": 358.9454040527344, + "learning_rate": 7.981072555205047e-07, + "loss": 40.5156, + "step": 1265 + }, + { + "epoch": 0.01198398349125813, + "grad_norm": 888.8532104492188, + "learning_rate": 7.987381703470031e-07, + "loss": 49.625, + "step": 1266 + }, + { + "epoch": 0.011993449512973183, + "grad_norm": 728.9982299804688, + "learning_rate": 7.993690851735016e-07, + "loss": 89.8906, + "step": 1267 + }, + { + "epoch": 0.012002915534688236, + "grad_norm": 413.3031921386719, + "learning_rate": 8e-07, + "loss": 49.0469, + "step": 1268 + }, + { + "epoch": 0.01201238155640329, + "grad_norm": 2.8767576217651367, + "learning_rate": 8.006309148264984e-07, + "loss": 0.7988, + "step": 1269 + }, + { + "epoch": 0.012021847578118344, + "grad_norm": 708.7467651367188, + "learning_rate": 8.012618296529968e-07, + "loss": 85.375, + "step": 1270 + }, + { + "epoch": 0.012031313599833398, + "grad_norm": 487.2876281738281, + "learning_rate": 8.018927444794953e-07, + "loss": 39.0781, + "step": 1271 + }, + { + "epoch": 0.012040779621548451, + "grad_norm": 1376.2908935546875, + "learning_rate": 8.025236593059936e-07, + "loss": 81.5469, + "step": 1272 + }, + { + "epoch": 0.012050245643263506, + "grad_norm": 429.0411682128906, + "learning_rate": 8.031545741324921e-07, + "loss": 38.6875, + "step": 1273 + }, + { + "epoch": 0.012059711664978559, + "grad_norm": 897.2138061523438, + "learning_rate": 8.037854889589905e-07, + "loss": 45.9062, + "step": 1274 + }, + { + "epoch": 0.012069177686693614, + "grad_norm": 506.9822692871094, + "learning_rate": 8.044164037854889e-07, + "loss": 51.5938, + "step": 1275 + }, + { + "epoch": 0.012078643708408667, + "grad_norm": 1107.029296875, + "learning_rate": 8.050473186119874e-07, + "loss": 46.6719, + "step": 1276 + }, + { + "epoch": 0.012088109730123722, + "grad_norm": 507.7869567871094, + "learning_rate": 8.056782334384858e-07, + "loss": 38.375, + "step": 1277 + }, + { + "epoch": 0.012097575751838775, + "grad_norm": 460.1737976074219, + "learning_rate": 8.063091482649842e-07, + "loss": 38.375, + "step": 1278 + }, + { + "epoch": 0.01210704177355383, + "grad_norm": 721.6969604492188, + "learning_rate": 8.069400630914826e-07, + "loss": 45.375, + "step": 1279 + }, + { + "epoch": 0.012116507795268882, + "grad_norm": 656.0482788085938, + "learning_rate": 8.07570977917981e-07, + "loss": 80.4062, + "step": 1280 + }, + { + "epoch": 0.012125973816983935, + "grad_norm": 603.2412109375, + "learning_rate": 8.082018927444795e-07, + "loss": 78.125, + "step": 1281 + }, + { + "epoch": 0.01213543983869899, + "grad_norm": 719.6988525390625, + "learning_rate": 8.088328075709778e-07, + "loss": 43.0312, + "step": 1282 + }, + { + "epoch": 0.012144905860414043, + "grad_norm": 660.5941162109375, + "learning_rate": 8.094637223974763e-07, + "loss": 42.8906, + "step": 1283 + }, + { + "epoch": 0.012154371882129098, + "grad_norm": 505.85272216796875, + "learning_rate": 8.100946372239748e-07, + "loss": 36.7344, + "step": 1284 + }, + { + "epoch": 0.01216383790384415, + "grad_norm": 783.5394287109375, + "learning_rate": 8.107255520504731e-07, + "loss": 80.5156, + "step": 1285 + }, + { + "epoch": 0.012173303925559206, + "grad_norm": 330.84259033203125, + "learning_rate": 8.113564668769716e-07, + "loss": 38.0625, + "step": 1286 + }, + { + "epoch": 0.012182769947274259, + "grad_norm": 688.4213256835938, + "learning_rate": 8.119873817034701e-07, + "loss": 47.6875, + "step": 1287 + }, + { + "epoch": 0.012192235968989313, + "grad_norm": 428.7878112792969, + "learning_rate": 8.126182965299683e-07, + "loss": 38.2188, + "step": 1288 + }, + { + "epoch": 0.012201701990704366, + "grad_norm": 389.74810791015625, + "learning_rate": 8.132492113564668e-07, + "loss": 43.1406, + "step": 1289 + }, + { + "epoch": 0.012211168012419421, + "grad_norm": 693.2640380859375, + "learning_rate": 8.138801261829653e-07, + "loss": 53.3594, + "step": 1290 + }, + { + "epoch": 0.012220634034134474, + "grad_norm": 742.7010498046875, + "learning_rate": 8.145110410094637e-07, + "loss": 63.6719, + "step": 1291 + }, + { + "epoch": 0.012230100055849529, + "grad_norm": 472.6009521484375, + "learning_rate": 8.151419558359621e-07, + "loss": 48.9062, + "step": 1292 + }, + { + "epoch": 0.012239566077564582, + "grad_norm": 420.6577453613281, + "learning_rate": 8.157728706624605e-07, + "loss": 45.3594, + "step": 1293 + }, + { + "epoch": 0.012249032099279637, + "grad_norm": 2.9607489109039307, + "learning_rate": 8.16403785488959e-07, + "loss": 0.9351, + "step": 1294 + }, + { + "epoch": 0.01225849812099469, + "grad_norm": 619.4861450195312, + "learning_rate": 8.170347003154574e-07, + "loss": 47.875, + "step": 1295 + }, + { + "epoch": 0.012267964142709743, + "grad_norm": 450.12255859375, + "learning_rate": 8.176656151419558e-07, + "loss": 38.8906, + "step": 1296 + }, + { + "epoch": 0.012277430164424797, + "grad_norm": 394.38055419921875, + "learning_rate": 8.182965299684542e-07, + "loss": 40.3125, + "step": 1297 + }, + { + "epoch": 0.01228689618613985, + "grad_norm": 549.5364379882812, + "learning_rate": 8.189274447949526e-07, + "loss": 42.25, + "step": 1298 + }, + { + "epoch": 0.012296362207854905, + "grad_norm": 674.5668334960938, + "learning_rate": 8.19558359621451e-07, + "loss": 75.0312, + "step": 1299 + }, + { + "epoch": 0.012305828229569958, + "grad_norm": 1077.7392578125, + "learning_rate": 8.201892744479495e-07, + "loss": 79.1562, + "step": 1300 + }, + { + "epoch": 0.012315294251285013, + "grad_norm": 1165.7916259765625, + "learning_rate": 8.20820189274448e-07, + "loss": 105.7188, + "step": 1301 + }, + { + "epoch": 0.012324760273000066, + "grad_norm": 993.7110595703125, + "learning_rate": 8.214511041009463e-07, + "loss": 80.625, + "step": 1302 + }, + { + "epoch": 0.01233422629471512, + "grad_norm": 1974.6302490234375, + "learning_rate": 8.220820189274448e-07, + "loss": 96.4688, + "step": 1303 + }, + { + "epoch": 0.012343692316430174, + "grad_norm": 988.7732543945312, + "learning_rate": 8.227129337539432e-07, + "loss": 58.0781, + "step": 1304 + }, + { + "epoch": 0.012353158338145228, + "grad_norm": 535.5286865234375, + "learning_rate": 8.233438485804416e-07, + "loss": 40.1406, + "step": 1305 + }, + { + "epoch": 0.012362624359860281, + "grad_norm": 486.5096435546875, + "learning_rate": 8.2397476340694e-07, + "loss": 47.8594, + "step": 1306 + }, + { + "epoch": 0.012372090381575336, + "grad_norm": 428.5718994140625, + "learning_rate": 8.246056782334384e-07, + "loss": 36.9844, + "step": 1307 + }, + { + "epoch": 0.012381556403290389, + "grad_norm": 1061.1573486328125, + "learning_rate": 8.252365930599368e-07, + "loss": 101.9688, + "step": 1308 + }, + { + "epoch": 0.012391022425005442, + "grad_norm": 472.0103759765625, + "learning_rate": 8.258675078864353e-07, + "loss": 35.1875, + "step": 1309 + }, + { + "epoch": 0.012400488446720497, + "grad_norm": 1277.217041015625, + "learning_rate": 8.264984227129337e-07, + "loss": 70.3594, + "step": 1310 + }, + { + "epoch": 0.01240995446843555, + "grad_norm": 368.2793884277344, + "learning_rate": 8.271293375394322e-07, + "loss": 39.6562, + "step": 1311 + }, + { + "epoch": 0.012419420490150605, + "grad_norm": 463.2733459472656, + "learning_rate": 8.277602523659306e-07, + "loss": 46.7031, + "step": 1312 + }, + { + "epoch": 0.012428886511865658, + "grad_norm": 2406.74365234375, + "learning_rate": 8.28391167192429e-07, + "loss": 47.9531, + "step": 1313 + }, + { + "epoch": 0.012438352533580712, + "grad_norm": 2.376315116882324, + "learning_rate": 8.290220820189275e-07, + "loss": 0.8843, + "step": 1314 + }, + { + "epoch": 0.012447818555295765, + "grad_norm": 415.8637390136719, + "learning_rate": 8.296529968454257e-07, + "loss": 38.0312, + "step": 1315 + }, + { + "epoch": 0.01245728457701082, + "grad_norm": 1135.4820556640625, + "learning_rate": 8.302839116719242e-07, + "loss": 94.0312, + "step": 1316 + }, + { + "epoch": 0.012466750598725873, + "grad_norm": 812.2321166992188, + "learning_rate": 8.309148264984227e-07, + "loss": 56.75, + "step": 1317 + }, + { + "epoch": 0.012476216620440928, + "grad_norm": 678.1378173828125, + "learning_rate": 8.31545741324921e-07, + "loss": 43.1562, + "step": 1318 + }, + { + "epoch": 0.01248568264215598, + "grad_norm": 1250.24462890625, + "learning_rate": 8.321766561514195e-07, + "loss": 54.25, + "step": 1319 + }, + { + "epoch": 0.012495148663871036, + "grad_norm": 918.5830688476562, + "learning_rate": 8.32807570977918e-07, + "loss": 79.6406, + "step": 1320 + }, + { + "epoch": 0.012504614685586089, + "grad_norm": 443.50384521484375, + "learning_rate": 8.334384858044164e-07, + "loss": 67.25, + "step": 1321 + }, + { + "epoch": 0.012514080707301143, + "grad_norm": 466.285400390625, + "learning_rate": 8.340694006309148e-07, + "loss": 67.625, + "step": 1322 + }, + { + "epoch": 0.012523546729016196, + "grad_norm": 1024.94775390625, + "learning_rate": 8.347003154574133e-07, + "loss": 96.7188, + "step": 1323 + }, + { + "epoch": 0.01253301275073125, + "grad_norm": 481.89703369140625, + "learning_rate": 8.353312302839116e-07, + "loss": 39.4688, + "step": 1324 + }, + { + "epoch": 0.012542478772446304, + "grad_norm": 1057.9022216796875, + "learning_rate": 8.3596214511041e-07, + "loss": 110.4375, + "step": 1325 + }, + { + "epoch": 0.012551944794161357, + "grad_norm": 468.0632629394531, + "learning_rate": 8.365930599369084e-07, + "loss": 45.0, + "step": 1326 + }, + { + "epoch": 0.012561410815876412, + "grad_norm": 2.9193124771118164, + "learning_rate": 8.372239747634069e-07, + "loss": 0.8169, + "step": 1327 + }, + { + "epoch": 0.012570876837591465, + "grad_norm": 565.5068969726562, + "learning_rate": 8.378548895899054e-07, + "loss": 44.7812, + "step": 1328 + }, + { + "epoch": 0.01258034285930652, + "grad_norm": 569.8988647460938, + "learning_rate": 8.384858044164037e-07, + "loss": 52.9219, + "step": 1329 + }, + { + "epoch": 0.012589808881021573, + "grad_norm": 623.792724609375, + "learning_rate": 8.391167192429022e-07, + "loss": 46.1094, + "step": 1330 + }, + { + "epoch": 0.012599274902736627, + "grad_norm": 1470.1363525390625, + "learning_rate": 8.397476340694007e-07, + "loss": 53.5156, + "step": 1331 + }, + { + "epoch": 0.01260874092445168, + "grad_norm": 454.1531677246094, + "learning_rate": 8.40378548895899e-07, + "loss": 38.1562, + "step": 1332 + }, + { + "epoch": 0.012618206946166735, + "grad_norm": 574.9443969726562, + "learning_rate": 8.410094637223974e-07, + "loss": 88.0312, + "step": 1333 + }, + { + "epoch": 0.012627672967881788, + "grad_norm": 791.2825317382812, + "learning_rate": 8.416403785488959e-07, + "loss": 65.25, + "step": 1334 + }, + { + "epoch": 0.012637138989596843, + "grad_norm": 478.2488098144531, + "learning_rate": 8.422712933753942e-07, + "loss": 38.7969, + "step": 1335 + }, + { + "epoch": 0.012646605011311896, + "grad_norm": 1005.2443237304688, + "learning_rate": 8.429022082018927e-07, + "loss": 57.3125, + "step": 1336 + }, + { + "epoch": 0.01265607103302695, + "grad_norm": 534.0778198242188, + "learning_rate": 8.435331230283911e-07, + "loss": 39.5625, + "step": 1337 + }, + { + "epoch": 0.012665537054742004, + "grad_norm": 920.1619262695312, + "learning_rate": 8.441640378548896e-07, + "loss": 90.9375, + "step": 1338 + }, + { + "epoch": 0.012675003076457057, + "grad_norm": 766.330322265625, + "learning_rate": 8.44794952681388e-07, + "loss": 42.0625, + "step": 1339 + }, + { + "epoch": 0.012684469098172111, + "grad_norm": 478.56695556640625, + "learning_rate": 8.454258675078864e-07, + "loss": 38.6875, + "step": 1340 + }, + { + "epoch": 0.012693935119887164, + "grad_norm": 671.4730834960938, + "learning_rate": 8.460567823343849e-07, + "loss": 53.9375, + "step": 1341 + }, + { + "epoch": 0.012703401141602219, + "grad_norm": 524.099853515625, + "learning_rate": 8.466876971608832e-07, + "loss": 37.6875, + "step": 1342 + }, + { + "epoch": 0.012712867163317272, + "grad_norm": 765.1116333007812, + "learning_rate": 8.473186119873816e-07, + "loss": 81.7188, + "step": 1343 + }, + { + "epoch": 0.012722333185032327, + "grad_norm": 480.188720703125, + "learning_rate": 8.479495268138801e-07, + "loss": 51.4688, + "step": 1344 + }, + { + "epoch": 0.01273179920674738, + "grad_norm": 532.4927978515625, + "learning_rate": 8.485804416403785e-07, + "loss": 38.0625, + "step": 1345 + }, + { + "epoch": 0.012741265228462435, + "grad_norm": 2.4093666076660156, + "learning_rate": 8.492113564668769e-07, + "loss": 0.7751, + "step": 1346 + }, + { + "epoch": 0.012750731250177488, + "grad_norm": 951.308349609375, + "learning_rate": 8.498422712933754e-07, + "loss": 73.5781, + "step": 1347 + }, + { + "epoch": 0.012760197271892542, + "grad_norm": 530.938232421875, + "learning_rate": 8.504731861198738e-07, + "loss": 40.7344, + "step": 1348 + }, + { + "epoch": 0.012769663293607595, + "grad_norm": 737.6542358398438, + "learning_rate": 8.511041009463722e-07, + "loss": 48.4062, + "step": 1349 + }, + { + "epoch": 0.01277912931532265, + "grad_norm": 485.5086975097656, + "learning_rate": 8.517350157728707e-07, + "loss": 39.7344, + "step": 1350 + }, + { + "epoch": 0.012788595337037703, + "grad_norm": 1785.971923828125, + "learning_rate": 8.52365930599369e-07, + "loss": 58.1094, + "step": 1351 + }, + { + "epoch": 0.012798061358752756, + "grad_norm": 465.22564697265625, + "learning_rate": 8.529968454258674e-07, + "loss": 37.1875, + "step": 1352 + }, + { + "epoch": 0.012807527380467811, + "grad_norm": 939.3936157226562, + "learning_rate": 8.536277602523659e-07, + "loss": 87.8438, + "step": 1353 + }, + { + "epoch": 0.012816993402182864, + "grad_norm": 545.9471435546875, + "learning_rate": 8.542586750788643e-07, + "loss": 39.1875, + "step": 1354 + }, + { + "epoch": 0.012826459423897919, + "grad_norm": 334.21453857421875, + "learning_rate": 8.548895899053627e-07, + "loss": 40.6875, + "step": 1355 + }, + { + "epoch": 0.012835925445612972, + "grad_norm": 2.8116471767425537, + "learning_rate": 8.555205047318612e-07, + "loss": 0.8408, + "step": 1356 + }, + { + "epoch": 0.012845391467328026, + "grad_norm": 507.2449035644531, + "learning_rate": 8.561514195583596e-07, + "loss": 40.8438, + "step": 1357 + }, + { + "epoch": 0.01285485748904308, + "grad_norm": 2.6533560752868652, + "learning_rate": 8.567823343848581e-07, + "loss": 0.8555, + "step": 1358 + }, + { + "epoch": 0.012864323510758134, + "grad_norm": 903.3565063476562, + "learning_rate": 8.574132492113564e-07, + "loss": 71.5938, + "step": 1359 + }, + { + "epoch": 0.012873789532473187, + "grad_norm": 573.8442993164062, + "learning_rate": 8.580441640378548e-07, + "loss": 78.5312, + "step": 1360 + }, + { + "epoch": 0.012883255554188242, + "grad_norm": 661.5220947265625, + "learning_rate": 8.586750788643533e-07, + "loss": 41.4375, + "step": 1361 + }, + { + "epoch": 0.012892721575903295, + "grad_norm": 1836.662109375, + "learning_rate": 8.593059936908516e-07, + "loss": 35.9219, + "step": 1362 + }, + { + "epoch": 0.01290218759761835, + "grad_norm": 547.4818725585938, + "learning_rate": 8.599369085173501e-07, + "loss": 46.7344, + "step": 1363 + }, + { + "epoch": 0.012911653619333403, + "grad_norm": 782.3678588867188, + "learning_rate": 8.605678233438486e-07, + "loss": 72.3594, + "step": 1364 + }, + { + "epoch": 0.012921119641048457, + "grad_norm": 649.7166137695312, + "learning_rate": 8.61198738170347e-07, + "loss": 51.3906, + "step": 1365 + }, + { + "epoch": 0.01293058566276351, + "grad_norm": 1104.708740234375, + "learning_rate": 8.618296529968454e-07, + "loss": 120.9375, + "step": 1366 + }, + { + "epoch": 0.012940051684478563, + "grad_norm": 456.02386474609375, + "learning_rate": 8.624605678233438e-07, + "loss": 41.2812, + "step": 1367 + }, + { + "epoch": 0.012949517706193618, + "grad_norm": 400.4045715332031, + "learning_rate": 8.630914826498423e-07, + "loss": 52.9688, + "step": 1368 + }, + { + "epoch": 0.012958983727908671, + "grad_norm": 888.3413696289062, + "learning_rate": 8.637223974763406e-07, + "loss": 42.6094, + "step": 1369 + }, + { + "epoch": 0.012968449749623726, + "grad_norm": 652.0835571289062, + "learning_rate": 8.64353312302839e-07, + "loss": 49.9688, + "step": 1370 + }, + { + "epoch": 0.012977915771338779, + "grad_norm": 1159.310791015625, + "learning_rate": 8.649842271293375e-07, + "loss": 91.0938, + "step": 1371 + }, + { + "epoch": 0.012987381793053834, + "grad_norm": 745.7328491210938, + "learning_rate": 8.656151419558359e-07, + "loss": 86.375, + "step": 1372 + }, + { + "epoch": 0.012996847814768887, + "grad_norm": 763.663330078125, + "learning_rate": 8.662460567823343e-07, + "loss": 72.75, + "step": 1373 + }, + { + "epoch": 0.013006313836483941, + "grad_norm": 507.6583251953125, + "learning_rate": 8.668769716088328e-07, + "loss": 36.0, + "step": 1374 + }, + { + "epoch": 0.013015779858198994, + "grad_norm": 623.54443359375, + "learning_rate": 8.675078864353313e-07, + "loss": 52.8125, + "step": 1375 + }, + { + "epoch": 0.01302524587991405, + "grad_norm": 343.00640869140625, + "learning_rate": 8.681388012618296e-07, + "loss": 42.9688, + "step": 1376 + }, + { + "epoch": 0.013034711901629102, + "grad_norm": 405.62847900390625, + "learning_rate": 8.687697160883281e-07, + "loss": 63.5156, + "step": 1377 + }, + { + "epoch": 0.013044177923344157, + "grad_norm": 446.8899230957031, + "learning_rate": 8.694006309148264e-07, + "loss": 43.6562, + "step": 1378 + }, + { + "epoch": 0.01305364394505921, + "grad_norm": 592.6976928710938, + "learning_rate": 8.700315457413248e-07, + "loss": 66.2188, + "step": 1379 + }, + { + "epoch": 0.013063109966774265, + "grad_norm": 331.5686950683594, + "learning_rate": 8.706624605678233e-07, + "loss": 33.875, + "step": 1380 + }, + { + "epoch": 0.013072575988489318, + "grad_norm": 1398.0421142578125, + "learning_rate": 8.712933753943217e-07, + "loss": 94.2812, + "step": 1381 + }, + { + "epoch": 0.01308204201020437, + "grad_norm": 418.016845703125, + "learning_rate": 8.719242902208201e-07, + "loss": 44.0469, + "step": 1382 + }, + { + "epoch": 0.013091508031919425, + "grad_norm": 487.48724365234375, + "learning_rate": 8.725552050473186e-07, + "loss": 51.8125, + "step": 1383 + }, + { + "epoch": 0.013100974053634478, + "grad_norm": 461.1952209472656, + "learning_rate": 8.73186119873817e-07, + "loss": 41.7344, + "step": 1384 + }, + { + "epoch": 0.013110440075349533, + "grad_norm": 445.71807861328125, + "learning_rate": 8.738170347003155e-07, + "loss": 60.125, + "step": 1385 + }, + { + "epoch": 0.013119906097064586, + "grad_norm": 435.5230712890625, + "learning_rate": 8.744479495268139e-07, + "loss": 43.3438, + "step": 1386 + }, + { + "epoch": 0.013129372118779641, + "grad_norm": 881.2249145507812, + "learning_rate": 8.750788643533122e-07, + "loss": 79.6562, + "step": 1387 + }, + { + "epoch": 0.013138838140494694, + "grad_norm": 821.0104370117188, + "learning_rate": 8.757097791798107e-07, + "loss": 46.625, + "step": 1388 + }, + { + "epoch": 0.013148304162209749, + "grad_norm": 1187.657470703125, + "learning_rate": 8.76340694006309e-07, + "loss": 96.6875, + "step": 1389 + }, + { + "epoch": 0.013157770183924802, + "grad_norm": 1455.6048583984375, + "learning_rate": 8.769716088328075e-07, + "loss": 91.1562, + "step": 1390 + }, + { + "epoch": 0.013167236205639856, + "grad_norm": 681.7944946289062, + "learning_rate": 8.77602523659306e-07, + "loss": 40.2031, + "step": 1391 + }, + { + "epoch": 0.01317670222735491, + "grad_norm": 547.618408203125, + "learning_rate": 8.782334384858043e-07, + "loss": 62.9062, + "step": 1392 + }, + { + "epoch": 0.013186168249069964, + "grad_norm": 713.3663330078125, + "learning_rate": 8.788643533123028e-07, + "loss": 51.4375, + "step": 1393 + }, + { + "epoch": 0.013195634270785017, + "grad_norm": 1101.2574462890625, + "learning_rate": 8.794952681388013e-07, + "loss": 62.5, + "step": 1394 + }, + { + "epoch": 0.01320510029250007, + "grad_norm": 499.2628479003906, + "learning_rate": 8.801261829652997e-07, + "loss": 36.9219, + "step": 1395 + }, + { + "epoch": 0.013214566314215125, + "grad_norm": 3.1810498237609863, + "learning_rate": 8.80757097791798e-07, + "loss": 0.8477, + "step": 1396 + }, + { + "epoch": 0.013224032335930178, + "grad_norm": 448.5975341796875, + "learning_rate": 8.813880126182965e-07, + "loss": 35.5781, + "step": 1397 + }, + { + "epoch": 0.013233498357645233, + "grad_norm": 574.132080078125, + "learning_rate": 8.820189274447949e-07, + "loss": 44.9844, + "step": 1398 + }, + { + "epoch": 0.013242964379360286, + "grad_norm": 781.0548095703125, + "learning_rate": 8.826498422712933e-07, + "loss": 80.1562, + "step": 1399 + }, + { + "epoch": 0.01325243040107534, + "grad_norm": 608.9871215820312, + "learning_rate": 8.832807570977917e-07, + "loss": 45.6875, + "step": 1400 + }, + { + "epoch": 0.013261896422790393, + "grad_norm": 449.8024597167969, + "learning_rate": 8.839116719242902e-07, + "loss": 39.2969, + "step": 1401 + }, + { + "epoch": 0.013271362444505448, + "grad_norm": 713.97802734375, + "learning_rate": 8.845425867507887e-07, + "loss": 82.2812, + "step": 1402 + }, + { + "epoch": 0.013280828466220501, + "grad_norm": 627.7367553710938, + "learning_rate": 8.85173501577287e-07, + "loss": 67.0312, + "step": 1403 + }, + { + "epoch": 0.013290294487935556, + "grad_norm": 1545.5152587890625, + "learning_rate": 8.858044164037855e-07, + "loss": 115.0, + "step": 1404 + }, + { + "epoch": 0.013299760509650609, + "grad_norm": 533.2507934570312, + "learning_rate": 8.86435331230284e-07, + "loss": 35.3125, + "step": 1405 + }, + { + "epoch": 0.013309226531365664, + "grad_norm": 385.31866455078125, + "learning_rate": 8.870662460567822e-07, + "loss": 44.0625, + "step": 1406 + }, + { + "epoch": 0.013318692553080717, + "grad_norm": 836.9967651367188, + "learning_rate": 8.876971608832807e-07, + "loss": 45.1562, + "step": 1407 + }, + { + "epoch": 0.013328158574795771, + "grad_norm": 562.7202758789062, + "learning_rate": 8.883280757097792e-07, + "loss": 74.125, + "step": 1408 + }, + { + "epoch": 0.013337624596510824, + "grad_norm": 710.3111572265625, + "learning_rate": 8.889589905362775e-07, + "loss": 39.375, + "step": 1409 + }, + { + "epoch": 0.013347090618225877, + "grad_norm": 624.1787109375, + "learning_rate": 8.89589905362776e-07, + "loss": 70.6875, + "step": 1410 + }, + { + "epoch": 0.013356556639940932, + "grad_norm": 835.3054809570312, + "learning_rate": 8.902208201892744e-07, + "loss": 71.9062, + "step": 1411 + }, + { + "epoch": 0.013366022661655985, + "grad_norm": 358.1769714355469, + "learning_rate": 8.908517350157729e-07, + "loss": 33.5, + "step": 1412 + }, + { + "epoch": 0.01337548868337104, + "grad_norm": 413.16943359375, + "learning_rate": 8.914826498422713e-07, + "loss": 37.1406, + "step": 1413 + }, + { + "epoch": 0.013384954705086093, + "grad_norm": 564.9970703125, + "learning_rate": 8.921135646687696e-07, + "loss": 45.0312, + "step": 1414 + }, + { + "epoch": 0.013394420726801148, + "grad_norm": 524.179931640625, + "learning_rate": 8.927444794952681e-07, + "loss": 39.0781, + "step": 1415 + }, + { + "epoch": 0.0134038867485162, + "grad_norm": 803.7221069335938, + "learning_rate": 8.933753943217665e-07, + "loss": 101.0, + "step": 1416 + }, + { + "epoch": 0.013413352770231255, + "grad_norm": 1163.7073974609375, + "learning_rate": 8.940063091482649e-07, + "loss": 75.0156, + "step": 1417 + }, + { + "epoch": 0.013422818791946308, + "grad_norm": 498.9340515136719, + "learning_rate": 8.946372239747634e-07, + "loss": 43.2656, + "step": 1418 + }, + { + "epoch": 0.013432284813661363, + "grad_norm": 653.8701171875, + "learning_rate": 8.952681388012618e-07, + "loss": 43.8594, + "step": 1419 + }, + { + "epoch": 0.013441750835376416, + "grad_norm": 570.5690307617188, + "learning_rate": 8.958990536277602e-07, + "loss": 60.7969, + "step": 1420 + }, + { + "epoch": 0.013451216857091471, + "grad_norm": 612.148193359375, + "learning_rate": 8.965299684542587e-07, + "loss": 75.75, + "step": 1421 + }, + { + "epoch": 0.013460682878806524, + "grad_norm": 567.9755249023438, + "learning_rate": 8.971608832807571e-07, + "loss": 47.8438, + "step": 1422 + }, + { + "epoch": 0.013470148900521577, + "grad_norm": 557.1666870117188, + "learning_rate": 8.977917981072554e-07, + "loss": 48.25, + "step": 1423 + }, + { + "epoch": 0.013479614922236632, + "grad_norm": 657.5366821289062, + "learning_rate": 8.984227129337539e-07, + "loss": 82.375, + "step": 1424 + }, + { + "epoch": 0.013489080943951685, + "grad_norm": 464.5755615234375, + "learning_rate": 8.990536277602523e-07, + "loss": 40.3594, + "step": 1425 + }, + { + "epoch": 0.01349854696566674, + "grad_norm": 472.5301513671875, + "learning_rate": 8.996845425867507e-07, + "loss": 46.4219, + "step": 1426 + }, + { + "epoch": 0.013508012987381792, + "grad_norm": 340.2803039550781, + "learning_rate": 9.003154574132492e-07, + "loss": 36.2031, + "step": 1427 + }, + { + "epoch": 0.013517479009096847, + "grad_norm": 624.3343505859375, + "learning_rate": 9.009463722397476e-07, + "loss": 72.875, + "step": 1428 + }, + { + "epoch": 0.0135269450308119, + "grad_norm": 516.1412353515625, + "learning_rate": 9.01577287066246e-07, + "loss": 48.8125, + "step": 1429 + }, + { + "epoch": 0.013536411052526955, + "grad_norm": 1154.2579345703125, + "learning_rate": 9.022082018927445e-07, + "loss": 76.4688, + "step": 1430 + }, + { + "epoch": 0.013545877074242008, + "grad_norm": 3.112196922302246, + "learning_rate": 9.028391167192429e-07, + "loss": 0.8594, + "step": 1431 + }, + { + "epoch": 0.013555343095957063, + "grad_norm": 787.1168212890625, + "learning_rate": 9.034700315457414e-07, + "loss": 40.1406, + "step": 1432 + }, + { + "epoch": 0.013564809117672116, + "grad_norm": 660.9312744140625, + "learning_rate": 9.041009463722396e-07, + "loss": 46.9062, + "step": 1433 + }, + { + "epoch": 0.01357427513938717, + "grad_norm": 354.42498779296875, + "learning_rate": 9.047318611987381e-07, + "loss": 36.2656, + "step": 1434 + }, + { + "epoch": 0.013583741161102223, + "grad_norm": 470.1373596191406, + "learning_rate": 9.053627760252366e-07, + "loss": 41.8594, + "step": 1435 + }, + { + "epoch": 0.013593207182817278, + "grad_norm": 756.4203491210938, + "learning_rate": 9.059936908517349e-07, + "loss": 69.5938, + "step": 1436 + }, + { + "epoch": 0.013602673204532331, + "grad_norm": 591.5595092773438, + "learning_rate": 9.066246056782334e-07, + "loss": 41.6875, + "step": 1437 + }, + { + "epoch": 0.013612139226247384, + "grad_norm": 388.9606628417969, + "learning_rate": 9.072555205047319e-07, + "loss": 44.875, + "step": 1438 + }, + { + "epoch": 0.013621605247962439, + "grad_norm": 634.19775390625, + "learning_rate": 9.078864353312302e-07, + "loss": 61.5625, + "step": 1439 + }, + { + "epoch": 0.013631071269677492, + "grad_norm": 726.5520629882812, + "learning_rate": 9.085173501577287e-07, + "loss": 57.0938, + "step": 1440 + }, + { + "epoch": 0.013640537291392547, + "grad_norm": 459.2733154296875, + "learning_rate": 9.091482649842272e-07, + "loss": 47.875, + "step": 1441 + }, + { + "epoch": 0.0136500033131076, + "grad_norm": 417.86383056640625, + "learning_rate": 9.097791798107255e-07, + "loss": 65.375, + "step": 1442 + }, + { + "epoch": 0.013659469334822654, + "grad_norm": 492.90875244140625, + "learning_rate": 9.104100946372239e-07, + "loss": 42.0938, + "step": 1443 + }, + { + "epoch": 0.013668935356537707, + "grad_norm": 1055.4324951171875, + "learning_rate": 9.110410094637223e-07, + "loss": 83.7656, + "step": 1444 + }, + { + "epoch": 0.013678401378252762, + "grad_norm": 855.2055053710938, + "learning_rate": 9.116719242902208e-07, + "loss": 84.375, + "step": 1445 + }, + { + "epoch": 0.013687867399967815, + "grad_norm": 3.1018226146698, + "learning_rate": 9.123028391167192e-07, + "loss": 0.9126, + "step": 1446 + }, + { + "epoch": 0.01369733342168287, + "grad_norm": 430.2931823730469, + "learning_rate": 9.129337539432176e-07, + "loss": 30.0312, + "step": 1447 + }, + { + "epoch": 0.013706799443397923, + "grad_norm": 518.375244140625, + "learning_rate": 9.135646687697161e-07, + "loss": 37.625, + "step": 1448 + }, + { + "epoch": 0.013716265465112978, + "grad_norm": 650.762939453125, + "learning_rate": 9.141955835962146e-07, + "loss": 41.125, + "step": 1449 + }, + { + "epoch": 0.01372573148682803, + "grad_norm": 745.5169677734375, + "learning_rate": 9.148264984227128e-07, + "loss": 94.7188, + "step": 1450 + }, + { + "epoch": 0.013735197508543085, + "grad_norm": 559.1017456054688, + "learning_rate": 9.154574132492113e-07, + "loss": 42.7812, + "step": 1451 + }, + { + "epoch": 0.013744663530258138, + "grad_norm": 344.24554443359375, + "learning_rate": 9.160883280757098e-07, + "loss": 45.3125, + "step": 1452 + }, + { + "epoch": 0.013754129551973191, + "grad_norm": 2.658862829208374, + "learning_rate": 9.167192429022081e-07, + "loss": 0.8394, + "step": 1453 + }, + { + "epoch": 0.013763595573688246, + "grad_norm": 519.7094116210938, + "learning_rate": 9.173501577287066e-07, + "loss": 47.1875, + "step": 1454 + }, + { + "epoch": 0.0137730615954033, + "grad_norm": 1831.1029052734375, + "learning_rate": 9.17981072555205e-07, + "loss": 74.7031, + "step": 1455 + }, + { + "epoch": 0.013782527617118354, + "grad_norm": 3.121443033218384, + "learning_rate": 9.186119873817034e-07, + "loss": 0.8384, + "step": 1456 + }, + { + "epoch": 0.013791993638833407, + "grad_norm": 353.1127014160156, + "learning_rate": 9.192429022082019e-07, + "loss": 33.5156, + "step": 1457 + }, + { + "epoch": 0.013801459660548462, + "grad_norm": 381.7391052246094, + "learning_rate": 9.198738170347003e-07, + "loss": 38.6094, + "step": 1458 + }, + { + "epoch": 0.013810925682263515, + "grad_norm": 560.4081420898438, + "learning_rate": 9.205047318611988e-07, + "loss": 37.5938, + "step": 1459 + }, + { + "epoch": 0.01382039170397857, + "grad_norm": 683.053466796875, + "learning_rate": 9.211356466876971e-07, + "loss": 78.8906, + "step": 1460 + }, + { + "epoch": 0.013829857725693622, + "grad_norm": 972.4369506835938, + "learning_rate": 9.217665615141955e-07, + "loss": 45.5156, + "step": 1461 + }, + { + "epoch": 0.013839323747408677, + "grad_norm": 648.32421875, + "learning_rate": 9.22397476340694e-07, + "loss": 72.4219, + "step": 1462 + }, + { + "epoch": 0.01384878976912373, + "grad_norm": 525.7362670898438, + "learning_rate": 9.230283911671923e-07, + "loss": 41.3906, + "step": 1463 + }, + { + "epoch": 0.013858255790838785, + "grad_norm": 914.3699951171875, + "learning_rate": 9.236593059936908e-07, + "loss": 85.875, + "step": 1464 + }, + { + "epoch": 0.013867721812553838, + "grad_norm": 1168.3109130859375, + "learning_rate": 9.242902208201893e-07, + "loss": 58.5469, + "step": 1465 + }, + { + "epoch": 0.013877187834268891, + "grad_norm": 2.8909316062927246, + "learning_rate": 9.249211356466876e-07, + "loss": 0.8804, + "step": 1466 + }, + { + "epoch": 0.013886653855983946, + "grad_norm": 588.3839111328125, + "learning_rate": 9.255520504731861e-07, + "loss": 43.5938, + "step": 1467 + }, + { + "epoch": 0.013896119877698999, + "grad_norm": 547.1492309570312, + "learning_rate": 9.261829652996846e-07, + "loss": 42.9219, + "step": 1468 + }, + { + "epoch": 0.013905585899414053, + "grad_norm": 629.670166015625, + "learning_rate": 9.268138801261829e-07, + "loss": 44.375, + "step": 1469 + }, + { + "epoch": 0.013915051921129106, + "grad_norm": 1000.9705810546875, + "learning_rate": 9.274447949526813e-07, + "loss": 94.6875, + "step": 1470 + }, + { + "epoch": 0.013924517942844161, + "grad_norm": 453.7991638183594, + "learning_rate": 9.280757097791798e-07, + "loss": 37.1719, + "step": 1471 + }, + { + "epoch": 0.013933983964559214, + "grad_norm": 431.3731994628906, + "learning_rate": 9.287066246056782e-07, + "loss": 42.3906, + "step": 1472 + }, + { + "epoch": 0.013943449986274269, + "grad_norm": 727.1135864257812, + "learning_rate": 9.293375394321766e-07, + "loss": 69.4375, + "step": 1473 + }, + { + "epoch": 0.013952916007989322, + "grad_norm": 3.21260142326355, + "learning_rate": 9.29968454258675e-07, + "loss": 0.8521, + "step": 1474 + }, + { + "epoch": 0.013962382029704377, + "grad_norm": 534.3606567382812, + "learning_rate": 9.305993690851735e-07, + "loss": 37.9219, + "step": 1475 + }, + { + "epoch": 0.01397184805141943, + "grad_norm": 481.35626220703125, + "learning_rate": 9.312302839116719e-07, + "loss": 43.0625, + "step": 1476 + }, + { + "epoch": 0.013981314073134484, + "grad_norm": 506.96563720703125, + "learning_rate": 9.318611987381702e-07, + "loss": 49.2812, + "step": 1477 + }, + { + "epoch": 0.013990780094849537, + "grad_norm": 577.5778198242188, + "learning_rate": 9.324921135646687e-07, + "loss": 41.1094, + "step": 1478 + }, + { + "epoch": 0.014000246116564592, + "grad_norm": 601.3617553710938, + "learning_rate": 9.331230283911672e-07, + "loss": 62.9844, + "step": 1479 + }, + { + "epoch": 0.014009712138279645, + "grad_norm": 509.481689453125, + "learning_rate": 9.337539432176655e-07, + "loss": 40.7344, + "step": 1480 + }, + { + "epoch": 0.014019178159994698, + "grad_norm": 864.44677734375, + "learning_rate": 9.34384858044164e-07, + "loss": 68.1875, + "step": 1481 + }, + { + "epoch": 0.014028644181709753, + "grad_norm": 512.6714477539062, + "learning_rate": 9.350157728706625e-07, + "loss": 38.8281, + "step": 1482 + }, + { + "epoch": 0.014038110203424806, + "grad_norm": 391.63494873046875, + "learning_rate": 9.356466876971608e-07, + "loss": 35.7188, + "step": 1483 + }, + { + "epoch": 0.01404757622513986, + "grad_norm": 416.9952697753906, + "learning_rate": 9.362776025236593e-07, + "loss": 32.8438, + "step": 1484 + }, + { + "epoch": 0.014057042246854914, + "grad_norm": 564.627197265625, + "learning_rate": 9.369085173501577e-07, + "loss": 44.1719, + "step": 1485 + }, + { + "epoch": 0.014066508268569968, + "grad_norm": 543.583984375, + "learning_rate": 9.37539432176656e-07, + "loss": 48.7344, + "step": 1486 + }, + { + "epoch": 0.014075974290285021, + "grad_norm": 705.085205078125, + "learning_rate": 9.381703470031545e-07, + "loss": 85.9062, + "step": 1487 + }, + { + "epoch": 0.014085440312000076, + "grad_norm": 1149.811767578125, + "learning_rate": 9.388012618296529e-07, + "loss": 84.375, + "step": 1488 + }, + { + "epoch": 0.01409490633371513, + "grad_norm": 453.5669860839844, + "learning_rate": 9.394321766561514e-07, + "loss": 36.5312, + "step": 1489 + }, + { + "epoch": 0.014104372355430184, + "grad_norm": 690.2760620117188, + "learning_rate": 9.400630914826498e-07, + "loss": 76.9688, + "step": 1490 + }, + { + "epoch": 0.014113838377145237, + "grad_norm": 397.5094299316406, + "learning_rate": 9.406940063091482e-07, + "loss": 44.4375, + "step": 1491 + }, + { + "epoch": 0.014123304398860292, + "grad_norm": 387.2882385253906, + "learning_rate": 9.413249211356467e-07, + "loss": 36.2812, + "step": 1492 + }, + { + "epoch": 0.014132770420575345, + "grad_norm": 3.019580125808716, + "learning_rate": 9.419558359621451e-07, + "loss": 0.9253, + "step": 1493 + }, + { + "epoch": 0.014142236442290398, + "grad_norm": 478.53741455078125, + "learning_rate": 9.425867507886435e-07, + "loss": 41.4531, + "step": 1494 + }, + { + "epoch": 0.014151702464005452, + "grad_norm": 837.216064453125, + "learning_rate": 9.43217665615142e-07, + "loss": 96.7812, + "step": 1495 + }, + { + "epoch": 0.014161168485720506, + "grad_norm": 785.90673828125, + "learning_rate": 9.438485804416402e-07, + "loss": 41.0312, + "step": 1496 + }, + { + "epoch": 0.01417063450743556, + "grad_norm": 1076.6455078125, + "learning_rate": 9.444794952681387e-07, + "loss": 124.0938, + "step": 1497 + }, + { + "epoch": 0.014180100529150613, + "grad_norm": 388.8402099609375, + "learning_rate": 9.451104100946372e-07, + "loss": 44.8125, + "step": 1498 + }, + { + "epoch": 0.014189566550865668, + "grad_norm": 439.56536865234375, + "learning_rate": 9.457413249211356e-07, + "loss": 36.5469, + "step": 1499 + }, + { + "epoch": 0.014199032572580721, + "grad_norm": 673.4337158203125, + "learning_rate": 9.46372239747634e-07, + "loss": 38.7344, + "step": 1500 + }, + { + "epoch": 0.014208498594295776, + "grad_norm": 779.1889038085938, + "learning_rate": 9.470031545741325e-07, + "loss": 57.5547, + "step": 1501 + }, + { + "epoch": 0.014217964616010829, + "grad_norm": 872.2674560546875, + "learning_rate": 9.476340694006309e-07, + "loss": 90.4531, + "step": 1502 + }, + { + "epoch": 0.014227430637725883, + "grad_norm": 467.3123474121094, + "learning_rate": 9.482649842271293e-07, + "loss": 41.625, + "step": 1503 + }, + { + "epoch": 0.014236896659440937, + "grad_norm": 557.76806640625, + "learning_rate": 9.488958990536278e-07, + "loss": 47.3594, + "step": 1504 + }, + { + "epoch": 0.014246362681155991, + "grad_norm": 444.07647705078125, + "learning_rate": 9.495268138801261e-07, + "loss": 41.9531, + "step": 1505 + }, + { + "epoch": 0.014255828702871044, + "grad_norm": 643.6551513671875, + "learning_rate": 9.501577287066246e-07, + "loss": 46.0625, + "step": 1506 + }, + { + "epoch": 0.014265294724586099, + "grad_norm": 290.8764953613281, + "learning_rate": 9.507886435331229e-07, + "loss": 35.0, + "step": 1507 + }, + { + "epoch": 0.014274760746301152, + "grad_norm": 2.6246261596679688, + "learning_rate": 9.514195583596214e-07, + "loss": 0.8828, + "step": 1508 + }, + { + "epoch": 0.014284226768016205, + "grad_norm": 591.4506225585938, + "learning_rate": 9.520504731861199e-07, + "loss": 75.7812, + "step": 1509 + }, + { + "epoch": 0.01429369278973126, + "grad_norm": 583.549072265625, + "learning_rate": 9.526813880126182e-07, + "loss": 42.3125, + "step": 1510 + }, + { + "epoch": 0.014303158811446313, + "grad_norm": 834.4395141601562, + "learning_rate": 9.533123028391167e-07, + "loss": 58.5469, + "step": 1511 + }, + { + "epoch": 0.014312624833161368, + "grad_norm": 977.6520385742188, + "learning_rate": 9.53943217665615e-07, + "loss": 70.8906, + "step": 1512 + }, + { + "epoch": 0.01432209085487642, + "grad_norm": 3.245422124862671, + "learning_rate": 9.545741324921136e-07, + "loss": 0.8335, + "step": 1513 + }, + { + "epoch": 0.014331556876591475, + "grad_norm": 535.1561279296875, + "learning_rate": 9.55205047318612e-07, + "loss": 39.4844, + "step": 1514 + }, + { + "epoch": 0.014341022898306528, + "grad_norm": 315.49462890625, + "learning_rate": 9.558359621451103e-07, + "loss": 33.6875, + "step": 1515 + }, + { + "epoch": 0.014350488920021583, + "grad_norm": 376.9138488769531, + "learning_rate": 9.564668769716088e-07, + "loss": 33.1562, + "step": 1516 + }, + { + "epoch": 0.014359954941736636, + "grad_norm": 625.4567260742188, + "learning_rate": 9.570977917981073e-07, + "loss": 95.9062, + "step": 1517 + }, + { + "epoch": 0.01436942096345169, + "grad_norm": 810.0696411132812, + "learning_rate": 9.577287066246056e-07, + "loss": 60.6406, + "step": 1518 + }, + { + "epoch": 0.014378886985166744, + "grad_norm": 1323.385009765625, + "learning_rate": 9.58359621451104e-07, + "loss": 36.625, + "step": 1519 + }, + { + "epoch": 0.014388353006881799, + "grad_norm": 624.9921264648438, + "learning_rate": 9.589905362776026e-07, + "loss": 41.1562, + "step": 1520 + }, + { + "epoch": 0.014397819028596852, + "grad_norm": 348.5631408691406, + "learning_rate": 9.596214511041009e-07, + "loss": 32.3438, + "step": 1521 + }, + { + "epoch": 0.014407285050311906, + "grad_norm": 488.4847412109375, + "learning_rate": 9.602523659305994e-07, + "loss": 37.3906, + "step": 1522 + }, + { + "epoch": 0.01441675107202696, + "grad_norm": 1286.49072265625, + "learning_rate": 9.608832807570978e-07, + "loss": 86.4375, + "step": 1523 + }, + { + "epoch": 0.014426217093742012, + "grad_norm": 645.9542846679688, + "learning_rate": 9.615141955835961e-07, + "loss": 55.0469, + "step": 1524 + }, + { + "epoch": 0.014435683115457067, + "grad_norm": 777.2451171875, + "learning_rate": 9.621451104100946e-07, + "loss": 85.5938, + "step": 1525 + }, + { + "epoch": 0.01444514913717212, + "grad_norm": 1294.2974853515625, + "learning_rate": 9.627760252365931e-07, + "loss": 97.1875, + "step": 1526 + }, + { + "epoch": 0.014454615158887175, + "grad_norm": 527.7368774414062, + "learning_rate": 9.634069400630914e-07, + "loss": 64.7812, + "step": 1527 + }, + { + "epoch": 0.014464081180602228, + "grad_norm": 355.0567321777344, + "learning_rate": 9.640378548895899e-07, + "loss": 35.1562, + "step": 1528 + }, + { + "epoch": 0.014473547202317283, + "grad_norm": 548.6400756835938, + "learning_rate": 9.646687697160882e-07, + "loss": 43.3125, + "step": 1529 + }, + { + "epoch": 0.014483013224032336, + "grad_norm": 515.7227172851562, + "learning_rate": 9.652996845425867e-07, + "loss": 36.5469, + "step": 1530 + }, + { + "epoch": 0.01449247924574739, + "grad_norm": 506.5542297363281, + "learning_rate": 9.659305993690852e-07, + "loss": 33.4375, + "step": 1531 + }, + { + "epoch": 0.014501945267462443, + "grad_norm": 533.2708129882812, + "learning_rate": 9.665615141955834e-07, + "loss": 53.5469, + "step": 1532 + }, + { + "epoch": 0.014511411289177498, + "grad_norm": 808.70263671875, + "learning_rate": 9.67192429022082e-07, + "loss": 76.2031, + "step": 1533 + }, + { + "epoch": 0.014520877310892551, + "grad_norm": 420.9373474121094, + "learning_rate": 9.678233438485804e-07, + "loss": 63.375, + "step": 1534 + }, + { + "epoch": 0.014530343332607606, + "grad_norm": 330.6839904785156, + "learning_rate": 9.684542586750787e-07, + "loss": 32.4531, + "step": 1535 + }, + { + "epoch": 0.014539809354322659, + "grad_norm": 456.32733154296875, + "learning_rate": 9.690851735015772e-07, + "loss": 39.4688, + "step": 1536 + }, + { + "epoch": 0.014549275376037712, + "grad_norm": 495.7166748046875, + "learning_rate": 9.697160883280757e-07, + "loss": 54.875, + "step": 1537 + }, + { + "epoch": 0.014558741397752767, + "grad_norm": 504.2703552246094, + "learning_rate": 9.70347003154574e-07, + "loss": 65.3438, + "step": 1538 + }, + { + "epoch": 0.01456820741946782, + "grad_norm": 3.2078359127044678, + "learning_rate": 9.709779179810725e-07, + "loss": 0.936, + "step": 1539 + }, + { + "epoch": 0.014577673441182874, + "grad_norm": 836.077880859375, + "learning_rate": 9.71608832807571e-07, + "loss": 79.0938, + "step": 1540 + }, + { + "epoch": 0.014587139462897927, + "grad_norm": 338.6009216308594, + "learning_rate": 9.722397476340694e-07, + "loss": 44.0312, + "step": 1541 + }, + { + "epoch": 0.014596605484612982, + "grad_norm": 874.1458740234375, + "learning_rate": 9.728706624605677e-07, + "loss": 34.7656, + "step": 1542 + }, + { + "epoch": 0.014606071506328035, + "grad_norm": 495.95928955078125, + "learning_rate": 9.735015772870662e-07, + "loss": 37.3906, + "step": 1543 + }, + { + "epoch": 0.01461553752804309, + "grad_norm": 472.87640380859375, + "learning_rate": 9.741324921135647e-07, + "loss": 35.3594, + "step": 1544 + }, + { + "epoch": 0.014625003549758143, + "grad_norm": 3.1560075283050537, + "learning_rate": 9.74763406940063e-07, + "loss": 1.0498, + "step": 1545 + }, + { + "epoch": 0.014634469571473198, + "grad_norm": 458.1545104980469, + "learning_rate": 9.753943217665615e-07, + "loss": 45.7656, + "step": 1546 + }, + { + "epoch": 0.01464393559318825, + "grad_norm": 1171.685546875, + "learning_rate": 9.7602523659306e-07, + "loss": 96.75, + "step": 1547 + }, + { + "epoch": 0.014653401614903305, + "grad_norm": 429.1284484863281, + "learning_rate": 9.766561514195583e-07, + "loss": 40.0781, + "step": 1548 + }, + { + "epoch": 0.014662867636618358, + "grad_norm": 359.6435546875, + "learning_rate": 9.772870662460568e-07, + "loss": 39.3125, + "step": 1549 + }, + { + "epoch": 0.014672333658333413, + "grad_norm": 2.8279566764831543, + "learning_rate": 9.779179810725552e-07, + "loss": 0.9326, + "step": 1550 + }, + { + "epoch": 0.014681799680048466, + "grad_norm": 490.55816650390625, + "learning_rate": 9.785488958990535e-07, + "loss": 53.9844, + "step": 1551 + }, + { + "epoch": 0.014691265701763519, + "grad_norm": 823.115478515625, + "learning_rate": 9.79179810725552e-07, + "loss": 54.6719, + "step": 1552 + }, + { + "epoch": 0.014700731723478574, + "grad_norm": 512.02880859375, + "learning_rate": 9.798107255520505e-07, + "loss": 32.25, + "step": 1553 + }, + { + "epoch": 0.014710197745193627, + "grad_norm": 462.1593322753906, + "learning_rate": 9.804416403785488e-07, + "loss": 48.5156, + "step": 1554 + }, + { + "epoch": 0.014719663766908682, + "grad_norm": 1080.4844970703125, + "learning_rate": 9.810725552050473e-07, + "loss": 60.9531, + "step": 1555 + }, + { + "epoch": 0.014729129788623735, + "grad_norm": 522.9070434570312, + "learning_rate": 9.817034700315458e-07, + "loss": 88.2188, + "step": 1556 + }, + { + "epoch": 0.01473859581033879, + "grad_norm": 498.96173095703125, + "learning_rate": 9.82334384858044e-07, + "loss": 37.4219, + "step": 1557 + }, + { + "epoch": 0.014748061832053842, + "grad_norm": 506.362548828125, + "learning_rate": 9.829652996845426e-07, + "loss": 38.75, + "step": 1558 + }, + { + "epoch": 0.014757527853768897, + "grad_norm": 258.36773681640625, + "learning_rate": 9.835962145110408e-07, + "loss": 31.8438, + "step": 1559 + }, + { + "epoch": 0.01476699387548395, + "grad_norm": 1216.8369140625, + "learning_rate": 9.842271293375393e-07, + "loss": 56.375, + "step": 1560 + }, + { + "epoch": 0.014776459897199005, + "grad_norm": 682.7588500976562, + "learning_rate": 9.848580441640378e-07, + "loss": 60.5, + "step": 1561 + }, + { + "epoch": 0.014785925918914058, + "grad_norm": 335.8126220703125, + "learning_rate": 9.85488958990536e-07, + "loss": 46.0625, + "step": 1562 + }, + { + "epoch": 0.014795391940629113, + "grad_norm": 440.4599914550781, + "learning_rate": 9.861198738170346e-07, + "loss": 37.6719, + "step": 1563 + }, + { + "epoch": 0.014804857962344166, + "grad_norm": 540.6492309570312, + "learning_rate": 9.86750788643533e-07, + "loss": 42.2656, + "step": 1564 + }, + { + "epoch": 0.01481432398405922, + "grad_norm": 979.8394775390625, + "learning_rate": 9.873817034700314e-07, + "loss": 76.375, + "step": 1565 + }, + { + "epoch": 0.014823790005774273, + "grad_norm": 1317.697021484375, + "learning_rate": 9.880126182965299e-07, + "loss": 84.0625, + "step": 1566 + }, + { + "epoch": 0.014833256027489326, + "grad_norm": 334.5838317871094, + "learning_rate": 9.886435331230284e-07, + "loss": 36.2969, + "step": 1567 + }, + { + "epoch": 0.014842722049204381, + "grad_norm": 2.8009111881256104, + "learning_rate": 9.892744479495268e-07, + "loss": 0.813, + "step": 1568 + }, + { + "epoch": 0.014852188070919434, + "grad_norm": 3.096045970916748, + "learning_rate": 9.899053627760251e-07, + "loss": 0.939, + "step": 1569 + }, + { + "epoch": 0.014861654092634489, + "grad_norm": 2.948606491088867, + "learning_rate": 9.905362776025236e-07, + "loss": 0.9609, + "step": 1570 + }, + { + "epoch": 0.014871120114349542, + "grad_norm": 512.8611450195312, + "learning_rate": 9.911671924290221e-07, + "loss": 36.4062, + "step": 1571 + }, + { + "epoch": 0.014880586136064597, + "grad_norm": 383.573974609375, + "learning_rate": 9.917981072555204e-07, + "loss": 34.0, + "step": 1572 + }, + { + "epoch": 0.01489005215777965, + "grad_norm": 3.187349557876587, + "learning_rate": 9.924290220820189e-07, + "loss": 0.8804, + "step": 1573 + }, + { + "epoch": 0.014899518179494704, + "grad_norm": 1316.876953125, + "learning_rate": 9.930599369085174e-07, + "loss": 69.4531, + "step": 1574 + }, + { + "epoch": 0.014908984201209757, + "grad_norm": 944.4759521484375, + "learning_rate": 9.936908517350157e-07, + "loss": 40.875, + "step": 1575 + }, + { + "epoch": 0.014918450222924812, + "grad_norm": 332.7432861328125, + "learning_rate": 9.943217665615142e-07, + "loss": 33.6875, + "step": 1576 + }, + { + "epoch": 0.014927916244639865, + "grad_norm": 703.055419921875, + "learning_rate": 9.949526813880126e-07, + "loss": 74.7188, + "step": 1577 + }, + { + "epoch": 0.01493738226635492, + "grad_norm": 1165.20947265625, + "learning_rate": 9.955835962145111e-07, + "loss": 70.4062, + "step": 1578 + }, + { + "epoch": 0.014946848288069973, + "grad_norm": 950.879150390625, + "learning_rate": 9.962145110410094e-07, + "loss": 91.0312, + "step": 1579 + }, + { + "epoch": 0.014956314309785026, + "grad_norm": 1153.678466796875, + "learning_rate": 9.96845425867508e-07, + "loss": 71.6094, + "step": 1580 + }, + { + "epoch": 0.01496578033150008, + "grad_norm": 722.4657592773438, + "learning_rate": 9.974763406940062e-07, + "loss": 72.7344, + "step": 1581 + }, + { + "epoch": 0.014975246353215134, + "grad_norm": 718.4335327148438, + "learning_rate": 9.981072555205047e-07, + "loss": 101.6875, + "step": 1582 + }, + { + "epoch": 0.014984712374930188, + "grad_norm": 2.916945219039917, + "learning_rate": 9.987381703470032e-07, + "loss": 0.9512, + "step": 1583 + }, + { + "epoch": 0.014994178396645241, + "grad_norm": 746.156982421875, + "learning_rate": 9.993690851735015e-07, + "loss": 49.2188, + "step": 1584 + }, + { + "epoch": 0.015003644418360296, + "grad_norm": 585.630859375, + "learning_rate": 1e-06, + "loss": 40.2031, + "step": 1585 + }, + { + "epoch": 0.015013110440075349, + "grad_norm": 985.6940307617188, + "learning_rate": 1.0006309148264984e-06, + "loss": 72.7188, + "step": 1586 + }, + { + "epoch": 0.015022576461790404, + "grad_norm": 938.5748291015625, + "learning_rate": 1.001261829652997e-06, + "loss": 52.5391, + "step": 1587 + }, + { + "epoch": 0.015032042483505457, + "grad_norm": 565.2317504882812, + "learning_rate": 1.0018927444794952e-06, + "loss": 44.6562, + "step": 1588 + }, + { + "epoch": 0.015041508505220512, + "grad_norm": 1024.2633056640625, + "learning_rate": 1.0025236593059935e-06, + "loss": 85.125, + "step": 1589 + }, + { + "epoch": 0.015050974526935565, + "grad_norm": 458.9192810058594, + "learning_rate": 1.003154574132492e-06, + "loss": 38.7188, + "step": 1590 + }, + { + "epoch": 0.01506044054865062, + "grad_norm": 890.6364135742188, + "learning_rate": 1.0037854889589905e-06, + "loss": 77.5, + "step": 1591 + }, + { + "epoch": 0.015069906570365672, + "grad_norm": 1131.025390625, + "learning_rate": 1.004416403785489e-06, + "loss": 75.6562, + "step": 1592 + }, + { + "epoch": 0.015079372592080727, + "grad_norm": 486.6618957519531, + "learning_rate": 1.0050473186119875e-06, + "loss": 41.5312, + "step": 1593 + }, + { + "epoch": 0.01508883861379578, + "grad_norm": 851.5337524414062, + "learning_rate": 1.0056782334384858e-06, + "loss": 60.7656, + "step": 1594 + }, + { + "epoch": 0.015098304635510833, + "grad_norm": 561.5545043945312, + "learning_rate": 1.0063091482649842e-06, + "loss": 38.4375, + "step": 1595 + }, + { + "epoch": 0.015107770657225888, + "grad_norm": 614.7553100585938, + "learning_rate": 1.0069400630914825e-06, + "loss": 40.9844, + "step": 1596 + }, + { + "epoch": 0.01511723667894094, + "grad_norm": 1123.0914306640625, + "learning_rate": 1.007570977917981e-06, + "loss": 100.125, + "step": 1597 + }, + { + "epoch": 0.015126702700655996, + "grad_norm": 639.1223754882812, + "learning_rate": 1.0082018927444795e-06, + "loss": 33.25, + "step": 1598 + }, + { + "epoch": 0.015136168722371049, + "grad_norm": 540.2785034179688, + "learning_rate": 1.0088328075709778e-06, + "loss": 40.3438, + "step": 1599 + }, + { + "epoch": 0.015145634744086103, + "grad_norm": 530.3887329101562, + "learning_rate": 1.0094637223974763e-06, + "loss": 80.0156, + "step": 1600 + }, + { + "epoch": 0.015155100765801156, + "grad_norm": 619.4019165039062, + "learning_rate": 1.0100946372239748e-06, + "loss": 39.0781, + "step": 1601 + }, + { + "epoch": 0.015164566787516211, + "grad_norm": 2.5680928230285645, + "learning_rate": 1.010725552050473e-06, + "loss": 0.7715, + "step": 1602 + }, + { + "epoch": 0.015174032809231264, + "grad_norm": 647.6741943359375, + "learning_rate": 1.0113564668769716e-06, + "loss": 54.4531, + "step": 1603 + }, + { + "epoch": 0.015183498830946319, + "grad_norm": 500.75054931640625, + "learning_rate": 1.01198738170347e-06, + "loss": 37.1719, + "step": 1604 + }, + { + "epoch": 0.015192964852661372, + "grad_norm": 609.9459838867188, + "learning_rate": 1.0126182965299683e-06, + "loss": 41.7188, + "step": 1605 + }, + { + "epoch": 0.015202430874376427, + "grad_norm": 757.8089599609375, + "learning_rate": 1.0132492113564668e-06, + "loss": 33.6875, + "step": 1606 + }, + { + "epoch": 0.01521189689609148, + "grad_norm": 495.50653076171875, + "learning_rate": 1.0138801261829653e-06, + "loss": 63.8125, + "step": 1607 + }, + { + "epoch": 0.015221362917806533, + "grad_norm": 384.1427307128906, + "learning_rate": 1.0145110410094638e-06, + "loss": 39.0781, + "step": 1608 + }, + { + "epoch": 0.015230828939521587, + "grad_norm": 392.07281494140625, + "learning_rate": 1.015141955835962e-06, + "loss": 33.9219, + "step": 1609 + }, + { + "epoch": 0.01524029496123664, + "grad_norm": 606.2182006835938, + "learning_rate": 1.0157728706624604e-06, + "loss": 32.2188, + "step": 1610 + }, + { + "epoch": 0.015249760982951695, + "grad_norm": 359.3892822265625, + "learning_rate": 1.0164037854889589e-06, + "loss": 37.5156, + "step": 1611 + }, + { + "epoch": 0.015259227004666748, + "grad_norm": 322.7551574707031, + "learning_rate": 1.0170347003154574e-06, + "loss": 38.7031, + "step": 1612 + }, + { + "epoch": 0.015268693026381803, + "grad_norm": 710.5620727539062, + "learning_rate": 1.0176656151419558e-06, + "loss": 94.2188, + "step": 1613 + }, + { + "epoch": 0.015278159048096856, + "grad_norm": 981.6589965820312, + "learning_rate": 1.0182965299684543e-06, + "loss": 45.1094, + "step": 1614 + }, + { + "epoch": 0.01528762506981191, + "grad_norm": 384.23712158203125, + "learning_rate": 1.0189274447949528e-06, + "loss": 31.875, + "step": 1615 + }, + { + "epoch": 0.015297091091526964, + "grad_norm": 885.2128295898438, + "learning_rate": 1.019558359621451e-06, + "loss": 36.6406, + "step": 1616 + }, + { + "epoch": 0.015306557113242018, + "grad_norm": 3.574967384338379, + "learning_rate": 1.0201892744479494e-06, + "loss": 0.7729, + "step": 1617 + }, + { + "epoch": 0.015316023134957071, + "grad_norm": 586.27197265625, + "learning_rate": 1.0208201892744479e-06, + "loss": 32.9531, + "step": 1618 + }, + { + "epoch": 0.015325489156672126, + "grad_norm": 314.91815185546875, + "learning_rate": 1.0214511041009464e-06, + "loss": 34.0, + "step": 1619 + }, + { + "epoch": 0.015334955178387179, + "grad_norm": 345.89617919921875, + "learning_rate": 1.0220820189274449e-06, + "loss": 51.1719, + "step": 1620 + }, + { + "epoch": 0.015344421200102234, + "grad_norm": 949.4100341796875, + "learning_rate": 1.0227129337539432e-06, + "loss": 79.625, + "step": 1621 + }, + { + "epoch": 0.015353887221817287, + "grad_norm": 720.751220703125, + "learning_rate": 1.0233438485804414e-06, + "loss": 34.7969, + "step": 1622 + }, + { + "epoch": 0.01536335324353234, + "grad_norm": 392.671630859375, + "learning_rate": 1.02397476340694e-06, + "loss": 39.0, + "step": 1623 + }, + { + "epoch": 0.015372819265247395, + "grad_norm": 531.5504760742188, + "learning_rate": 1.0246056782334384e-06, + "loss": 55.7891, + "step": 1624 + }, + { + "epoch": 0.015382285286962448, + "grad_norm": 565.0335083007812, + "learning_rate": 1.025236593059937e-06, + "loss": 72.5312, + "step": 1625 + }, + { + "epoch": 0.015391751308677502, + "grad_norm": 519.9074096679688, + "learning_rate": 1.0258675078864354e-06, + "loss": 43.3594, + "step": 1626 + }, + { + "epoch": 0.015401217330392555, + "grad_norm": 3.1751227378845215, + "learning_rate": 1.0264984227129337e-06, + "loss": 0.7734, + "step": 1627 + }, + { + "epoch": 0.01541068335210761, + "grad_norm": 1490.4371337890625, + "learning_rate": 1.0271293375394322e-06, + "loss": 103.2031, + "step": 1628 + }, + { + "epoch": 0.015420149373822663, + "grad_norm": 385.8720703125, + "learning_rate": 1.0277602523659305e-06, + "loss": 40.4844, + "step": 1629 + }, + { + "epoch": 0.015429615395537718, + "grad_norm": 922.3438110351562, + "learning_rate": 1.028391167192429e-06, + "loss": 76.7188, + "step": 1630 + }, + { + "epoch": 0.01543908141725277, + "grad_norm": 403.16351318359375, + "learning_rate": 1.0290220820189274e-06, + "loss": 33.6406, + "step": 1631 + }, + { + "epoch": 0.015448547438967826, + "grad_norm": 1458.5068359375, + "learning_rate": 1.0296529968454257e-06, + "loss": 104.0312, + "step": 1632 + }, + { + "epoch": 0.015458013460682879, + "grad_norm": 499.0540771484375, + "learning_rate": 1.0302839116719242e-06, + "loss": 89.0312, + "step": 1633 + }, + { + "epoch": 0.015467479482397933, + "grad_norm": 474.2492980957031, + "learning_rate": 1.0309148264984227e-06, + "loss": 48.6094, + "step": 1634 + }, + { + "epoch": 0.015476945504112986, + "grad_norm": 558.6455688476562, + "learning_rate": 1.0315457413249212e-06, + "loss": 71.8125, + "step": 1635 + }, + { + "epoch": 0.015486411525828041, + "grad_norm": 343.21044921875, + "learning_rate": 1.0321766561514195e-06, + "loss": 28.0781, + "step": 1636 + }, + { + "epoch": 0.015495877547543094, + "grad_norm": 569.449951171875, + "learning_rate": 1.032807570977918e-06, + "loss": 44.0625, + "step": 1637 + }, + { + "epoch": 0.015505343569258147, + "grad_norm": 325.5155029296875, + "learning_rate": 1.0334384858044163e-06, + "loss": 34.25, + "step": 1638 + }, + { + "epoch": 0.015514809590973202, + "grad_norm": 361.2317810058594, + "learning_rate": 1.0340694006309148e-06, + "loss": 35.625, + "step": 1639 + }, + { + "epoch": 0.015524275612688255, + "grad_norm": 859.0040283203125, + "learning_rate": 1.0347003154574132e-06, + "loss": 75.1875, + "step": 1640 + }, + { + "epoch": 0.01553374163440331, + "grad_norm": 1238.6195068359375, + "learning_rate": 1.0353312302839117e-06, + "loss": 46.0156, + "step": 1641 + }, + { + "epoch": 0.015543207656118363, + "grad_norm": 390.5838928222656, + "learning_rate": 1.0359621451104102e-06, + "loss": 38.5, + "step": 1642 + }, + { + "epoch": 0.015552673677833417, + "grad_norm": 398.3189392089844, + "learning_rate": 1.0365930599369083e-06, + "loss": 39.1875, + "step": 1643 + }, + { + "epoch": 0.01556213969954847, + "grad_norm": 547.1773681640625, + "learning_rate": 1.0372239747634068e-06, + "loss": 40.4219, + "step": 1644 + }, + { + "epoch": 0.015571605721263525, + "grad_norm": 518.6434936523438, + "learning_rate": 1.0378548895899053e-06, + "loss": 36.375, + "step": 1645 + }, + { + "epoch": 0.015581071742978578, + "grad_norm": 615.5721435546875, + "learning_rate": 1.0384858044164038e-06, + "loss": 79.4531, + "step": 1646 + }, + { + "epoch": 0.015590537764693633, + "grad_norm": 1050.259521484375, + "learning_rate": 1.0391167192429023e-06, + "loss": 65.8281, + "step": 1647 + }, + { + "epoch": 0.015600003786408686, + "grad_norm": 1045.0582275390625, + "learning_rate": 1.0397476340694008e-06, + "loss": 99.9531, + "step": 1648 + }, + { + "epoch": 0.01560946980812374, + "grad_norm": 385.0968322753906, + "learning_rate": 1.0403785488958988e-06, + "loss": 32.7344, + "step": 1649 + }, + { + "epoch": 0.015618935829838794, + "grad_norm": 842.92529296875, + "learning_rate": 1.0410094637223973e-06, + "loss": 60.8438, + "step": 1650 + }, + { + "epoch": 0.01562840185155385, + "grad_norm": 519.292724609375, + "learning_rate": 1.0416403785488958e-06, + "loss": 71.5156, + "step": 1651 + }, + { + "epoch": 0.0156378678732689, + "grad_norm": 582.1716918945312, + "learning_rate": 1.0422712933753943e-06, + "loss": 53.9844, + "step": 1652 + }, + { + "epoch": 0.015647333894983954, + "grad_norm": 597.8706665039062, + "learning_rate": 1.0429022082018928e-06, + "loss": 39.0, + "step": 1653 + }, + { + "epoch": 0.015656799916699007, + "grad_norm": 931.6563720703125, + "learning_rate": 1.043533123028391e-06, + "loss": 41.4531, + "step": 1654 + }, + { + "epoch": 0.015666265938414064, + "grad_norm": 949.6720581054688, + "learning_rate": 1.0441640378548896e-06, + "loss": 62.9375, + "step": 1655 + }, + { + "epoch": 0.015675731960129117, + "grad_norm": 254.19271850585938, + "learning_rate": 1.0447949526813879e-06, + "loss": 32.25, + "step": 1656 + }, + { + "epoch": 0.01568519798184417, + "grad_norm": 391.3070983886719, + "learning_rate": 1.0454258675078864e-06, + "loss": 32.0, + "step": 1657 + }, + { + "epoch": 0.015694664003559223, + "grad_norm": 840.4525756835938, + "learning_rate": 1.0460567823343848e-06, + "loss": 99.7969, + "step": 1658 + }, + { + "epoch": 0.01570413002527428, + "grad_norm": 344.9937438964844, + "learning_rate": 1.0466876971608833e-06, + "loss": 36.4219, + "step": 1659 + }, + { + "epoch": 0.015713596046989332, + "grad_norm": 636.9520874023438, + "learning_rate": 1.0473186119873816e-06, + "loss": 34.2344, + "step": 1660 + }, + { + "epoch": 0.015723062068704385, + "grad_norm": 381.01953125, + "learning_rate": 1.0479495268138801e-06, + "loss": 49.0312, + "step": 1661 + }, + { + "epoch": 0.01573252809041944, + "grad_norm": 743.704345703125, + "learning_rate": 1.0485804416403786e-06, + "loss": 91.5625, + "step": 1662 + }, + { + "epoch": 0.015741994112134495, + "grad_norm": 496.76959228515625, + "learning_rate": 1.0492113564668769e-06, + "loss": 45.5, + "step": 1663 + }, + { + "epoch": 0.015751460133849548, + "grad_norm": 855.9776000976562, + "learning_rate": 1.0498422712933754e-06, + "loss": 61.6875, + "step": 1664 + }, + { + "epoch": 0.0157609261555646, + "grad_norm": 397.4556579589844, + "learning_rate": 1.0504731861198737e-06, + "loss": 40.7969, + "step": 1665 + }, + { + "epoch": 0.015770392177279654, + "grad_norm": 891.388671875, + "learning_rate": 1.0511041009463722e-06, + "loss": 98.2188, + "step": 1666 + }, + { + "epoch": 0.015779858198994707, + "grad_norm": 613.842529296875, + "learning_rate": 1.0517350157728706e-06, + "loss": 77.5312, + "step": 1667 + }, + { + "epoch": 0.015789324220709763, + "grad_norm": 519.4662475585938, + "learning_rate": 1.0523659305993691e-06, + "loss": 52.7969, + "step": 1668 + }, + { + "epoch": 0.015798790242424816, + "grad_norm": 457.2646179199219, + "learning_rate": 1.0529968454258676e-06, + "loss": 31.2656, + "step": 1669 + }, + { + "epoch": 0.01580825626413987, + "grad_norm": 650.14111328125, + "learning_rate": 1.053627760252366e-06, + "loss": 48.8438, + "step": 1670 + }, + { + "epoch": 0.015817722285854922, + "grad_norm": 2242.306884765625, + "learning_rate": 1.0542586750788642e-06, + "loss": 47.9844, + "step": 1671 + }, + { + "epoch": 0.01582718830756998, + "grad_norm": 545.1943359375, + "learning_rate": 1.0548895899053627e-06, + "loss": 30.9219, + "step": 1672 + }, + { + "epoch": 0.015836654329285032, + "grad_norm": 616.8423461914062, + "learning_rate": 1.0555205047318612e-06, + "loss": 78.1875, + "step": 1673 + }, + { + "epoch": 0.015846120351000085, + "grad_norm": 2.920377731323242, + "learning_rate": 1.0561514195583597e-06, + "loss": 0.978, + "step": 1674 + }, + { + "epoch": 0.015855586372715138, + "grad_norm": 580.9616088867188, + "learning_rate": 1.0567823343848582e-06, + "loss": 36.1406, + "step": 1675 + }, + { + "epoch": 0.015865052394430194, + "grad_norm": 542.4140625, + "learning_rate": 1.0574132492113562e-06, + "loss": 64.7031, + "step": 1676 + }, + { + "epoch": 0.015874518416145247, + "grad_norm": 433.63714599609375, + "learning_rate": 1.0580441640378547e-06, + "loss": 57.5, + "step": 1677 + }, + { + "epoch": 0.0158839844378603, + "grad_norm": 567.9453125, + "learning_rate": 1.0586750788643532e-06, + "loss": 36.1875, + "step": 1678 + }, + { + "epoch": 0.015893450459575353, + "grad_norm": 605.5845336914062, + "learning_rate": 1.0593059936908517e-06, + "loss": 37.3438, + "step": 1679 + }, + { + "epoch": 0.015902916481290406, + "grad_norm": 468.4525146484375, + "learning_rate": 1.0599369085173502e-06, + "loss": 42.5156, + "step": 1680 + }, + { + "epoch": 0.015912382503005463, + "grad_norm": 452.6356506347656, + "learning_rate": 1.0605678233438485e-06, + "loss": 35.375, + "step": 1681 + }, + { + "epoch": 0.015921848524720516, + "grad_norm": 1780.895263671875, + "learning_rate": 1.061198738170347e-06, + "loss": 84.25, + "step": 1682 + }, + { + "epoch": 0.01593131454643557, + "grad_norm": 533.7827758789062, + "learning_rate": 1.0618296529968453e-06, + "loss": 34.375, + "step": 1683 + }, + { + "epoch": 0.015940780568150622, + "grad_norm": 866.1103515625, + "learning_rate": 1.0624605678233438e-06, + "loss": 105.4688, + "step": 1684 + }, + { + "epoch": 0.01595024658986568, + "grad_norm": 826.0418701171875, + "learning_rate": 1.0630914826498422e-06, + "loss": 40.2656, + "step": 1685 + }, + { + "epoch": 0.01595971261158073, + "grad_norm": 480.28546142578125, + "learning_rate": 1.0637223974763407e-06, + "loss": 38.0938, + "step": 1686 + }, + { + "epoch": 0.015969178633295784, + "grad_norm": 665.2730102539062, + "learning_rate": 1.064353312302839e-06, + "loss": 72.9062, + "step": 1687 + }, + { + "epoch": 0.015978644655010837, + "grad_norm": 540.0803833007812, + "learning_rate": 1.0649842271293375e-06, + "loss": 41.2344, + "step": 1688 + }, + { + "epoch": 0.015988110676725894, + "grad_norm": 885.970947265625, + "learning_rate": 1.065615141955836e-06, + "loss": 73.9375, + "step": 1689 + }, + { + "epoch": 0.015997576698440947, + "grad_norm": 474.86865234375, + "learning_rate": 1.0662460567823343e-06, + "loss": 37.3594, + "step": 1690 + }, + { + "epoch": 0.016007042720156, + "grad_norm": 731.8013916015625, + "learning_rate": 1.0668769716088328e-06, + "loss": 100.0078, + "step": 1691 + }, + { + "epoch": 0.016016508741871053, + "grad_norm": 656.1320190429688, + "learning_rate": 1.067507886435331e-06, + "loss": 37.125, + "step": 1692 + }, + { + "epoch": 0.01602597476358611, + "grad_norm": 2.8406147956848145, + "learning_rate": 1.0681388012618296e-06, + "loss": 0.9629, + "step": 1693 + }, + { + "epoch": 0.016035440785301162, + "grad_norm": 475.0668029785156, + "learning_rate": 1.068769716088328e-06, + "loss": 77.8438, + "step": 1694 + }, + { + "epoch": 0.016044906807016215, + "grad_norm": 351.7608947753906, + "learning_rate": 1.0694006309148265e-06, + "loss": 32.3281, + "step": 1695 + }, + { + "epoch": 0.01605437282873127, + "grad_norm": 1165.3621826171875, + "learning_rate": 1.0700315457413248e-06, + "loss": 54.625, + "step": 1696 + }, + { + "epoch": 0.01606383885044632, + "grad_norm": 498.9257507324219, + "learning_rate": 1.0706624605678233e-06, + "loss": 73.0625, + "step": 1697 + }, + { + "epoch": 0.016073304872161378, + "grad_norm": 2.999356985092163, + "learning_rate": 1.0712933753943216e-06, + "loss": 0.875, + "step": 1698 + }, + { + "epoch": 0.01608277089387643, + "grad_norm": 540.3839721679688, + "learning_rate": 1.07192429022082e-06, + "loss": 42.0156, + "step": 1699 + }, + { + "epoch": 0.016092236915591484, + "grad_norm": 2.838178873062134, + "learning_rate": 1.0725552050473186e-06, + "loss": 0.8828, + "step": 1700 + }, + { + "epoch": 0.016101702937306537, + "grad_norm": 400.80426025390625, + "learning_rate": 1.073186119873817e-06, + "loss": 41.4375, + "step": 1701 + }, + { + "epoch": 0.016111168959021593, + "grad_norm": 627.386962890625, + "learning_rate": 1.0738170347003156e-06, + "loss": 44.4844, + "step": 1702 + }, + { + "epoch": 0.016120634980736646, + "grad_norm": 398.4049072265625, + "learning_rate": 1.0744479495268136e-06, + "loss": 33.2969, + "step": 1703 + }, + { + "epoch": 0.0161301010024517, + "grad_norm": 487.301513671875, + "learning_rate": 1.0750788643533121e-06, + "loss": 67.4062, + "step": 1704 + }, + { + "epoch": 0.016139567024166752, + "grad_norm": 3.3775322437286377, + "learning_rate": 1.0757097791798106e-06, + "loss": 0.9165, + "step": 1705 + }, + { + "epoch": 0.01614903304588181, + "grad_norm": 420.8460693359375, + "learning_rate": 1.0763406940063091e-06, + "loss": 46.8594, + "step": 1706 + }, + { + "epoch": 0.016158499067596862, + "grad_norm": 353.3575439453125, + "learning_rate": 1.0769716088328076e-06, + "loss": 46.5625, + "step": 1707 + }, + { + "epoch": 0.016167965089311915, + "grad_norm": 508.2467956542969, + "learning_rate": 1.077602523659306e-06, + "loss": 48.9844, + "step": 1708 + }, + { + "epoch": 0.016177431111026968, + "grad_norm": 716.7093505859375, + "learning_rate": 1.0782334384858044e-06, + "loss": 84.8125, + "step": 1709 + }, + { + "epoch": 0.01618689713274202, + "grad_norm": 425.7799377441406, + "learning_rate": 1.0788643533123027e-06, + "loss": 37.375, + "step": 1710 + }, + { + "epoch": 0.016196363154457077, + "grad_norm": 287.42010498046875, + "learning_rate": 1.0794952681388012e-06, + "loss": 36.875, + "step": 1711 + }, + { + "epoch": 0.01620582917617213, + "grad_norm": 472.5465393066406, + "learning_rate": 1.0801261829652996e-06, + "loss": 65.3438, + "step": 1712 + }, + { + "epoch": 0.016215295197887183, + "grad_norm": 2.9172630310058594, + "learning_rate": 1.0807570977917981e-06, + "loss": 0.8633, + "step": 1713 + }, + { + "epoch": 0.016224761219602236, + "grad_norm": 368.5461120605469, + "learning_rate": 1.0813880126182964e-06, + "loss": 35.9531, + "step": 1714 + }, + { + "epoch": 0.016234227241317293, + "grad_norm": 1042.0780029296875, + "learning_rate": 1.082018927444795e-06, + "loss": 70.7969, + "step": 1715 + }, + { + "epoch": 0.016243693263032346, + "grad_norm": 738.8729858398438, + "learning_rate": 1.0826498422712932e-06, + "loss": 79.75, + "step": 1716 + }, + { + "epoch": 0.0162531592847474, + "grad_norm": 384.94500732421875, + "learning_rate": 1.0832807570977917e-06, + "loss": 37.1562, + "step": 1717 + }, + { + "epoch": 0.016262625306462452, + "grad_norm": 336.48931884765625, + "learning_rate": 1.0839116719242902e-06, + "loss": 29.5938, + "step": 1718 + }, + { + "epoch": 0.01627209132817751, + "grad_norm": 1249.4678955078125, + "learning_rate": 1.0845425867507887e-06, + "loss": 75.3438, + "step": 1719 + }, + { + "epoch": 0.01628155734989256, + "grad_norm": 348.5146484375, + "learning_rate": 1.085173501577287e-06, + "loss": 31.6562, + "step": 1720 + }, + { + "epoch": 0.016291023371607614, + "grad_norm": 901.5489501953125, + "learning_rate": 1.0858044164037854e-06, + "loss": 75.8125, + "step": 1721 + }, + { + "epoch": 0.016300489393322667, + "grad_norm": 380.92779541015625, + "learning_rate": 1.086435331230284e-06, + "loss": 39.2188, + "step": 1722 + }, + { + "epoch": 0.01630995541503772, + "grad_norm": 995.5256958007812, + "learning_rate": 1.0870662460567822e-06, + "loss": 51.0781, + "step": 1723 + }, + { + "epoch": 0.016319421436752777, + "grad_norm": 808.4404907226562, + "learning_rate": 1.0876971608832807e-06, + "loss": 97.5469, + "step": 1724 + }, + { + "epoch": 0.01632888745846783, + "grad_norm": 992.0001831054688, + "learning_rate": 1.088328075709779e-06, + "loss": 72.4844, + "step": 1725 + }, + { + "epoch": 0.016338353480182883, + "grad_norm": 753.4857788085938, + "learning_rate": 1.0889589905362775e-06, + "loss": 55.25, + "step": 1726 + }, + { + "epoch": 0.016347819501897936, + "grad_norm": 368.0390625, + "learning_rate": 1.089589905362776e-06, + "loss": 33.9531, + "step": 1727 + }, + { + "epoch": 0.016357285523612992, + "grad_norm": 351.5782470703125, + "learning_rate": 1.0902208201892745e-06, + "loss": 33.6953, + "step": 1728 + }, + { + "epoch": 0.016366751545328045, + "grad_norm": 586.8836059570312, + "learning_rate": 1.090851735015773e-06, + "loss": 67.4375, + "step": 1729 + }, + { + "epoch": 0.0163762175670431, + "grad_norm": 2019.5118408203125, + "learning_rate": 1.0914826498422712e-06, + "loss": 73.3906, + "step": 1730 + }, + { + "epoch": 0.01638568358875815, + "grad_norm": 567.8345336914062, + "learning_rate": 1.0921135646687695e-06, + "loss": 63.2031, + "step": 1731 + }, + { + "epoch": 0.016395149610473208, + "grad_norm": 330.94940185546875, + "learning_rate": 1.092744479495268e-06, + "loss": 30.9062, + "step": 1732 + }, + { + "epoch": 0.01640461563218826, + "grad_norm": 429.05035400390625, + "learning_rate": 1.0933753943217665e-06, + "loss": 52.875, + "step": 1733 + }, + { + "epoch": 0.016414081653903314, + "grad_norm": 814.8743286132812, + "learning_rate": 1.094006309148265e-06, + "loss": 80.3125, + "step": 1734 + }, + { + "epoch": 0.016423547675618367, + "grad_norm": 423.9749755859375, + "learning_rate": 1.0946372239747635e-06, + "loss": 62.1875, + "step": 1735 + }, + { + "epoch": 0.01643301369733342, + "grad_norm": 411.9945983886719, + "learning_rate": 1.0952681388012616e-06, + "loss": 37.7344, + "step": 1736 + }, + { + "epoch": 0.016442479719048476, + "grad_norm": 602.2581176757812, + "learning_rate": 1.09589905362776e-06, + "loss": 70.75, + "step": 1737 + }, + { + "epoch": 0.01645194574076353, + "grad_norm": 704.5748901367188, + "learning_rate": 1.0965299684542586e-06, + "loss": 88.8594, + "step": 1738 + }, + { + "epoch": 0.016461411762478582, + "grad_norm": 698.968994140625, + "learning_rate": 1.097160883280757e-06, + "loss": 62.1406, + "step": 1739 + }, + { + "epoch": 0.016470877784193635, + "grad_norm": 446.2467346191406, + "learning_rate": 1.0977917981072555e-06, + "loss": 38.8438, + "step": 1740 + }, + { + "epoch": 0.016480343805908692, + "grad_norm": 452.7189025878906, + "learning_rate": 1.098422712933754e-06, + "loss": 35.125, + "step": 1741 + }, + { + "epoch": 0.016489809827623745, + "grad_norm": 468.8706970214844, + "learning_rate": 1.0990536277602523e-06, + "loss": 36.5156, + "step": 1742 + }, + { + "epoch": 0.016499275849338798, + "grad_norm": 2.7570667266845703, + "learning_rate": 1.0996845425867506e-06, + "loss": 0.8096, + "step": 1743 + }, + { + "epoch": 0.01650874187105385, + "grad_norm": 653.8993530273438, + "learning_rate": 1.100315457413249e-06, + "loss": 30.0156, + "step": 1744 + }, + { + "epoch": 0.016518207892768907, + "grad_norm": 680.657958984375, + "learning_rate": 1.1009463722397476e-06, + "loss": 31.5469, + "step": 1745 + }, + { + "epoch": 0.01652767391448396, + "grad_norm": 953.1234741210938, + "learning_rate": 1.101577287066246e-06, + "loss": 89.5625, + "step": 1746 + }, + { + "epoch": 0.016537139936199013, + "grad_norm": 486.2539367675781, + "learning_rate": 1.1022082018927444e-06, + "loss": 36.9062, + "step": 1747 + }, + { + "epoch": 0.016546605957914066, + "grad_norm": 721.6468505859375, + "learning_rate": 1.1028391167192429e-06, + "loss": 65.0625, + "step": 1748 + }, + { + "epoch": 0.016556071979629123, + "grad_norm": 963.2139282226562, + "learning_rate": 1.1034700315457413e-06, + "loss": 55.8672, + "step": 1749 + }, + { + "epoch": 0.016565538001344176, + "grad_norm": 237.7793731689453, + "learning_rate": 1.1041009463722396e-06, + "loss": 27.4844, + "step": 1750 + }, + { + "epoch": 0.01657500402305923, + "grad_norm": 362.14984130859375, + "learning_rate": 1.1047318611987381e-06, + "loss": 45.7344, + "step": 1751 + }, + { + "epoch": 0.016584470044774282, + "grad_norm": 719.058349609375, + "learning_rate": 1.1053627760252366e-06, + "loss": 45.4062, + "step": 1752 + }, + { + "epoch": 0.016593936066489335, + "grad_norm": 828.1848754882812, + "learning_rate": 1.1059936908517349e-06, + "loss": 38.2656, + "step": 1753 + }, + { + "epoch": 0.01660340208820439, + "grad_norm": 541.8892822265625, + "learning_rate": 1.1066246056782334e-06, + "loss": 39.875, + "step": 1754 + }, + { + "epoch": 0.016612868109919444, + "grad_norm": 591.5081176757812, + "learning_rate": 1.1072555205047319e-06, + "loss": 37.3281, + "step": 1755 + }, + { + "epoch": 0.016622334131634497, + "grad_norm": 654.0038452148438, + "learning_rate": 1.1078864353312304e-06, + "loss": 35.4844, + "step": 1756 + }, + { + "epoch": 0.01663180015334955, + "grad_norm": 326.9129333496094, + "learning_rate": 1.1085173501577287e-06, + "loss": 39.0312, + "step": 1757 + }, + { + "epoch": 0.016641266175064607, + "grad_norm": 1263.4073486328125, + "learning_rate": 1.109148264984227e-06, + "loss": 88.2812, + "step": 1758 + }, + { + "epoch": 0.01665073219677966, + "grad_norm": 770.7827758789062, + "learning_rate": 1.1097791798107254e-06, + "loss": 89.8125, + "step": 1759 + }, + { + "epoch": 0.016660198218494713, + "grad_norm": 597.7844848632812, + "learning_rate": 1.110410094637224e-06, + "loss": 83.4375, + "step": 1760 + }, + { + "epoch": 0.016669664240209766, + "grad_norm": 551.2822875976562, + "learning_rate": 1.1110410094637224e-06, + "loss": 52.4062, + "step": 1761 + }, + { + "epoch": 0.016679130261924822, + "grad_norm": 2.9120187759399414, + "learning_rate": 1.111671924290221e-06, + "loss": 0.834, + "step": 1762 + }, + { + "epoch": 0.016688596283639875, + "grad_norm": 295.5687561035156, + "learning_rate": 1.1123028391167194e-06, + "loss": 37.3125, + "step": 1763 + }, + { + "epoch": 0.01669806230535493, + "grad_norm": 2.733940362930298, + "learning_rate": 1.1129337539432175e-06, + "loss": 0.8711, + "step": 1764 + }, + { + "epoch": 0.01670752832706998, + "grad_norm": 368.4912109375, + "learning_rate": 1.113564668769716e-06, + "loss": 38.7031, + "step": 1765 + }, + { + "epoch": 0.016716994348785034, + "grad_norm": 770.675537109375, + "learning_rate": 1.1141955835962145e-06, + "loss": 84.4375, + "step": 1766 + }, + { + "epoch": 0.01672646037050009, + "grad_norm": 366.6790771484375, + "learning_rate": 1.114826498422713e-06, + "loss": 36.7031, + "step": 1767 + }, + { + "epoch": 0.016735926392215144, + "grad_norm": 332.6624450683594, + "learning_rate": 1.1154574132492114e-06, + "loss": 32.7656, + "step": 1768 + }, + { + "epoch": 0.016745392413930197, + "grad_norm": 792.1138305664062, + "learning_rate": 1.1160883280757097e-06, + "loss": 53.5469, + "step": 1769 + }, + { + "epoch": 0.01675485843564525, + "grad_norm": 409.1287536621094, + "learning_rate": 1.116719242902208e-06, + "loss": 34.7031, + "step": 1770 + }, + { + "epoch": 0.016764324457360306, + "grad_norm": 2.813776731491089, + "learning_rate": 1.1173501577287065e-06, + "loss": 0.8457, + "step": 1771 + }, + { + "epoch": 0.01677379047907536, + "grad_norm": 412.25531005859375, + "learning_rate": 1.117981072555205e-06, + "loss": 36.2969, + "step": 1772 + }, + { + "epoch": 0.016783256500790412, + "grad_norm": 525.5071411132812, + "learning_rate": 1.1186119873817035e-06, + "loss": 37.1875, + "step": 1773 + }, + { + "epoch": 0.016792722522505465, + "grad_norm": 533.2871704101562, + "learning_rate": 1.119242902208202e-06, + "loss": 39.5469, + "step": 1774 + }, + { + "epoch": 0.016802188544220522, + "grad_norm": 2.7620437145233154, + "learning_rate": 1.1198738170347003e-06, + "loss": 0.9568, + "step": 1775 + }, + { + "epoch": 0.016811654565935575, + "grad_norm": 348.8665466308594, + "learning_rate": 1.1205047318611987e-06, + "loss": 30.0312, + "step": 1776 + }, + { + "epoch": 0.016821120587650628, + "grad_norm": 493.4899597167969, + "learning_rate": 1.121135646687697e-06, + "loss": 53.8438, + "step": 1777 + }, + { + "epoch": 0.01683058660936568, + "grad_norm": 725.303955078125, + "learning_rate": 1.1217665615141955e-06, + "loss": 45.6719, + "step": 1778 + }, + { + "epoch": 0.016840052631080734, + "grad_norm": 308.6080017089844, + "learning_rate": 1.122397476340694e-06, + "loss": 32.8594, + "step": 1779 + }, + { + "epoch": 0.01684951865279579, + "grad_norm": 324.0197448730469, + "learning_rate": 1.1230283911671923e-06, + "loss": 41.5469, + "step": 1780 + }, + { + "epoch": 0.016858984674510843, + "grad_norm": 3.0031628608703613, + "learning_rate": 1.1236593059936908e-06, + "loss": 0.9819, + "step": 1781 + }, + { + "epoch": 0.016868450696225896, + "grad_norm": 626.6452026367188, + "learning_rate": 1.1242902208201893e-06, + "loss": 32.8203, + "step": 1782 + }, + { + "epoch": 0.01687791671794095, + "grad_norm": 281.9786682128906, + "learning_rate": 1.1249211356466878e-06, + "loss": 35.6875, + "step": 1783 + }, + { + "epoch": 0.016887382739656006, + "grad_norm": 420.8489990234375, + "learning_rate": 1.125552050473186e-06, + "loss": 47.4062, + "step": 1784 + }, + { + "epoch": 0.01689684876137106, + "grad_norm": 265.87347412109375, + "learning_rate": 1.1261829652996845e-06, + "loss": 33.8906, + "step": 1785 + }, + { + "epoch": 0.016906314783086112, + "grad_norm": 525.3480834960938, + "learning_rate": 1.1268138801261828e-06, + "loss": 38.8828, + "step": 1786 + }, + { + "epoch": 0.016915780804801165, + "grad_norm": 228.9912872314453, + "learning_rate": 1.1274447949526813e-06, + "loss": 28.3125, + "step": 1787 + }, + { + "epoch": 0.01692524682651622, + "grad_norm": 560.3884887695312, + "learning_rate": 1.1280757097791798e-06, + "loss": 34.8906, + "step": 1788 + }, + { + "epoch": 0.016934712848231274, + "grad_norm": 492.8253479003906, + "learning_rate": 1.1287066246056783e-06, + "loss": 53.6875, + "step": 1789 + }, + { + "epoch": 0.016944178869946327, + "grad_norm": 490.9655456542969, + "learning_rate": 1.1293375394321766e-06, + "loss": 32.375, + "step": 1790 + }, + { + "epoch": 0.01695364489166138, + "grad_norm": 1440.664306640625, + "learning_rate": 1.1299684542586749e-06, + "loss": 73.5312, + "step": 1791 + }, + { + "epoch": 0.016963110913376437, + "grad_norm": 542.3134155273438, + "learning_rate": 1.1305993690851734e-06, + "loss": 47.7188, + "step": 1792 + }, + { + "epoch": 0.01697257693509149, + "grad_norm": 493.11968994140625, + "learning_rate": 1.1312302839116719e-06, + "loss": 31.6875, + "step": 1793 + }, + { + "epoch": 0.016982042956806543, + "grad_norm": 443.6826477050781, + "learning_rate": 1.1318611987381703e-06, + "loss": 34.9219, + "step": 1794 + }, + { + "epoch": 0.016991508978521596, + "grad_norm": 295.4015808105469, + "learning_rate": 1.1324921135646688e-06, + "loss": 32.7969, + "step": 1795 + }, + { + "epoch": 0.01700097500023665, + "grad_norm": 425.1336364746094, + "learning_rate": 1.1331230283911673e-06, + "loss": 37.7812, + "step": 1796 + }, + { + "epoch": 0.017010441021951705, + "grad_norm": 402.3957824707031, + "learning_rate": 1.1337539432176654e-06, + "loss": 36.2031, + "step": 1797 + }, + { + "epoch": 0.01701990704366676, + "grad_norm": 318.6095275878906, + "learning_rate": 1.1343848580441639e-06, + "loss": 31.75, + "step": 1798 + }, + { + "epoch": 0.01702937306538181, + "grad_norm": 787.3699340820312, + "learning_rate": 1.1350157728706624e-06, + "loss": 77.6875, + "step": 1799 + }, + { + "epoch": 0.017038839087096865, + "grad_norm": 433.5979309082031, + "learning_rate": 1.1356466876971609e-06, + "loss": 42.5625, + "step": 1800 + }, + { + "epoch": 0.01704830510881192, + "grad_norm": 206.4489288330078, + "learning_rate": 1.1362776025236594e-06, + "loss": 29.4062, + "step": 1801 + }, + { + "epoch": 0.017057771130526974, + "grad_norm": 409.8436279296875, + "learning_rate": 1.1369085173501577e-06, + "loss": 32.6719, + "step": 1802 + }, + { + "epoch": 0.017067237152242027, + "grad_norm": 2.417670249938965, + "learning_rate": 1.1375394321766561e-06, + "loss": 0.8501, + "step": 1803 + }, + { + "epoch": 0.01707670317395708, + "grad_norm": 576.62841796875, + "learning_rate": 1.1381703470031544e-06, + "loss": 74.3281, + "step": 1804 + }, + { + "epoch": 0.017086169195672136, + "grad_norm": 339.8450927734375, + "learning_rate": 1.138801261829653e-06, + "loss": 29.5156, + "step": 1805 + }, + { + "epoch": 0.01709563521738719, + "grad_norm": 469.55010986328125, + "learning_rate": 1.1394321766561514e-06, + "loss": 47.5, + "step": 1806 + }, + { + "epoch": 0.017105101239102243, + "grad_norm": 677.39697265625, + "learning_rate": 1.14006309148265e-06, + "loss": 91.3438, + "step": 1807 + }, + { + "epoch": 0.017114567260817296, + "grad_norm": 713.2576904296875, + "learning_rate": 1.1406940063091482e-06, + "loss": 57.875, + "step": 1808 + }, + { + "epoch": 0.01712403328253235, + "grad_norm": 1504.5362548828125, + "learning_rate": 1.1413249211356467e-06, + "loss": 68.6172, + "step": 1809 + }, + { + "epoch": 0.017133499304247405, + "grad_norm": 812.543701171875, + "learning_rate": 1.141955835962145e-06, + "loss": 72.8438, + "step": 1810 + }, + { + "epoch": 0.017142965325962458, + "grad_norm": 386.485107421875, + "learning_rate": 1.1425867507886435e-06, + "loss": 33.8281, + "step": 1811 + }, + { + "epoch": 0.01715243134767751, + "grad_norm": 801.1355590820312, + "learning_rate": 1.143217665615142e-06, + "loss": 76.4219, + "step": 1812 + }, + { + "epoch": 0.017161897369392564, + "grad_norm": 604.911376953125, + "learning_rate": 1.1438485804416402e-06, + "loss": 64.625, + "step": 1813 + }, + { + "epoch": 0.01717136339110762, + "grad_norm": 817.3651123046875, + "learning_rate": 1.1444794952681387e-06, + "loss": 91.5625, + "step": 1814 + }, + { + "epoch": 0.017180829412822674, + "grad_norm": 300.9106140136719, + "learning_rate": 1.1451104100946372e-06, + "loss": 29.7812, + "step": 1815 + }, + { + "epoch": 0.017190295434537727, + "grad_norm": 678.43896484375, + "learning_rate": 1.1457413249211357e-06, + "loss": 77.25, + "step": 1816 + }, + { + "epoch": 0.01719976145625278, + "grad_norm": 2.983091115951538, + "learning_rate": 1.146372239747634e-06, + "loss": 0.8179, + "step": 1817 + }, + { + "epoch": 0.017209227477967836, + "grad_norm": 298.7666015625, + "learning_rate": 1.1470031545741325e-06, + "loss": 37.6406, + "step": 1818 + }, + { + "epoch": 0.01721869349968289, + "grad_norm": 640.8468017578125, + "learning_rate": 1.1476340694006308e-06, + "loss": 31.625, + "step": 1819 + }, + { + "epoch": 0.017228159521397942, + "grad_norm": 322.48614501953125, + "learning_rate": 1.1482649842271293e-06, + "loss": 34.25, + "step": 1820 + }, + { + "epoch": 0.017237625543112995, + "grad_norm": 2.909320592880249, + "learning_rate": 1.1488958990536277e-06, + "loss": 0.9058, + "step": 1821 + }, + { + "epoch": 0.017247091564828048, + "grad_norm": 3.1159634590148926, + "learning_rate": 1.1495268138801262e-06, + "loss": 0.7253, + "step": 1822 + }, + { + "epoch": 0.017256557586543105, + "grad_norm": 747.490478515625, + "learning_rate": 1.1501577287066247e-06, + "loss": 86.75, + "step": 1823 + }, + { + "epoch": 0.017266023608258158, + "grad_norm": 960.1616821289062, + "learning_rate": 1.1507886435331228e-06, + "loss": 58.8359, + "step": 1824 + }, + { + "epoch": 0.01727548962997321, + "grad_norm": 421.95654296875, + "learning_rate": 1.1514195583596213e-06, + "loss": 40.7656, + "step": 1825 + }, + { + "epoch": 0.017284955651688264, + "grad_norm": 924.6177368164062, + "learning_rate": 1.1520504731861198e-06, + "loss": 57.375, + "step": 1826 + }, + { + "epoch": 0.01729442167340332, + "grad_norm": 2.9176149368286133, + "learning_rate": 1.1526813880126183e-06, + "loss": 0.8599, + "step": 1827 + }, + { + "epoch": 0.017303887695118373, + "grad_norm": 320.55743408203125, + "learning_rate": 1.1533123028391168e-06, + "loss": 41.0938, + "step": 1828 + }, + { + "epoch": 0.017313353716833426, + "grad_norm": 354.20330810546875, + "learning_rate": 1.1539432176656153e-06, + "loss": 35.9688, + "step": 1829 + }, + { + "epoch": 0.01732281973854848, + "grad_norm": 542.0612182617188, + "learning_rate": 1.1545741324921135e-06, + "loss": 51.5469, + "step": 1830 + }, + { + "epoch": 0.017332285760263536, + "grad_norm": 272.3559875488281, + "learning_rate": 1.1552050473186118e-06, + "loss": 31.7344, + "step": 1831 + }, + { + "epoch": 0.01734175178197859, + "grad_norm": 722.7666625976562, + "learning_rate": 1.1558359621451103e-06, + "loss": 68.375, + "step": 1832 + }, + { + "epoch": 0.01735121780369364, + "grad_norm": 314.66668701171875, + "learning_rate": 1.1564668769716088e-06, + "loss": 30.2969, + "step": 1833 + }, + { + "epoch": 0.017360683825408695, + "grad_norm": 469.9563903808594, + "learning_rate": 1.1570977917981073e-06, + "loss": 36.9688, + "step": 1834 + }, + { + "epoch": 0.01737014984712375, + "grad_norm": 316.02679443359375, + "learning_rate": 1.1577287066246056e-06, + "loss": 37.9531, + "step": 1835 + }, + { + "epoch": 0.017379615868838804, + "grad_norm": 507.0118408203125, + "learning_rate": 1.158359621451104e-06, + "loss": 27.1094, + "step": 1836 + }, + { + "epoch": 0.017389081890553857, + "grad_norm": 416.8763427734375, + "learning_rate": 1.1589905362776024e-06, + "loss": 33.5, + "step": 1837 + }, + { + "epoch": 0.01739854791226891, + "grad_norm": 590.4400634765625, + "learning_rate": 1.1596214511041009e-06, + "loss": 78.75, + "step": 1838 + }, + { + "epoch": 0.017408013933983963, + "grad_norm": 640.1343383789062, + "learning_rate": 1.1602523659305993e-06, + "loss": 41.5156, + "step": 1839 + }, + { + "epoch": 0.01741747995569902, + "grad_norm": 570.1962890625, + "learning_rate": 1.1608832807570978e-06, + "loss": 79.125, + "step": 1840 + }, + { + "epoch": 0.017426945977414073, + "grad_norm": 398.20880126953125, + "learning_rate": 1.1615141955835961e-06, + "loss": 36.8438, + "step": 1841 + }, + { + "epoch": 0.017436411999129126, + "grad_norm": 1069.8817138671875, + "learning_rate": 1.1621451104100946e-06, + "loss": 75.7969, + "step": 1842 + }, + { + "epoch": 0.01744587802084418, + "grad_norm": 451.7537841796875, + "learning_rate": 1.162776025236593e-06, + "loss": 38.5156, + "step": 1843 + }, + { + "epoch": 0.017455344042559235, + "grad_norm": 380.56103515625, + "learning_rate": 1.1634069400630914e-06, + "loss": 41.0625, + "step": 1844 + }, + { + "epoch": 0.017464810064274288, + "grad_norm": 460.89923095703125, + "learning_rate": 1.1640378548895899e-06, + "loss": 34.1719, + "step": 1845 + }, + { + "epoch": 0.01747427608598934, + "grad_norm": 378.84075927734375, + "learning_rate": 1.1646687697160882e-06, + "loss": 33.5, + "step": 1846 + }, + { + "epoch": 0.017483742107704394, + "grad_norm": 815.3104858398438, + "learning_rate": 1.1652996845425867e-06, + "loss": 68.1094, + "step": 1847 + }, + { + "epoch": 0.01749320812941945, + "grad_norm": 409.7140197753906, + "learning_rate": 1.1659305993690851e-06, + "loss": 38.5781, + "step": 1848 + }, + { + "epoch": 0.017502674151134504, + "grad_norm": 288.7345275878906, + "learning_rate": 1.1665615141955836e-06, + "loss": 33.8438, + "step": 1849 + }, + { + "epoch": 0.017512140172849557, + "grad_norm": 426.1141052246094, + "learning_rate": 1.1671924290220821e-06, + "loss": 31.75, + "step": 1850 + }, + { + "epoch": 0.01752160619456461, + "grad_norm": 551.4730834960938, + "learning_rate": 1.1678233438485802e-06, + "loss": 40.875, + "step": 1851 + }, + { + "epoch": 0.017531072216279663, + "grad_norm": 1395.2723388671875, + "learning_rate": 1.1684542586750787e-06, + "loss": 116.2812, + "step": 1852 + }, + { + "epoch": 0.01754053823799472, + "grad_norm": 396.5265197753906, + "learning_rate": 1.1690851735015772e-06, + "loss": 32.4688, + "step": 1853 + }, + { + "epoch": 0.017550004259709772, + "grad_norm": 2760.52734375, + "learning_rate": 1.1697160883280757e-06, + "loss": 78.5156, + "step": 1854 + }, + { + "epoch": 0.017559470281424825, + "grad_norm": 363.58056640625, + "learning_rate": 1.1703470031545742e-06, + "loss": 29.1562, + "step": 1855 + }, + { + "epoch": 0.017568936303139878, + "grad_norm": 391.2961120605469, + "learning_rate": 1.1709779179810727e-06, + "loss": 31.1719, + "step": 1856 + }, + { + "epoch": 0.017578402324854935, + "grad_norm": 395.7762451171875, + "learning_rate": 1.1716088328075707e-06, + "loss": 30.4219, + "step": 1857 + }, + { + "epoch": 0.017587868346569988, + "grad_norm": 577.7101440429688, + "learning_rate": 1.1722397476340692e-06, + "loss": 40.5781, + "step": 1858 + }, + { + "epoch": 0.01759733436828504, + "grad_norm": 842.7997436523438, + "learning_rate": 1.1728706624605677e-06, + "loss": 78.9375, + "step": 1859 + }, + { + "epoch": 0.017606800390000094, + "grad_norm": 929.2735595703125, + "learning_rate": 1.1735015772870662e-06, + "loss": 56.6719, + "step": 1860 + }, + { + "epoch": 0.01761626641171515, + "grad_norm": 841.3222045898438, + "learning_rate": 1.1741324921135647e-06, + "loss": 37.4531, + "step": 1861 + }, + { + "epoch": 0.017625732433430203, + "grad_norm": 439.88836669921875, + "learning_rate": 1.174763406940063e-06, + "loss": 32.125, + "step": 1862 + }, + { + "epoch": 0.017635198455145256, + "grad_norm": 556.1554565429688, + "learning_rate": 1.1753943217665615e-06, + "loss": 48.1875, + "step": 1863 + }, + { + "epoch": 0.01764466447686031, + "grad_norm": 609.6834716796875, + "learning_rate": 1.1760252365930598e-06, + "loss": 44.2969, + "step": 1864 + }, + { + "epoch": 0.017654130498575362, + "grad_norm": 256.44342041015625, + "learning_rate": 1.1766561514195583e-06, + "loss": 31.6719, + "step": 1865 + }, + { + "epoch": 0.01766359652029042, + "grad_norm": 1196.64599609375, + "learning_rate": 1.1772870662460567e-06, + "loss": 72.5625, + "step": 1866 + }, + { + "epoch": 0.01767306254200547, + "grad_norm": 410.1291198730469, + "learning_rate": 1.1779179810725552e-06, + "loss": 33.25, + "step": 1867 + }, + { + "epoch": 0.017682528563720525, + "grad_norm": 450.711181640625, + "learning_rate": 1.1785488958990535e-06, + "loss": 38.25, + "step": 1868 + }, + { + "epoch": 0.017691994585435578, + "grad_norm": 283.5461120605469, + "learning_rate": 1.179179810725552e-06, + "loss": 27.7188, + "step": 1869 + }, + { + "epoch": 0.017701460607150634, + "grad_norm": 759.0654907226562, + "learning_rate": 1.1798107255520505e-06, + "loss": 82.0938, + "step": 1870 + }, + { + "epoch": 0.017710926628865687, + "grad_norm": 652.7574462890625, + "learning_rate": 1.1804416403785488e-06, + "loss": 92.0312, + "step": 1871 + }, + { + "epoch": 0.01772039265058074, + "grad_norm": 866.2205200195312, + "learning_rate": 1.1810725552050473e-06, + "loss": 59.1406, + "step": 1872 + }, + { + "epoch": 0.017729858672295793, + "grad_norm": 756.0506591796875, + "learning_rate": 1.1817034700315456e-06, + "loss": 66.8125, + "step": 1873 + }, + { + "epoch": 0.01773932469401085, + "grad_norm": 3.616075277328491, + "learning_rate": 1.182334384858044e-06, + "loss": 0.937, + "step": 1874 + }, + { + "epoch": 0.017748790715725903, + "grad_norm": 334.2268371582031, + "learning_rate": 1.1829652996845425e-06, + "loss": 30.9375, + "step": 1875 + }, + { + "epoch": 0.017758256737440956, + "grad_norm": 779.3422241210938, + "learning_rate": 1.183596214511041e-06, + "loss": 56.375, + "step": 1876 + }, + { + "epoch": 0.01776772275915601, + "grad_norm": 669.1897583007812, + "learning_rate": 1.1842271293375395e-06, + "loss": 49.75, + "step": 1877 + }, + { + "epoch": 0.017777188780871065, + "grad_norm": 416.17315673828125, + "learning_rate": 1.1848580441640378e-06, + "loss": 33.625, + "step": 1878 + }, + { + "epoch": 0.017786654802586118, + "grad_norm": 338.4939880371094, + "learning_rate": 1.185488958990536e-06, + "loss": 33.8281, + "step": 1879 + }, + { + "epoch": 0.01779612082430117, + "grad_norm": 444.8028259277344, + "learning_rate": 1.1861198738170346e-06, + "loss": 34.7188, + "step": 1880 + }, + { + "epoch": 0.017805586846016224, + "grad_norm": 1514.822998046875, + "learning_rate": 1.186750788643533e-06, + "loss": 72.8594, + "step": 1881 + }, + { + "epoch": 0.017815052867731277, + "grad_norm": 268.0196838378906, + "learning_rate": 1.1873817034700316e-06, + "loss": 25.7344, + "step": 1882 + }, + { + "epoch": 0.017824518889446334, + "grad_norm": 601.5250854492188, + "learning_rate": 1.18801261829653e-06, + "loss": 46.5312, + "step": 1883 + }, + { + "epoch": 0.017833984911161387, + "grad_norm": 697.5971069335938, + "learning_rate": 1.1886435331230281e-06, + "loss": 36.0, + "step": 1884 + }, + { + "epoch": 0.01784345093287644, + "grad_norm": 559.5159912109375, + "learning_rate": 1.1892744479495266e-06, + "loss": 38.5312, + "step": 1885 + }, + { + "epoch": 0.017852916954591493, + "grad_norm": 1390.0335693359375, + "learning_rate": 1.1899053627760251e-06, + "loss": 75.0938, + "step": 1886 + }, + { + "epoch": 0.01786238297630655, + "grad_norm": 327.7848205566406, + "learning_rate": 1.1905362776025236e-06, + "loss": 32.1562, + "step": 1887 + }, + { + "epoch": 0.017871848998021602, + "grad_norm": 521.9337158203125, + "learning_rate": 1.191167192429022e-06, + "loss": 62.9844, + "step": 1888 + }, + { + "epoch": 0.017881315019736655, + "grad_norm": 657.2332763671875, + "learning_rate": 1.1917981072555206e-06, + "loss": 40.7188, + "step": 1889 + }, + { + "epoch": 0.017890781041451708, + "grad_norm": 372.6358642578125, + "learning_rate": 1.1924290220820189e-06, + "loss": 36.3125, + "step": 1890 + }, + { + "epoch": 0.017900247063166765, + "grad_norm": 845.8232421875, + "learning_rate": 1.1930599369085172e-06, + "loss": 59.7344, + "step": 1891 + }, + { + "epoch": 0.017909713084881818, + "grad_norm": 242.6021270751953, + "learning_rate": 1.1936908517350157e-06, + "loss": 31.0469, + "step": 1892 + }, + { + "epoch": 0.01791917910659687, + "grad_norm": 433.31024169921875, + "learning_rate": 1.1943217665615141e-06, + "loss": 29.7031, + "step": 1893 + }, + { + "epoch": 0.017928645128311924, + "grad_norm": 452.2779846191406, + "learning_rate": 1.1949526813880126e-06, + "loss": 35.8438, + "step": 1894 + }, + { + "epoch": 0.017938111150026977, + "grad_norm": 250.09254455566406, + "learning_rate": 1.195583596214511e-06, + "loss": 32.0938, + "step": 1895 + }, + { + "epoch": 0.017947577171742033, + "grad_norm": 911.1326904296875, + "learning_rate": 1.1962145110410094e-06, + "loss": 46.5781, + "step": 1896 + }, + { + "epoch": 0.017957043193457086, + "grad_norm": 845.9611206054688, + "learning_rate": 1.196845425867508e-06, + "loss": 60.1875, + "step": 1897 + }, + { + "epoch": 0.01796650921517214, + "grad_norm": 321.9201965332031, + "learning_rate": 1.1974763406940062e-06, + "loss": 34.6406, + "step": 1898 + }, + { + "epoch": 0.017975975236887192, + "grad_norm": 3.446354389190674, + "learning_rate": 1.1981072555205047e-06, + "loss": 0.908, + "step": 1899 + }, + { + "epoch": 0.01798544125860225, + "grad_norm": 653.6058959960938, + "learning_rate": 1.1987381703470032e-06, + "loss": 78.6719, + "step": 1900 + }, + { + "epoch": 0.0179949072803173, + "grad_norm": 602.793212890625, + "learning_rate": 1.1993690851735015e-06, + "loss": 32.9531, + "step": 1901 + }, + { + "epoch": 0.018004373302032355, + "grad_norm": 805.3904418945312, + "learning_rate": 1.2e-06, + "loss": 59.2188, + "step": 1902 + }, + { + "epoch": 0.018013839323747408, + "grad_norm": 460.6910400390625, + "learning_rate": 1.2006309148264984e-06, + "loss": 34.8906, + "step": 1903 + }, + { + "epoch": 0.018023305345462464, + "grad_norm": 872.9583740234375, + "learning_rate": 1.201261829652997e-06, + "loss": 63.8438, + "step": 1904 + }, + { + "epoch": 0.018032771367177517, + "grad_norm": 467.94189453125, + "learning_rate": 1.2018927444794952e-06, + "loss": 46.8125, + "step": 1905 + }, + { + "epoch": 0.01804223738889257, + "grad_norm": 671.99658203125, + "learning_rate": 1.2025236593059935e-06, + "loss": 86.8438, + "step": 1906 + }, + { + "epoch": 0.018051703410607623, + "grad_norm": 564.1636352539062, + "learning_rate": 1.203154574132492e-06, + "loss": 38.6406, + "step": 1907 + }, + { + "epoch": 0.018061169432322676, + "grad_norm": 320.685302734375, + "learning_rate": 1.2037854889589905e-06, + "loss": 32.6875, + "step": 1908 + }, + { + "epoch": 0.018070635454037733, + "grad_norm": 428.6639709472656, + "learning_rate": 1.204416403785489e-06, + "loss": 31.5781, + "step": 1909 + }, + { + "epoch": 0.018080101475752786, + "grad_norm": 509.7602233886719, + "learning_rate": 1.2050473186119875e-06, + "loss": 53.8906, + "step": 1910 + }, + { + "epoch": 0.01808956749746784, + "grad_norm": 312.21990966796875, + "learning_rate": 1.2056782334384857e-06, + "loss": 36.0469, + "step": 1911 + }, + { + "epoch": 0.01809903351918289, + "grad_norm": 783.103759765625, + "learning_rate": 1.206309148264984e-06, + "loss": 52.7188, + "step": 1912 + }, + { + "epoch": 0.018108499540897948, + "grad_norm": 709.7259521484375, + "learning_rate": 1.2069400630914825e-06, + "loss": 44.2031, + "step": 1913 + }, + { + "epoch": 0.018117965562613, + "grad_norm": 358.9385986328125, + "learning_rate": 1.207570977917981e-06, + "loss": 33.5, + "step": 1914 + }, + { + "epoch": 0.018127431584328054, + "grad_norm": 329.25048828125, + "learning_rate": 1.2082018927444795e-06, + "loss": 33.5156, + "step": 1915 + }, + { + "epoch": 0.018136897606043107, + "grad_norm": 345.3670349121094, + "learning_rate": 1.208832807570978e-06, + "loss": 33.5156, + "step": 1916 + }, + { + "epoch": 0.018146363627758164, + "grad_norm": 1009.281494140625, + "learning_rate": 1.2094637223974763e-06, + "loss": 65.1562, + "step": 1917 + }, + { + "epoch": 0.018155829649473217, + "grad_norm": 556.6182250976562, + "learning_rate": 1.2100946372239746e-06, + "loss": 40.2344, + "step": 1918 + }, + { + "epoch": 0.01816529567118827, + "grad_norm": 4317.2724609375, + "learning_rate": 1.210725552050473e-06, + "loss": 67.0156, + "step": 1919 + }, + { + "epoch": 0.018174761692903323, + "grad_norm": 666.6458740234375, + "learning_rate": 1.2113564668769715e-06, + "loss": 63.6641, + "step": 1920 + }, + { + "epoch": 0.018184227714618376, + "grad_norm": 272.67718505859375, + "learning_rate": 1.21198738170347e-06, + "loss": 32.5, + "step": 1921 + }, + { + "epoch": 0.018193693736333432, + "grad_norm": 660.0942993164062, + "learning_rate": 1.2126182965299685e-06, + "loss": 65.5, + "step": 1922 + }, + { + "epoch": 0.018203159758048485, + "grad_norm": 1903.6678466796875, + "learning_rate": 1.2132492113564668e-06, + "loss": 67.75, + "step": 1923 + }, + { + "epoch": 0.018212625779763538, + "grad_norm": 3.1346993446350098, + "learning_rate": 1.2138801261829653e-06, + "loss": 0.9429, + "step": 1924 + }, + { + "epoch": 0.01822209180147859, + "grad_norm": 364.39508056640625, + "learning_rate": 1.2145110410094636e-06, + "loss": 39.0625, + "step": 1925 + }, + { + "epoch": 0.018231557823193648, + "grad_norm": 779.1327514648438, + "learning_rate": 1.215141955835962e-06, + "loss": 43.0156, + "step": 1926 + }, + { + "epoch": 0.0182410238449087, + "grad_norm": 587.0258178710938, + "learning_rate": 1.2157728706624606e-06, + "loss": 69.7188, + "step": 1927 + }, + { + "epoch": 0.018250489866623754, + "grad_norm": 781.679931640625, + "learning_rate": 1.2164037854889589e-06, + "loss": 94.2344, + "step": 1928 + }, + { + "epoch": 0.018259955888338807, + "grad_norm": 577.1917724609375, + "learning_rate": 1.2170347003154573e-06, + "loss": 38.25, + "step": 1929 + }, + { + "epoch": 0.018269421910053863, + "grad_norm": 418.002685546875, + "learning_rate": 1.2176656151419558e-06, + "loss": 41.9375, + "step": 1930 + }, + { + "epoch": 0.018278887931768916, + "grad_norm": 1406.249755859375, + "learning_rate": 1.2182965299684541e-06, + "loss": 79.6406, + "step": 1931 + }, + { + "epoch": 0.01828835395348397, + "grad_norm": 1071.46728515625, + "learning_rate": 1.2189274447949526e-06, + "loss": 77.7891, + "step": 1932 + }, + { + "epoch": 0.018297819975199022, + "grad_norm": 506.83966064453125, + "learning_rate": 1.219558359621451e-06, + "loss": 36.75, + "step": 1933 + }, + { + "epoch": 0.01830728599691408, + "grad_norm": 765.6309204101562, + "learning_rate": 1.2201892744479494e-06, + "loss": 39.4609, + "step": 1934 + }, + { + "epoch": 0.01831675201862913, + "grad_norm": 387.5756530761719, + "learning_rate": 1.2208201892744479e-06, + "loss": 31.5938, + "step": 1935 + }, + { + "epoch": 0.018326218040344185, + "grad_norm": 281.5417785644531, + "learning_rate": 1.2214511041009464e-06, + "loss": 30.8594, + "step": 1936 + }, + { + "epoch": 0.018335684062059238, + "grad_norm": 1204.9063720703125, + "learning_rate": 1.2220820189274449e-06, + "loss": 80.25, + "step": 1937 + }, + { + "epoch": 0.01834515008377429, + "grad_norm": 278.17864990234375, + "learning_rate": 1.2227129337539431e-06, + "loss": 29.8125, + "step": 1938 + }, + { + "epoch": 0.018354616105489347, + "grad_norm": 345.4440612792969, + "learning_rate": 1.2233438485804414e-06, + "loss": 30.625, + "step": 1939 + }, + { + "epoch": 0.0183640821272044, + "grad_norm": 266.744873046875, + "learning_rate": 1.22397476340694e-06, + "loss": 28.2344, + "step": 1940 + }, + { + "epoch": 0.018373548148919453, + "grad_norm": 601.2613525390625, + "learning_rate": 1.2246056782334384e-06, + "loss": 62.4219, + "step": 1941 + }, + { + "epoch": 0.018383014170634506, + "grad_norm": 526.4821166992188, + "learning_rate": 1.225236593059937e-06, + "loss": 39.6562, + "step": 1942 + }, + { + "epoch": 0.018392480192349563, + "grad_norm": 672.8414916992188, + "learning_rate": 1.2258675078864354e-06, + "loss": 74.375, + "step": 1943 + }, + { + "epoch": 0.018401946214064616, + "grad_norm": 503.8292541503906, + "learning_rate": 1.2264984227129339e-06, + "loss": 63.6094, + "step": 1944 + }, + { + "epoch": 0.01841141223577967, + "grad_norm": 1755.0703125, + "learning_rate": 1.227129337539432e-06, + "loss": 59.75, + "step": 1945 + }, + { + "epoch": 0.01842087825749472, + "grad_norm": 419.3904113769531, + "learning_rate": 1.2277602523659305e-06, + "loss": 50.0, + "step": 1946 + }, + { + "epoch": 0.018430344279209778, + "grad_norm": 1302.19482421875, + "learning_rate": 1.228391167192429e-06, + "loss": 86.9531, + "step": 1947 + }, + { + "epoch": 0.01843981030092483, + "grad_norm": 375.87249755859375, + "learning_rate": 1.2290220820189274e-06, + "loss": 31.25, + "step": 1948 + }, + { + "epoch": 0.018449276322639884, + "grad_norm": 717.37158203125, + "learning_rate": 1.229652996845426e-06, + "loss": 75.2031, + "step": 1949 + }, + { + "epoch": 0.018458742344354937, + "grad_norm": 670.8231811523438, + "learning_rate": 1.2302839116719242e-06, + "loss": 56.4219, + "step": 1950 + }, + { + "epoch": 0.01846820836606999, + "grad_norm": 458.2268371582031, + "learning_rate": 1.2309148264984225e-06, + "loss": 48.9844, + "step": 1951 + }, + { + "epoch": 0.018477674387785047, + "grad_norm": 363.7681579589844, + "learning_rate": 1.231545741324921e-06, + "loss": 33.4219, + "step": 1952 + }, + { + "epoch": 0.0184871404095001, + "grad_norm": 315.9241638183594, + "learning_rate": 1.2321766561514195e-06, + "loss": 30.2969, + "step": 1953 + }, + { + "epoch": 0.018496606431215153, + "grad_norm": 638.6665649414062, + "learning_rate": 1.232807570977918e-06, + "loss": 57.6094, + "step": 1954 + }, + { + "epoch": 0.018506072452930206, + "grad_norm": 443.9428405761719, + "learning_rate": 1.2334384858044165e-06, + "loss": 43.8125, + "step": 1955 + }, + { + "epoch": 0.018515538474645262, + "grad_norm": 497.1656799316406, + "learning_rate": 1.2340694006309147e-06, + "loss": 35.7656, + "step": 1956 + }, + { + "epoch": 0.018525004496360315, + "grad_norm": 3.553157091140747, + "learning_rate": 1.2347003154574132e-06, + "loss": 1.041, + "step": 1957 + }, + { + "epoch": 0.018534470518075368, + "grad_norm": 345.2137756347656, + "learning_rate": 1.2353312302839115e-06, + "loss": 32.6406, + "step": 1958 + }, + { + "epoch": 0.01854393653979042, + "grad_norm": 1053.6658935546875, + "learning_rate": 1.23596214511041e-06, + "loss": 41.1719, + "step": 1959 + }, + { + "epoch": 0.018553402561505478, + "grad_norm": 669.4173583984375, + "learning_rate": 1.2365930599369085e-06, + "loss": 63.0938, + "step": 1960 + }, + { + "epoch": 0.01856286858322053, + "grad_norm": 2.7527425289154053, + "learning_rate": 1.2372239747634068e-06, + "loss": 0.916, + "step": 1961 + }, + { + "epoch": 0.018572334604935584, + "grad_norm": 436.4435119628906, + "learning_rate": 1.2378548895899053e-06, + "loss": 46.9844, + "step": 1962 + }, + { + "epoch": 0.018581800626650637, + "grad_norm": 346.7664489746094, + "learning_rate": 1.2384858044164038e-06, + "loss": 29.4219, + "step": 1963 + }, + { + "epoch": 0.01859126664836569, + "grad_norm": 3.0576558113098145, + "learning_rate": 1.2391167192429023e-06, + "loss": 0.7871, + "step": 1964 + }, + { + "epoch": 0.018600732670080746, + "grad_norm": 570.0885620117188, + "learning_rate": 1.2397476340694005e-06, + "loss": 51.0469, + "step": 1965 + }, + { + "epoch": 0.0186101986917958, + "grad_norm": 348.0074768066406, + "learning_rate": 1.240378548895899e-06, + "loss": 30.625, + "step": 1966 + }, + { + "epoch": 0.018619664713510852, + "grad_norm": 967.4291381835938, + "learning_rate": 1.2410094637223973e-06, + "loss": 40.375, + "step": 1967 + }, + { + "epoch": 0.018629130735225905, + "grad_norm": 344.5946350097656, + "learning_rate": 1.2416403785488958e-06, + "loss": 31.7188, + "step": 1968 + }, + { + "epoch": 0.01863859675694096, + "grad_norm": 936.2931518554688, + "learning_rate": 1.2422712933753943e-06, + "loss": 78.375, + "step": 1969 + }, + { + "epoch": 0.018648062778656015, + "grad_norm": 601.0995483398438, + "learning_rate": 1.2429022082018928e-06, + "loss": 70.8281, + "step": 1970 + }, + { + "epoch": 0.018657528800371068, + "grad_norm": 1084.6976318359375, + "learning_rate": 1.2435331230283913e-06, + "loss": 88.9688, + "step": 1971 + }, + { + "epoch": 0.01866699482208612, + "grad_norm": 302.46209716796875, + "learning_rate": 1.2441640378548894e-06, + "loss": 28.7812, + "step": 1972 + }, + { + "epoch": 0.018676460843801177, + "grad_norm": 762.4901733398438, + "learning_rate": 1.2447949526813879e-06, + "loss": 84.2656, + "step": 1973 + }, + { + "epoch": 0.01868592686551623, + "grad_norm": 645.4723510742188, + "learning_rate": 1.2454258675078863e-06, + "loss": 33.1484, + "step": 1974 + }, + { + "epoch": 0.018695392887231283, + "grad_norm": 496.0794982910156, + "learning_rate": 1.2460567823343848e-06, + "loss": 72.7188, + "step": 1975 + }, + { + "epoch": 0.018704858908946336, + "grad_norm": 390.42578125, + "learning_rate": 1.2466876971608833e-06, + "loss": 46.6875, + "step": 1976 + }, + { + "epoch": 0.018714324930661393, + "grad_norm": 2.5872249603271484, + "learning_rate": 1.2473186119873818e-06, + "loss": 0.6982, + "step": 1977 + }, + { + "epoch": 0.018723790952376446, + "grad_norm": 2218.848388671875, + "learning_rate": 1.24794952681388e-06, + "loss": 36.3438, + "step": 1978 + }, + { + "epoch": 0.0187332569740915, + "grad_norm": 258.38397216796875, + "learning_rate": 1.2485804416403784e-06, + "loss": 33.4531, + "step": 1979 + }, + { + "epoch": 0.01874272299580655, + "grad_norm": 1076.1114501953125, + "learning_rate": 1.2492113564668769e-06, + "loss": 79.7344, + "step": 1980 + }, + { + "epoch": 0.018752189017521605, + "grad_norm": 622.9656372070312, + "learning_rate": 1.2498422712933754e-06, + "loss": 64.8438, + "step": 1981 + }, + { + "epoch": 0.01876165503923666, + "grad_norm": 512.4779663085938, + "learning_rate": 1.2504731861198739e-06, + "loss": 36.2812, + "step": 1982 + }, + { + "epoch": 0.018771121060951714, + "grad_norm": 349.68328857421875, + "learning_rate": 1.2511041009463721e-06, + "loss": 38.8906, + "step": 1983 + }, + { + "epoch": 0.018780587082666767, + "grad_norm": 289.9325866699219, + "learning_rate": 1.2517350157728706e-06, + "loss": 35.0625, + "step": 1984 + }, + { + "epoch": 0.01879005310438182, + "grad_norm": 563.7092895507812, + "learning_rate": 1.252365930599369e-06, + "loss": 84.5938, + "step": 1985 + }, + { + "epoch": 0.018799519126096877, + "grad_norm": 3.008485794067383, + "learning_rate": 1.2529968454258674e-06, + "loss": 0.9077, + "step": 1986 + }, + { + "epoch": 0.01880898514781193, + "grad_norm": 533.5364379882812, + "learning_rate": 1.253627760252366e-06, + "loss": 71.4688, + "step": 1987 + }, + { + "epoch": 0.018818451169526983, + "grad_norm": 719.5607299804688, + "learning_rate": 1.2542586750788644e-06, + "loss": 60.1875, + "step": 1988 + }, + { + "epoch": 0.018827917191242036, + "grad_norm": 369.4945983886719, + "learning_rate": 1.2548895899053627e-06, + "loss": 55.0312, + "step": 1989 + }, + { + "epoch": 0.018837383212957092, + "grad_norm": 475.2533264160156, + "learning_rate": 1.2555205047318612e-06, + "loss": 47.1406, + "step": 1990 + }, + { + "epoch": 0.018846849234672145, + "grad_norm": 375.1695556640625, + "learning_rate": 1.2561514195583597e-06, + "loss": 38.5938, + "step": 1991 + }, + { + "epoch": 0.018856315256387198, + "grad_norm": 370.98065185546875, + "learning_rate": 1.256782334384858e-06, + "loss": 46.6094, + "step": 1992 + }, + { + "epoch": 0.01886578127810225, + "grad_norm": 439.9637756347656, + "learning_rate": 1.2574132492113564e-06, + "loss": 38.75, + "step": 1993 + }, + { + "epoch": 0.018875247299817304, + "grad_norm": 314.25677490234375, + "learning_rate": 1.2580441640378547e-06, + "loss": 31.3125, + "step": 1994 + }, + { + "epoch": 0.01888471332153236, + "grad_norm": 540.0752563476562, + "learning_rate": 1.2586750788643532e-06, + "loss": 39.0156, + "step": 1995 + }, + { + "epoch": 0.018894179343247414, + "grad_norm": 419.053955078125, + "learning_rate": 1.2593059936908517e-06, + "loss": 30.2344, + "step": 1996 + }, + { + "epoch": 0.018903645364962467, + "grad_norm": 602.151611328125, + "learning_rate": 1.2599369085173502e-06, + "loss": 43.2812, + "step": 1997 + }, + { + "epoch": 0.01891311138667752, + "grad_norm": 604.1399536132812, + "learning_rate": 1.2605678233438487e-06, + "loss": 77.2812, + "step": 1998 + }, + { + "epoch": 0.018922577408392576, + "grad_norm": 523.5217895507812, + "learning_rate": 1.261198738170347e-06, + "loss": 36.3438, + "step": 1999 + }, + { + "epoch": 0.01893204343010763, + "grad_norm": 621.0302124023438, + "learning_rate": 1.2618296529968453e-06, + "loss": 38.1562, + "step": 2000 + }, + { + "epoch": 0.018941509451822682, + "grad_norm": 524.3231811523438, + "learning_rate": 1.2624605678233437e-06, + "loss": 48.4688, + "step": 2001 + }, + { + "epoch": 0.018950975473537735, + "grad_norm": 270.48089599609375, + "learning_rate": 1.2630914826498422e-06, + "loss": 27.9609, + "step": 2002 + }, + { + "epoch": 0.01896044149525279, + "grad_norm": 705.258056640625, + "learning_rate": 1.2637223974763407e-06, + "loss": 47.4531, + "step": 2003 + }, + { + "epoch": 0.018969907516967845, + "grad_norm": 608.187255859375, + "learning_rate": 1.2643533123028392e-06, + "loss": 67.0, + "step": 2004 + }, + { + "epoch": 0.018979373538682898, + "grad_norm": 969.8892822265625, + "learning_rate": 1.2649842271293373e-06, + "loss": 47.0703, + "step": 2005 + }, + { + "epoch": 0.01898883956039795, + "grad_norm": 553.6990966796875, + "learning_rate": 1.2656151419558358e-06, + "loss": 38.2266, + "step": 2006 + }, + { + "epoch": 0.018998305582113004, + "grad_norm": 369.3213806152344, + "learning_rate": 1.2662460567823343e-06, + "loss": 43.9688, + "step": 2007 + }, + { + "epoch": 0.01900777160382806, + "grad_norm": 301.6854553222656, + "learning_rate": 1.2668769716088328e-06, + "loss": 32.7344, + "step": 2008 + }, + { + "epoch": 0.019017237625543113, + "grad_norm": 811.435791015625, + "learning_rate": 1.2675078864353313e-06, + "loss": 73.0703, + "step": 2009 + }, + { + "epoch": 0.019026703647258166, + "grad_norm": 1138.5885009765625, + "learning_rate": 1.2681388012618298e-06, + "loss": 66.125, + "step": 2010 + }, + { + "epoch": 0.01903616966897322, + "grad_norm": 403.1347961425781, + "learning_rate": 1.268769716088328e-06, + "loss": 43.2812, + "step": 2011 + }, + { + "epoch": 0.019045635690688276, + "grad_norm": 510.0248107910156, + "learning_rate": 1.2694006309148263e-06, + "loss": 29.125, + "step": 2012 + }, + { + "epoch": 0.01905510171240333, + "grad_norm": 375.2066650390625, + "learning_rate": 1.2700315457413248e-06, + "loss": 34.6562, + "step": 2013 + }, + { + "epoch": 0.01906456773411838, + "grad_norm": 704.4481201171875, + "learning_rate": 1.2706624605678233e-06, + "loss": 64.375, + "step": 2014 + }, + { + "epoch": 0.019074033755833435, + "grad_norm": 265.7681579589844, + "learning_rate": 1.2712933753943218e-06, + "loss": 39.3906, + "step": 2015 + }, + { + "epoch": 0.01908349977754849, + "grad_norm": 303.23138427734375, + "learning_rate": 1.27192429022082e-06, + "loss": 33.2812, + "step": 2016 + }, + { + "epoch": 0.019092965799263544, + "grad_norm": 2.9379146099090576, + "learning_rate": 1.2725552050473186e-06, + "loss": 0.8718, + "step": 2017 + }, + { + "epoch": 0.019102431820978597, + "grad_norm": 578.0509033203125, + "learning_rate": 1.273186119873817e-06, + "loss": 54.25, + "step": 2018 + }, + { + "epoch": 0.01911189784269365, + "grad_norm": 487.6370544433594, + "learning_rate": 1.2738170347003153e-06, + "loss": 67.1562, + "step": 2019 + }, + { + "epoch": 0.019121363864408707, + "grad_norm": 558.8097534179688, + "learning_rate": 1.2744479495268138e-06, + "loss": 30.25, + "step": 2020 + }, + { + "epoch": 0.01913082988612376, + "grad_norm": 634.3668212890625, + "learning_rate": 1.2750788643533123e-06, + "loss": 70.9531, + "step": 2021 + }, + { + "epoch": 0.019140295907838813, + "grad_norm": 705.0823974609375, + "learning_rate": 1.2757097791798106e-06, + "loss": 71.8125, + "step": 2022 + }, + { + "epoch": 0.019149761929553866, + "grad_norm": 687.8134155273438, + "learning_rate": 1.2763406940063091e-06, + "loss": 38.7656, + "step": 2023 + }, + { + "epoch": 0.01915922795126892, + "grad_norm": 317.710205078125, + "learning_rate": 1.2769716088328076e-06, + "loss": 29.3594, + "step": 2024 + }, + { + "epoch": 0.019168693972983975, + "grad_norm": 684.2935180664062, + "learning_rate": 1.2776025236593059e-06, + "loss": 33.1719, + "step": 2025 + }, + { + "epoch": 0.019178159994699028, + "grad_norm": 2.823340654373169, + "learning_rate": 1.2782334384858044e-06, + "loss": 0.979, + "step": 2026 + }, + { + "epoch": 0.01918762601641408, + "grad_norm": 297.4220275878906, + "learning_rate": 1.2788643533123027e-06, + "loss": 28.3125, + "step": 2027 + }, + { + "epoch": 0.019197092038129134, + "grad_norm": 2.8904025554656982, + "learning_rate": 1.2794952681388011e-06, + "loss": 0.8682, + "step": 2028 + }, + { + "epoch": 0.01920655805984419, + "grad_norm": 517.52978515625, + "learning_rate": 1.2801261829652996e-06, + "loss": 31.2969, + "step": 2029 + }, + { + "epoch": 0.019216024081559244, + "grad_norm": 463.05462646484375, + "learning_rate": 1.2807570977917981e-06, + "loss": 27.75, + "step": 2030 + }, + { + "epoch": 0.019225490103274297, + "grad_norm": 482.45391845703125, + "learning_rate": 1.2813880126182966e-06, + "loss": 44.9375, + "step": 2031 + }, + { + "epoch": 0.01923495612498935, + "grad_norm": 298.6792907714844, + "learning_rate": 1.2820189274447947e-06, + "loss": 30.2656, + "step": 2032 + }, + { + "epoch": 0.019244422146704406, + "grad_norm": 701.9141845703125, + "learning_rate": 1.2826498422712932e-06, + "loss": 58.9297, + "step": 2033 + }, + { + "epoch": 0.01925388816841946, + "grad_norm": 1098.4503173828125, + "learning_rate": 1.2832807570977917e-06, + "loss": 53.4766, + "step": 2034 + }, + { + "epoch": 0.019263354190134512, + "grad_norm": 409.1884765625, + "learning_rate": 1.2839116719242902e-06, + "loss": 35.8906, + "step": 2035 + }, + { + "epoch": 0.019272820211849565, + "grad_norm": 361.0182800292969, + "learning_rate": 1.2845425867507887e-06, + "loss": 30.7969, + "step": 2036 + }, + { + "epoch": 0.019282286233564618, + "grad_norm": 337.2953186035156, + "learning_rate": 1.2851735015772872e-06, + "loss": 33.3125, + "step": 2037 + }, + { + "epoch": 0.019291752255279675, + "grad_norm": 2342.3994140625, + "learning_rate": 1.2858044164037854e-06, + "loss": 61.9531, + "step": 2038 + }, + { + "epoch": 0.019301218276994728, + "grad_norm": 318.1278381347656, + "learning_rate": 1.2864353312302837e-06, + "loss": 36.1562, + "step": 2039 + }, + { + "epoch": 0.01931068429870978, + "grad_norm": 870.2205810546875, + "learning_rate": 1.2870662460567822e-06, + "loss": 42.5, + "step": 2040 + }, + { + "epoch": 0.019320150320424834, + "grad_norm": 586.2507934570312, + "learning_rate": 1.2876971608832807e-06, + "loss": 32.8906, + "step": 2041 + }, + { + "epoch": 0.01932961634213989, + "grad_norm": 247.02252197265625, + "learning_rate": 1.2883280757097792e-06, + "loss": 32.7031, + "step": 2042 + }, + { + "epoch": 0.019339082363854943, + "grad_norm": 2.9542806148529053, + "learning_rate": 1.2889589905362775e-06, + "loss": 0.9224, + "step": 2043 + }, + { + "epoch": 0.019348548385569996, + "grad_norm": 876.9345092773438, + "learning_rate": 1.289589905362776e-06, + "loss": 84.6562, + "step": 2044 + }, + { + "epoch": 0.01935801440728505, + "grad_norm": 923.361328125, + "learning_rate": 1.2902208201892745e-06, + "loss": 79.6953, + "step": 2045 + }, + { + "epoch": 0.019367480429000106, + "grad_norm": 528.9297485351562, + "learning_rate": 1.2908517350157727e-06, + "loss": 37.0781, + "step": 2046 + }, + { + "epoch": 0.01937694645071516, + "grad_norm": 703.0487060546875, + "learning_rate": 1.2914826498422712e-06, + "loss": 79.1719, + "step": 2047 + }, + { + "epoch": 0.01938641247243021, + "grad_norm": 370.8716125488281, + "learning_rate": 1.2921135646687697e-06, + "loss": 29.7812, + "step": 2048 + }, + { + "epoch": 0.019395878494145265, + "grad_norm": 259.76593017578125, + "learning_rate": 1.292744479495268e-06, + "loss": 38.5312, + "step": 2049 + }, + { + "epoch": 0.019405344515860318, + "grad_norm": 342.0113220214844, + "learning_rate": 1.2933753943217665e-06, + "loss": 30.0625, + "step": 2050 + }, + { + "epoch": 0.019414810537575374, + "grad_norm": 2.956616163253784, + "learning_rate": 1.294006309148265e-06, + "loss": 0.8704, + "step": 2051 + }, + { + "epoch": 0.019424276559290427, + "grad_norm": 674.3938598632812, + "learning_rate": 1.2946372239747633e-06, + "loss": 69.9062, + "step": 2052 + }, + { + "epoch": 0.01943374258100548, + "grad_norm": 1767.0224609375, + "learning_rate": 1.2952681388012618e-06, + "loss": 57.0625, + "step": 2053 + }, + { + "epoch": 0.019443208602720533, + "grad_norm": 394.44293212890625, + "learning_rate": 1.29589905362776e-06, + "loss": 42.3281, + "step": 2054 + }, + { + "epoch": 0.01945267462443559, + "grad_norm": 346.6348571777344, + "learning_rate": 1.2965299684542585e-06, + "loss": 37.8594, + "step": 2055 + }, + { + "epoch": 0.019462140646150643, + "grad_norm": 321.0770568847656, + "learning_rate": 1.297160883280757e-06, + "loss": 47.8281, + "step": 2056 + }, + { + "epoch": 0.019471606667865696, + "grad_norm": 724.1366577148438, + "learning_rate": 1.2977917981072555e-06, + "loss": 37.0469, + "step": 2057 + }, + { + "epoch": 0.01948107268958075, + "grad_norm": 520.2060546875, + "learning_rate": 1.298422712933754e-06, + "loss": 42.9375, + "step": 2058 + }, + { + "epoch": 0.019490538711295805, + "grad_norm": 407.670654296875, + "learning_rate": 1.2990536277602523e-06, + "loss": 46.4219, + "step": 2059 + }, + { + "epoch": 0.019500004733010858, + "grad_norm": 841.496337890625, + "learning_rate": 1.2996845425867506e-06, + "loss": 55.125, + "step": 2060 + }, + { + "epoch": 0.01950947075472591, + "grad_norm": 421.41510009765625, + "learning_rate": 1.300315457413249e-06, + "loss": 23.1797, + "step": 2061 + }, + { + "epoch": 0.019518936776440964, + "grad_norm": 382.92901611328125, + "learning_rate": 1.3009463722397476e-06, + "loss": 33.1406, + "step": 2062 + }, + { + "epoch": 0.01952840279815602, + "grad_norm": 865.9940795898438, + "learning_rate": 1.301577287066246e-06, + "loss": 62.9531, + "step": 2063 + }, + { + "epoch": 0.019537868819871074, + "grad_norm": 492.7709655761719, + "learning_rate": 1.3022082018927446e-06, + "loss": 51.6562, + "step": 2064 + }, + { + "epoch": 0.019547334841586127, + "grad_norm": 913.2398681640625, + "learning_rate": 1.3028391167192428e-06, + "loss": 72.3906, + "step": 2065 + }, + { + "epoch": 0.01955680086330118, + "grad_norm": 724.1826171875, + "learning_rate": 1.3034700315457411e-06, + "loss": 27.125, + "step": 2066 + }, + { + "epoch": 0.019566266885016233, + "grad_norm": 254.63461303710938, + "learning_rate": 1.3041009463722396e-06, + "loss": 31.4688, + "step": 2067 + }, + { + "epoch": 0.01957573290673129, + "grad_norm": 308.5326232910156, + "learning_rate": 1.3047318611987381e-06, + "loss": 30.4531, + "step": 2068 + }, + { + "epoch": 0.019585198928446342, + "grad_norm": 720.0283203125, + "learning_rate": 1.3053627760252366e-06, + "loss": 69.3125, + "step": 2069 + }, + { + "epoch": 0.019594664950161395, + "grad_norm": 604.1643676757812, + "learning_rate": 1.305993690851735e-06, + "loss": 33.5938, + "step": 2070 + }, + { + "epoch": 0.019604130971876448, + "grad_norm": 477.911376953125, + "learning_rate": 1.3066246056782334e-06, + "loss": 34.9375, + "step": 2071 + }, + { + "epoch": 0.019613596993591505, + "grad_norm": 343.9098205566406, + "learning_rate": 1.3072555205047317e-06, + "loss": 32.7812, + "step": 2072 + }, + { + "epoch": 0.019623063015306558, + "grad_norm": 687.4589233398438, + "learning_rate": 1.3078864353312301e-06, + "loss": 52.1406, + "step": 2073 + }, + { + "epoch": 0.01963252903702161, + "grad_norm": 260.613525390625, + "learning_rate": 1.3085173501577286e-06, + "loss": 30.0312, + "step": 2074 + }, + { + "epoch": 0.019641995058736664, + "grad_norm": 503.62603759765625, + "learning_rate": 1.3091482649842271e-06, + "loss": 40.0, + "step": 2075 + }, + { + "epoch": 0.01965146108045172, + "grad_norm": 823.0601196289062, + "learning_rate": 1.3097791798107254e-06, + "loss": 41.0469, + "step": 2076 + }, + { + "epoch": 0.019660927102166773, + "grad_norm": 480.853759765625, + "learning_rate": 1.310410094637224e-06, + "loss": 29.375, + "step": 2077 + }, + { + "epoch": 0.019670393123881826, + "grad_norm": 431.7024841308594, + "learning_rate": 1.3110410094637224e-06, + "loss": 33.7812, + "step": 2078 + }, + { + "epoch": 0.01967985914559688, + "grad_norm": 3.1616618633270264, + "learning_rate": 1.3116719242902207e-06, + "loss": 0.9175, + "step": 2079 + }, + { + "epoch": 0.019689325167311932, + "grad_norm": 332.2991027832031, + "learning_rate": 1.3123028391167192e-06, + "loss": 32.875, + "step": 2080 + }, + { + "epoch": 0.01969879118902699, + "grad_norm": 1270.9605712890625, + "learning_rate": 1.3129337539432177e-06, + "loss": 54.9297, + "step": 2081 + }, + { + "epoch": 0.01970825721074204, + "grad_norm": 386.9015197753906, + "learning_rate": 1.313564668769716e-06, + "loss": 30.7656, + "step": 2082 + }, + { + "epoch": 0.019717723232457095, + "grad_norm": 713.4321899414062, + "learning_rate": 1.3141955835962144e-06, + "loss": 42.875, + "step": 2083 + }, + { + "epoch": 0.019727189254172148, + "grad_norm": 251.7477264404297, + "learning_rate": 1.314826498422713e-06, + "loss": 30.8438, + "step": 2084 + }, + { + "epoch": 0.019736655275887204, + "grad_norm": 373.73443603515625, + "learning_rate": 1.3154574132492114e-06, + "loss": 35.7031, + "step": 2085 + }, + { + "epoch": 0.019746121297602257, + "grad_norm": 648.2665405273438, + "learning_rate": 1.3160883280757097e-06, + "loss": 45.0703, + "step": 2086 + }, + { + "epoch": 0.01975558731931731, + "grad_norm": 364.82330322265625, + "learning_rate": 1.316719242902208e-06, + "loss": 34.2344, + "step": 2087 + }, + { + "epoch": 0.019765053341032363, + "grad_norm": 337.90667724609375, + "learning_rate": 1.3173501577287065e-06, + "loss": 30.3438, + "step": 2088 + }, + { + "epoch": 0.01977451936274742, + "grad_norm": 399.9173278808594, + "learning_rate": 1.317981072555205e-06, + "loss": 29.4062, + "step": 2089 + }, + { + "epoch": 0.019783985384462473, + "grad_norm": 295.9457702636719, + "learning_rate": 1.3186119873817035e-06, + "loss": 36.1406, + "step": 2090 + }, + { + "epoch": 0.019793451406177526, + "grad_norm": 525.6752319335938, + "learning_rate": 1.319242902208202e-06, + "loss": 71.2812, + "step": 2091 + }, + { + "epoch": 0.01980291742789258, + "grad_norm": 3.1102352142333984, + "learning_rate": 1.3198738170347005e-06, + "loss": 0.7585, + "step": 2092 + }, + { + "epoch": 0.019812383449607632, + "grad_norm": 284.2369689941406, + "learning_rate": 1.3205047318611985e-06, + "loss": 34.8125, + "step": 2093 + }, + { + "epoch": 0.019821849471322688, + "grad_norm": 763.8524780273438, + "learning_rate": 1.321135646687697e-06, + "loss": 86.2812, + "step": 2094 + }, + { + "epoch": 0.01983131549303774, + "grad_norm": 294.59027099609375, + "learning_rate": 1.3217665615141955e-06, + "loss": 29.0469, + "step": 2095 + }, + { + "epoch": 0.019840781514752794, + "grad_norm": 751.1774291992188, + "learning_rate": 1.322397476340694e-06, + "loss": 59.375, + "step": 2096 + }, + { + "epoch": 0.019850247536467847, + "grad_norm": 639.15234375, + "learning_rate": 1.3230283911671925e-06, + "loss": 65.0312, + "step": 2097 + }, + { + "epoch": 0.019859713558182904, + "grad_norm": 343.3891906738281, + "learning_rate": 1.3236593059936908e-06, + "loss": 27.25, + "step": 2098 + }, + { + "epoch": 0.019869179579897957, + "grad_norm": 377.7643127441406, + "learning_rate": 1.324290220820189e-06, + "loss": 35.3438, + "step": 2099 + }, + { + "epoch": 0.01987864560161301, + "grad_norm": 771.504150390625, + "learning_rate": 1.3249211356466875e-06, + "loss": 35.2812, + "step": 2100 + }, + { + "epoch": 0.019888111623328063, + "grad_norm": 579.07421875, + "learning_rate": 1.325552050473186e-06, + "loss": 78.5625, + "step": 2101 + }, + { + "epoch": 0.01989757764504312, + "grad_norm": 352.45166015625, + "learning_rate": 1.3261829652996845e-06, + "loss": 34.25, + "step": 2102 + }, + { + "epoch": 0.019907043666758172, + "grad_norm": 680.9039916992188, + "learning_rate": 1.326813880126183e-06, + "loss": 40.9688, + "step": 2103 + }, + { + "epoch": 0.019916509688473225, + "grad_norm": 724.0121459960938, + "learning_rate": 1.3274447949526813e-06, + "loss": 62.125, + "step": 2104 + }, + { + "epoch": 0.019925975710188278, + "grad_norm": 567.20703125, + "learning_rate": 1.3280757097791798e-06, + "loss": 69.0625, + "step": 2105 + }, + { + "epoch": 0.01993544173190333, + "grad_norm": 614.3329467773438, + "learning_rate": 1.328706624605678e-06, + "loss": 34.3906, + "step": 2106 + }, + { + "epoch": 0.019944907753618388, + "grad_norm": 2.9560792446136475, + "learning_rate": 1.3293375394321766e-06, + "loss": 1.0537, + "step": 2107 + }, + { + "epoch": 0.01995437377533344, + "grad_norm": 612.74169921875, + "learning_rate": 1.329968454258675e-06, + "loss": 38.1094, + "step": 2108 + }, + { + "epoch": 0.019963839797048494, + "grad_norm": 583.07421875, + "learning_rate": 1.3305993690851733e-06, + "loss": 66.7812, + "step": 2109 + }, + { + "epoch": 0.019973305818763547, + "grad_norm": 772.7844848632812, + "learning_rate": 1.3312302839116718e-06, + "loss": 61.4062, + "step": 2110 + }, + { + "epoch": 0.019982771840478603, + "grad_norm": 456.80267333984375, + "learning_rate": 1.3318611987381703e-06, + "loss": 37.0938, + "step": 2111 + }, + { + "epoch": 0.019992237862193656, + "grad_norm": 1307.0657958984375, + "learning_rate": 1.3324921135646688e-06, + "loss": 50.8516, + "step": 2112 + }, + { + "epoch": 0.02000170388390871, + "grad_norm": 536.4132080078125, + "learning_rate": 1.3331230283911671e-06, + "loss": 74.4062, + "step": 2113 + }, + { + "epoch": 0.020011169905623762, + "grad_norm": 307.3956298828125, + "learning_rate": 1.3337539432176656e-06, + "loss": 37.6406, + "step": 2114 + }, + { + "epoch": 0.02002063592733882, + "grad_norm": 713.4292602539062, + "learning_rate": 1.3343848580441639e-06, + "loss": 35.4531, + "step": 2115 + }, + { + "epoch": 0.020030101949053872, + "grad_norm": 543.1886596679688, + "learning_rate": 1.3350157728706624e-06, + "loss": 35.4062, + "step": 2116 + }, + { + "epoch": 0.020039567970768925, + "grad_norm": 2.9747438430786133, + "learning_rate": 1.3356466876971609e-06, + "loss": 0.9966, + "step": 2117 + }, + { + "epoch": 0.020049033992483978, + "grad_norm": 1229.5357666015625, + "learning_rate": 1.3362776025236594e-06, + "loss": 55.6875, + "step": 2118 + }, + { + "epoch": 0.020058500014199034, + "grad_norm": 336.291015625, + "learning_rate": 1.3369085173501579e-06, + "loss": 29.25, + "step": 2119 + }, + { + "epoch": 0.020067966035914087, + "grad_norm": 234.57203674316406, + "learning_rate": 1.337539432176656e-06, + "loss": 34.5312, + "step": 2120 + }, + { + "epoch": 0.02007743205762914, + "grad_norm": 884.5182495117188, + "learning_rate": 1.3381703470031544e-06, + "loss": 32.4219, + "step": 2121 + }, + { + "epoch": 0.020086898079344193, + "grad_norm": 730.8841552734375, + "learning_rate": 1.338801261829653e-06, + "loss": 47.8516, + "step": 2122 + }, + { + "epoch": 0.020096364101059246, + "grad_norm": 339.9352111816406, + "learning_rate": 1.3394321766561514e-06, + "loss": 33.875, + "step": 2123 + }, + { + "epoch": 0.020105830122774303, + "grad_norm": 692.74755859375, + "learning_rate": 1.34006309148265e-06, + "loss": 49.875, + "step": 2124 + }, + { + "epoch": 0.020115296144489356, + "grad_norm": 1087.572021484375, + "learning_rate": 1.3406940063091484e-06, + "loss": 66.5312, + "step": 2125 + }, + { + "epoch": 0.02012476216620441, + "grad_norm": 383.54071044921875, + "learning_rate": 1.3413249211356465e-06, + "loss": 28.7812, + "step": 2126 + }, + { + "epoch": 0.020134228187919462, + "grad_norm": 545.7343139648438, + "learning_rate": 1.341955835962145e-06, + "loss": 37.75, + "step": 2127 + }, + { + "epoch": 0.020143694209634518, + "grad_norm": 270.11297607421875, + "learning_rate": 1.3425867507886434e-06, + "loss": 33.7969, + "step": 2128 + }, + { + "epoch": 0.02015316023134957, + "grad_norm": 874.2034912109375, + "learning_rate": 1.343217665615142e-06, + "loss": 48.75, + "step": 2129 + }, + { + "epoch": 0.020162626253064624, + "grad_norm": 395.3534240722656, + "learning_rate": 1.3438485804416404e-06, + "loss": 28.9062, + "step": 2130 + }, + { + "epoch": 0.020172092274779677, + "grad_norm": 419.6441650390625, + "learning_rate": 1.3444794952681387e-06, + "loss": 35.0938, + "step": 2131 + }, + { + "epoch": 0.020181558296494734, + "grad_norm": 3.747548818588257, + "learning_rate": 1.3451104100946372e-06, + "loss": 0.9048, + "step": 2132 + }, + { + "epoch": 0.020191024318209787, + "grad_norm": 478.88494873046875, + "learning_rate": 1.3457413249211355e-06, + "loss": 31.8906, + "step": 2133 + }, + { + "epoch": 0.02020049033992484, + "grad_norm": 367.72052001953125, + "learning_rate": 1.346372239747634e-06, + "loss": 35.2812, + "step": 2134 + }, + { + "epoch": 0.020209956361639893, + "grad_norm": 606.7471313476562, + "learning_rate": 1.3470031545741325e-06, + "loss": 42.5781, + "step": 2135 + }, + { + "epoch": 0.020219422383354946, + "grad_norm": 289.1445007324219, + "learning_rate": 1.347634069400631e-06, + "loss": 33.7969, + "step": 2136 + }, + { + "epoch": 0.020228888405070002, + "grad_norm": 3.322514295578003, + "learning_rate": 1.3482649842271292e-06, + "loss": 0.8223, + "step": 2137 + }, + { + "epoch": 0.020238354426785055, + "grad_norm": 846.284423828125, + "learning_rate": 1.3488958990536277e-06, + "loss": 49.125, + "step": 2138 + }, + { + "epoch": 0.02024782044850011, + "grad_norm": 511.39361572265625, + "learning_rate": 1.3495268138801262e-06, + "loss": 53.6875, + "step": 2139 + }, + { + "epoch": 0.02025728647021516, + "grad_norm": 3.100926637649536, + "learning_rate": 1.3501577287066245e-06, + "loss": 0.853, + "step": 2140 + }, + { + "epoch": 0.020266752491930218, + "grad_norm": 349.5829772949219, + "learning_rate": 1.350788643533123e-06, + "loss": 43.5, + "step": 2141 + }, + { + "epoch": 0.02027621851364527, + "grad_norm": 1338.6728515625, + "learning_rate": 1.3514195583596213e-06, + "loss": 43.8594, + "step": 2142 + }, + { + "epoch": 0.020285684535360324, + "grad_norm": 220.35922241210938, + "learning_rate": 1.3520504731861198e-06, + "loss": 26.6719, + "step": 2143 + }, + { + "epoch": 0.020295150557075377, + "grad_norm": 301.002685546875, + "learning_rate": 1.3526813880126183e-06, + "loss": 31.7969, + "step": 2144 + }, + { + "epoch": 0.020304616578790433, + "grad_norm": 475.5758361816406, + "learning_rate": 1.3533123028391168e-06, + "loss": 30.125, + "step": 2145 + }, + { + "epoch": 0.020314082600505486, + "grad_norm": 366.2695007324219, + "learning_rate": 1.353943217665615e-06, + "loss": 35.7031, + "step": 2146 + }, + { + "epoch": 0.02032354862222054, + "grad_norm": 324.18475341796875, + "learning_rate": 1.3545741324921135e-06, + "loss": 29.3125, + "step": 2147 + }, + { + "epoch": 0.020333014643935592, + "grad_norm": 279.09429931640625, + "learning_rate": 1.3552050473186118e-06, + "loss": 31.5469, + "step": 2148 + }, + { + "epoch": 0.020342480665650645, + "grad_norm": 1049.1007080078125, + "learning_rate": 1.3558359621451103e-06, + "loss": 62.0234, + "step": 2149 + }, + { + "epoch": 0.020351946687365702, + "grad_norm": 303.7206726074219, + "learning_rate": 1.3564668769716088e-06, + "loss": 31.8594, + "step": 2150 + }, + { + "epoch": 0.020361412709080755, + "grad_norm": 383.5340881347656, + "learning_rate": 1.3570977917981073e-06, + "loss": 31.5781, + "step": 2151 + }, + { + "epoch": 0.020370878730795808, + "grad_norm": 849.4844970703125, + "learning_rate": 1.3577287066246058e-06, + "loss": 53.1562, + "step": 2152 + }, + { + "epoch": 0.02038034475251086, + "grad_norm": 201.6598663330078, + "learning_rate": 1.3583596214511039e-06, + "loss": 25.8125, + "step": 2153 + }, + { + "epoch": 0.020389810774225917, + "grad_norm": 487.2587890625, + "learning_rate": 1.3589905362776023e-06, + "loss": 32.6406, + "step": 2154 + }, + { + "epoch": 0.02039927679594097, + "grad_norm": 1636.7860107421875, + "learning_rate": 1.3596214511041008e-06, + "loss": 70.0156, + "step": 2155 + }, + { + "epoch": 0.020408742817656023, + "grad_norm": 489.4699401855469, + "learning_rate": 1.3602523659305993e-06, + "loss": 62.8906, + "step": 2156 + }, + { + "epoch": 0.020418208839371076, + "grad_norm": 499.9373474121094, + "learning_rate": 1.3608832807570978e-06, + "loss": 33.7656, + "step": 2157 + }, + { + "epoch": 0.020427674861086133, + "grad_norm": 3.4868760108947754, + "learning_rate": 1.3615141955835963e-06, + "loss": 1.0005, + "step": 2158 + }, + { + "epoch": 0.020437140882801186, + "grad_norm": 945.8406982421875, + "learning_rate": 1.3621451104100946e-06, + "loss": 68.0625, + "step": 2159 + }, + { + "epoch": 0.02044660690451624, + "grad_norm": 349.6187438964844, + "learning_rate": 1.3627760252365929e-06, + "loss": 30.1719, + "step": 2160 + }, + { + "epoch": 0.020456072926231292, + "grad_norm": 459.9718017578125, + "learning_rate": 1.3634069400630914e-06, + "loss": 31.1328, + "step": 2161 + }, + { + "epoch": 0.02046553894794635, + "grad_norm": 713.6366577148438, + "learning_rate": 1.3640378548895899e-06, + "loss": 33.2031, + "step": 2162 + }, + { + "epoch": 0.0204750049696614, + "grad_norm": 1604.6351318359375, + "learning_rate": 1.3646687697160884e-06, + "loss": 95.4062, + "step": 2163 + }, + { + "epoch": 0.020484470991376454, + "grad_norm": 930.7652587890625, + "learning_rate": 1.3652996845425866e-06, + "loss": 52.6094, + "step": 2164 + }, + { + "epoch": 0.020493937013091507, + "grad_norm": 540.1885986328125, + "learning_rate": 1.3659305993690851e-06, + "loss": 37.5625, + "step": 2165 + }, + { + "epoch": 0.02050340303480656, + "grad_norm": 1086.6595458984375, + "learning_rate": 1.3665615141955834e-06, + "loss": 44.8672, + "step": 2166 + }, + { + "epoch": 0.020512869056521617, + "grad_norm": 662.2283325195312, + "learning_rate": 1.367192429022082e-06, + "loss": 52.4062, + "step": 2167 + }, + { + "epoch": 0.02052233507823667, + "grad_norm": 1089.257568359375, + "learning_rate": 1.3678233438485804e-06, + "loss": 49.6953, + "step": 2168 + }, + { + "epoch": 0.020531801099951723, + "grad_norm": 296.2262268066406, + "learning_rate": 1.368454258675079e-06, + "loss": 29.0156, + "step": 2169 + }, + { + "epoch": 0.020541267121666776, + "grad_norm": 475.3573303222656, + "learning_rate": 1.3690851735015772e-06, + "loss": 38.875, + "step": 2170 + }, + { + "epoch": 0.020550733143381832, + "grad_norm": 562.4888916015625, + "learning_rate": 1.3697160883280757e-06, + "loss": 51.1406, + "step": 2171 + }, + { + "epoch": 0.020560199165096885, + "grad_norm": 613.7973022460938, + "learning_rate": 1.3703470031545742e-06, + "loss": 48.1172, + "step": 2172 + }, + { + "epoch": 0.02056966518681194, + "grad_norm": 515.345947265625, + "learning_rate": 1.3709779179810724e-06, + "loss": 70.4219, + "step": 2173 + }, + { + "epoch": 0.02057913120852699, + "grad_norm": 290.5258483886719, + "learning_rate": 1.371608832807571e-06, + "loss": 30.5938, + "step": 2174 + }, + { + "epoch": 0.020588597230242048, + "grad_norm": 539.18115234375, + "learning_rate": 1.3722397476340692e-06, + "loss": 51.375, + "step": 2175 + }, + { + "epoch": 0.0205980632519571, + "grad_norm": 3.3848085403442383, + "learning_rate": 1.3728706624605677e-06, + "loss": 0.708, + "step": 2176 + }, + { + "epoch": 0.020607529273672154, + "grad_norm": 299.11480712890625, + "learning_rate": 1.3735015772870662e-06, + "loss": 33.375, + "step": 2177 + }, + { + "epoch": 0.020616995295387207, + "grad_norm": 365.3611145019531, + "learning_rate": 1.3741324921135647e-06, + "loss": 27.7188, + "step": 2178 + }, + { + "epoch": 0.02062646131710226, + "grad_norm": 348.3011169433594, + "learning_rate": 1.3747634069400632e-06, + "loss": 30.1641, + "step": 2179 + }, + { + "epoch": 0.020635927338817316, + "grad_norm": 391.5630798339844, + "learning_rate": 1.3753943217665615e-06, + "loss": 35.8281, + "step": 2180 + }, + { + "epoch": 0.02064539336053237, + "grad_norm": 509.5236511230469, + "learning_rate": 1.3760252365930598e-06, + "loss": 40.4062, + "step": 2181 + }, + { + "epoch": 0.020654859382247422, + "grad_norm": 558.9053955078125, + "learning_rate": 1.3766561514195582e-06, + "loss": 63.1406, + "step": 2182 + }, + { + "epoch": 0.020664325403962475, + "grad_norm": 749.6072998046875, + "learning_rate": 1.3772870662460567e-06, + "loss": 80.0, + "step": 2183 + }, + { + "epoch": 0.020673791425677532, + "grad_norm": 619.44287109375, + "learning_rate": 1.3779179810725552e-06, + "loss": 56.6719, + "step": 2184 + }, + { + "epoch": 0.020683257447392585, + "grad_norm": 620.7703247070312, + "learning_rate": 1.3785488958990537e-06, + "loss": 36.9219, + "step": 2185 + }, + { + "epoch": 0.020692723469107638, + "grad_norm": 729.588134765625, + "learning_rate": 1.3791798107255518e-06, + "loss": 76.375, + "step": 2186 + }, + { + "epoch": 0.02070218949082269, + "grad_norm": 761.2530517578125, + "learning_rate": 1.3798107255520503e-06, + "loss": 91.5625, + "step": 2187 + }, + { + "epoch": 0.020711655512537747, + "grad_norm": 249.72628784179688, + "learning_rate": 1.3804416403785488e-06, + "loss": 30.0, + "step": 2188 + }, + { + "epoch": 0.0207211215342528, + "grad_norm": 1183.1383056640625, + "learning_rate": 1.3810725552050473e-06, + "loss": 50.9766, + "step": 2189 + }, + { + "epoch": 0.020730587555967853, + "grad_norm": 448.05950927734375, + "learning_rate": 1.3817034700315458e-06, + "loss": 37.0625, + "step": 2190 + }, + { + "epoch": 0.020740053577682906, + "grad_norm": 640.8497314453125, + "learning_rate": 1.3823343848580443e-06, + "loss": 78.9375, + "step": 2191 + }, + { + "epoch": 0.02074951959939796, + "grad_norm": 282.38458251953125, + "learning_rate": 1.3829652996845425e-06, + "loss": 27.5938, + "step": 2192 + }, + { + "epoch": 0.020758985621113016, + "grad_norm": 824.2235717773438, + "learning_rate": 1.3835962145110408e-06, + "loss": 72.0625, + "step": 2193 + }, + { + "epoch": 0.02076845164282807, + "grad_norm": 846.9154052734375, + "learning_rate": 1.3842271293375393e-06, + "loss": 78.6562, + "step": 2194 + }, + { + "epoch": 0.020777917664543122, + "grad_norm": 758.4829711914062, + "learning_rate": 1.3848580441640378e-06, + "loss": 78.0781, + "step": 2195 + }, + { + "epoch": 0.020787383686258175, + "grad_norm": 680.2816772460938, + "learning_rate": 1.3854889589905363e-06, + "loss": 42.4688, + "step": 2196 + }, + { + "epoch": 0.02079684970797323, + "grad_norm": 831.1616821289062, + "learning_rate": 1.3861198738170346e-06, + "loss": 33.6562, + "step": 2197 + }, + { + "epoch": 0.020806315729688284, + "grad_norm": 780.0845336914062, + "learning_rate": 1.386750788643533e-06, + "loss": 33.25, + "step": 2198 + }, + { + "epoch": 0.020815781751403337, + "grad_norm": 2.9668502807617188, + "learning_rate": 1.3873817034700316e-06, + "loss": 0.7876, + "step": 2199 + }, + { + "epoch": 0.02082524777311839, + "grad_norm": 375.05340576171875, + "learning_rate": 1.3880126182965298e-06, + "loss": 40.5312, + "step": 2200 + }, + { + "epoch": 0.020834713794833447, + "grad_norm": 515.5603637695312, + "learning_rate": 1.3886435331230283e-06, + "loss": 46.2812, + "step": 2201 + }, + { + "epoch": 0.0208441798165485, + "grad_norm": 375.2019958496094, + "learning_rate": 1.3892744479495266e-06, + "loss": 41.9062, + "step": 2202 + }, + { + "epoch": 0.020853645838263553, + "grad_norm": 315.7974548339844, + "learning_rate": 1.3899053627760251e-06, + "loss": 29.2031, + "step": 2203 + }, + { + "epoch": 0.020863111859978606, + "grad_norm": 1154.222900390625, + "learning_rate": 1.3905362776025236e-06, + "loss": 35.8594, + "step": 2204 + }, + { + "epoch": 0.020872577881693662, + "grad_norm": 321.7087097167969, + "learning_rate": 1.391167192429022e-06, + "loss": 31.5938, + "step": 2205 + }, + { + "epoch": 0.020882043903408715, + "grad_norm": 1726.2049560546875, + "learning_rate": 1.3917981072555206e-06, + "loss": 61.75, + "step": 2206 + }, + { + "epoch": 0.02089150992512377, + "grad_norm": 326.9066467285156, + "learning_rate": 1.3924290220820189e-06, + "loss": 29.7188, + "step": 2207 + }, + { + "epoch": 0.02090097594683882, + "grad_norm": 411.2711486816406, + "learning_rate": 1.3930599369085172e-06, + "loss": 41.2344, + "step": 2208 + }, + { + "epoch": 0.020910441968553874, + "grad_norm": 521.641357421875, + "learning_rate": 1.3936908517350156e-06, + "loss": 64.8438, + "step": 2209 + }, + { + "epoch": 0.02091990799026893, + "grad_norm": 1179.251708984375, + "learning_rate": 1.3943217665615141e-06, + "loss": 86.3125, + "step": 2210 + }, + { + "epoch": 0.020929374011983984, + "grad_norm": 493.3608093261719, + "learning_rate": 1.3949526813880126e-06, + "loss": 55.8906, + "step": 2211 + }, + { + "epoch": 0.020938840033699037, + "grad_norm": 267.9371643066406, + "learning_rate": 1.3955835962145111e-06, + "loss": 32.8594, + "step": 2212 + }, + { + "epoch": 0.02094830605541409, + "grad_norm": 585.3629760742188, + "learning_rate": 1.3962145110410092e-06, + "loss": 33.3516, + "step": 2213 + }, + { + "epoch": 0.020957772077129146, + "grad_norm": 755.7814331054688, + "learning_rate": 1.3968454258675077e-06, + "loss": 70.6875, + "step": 2214 + }, + { + "epoch": 0.0209672380988442, + "grad_norm": 326.96282958984375, + "learning_rate": 1.3974763406940062e-06, + "loss": 33.6562, + "step": 2215 + }, + { + "epoch": 0.020976704120559252, + "grad_norm": 1201.5894775390625, + "learning_rate": 1.3981072555205047e-06, + "loss": 52.5312, + "step": 2216 + }, + { + "epoch": 0.020986170142274305, + "grad_norm": 757.2534790039062, + "learning_rate": 1.3987381703470032e-06, + "loss": 81.5234, + "step": 2217 + }, + { + "epoch": 0.020995636163989362, + "grad_norm": 1139.031982421875, + "learning_rate": 1.3993690851735017e-06, + "loss": 65.8594, + "step": 2218 + }, + { + "epoch": 0.021005102185704415, + "grad_norm": 486.8087158203125, + "learning_rate": 1.4e-06, + "loss": 31.1016, + "step": 2219 + }, + { + "epoch": 0.021014568207419468, + "grad_norm": 552.7048950195312, + "learning_rate": 1.4006309148264982e-06, + "loss": 67.75, + "step": 2220 + }, + { + "epoch": 0.02102403422913452, + "grad_norm": 458.4835205078125, + "learning_rate": 1.4012618296529967e-06, + "loss": 39.4375, + "step": 2221 + }, + { + "epoch": 0.021033500250849574, + "grad_norm": 206.02810668945312, + "learning_rate": 1.4018927444794952e-06, + "loss": 29.2031, + "step": 2222 + }, + { + "epoch": 0.02104296627256463, + "grad_norm": 266.6800842285156, + "learning_rate": 1.4025236593059937e-06, + "loss": 28.9062, + "step": 2223 + }, + { + "epoch": 0.021052432294279683, + "grad_norm": 638.2230834960938, + "learning_rate": 1.403154574132492e-06, + "loss": 27.6562, + "step": 2224 + }, + { + "epoch": 0.021061898315994736, + "grad_norm": 515.5881958007812, + "learning_rate": 1.4037854889589905e-06, + "loss": 69.5312, + "step": 2225 + }, + { + "epoch": 0.02107136433770979, + "grad_norm": 897.3941040039062, + "learning_rate": 1.404416403785489e-06, + "loss": 32.9219, + "step": 2226 + }, + { + "epoch": 0.021080830359424846, + "grad_norm": 254.4386444091797, + "learning_rate": 1.4050473186119872e-06, + "loss": 37.2031, + "step": 2227 + }, + { + "epoch": 0.0210902963811399, + "grad_norm": 308.8963317871094, + "learning_rate": 1.4056782334384857e-06, + "loss": 32.5469, + "step": 2228 + }, + { + "epoch": 0.021099762402854952, + "grad_norm": 591.424072265625, + "learning_rate": 1.4063091482649842e-06, + "loss": 83.625, + "step": 2229 + }, + { + "epoch": 0.021109228424570005, + "grad_norm": 252.09744262695312, + "learning_rate": 1.4069400630914825e-06, + "loss": 29.3281, + "step": 2230 + }, + { + "epoch": 0.02111869444628506, + "grad_norm": 255.0266571044922, + "learning_rate": 1.407570977917981e-06, + "loss": 32.125, + "step": 2231 + }, + { + "epoch": 0.021128160468000114, + "grad_norm": 748.6367797851562, + "learning_rate": 1.4082018927444795e-06, + "loss": 73.5, + "step": 2232 + }, + { + "epoch": 0.021137626489715167, + "grad_norm": 417.3565368652344, + "learning_rate": 1.408832807570978e-06, + "loss": 38.5, + "step": 2233 + }, + { + "epoch": 0.02114709251143022, + "grad_norm": 711.2984619140625, + "learning_rate": 1.4094637223974763e-06, + "loss": 37.375, + "step": 2234 + }, + { + "epoch": 0.021156558533145273, + "grad_norm": 494.2431640625, + "learning_rate": 1.4100946372239746e-06, + "loss": 51.6719, + "step": 2235 + }, + { + "epoch": 0.02116602455486033, + "grad_norm": 476.32330322265625, + "learning_rate": 1.410725552050473e-06, + "loss": 68.3438, + "step": 2236 + }, + { + "epoch": 0.021175490576575383, + "grad_norm": 387.900390625, + "learning_rate": 1.4113564668769715e-06, + "loss": 62.5, + "step": 2237 + }, + { + "epoch": 0.021184956598290436, + "grad_norm": 3.5105319023132324, + "learning_rate": 1.41198738170347e-06, + "loss": 0.9607, + "step": 2238 + }, + { + "epoch": 0.02119442262000549, + "grad_norm": 401.2019958496094, + "learning_rate": 1.4126182965299685e-06, + "loss": 24.9688, + "step": 2239 + }, + { + "epoch": 0.021203888641720545, + "grad_norm": 440.0290222167969, + "learning_rate": 1.4132492113564668e-06, + "loss": 35.5781, + "step": 2240 + }, + { + "epoch": 0.0212133546634356, + "grad_norm": 439.07598876953125, + "learning_rate": 1.413880126182965e-06, + "loss": 29.9688, + "step": 2241 + }, + { + "epoch": 0.02122282068515065, + "grad_norm": 866.3509521484375, + "learning_rate": 1.4145110410094636e-06, + "loss": 37.7109, + "step": 2242 + }, + { + "epoch": 0.021232286706865704, + "grad_norm": 584.0252075195312, + "learning_rate": 1.415141955835962e-06, + "loss": 30.5781, + "step": 2243 + }, + { + "epoch": 0.02124175272858076, + "grad_norm": 909.0205078125, + "learning_rate": 1.4157728706624606e-06, + "loss": 36.9922, + "step": 2244 + }, + { + "epoch": 0.021251218750295814, + "grad_norm": 1023.5075073242188, + "learning_rate": 1.416403785488959e-06, + "loss": 71.7344, + "step": 2245 + }, + { + "epoch": 0.021260684772010867, + "grad_norm": 956.8889770507812, + "learning_rate": 1.4170347003154573e-06, + "loss": 34.2344, + "step": 2246 + }, + { + "epoch": 0.02127015079372592, + "grad_norm": 444.9805603027344, + "learning_rate": 1.4176656151419556e-06, + "loss": 33.1875, + "step": 2247 + }, + { + "epoch": 0.021279616815440973, + "grad_norm": 876.4585571289062, + "learning_rate": 1.4182965299684541e-06, + "loss": 58.3125, + "step": 2248 + }, + { + "epoch": 0.02128908283715603, + "grad_norm": 602.4058837890625, + "learning_rate": 1.4189274447949526e-06, + "loss": 71.5938, + "step": 2249 + }, + { + "epoch": 0.021298548858871082, + "grad_norm": 973.3199462890625, + "learning_rate": 1.419558359621451e-06, + "loss": 49.2812, + "step": 2250 + }, + { + "epoch": 0.021308014880586135, + "grad_norm": 1073.0609130859375, + "learning_rate": 1.4201892744479496e-06, + "loss": 60.9219, + "step": 2251 + }, + { + "epoch": 0.02131748090230119, + "grad_norm": 749.9246826171875, + "learning_rate": 1.4208201892744479e-06, + "loss": 71.9141, + "step": 2252 + }, + { + "epoch": 0.021326946924016245, + "grad_norm": 470.78277587890625, + "learning_rate": 1.4214511041009464e-06, + "loss": 46.3281, + "step": 2253 + }, + { + "epoch": 0.021336412945731298, + "grad_norm": 2.87326717376709, + "learning_rate": 1.4220820189274446e-06, + "loss": 0.8613, + "step": 2254 + }, + { + "epoch": 0.02134587896744635, + "grad_norm": 669.472900390625, + "learning_rate": 1.4227129337539431e-06, + "loss": 48.9062, + "step": 2255 + }, + { + "epoch": 0.021355344989161404, + "grad_norm": 645.2957763671875, + "learning_rate": 1.4233438485804416e-06, + "loss": 48.0, + "step": 2256 + }, + { + "epoch": 0.02136481101087646, + "grad_norm": 389.7939453125, + "learning_rate": 1.42397476340694e-06, + "loss": 37.0469, + "step": 2257 + }, + { + "epoch": 0.021374277032591513, + "grad_norm": 634.0523071289062, + "learning_rate": 1.4246056782334384e-06, + "loss": 35.4062, + "step": 2258 + }, + { + "epoch": 0.021383743054306566, + "grad_norm": 292.4303894042969, + "learning_rate": 1.425236593059937e-06, + "loss": 31.9531, + "step": 2259 + }, + { + "epoch": 0.02139320907602162, + "grad_norm": 853.3124389648438, + "learning_rate": 1.4258675078864352e-06, + "loss": 34.4688, + "step": 2260 + }, + { + "epoch": 0.021402675097736676, + "grad_norm": 365.15484619140625, + "learning_rate": 1.4264984227129337e-06, + "loss": 32.2812, + "step": 2261 + }, + { + "epoch": 0.02141214111945173, + "grad_norm": 3.1946399211883545, + "learning_rate": 1.4271293375394322e-06, + "loss": 0.9727, + "step": 2262 + }, + { + "epoch": 0.021421607141166782, + "grad_norm": 744.307373046875, + "learning_rate": 1.4277602523659304e-06, + "loss": 67.1875, + "step": 2263 + }, + { + "epoch": 0.021431073162881835, + "grad_norm": 719.1862182617188, + "learning_rate": 1.428391167192429e-06, + "loss": 63.0781, + "step": 2264 + }, + { + "epoch": 0.021440539184596888, + "grad_norm": 3.0538268089294434, + "learning_rate": 1.4290220820189274e-06, + "loss": 0.8662, + "step": 2265 + }, + { + "epoch": 0.021450005206311944, + "grad_norm": 273.0624084472656, + "learning_rate": 1.429652996845426e-06, + "loss": 30.7656, + "step": 2266 + }, + { + "epoch": 0.021459471228026997, + "grad_norm": 321.3061218261719, + "learning_rate": 1.4302839116719242e-06, + "loss": 46.1719, + "step": 2267 + }, + { + "epoch": 0.02146893724974205, + "grad_norm": 362.7535400390625, + "learning_rate": 1.4309148264984225e-06, + "loss": 30.5938, + "step": 2268 + }, + { + "epoch": 0.021478403271457103, + "grad_norm": 4.036404609680176, + "learning_rate": 1.431545741324921e-06, + "loss": 0.9126, + "step": 2269 + }, + { + "epoch": 0.02148786929317216, + "grad_norm": 680.1099853515625, + "learning_rate": 1.4321766561514195e-06, + "loss": 52.9688, + "step": 2270 + }, + { + "epoch": 0.021497335314887213, + "grad_norm": 353.4436950683594, + "learning_rate": 1.432807570977918e-06, + "loss": 33.0938, + "step": 2271 + }, + { + "epoch": 0.021506801336602266, + "grad_norm": 352.1375732421875, + "learning_rate": 1.4334384858044165e-06, + "loss": 33.7656, + "step": 2272 + }, + { + "epoch": 0.02151626735831732, + "grad_norm": 2.380692958831787, + "learning_rate": 1.434069400630915e-06, + "loss": 0.8208, + "step": 2273 + }, + { + "epoch": 0.021525733380032375, + "grad_norm": 214.3273468017578, + "learning_rate": 1.434700315457413e-06, + "loss": 27.2812, + "step": 2274 + }, + { + "epoch": 0.02153519940174743, + "grad_norm": 1165.9039306640625, + "learning_rate": 1.4353312302839115e-06, + "loss": 65.0938, + "step": 2275 + }, + { + "epoch": 0.02154466542346248, + "grad_norm": 1169.017822265625, + "learning_rate": 1.43596214511041e-06, + "loss": 48.0039, + "step": 2276 + }, + { + "epoch": 0.021554131445177534, + "grad_norm": 959.5003051757812, + "learning_rate": 1.4365930599369085e-06, + "loss": 71.0, + "step": 2277 + }, + { + "epoch": 0.021563597466892587, + "grad_norm": 851.91796875, + "learning_rate": 1.437223974763407e-06, + "loss": 64.3438, + "step": 2278 + }, + { + "epoch": 0.021573063488607644, + "grad_norm": 289.9927673339844, + "learning_rate": 1.4378548895899053e-06, + "loss": 37.3594, + "step": 2279 + }, + { + "epoch": 0.021582529510322697, + "grad_norm": 3.5733699798583984, + "learning_rate": 1.4384858044164038e-06, + "loss": 0.8271, + "step": 2280 + }, + { + "epoch": 0.02159199553203775, + "grad_norm": 2.9456920623779297, + "learning_rate": 1.439116719242902e-06, + "loss": 0.8926, + "step": 2281 + }, + { + "epoch": 0.021601461553752803, + "grad_norm": 345.3650207519531, + "learning_rate": 1.4397476340694005e-06, + "loss": 32.2188, + "step": 2282 + }, + { + "epoch": 0.02161092757546786, + "grad_norm": 495.00103759765625, + "learning_rate": 1.440378548895899e-06, + "loss": 26.9844, + "step": 2283 + }, + { + "epoch": 0.021620393597182912, + "grad_norm": 279.93658447265625, + "learning_rate": 1.4410094637223975e-06, + "loss": 45.5469, + "step": 2284 + }, + { + "epoch": 0.021629859618897965, + "grad_norm": 742.8081665039062, + "learning_rate": 1.4416403785488958e-06, + "loss": 67.0469, + "step": 2285 + }, + { + "epoch": 0.02163932564061302, + "grad_norm": 1420.2320556640625, + "learning_rate": 1.4422712933753943e-06, + "loss": 39.4766, + "step": 2286 + }, + { + "epoch": 0.021648791662328075, + "grad_norm": 552.4638061523438, + "learning_rate": 1.4429022082018926e-06, + "loss": 44.7812, + "step": 2287 + }, + { + "epoch": 0.021658257684043128, + "grad_norm": 1008.65380859375, + "learning_rate": 1.443533123028391e-06, + "loss": 50.9766, + "step": 2288 + }, + { + "epoch": 0.02166772370575818, + "grad_norm": 340.0715026855469, + "learning_rate": 1.4441640378548896e-06, + "loss": 27.3594, + "step": 2289 + }, + { + "epoch": 0.021677189727473234, + "grad_norm": 451.9587097167969, + "learning_rate": 1.4447949526813878e-06, + "loss": 43.6094, + "step": 2290 + }, + { + "epoch": 0.021686655749188287, + "grad_norm": 290.93792724609375, + "learning_rate": 1.4454258675078863e-06, + "loss": 34.5938, + "step": 2291 + }, + { + "epoch": 0.021696121770903343, + "grad_norm": 261.5824890136719, + "learning_rate": 1.4460567823343848e-06, + "loss": 31.7031, + "step": 2292 + }, + { + "epoch": 0.021705587792618396, + "grad_norm": 266.9480895996094, + "learning_rate": 1.4466876971608833e-06, + "loss": 35.9844, + "step": 2293 + }, + { + "epoch": 0.02171505381433345, + "grad_norm": 269.03546142578125, + "learning_rate": 1.4473186119873816e-06, + "loss": 31.4844, + "step": 2294 + }, + { + "epoch": 0.021724519836048502, + "grad_norm": 397.3854675292969, + "learning_rate": 1.44794952681388e-06, + "loss": 33.0312, + "step": 2295 + }, + { + "epoch": 0.02173398585776356, + "grad_norm": 405.61083984375, + "learning_rate": 1.4485804416403784e-06, + "loss": 74.9375, + "step": 2296 + }, + { + "epoch": 0.021743451879478612, + "grad_norm": 2.845357656478882, + "learning_rate": 1.4492113564668769e-06, + "loss": 0.9363, + "step": 2297 + }, + { + "epoch": 0.021752917901193665, + "grad_norm": 455.04815673828125, + "learning_rate": 1.4498422712933754e-06, + "loss": 32.5625, + "step": 2298 + }, + { + "epoch": 0.021762383922908718, + "grad_norm": 388.98828125, + "learning_rate": 1.4504731861198739e-06, + "loss": 28.9688, + "step": 2299 + }, + { + "epoch": 0.021771849944623774, + "grad_norm": 1282.2784423828125, + "learning_rate": 1.4511041009463723e-06, + "loss": 42.4766, + "step": 2300 + }, + { + "epoch": 0.021781315966338827, + "grad_norm": 232.36578369140625, + "learning_rate": 1.4517350157728704e-06, + "loss": 29.7188, + "step": 2301 + }, + { + "epoch": 0.02179078198805388, + "grad_norm": 427.59478759765625, + "learning_rate": 1.452365930599369e-06, + "loss": 34.2812, + "step": 2302 + }, + { + "epoch": 0.021800248009768933, + "grad_norm": 453.0247802734375, + "learning_rate": 1.4529968454258674e-06, + "loss": 62.875, + "step": 2303 + }, + { + "epoch": 0.02180971403148399, + "grad_norm": 277.156982421875, + "learning_rate": 1.453627760252366e-06, + "loss": 37.8438, + "step": 2304 + }, + { + "epoch": 0.021819180053199043, + "grad_norm": 519.8728637695312, + "learning_rate": 1.4542586750788644e-06, + "loss": 58.6094, + "step": 2305 + }, + { + "epoch": 0.021828646074914096, + "grad_norm": 688.0687866210938, + "learning_rate": 1.4548895899053629e-06, + "loss": 61.8125, + "step": 2306 + }, + { + "epoch": 0.02183811209662915, + "grad_norm": 3.42303729057312, + "learning_rate": 1.455520504731861e-06, + "loss": 0.9102, + "step": 2307 + }, + { + "epoch": 0.021847578118344202, + "grad_norm": 362.2822265625, + "learning_rate": 1.4561514195583594e-06, + "loss": 31.75, + "step": 2308 + }, + { + "epoch": 0.02185704414005926, + "grad_norm": 498.8395080566406, + "learning_rate": 1.456782334384858e-06, + "loss": 24.3672, + "step": 2309 + }, + { + "epoch": 0.02186651016177431, + "grad_norm": 371.4609069824219, + "learning_rate": 1.4574132492113564e-06, + "loss": 42.5156, + "step": 2310 + }, + { + "epoch": 0.021875976183489364, + "grad_norm": 691.4654541015625, + "learning_rate": 1.458044164037855e-06, + "loss": 83.4688, + "step": 2311 + }, + { + "epoch": 0.021885442205204417, + "grad_norm": 264.5074157714844, + "learning_rate": 1.4586750788643532e-06, + "loss": 30.2031, + "step": 2312 + }, + { + "epoch": 0.021894908226919474, + "grad_norm": 298.0083923339844, + "learning_rate": 1.4593059936908517e-06, + "loss": 29.9531, + "step": 2313 + }, + { + "epoch": 0.021904374248634527, + "grad_norm": 475.6119384765625, + "learning_rate": 1.45993690851735e-06, + "loss": 40.7656, + "step": 2314 + }, + { + "epoch": 0.02191384027034958, + "grad_norm": 583.7343139648438, + "learning_rate": 1.4605678233438485e-06, + "loss": 41.6484, + "step": 2315 + }, + { + "epoch": 0.021923306292064633, + "grad_norm": 623.4053344726562, + "learning_rate": 1.461198738170347e-06, + "loss": 66.9062, + "step": 2316 + }, + { + "epoch": 0.02193277231377969, + "grad_norm": 426.6863708496094, + "learning_rate": 1.4618296529968455e-06, + "loss": 30.6719, + "step": 2317 + }, + { + "epoch": 0.021942238335494742, + "grad_norm": 760.4212036132812, + "learning_rate": 1.4624605678233437e-06, + "loss": 67.5, + "step": 2318 + }, + { + "epoch": 0.021951704357209795, + "grad_norm": 666.774658203125, + "learning_rate": 1.4630914826498422e-06, + "loss": 68.6094, + "step": 2319 + }, + { + "epoch": 0.02196117037892485, + "grad_norm": 345.9541015625, + "learning_rate": 1.4637223974763407e-06, + "loss": 29.0, + "step": 2320 + }, + { + "epoch": 0.0219706364006399, + "grad_norm": 309.9389343261719, + "learning_rate": 1.464353312302839e-06, + "loss": 34.8906, + "step": 2321 + }, + { + "epoch": 0.021980102422354958, + "grad_norm": 496.60577392578125, + "learning_rate": 1.4649842271293375e-06, + "loss": 72.7344, + "step": 2322 + }, + { + "epoch": 0.02198956844407001, + "grad_norm": 1144.9000244140625, + "learning_rate": 1.4656151419558358e-06, + "loss": 33.6562, + "step": 2323 + }, + { + "epoch": 0.021999034465785064, + "grad_norm": 361.1702575683594, + "learning_rate": 1.4662460567823343e-06, + "loss": 36.9219, + "step": 2324 + }, + { + "epoch": 0.022008500487500117, + "grad_norm": 833.6679077148438, + "learning_rate": 1.4668769716088328e-06, + "loss": 48.5938, + "step": 2325 + }, + { + "epoch": 0.022017966509215173, + "grad_norm": 1110.737548828125, + "learning_rate": 1.4675078864353313e-06, + "loss": 41.7344, + "step": 2326 + }, + { + "epoch": 0.022027432530930226, + "grad_norm": 3.564650297164917, + "learning_rate": 1.4681388012618298e-06, + "loss": 1.0181, + "step": 2327 + }, + { + "epoch": 0.02203689855264528, + "grad_norm": 600.9856567382812, + "learning_rate": 1.468769716088328e-06, + "loss": 77.875, + "step": 2328 + }, + { + "epoch": 0.022046364574360332, + "grad_norm": 983.251220703125, + "learning_rate": 1.4694006309148263e-06, + "loss": 50.9219, + "step": 2329 + }, + { + "epoch": 0.02205583059607539, + "grad_norm": 2.533923864364624, + "learning_rate": 1.4700315457413248e-06, + "loss": 0.7642, + "step": 2330 + }, + { + "epoch": 0.022065296617790442, + "grad_norm": 505.85845947265625, + "learning_rate": 1.4706624605678233e-06, + "loss": 40.7422, + "step": 2331 + }, + { + "epoch": 0.022074762639505495, + "grad_norm": 583.2932739257812, + "learning_rate": 1.4712933753943218e-06, + "loss": 64.7344, + "step": 2332 + }, + { + "epoch": 0.022084228661220548, + "grad_norm": 292.03143310546875, + "learning_rate": 1.4719242902208203e-06, + "loss": 25.7969, + "step": 2333 + }, + { + "epoch": 0.0220936946829356, + "grad_norm": 440.4851379394531, + "learning_rate": 1.4725552050473184e-06, + "loss": 35.625, + "step": 2334 + }, + { + "epoch": 0.022103160704650657, + "grad_norm": 474.417724609375, + "learning_rate": 1.4731861198738168e-06, + "loss": 45.1406, + "step": 2335 + }, + { + "epoch": 0.02211262672636571, + "grad_norm": 221.05162048339844, + "learning_rate": 1.4738170347003153e-06, + "loss": 29.8281, + "step": 2336 + }, + { + "epoch": 0.022122092748080763, + "grad_norm": 406.9820861816406, + "learning_rate": 1.4744479495268138e-06, + "loss": 41.9375, + "step": 2337 + }, + { + "epoch": 0.022131558769795816, + "grad_norm": 714.3577270507812, + "learning_rate": 1.4750788643533123e-06, + "loss": 41.1875, + "step": 2338 + }, + { + "epoch": 0.022141024791510873, + "grad_norm": 1455.33349609375, + "learning_rate": 1.4757097791798108e-06, + "loss": 84.6562, + "step": 2339 + }, + { + "epoch": 0.022150490813225926, + "grad_norm": 306.5000305175781, + "learning_rate": 1.476340694006309e-06, + "loss": 38.5938, + "step": 2340 + }, + { + "epoch": 0.02215995683494098, + "grad_norm": 709.8580322265625, + "learning_rate": 1.4769716088328074e-06, + "loss": 80.7812, + "step": 2341 + }, + { + "epoch": 0.022169422856656032, + "grad_norm": 1080.921142578125, + "learning_rate": 1.4776025236593059e-06, + "loss": 52.0859, + "step": 2342 + }, + { + "epoch": 0.02217888887837109, + "grad_norm": 738.951904296875, + "learning_rate": 1.4782334384858044e-06, + "loss": 58.9531, + "step": 2343 + }, + { + "epoch": 0.02218835490008614, + "grad_norm": 1164.78369140625, + "learning_rate": 1.4788643533123029e-06, + "loss": 64.3672, + "step": 2344 + }, + { + "epoch": 0.022197820921801194, + "grad_norm": 615.897705078125, + "learning_rate": 1.4794952681388011e-06, + "loss": 54.9766, + "step": 2345 + }, + { + "epoch": 0.022207286943516247, + "grad_norm": 483.5643005371094, + "learning_rate": 1.4801261829652996e-06, + "loss": 58.0156, + "step": 2346 + }, + { + "epoch": 0.022216752965231304, + "grad_norm": 262.18603515625, + "learning_rate": 1.4807570977917981e-06, + "loss": 27.25, + "step": 2347 + }, + { + "epoch": 0.022226218986946357, + "grad_norm": 290.754150390625, + "learning_rate": 1.4813880126182964e-06, + "loss": 29.9844, + "step": 2348 + }, + { + "epoch": 0.02223568500866141, + "grad_norm": 928.41845703125, + "learning_rate": 1.482018927444795e-06, + "loss": 30.7344, + "step": 2349 + }, + { + "epoch": 0.022245151030376463, + "grad_norm": 1181.9990234375, + "learning_rate": 1.4826498422712934e-06, + "loss": 55.0938, + "step": 2350 + }, + { + "epoch": 0.022254617052091516, + "grad_norm": 370.62176513671875, + "learning_rate": 1.4832807570977917e-06, + "loss": 29.0156, + "step": 2351 + }, + { + "epoch": 0.022264083073806572, + "grad_norm": 258.2720031738281, + "learning_rate": 1.4839116719242902e-06, + "loss": 26.8594, + "step": 2352 + }, + { + "epoch": 0.022273549095521625, + "grad_norm": 609.595703125, + "learning_rate": 1.4845425867507887e-06, + "loss": 48.7109, + "step": 2353 + }, + { + "epoch": 0.02228301511723668, + "grad_norm": 252.6787872314453, + "learning_rate": 1.4851735015772872e-06, + "loss": 27.9062, + "step": 2354 + }, + { + "epoch": 0.02229248113895173, + "grad_norm": 688.6142578125, + "learning_rate": 1.4858044164037854e-06, + "loss": 44.5938, + "step": 2355 + }, + { + "epoch": 0.022301947160666788, + "grad_norm": 369.186279296875, + "learning_rate": 1.4864353312302837e-06, + "loss": 37.75, + "step": 2356 + }, + { + "epoch": 0.02231141318238184, + "grad_norm": 589.7089233398438, + "learning_rate": 1.4870662460567822e-06, + "loss": 55.375, + "step": 2357 + }, + { + "epoch": 0.022320879204096894, + "grad_norm": 868.7932739257812, + "learning_rate": 1.4876971608832807e-06, + "loss": 24.0156, + "step": 2358 + }, + { + "epoch": 0.022330345225811947, + "grad_norm": 2666.613037109375, + "learning_rate": 1.4883280757097792e-06, + "loss": 34.2812, + "step": 2359 + }, + { + "epoch": 0.022339811247527003, + "grad_norm": 330.0297546386719, + "learning_rate": 1.4889589905362777e-06, + "loss": 28.5156, + "step": 2360 + }, + { + "epoch": 0.022349277269242056, + "grad_norm": 252.2090301513672, + "learning_rate": 1.489589905362776e-06, + "loss": 29.5938, + "step": 2361 + }, + { + "epoch": 0.02235874329095711, + "grad_norm": 414.0161437988281, + "learning_rate": 1.4902208201892742e-06, + "loss": 41.4219, + "step": 2362 + }, + { + "epoch": 0.022368209312672162, + "grad_norm": 2.931870222091675, + "learning_rate": 1.4908517350157727e-06, + "loss": 1.0073, + "step": 2363 + }, + { + "epoch": 0.022377675334387215, + "grad_norm": 406.5690002441406, + "learning_rate": 1.4914826498422712e-06, + "loss": 39.7031, + "step": 2364 + }, + { + "epoch": 0.022387141356102272, + "grad_norm": 2.869478225708008, + "learning_rate": 1.4921135646687697e-06, + "loss": 0.7725, + "step": 2365 + }, + { + "epoch": 0.022396607377817325, + "grad_norm": 573.6174926757812, + "learning_rate": 1.4927444794952682e-06, + "loss": 83.7812, + "step": 2366 + }, + { + "epoch": 0.022406073399532378, + "grad_norm": 296.5265197753906, + "learning_rate": 1.4933753943217665e-06, + "loss": 30.6719, + "step": 2367 + }, + { + "epoch": 0.02241553942124743, + "grad_norm": 872.9573364257812, + "learning_rate": 1.4940063091482648e-06, + "loss": 32.4375, + "step": 2368 + }, + { + "epoch": 0.022425005442962487, + "grad_norm": 374.91571044921875, + "learning_rate": 1.4946372239747633e-06, + "loss": 44.2812, + "step": 2369 + }, + { + "epoch": 0.02243447146467754, + "grad_norm": 217.17263793945312, + "learning_rate": 1.4952681388012618e-06, + "loss": 28.75, + "step": 2370 + }, + { + "epoch": 0.022443937486392593, + "grad_norm": 358.05438232421875, + "learning_rate": 1.4958990536277603e-06, + "loss": 31.0625, + "step": 2371 + }, + { + "epoch": 0.022453403508107646, + "grad_norm": 380.25238037109375, + "learning_rate": 1.4965299684542588e-06, + "loss": 27.7109, + "step": 2372 + }, + { + "epoch": 0.022462869529822703, + "grad_norm": 1144.12646484375, + "learning_rate": 1.497160883280757e-06, + "loss": 58.4375, + "step": 2373 + }, + { + "epoch": 0.022472335551537756, + "grad_norm": 3.1962244510650635, + "learning_rate": 1.4977917981072555e-06, + "loss": 0.9067, + "step": 2374 + }, + { + "epoch": 0.02248180157325281, + "grad_norm": 474.6938781738281, + "learning_rate": 1.4984227129337538e-06, + "loss": 42.8906, + "step": 2375 + }, + { + "epoch": 0.022491267594967862, + "grad_norm": 236.0217742919922, + "learning_rate": 1.4990536277602523e-06, + "loss": 31.4062, + "step": 2376 + }, + { + "epoch": 0.022500733616682915, + "grad_norm": 1512.5703125, + "learning_rate": 1.4996845425867508e-06, + "loss": 34.6094, + "step": 2377 + }, + { + "epoch": 0.02251019963839797, + "grad_norm": 328.22357177734375, + "learning_rate": 1.500315457413249e-06, + "loss": 45.4688, + "step": 2378 + }, + { + "epoch": 0.022519665660113024, + "grad_norm": 273.11151123046875, + "learning_rate": 1.5009463722397476e-06, + "loss": 31.625, + "step": 2379 + }, + { + "epoch": 0.022529131681828077, + "grad_norm": 455.5160217285156, + "learning_rate": 1.501577287066246e-06, + "loss": 51.6875, + "step": 2380 + }, + { + "epoch": 0.02253859770354313, + "grad_norm": 711.7984619140625, + "learning_rate": 1.5022082018927443e-06, + "loss": 68.5938, + "step": 2381 + }, + { + "epoch": 0.022548063725258187, + "grad_norm": 554.297607421875, + "learning_rate": 1.5028391167192428e-06, + "loss": 28.7969, + "step": 2382 + }, + { + "epoch": 0.02255752974697324, + "grad_norm": 1103.74267578125, + "learning_rate": 1.5034700315457411e-06, + "loss": 61.9531, + "step": 2383 + }, + { + "epoch": 0.022566995768688293, + "grad_norm": 393.7529602050781, + "learning_rate": 1.5041009463722396e-06, + "loss": 31.2656, + "step": 2384 + }, + { + "epoch": 0.022576461790403346, + "grad_norm": 240.2495880126953, + "learning_rate": 1.504731861198738e-06, + "loss": 28.6406, + "step": 2385 + }, + { + "epoch": 0.022585927812118402, + "grad_norm": 518.358154296875, + "learning_rate": 1.5053627760252366e-06, + "loss": 35.8828, + "step": 2386 + }, + { + "epoch": 0.022595393833833455, + "grad_norm": 314.09832763671875, + "learning_rate": 1.505993690851735e-06, + "loss": 34.375, + "step": 2387 + }, + { + "epoch": 0.02260485985554851, + "grad_norm": 469.20672607421875, + "learning_rate": 1.5066246056782334e-06, + "loss": 32.9219, + "step": 2388 + }, + { + "epoch": 0.02261432587726356, + "grad_norm": 883.1495971679688, + "learning_rate": 1.5072555205047316e-06, + "loss": 40.1094, + "step": 2389 + }, + { + "epoch": 0.022623791898978618, + "grad_norm": 441.7584533691406, + "learning_rate": 1.5078864353312301e-06, + "loss": 32.9844, + "step": 2390 + }, + { + "epoch": 0.02263325792069367, + "grad_norm": 396.71051025390625, + "learning_rate": 1.5085173501577286e-06, + "loss": 32.2656, + "step": 2391 + }, + { + "epoch": 0.022642723942408724, + "grad_norm": 419.2718505859375, + "learning_rate": 1.5091482649842271e-06, + "loss": 40.0625, + "step": 2392 + }, + { + "epoch": 0.022652189964123777, + "grad_norm": 742.7787475585938, + "learning_rate": 1.5097791798107256e-06, + "loss": 75.6094, + "step": 2393 + }, + { + "epoch": 0.02266165598583883, + "grad_norm": 878.5562744140625, + "learning_rate": 1.510410094637224e-06, + "loss": 73.125, + "step": 2394 + }, + { + "epoch": 0.022671122007553886, + "grad_norm": 282.17034912109375, + "learning_rate": 1.5110410094637222e-06, + "loss": 29.6875, + "step": 2395 + }, + { + "epoch": 0.02268058802926894, + "grad_norm": 1083.0106201171875, + "learning_rate": 1.5116719242902207e-06, + "loss": 44.5859, + "step": 2396 + }, + { + "epoch": 0.022690054050983993, + "grad_norm": 1024.00390625, + "learning_rate": 1.5123028391167192e-06, + "loss": 64.7812, + "step": 2397 + }, + { + "epoch": 0.022699520072699046, + "grad_norm": 758.758056640625, + "learning_rate": 1.5129337539432177e-06, + "loss": 83.3125, + "step": 2398 + }, + { + "epoch": 0.022708986094414102, + "grad_norm": 518.4668579101562, + "learning_rate": 1.5135646687697162e-06, + "loss": 50.5938, + "step": 2399 + }, + { + "epoch": 0.022718452116129155, + "grad_norm": 287.4397888183594, + "learning_rate": 1.5141955835962144e-06, + "loss": 30.5781, + "step": 2400 + }, + { + "epoch": 0.022727918137844208, + "grad_norm": 256.382080078125, + "learning_rate": 1.5148264984227127e-06, + "loss": 28.9062, + "step": 2401 + }, + { + "epoch": 0.02273738415955926, + "grad_norm": 569.135009765625, + "learning_rate": 1.5154574132492112e-06, + "loss": 38.8594, + "step": 2402 + }, + { + "epoch": 0.022746850181274317, + "grad_norm": 1330.0994873046875, + "learning_rate": 1.5160883280757097e-06, + "loss": 69.625, + "step": 2403 + }, + { + "epoch": 0.02275631620298937, + "grad_norm": 688.12548828125, + "learning_rate": 1.5167192429022082e-06, + "loss": 36.6562, + "step": 2404 + }, + { + "epoch": 0.022765782224704424, + "grad_norm": 908.9646606445312, + "learning_rate": 1.5173501577287065e-06, + "loss": 92.2188, + "step": 2405 + }, + { + "epoch": 0.022775248246419477, + "grad_norm": 2.612150192260742, + "learning_rate": 1.517981072555205e-06, + "loss": 0.8306, + "step": 2406 + }, + { + "epoch": 0.02278471426813453, + "grad_norm": 321.3191223144531, + "learning_rate": 1.5186119873817035e-06, + "loss": 32.4844, + "step": 2407 + }, + { + "epoch": 0.022794180289849586, + "grad_norm": 584.8010864257812, + "learning_rate": 1.5192429022082017e-06, + "loss": 31.875, + "step": 2408 + }, + { + "epoch": 0.02280364631156464, + "grad_norm": 325.9168395996094, + "learning_rate": 1.5198738170347002e-06, + "loss": 30.75, + "step": 2409 + }, + { + "epoch": 0.022813112333279692, + "grad_norm": 277.4589538574219, + "learning_rate": 1.5205047318611987e-06, + "loss": 43.6094, + "step": 2410 + }, + { + "epoch": 0.022822578354994745, + "grad_norm": 448.549560546875, + "learning_rate": 1.521135646687697e-06, + "loss": 32.6875, + "step": 2411 + }, + { + "epoch": 0.0228320443767098, + "grad_norm": 583.8766479492188, + "learning_rate": 1.5217665615141955e-06, + "loss": 57.5625, + "step": 2412 + }, + { + "epoch": 0.022841510398424855, + "grad_norm": 921.271484375, + "learning_rate": 1.522397476340694e-06, + "loss": 82.5781, + "step": 2413 + }, + { + "epoch": 0.022850976420139908, + "grad_norm": 551.33154296875, + "learning_rate": 1.5230283911671925e-06, + "loss": 32.7812, + "step": 2414 + }, + { + "epoch": 0.02286044244185496, + "grad_norm": 581.7525634765625, + "learning_rate": 1.5236593059936908e-06, + "loss": 41.1016, + "step": 2415 + }, + { + "epoch": 0.022869908463570017, + "grad_norm": 736.3814697265625, + "learning_rate": 1.524290220820189e-06, + "loss": 66.0781, + "step": 2416 + }, + { + "epoch": 0.02287937448528507, + "grad_norm": 442.8851318359375, + "learning_rate": 1.5249211356466875e-06, + "loss": 46.3672, + "step": 2417 + }, + { + "epoch": 0.022888840507000123, + "grad_norm": 362.6528015136719, + "learning_rate": 1.525552050473186e-06, + "loss": 35.5781, + "step": 2418 + }, + { + "epoch": 0.022898306528715176, + "grad_norm": 283.0700988769531, + "learning_rate": 1.5261829652996845e-06, + "loss": 29.5156, + "step": 2419 + }, + { + "epoch": 0.02290777255043023, + "grad_norm": 469.7838134765625, + "learning_rate": 1.526813880126183e-06, + "loss": 32.0156, + "step": 2420 + }, + { + "epoch": 0.022917238572145286, + "grad_norm": 1677.498046875, + "learning_rate": 1.5274447949526815e-06, + "loss": 78.0938, + "step": 2421 + }, + { + "epoch": 0.02292670459386034, + "grad_norm": 1077.3629150390625, + "learning_rate": 1.5280757097791796e-06, + "loss": 80.5938, + "step": 2422 + }, + { + "epoch": 0.02293617061557539, + "grad_norm": 333.0930480957031, + "learning_rate": 1.528706624605678e-06, + "loss": 27.0781, + "step": 2423 + }, + { + "epoch": 0.022945636637290445, + "grad_norm": 498.6107177734375, + "learning_rate": 1.5293375394321766e-06, + "loss": 57.25, + "step": 2424 + }, + { + "epoch": 0.0229551026590055, + "grad_norm": 760.1676025390625, + "learning_rate": 1.529968454258675e-06, + "loss": 76.9219, + "step": 2425 + }, + { + "epoch": 0.022964568680720554, + "grad_norm": 468.530029296875, + "learning_rate": 1.5305993690851736e-06, + "loss": 33.4531, + "step": 2426 + }, + { + "epoch": 0.022974034702435607, + "grad_norm": 1185.3240966796875, + "learning_rate": 1.5312302839116718e-06, + "loss": 61.4062, + "step": 2427 + }, + { + "epoch": 0.02298350072415066, + "grad_norm": 575.8603515625, + "learning_rate": 1.5318611987381701e-06, + "loss": 30.3125, + "step": 2428 + }, + { + "epoch": 0.022992966745865717, + "grad_norm": 507.6539001464844, + "learning_rate": 1.5324921135646686e-06, + "loss": 68.1094, + "step": 2429 + }, + { + "epoch": 0.02300243276758077, + "grad_norm": 420.17901611328125, + "learning_rate": 1.533123028391167e-06, + "loss": 31.7031, + "step": 2430 + }, + { + "epoch": 0.023011898789295823, + "grad_norm": 441.7903137207031, + "learning_rate": 1.5337539432176656e-06, + "loss": 82.5625, + "step": 2431 + }, + { + "epoch": 0.023021364811010876, + "grad_norm": 494.2134704589844, + "learning_rate": 1.534384858044164e-06, + "loss": 28.8906, + "step": 2432 + }, + { + "epoch": 0.02303083083272593, + "grad_norm": 460.95538330078125, + "learning_rate": 1.5350157728706624e-06, + "loss": 59.1562, + "step": 2433 + }, + { + "epoch": 0.023040296854440985, + "grad_norm": 869.9039306640625, + "learning_rate": 1.5356466876971609e-06, + "loss": 52.8047, + "step": 2434 + }, + { + "epoch": 0.023049762876156038, + "grad_norm": 292.0153503417969, + "learning_rate": 1.5362776025236591e-06, + "loss": 32.3438, + "step": 2435 + }, + { + "epoch": 0.02305922889787109, + "grad_norm": 344.89337158203125, + "learning_rate": 1.5369085173501576e-06, + "loss": 27.3594, + "step": 2436 + }, + { + "epoch": 0.023068694919586144, + "grad_norm": 955.6107177734375, + "learning_rate": 1.5375394321766561e-06, + "loss": 58.6875, + "step": 2437 + }, + { + "epoch": 0.0230781609413012, + "grad_norm": 376.27496337890625, + "learning_rate": 1.5381703470031544e-06, + "loss": 47.9062, + "step": 2438 + }, + { + "epoch": 0.023087626963016254, + "grad_norm": 848.0546875, + "learning_rate": 1.538801261829653e-06, + "loss": 65.7188, + "step": 2439 + }, + { + "epoch": 0.023097092984731307, + "grad_norm": 748.07275390625, + "learning_rate": 1.5394321766561514e-06, + "loss": 42.2031, + "step": 2440 + }, + { + "epoch": 0.02310655900644636, + "grad_norm": 299.4213562011719, + "learning_rate": 1.5400630914826499e-06, + "loss": 28.9219, + "step": 2441 + }, + { + "epoch": 0.023116025028161416, + "grad_norm": 791.9278564453125, + "learning_rate": 1.5406940063091482e-06, + "loss": 37.9219, + "step": 2442 + }, + { + "epoch": 0.02312549104987647, + "grad_norm": 1199.754638671875, + "learning_rate": 1.5413249211356467e-06, + "loss": 63.7344, + "step": 2443 + }, + { + "epoch": 0.023134957071591522, + "grad_norm": 535.5100708007812, + "learning_rate": 1.541955835962145e-06, + "loss": 55.3438, + "step": 2444 + }, + { + "epoch": 0.023144423093306575, + "grad_norm": 666.8403930664062, + "learning_rate": 1.5425867507886434e-06, + "loss": 34.9688, + "step": 2445 + }, + { + "epoch": 0.02315388911502163, + "grad_norm": 390.2201232910156, + "learning_rate": 1.543217665615142e-06, + "loss": 57.4688, + "step": 2446 + }, + { + "epoch": 0.023163355136736685, + "grad_norm": 587.0618896484375, + "learning_rate": 1.5438485804416404e-06, + "loss": 50.0312, + "step": 2447 + }, + { + "epoch": 0.023172821158451738, + "grad_norm": 776.1110229492188, + "learning_rate": 1.544479495268139e-06, + "loss": 57.3125, + "step": 2448 + }, + { + "epoch": 0.02318228718016679, + "grad_norm": 654.7110595703125, + "learning_rate": 1.545110410094637e-06, + "loss": 54.0469, + "step": 2449 + }, + { + "epoch": 0.023191753201881844, + "grad_norm": 1110.5638427734375, + "learning_rate": 1.5457413249211355e-06, + "loss": 54.1875, + "step": 2450 + }, + { + "epoch": 0.0232012192235969, + "grad_norm": 504.7755432128906, + "learning_rate": 1.546372239747634e-06, + "loss": 47.6875, + "step": 2451 + }, + { + "epoch": 0.023210685245311953, + "grad_norm": 197.56910705566406, + "learning_rate": 1.5470031545741325e-06, + "loss": 29.3594, + "step": 2452 + }, + { + "epoch": 0.023220151267027006, + "grad_norm": 801.2271728515625, + "learning_rate": 1.547634069400631e-06, + "loss": 43.8281, + "step": 2453 + }, + { + "epoch": 0.02322961728874206, + "grad_norm": 527.1027221679688, + "learning_rate": 1.5482649842271294e-06, + "loss": 52.4844, + "step": 2454 + }, + { + "epoch": 0.023239083310457116, + "grad_norm": 222.69725036621094, + "learning_rate": 1.5488958990536275e-06, + "loss": 23.5781, + "step": 2455 + }, + { + "epoch": 0.02324854933217217, + "grad_norm": 357.06939697265625, + "learning_rate": 1.549526813880126e-06, + "loss": 36.9531, + "step": 2456 + }, + { + "epoch": 0.02325801535388722, + "grad_norm": 431.2113342285156, + "learning_rate": 1.5501577287066245e-06, + "loss": 31.4844, + "step": 2457 + }, + { + "epoch": 0.023267481375602275, + "grad_norm": 611.5222778320312, + "learning_rate": 1.550788643533123e-06, + "loss": 33.9453, + "step": 2458 + }, + { + "epoch": 0.02327694739731733, + "grad_norm": 631.5997314453125, + "learning_rate": 1.5514195583596215e-06, + "loss": 44.2891, + "step": 2459 + }, + { + "epoch": 0.023286413419032384, + "grad_norm": 346.2403869628906, + "learning_rate": 1.5520504731861198e-06, + "loss": 34.3281, + "step": 2460 + }, + { + "epoch": 0.023295879440747437, + "grad_norm": 218.24069213867188, + "learning_rate": 1.5526813880126183e-06, + "loss": 29.5, + "step": 2461 + }, + { + "epoch": 0.02330534546246249, + "grad_norm": 349.93939208984375, + "learning_rate": 1.5533123028391165e-06, + "loss": 34.2812, + "step": 2462 + }, + { + "epoch": 0.023314811484177543, + "grad_norm": 1087.2655029296875, + "learning_rate": 1.553943217665615e-06, + "loss": 85.0469, + "step": 2463 + }, + { + "epoch": 0.0233242775058926, + "grad_norm": 350.50439453125, + "learning_rate": 1.5545741324921135e-06, + "loss": 38.0312, + "step": 2464 + }, + { + "epoch": 0.023333743527607653, + "grad_norm": 238.1165313720703, + "learning_rate": 1.555205047318612e-06, + "loss": 27.3438, + "step": 2465 + }, + { + "epoch": 0.023343209549322706, + "grad_norm": 366.4306335449219, + "learning_rate": 1.5558359621451103e-06, + "loss": 32.8125, + "step": 2466 + }, + { + "epoch": 0.02335267557103776, + "grad_norm": 274.64215087890625, + "learning_rate": 1.5564668769716088e-06, + "loss": 31.4688, + "step": 2467 + }, + { + "epoch": 0.023362141592752815, + "grad_norm": 177.6446533203125, + "learning_rate": 1.5570977917981073e-06, + "loss": 26.2969, + "step": 2468 + }, + { + "epoch": 0.023371607614467868, + "grad_norm": 673.96044921875, + "learning_rate": 1.5577287066246056e-06, + "loss": 40.4375, + "step": 2469 + }, + { + "epoch": 0.02338107363618292, + "grad_norm": 504.27056884765625, + "learning_rate": 1.558359621451104e-06, + "loss": 63.0312, + "step": 2470 + }, + { + "epoch": 0.023390539657897974, + "grad_norm": 697.6470336914062, + "learning_rate": 1.5589905362776023e-06, + "loss": 79.6562, + "step": 2471 + }, + { + "epoch": 0.02340000567961303, + "grad_norm": 712.579345703125, + "learning_rate": 1.5596214511041008e-06, + "loss": 39.7188, + "step": 2472 + }, + { + "epoch": 0.023409471701328084, + "grad_norm": 216.06570434570312, + "learning_rate": 1.5602523659305993e-06, + "loss": 28.3125, + "step": 2473 + }, + { + "epoch": 0.023418937723043137, + "grad_norm": 492.8212890625, + "learning_rate": 1.5608832807570978e-06, + "loss": 43.7969, + "step": 2474 + }, + { + "epoch": 0.02342840374475819, + "grad_norm": 484.0276794433594, + "learning_rate": 1.561514195583596e-06, + "loss": 34.2031, + "step": 2475 + }, + { + "epoch": 0.023437869766473243, + "grad_norm": 3.0328526496887207, + "learning_rate": 1.5621451104100946e-06, + "loss": 0.8733, + "step": 2476 + }, + { + "epoch": 0.0234473357881883, + "grad_norm": 415.3080139160156, + "learning_rate": 1.5627760252365929e-06, + "loss": 28.4688, + "step": 2477 + }, + { + "epoch": 0.023456801809903352, + "grad_norm": 595.7061157226562, + "learning_rate": 1.5634069400630914e-06, + "loss": 58.1562, + "step": 2478 + }, + { + "epoch": 0.023466267831618405, + "grad_norm": 294.48040771484375, + "learning_rate": 1.5640378548895899e-06, + "loss": 30.6875, + "step": 2479 + }, + { + "epoch": 0.023475733853333458, + "grad_norm": 463.8133850097656, + "learning_rate": 1.5646687697160884e-06, + "loss": 32.125, + "step": 2480 + }, + { + "epoch": 0.023485199875048515, + "grad_norm": 539.2545166015625, + "learning_rate": 1.5652996845425868e-06, + "loss": 60.3594, + "step": 2481 + }, + { + "epoch": 0.023494665896763568, + "grad_norm": 603.8751220703125, + "learning_rate": 1.565930599369085e-06, + "loss": 40.3984, + "step": 2482 + }, + { + "epoch": 0.02350413191847862, + "grad_norm": 520.0425415039062, + "learning_rate": 1.5665615141955834e-06, + "loss": 76.8438, + "step": 2483 + }, + { + "epoch": 0.023513597940193674, + "grad_norm": 450.4207458496094, + "learning_rate": 1.567192429022082e-06, + "loss": 34.5312, + "step": 2484 + }, + { + "epoch": 0.02352306396190873, + "grad_norm": 619.543701171875, + "learning_rate": 1.5678233438485804e-06, + "loss": 54.8984, + "step": 2485 + }, + { + "epoch": 0.023532529983623783, + "grad_norm": 635.2056884765625, + "learning_rate": 1.5684542586750789e-06, + "loss": 67.1719, + "step": 2486 + }, + { + "epoch": 0.023541996005338836, + "grad_norm": 1525.3709716796875, + "learning_rate": 1.5690851735015774e-06, + "loss": 39.1875, + "step": 2487 + }, + { + "epoch": 0.02355146202705389, + "grad_norm": 395.6950988769531, + "learning_rate": 1.5697160883280757e-06, + "loss": 30.0156, + "step": 2488 + }, + { + "epoch": 0.023560928048768946, + "grad_norm": 464.6424255371094, + "learning_rate": 1.570347003154574e-06, + "loss": 37.2344, + "step": 2489 + }, + { + "epoch": 0.023570394070484, + "grad_norm": 200.7718963623047, + "learning_rate": 1.5709779179810724e-06, + "loss": 25.4375, + "step": 2490 + }, + { + "epoch": 0.02357986009219905, + "grad_norm": 627.3775024414062, + "learning_rate": 1.571608832807571e-06, + "loss": 71.2812, + "step": 2491 + }, + { + "epoch": 0.023589326113914105, + "grad_norm": 741.108642578125, + "learning_rate": 1.5722397476340694e-06, + "loss": 32.0, + "step": 2492 + }, + { + "epoch": 0.023598792135629158, + "grad_norm": 255.10838317871094, + "learning_rate": 1.5728706624605677e-06, + "loss": 27.5156, + "step": 2493 + }, + { + "epoch": 0.023608258157344214, + "grad_norm": 685.4452514648438, + "learning_rate": 1.5735015772870662e-06, + "loss": 30.9297, + "step": 2494 + }, + { + "epoch": 0.023617724179059267, + "grad_norm": 429.3882751464844, + "learning_rate": 1.5741324921135647e-06, + "loss": 32.9062, + "step": 2495 + }, + { + "epoch": 0.02362719020077432, + "grad_norm": 785.0680541992188, + "learning_rate": 1.574763406940063e-06, + "loss": 50.0, + "step": 2496 + }, + { + "epoch": 0.023636656222489373, + "grad_norm": 1134.9925537109375, + "learning_rate": 1.5753943217665615e-06, + "loss": 50.8906, + "step": 2497 + }, + { + "epoch": 0.02364612224420443, + "grad_norm": 380.7203369140625, + "learning_rate": 1.57602523659306e-06, + "loss": 32.5469, + "step": 2498 + }, + { + "epoch": 0.023655588265919483, + "grad_norm": 607.430419921875, + "learning_rate": 1.5766561514195582e-06, + "loss": 42.6562, + "step": 2499 + }, + { + "epoch": 0.023665054287634536, + "grad_norm": 2.920228958129883, + "learning_rate": 1.5772870662460567e-06, + "loss": 0.918, + "step": 2500 + }, + { + "epoch": 0.02367452030934959, + "grad_norm": 620.69482421875, + "learning_rate": 1.5779179810725552e-06, + "loss": 46.3047, + "step": 2501 + }, + { + "epoch": 0.023683986331064645, + "grad_norm": 366.70196533203125, + "learning_rate": 1.5785488958990535e-06, + "loss": 32.9531, + "step": 2502 + }, + { + "epoch": 0.023693452352779698, + "grad_norm": 620.8310546875, + "learning_rate": 1.579179810725552e-06, + "loss": 63.8125, + "step": 2503 + }, + { + "epoch": 0.02370291837449475, + "grad_norm": 1540.321044921875, + "learning_rate": 1.5798107255520503e-06, + "loss": 56.5703, + "step": 2504 + }, + { + "epoch": 0.023712384396209804, + "grad_norm": 270.3115234375, + "learning_rate": 1.5804416403785488e-06, + "loss": 29.4531, + "step": 2505 + }, + { + "epoch": 0.023721850417924857, + "grad_norm": 1041.8233642578125, + "learning_rate": 1.5810725552050473e-06, + "loss": 41.875, + "step": 2506 + }, + { + "epoch": 0.023731316439639914, + "grad_norm": 3.472623109817505, + "learning_rate": 1.5817034700315458e-06, + "loss": 0.9434, + "step": 2507 + }, + { + "epoch": 0.023740782461354967, + "grad_norm": 719.4097290039062, + "learning_rate": 1.5823343848580442e-06, + "loss": 46.6719, + "step": 2508 + }, + { + "epoch": 0.02375024848307002, + "grad_norm": 241.29510498046875, + "learning_rate": 1.5829652996845425e-06, + "loss": 27.2812, + "step": 2509 + }, + { + "epoch": 0.023759714504785073, + "grad_norm": 2.912916898727417, + "learning_rate": 1.5835962145110408e-06, + "loss": 0.8245, + "step": 2510 + }, + { + "epoch": 0.02376918052650013, + "grad_norm": 278.57989501953125, + "learning_rate": 1.5842271293375393e-06, + "loss": 32.125, + "step": 2511 + }, + { + "epoch": 0.023778646548215182, + "grad_norm": 431.1790771484375, + "learning_rate": 1.5848580441640378e-06, + "loss": 29.9844, + "step": 2512 + }, + { + "epoch": 0.023788112569930235, + "grad_norm": 390.9819641113281, + "learning_rate": 1.5854889589905363e-06, + "loss": 40.8438, + "step": 2513 + }, + { + "epoch": 0.023797578591645288, + "grad_norm": 453.63037109375, + "learning_rate": 1.5861198738170348e-06, + "loss": 29.8047, + "step": 2514 + }, + { + "epoch": 0.023807044613360345, + "grad_norm": 744.235595703125, + "learning_rate": 1.586750788643533e-06, + "loss": 55.5781, + "step": 2515 + }, + { + "epoch": 0.023816510635075398, + "grad_norm": 380.7840881347656, + "learning_rate": 1.5873817034700313e-06, + "loss": 32.3438, + "step": 2516 + }, + { + "epoch": 0.02382597665679045, + "grad_norm": 516.2864990234375, + "learning_rate": 1.5880126182965298e-06, + "loss": 40.5625, + "step": 2517 + }, + { + "epoch": 0.023835442678505504, + "grad_norm": 552.4493408203125, + "learning_rate": 1.5886435331230283e-06, + "loss": 34.2344, + "step": 2518 + }, + { + "epoch": 0.023844908700220557, + "grad_norm": 631.7605590820312, + "learning_rate": 1.5892744479495268e-06, + "loss": 47.7188, + "step": 2519 + }, + { + "epoch": 0.023854374721935613, + "grad_norm": 199.6981964111328, + "learning_rate": 1.5899053627760253e-06, + "loss": 25.0781, + "step": 2520 + }, + { + "epoch": 0.023863840743650666, + "grad_norm": 783.0921630859375, + "learning_rate": 1.5905362776025236e-06, + "loss": 68.0, + "step": 2521 + }, + { + "epoch": 0.02387330676536572, + "grad_norm": 413.32989501953125, + "learning_rate": 1.5911671924290219e-06, + "loss": 69.0312, + "step": 2522 + }, + { + "epoch": 0.023882772787080772, + "grad_norm": 672.3653564453125, + "learning_rate": 1.5917981072555204e-06, + "loss": 63.9844, + "step": 2523 + }, + { + "epoch": 0.02389223880879583, + "grad_norm": 305.021240234375, + "learning_rate": 1.5924290220820189e-06, + "loss": 31.7969, + "step": 2524 + }, + { + "epoch": 0.02390170483051088, + "grad_norm": 3.0268609523773193, + "learning_rate": 1.5930599369085174e-06, + "loss": 0.9761, + "step": 2525 + }, + { + "epoch": 0.023911170852225935, + "grad_norm": 445.1587219238281, + "learning_rate": 1.5936908517350156e-06, + "loss": 30.5625, + "step": 2526 + }, + { + "epoch": 0.023920636873940988, + "grad_norm": 817.5070190429688, + "learning_rate": 1.5943217665615141e-06, + "loss": 69.0938, + "step": 2527 + }, + { + "epoch": 0.023930102895656044, + "grad_norm": 1163.7994384765625, + "learning_rate": 1.5949526813880126e-06, + "loss": 61.3984, + "step": 2528 + }, + { + "epoch": 0.023939568917371097, + "grad_norm": 302.4700622558594, + "learning_rate": 1.595583596214511e-06, + "loss": 40.375, + "step": 2529 + }, + { + "epoch": 0.02394903493908615, + "grad_norm": 205.80819702148438, + "learning_rate": 1.5962145110410094e-06, + "loss": 29.9531, + "step": 2530 + }, + { + "epoch": 0.023958500960801203, + "grad_norm": 1089.6424560546875, + "learning_rate": 1.5968454258675079e-06, + "loss": 39.5977, + "step": 2531 + }, + { + "epoch": 0.02396796698251626, + "grad_norm": 227.6798095703125, + "learning_rate": 1.5974763406940062e-06, + "loss": 28.1875, + "step": 2532 + }, + { + "epoch": 0.023977433004231313, + "grad_norm": 631.6924438476562, + "learning_rate": 1.5981072555205047e-06, + "loss": 73.2188, + "step": 2533 + }, + { + "epoch": 0.023986899025946366, + "grad_norm": 280.169677734375, + "learning_rate": 1.5987381703470032e-06, + "loss": 29.6406, + "step": 2534 + }, + { + "epoch": 0.02399636504766142, + "grad_norm": 531.1072998046875, + "learning_rate": 1.5993690851735016e-06, + "loss": 41.125, + "step": 2535 + }, + { + "epoch": 0.02400583106937647, + "grad_norm": 779.2562866210938, + "learning_rate": 1.6e-06, + "loss": 43.6797, + "step": 2536 + }, + { + "epoch": 0.024015297091091528, + "grad_norm": 418.2118835449219, + "learning_rate": 1.6006309148264982e-06, + "loss": 37.9688, + "step": 2537 + }, + { + "epoch": 0.02402476311280658, + "grad_norm": 493.7008972167969, + "learning_rate": 1.6012618296529967e-06, + "loss": 36.6719, + "step": 2538 + }, + { + "epoch": 0.024034229134521634, + "grad_norm": 1895.2301025390625, + "learning_rate": 1.6018927444794952e-06, + "loss": 80.1562, + "step": 2539 + }, + { + "epoch": 0.024043695156236687, + "grad_norm": 351.86474609375, + "learning_rate": 1.6025236593059937e-06, + "loss": 26.5625, + "step": 2540 + }, + { + "epoch": 0.024053161177951744, + "grad_norm": 223.02389526367188, + "learning_rate": 1.6031545741324922e-06, + "loss": 28.7969, + "step": 2541 + }, + { + "epoch": 0.024062627199666797, + "grad_norm": 933.3494262695312, + "learning_rate": 1.6037854889589907e-06, + "loss": 30.3125, + "step": 2542 + }, + { + "epoch": 0.02407209322138185, + "grad_norm": 240.0073699951172, + "learning_rate": 1.6044164037854887e-06, + "loss": 27.6406, + "step": 2543 + }, + { + "epoch": 0.024081559243096903, + "grad_norm": 371.7530212402344, + "learning_rate": 1.6050473186119872e-06, + "loss": 29.6875, + "step": 2544 + }, + { + "epoch": 0.02409102526481196, + "grad_norm": 692.958984375, + "learning_rate": 1.6056782334384857e-06, + "loss": 48.2734, + "step": 2545 + }, + { + "epoch": 0.024100491286527012, + "grad_norm": 306.611328125, + "learning_rate": 1.6063091482649842e-06, + "loss": 27.9531, + "step": 2546 + }, + { + "epoch": 0.024109957308242065, + "grad_norm": 6738.69384765625, + "learning_rate": 1.6069400630914827e-06, + "loss": 90.9375, + "step": 2547 + }, + { + "epoch": 0.024119423329957118, + "grad_norm": 321.13592529296875, + "learning_rate": 1.607570977917981e-06, + "loss": 30.5625, + "step": 2548 + }, + { + "epoch": 0.02412888935167217, + "grad_norm": 1500.3009033203125, + "learning_rate": 1.6082018927444793e-06, + "loss": 80.7812, + "step": 2549 + }, + { + "epoch": 0.024138355373387228, + "grad_norm": 2.677541971206665, + "learning_rate": 1.6088328075709778e-06, + "loss": 0.9751, + "step": 2550 + }, + { + "epoch": 0.02414782139510228, + "grad_norm": 838.2850341796875, + "learning_rate": 1.6094637223974763e-06, + "loss": 68.3125, + "step": 2551 + }, + { + "epoch": 0.024157287416817334, + "grad_norm": 458.2590637207031, + "learning_rate": 1.6100946372239748e-06, + "loss": 32.4219, + "step": 2552 + }, + { + "epoch": 0.024166753438532387, + "grad_norm": 984.072021484375, + "learning_rate": 1.6107255520504732e-06, + "loss": 35.9844, + "step": 2553 + }, + { + "epoch": 0.024176219460247443, + "grad_norm": 716.7880249023438, + "learning_rate": 1.6113564668769715e-06, + "loss": 62.3594, + "step": 2554 + }, + { + "epoch": 0.024185685481962496, + "grad_norm": 2.534846067428589, + "learning_rate": 1.61198738170347e-06, + "loss": 0.8521, + "step": 2555 + }, + { + "epoch": 0.02419515150367755, + "grad_norm": 792.4749145507812, + "learning_rate": 1.6126182965299683e-06, + "loss": 53.6719, + "step": 2556 + }, + { + "epoch": 0.024204617525392602, + "grad_norm": 526.32763671875, + "learning_rate": 1.6132492113564668e-06, + "loss": 32.125, + "step": 2557 + }, + { + "epoch": 0.02421408354710766, + "grad_norm": 635.0516357421875, + "learning_rate": 1.6138801261829653e-06, + "loss": 54.375, + "step": 2558 + }, + { + "epoch": 0.02422354956882271, + "grad_norm": 782.49072265625, + "learning_rate": 1.6145110410094636e-06, + "loss": 39.4375, + "step": 2559 + }, + { + "epoch": 0.024233015590537765, + "grad_norm": 851.5530395507812, + "learning_rate": 1.615141955835962e-06, + "loss": 75.1406, + "step": 2560 + }, + { + "epoch": 0.024242481612252818, + "grad_norm": 1468.772216796875, + "learning_rate": 1.6157728706624606e-06, + "loss": 60.75, + "step": 2561 + }, + { + "epoch": 0.02425194763396787, + "grad_norm": 291.6654968261719, + "learning_rate": 1.616403785488959e-06, + "loss": 36.4844, + "step": 2562 + }, + { + "epoch": 0.024261413655682927, + "grad_norm": 1018.0240478515625, + "learning_rate": 1.6170347003154573e-06, + "loss": 83.0625, + "step": 2563 + }, + { + "epoch": 0.02427087967739798, + "grad_norm": 481.2201843261719, + "learning_rate": 1.6176656151419556e-06, + "loss": 75.4688, + "step": 2564 + }, + { + "epoch": 0.024280345699113033, + "grad_norm": 418.5592956542969, + "learning_rate": 1.618296529968454e-06, + "loss": 31.3438, + "step": 2565 + }, + { + "epoch": 0.024289811720828086, + "grad_norm": 438.0888671875, + "learning_rate": 1.6189274447949526e-06, + "loss": 67.6562, + "step": 2566 + }, + { + "epoch": 0.024299277742543143, + "grad_norm": 698.9397583007812, + "learning_rate": 1.619558359621451e-06, + "loss": 56.8594, + "step": 2567 + }, + { + "epoch": 0.024308743764258196, + "grad_norm": 279.423583984375, + "learning_rate": 1.6201892744479496e-06, + "loss": 30.7656, + "step": 2568 + }, + { + "epoch": 0.02431820978597325, + "grad_norm": 396.282958984375, + "learning_rate": 1.620820189274448e-06, + "loss": 34.7344, + "step": 2569 + }, + { + "epoch": 0.0243276758076883, + "grad_norm": 3.4179627895355225, + "learning_rate": 1.6214511041009461e-06, + "loss": 0.9595, + "step": 2570 + }, + { + "epoch": 0.024337141829403358, + "grad_norm": 548.4927368164062, + "learning_rate": 1.6220820189274446e-06, + "loss": 33.7734, + "step": 2571 + }, + { + "epoch": 0.02434660785111841, + "grad_norm": 422.91705322265625, + "learning_rate": 1.6227129337539431e-06, + "loss": 57.2344, + "step": 2572 + }, + { + "epoch": 0.024356073872833464, + "grad_norm": 3.200866222381592, + "learning_rate": 1.6233438485804416e-06, + "loss": 0.9248, + "step": 2573 + }, + { + "epoch": 0.024365539894548517, + "grad_norm": 508.5421142578125, + "learning_rate": 1.6239747634069401e-06, + "loss": 39.1328, + "step": 2574 + }, + { + "epoch": 0.024375005916263574, + "grad_norm": 699.3915405273438, + "learning_rate": 1.6246056782334384e-06, + "loss": 78.625, + "step": 2575 + }, + { + "epoch": 0.024384471937978627, + "grad_norm": 325.57568359375, + "learning_rate": 1.6252365930599367e-06, + "loss": 30.2188, + "step": 2576 + }, + { + "epoch": 0.02439393795969368, + "grad_norm": 361.4538269042969, + "learning_rate": 1.6258675078864352e-06, + "loss": 29.0625, + "step": 2577 + }, + { + "epoch": 0.024403403981408733, + "grad_norm": 653.7435913085938, + "learning_rate": 1.6264984227129337e-06, + "loss": 77.5938, + "step": 2578 + }, + { + "epoch": 0.024412870003123786, + "grad_norm": 1085.3311767578125, + "learning_rate": 1.6271293375394322e-06, + "loss": 43.375, + "step": 2579 + }, + { + "epoch": 0.024422336024838842, + "grad_norm": 282.14892578125, + "learning_rate": 1.6277602523659306e-06, + "loss": 36.2656, + "step": 2580 + }, + { + "epoch": 0.024431802046553895, + "grad_norm": 612.9072875976562, + "learning_rate": 1.628391167192429e-06, + "loss": 58.8125, + "step": 2581 + }, + { + "epoch": 0.024441268068268948, + "grad_norm": 418.5953063964844, + "learning_rate": 1.6290220820189274e-06, + "loss": 39.5, + "step": 2582 + }, + { + "epoch": 0.024450734089984, + "grad_norm": 1411.4146728515625, + "learning_rate": 1.6296529968454257e-06, + "loss": 131.5, + "step": 2583 + }, + { + "epoch": 0.024460200111699058, + "grad_norm": 369.2118225097656, + "learning_rate": 1.6302839116719242e-06, + "loss": 35.25, + "step": 2584 + }, + { + "epoch": 0.02446966613341411, + "grad_norm": 588.639404296875, + "learning_rate": 1.6309148264984227e-06, + "loss": 39.2969, + "step": 2585 + }, + { + "epoch": 0.024479132155129164, + "grad_norm": 295.5313415527344, + "learning_rate": 1.631545741324921e-06, + "loss": 29.8125, + "step": 2586 + }, + { + "epoch": 0.024488598176844217, + "grad_norm": 284.24444580078125, + "learning_rate": 1.6321766561514195e-06, + "loss": 31.7969, + "step": 2587 + }, + { + "epoch": 0.024498064198559273, + "grad_norm": 783.0303955078125, + "learning_rate": 1.632807570977918e-06, + "loss": 75.9688, + "step": 2588 + }, + { + "epoch": 0.024507530220274326, + "grad_norm": 3.2457735538482666, + "learning_rate": 1.6334384858044164e-06, + "loss": 0.835, + "step": 2589 + }, + { + "epoch": 0.02451699624198938, + "grad_norm": 255.68154907226562, + "learning_rate": 1.6340694006309147e-06, + "loss": 31.2656, + "step": 2590 + }, + { + "epoch": 0.024526462263704432, + "grad_norm": 474.560791015625, + "learning_rate": 1.6347003154574132e-06, + "loss": 35.0781, + "step": 2591 + }, + { + "epoch": 0.024535928285419485, + "grad_norm": 712.1465454101562, + "learning_rate": 1.6353312302839115e-06, + "loss": 58.5625, + "step": 2592 + }, + { + "epoch": 0.02454539430713454, + "grad_norm": 812.2486572265625, + "learning_rate": 1.63596214511041e-06, + "loss": 90.7812, + "step": 2593 + }, + { + "epoch": 0.024554860328849595, + "grad_norm": 981.7937622070312, + "learning_rate": 1.6365930599369085e-06, + "loss": 65.0547, + "step": 2594 + }, + { + "epoch": 0.024564326350564648, + "grad_norm": 760.3937377929688, + "learning_rate": 1.637223974763407e-06, + "loss": 80.0781, + "step": 2595 + }, + { + "epoch": 0.0245737923722797, + "grad_norm": 940.0902709960938, + "learning_rate": 1.6378548895899053e-06, + "loss": 56.2422, + "step": 2596 + }, + { + "epoch": 0.024583258393994757, + "grad_norm": 632.6734619140625, + "learning_rate": 1.6384858044164035e-06, + "loss": 50.7734, + "step": 2597 + }, + { + "epoch": 0.02459272441570981, + "grad_norm": 459.5621643066406, + "learning_rate": 1.639116719242902e-06, + "loss": 35.7656, + "step": 2598 + }, + { + "epoch": 0.024602190437424863, + "grad_norm": 387.0630187988281, + "learning_rate": 1.6397476340694005e-06, + "loss": 57.5312, + "step": 2599 + }, + { + "epoch": 0.024611656459139916, + "grad_norm": 2.4796934127807617, + "learning_rate": 1.640378548895899e-06, + "loss": 0.8801, + "step": 2600 + }, + { + "epoch": 0.024621122480854973, + "grad_norm": 427.1966247558594, + "learning_rate": 1.6410094637223975e-06, + "loss": 36.3594, + "step": 2601 + }, + { + "epoch": 0.024630588502570026, + "grad_norm": 760.2817993164062, + "learning_rate": 1.641640378548896e-06, + "loss": 78.3438, + "step": 2602 + }, + { + "epoch": 0.02464005452428508, + "grad_norm": 612.71533203125, + "learning_rate": 1.642271293375394e-06, + "loss": 33.2969, + "step": 2603 + }, + { + "epoch": 0.02464952054600013, + "grad_norm": 451.7579650878906, + "learning_rate": 1.6429022082018926e-06, + "loss": 64.7031, + "step": 2604 + }, + { + "epoch": 0.024658986567715185, + "grad_norm": 795.85400390625, + "learning_rate": 1.643533123028391e-06, + "loss": 44.5781, + "step": 2605 + }, + { + "epoch": 0.02466845258943024, + "grad_norm": 547.9177856445312, + "learning_rate": 1.6441640378548896e-06, + "loss": 50.6875, + "step": 2606 + }, + { + "epoch": 0.024677918611145294, + "grad_norm": 285.63006591796875, + "learning_rate": 1.644794952681388e-06, + "loss": 28.5, + "step": 2607 + }, + { + "epoch": 0.024687384632860347, + "grad_norm": 387.19427490234375, + "learning_rate": 1.6454258675078863e-06, + "loss": 51.4531, + "step": 2608 + }, + { + "epoch": 0.0246968506545754, + "grad_norm": 408.64031982421875, + "learning_rate": 1.6460567823343848e-06, + "loss": 37.9219, + "step": 2609 + }, + { + "epoch": 0.024706316676290457, + "grad_norm": 238.26893615722656, + "learning_rate": 1.646687697160883e-06, + "loss": 24.2188, + "step": 2610 + }, + { + "epoch": 0.02471578269800551, + "grad_norm": 858.5697631835938, + "learning_rate": 1.6473186119873816e-06, + "loss": 44.0156, + "step": 2611 + }, + { + "epoch": 0.024725248719720563, + "grad_norm": 270.4810485839844, + "learning_rate": 1.64794952681388e-06, + "loss": 31.3438, + "step": 2612 + }, + { + "epoch": 0.024734714741435616, + "grad_norm": 723.4447631835938, + "learning_rate": 1.6485804416403786e-06, + "loss": 72.5938, + "step": 2613 + }, + { + "epoch": 0.024744180763150672, + "grad_norm": 3.218783140182495, + "learning_rate": 1.6492113564668769e-06, + "loss": 1.0483, + "step": 2614 + }, + { + "epoch": 0.024753646784865725, + "grad_norm": 266.5059814453125, + "learning_rate": 1.6498422712933754e-06, + "loss": 26.2031, + "step": 2615 + }, + { + "epoch": 0.024763112806580778, + "grad_norm": 559.175537109375, + "learning_rate": 1.6504731861198736e-06, + "loss": 57.8125, + "step": 2616 + }, + { + "epoch": 0.02477257882829583, + "grad_norm": 254.09620666503906, + "learning_rate": 1.6511041009463721e-06, + "loss": 29.2031, + "step": 2617 + }, + { + "epoch": 0.024782044850010884, + "grad_norm": 424.2607421875, + "learning_rate": 1.6517350157728706e-06, + "loss": 34.7188, + "step": 2618 + }, + { + "epoch": 0.02479151087172594, + "grad_norm": 910.9334106445312, + "learning_rate": 1.652365930599369e-06, + "loss": 35.1094, + "step": 2619 + }, + { + "epoch": 0.024800976893440994, + "grad_norm": 171.67263793945312, + "learning_rate": 1.6529968454258674e-06, + "loss": 30.0781, + "step": 2620 + }, + { + "epoch": 0.024810442915156047, + "grad_norm": 530.7036743164062, + "learning_rate": 1.6536277602523659e-06, + "loss": 44.5625, + "step": 2621 + }, + { + "epoch": 0.0248199089368711, + "grad_norm": 167.5481414794922, + "learning_rate": 1.6542586750788644e-06, + "loss": 29.8125, + "step": 2622 + }, + { + "epoch": 0.024829374958586156, + "grad_norm": 209.80296325683594, + "learning_rate": 1.6548895899053627e-06, + "loss": 27.3594, + "step": 2623 + }, + { + "epoch": 0.02483884098030121, + "grad_norm": 470.37811279296875, + "learning_rate": 1.6555205047318612e-06, + "loss": 29.2266, + "step": 2624 + }, + { + "epoch": 0.024848307002016262, + "grad_norm": 591.8447875976562, + "learning_rate": 1.6561514195583594e-06, + "loss": 53.3125, + "step": 2625 + }, + { + "epoch": 0.024857773023731315, + "grad_norm": 551.34716796875, + "learning_rate": 1.656782334384858e-06, + "loss": 54.5625, + "step": 2626 + }, + { + "epoch": 0.02486723904544637, + "grad_norm": 286.8594970703125, + "learning_rate": 1.6574132492113564e-06, + "loss": 28.5156, + "step": 2627 + }, + { + "epoch": 0.024876705067161425, + "grad_norm": 392.2592468261719, + "learning_rate": 1.658044164037855e-06, + "loss": 55.0938, + "step": 2628 + }, + { + "epoch": 0.024886171088876478, + "grad_norm": 596.5138549804688, + "learning_rate": 1.6586750788643534e-06, + "loss": 33.5156, + "step": 2629 + }, + { + "epoch": 0.02489563711059153, + "grad_norm": 198.96395874023438, + "learning_rate": 1.6593059936908515e-06, + "loss": 30.2031, + "step": 2630 + }, + { + "epoch": 0.024905103132306587, + "grad_norm": 1028.7899169921875, + "learning_rate": 1.65993690851735e-06, + "loss": 59.875, + "step": 2631 + }, + { + "epoch": 0.02491456915402164, + "grad_norm": 404.69757080078125, + "learning_rate": 1.6605678233438485e-06, + "loss": 29.3438, + "step": 2632 + }, + { + "epoch": 0.024924035175736693, + "grad_norm": 147.0840301513672, + "learning_rate": 1.661198738170347e-06, + "loss": 24.3438, + "step": 2633 + }, + { + "epoch": 0.024933501197451746, + "grad_norm": 326.2648010253906, + "learning_rate": 1.6618296529968454e-06, + "loss": 37.1406, + "step": 2634 + }, + { + "epoch": 0.0249429672191668, + "grad_norm": 1136.46142578125, + "learning_rate": 1.662460567823344e-06, + "loss": 54.6641, + "step": 2635 + }, + { + "epoch": 0.024952433240881856, + "grad_norm": 810.0060424804688, + "learning_rate": 1.663091482649842e-06, + "loss": 31.2344, + "step": 2636 + }, + { + "epoch": 0.02496189926259691, + "grad_norm": 650.9986572265625, + "learning_rate": 1.6637223974763405e-06, + "loss": 65.375, + "step": 2637 + }, + { + "epoch": 0.02497136528431196, + "grad_norm": 374.7366638183594, + "learning_rate": 1.664353312302839e-06, + "loss": 32.0391, + "step": 2638 + }, + { + "epoch": 0.024980831306027015, + "grad_norm": 695.9627075195312, + "learning_rate": 1.6649842271293375e-06, + "loss": 28.0938, + "step": 2639 + }, + { + "epoch": 0.02499029732774207, + "grad_norm": 513.5623779296875, + "learning_rate": 1.665615141955836e-06, + "loss": 34.4688, + "step": 2640 + }, + { + "epoch": 0.024999763349457124, + "grad_norm": 314.4490661621094, + "learning_rate": 1.6662460567823343e-06, + "loss": 36.2656, + "step": 2641 + }, + { + "epoch": 0.025009229371172177, + "grad_norm": 2.8502538204193115, + "learning_rate": 1.6668769716088328e-06, + "loss": 0.8604, + "step": 2642 + }, + { + "epoch": 0.02501869539288723, + "grad_norm": 558.7369384765625, + "learning_rate": 1.667507886435331e-06, + "loss": 56.5469, + "step": 2643 + }, + { + "epoch": 0.025028161414602287, + "grad_norm": 180.07810974121094, + "learning_rate": 1.6681388012618295e-06, + "loss": 25.4844, + "step": 2644 + }, + { + "epoch": 0.02503762743631734, + "grad_norm": 282.7625427246094, + "learning_rate": 1.668769716088328e-06, + "loss": 30.4688, + "step": 2645 + }, + { + "epoch": 0.025047093458032393, + "grad_norm": 403.6738586425781, + "learning_rate": 1.6694006309148265e-06, + "loss": 40.7812, + "step": 2646 + }, + { + "epoch": 0.025056559479747446, + "grad_norm": 379.2559814453125, + "learning_rate": 1.6700315457413248e-06, + "loss": 33.2969, + "step": 2647 + }, + { + "epoch": 0.0250660255014625, + "grad_norm": 351.2530212402344, + "learning_rate": 1.6706624605678233e-06, + "loss": 30.9375, + "step": 2648 + }, + { + "epoch": 0.025075491523177555, + "grad_norm": 322.33331298828125, + "learning_rate": 1.6712933753943218e-06, + "loss": 43.3281, + "step": 2649 + }, + { + "epoch": 0.025084957544892608, + "grad_norm": 446.025390625, + "learning_rate": 1.67192429022082e-06, + "loss": 31.4844, + "step": 2650 + }, + { + "epoch": 0.02509442356660766, + "grad_norm": 319.3594055175781, + "learning_rate": 1.6725552050473186e-06, + "loss": 29.1875, + "step": 2651 + }, + { + "epoch": 0.025103889588322714, + "grad_norm": 3.5256600379943848, + "learning_rate": 1.6731861198738168e-06, + "loss": 0.8628, + "step": 2652 + }, + { + "epoch": 0.02511335561003777, + "grad_norm": 280.93890380859375, + "learning_rate": 1.6738170347003153e-06, + "loss": 30.3906, + "step": 2653 + }, + { + "epoch": 0.025122821631752824, + "grad_norm": 611.7548828125, + "learning_rate": 1.6744479495268138e-06, + "loss": 50.5703, + "step": 2654 + }, + { + "epoch": 0.025132287653467877, + "grad_norm": 819.0433349609375, + "learning_rate": 1.6750788643533123e-06, + "loss": 36.4375, + "step": 2655 + }, + { + "epoch": 0.02514175367518293, + "grad_norm": 3.115039587020874, + "learning_rate": 1.6757097791798108e-06, + "loss": 0.9321, + "step": 2656 + }, + { + "epoch": 0.025151219696897986, + "grad_norm": 249.7755889892578, + "learning_rate": 1.676340694006309e-06, + "loss": 27.2969, + "step": 2657 + }, + { + "epoch": 0.02516068571861304, + "grad_norm": 490.6873474121094, + "learning_rate": 1.6769716088328074e-06, + "loss": 39.2031, + "step": 2658 + }, + { + "epoch": 0.025170151740328092, + "grad_norm": 255.0899200439453, + "learning_rate": 1.6776025236593059e-06, + "loss": 28.1562, + "step": 2659 + }, + { + "epoch": 0.025179617762043145, + "grad_norm": 499.3311767578125, + "learning_rate": 1.6782334384858044e-06, + "loss": 54.5469, + "step": 2660 + }, + { + "epoch": 0.025189083783758198, + "grad_norm": 748.7826538085938, + "learning_rate": 1.6788643533123028e-06, + "loss": 49.4453, + "step": 2661 + }, + { + "epoch": 0.025198549805473255, + "grad_norm": 202.21563720703125, + "learning_rate": 1.6794952681388013e-06, + "loss": 29.4688, + "step": 2662 + }, + { + "epoch": 0.025208015827188308, + "grad_norm": 527.5892944335938, + "learning_rate": 1.6801261829652994e-06, + "loss": 70.0, + "step": 2663 + }, + { + "epoch": 0.02521748184890336, + "grad_norm": 297.2985534667969, + "learning_rate": 1.680757097791798e-06, + "loss": 29.4844, + "step": 2664 + }, + { + "epoch": 0.025226947870618414, + "grad_norm": 496.70867919921875, + "learning_rate": 1.6813880126182964e-06, + "loss": 30.2266, + "step": 2665 + }, + { + "epoch": 0.02523641389233347, + "grad_norm": 281.6521911621094, + "learning_rate": 1.6820189274447949e-06, + "loss": 34.2969, + "step": 2666 + }, + { + "epoch": 0.025245879914048523, + "grad_norm": 355.12335205078125, + "learning_rate": 1.6826498422712934e-06, + "loss": 46.7266, + "step": 2667 + }, + { + "epoch": 0.025255345935763576, + "grad_norm": 694.3997192382812, + "learning_rate": 1.6832807570977919e-06, + "loss": 44.2969, + "step": 2668 + }, + { + "epoch": 0.02526481195747863, + "grad_norm": 373.8608093261719, + "learning_rate": 1.6839116719242902e-06, + "loss": 30.2188, + "step": 2669 + }, + { + "epoch": 0.025274277979193686, + "grad_norm": 608.4727783203125, + "learning_rate": 1.6845425867507884e-06, + "loss": 56.0625, + "step": 2670 + }, + { + "epoch": 0.02528374400090874, + "grad_norm": 900.3016967773438, + "learning_rate": 1.685173501577287e-06, + "loss": 59.4531, + "step": 2671 + }, + { + "epoch": 0.02529321002262379, + "grad_norm": 227.3461151123047, + "learning_rate": 1.6858044164037854e-06, + "loss": 27.6875, + "step": 2672 + }, + { + "epoch": 0.025302676044338845, + "grad_norm": 519.3485107421875, + "learning_rate": 1.686435331230284e-06, + "loss": 31.3359, + "step": 2673 + }, + { + "epoch": 0.0253121420660539, + "grad_norm": 358.24005126953125, + "learning_rate": 1.6870662460567822e-06, + "loss": 34.1953, + "step": 2674 + }, + { + "epoch": 0.025321608087768954, + "grad_norm": 168.932861328125, + "learning_rate": 1.6876971608832807e-06, + "loss": 27.8125, + "step": 2675 + }, + { + "epoch": 0.025331074109484007, + "grad_norm": 546.76904296875, + "learning_rate": 1.6883280757097792e-06, + "loss": 40.8125, + "step": 2676 + }, + { + "epoch": 0.02534054013119906, + "grad_norm": 1566.366455078125, + "learning_rate": 1.6889589905362775e-06, + "loss": 65.125, + "step": 2677 + }, + { + "epoch": 0.025350006152914113, + "grad_norm": 614.32763671875, + "learning_rate": 1.689589905362776e-06, + "loss": 69.7969, + "step": 2678 + }, + { + "epoch": 0.02535947217462917, + "grad_norm": 497.68304443359375, + "learning_rate": 1.6902208201892744e-06, + "loss": 45.4375, + "step": 2679 + }, + { + "epoch": 0.025368938196344223, + "grad_norm": 495.5487976074219, + "learning_rate": 1.6908517350157727e-06, + "loss": 30.375, + "step": 2680 + }, + { + "epoch": 0.025378404218059276, + "grad_norm": 471.08636474609375, + "learning_rate": 1.6914826498422712e-06, + "loss": 37.4062, + "step": 2681 + }, + { + "epoch": 0.02538787023977433, + "grad_norm": 272.4569091796875, + "learning_rate": 1.6921135646687697e-06, + "loss": 28.8203, + "step": 2682 + }, + { + "epoch": 0.025397336261489385, + "grad_norm": 279.0712585449219, + "learning_rate": 1.6927444794952682e-06, + "loss": 27.3438, + "step": 2683 + }, + { + "epoch": 0.025406802283204438, + "grad_norm": 1166.0223388671875, + "learning_rate": 1.6933753943217665e-06, + "loss": 76.125, + "step": 2684 + }, + { + "epoch": 0.02541626830491949, + "grad_norm": 984.75390625, + "learning_rate": 1.6940063091482648e-06, + "loss": 71.75, + "step": 2685 + }, + { + "epoch": 0.025425734326634544, + "grad_norm": 239.48341369628906, + "learning_rate": 1.6946372239747633e-06, + "loss": 36.7656, + "step": 2686 + }, + { + "epoch": 0.0254352003483496, + "grad_norm": 451.8413391113281, + "learning_rate": 1.6952681388012618e-06, + "loss": 37.4844, + "step": 2687 + }, + { + "epoch": 0.025444666370064654, + "grad_norm": 852.718505859375, + "learning_rate": 1.6958990536277602e-06, + "loss": 70.8281, + "step": 2688 + }, + { + "epoch": 0.025454132391779707, + "grad_norm": 464.1305847167969, + "learning_rate": 1.6965299684542587e-06, + "loss": 37.8281, + "step": 2689 + }, + { + "epoch": 0.02546359841349476, + "grad_norm": 434.5806884765625, + "learning_rate": 1.697160883280757e-06, + "loss": 33.2031, + "step": 2690 + }, + { + "epoch": 0.025473064435209813, + "grad_norm": 472.73712158203125, + "learning_rate": 1.6977917981072553e-06, + "loss": 43.4375, + "step": 2691 + }, + { + "epoch": 0.02548253045692487, + "grad_norm": 411.58245849609375, + "learning_rate": 1.6984227129337538e-06, + "loss": 28.8281, + "step": 2692 + }, + { + "epoch": 0.025491996478639922, + "grad_norm": 641.1148681640625, + "learning_rate": 1.6990536277602523e-06, + "loss": 64.2812, + "step": 2693 + }, + { + "epoch": 0.025501462500354975, + "grad_norm": 958.8942260742188, + "learning_rate": 1.6996845425867508e-06, + "loss": 33.6172, + "step": 2694 + }, + { + "epoch": 0.025510928522070028, + "grad_norm": 202.77438354492188, + "learning_rate": 1.7003154574132493e-06, + "loss": 25.4688, + "step": 2695 + }, + { + "epoch": 0.025520394543785085, + "grad_norm": 598.4524536132812, + "learning_rate": 1.7009463722397476e-06, + "loss": 58.1875, + "step": 2696 + }, + { + "epoch": 0.025529860565500138, + "grad_norm": 985.9261474609375, + "learning_rate": 1.7015772870662458e-06, + "loss": 81.9062, + "step": 2697 + }, + { + "epoch": 0.02553932658721519, + "grad_norm": 318.88177490234375, + "learning_rate": 1.7022082018927443e-06, + "loss": 31.4219, + "step": 2698 + }, + { + "epoch": 0.025548792608930244, + "grad_norm": 710.455322265625, + "learning_rate": 1.7028391167192428e-06, + "loss": 48.3594, + "step": 2699 + }, + { + "epoch": 0.0255582586306453, + "grad_norm": 286.2360534667969, + "learning_rate": 1.7034700315457413e-06, + "loss": 26.4062, + "step": 2700 + }, + { + "epoch": 0.025567724652360353, + "grad_norm": 1156.4337158203125, + "learning_rate": 1.7041009463722398e-06, + "loss": 70.1641, + "step": 2701 + }, + { + "epoch": 0.025577190674075406, + "grad_norm": 323.4486083984375, + "learning_rate": 1.704731861198738e-06, + "loss": 39.9062, + "step": 2702 + }, + { + "epoch": 0.02558665669579046, + "grad_norm": 323.2873229980469, + "learning_rate": 1.7053627760252366e-06, + "loss": 31.8438, + "step": 2703 + }, + { + "epoch": 0.025596122717505512, + "grad_norm": 682.8297729492188, + "learning_rate": 1.7059936908517349e-06, + "loss": 68.875, + "step": 2704 + }, + { + "epoch": 0.02560558873922057, + "grad_norm": 373.12109375, + "learning_rate": 1.7066246056782334e-06, + "loss": 49.6094, + "step": 2705 + }, + { + "epoch": 0.025615054760935622, + "grad_norm": 618.4752197265625, + "learning_rate": 1.7072555205047318e-06, + "loss": 40.4062, + "step": 2706 + }, + { + "epoch": 0.025624520782650675, + "grad_norm": 475.98345947265625, + "learning_rate": 1.7078864353312301e-06, + "loss": 39.7422, + "step": 2707 + }, + { + "epoch": 0.025633986804365728, + "grad_norm": 290.0970153808594, + "learning_rate": 1.7085173501577286e-06, + "loss": 35.0, + "step": 2708 + }, + { + "epoch": 0.025643452826080784, + "grad_norm": 844.6924438476562, + "learning_rate": 1.7091482649842271e-06, + "loss": 33.4922, + "step": 2709 + }, + { + "epoch": 0.025652918847795837, + "grad_norm": 727.2000122070312, + "learning_rate": 1.7097791798107254e-06, + "loss": 60.6406, + "step": 2710 + }, + { + "epoch": 0.02566238486951089, + "grad_norm": 572.1240844726562, + "learning_rate": 1.7104100946372239e-06, + "loss": 76.6953, + "step": 2711 + }, + { + "epoch": 0.025671850891225943, + "grad_norm": 626.849853515625, + "learning_rate": 1.7110410094637224e-06, + "loss": 53.6484, + "step": 2712 + }, + { + "epoch": 0.025681316912941, + "grad_norm": 400.2272644042969, + "learning_rate": 1.7116719242902207e-06, + "loss": 45.1406, + "step": 2713 + }, + { + "epoch": 0.025690782934656053, + "grad_norm": 544.2955932617188, + "learning_rate": 1.7123028391167192e-06, + "loss": 64.5, + "step": 2714 + }, + { + "epoch": 0.025700248956371106, + "grad_norm": 542.7023315429688, + "learning_rate": 1.7129337539432176e-06, + "loss": 34.8125, + "step": 2715 + }, + { + "epoch": 0.02570971497808616, + "grad_norm": 243.9163360595703, + "learning_rate": 1.7135646687697161e-06, + "loss": 35.3281, + "step": 2716 + }, + { + "epoch": 0.025719180999801215, + "grad_norm": 2.527618646621704, + "learning_rate": 1.7141955835962144e-06, + "loss": 0.9043, + "step": 2717 + }, + { + "epoch": 0.025728647021516268, + "grad_norm": 2.8647079467773438, + "learning_rate": 1.7148264984227127e-06, + "loss": 0.7739, + "step": 2718 + }, + { + "epoch": 0.02573811304323132, + "grad_norm": 1140.50537109375, + "learning_rate": 1.7154574132492112e-06, + "loss": 53.2031, + "step": 2719 + }, + { + "epoch": 0.025747579064946374, + "grad_norm": 638.27978515625, + "learning_rate": 1.7160883280757097e-06, + "loss": 46.9688, + "step": 2720 + }, + { + "epoch": 0.025757045086661427, + "grad_norm": 378.2288818359375, + "learning_rate": 1.7167192429022082e-06, + "loss": 35.25, + "step": 2721 + }, + { + "epoch": 0.025766511108376484, + "grad_norm": 240.7015838623047, + "learning_rate": 1.7173501577287067e-06, + "loss": 26.5469, + "step": 2722 + }, + { + "epoch": 0.025775977130091537, + "grad_norm": 355.5525207519531, + "learning_rate": 1.7179810725552052e-06, + "loss": 56.7344, + "step": 2723 + }, + { + "epoch": 0.02578544315180659, + "grad_norm": 704.893310546875, + "learning_rate": 1.7186119873817032e-06, + "loss": 31.9844, + "step": 2724 + }, + { + "epoch": 0.025794909173521643, + "grad_norm": 1154.3360595703125, + "learning_rate": 1.7192429022082017e-06, + "loss": 88.2969, + "step": 2725 + }, + { + "epoch": 0.0258043751952367, + "grad_norm": 477.3113708496094, + "learning_rate": 1.7198738170347002e-06, + "loss": 45.9375, + "step": 2726 + }, + { + "epoch": 0.025813841216951752, + "grad_norm": 306.5318603515625, + "learning_rate": 1.7205047318611987e-06, + "loss": 26.4844, + "step": 2727 + }, + { + "epoch": 0.025823307238666805, + "grad_norm": 317.53155517578125, + "learning_rate": 1.7211356466876972e-06, + "loss": 31.1094, + "step": 2728 + }, + { + "epoch": 0.02583277326038186, + "grad_norm": 650.7823486328125, + "learning_rate": 1.7217665615141955e-06, + "loss": 65.7812, + "step": 2729 + }, + { + "epoch": 0.025842239282096915, + "grad_norm": 656.7516479492188, + "learning_rate": 1.722397476340694e-06, + "loss": 55.4219, + "step": 2730 + }, + { + "epoch": 0.025851705303811968, + "grad_norm": 872.271240234375, + "learning_rate": 1.7230283911671923e-06, + "loss": 49.3047, + "step": 2731 + }, + { + "epoch": 0.02586117132552702, + "grad_norm": 3.0492806434631348, + "learning_rate": 1.7236593059936908e-06, + "loss": 0.9639, + "step": 2732 + }, + { + "epoch": 0.025870637347242074, + "grad_norm": 263.12451171875, + "learning_rate": 1.7242902208201892e-06, + "loss": 28.7656, + "step": 2733 + }, + { + "epoch": 0.025880103368957127, + "grad_norm": 362.71343994140625, + "learning_rate": 1.7249211356466875e-06, + "loss": 37.875, + "step": 2734 + }, + { + "epoch": 0.025889569390672183, + "grad_norm": 185.2198486328125, + "learning_rate": 1.725552050473186e-06, + "loss": 23.9219, + "step": 2735 + }, + { + "epoch": 0.025899035412387236, + "grad_norm": 273.0008239746094, + "learning_rate": 1.7261829652996845e-06, + "loss": 26.2188, + "step": 2736 + }, + { + "epoch": 0.02590850143410229, + "grad_norm": 525.3977661132812, + "learning_rate": 1.7268138801261828e-06, + "loss": 40.0312, + "step": 2737 + }, + { + "epoch": 0.025917967455817342, + "grad_norm": 170.51141357421875, + "learning_rate": 1.7274447949526813e-06, + "loss": 25.25, + "step": 2738 + }, + { + "epoch": 0.0259274334775324, + "grad_norm": 3.146193027496338, + "learning_rate": 1.7280757097791798e-06, + "loss": 0.9331, + "step": 2739 + }, + { + "epoch": 0.025936899499247452, + "grad_norm": 3.0030317306518555, + "learning_rate": 1.728706624605678e-06, + "loss": 0.9316, + "step": 2740 + }, + { + "epoch": 0.025946365520962505, + "grad_norm": 3.310807228088379, + "learning_rate": 1.7293375394321766e-06, + "loss": 0.73, + "step": 2741 + }, + { + "epoch": 0.025955831542677558, + "grad_norm": 218.32090759277344, + "learning_rate": 1.729968454258675e-06, + "loss": 25.4062, + "step": 2742 + }, + { + "epoch": 0.025965297564392614, + "grad_norm": 1039.82373046875, + "learning_rate": 1.7305993690851735e-06, + "loss": 43.6719, + "step": 2743 + }, + { + "epoch": 0.025974763586107667, + "grad_norm": 478.22412109375, + "learning_rate": 1.7312302839116718e-06, + "loss": 32.4375, + "step": 2744 + }, + { + "epoch": 0.02598422960782272, + "grad_norm": 550.2298583984375, + "learning_rate": 1.73186119873817e-06, + "loss": 34.2969, + "step": 2745 + }, + { + "epoch": 0.025993695629537773, + "grad_norm": 546.6349487304688, + "learning_rate": 1.7324921135646686e-06, + "loss": 28.6172, + "step": 2746 + }, + { + "epoch": 0.026003161651252826, + "grad_norm": 406.2030334472656, + "learning_rate": 1.733123028391167e-06, + "loss": 38.7969, + "step": 2747 + }, + { + "epoch": 0.026012627672967883, + "grad_norm": 617.5592651367188, + "learning_rate": 1.7337539432176656e-06, + "loss": 47.3125, + "step": 2748 + }, + { + "epoch": 0.026022093694682936, + "grad_norm": 945.5282592773438, + "learning_rate": 1.734384858044164e-06, + "loss": 33.7344, + "step": 2749 + }, + { + "epoch": 0.02603155971639799, + "grad_norm": 799.9861450195312, + "learning_rate": 1.7350157728706626e-06, + "loss": 55.6875, + "step": 2750 + }, + { + "epoch": 0.026041025738113042, + "grad_norm": 746.1524047851562, + "learning_rate": 1.7356466876971606e-06, + "loss": 55.6406, + "step": 2751 + }, + { + "epoch": 0.0260504917598281, + "grad_norm": 529.6326904296875, + "learning_rate": 1.7362776025236591e-06, + "loss": 24.7656, + "step": 2752 + }, + { + "epoch": 0.02605995778154315, + "grad_norm": 217.0042266845703, + "learning_rate": 1.7369085173501576e-06, + "loss": 25.7969, + "step": 2753 + }, + { + "epoch": 0.026069423803258204, + "grad_norm": 585.0179443359375, + "learning_rate": 1.7375394321766561e-06, + "loss": 38.5, + "step": 2754 + }, + { + "epoch": 0.026078889824973257, + "grad_norm": 3.275090217590332, + "learning_rate": 1.7381703470031546e-06, + "loss": 0.96, + "step": 2755 + }, + { + "epoch": 0.026088355846688314, + "grad_norm": 979.0383911132812, + "learning_rate": 1.7388012618296529e-06, + "loss": 56.3359, + "step": 2756 + }, + { + "epoch": 0.026097821868403367, + "grad_norm": 1065.514892578125, + "learning_rate": 1.7394321766561512e-06, + "loss": 40.7031, + "step": 2757 + }, + { + "epoch": 0.02610728789011842, + "grad_norm": 482.52789306640625, + "learning_rate": 1.7400630914826497e-06, + "loss": 39.8594, + "step": 2758 + }, + { + "epoch": 0.026116753911833473, + "grad_norm": 397.1623229980469, + "learning_rate": 1.7406940063091482e-06, + "loss": 27.0625, + "step": 2759 + }, + { + "epoch": 0.02612621993354853, + "grad_norm": 3.2249345779418945, + "learning_rate": 1.7413249211356467e-06, + "loss": 1.0259, + "step": 2760 + }, + { + "epoch": 0.026135685955263582, + "grad_norm": 203.3842315673828, + "learning_rate": 1.7419558359621451e-06, + "loss": 29.1406, + "step": 2761 + }, + { + "epoch": 0.026145151976978635, + "grad_norm": 324.55291748046875, + "learning_rate": 1.7425867507886434e-06, + "loss": 36.7344, + "step": 2762 + }, + { + "epoch": 0.02615461799869369, + "grad_norm": 901.9595947265625, + "learning_rate": 1.743217665615142e-06, + "loss": 40.1719, + "step": 2763 + }, + { + "epoch": 0.02616408402040874, + "grad_norm": 409.9023742675781, + "learning_rate": 1.7438485804416402e-06, + "loss": 25.5156, + "step": 2764 + }, + { + "epoch": 0.026173550042123798, + "grad_norm": 983.4058837890625, + "learning_rate": 1.7444794952681387e-06, + "loss": 60.75, + "step": 2765 + }, + { + "epoch": 0.02618301606383885, + "grad_norm": 186.9498291015625, + "learning_rate": 1.7451104100946372e-06, + "loss": 30.5781, + "step": 2766 + }, + { + "epoch": 0.026192482085553904, + "grad_norm": 645.4475708007812, + "learning_rate": 1.7457413249211355e-06, + "loss": 38.9844, + "step": 2767 + }, + { + "epoch": 0.026201948107268957, + "grad_norm": 461.9501037597656, + "learning_rate": 1.746372239747634e-06, + "loss": 28.625, + "step": 2768 + }, + { + "epoch": 0.026211414128984013, + "grad_norm": 591.8121948242188, + "learning_rate": 1.7470031545741325e-06, + "loss": 35.2031, + "step": 2769 + }, + { + "epoch": 0.026220880150699066, + "grad_norm": 3.4063401222229004, + "learning_rate": 1.747634069400631e-06, + "loss": 1.0864, + "step": 2770 + }, + { + "epoch": 0.02623034617241412, + "grad_norm": 227.680908203125, + "learning_rate": 1.7482649842271292e-06, + "loss": 34.2969, + "step": 2771 + }, + { + "epoch": 0.026239812194129172, + "grad_norm": 223.0672149658203, + "learning_rate": 1.7488958990536277e-06, + "loss": 26.875, + "step": 2772 + }, + { + "epoch": 0.02624927821584423, + "grad_norm": 270.0115661621094, + "learning_rate": 1.749526813880126e-06, + "loss": 28.9375, + "step": 2773 + }, + { + "epoch": 0.026258744237559282, + "grad_norm": 242.89503479003906, + "learning_rate": 1.7501577287066245e-06, + "loss": 26.2969, + "step": 2774 + }, + { + "epoch": 0.026268210259274335, + "grad_norm": 579.7394409179688, + "learning_rate": 1.750788643533123e-06, + "loss": 63.2812, + "step": 2775 + }, + { + "epoch": 0.026277676280989388, + "grad_norm": 591.1427612304688, + "learning_rate": 1.7514195583596215e-06, + "loss": 48.9531, + "step": 2776 + }, + { + "epoch": 0.02628714230270444, + "grad_norm": 1042.229736328125, + "learning_rate": 1.75205047318612e-06, + "loss": 59.0469, + "step": 2777 + }, + { + "epoch": 0.026296608324419497, + "grad_norm": 181.74331665039062, + "learning_rate": 1.752681388012618e-06, + "loss": 27.75, + "step": 2778 + }, + { + "epoch": 0.02630607434613455, + "grad_norm": 471.4330749511719, + "learning_rate": 1.7533123028391165e-06, + "loss": 28.3438, + "step": 2779 + }, + { + "epoch": 0.026315540367849603, + "grad_norm": 238.8199462890625, + "learning_rate": 1.753943217665615e-06, + "loss": 26.8438, + "step": 2780 + }, + { + "epoch": 0.026325006389564656, + "grad_norm": 2.891493797302246, + "learning_rate": 1.7545741324921135e-06, + "loss": 0.8516, + "step": 2781 + }, + { + "epoch": 0.026334472411279713, + "grad_norm": 228.8212432861328, + "learning_rate": 1.755205047318612e-06, + "loss": 30.5312, + "step": 2782 + }, + { + "epoch": 0.026343938432994766, + "grad_norm": 924.3455200195312, + "learning_rate": 1.7558359621451105e-06, + "loss": 61.6094, + "step": 2783 + }, + { + "epoch": 0.02635340445470982, + "grad_norm": 387.9447937011719, + "learning_rate": 1.7564668769716086e-06, + "loss": 57.75, + "step": 2784 + }, + { + "epoch": 0.026362870476424872, + "grad_norm": 416.6798400878906, + "learning_rate": 1.757097791798107e-06, + "loss": 37.8125, + "step": 2785 + }, + { + "epoch": 0.02637233649813993, + "grad_norm": 223.91384887695312, + "learning_rate": 1.7577287066246056e-06, + "loss": 32.5, + "step": 2786 + }, + { + "epoch": 0.02638180251985498, + "grad_norm": 437.2425537109375, + "learning_rate": 1.758359621451104e-06, + "loss": 38.8125, + "step": 2787 + }, + { + "epoch": 0.026391268541570034, + "grad_norm": 522.0120239257812, + "learning_rate": 1.7589905362776025e-06, + "loss": 57.4844, + "step": 2788 + }, + { + "epoch": 0.026400734563285087, + "grad_norm": 622.3343505859375, + "learning_rate": 1.7596214511041008e-06, + "loss": 54.5469, + "step": 2789 + }, + { + "epoch": 0.02641020058500014, + "grad_norm": 217.47483825683594, + "learning_rate": 1.7602523659305993e-06, + "loss": 27.7344, + "step": 2790 + }, + { + "epoch": 0.026419666606715197, + "grad_norm": 2.903043270111084, + "learning_rate": 1.7608832807570976e-06, + "loss": 1.0098, + "step": 2791 + }, + { + "epoch": 0.02642913262843025, + "grad_norm": 434.9029541015625, + "learning_rate": 1.761514195583596e-06, + "loss": 31.8047, + "step": 2792 + }, + { + "epoch": 0.026438598650145303, + "grad_norm": 446.3778381347656, + "learning_rate": 1.7621451104100946e-06, + "loss": 71.75, + "step": 2793 + }, + { + "epoch": 0.026448064671860356, + "grad_norm": 379.9262390136719, + "learning_rate": 1.762776025236593e-06, + "loss": 36.0156, + "step": 2794 + }, + { + "epoch": 0.026457530693575412, + "grad_norm": 1284.861083984375, + "learning_rate": 1.7634069400630914e-06, + "loss": 75.7578, + "step": 2795 + }, + { + "epoch": 0.026466996715290465, + "grad_norm": 207.57882690429688, + "learning_rate": 1.7640378548895899e-06, + "loss": 37.9688, + "step": 2796 + }, + { + "epoch": 0.02647646273700552, + "grad_norm": 564.1875610351562, + "learning_rate": 1.7646687697160883e-06, + "loss": 61.9062, + "step": 2797 + }, + { + "epoch": 0.02648592875872057, + "grad_norm": 264.1395568847656, + "learning_rate": 1.7652996845425866e-06, + "loss": 34.5469, + "step": 2798 + }, + { + "epoch": 0.026495394780435628, + "grad_norm": 771.4548950195312, + "learning_rate": 1.7659305993690851e-06, + "loss": 63.8125, + "step": 2799 + }, + { + "epoch": 0.02650486080215068, + "grad_norm": 178.29049682617188, + "learning_rate": 1.7665615141955834e-06, + "loss": 26.7812, + "step": 2800 + }, + { + "epoch": 0.026514326823865734, + "grad_norm": 749.8849487304688, + "learning_rate": 1.7671924290220819e-06, + "loss": 68.5312, + "step": 2801 + }, + { + "epoch": 0.026523792845580787, + "grad_norm": 580.8681030273438, + "learning_rate": 1.7678233438485804e-06, + "loss": 53.0625, + "step": 2802 + }, + { + "epoch": 0.02653325886729584, + "grad_norm": 288.089599609375, + "learning_rate": 1.7684542586750789e-06, + "loss": 36.3281, + "step": 2803 + }, + { + "epoch": 0.026542724889010896, + "grad_norm": 316.9490661621094, + "learning_rate": 1.7690851735015774e-06, + "loss": 31.8906, + "step": 2804 + }, + { + "epoch": 0.02655219091072595, + "grad_norm": 369.63287353515625, + "learning_rate": 1.7697160883280757e-06, + "loss": 25.1562, + "step": 2805 + }, + { + "epoch": 0.026561656932441002, + "grad_norm": 258.46282958984375, + "learning_rate": 1.770347003154574e-06, + "loss": 25.8906, + "step": 2806 + }, + { + "epoch": 0.026571122954156055, + "grad_norm": 515.8242797851562, + "learning_rate": 1.7709779179810724e-06, + "loss": 44.2031, + "step": 2807 + }, + { + "epoch": 0.026580588975871112, + "grad_norm": 405.8348693847656, + "learning_rate": 1.771608832807571e-06, + "loss": 26.6094, + "step": 2808 + }, + { + "epoch": 0.026590054997586165, + "grad_norm": 3.2274625301361084, + "learning_rate": 1.7722397476340694e-06, + "loss": 0.8809, + "step": 2809 + }, + { + "epoch": 0.026599521019301218, + "grad_norm": 3.190586805343628, + "learning_rate": 1.772870662460568e-06, + "loss": 0.9146, + "step": 2810 + }, + { + "epoch": 0.02660898704101627, + "grad_norm": 341.8644104003906, + "learning_rate": 1.773501577287066e-06, + "loss": 29.2344, + "step": 2811 + }, + { + "epoch": 0.026618453062731327, + "grad_norm": 2.847303628921509, + "learning_rate": 1.7741324921135645e-06, + "loss": 0.7664, + "step": 2812 + }, + { + "epoch": 0.02662791908444638, + "grad_norm": 531.3494262695312, + "learning_rate": 1.774763406940063e-06, + "loss": 33.2969, + "step": 2813 + }, + { + "epoch": 0.026637385106161433, + "grad_norm": 467.3076477050781, + "learning_rate": 1.7753943217665615e-06, + "loss": 37.7812, + "step": 2814 + }, + { + "epoch": 0.026646851127876486, + "grad_norm": 277.7179870605469, + "learning_rate": 1.77602523659306e-06, + "loss": 27.9375, + "step": 2815 + }, + { + "epoch": 0.026656317149591543, + "grad_norm": 1118.260986328125, + "learning_rate": 1.7766561514195584e-06, + "loss": 96.125, + "step": 2816 + }, + { + "epoch": 0.026665783171306596, + "grad_norm": 484.56427001953125, + "learning_rate": 1.7772870662460567e-06, + "loss": 31.6094, + "step": 2817 + }, + { + "epoch": 0.02667524919302165, + "grad_norm": 735.9550170898438, + "learning_rate": 1.777917981072555e-06, + "loss": 30.6875, + "step": 2818 + }, + { + "epoch": 0.026684715214736702, + "grad_norm": 340.3723449707031, + "learning_rate": 1.7785488958990535e-06, + "loss": 29.1406, + "step": 2819 + }, + { + "epoch": 0.026694181236451755, + "grad_norm": 1080.3291015625, + "learning_rate": 1.779179810725552e-06, + "loss": 79.25, + "step": 2820 + }, + { + "epoch": 0.02670364725816681, + "grad_norm": 230.18215942382812, + "learning_rate": 1.7798107255520505e-06, + "loss": 27.8438, + "step": 2821 + }, + { + "epoch": 0.026713113279881864, + "grad_norm": 297.8983154296875, + "learning_rate": 1.7804416403785488e-06, + "loss": 16.8125, + "step": 2822 + }, + { + "epoch": 0.026722579301596917, + "grad_norm": 212.73069763183594, + "learning_rate": 1.7810725552050473e-06, + "loss": 30.5938, + "step": 2823 + }, + { + "epoch": 0.02673204532331197, + "grad_norm": 864.5977783203125, + "learning_rate": 1.7817034700315457e-06, + "loss": 41.9297, + "step": 2824 + }, + { + "epoch": 0.026741511345027027, + "grad_norm": 411.2685546875, + "learning_rate": 1.782334384858044e-06, + "loss": 33.4375, + "step": 2825 + }, + { + "epoch": 0.02675097736674208, + "grad_norm": 367.34368896484375, + "learning_rate": 1.7829652996845425e-06, + "loss": 35.3281, + "step": 2826 + }, + { + "epoch": 0.026760443388457133, + "grad_norm": 304.8624572753906, + "learning_rate": 1.783596214511041e-06, + "loss": 30.0156, + "step": 2827 + }, + { + "epoch": 0.026769909410172186, + "grad_norm": 510.8151550292969, + "learning_rate": 1.7842271293375393e-06, + "loss": 31.7188, + "step": 2828 + }, + { + "epoch": 0.026779375431887242, + "grad_norm": 1009.8579711914062, + "learning_rate": 1.7848580441640378e-06, + "loss": 67.125, + "step": 2829 + }, + { + "epoch": 0.026788841453602295, + "grad_norm": 995.3522338867188, + "learning_rate": 1.7854889589905363e-06, + "loss": 51.8281, + "step": 2830 + }, + { + "epoch": 0.02679830747531735, + "grad_norm": 483.04833984375, + "learning_rate": 1.7861198738170346e-06, + "loss": 28.3125, + "step": 2831 + }, + { + "epoch": 0.0268077734970324, + "grad_norm": 2.9699525833129883, + "learning_rate": 1.786750788643533e-06, + "loss": 0.7661, + "step": 2832 + }, + { + "epoch": 0.026817239518747454, + "grad_norm": 333.9750061035156, + "learning_rate": 1.7873817034700313e-06, + "loss": 41.0938, + "step": 2833 + }, + { + "epoch": 0.02682670554046251, + "grad_norm": 781.246826171875, + "learning_rate": 1.7880126182965298e-06, + "loss": 24.7969, + "step": 2834 + }, + { + "epoch": 0.026836171562177564, + "grad_norm": 395.54901123046875, + "learning_rate": 1.7886435331230283e-06, + "loss": 34.0938, + "step": 2835 + }, + { + "epoch": 0.026845637583892617, + "grad_norm": 434.2903747558594, + "learning_rate": 1.7892744479495268e-06, + "loss": 33.8672, + "step": 2836 + }, + { + "epoch": 0.02685510360560767, + "grad_norm": 797.2318725585938, + "learning_rate": 1.7899053627760253e-06, + "loss": 76.6562, + "step": 2837 + }, + { + "epoch": 0.026864569627322726, + "grad_norm": 235.50624084472656, + "learning_rate": 1.7905362776025236e-06, + "loss": 33.125, + "step": 2838 + }, + { + "epoch": 0.02687403564903778, + "grad_norm": 841.7157592773438, + "learning_rate": 1.7911671924290219e-06, + "loss": 60.5938, + "step": 2839 + }, + { + "epoch": 0.026883501670752832, + "grad_norm": 304.28167724609375, + "learning_rate": 1.7917981072555204e-06, + "loss": 29.9375, + "step": 2840 + }, + { + "epoch": 0.026892967692467885, + "grad_norm": 1488.8602294921875, + "learning_rate": 1.7924290220820189e-06, + "loss": 92.9531, + "step": 2841 + }, + { + "epoch": 0.026902433714182942, + "grad_norm": 407.4042053222656, + "learning_rate": 1.7930599369085173e-06, + "loss": 20.5938, + "step": 2842 + }, + { + "epoch": 0.026911899735897995, + "grad_norm": 265.29425048828125, + "learning_rate": 1.7936908517350158e-06, + "loss": 33.2656, + "step": 2843 + }, + { + "epoch": 0.026921365757613048, + "grad_norm": 1419.1766357421875, + "learning_rate": 1.7943217665615141e-06, + "loss": 94.4922, + "step": 2844 + }, + { + "epoch": 0.0269308317793281, + "grad_norm": 432.6026306152344, + "learning_rate": 1.7949526813880124e-06, + "loss": 39.3125, + "step": 2845 + }, + { + "epoch": 0.026940297801043154, + "grad_norm": 261.84100341796875, + "learning_rate": 1.7955835962145109e-06, + "loss": 29.9844, + "step": 2846 + }, + { + "epoch": 0.02694976382275821, + "grad_norm": 2.5843558311462402, + "learning_rate": 1.7962145110410094e-06, + "loss": 0.8862, + "step": 2847 + }, + { + "epoch": 0.026959229844473263, + "grad_norm": 426.1330261230469, + "learning_rate": 1.7968454258675079e-06, + "loss": 54.3438, + "step": 2848 + }, + { + "epoch": 0.026968695866188316, + "grad_norm": 406.1197204589844, + "learning_rate": 1.7974763406940064e-06, + "loss": 31.6562, + "step": 2849 + }, + { + "epoch": 0.02697816188790337, + "grad_norm": 1153.66845703125, + "learning_rate": 1.7981072555205047e-06, + "loss": 78.4531, + "step": 2850 + }, + { + "epoch": 0.026987627909618426, + "grad_norm": 445.91510009765625, + "learning_rate": 1.798738170347003e-06, + "loss": 53.9375, + "step": 2851 + }, + { + "epoch": 0.02699709393133348, + "grad_norm": 2.7544503211975098, + "learning_rate": 1.7993690851735014e-06, + "loss": 0.9282, + "step": 2852 + }, + { + "epoch": 0.027006559953048532, + "grad_norm": 3.807955741882324, + "learning_rate": 1.8e-06, + "loss": 0.9839, + "step": 2853 + }, + { + "epoch": 0.027016025974763585, + "grad_norm": 195.02525329589844, + "learning_rate": 1.8006309148264984e-06, + "loss": 28.1875, + "step": 2854 + }, + { + "epoch": 0.02702549199647864, + "grad_norm": 699.3561401367188, + "learning_rate": 1.8012618296529967e-06, + "loss": 37.75, + "step": 2855 + }, + { + "epoch": 0.027034958018193694, + "grad_norm": 536.2155151367188, + "learning_rate": 1.8018927444794952e-06, + "loss": 62.4688, + "step": 2856 + }, + { + "epoch": 0.027044424039908747, + "grad_norm": 237.69003295898438, + "learning_rate": 1.8025236593059937e-06, + "loss": 24.875, + "step": 2857 + }, + { + "epoch": 0.0270538900616238, + "grad_norm": 215.80499267578125, + "learning_rate": 1.803154574132492e-06, + "loss": 27.9531, + "step": 2858 + }, + { + "epoch": 0.027063356083338857, + "grad_norm": 216.193359375, + "learning_rate": 1.8037854889589905e-06, + "loss": 24.3594, + "step": 2859 + }, + { + "epoch": 0.02707282210505391, + "grad_norm": 429.9735412597656, + "learning_rate": 1.804416403785489e-06, + "loss": 28.4844, + "step": 2860 + }, + { + "epoch": 0.027082288126768963, + "grad_norm": 1073.587158203125, + "learning_rate": 1.8050473186119872e-06, + "loss": 38.4531, + "step": 2861 + }, + { + "epoch": 0.027091754148484016, + "grad_norm": 748.8652954101562, + "learning_rate": 1.8056782334384857e-06, + "loss": 36.4219, + "step": 2862 + }, + { + "epoch": 0.02710122017019907, + "grad_norm": 675.3941650390625, + "learning_rate": 1.8063091482649842e-06, + "loss": 65.9062, + "step": 2863 + }, + { + "epoch": 0.027110686191914125, + "grad_norm": 281.9307861328125, + "learning_rate": 1.8069400630914827e-06, + "loss": 31.625, + "step": 2864 + }, + { + "epoch": 0.02712015221362918, + "grad_norm": 628.9131469726562, + "learning_rate": 1.807570977917981e-06, + "loss": 66.4688, + "step": 2865 + }, + { + "epoch": 0.02712961823534423, + "grad_norm": 244.9151153564453, + "learning_rate": 1.8082018927444793e-06, + "loss": 29.9062, + "step": 2866 + }, + { + "epoch": 0.027139084257059284, + "grad_norm": 245.7979736328125, + "learning_rate": 1.8088328075709778e-06, + "loss": 29.9219, + "step": 2867 + }, + { + "epoch": 0.02714855027877434, + "grad_norm": 516.9104614257812, + "learning_rate": 1.8094637223974763e-06, + "loss": 47.8125, + "step": 2868 + }, + { + "epoch": 0.027158016300489394, + "grad_norm": 777.917724609375, + "learning_rate": 1.8100946372239747e-06, + "loss": 49.5859, + "step": 2869 + }, + { + "epoch": 0.027167482322204447, + "grad_norm": 752.7832641601562, + "learning_rate": 1.8107255520504732e-06, + "loss": 29.5547, + "step": 2870 + }, + { + "epoch": 0.0271769483439195, + "grad_norm": 332.5315856933594, + "learning_rate": 1.8113564668769717e-06, + "loss": 33.1406, + "step": 2871 + }, + { + "epoch": 0.027186414365634556, + "grad_norm": 2.7734451293945312, + "learning_rate": 1.8119873817034698e-06, + "loss": 0.9702, + "step": 2872 + }, + { + "epoch": 0.02719588038734961, + "grad_norm": 913.514892578125, + "learning_rate": 1.8126182965299683e-06, + "loss": 70.6875, + "step": 2873 + }, + { + "epoch": 0.027205346409064662, + "grad_norm": 236.07952880859375, + "learning_rate": 1.8132492113564668e-06, + "loss": 27.2031, + "step": 2874 + }, + { + "epoch": 0.027214812430779715, + "grad_norm": 219.46685791015625, + "learning_rate": 1.8138801261829653e-06, + "loss": 24.0781, + "step": 2875 + }, + { + "epoch": 0.02722427845249477, + "grad_norm": 544.4822387695312, + "learning_rate": 1.8145110410094638e-06, + "loss": 64.6875, + "step": 2876 + }, + { + "epoch": 0.027233744474209825, + "grad_norm": 251.7190704345703, + "learning_rate": 1.815141955835962e-06, + "loss": 29.0156, + "step": 2877 + }, + { + "epoch": 0.027243210495924878, + "grad_norm": 635.8859252929688, + "learning_rate": 1.8157728706624603e-06, + "loss": 69.0, + "step": 2878 + }, + { + "epoch": 0.02725267651763993, + "grad_norm": 478.3950500488281, + "learning_rate": 1.8164037854889588e-06, + "loss": 61.9375, + "step": 2879 + }, + { + "epoch": 0.027262142539354984, + "grad_norm": 153.2584991455078, + "learning_rate": 1.8170347003154573e-06, + "loss": 27.5156, + "step": 2880 + }, + { + "epoch": 0.02727160856107004, + "grad_norm": 236.82737731933594, + "learning_rate": 1.8176656151419558e-06, + "loss": 26.5156, + "step": 2881 + }, + { + "epoch": 0.027281074582785093, + "grad_norm": 978.193115234375, + "learning_rate": 1.8182965299684543e-06, + "loss": 55.875, + "step": 2882 + }, + { + "epoch": 0.027290540604500146, + "grad_norm": 337.3212585449219, + "learning_rate": 1.8189274447949526e-06, + "loss": 41.5781, + "step": 2883 + }, + { + "epoch": 0.0273000066262152, + "grad_norm": 253.72573852539062, + "learning_rate": 1.819558359621451e-06, + "loss": 31.9531, + "step": 2884 + }, + { + "epoch": 0.027309472647930256, + "grad_norm": 707.2276000976562, + "learning_rate": 1.8201892744479494e-06, + "loss": 63.4531, + "step": 2885 + }, + { + "epoch": 0.02731893866964531, + "grad_norm": 371.9125671386719, + "learning_rate": 1.8208201892744479e-06, + "loss": 28.9219, + "step": 2886 + }, + { + "epoch": 0.027328404691360362, + "grad_norm": 273.8075256347656, + "learning_rate": 1.8214511041009463e-06, + "loss": 29.5625, + "step": 2887 + }, + { + "epoch": 0.027337870713075415, + "grad_norm": 226.48712158203125, + "learning_rate": 1.8220820189274446e-06, + "loss": 26.7188, + "step": 2888 + }, + { + "epoch": 0.027347336734790468, + "grad_norm": 619.0675659179688, + "learning_rate": 1.8227129337539431e-06, + "loss": 80.25, + "step": 2889 + }, + { + "epoch": 0.027356802756505524, + "grad_norm": 349.6076965332031, + "learning_rate": 1.8233438485804416e-06, + "loss": 37.0625, + "step": 2890 + }, + { + "epoch": 0.027366268778220577, + "grad_norm": 414.3895263671875, + "learning_rate": 1.82397476340694e-06, + "loss": 30.1719, + "step": 2891 + }, + { + "epoch": 0.02737573479993563, + "grad_norm": 559.587890625, + "learning_rate": 1.8246056782334384e-06, + "loss": 35.0469, + "step": 2892 + }, + { + "epoch": 0.027385200821650683, + "grad_norm": 226.46749877929688, + "learning_rate": 1.8252365930599369e-06, + "loss": 29.7812, + "step": 2893 + }, + { + "epoch": 0.02739466684336574, + "grad_norm": 327.94012451171875, + "learning_rate": 1.8258675078864352e-06, + "loss": 30.4531, + "step": 2894 + }, + { + "epoch": 0.027404132865080793, + "grad_norm": 335.7247009277344, + "learning_rate": 1.8264984227129337e-06, + "loss": 34.5469, + "step": 2895 + }, + { + "epoch": 0.027413598886795846, + "grad_norm": 3.0588088035583496, + "learning_rate": 1.8271293375394321e-06, + "loss": 0.8994, + "step": 2896 + }, + { + "epoch": 0.0274230649085109, + "grad_norm": 555.658447265625, + "learning_rate": 1.8277602523659306e-06, + "loss": 70.4375, + "step": 2897 + }, + { + "epoch": 0.027432530930225955, + "grad_norm": 351.83941650390625, + "learning_rate": 1.8283911671924291e-06, + "loss": 28.9844, + "step": 2898 + }, + { + "epoch": 0.02744199695194101, + "grad_norm": 296.3174743652344, + "learning_rate": 1.8290220820189272e-06, + "loss": 33.5781, + "step": 2899 + }, + { + "epoch": 0.02745146297365606, + "grad_norm": 3.9454493522644043, + "learning_rate": 1.8296529968454257e-06, + "loss": 1.0605, + "step": 2900 + }, + { + "epoch": 0.027460928995371114, + "grad_norm": 584.4071655273438, + "learning_rate": 1.8302839116719242e-06, + "loss": 54.2812, + "step": 2901 + }, + { + "epoch": 0.02747039501708617, + "grad_norm": 263.21392822265625, + "learning_rate": 1.8309148264984227e-06, + "loss": 26.3203, + "step": 2902 + }, + { + "epoch": 0.027479861038801224, + "grad_norm": 226.60963439941406, + "learning_rate": 1.8315457413249212e-06, + "loss": 28.9219, + "step": 2903 + }, + { + "epoch": 0.027489327060516277, + "grad_norm": 266.7934875488281, + "learning_rate": 1.8321766561514197e-06, + "loss": 28.0938, + "step": 2904 + }, + { + "epoch": 0.02749879308223133, + "grad_norm": 206.91920471191406, + "learning_rate": 1.8328075709779177e-06, + "loss": 27.125, + "step": 2905 + }, + { + "epoch": 0.027508259103946383, + "grad_norm": 355.7350769042969, + "learning_rate": 1.8334384858044162e-06, + "loss": 32.4844, + "step": 2906 + }, + { + "epoch": 0.02751772512566144, + "grad_norm": 579.0957641601562, + "learning_rate": 1.8340694006309147e-06, + "loss": 87.2188, + "step": 2907 + }, + { + "epoch": 0.027527191147376492, + "grad_norm": 436.8077087402344, + "learning_rate": 1.8347003154574132e-06, + "loss": 54.5312, + "step": 2908 + }, + { + "epoch": 0.027536657169091545, + "grad_norm": 3.432182550430298, + "learning_rate": 1.8353312302839117e-06, + "loss": 0.8867, + "step": 2909 + }, + { + "epoch": 0.0275461231908066, + "grad_norm": 271.53558349609375, + "learning_rate": 1.83596214511041e-06, + "loss": 26.8281, + "step": 2910 + }, + { + "epoch": 0.027555589212521655, + "grad_norm": 619.4029541015625, + "learning_rate": 1.8365930599369085e-06, + "loss": 35.9062, + "step": 2911 + }, + { + "epoch": 0.027565055234236708, + "grad_norm": 3.172870397567749, + "learning_rate": 1.8372239747634068e-06, + "loss": 0.9033, + "step": 2912 + }, + { + "epoch": 0.02757452125595176, + "grad_norm": 1512.4412841796875, + "learning_rate": 1.8378548895899053e-06, + "loss": 91.7812, + "step": 2913 + }, + { + "epoch": 0.027583987277666814, + "grad_norm": 600.175048828125, + "learning_rate": 1.8384858044164037e-06, + "loss": 36.6602, + "step": 2914 + }, + { + "epoch": 0.02759345329938187, + "grad_norm": 2.70405650138855, + "learning_rate": 1.839116719242902e-06, + "loss": 0.9741, + "step": 2915 + }, + { + "epoch": 0.027602919321096923, + "grad_norm": 429.63360595703125, + "learning_rate": 1.8397476340694005e-06, + "loss": 45.7812, + "step": 2916 + }, + { + "epoch": 0.027612385342811976, + "grad_norm": 312.9840393066406, + "learning_rate": 1.840378548895899e-06, + "loss": 31.0625, + "step": 2917 + }, + { + "epoch": 0.02762185136452703, + "grad_norm": 343.0050964355469, + "learning_rate": 1.8410094637223975e-06, + "loss": 31.1406, + "step": 2918 + }, + { + "epoch": 0.027631317386242082, + "grad_norm": 746.9326171875, + "learning_rate": 1.8416403785488958e-06, + "loss": 50.0938, + "step": 2919 + }, + { + "epoch": 0.02764078340795714, + "grad_norm": 587.8279418945312, + "learning_rate": 1.8422712933753943e-06, + "loss": 38.9375, + "step": 2920 + }, + { + "epoch": 0.027650249429672192, + "grad_norm": 818.1333618164062, + "learning_rate": 1.8429022082018926e-06, + "loss": 33.8828, + "step": 2921 + }, + { + "epoch": 0.027659715451387245, + "grad_norm": 643.8295288085938, + "learning_rate": 1.843533123028391e-06, + "loss": 74.4375, + "step": 2922 + }, + { + "epoch": 0.027669181473102298, + "grad_norm": 520.696533203125, + "learning_rate": 1.8441640378548895e-06, + "loss": 46.625, + "step": 2923 + }, + { + "epoch": 0.027678647494817354, + "grad_norm": 3.6650547981262207, + "learning_rate": 1.844794952681388e-06, + "loss": 1.0493, + "step": 2924 + }, + { + "epoch": 0.027688113516532407, + "grad_norm": 1017.4833984375, + "learning_rate": 1.8454258675078863e-06, + "loss": 56.0, + "step": 2925 + }, + { + "epoch": 0.02769757953824746, + "grad_norm": 361.9676513671875, + "learning_rate": 1.8460567823343846e-06, + "loss": 33.625, + "step": 2926 + }, + { + "epoch": 0.027707045559962513, + "grad_norm": 262.0277404785156, + "learning_rate": 1.846687697160883e-06, + "loss": 26.3906, + "step": 2927 + }, + { + "epoch": 0.02771651158167757, + "grad_norm": 3.108132839202881, + "learning_rate": 1.8473186119873816e-06, + "loss": 0.8926, + "step": 2928 + }, + { + "epoch": 0.027725977603392623, + "grad_norm": 957.115234375, + "learning_rate": 1.84794952681388e-06, + "loss": 60.3672, + "step": 2929 + }, + { + "epoch": 0.027735443625107676, + "grad_norm": 491.28790283203125, + "learning_rate": 1.8485804416403786e-06, + "loss": 59.625, + "step": 2930 + }, + { + "epoch": 0.02774490964682273, + "grad_norm": 3.061229705810547, + "learning_rate": 1.849211356466877e-06, + "loss": 0.875, + "step": 2931 + }, + { + "epoch": 0.027754375668537782, + "grad_norm": 449.1922302246094, + "learning_rate": 1.8498422712933751e-06, + "loss": 45.25, + "step": 2932 + }, + { + "epoch": 0.02776384169025284, + "grad_norm": 405.2156066894531, + "learning_rate": 1.8504731861198736e-06, + "loss": 27.0625, + "step": 2933 + }, + { + "epoch": 0.02777330771196789, + "grad_norm": 254.56509399414062, + "learning_rate": 1.8511041009463721e-06, + "loss": 27.7109, + "step": 2934 + }, + { + "epoch": 0.027782773733682944, + "grad_norm": 475.0409851074219, + "learning_rate": 1.8517350157728706e-06, + "loss": 49.2969, + "step": 2935 + }, + { + "epoch": 0.027792239755397997, + "grad_norm": 2.91096568107605, + "learning_rate": 1.852365930599369e-06, + "loss": 0.8521, + "step": 2936 + }, + { + "epoch": 0.027801705777113054, + "grad_norm": 625.6689453125, + "learning_rate": 1.8529968454258674e-06, + "loss": 26.6562, + "step": 2937 + }, + { + "epoch": 0.027811171798828107, + "grad_norm": 593.9344482421875, + "learning_rate": 1.8536277602523659e-06, + "loss": 25.9375, + "step": 2938 + }, + { + "epoch": 0.02782063782054316, + "grad_norm": 863.5092163085938, + "learning_rate": 1.8542586750788642e-06, + "loss": 54.0312, + "step": 2939 + }, + { + "epoch": 0.027830103842258213, + "grad_norm": 3.1169614791870117, + "learning_rate": 1.8548895899053627e-06, + "loss": 0.8408, + "step": 2940 + }, + { + "epoch": 0.02783956986397327, + "grad_norm": 1422.12939453125, + "learning_rate": 1.8555205047318611e-06, + "loss": 47.7578, + "step": 2941 + }, + { + "epoch": 0.027849035885688322, + "grad_norm": 273.90093994140625, + "learning_rate": 1.8561514195583596e-06, + "loss": 25.8438, + "step": 2942 + }, + { + "epoch": 0.027858501907403375, + "grad_norm": 215.62661743164062, + "learning_rate": 1.856782334384858e-06, + "loss": 26.4375, + "step": 2943 + }, + { + "epoch": 0.02786796792911843, + "grad_norm": 3.064091444015503, + "learning_rate": 1.8574132492113564e-06, + "loss": 0.8662, + "step": 2944 + }, + { + "epoch": 0.027877433950833485, + "grad_norm": 383.3791809082031, + "learning_rate": 1.858044164037855e-06, + "loss": 44.2266, + "step": 2945 + }, + { + "epoch": 0.027886899972548538, + "grad_norm": 477.2187194824219, + "learning_rate": 1.8586750788643532e-06, + "loss": 31.1016, + "step": 2946 + }, + { + "epoch": 0.02789636599426359, + "grad_norm": 1146.1290283203125, + "learning_rate": 1.8593059936908517e-06, + "loss": 90.8125, + "step": 2947 + }, + { + "epoch": 0.027905832015978644, + "grad_norm": 615.4558715820312, + "learning_rate": 1.85993690851735e-06, + "loss": 31.4375, + "step": 2948 + }, + { + "epoch": 0.027915298037693697, + "grad_norm": 201.60423278808594, + "learning_rate": 1.8605678233438485e-06, + "loss": 27.0781, + "step": 2949 + }, + { + "epoch": 0.027924764059408753, + "grad_norm": 420.35284423828125, + "learning_rate": 1.861198738170347e-06, + "loss": 27.875, + "step": 2950 + }, + { + "epoch": 0.027934230081123806, + "grad_norm": 371.0447998046875, + "learning_rate": 1.8618296529968454e-06, + "loss": 43.7344, + "step": 2951 + }, + { + "epoch": 0.02794369610283886, + "grad_norm": 358.84716796875, + "learning_rate": 1.8624605678233437e-06, + "loss": 36.3281, + "step": 2952 + }, + { + "epoch": 0.027953162124553912, + "grad_norm": 189.9296875, + "learning_rate": 1.8630914826498422e-06, + "loss": 24.7656, + "step": 2953 + }, + { + "epoch": 0.02796262814626897, + "grad_norm": 755.2401733398438, + "learning_rate": 1.8637223974763405e-06, + "loss": 31.0781, + "step": 2954 + }, + { + "epoch": 0.027972094167984022, + "grad_norm": 470.6728515625, + "learning_rate": 1.864353312302839e-06, + "loss": 47.8594, + "step": 2955 + }, + { + "epoch": 0.027981560189699075, + "grad_norm": 568.0224609375, + "learning_rate": 1.8649842271293375e-06, + "loss": 30.3711, + "step": 2956 + }, + { + "epoch": 0.027991026211414128, + "grad_norm": 242.3195343017578, + "learning_rate": 1.865615141955836e-06, + "loss": 28.1719, + "step": 2957 + }, + { + "epoch": 0.028000492233129184, + "grad_norm": 3.082076072692871, + "learning_rate": 1.8662460567823345e-06, + "loss": 0.9028, + "step": 2958 + }, + { + "epoch": 0.028009958254844237, + "grad_norm": 433.72271728515625, + "learning_rate": 1.8668769716088325e-06, + "loss": 31.8984, + "step": 2959 + }, + { + "epoch": 0.02801942427655929, + "grad_norm": 2.7240304946899414, + "learning_rate": 1.867507886435331e-06, + "loss": 0.7896, + "step": 2960 + }, + { + "epoch": 0.028028890298274343, + "grad_norm": 3.179124116897583, + "learning_rate": 1.8681388012618295e-06, + "loss": 1.0146, + "step": 2961 + }, + { + "epoch": 0.028038356319989396, + "grad_norm": 462.6819152832031, + "learning_rate": 1.868769716088328e-06, + "loss": 26.4531, + "step": 2962 + }, + { + "epoch": 0.028047822341704453, + "grad_norm": 3.368075370788574, + "learning_rate": 1.8694006309148265e-06, + "loss": 0.9277, + "step": 2963 + }, + { + "epoch": 0.028057288363419506, + "grad_norm": 726.3485717773438, + "learning_rate": 1.870031545741325e-06, + "loss": 63.5469, + "step": 2964 + }, + { + "epoch": 0.02806675438513456, + "grad_norm": 661.0130615234375, + "learning_rate": 1.8706624605678233e-06, + "loss": 67.2188, + "step": 2965 + }, + { + "epoch": 0.028076220406849612, + "grad_norm": 492.2293395996094, + "learning_rate": 1.8712933753943216e-06, + "loss": 32.0078, + "step": 2966 + }, + { + "epoch": 0.02808568642856467, + "grad_norm": 549.1591796875, + "learning_rate": 1.87192429022082e-06, + "loss": 49.4688, + "step": 2967 + }, + { + "epoch": 0.02809515245027972, + "grad_norm": 166.63673400878906, + "learning_rate": 1.8725552050473185e-06, + "loss": 25.8281, + "step": 2968 + }, + { + "epoch": 0.028104618471994774, + "grad_norm": 802.514404296875, + "learning_rate": 1.873186119873817e-06, + "loss": 26.6172, + "step": 2969 + }, + { + "epoch": 0.028114084493709827, + "grad_norm": 189.3831024169922, + "learning_rate": 1.8738170347003153e-06, + "loss": 30.75, + "step": 2970 + }, + { + "epoch": 0.028123550515424884, + "grad_norm": 181.5418701171875, + "learning_rate": 1.8744479495268138e-06, + "loss": 30.3594, + "step": 2971 + }, + { + "epoch": 0.028133016537139937, + "grad_norm": 380.3318786621094, + "learning_rate": 1.875078864353312e-06, + "loss": 35.2812, + "step": 2972 + }, + { + "epoch": 0.02814248255885499, + "grad_norm": 1136.01806640625, + "learning_rate": 1.8757097791798106e-06, + "loss": 67.2344, + "step": 2973 + }, + { + "epoch": 0.028151948580570043, + "grad_norm": 606.4057006835938, + "learning_rate": 1.876340694006309e-06, + "loss": 32.7031, + "step": 2974 + }, + { + "epoch": 0.028161414602285096, + "grad_norm": 292.56011962890625, + "learning_rate": 1.8769716088328076e-06, + "loss": 39.0625, + "step": 2975 + }, + { + "epoch": 0.028170880624000152, + "grad_norm": 304.9939270019531, + "learning_rate": 1.8776025236593059e-06, + "loss": 33.6562, + "step": 2976 + }, + { + "epoch": 0.028180346645715205, + "grad_norm": 245.83876037597656, + "learning_rate": 1.8782334384858043e-06, + "loss": 26.875, + "step": 2977 + }, + { + "epoch": 0.02818981266743026, + "grad_norm": 437.8835144042969, + "learning_rate": 1.8788643533123028e-06, + "loss": 35.1875, + "step": 2978 + }, + { + "epoch": 0.02819927868914531, + "grad_norm": 394.51348876953125, + "learning_rate": 1.8794952681388011e-06, + "loss": 30.1875, + "step": 2979 + }, + { + "epoch": 0.028208744710860368, + "grad_norm": 377.5068054199219, + "learning_rate": 1.8801261829652996e-06, + "loss": 29.7812, + "step": 2980 + }, + { + "epoch": 0.02821821073257542, + "grad_norm": 235.54354858398438, + "learning_rate": 1.880757097791798e-06, + "loss": 26.125, + "step": 2981 + }, + { + "epoch": 0.028227676754290474, + "grad_norm": 595.423828125, + "learning_rate": 1.8813880126182964e-06, + "loss": 40.6484, + "step": 2982 + }, + { + "epoch": 0.028237142776005527, + "grad_norm": 830.7849731445312, + "learning_rate": 1.8820189274447949e-06, + "loss": 41.2891, + "step": 2983 + }, + { + "epoch": 0.028246608797720583, + "grad_norm": 216.68862915039062, + "learning_rate": 1.8826498422712934e-06, + "loss": 30.125, + "step": 2984 + }, + { + "epoch": 0.028256074819435636, + "grad_norm": 201.319091796875, + "learning_rate": 1.8832807570977919e-06, + "loss": 25.5625, + "step": 2985 + }, + { + "epoch": 0.02826554084115069, + "grad_norm": 896.2003784179688, + "learning_rate": 1.8839116719242901e-06, + "loss": 78.9531, + "step": 2986 + }, + { + "epoch": 0.028275006862865742, + "grad_norm": 213.08804321289062, + "learning_rate": 1.8845425867507884e-06, + "loss": 28.1562, + "step": 2987 + }, + { + "epoch": 0.028284472884580796, + "grad_norm": 618.325439453125, + "learning_rate": 1.885173501577287e-06, + "loss": 32.7344, + "step": 2988 + }, + { + "epoch": 0.028293938906295852, + "grad_norm": 1431.76611328125, + "learning_rate": 1.8858044164037854e-06, + "loss": 62.0781, + "step": 2989 + }, + { + "epoch": 0.028303404928010905, + "grad_norm": 1140.7257080078125, + "learning_rate": 1.886435331230284e-06, + "loss": 59.375, + "step": 2990 + }, + { + "epoch": 0.028312870949725958, + "grad_norm": 856.966796875, + "learning_rate": 1.8870662460567824e-06, + "loss": 66.3906, + "step": 2991 + }, + { + "epoch": 0.02832233697144101, + "grad_norm": 1277.63037109375, + "learning_rate": 1.8876971608832805e-06, + "loss": 64.2344, + "step": 2992 + }, + { + "epoch": 0.028331802993156067, + "grad_norm": 384.1257019042969, + "learning_rate": 1.888328075709779e-06, + "loss": 31.9375, + "step": 2993 + }, + { + "epoch": 0.02834126901487112, + "grad_norm": 1344.2340087890625, + "learning_rate": 1.8889589905362775e-06, + "loss": 59.7188, + "step": 2994 + }, + { + "epoch": 0.028350735036586173, + "grad_norm": 3.85440731048584, + "learning_rate": 1.889589905362776e-06, + "loss": 0.9976, + "step": 2995 + }, + { + "epoch": 0.028360201058301227, + "grad_norm": 471.2390441894531, + "learning_rate": 1.8902208201892744e-06, + "loss": 24.3828, + "step": 2996 + }, + { + "epoch": 0.028369667080016283, + "grad_norm": 173.5531768798828, + "learning_rate": 1.890851735015773e-06, + "loss": 28.6875, + "step": 2997 + }, + { + "epoch": 0.028379133101731336, + "grad_norm": 583.5234375, + "learning_rate": 1.8914826498422712e-06, + "loss": 82.5, + "step": 2998 + }, + { + "epoch": 0.02838859912344639, + "grad_norm": 195.54908752441406, + "learning_rate": 1.8921135646687695e-06, + "loss": 26.8594, + "step": 2999 + }, + { + "epoch": 0.028398065145161442, + "grad_norm": 577.6681518554688, + "learning_rate": 1.892744479495268e-06, + "loss": 58.7109, + "step": 3000 + }, + { + "epoch": 0.0284075311668765, + "grad_norm": 733.3356323242188, + "learning_rate": 1.8933753943217665e-06, + "loss": 52.4609, + "step": 3001 + }, + { + "epoch": 0.02841699718859155, + "grad_norm": 240.54437255859375, + "learning_rate": 1.894006309148265e-06, + "loss": 28.5, + "step": 3002 + }, + { + "epoch": 0.028426463210306605, + "grad_norm": 422.3265380859375, + "learning_rate": 1.8946372239747633e-06, + "loss": 28.8281, + "step": 3003 + }, + { + "epoch": 0.028435929232021658, + "grad_norm": 377.0937805175781, + "learning_rate": 1.8952681388012617e-06, + "loss": 23.2812, + "step": 3004 + }, + { + "epoch": 0.02844539525373671, + "grad_norm": 814.57763671875, + "learning_rate": 1.8958990536277602e-06, + "loss": 63.2812, + "step": 3005 + }, + { + "epoch": 0.028454861275451767, + "grad_norm": 254.05279541015625, + "learning_rate": 1.8965299684542585e-06, + "loss": 28.7344, + "step": 3006 + }, + { + "epoch": 0.02846432729716682, + "grad_norm": 603.8756103515625, + "learning_rate": 1.897160883280757e-06, + "loss": 58.0469, + "step": 3007 + }, + { + "epoch": 0.028473793318881873, + "grad_norm": 396.6353759765625, + "learning_rate": 1.8977917981072555e-06, + "loss": 61.0625, + "step": 3008 + }, + { + "epoch": 0.028483259340596926, + "grad_norm": 161.9495391845703, + "learning_rate": 1.8984227129337538e-06, + "loss": 33.1406, + "step": 3009 + }, + { + "epoch": 0.028492725362311982, + "grad_norm": 301.6107482910156, + "learning_rate": 1.8990536277602523e-06, + "loss": 29.7344, + "step": 3010 + }, + { + "epoch": 0.028502191384027036, + "grad_norm": 715.6193237304688, + "learning_rate": 1.8996845425867508e-06, + "loss": 58.8125, + "step": 3011 + }, + { + "epoch": 0.02851165740574209, + "grad_norm": 612.27978515625, + "learning_rate": 1.9003154574132493e-06, + "loss": 32.6875, + "step": 3012 + }, + { + "epoch": 0.02852112342745714, + "grad_norm": 316.9184875488281, + "learning_rate": 1.9009463722397475e-06, + "loss": 31.1875, + "step": 3013 + }, + { + "epoch": 0.028530589449172198, + "grad_norm": 301.16070556640625, + "learning_rate": 1.9015772870662458e-06, + "loss": 26.8438, + "step": 3014 + }, + { + "epoch": 0.02854005547088725, + "grad_norm": 415.5513916015625, + "learning_rate": 1.9022082018927443e-06, + "loss": 48.7344, + "step": 3015 + }, + { + "epoch": 0.028549521492602304, + "grad_norm": 495.078857421875, + "learning_rate": 1.9028391167192428e-06, + "loss": 38.0625, + "step": 3016 + }, + { + "epoch": 0.028558987514317357, + "grad_norm": 529.0376586914062, + "learning_rate": 1.9034700315457413e-06, + "loss": 64.3438, + "step": 3017 + }, + { + "epoch": 0.02856845353603241, + "grad_norm": 764.2074584960938, + "learning_rate": 1.9041009463722398e-06, + "loss": 31.5, + "step": 3018 + }, + { + "epoch": 0.028577919557747467, + "grad_norm": 186.86245727539062, + "learning_rate": 1.9047318611987383e-06, + "loss": 34.25, + "step": 3019 + }, + { + "epoch": 0.02858738557946252, + "grad_norm": 170.32614135742188, + "learning_rate": 1.9053627760252364e-06, + "loss": 26.0625, + "step": 3020 + }, + { + "epoch": 0.028596851601177573, + "grad_norm": 176.12733459472656, + "learning_rate": 1.9059936908517349e-06, + "loss": 25.8438, + "step": 3021 + }, + { + "epoch": 0.028606317622892626, + "grad_norm": 267.1184997558594, + "learning_rate": 1.9066246056782333e-06, + "loss": 26.0938, + "step": 3022 + }, + { + "epoch": 0.028615783644607682, + "grad_norm": 1719.5550537109375, + "learning_rate": 1.9072555205047318e-06, + "loss": 59.2188, + "step": 3023 + }, + { + "epoch": 0.028625249666322735, + "grad_norm": 551.0123291015625, + "learning_rate": 1.90788643533123e-06, + "loss": 31.5938, + "step": 3024 + }, + { + "epoch": 0.028634715688037788, + "grad_norm": 411.9028625488281, + "learning_rate": 1.9085173501577286e-06, + "loss": 60.875, + "step": 3025 + }, + { + "epoch": 0.02864418170975284, + "grad_norm": 307.43927001953125, + "learning_rate": 1.909148264984227e-06, + "loss": 30.4062, + "step": 3026 + }, + { + "epoch": 0.028653647731467898, + "grad_norm": 3.6146726608276367, + "learning_rate": 1.9097791798107256e-06, + "loss": 1.0459, + "step": 3027 + }, + { + "epoch": 0.02866311375318295, + "grad_norm": 768.6837158203125, + "learning_rate": 1.910410094637224e-06, + "loss": 58.2031, + "step": 3028 + }, + { + "epoch": 0.028672579774898004, + "grad_norm": 376.70452880859375, + "learning_rate": 1.9110410094637226e-06, + "loss": 34.5781, + "step": 3029 + }, + { + "epoch": 0.028682045796613057, + "grad_norm": 289.410400390625, + "learning_rate": 1.9116719242902207e-06, + "loss": 29.3281, + "step": 3030 + }, + { + "epoch": 0.02869151181832811, + "grad_norm": 405.9792175292969, + "learning_rate": 1.912302839116719e-06, + "loss": 29.6562, + "step": 3031 + }, + { + "epoch": 0.028700977840043166, + "grad_norm": 963.8055419921875, + "learning_rate": 1.9129337539432176e-06, + "loss": 71.1172, + "step": 3032 + }, + { + "epoch": 0.02871044386175822, + "grad_norm": 954.6480102539062, + "learning_rate": 1.913564668769716e-06, + "loss": 62.3281, + "step": 3033 + }, + { + "epoch": 0.028719909883473272, + "grad_norm": 313.32989501953125, + "learning_rate": 1.9141955835962146e-06, + "loss": 30.9688, + "step": 3034 + }, + { + "epoch": 0.028729375905188325, + "grad_norm": 692.7454833984375, + "learning_rate": 1.914826498422713e-06, + "loss": 49.0703, + "step": 3035 + }, + { + "epoch": 0.02873884192690338, + "grad_norm": 206.32167053222656, + "learning_rate": 1.915457413249211e-06, + "loss": 26.8438, + "step": 3036 + }, + { + "epoch": 0.028748307948618435, + "grad_norm": 345.513671875, + "learning_rate": 1.9160883280757097e-06, + "loss": 32.4844, + "step": 3037 + }, + { + "epoch": 0.028757773970333488, + "grad_norm": 354.3600158691406, + "learning_rate": 1.916719242902208e-06, + "loss": 31.8438, + "step": 3038 + }, + { + "epoch": 0.02876723999204854, + "grad_norm": 1350.3837890625, + "learning_rate": 1.9173501577287067e-06, + "loss": 38.8438, + "step": 3039 + }, + { + "epoch": 0.028776706013763597, + "grad_norm": 516.7263793945312, + "learning_rate": 1.917981072555205e-06, + "loss": 33.5625, + "step": 3040 + }, + { + "epoch": 0.02878617203547865, + "grad_norm": 313.1994934082031, + "learning_rate": 1.9186119873817037e-06, + "loss": 22.9062, + "step": 3041 + }, + { + "epoch": 0.028795638057193703, + "grad_norm": 358.66943359375, + "learning_rate": 1.9192429022082017e-06, + "loss": 47.5938, + "step": 3042 + }, + { + "epoch": 0.028805104078908756, + "grad_norm": 423.15789794921875, + "learning_rate": 1.9198738170347002e-06, + "loss": 56.2969, + "step": 3043 + }, + { + "epoch": 0.028814570100623813, + "grad_norm": 3.065624237060547, + "learning_rate": 1.9205047318611987e-06, + "loss": 0.7681, + "step": 3044 + }, + { + "epoch": 0.028824036122338866, + "grad_norm": 242.20101928710938, + "learning_rate": 1.921135646687697e-06, + "loss": 29.1094, + "step": 3045 + }, + { + "epoch": 0.02883350214405392, + "grad_norm": 625.7954711914062, + "learning_rate": 1.9217665615141957e-06, + "loss": 34.3281, + "step": 3046 + }, + { + "epoch": 0.02884296816576897, + "grad_norm": 371.7037658691406, + "learning_rate": 1.9223974763406938e-06, + "loss": 35.6719, + "step": 3047 + }, + { + "epoch": 0.028852434187484025, + "grad_norm": 3.605592966079712, + "learning_rate": 1.9230283911671923e-06, + "loss": 1.0073, + "step": 3048 + }, + { + "epoch": 0.02886190020919908, + "grad_norm": 340.3345947265625, + "learning_rate": 1.9236593059936907e-06, + "loss": 38.25, + "step": 3049 + }, + { + "epoch": 0.028871366230914134, + "grad_norm": 430.219970703125, + "learning_rate": 1.9242902208201892e-06, + "loss": 59.1719, + "step": 3050 + }, + { + "epoch": 0.028880832252629187, + "grad_norm": 212.24609375, + "learning_rate": 1.9249211356466877e-06, + "loss": 29.25, + "step": 3051 + }, + { + "epoch": 0.02889029827434424, + "grad_norm": 523.6144409179688, + "learning_rate": 1.9255520504731862e-06, + "loss": 44.7969, + "step": 3052 + }, + { + "epoch": 0.028899764296059297, + "grad_norm": 462.2934875488281, + "learning_rate": 1.9261829652996843e-06, + "loss": 52.25, + "step": 3053 + }, + { + "epoch": 0.02890923031777435, + "grad_norm": 510.462646484375, + "learning_rate": 1.926813880126183e-06, + "loss": 27.5312, + "step": 3054 + }, + { + "epoch": 0.028918696339489403, + "grad_norm": 534.5834350585938, + "learning_rate": 1.9274447949526813e-06, + "loss": 62.4688, + "step": 3055 + }, + { + "epoch": 0.028928162361204456, + "grad_norm": 594.3785400390625, + "learning_rate": 1.9280757097791798e-06, + "loss": 33.0, + "step": 3056 + }, + { + "epoch": 0.028937628382919512, + "grad_norm": 465.9027099609375, + "learning_rate": 1.9287066246056783e-06, + "loss": 40.4219, + "step": 3057 + }, + { + "epoch": 0.028947094404634565, + "grad_norm": 263.90911865234375, + "learning_rate": 1.9293375394321763e-06, + "loss": 25.4062, + "step": 3058 + }, + { + "epoch": 0.028956560426349618, + "grad_norm": 859.252197265625, + "learning_rate": 1.929968454258675e-06, + "loss": 56.3203, + "step": 3059 + }, + { + "epoch": 0.02896602644806467, + "grad_norm": 355.9149475097656, + "learning_rate": 1.9305993690851733e-06, + "loss": 27.9531, + "step": 3060 + }, + { + "epoch": 0.028975492469779724, + "grad_norm": 724.5003051757812, + "learning_rate": 1.931230283911672e-06, + "loss": 57.6875, + "step": 3061 + }, + { + "epoch": 0.02898495849149478, + "grad_norm": 632.14208984375, + "learning_rate": 1.9318611987381703e-06, + "loss": 35.2969, + "step": 3062 + }, + { + "epoch": 0.028994424513209834, + "grad_norm": 907.6917724609375, + "learning_rate": 1.932492113564669e-06, + "loss": 50.9766, + "step": 3063 + }, + { + "epoch": 0.029003890534924887, + "grad_norm": 574.6113891601562, + "learning_rate": 1.933123028391167e-06, + "loss": 21.7891, + "step": 3064 + }, + { + "epoch": 0.02901335655663994, + "grad_norm": 482.6669616699219, + "learning_rate": 1.9337539432176654e-06, + "loss": 35.1406, + "step": 3065 + }, + { + "epoch": 0.029022822578354996, + "grad_norm": 373.1529541015625, + "learning_rate": 1.934384858044164e-06, + "loss": 36.1719, + "step": 3066 + }, + { + "epoch": 0.02903228860007005, + "grad_norm": 532.2530517578125, + "learning_rate": 1.9350157728706623e-06, + "loss": 67.75, + "step": 3067 + }, + { + "epoch": 0.029041754621785102, + "grad_norm": 552.7474365234375, + "learning_rate": 1.935646687697161e-06, + "loss": 31.1719, + "step": 3068 + }, + { + "epoch": 0.029051220643500155, + "grad_norm": 644.087890625, + "learning_rate": 1.9362776025236593e-06, + "loss": 53.1797, + "step": 3069 + }, + { + "epoch": 0.02906068666521521, + "grad_norm": 596.1608276367188, + "learning_rate": 1.9369085173501574e-06, + "loss": 77.2812, + "step": 3070 + }, + { + "epoch": 0.029070152686930265, + "grad_norm": 194.372314453125, + "learning_rate": 1.937539432176656e-06, + "loss": 25.4062, + "step": 3071 + }, + { + "epoch": 0.029079618708645318, + "grad_norm": 842.5868530273438, + "learning_rate": 1.9381703470031544e-06, + "loss": 63.3828, + "step": 3072 + }, + { + "epoch": 0.02908908473036037, + "grad_norm": 592.0133056640625, + "learning_rate": 1.938801261829653e-06, + "loss": 46.6562, + "step": 3073 + }, + { + "epoch": 0.029098550752075424, + "grad_norm": 770.9590454101562, + "learning_rate": 1.9394321766561514e-06, + "loss": 30.0312, + "step": 3074 + }, + { + "epoch": 0.02910801677379048, + "grad_norm": 249.2825927734375, + "learning_rate": 1.94006309148265e-06, + "loss": 29.125, + "step": 3075 + }, + { + "epoch": 0.029117482795505533, + "grad_norm": 620.82373046875, + "learning_rate": 1.940694006309148e-06, + "loss": 67.9531, + "step": 3076 + }, + { + "epoch": 0.029126948817220586, + "grad_norm": 2.656998634338379, + "learning_rate": 1.9413249211356464e-06, + "loss": 0.7539, + "step": 3077 + }, + { + "epoch": 0.02913641483893564, + "grad_norm": 529.2372436523438, + "learning_rate": 1.941955835962145e-06, + "loss": 58.3594, + "step": 3078 + }, + { + "epoch": 0.029145880860650696, + "grad_norm": 785.6359252929688, + "learning_rate": 1.9425867507886434e-06, + "loss": 49.0547, + "step": 3079 + }, + { + "epoch": 0.02915534688236575, + "grad_norm": 211.0615997314453, + "learning_rate": 1.943217665615142e-06, + "loss": 28.0781, + "step": 3080 + }, + { + "epoch": 0.0291648129040808, + "grad_norm": 555.6659545898438, + "learning_rate": 1.9438485804416404e-06, + "loss": 30.0156, + "step": 3081 + }, + { + "epoch": 0.029174278925795855, + "grad_norm": 342.466796875, + "learning_rate": 1.944479495268139e-06, + "loss": 44.625, + "step": 3082 + }, + { + "epoch": 0.02918374494751091, + "grad_norm": 298.42620849609375, + "learning_rate": 1.945110410094637e-06, + "loss": 26.7812, + "step": 3083 + }, + { + "epoch": 0.029193210969225964, + "grad_norm": 678.5958251953125, + "learning_rate": 1.9457413249211355e-06, + "loss": 74.6094, + "step": 3084 + }, + { + "epoch": 0.029202676990941017, + "grad_norm": 535.227783203125, + "learning_rate": 1.946372239747634e-06, + "loss": 56.6562, + "step": 3085 + }, + { + "epoch": 0.02921214301265607, + "grad_norm": 723.6743774414062, + "learning_rate": 1.9470031545741324e-06, + "loss": 70.9375, + "step": 3086 + }, + { + "epoch": 0.029221609034371127, + "grad_norm": 280.4714660644531, + "learning_rate": 1.947634069400631e-06, + "loss": 27.6875, + "step": 3087 + }, + { + "epoch": 0.02923107505608618, + "grad_norm": 439.0805969238281, + "learning_rate": 1.9482649842271294e-06, + "loss": 59.1562, + "step": 3088 + }, + { + "epoch": 0.029240541077801233, + "grad_norm": 978.1036987304688, + "learning_rate": 1.948895899053628e-06, + "loss": 70.375, + "step": 3089 + }, + { + "epoch": 0.029250007099516286, + "grad_norm": 640.5734252929688, + "learning_rate": 1.949526813880126e-06, + "loss": 64.9922, + "step": 3090 + }, + { + "epoch": 0.02925947312123134, + "grad_norm": 164.64476013183594, + "learning_rate": 1.9501577287066245e-06, + "loss": 30.625, + "step": 3091 + }, + { + "epoch": 0.029268939142946395, + "grad_norm": 532.6482543945312, + "learning_rate": 1.950788643533123e-06, + "loss": 26.0391, + "step": 3092 + }, + { + "epoch": 0.029278405164661448, + "grad_norm": 310.9605712890625, + "learning_rate": 1.9514195583596215e-06, + "loss": 28.0938, + "step": 3093 + }, + { + "epoch": 0.0292878711863765, + "grad_norm": 580.2094116210938, + "learning_rate": 1.95205047318612e-06, + "loss": 33.4219, + "step": 3094 + }, + { + "epoch": 0.029297337208091554, + "grad_norm": 336.94915771484375, + "learning_rate": 1.9526813880126185e-06, + "loss": 41.0625, + "step": 3095 + }, + { + "epoch": 0.02930680322980661, + "grad_norm": 398.5977478027344, + "learning_rate": 1.9533123028391165e-06, + "loss": 51.2188, + "step": 3096 + }, + { + "epoch": 0.029316269251521664, + "grad_norm": 601.50634765625, + "learning_rate": 1.953943217665615e-06, + "loss": 59.875, + "step": 3097 + }, + { + "epoch": 0.029325735273236717, + "grad_norm": 600.732666015625, + "learning_rate": 1.9545741324921135e-06, + "loss": 38.8516, + "step": 3098 + }, + { + "epoch": 0.02933520129495177, + "grad_norm": 371.4056396484375, + "learning_rate": 1.955205047318612e-06, + "loss": 33.1562, + "step": 3099 + }, + { + "epoch": 0.029344667316666826, + "grad_norm": 214.05520629882812, + "learning_rate": 1.9558359621451105e-06, + "loss": 21.8828, + "step": 3100 + }, + { + "epoch": 0.02935413333838188, + "grad_norm": 279.1535339355469, + "learning_rate": 1.956466876971609e-06, + "loss": 29.5156, + "step": 3101 + }, + { + "epoch": 0.029363599360096932, + "grad_norm": 578.9552612304688, + "learning_rate": 1.957097791798107e-06, + "loss": 65.875, + "step": 3102 + }, + { + "epoch": 0.029373065381811985, + "grad_norm": 788.7144165039062, + "learning_rate": 1.9577287066246055e-06, + "loss": 31.9609, + "step": 3103 + }, + { + "epoch": 0.029382531403527038, + "grad_norm": 401.9947204589844, + "learning_rate": 1.958359621451104e-06, + "loss": 29.25, + "step": 3104 + }, + { + "epoch": 0.029391997425242095, + "grad_norm": 2.8628127574920654, + "learning_rate": 1.9589905362776025e-06, + "loss": 0.8906, + "step": 3105 + }, + { + "epoch": 0.029401463446957148, + "grad_norm": 599.5540161132812, + "learning_rate": 1.959621451104101e-06, + "loss": 29.2734, + "step": 3106 + }, + { + "epoch": 0.0294109294686722, + "grad_norm": 581.6146240234375, + "learning_rate": 1.960252365930599e-06, + "loss": 43.6406, + "step": 3107 + }, + { + "epoch": 0.029420395490387254, + "grad_norm": 577.175537109375, + "learning_rate": 1.9608832807570976e-06, + "loss": 45.8281, + "step": 3108 + }, + { + "epoch": 0.02942986151210231, + "grad_norm": 510.3935852050781, + "learning_rate": 1.961514195583596e-06, + "loss": 35.6562, + "step": 3109 + }, + { + "epoch": 0.029439327533817363, + "grad_norm": 477.02215576171875, + "learning_rate": 1.9621451104100946e-06, + "loss": 27.6094, + "step": 3110 + }, + { + "epoch": 0.029448793555532416, + "grad_norm": 3.247584342956543, + "learning_rate": 1.962776025236593e-06, + "loss": 0.8472, + "step": 3111 + }, + { + "epoch": 0.02945825957724747, + "grad_norm": 354.92413330078125, + "learning_rate": 1.9634069400630916e-06, + "loss": 30.7891, + "step": 3112 + }, + { + "epoch": 0.029467725598962526, + "grad_norm": 1059.577392578125, + "learning_rate": 1.9640378548895896e-06, + "loss": 66.8906, + "step": 3113 + }, + { + "epoch": 0.02947719162067758, + "grad_norm": 556.3794555664062, + "learning_rate": 1.964668769716088e-06, + "loss": 25.6562, + "step": 3114 + }, + { + "epoch": 0.02948665764239263, + "grad_norm": 199.82968139648438, + "learning_rate": 1.9652996845425866e-06, + "loss": 28.6406, + "step": 3115 + }, + { + "epoch": 0.029496123664107685, + "grad_norm": 712.4205932617188, + "learning_rate": 1.965930599369085e-06, + "loss": 42.6094, + "step": 3116 + }, + { + "epoch": 0.029505589685822738, + "grad_norm": 472.3949279785156, + "learning_rate": 1.9665615141955836e-06, + "loss": 39.2031, + "step": 3117 + }, + { + "epoch": 0.029515055707537794, + "grad_norm": 273.7834167480469, + "learning_rate": 1.9671924290220817e-06, + "loss": 31.2969, + "step": 3118 + }, + { + "epoch": 0.029524521729252847, + "grad_norm": 2.4284160137176514, + "learning_rate": 1.96782334384858e-06, + "loss": 0.7749, + "step": 3119 + }, + { + "epoch": 0.0295339877509679, + "grad_norm": 490.3192138671875, + "learning_rate": 1.9684542586750787e-06, + "loss": 37.4688, + "step": 3120 + }, + { + "epoch": 0.029543453772682953, + "grad_norm": 901.8837890625, + "learning_rate": 1.969085173501577e-06, + "loss": 54.75, + "step": 3121 + }, + { + "epoch": 0.02955291979439801, + "grad_norm": 598.9456176757812, + "learning_rate": 1.9697160883280756e-06, + "loss": 53.375, + "step": 3122 + }, + { + "epoch": 0.029562385816113063, + "grad_norm": 237.89077758789062, + "learning_rate": 1.970347003154574e-06, + "loss": 29.6406, + "step": 3123 + }, + { + "epoch": 0.029571851837828116, + "grad_norm": 1173.3260498046875, + "learning_rate": 1.970977917981072e-06, + "loss": 65.2891, + "step": 3124 + }, + { + "epoch": 0.02958131785954317, + "grad_norm": 488.6764221191406, + "learning_rate": 1.9716088328075707e-06, + "loss": 37.7031, + "step": 3125 + }, + { + "epoch": 0.029590783881258225, + "grad_norm": 2.9530367851257324, + "learning_rate": 1.972239747634069e-06, + "loss": 0.9192, + "step": 3126 + }, + { + "epoch": 0.029600249902973278, + "grad_norm": 543.4263305664062, + "learning_rate": 1.9728706624605677e-06, + "loss": 43.1719, + "step": 3127 + }, + { + "epoch": 0.02960971592468833, + "grad_norm": 899.5927734375, + "learning_rate": 1.973501577287066e-06, + "loss": 41.7031, + "step": 3128 + }, + { + "epoch": 0.029619181946403384, + "grad_norm": 284.1377868652344, + "learning_rate": 1.9741324921135647e-06, + "loss": 28.625, + "step": 3129 + }, + { + "epoch": 0.02962864796811844, + "grad_norm": 577.85205078125, + "learning_rate": 1.9747634069400627e-06, + "loss": 44.4531, + "step": 3130 + }, + { + "epoch": 0.029638113989833494, + "grad_norm": 891.6448974609375, + "learning_rate": 1.9753943217665612e-06, + "loss": 38.6719, + "step": 3131 + }, + { + "epoch": 0.029647580011548547, + "grad_norm": 616.3634643554688, + "learning_rate": 1.9760252365930597e-06, + "loss": 36.3906, + "step": 3132 + }, + { + "epoch": 0.0296570460332636, + "grad_norm": 500.46337890625, + "learning_rate": 1.9766561514195582e-06, + "loss": 26.4844, + "step": 3133 + }, + { + "epoch": 0.029666512054978653, + "grad_norm": 853.7125244140625, + "learning_rate": 1.9772870662460567e-06, + "loss": 51.3281, + "step": 3134 + }, + { + "epoch": 0.02967597807669371, + "grad_norm": 1225.314697265625, + "learning_rate": 1.977917981072555e-06, + "loss": 58.2422, + "step": 3135 + }, + { + "epoch": 0.029685444098408762, + "grad_norm": 1324.1092529296875, + "learning_rate": 1.9785488958990537e-06, + "loss": 47.8906, + "step": 3136 + }, + { + "epoch": 0.029694910120123815, + "grad_norm": 546.4848022460938, + "learning_rate": 1.9791798107255518e-06, + "loss": 37.3594, + "step": 3137 + }, + { + "epoch": 0.029704376141838868, + "grad_norm": 501.7084655761719, + "learning_rate": 1.9798107255520503e-06, + "loss": 41.125, + "step": 3138 + }, + { + "epoch": 0.029713842163553925, + "grad_norm": 308.98101806640625, + "learning_rate": 1.9804416403785487e-06, + "loss": 26.2344, + "step": 3139 + }, + { + "epoch": 0.029723308185268978, + "grad_norm": 244.4647979736328, + "learning_rate": 1.9810725552050472e-06, + "loss": 26.9688, + "step": 3140 + }, + { + "epoch": 0.02973277420698403, + "grad_norm": 405.2528991699219, + "learning_rate": 1.9817034700315457e-06, + "loss": 52.8906, + "step": 3141 + }, + { + "epoch": 0.029742240228699084, + "grad_norm": 698.5236206054688, + "learning_rate": 1.9823343848580442e-06, + "loss": 58.75, + "step": 3142 + }, + { + "epoch": 0.02975170625041414, + "grad_norm": 480.1764831542969, + "learning_rate": 1.9829652996845427e-06, + "loss": 57.3281, + "step": 3143 + }, + { + "epoch": 0.029761172272129193, + "grad_norm": 632.9220581054688, + "learning_rate": 1.983596214511041e-06, + "loss": 26.5, + "step": 3144 + }, + { + "epoch": 0.029770638293844246, + "grad_norm": 488.35662841796875, + "learning_rate": 1.9842271293375393e-06, + "loss": 48.6875, + "step": 3145 + }, + { + "epoch": 0.0297801043155593, + "grad_norm": 399.93798828125, + "learning_rate": 1.9848580441640378e-06, + "loss": 34.8594, + "step": 3146 + }, + { + "epoch": 0.029789570337274352, + "grad_norm": 244.40890502929688, + "learning_rate": 1.9854889589905363e-06, + "loss": 26.4062, + "step": 3147 + }, + { + "epoch": 0.02979903635898941, + "grad_norm": 454.66461181640625, + "learning_rate": 1.9861198738170348e-06, + "loss": 35.5156, + "step": 3148 + }, + { + "epoch": 0.02980850238070446, + "grad_norm": 1342.02099609375, + "learning_rate": 1.9867507886435333e-06, + "loss": 51.4062, + "step": 3149 + }, + { + "epoch": 0.029817968402419515, + "grad_norm": 2.7180888652801514, + "learning_rate": 1.9873817034700313e-06, + "loss": 0.7175, + "step": 3150 + }, + { + "epoch": 0.029827434424134568, + "grad_norm": 261.70416259765625, + "learning_rate": 1.98801261829653e-06, + "loss": 28.8281, + "step": 3151 + }, + { + "epoch": 0.029836900445849624, + "grad_norm": 1050.95947265625, + "learning_rate": 1.9886435331230283e-06, + "loss": 54.2812, + "step": 3152 + }, + { + "epoch": 0.029846366467564677, + "grad_norm": 641.332763671875, + "learning_rate": 1.989274447949527e-06, + "loss": 44.4844, + "step": 3153 + }, + { + "epoch": 0.02985583248927973, + "grad_norm": 632.338623046875, + "learning_rate": 1.9899053627760253e-06, + "loss": 59.6719, + "step": 3154 + }, + { + "epoch": 0.029865298510994783, + "grad_norm": 956.9154663085938, + "learning_rate": 1.9905362776025238e-06, + "loss": 110.4531, + "step": 3155 + }, + { + "epoch": 0.02987476453270984, + "grad_norm": 241.5674591064453, + "learning_rate": 1.9911671924290223e-06, + "loss": 30.6562, + "step": 3156 + }, + { + "epoch": 0.029884230554424893, + "grad_norm": 313.2276611328125, + "learning_rate": 1.9917981072555203e-06, + "loss": 27.8438, + "step": 3157 + }, + { + "epoch": 0.029893696576139946, + "grad_norm": 250.06887817382812, + "learning_rate": 1.992429022082019e-06, + "loss": 27.5156, + "step": 3158 + }, + { + "epoch": 0.029903162597855, + "grad_norm": 281.59246826171875, + "learning_rate": 1.9930599369085173e-06, + "loss": 31.9531, + "step": 3159 + }, + { + "epoch": 0.02991262861957005, + "grad_norm": 412.3233947753906, + "learning_rate": 1.993690851735016e-06, + "loss": 29.1406, + "step": 3160 + }, + { + "epoch": 0.029922094641285108, + "grad_norm": 650.00830078125, + "learning_rate": 1.9943217665615143e-06, + "loss": 34.5781, + "step": 3161 + }, + { + "epoch": 0.02993156066300016, + "grad_norm": 458.2017517089844, + "learning_rate": 1.9949526813880124e-06, + "loss": 27.875, + "step": 3162 + }, + { + "epoch": 0.029941026684715214, + "grad_norm": 172.95606994628906, + "learning_rate": 1.995583596214511e-06, + "loss": 26.3438, + "step": 3163 + }, + { + "epoch": 0.029950492706430267, + "grad_norm": 293.0791015625, + "learning_rate": 1.9962145110410094e-06, + "loss": 39.7188, + "step": 3164 + }, + { + "epoch": 0.029959958728145324, + "grad_norm": 502.2494812011719, + "learning_rate": 1.996845425867508e-06, + "loss": 32.7344, + "step": 3165 + }, + { + "epoch": 0.029969424749860377, + "grad_norm": 223.95590209960938, + "learning_rate": 1.9974763406940064e-06, + "loss": 31.3906, + "step": 3166 + }, + { + "epoch": 0.02997889077157543, + "grad_norm": 786.6055908203125, + "learning_rate": 1.998107255520505e-06, + "loss": 40.7266, + "step": 3167 + }, + { + "epoch": 0.029988356793290483, + "grad_norm": 645.2817993164062, + "learning_rate": 1.998738170347003e-06, + "loss": 35.9688, + "step": 3168 + }, + { + "epoch": 0.02999782281500554, + "grad_norm": 788.1417236328125, + "learning_rate": 1.9993690851735014e-06, + "loss": 57.3438, + "step": 3169 + }, + { + "epoch": 0.030007288836720592, + "grad_norm": 392.1400146484375, + "learning_rate": 2e-06, + "loss": 32.0156, + "step": 3170 + }, + { + "epoch": 0.030016754858435645, + "grad_norm": 486.1142883300781, + "learning_rate": 1.999999999530032e-06, + "loss": 39.6719, + "step": 3171 + }, + { + "epoch": 0.030026220880150698, + "grad_norm": 746.9313354492188, + "learning_rate": 1.9999999981201297e-06, + "loss": 58.25, + "step": 3172 + }, + { + "epoch": 0.03003568690186575, + "grad_norm": 569.4801635742188, + "learning_rate": 1.9999999957702926e-06, + "loss": 37.5312, + "step": 3173 + }, + { + "epoch": 0.030045152923580808, + "grad_norm": 235.57142639160156, + "learning_rate": 1.9999999924805204e-06, + "loss": 27.0312, + "step": 3174 + }, + { + "epoch": 0.03005461894529586, + "grad_norm": 279.6739196777344, + "learning_rate": 1.999999988250813e-06, + "loss": 32.0781, + "step": 3175 + }, + { + "epoch": 0.030064084967010914, + "grad_norm": 442.7533874511719, + "learning_rate": 1.9999999830811707e-06, + "loss": 51.0938, + "step": 3176 + }, + { + "epoch": 0.030073550988725967, + "grad_norm": 421.93572998046875, + "learning_rate": 1.999999976971594e-06, + "loss": 28.2031, + "step": 3177 + }, + { + "epoch": 0.030083017010441023, + "grad_norm": 541.4947509765625, + "learning_rate": 1.999999969922082e-06, + "loss": 54.1719, + "step": 3178 + }, + { + "epoch": 0.030092483032156076, + "grad_norm": 448.1448974609375, + "learning_rate": 1.999999961932635e-06, + "loss": 29.2188, + "step": 3179 + }, + { + "epoch": 0.03010194905387113, + "grad_norm": 789.3575439453125, + "learning_rate": 1.999999953003253e-06, + "loss": 30.3125, + "step": 3180 + }, + { + "epoch": 0.030111415075586182, + "grad_norm": 502.7801818847656, + "learning_rate": 1.9999999431339365e-06, + "loss": 35.125, + "step": 3181 + }, + { + "epoch": 0.03012088109730124, + "grad_norm": 696.0413208007812, + "learning_rate": 1.9999999323246848e-06, + "loss": 63.7422, + "step": 3182 + }, + { + "epoch": 0.03013034711901629, + "grad_norm": 446.79144287109375, + "learning_rate": 1.999999920575498e-06, + "loss": 50.5781, + "step": 3183 + }, + { + "epoch": 0.030139813140731345, + "grad_norm": 3369.864501953125, + "learning_rate": 1.9999999078863765e-06, + "loss": 31.4688, + "step": 3184 + }, + { + "epoch": 0.030149279162446398, + "grad_norm": 694.4679565429688, + "learning_rate": 1.9999998942573203e-06, + "loss": 55.3672, + "step": 3185 + }, + { + "epoch": 0.030158745184161454, + "grad_norm": 998.7286376953125, + "learning_rate": 1.9999998796883295e-06, + "loss": 32.9453, + "step": 3186 + }, + { + "epoch": 0.030168211205876507, + "grad_norm": 182.1781005859375, + "learning_rate": 1.9999998641794035e-06, + "loss": 27.1719, + "step": 3187 + }, + { + "epoch": 0.03017767722759156, + "grad_norm": 513.7490844726562, + "learning_rate": 1.9999998477305425e-06, + "loss": 31.9688, + "step": 3188 + }, + { + "epoch": 0.030187143249306613, + "grad_norm": 455.4222412109375, + "learning_rate": 1.9999998303417472e-06, + "loss": 45.0, + "step": 3189 + }, + { + "epoch": 0.030196609271021666, + "grad_norm": 908.7887573242188, + "learning_rate": 1.9999998120130164e-06, + "loss": 66.8281, + "step": 3190 + }, + { + "epoch": 0.030206075292736723, + "grad_norm": 875.125732421875, + "learning_rate": 1.9999997927443514e-06, + "loss": 37.9531, + "step": 3191 + }, + { + "epoch": 0.030215541314451776, + "grad_norm": 269.120361328125, + "learning_rate": 1.9999997725357517e-06, + "loss": 24.4062, + "step": 3192 + }, + { + "epoch": 0.03022500733616683, + "grad_norm": 331.8780517578125, + "learning_rate": 1.9999997513872173e-06, + "loss": 31.7188, + "step": 3193 + }, + { + "epoch": 0.03023447335788188, + "grad_norm": 209.45777893066406, + "learning_rate": 1.9999997292987478e-06, + "loss": 27.7969, + "step": 3194 + }, + { + "epoch": 0.030243939379596938, + "grad_norm": 405.7959899902344, + "learning_rate": 1.9999997062703436e-06, + "loss": 54.5938, + "step": 3195 + }, + { + "epoch": 0.03025340540131199, + "grad_norm": 655.9832763671875, + "learning_rate": 1.999999682302005e-06, + "loss": 45.75, + "step": 3196 + }, + { + "epoch": 0.030262871423027044, + "grad_norm": 683.6698608398438, + "learning_rate": 1.9999996573937317e-06, + "loss": 35.8438, + "step": 3197 + }, + { + "epoch": 0.030272337444742097, + "grad_norm": 312.967041015625, + "learning_rate": 1.9999996315455235e-06, + "loss": 26.875, + "step": 3198 + }, + { + "epoch": 0.030281803466457154, + "grad_norm": 204.2545166015625, + "learning_rate": 1.9999996047573815e-06, + "loss": 27.625, + "step": 3199 + }, + { + "epoch": 0.030291269488172207, + "grad_norm": 3.7489678859710693, + "learning_rate": 1.999999577029304e-06, + "loss": 0.9409, + "step": 3200 + }, + { + "epoch": 0.03030073550988726, + "grad_norm": 642.7227172851562, + "learning_rate": 1.9999995483612926e-06, + "loss": 38.2969, + "step": 3201 + }, + { + "epoch": 0.030310201531602313, + "grad_norm": 323.2586364746094, + "learning_rate": 1.999999518753346e-06, + "loss": 31.9531, + "step": 3202 + }, + { + "epoch": 0.030319667553317366, + "grad_norm": 257.43353271484375, + "learning_rate": 1.999999488205466e-06, + "loss": 24.5312, + "step": 3203 + }, + { + "epoch": 0.030329133575032422, + "grad_norm": 332.5235290527344, + "learning_rate": 1.9999994567176504e-06, + "loss": 28.2656, + "step": 3204 + }, + { + "epoch": 0.030338599596747475, + "grad_norm": 525.5113525390625, + "learning_rate": 1.9999994242899008e-06, + "loss": 46.125, + "step": 3205 + }, + { + "epoch": 0.030348065618462528, + "grad_norm": 487.46136474609375, + "learning_rate": 1.999999390922217e-06, + "loss": 69.0625, + "step": 3206 + }, + { + "epoch": 0.03035753164017758, + "grad_norm": 1184.462158203125, + "learning_rate": 1.9999993566145983e-06, + "loss": 44.875, + "step": 3207 + }, + { + "epoch": 0.030366997661892638, + "grad_norm": 803.6336669921875, + "learning_rate": 1.999999321367046e-06, + "loss": 37.25, + "step": 3208 + }, + { + "epoch": 0.03037646368360769, + "grad_norm": 1426.6248779296875, + "learning_rate": 1.999999285179559e-06, + "loss": 36.4219, + "step": 3209 + }, + { + "epoch": 0.030385929705322744, + "grad_norm": 252.62213134765625, + "learning_rate": 1.9999992480521376e-06, + "loss": 39.6875, + "step": 3210 + }, + { + "epoch": 0.030395395727037797, + "grad_norm": 607.0608520507812, + "learning_rate": 1.999999209984782e-06, + "loss": 63.75, + "step": 3211 + }, + { + "epoch": 0.030404861748752853, + "grad_norm": 349.3918151855469, + "learning_rate": 1.999999170977492e-06, + "loss": 29.6875, + "step": 3212 + }, + { + "epoch": 0.030414327770467906, + "grad_norm": 739.0469360351562, + "learning_rate": 1.9999991310302686e-06, + "loss": 57.25, + "step": 3213 + }, + { + "epoch": 0.03042379379218296, + "grad_norm": 3.6623353958129883, + "learning_rate": 1.9999990901431107e-06, + "loss": 0.9355, + "step": 3214 + }, + { + "epoch": 0.030433259813898012, + "grad_norm": 340.71173095703125, + "learning_rate": 1.999999048316018e-06, + "loss": 42.8125, + "step": 3215 + }, + { + "epoch": 0.030442725835613065, + "grad_norm": 1003.0576782226562, + "learning_rate": 1.999999005548992e-06, + "loss": 67.3828, + "step": 3216 + }, + { + "epoch": 0.03045219185732812, + "grad_norm": 229.98934936523438, + "learning_rate": 1.999998961842032e-06, + "loss": 31.4688, + "step": 3217 + }, + { + "epoch": 0.030461657879043175, + "grad_norm": 450.3116149902344, + "learning_rate": 1.999998917195138e-06, + "loss": 47.75, + "step": 3218 + }, + { + "epoch": 0.030471123900758228, + "grad_norm": 529.7185668945312, + "learning_rate": 1.9999988716083096e-06, + "loss": 31.0, + "step": 3219 + }, + { + "epoch": 0.03048058992247328, + "grad_norm": 523.8282470703125, + "learning_rate": 1.999998825081548e-06, + "loss": 60.5781, + "step": 3220 + }, + { + "epoch": 0.030490055944188337, + "grad_norm": 380.2268981933594, + "learning_rate": 1.9999987776148523e-06, + "loss": 26.7656, + "step": 3221 + }, + { + "epoch": 0.03049952196590339, + "grad_norm": 647.36767578125, + "learning_rate": 1.999998729208222e-06, + "loss": 38.8594, + "step": 3222 + }, + { + "epoch": 0.030508987987618443, + "grad_norm": 432.58770751953125, + "learning_rate": 1.999998679861659e-06, + "loss": 30.0312, + "step": 3223 + }, + { + "epoch": 0.030518454009333496, + "grad_norm": 485.7008361816406, + "learning_rate": 1.999998629575162e-06, + "loss": 26.9219, + "step": 3224 + }, + { + "epoch": 0.030527920031048553, + "grad_norm": 537.4630737304688, + "learning_rate": 1.999998578348731e-06, + "loss": 42.0312, + "step": 3225 + }, + { + "epoch": 0.030537386052763606, + "grad_norm": 813.52978515625, + "learning_rate": 1.999998526182367e-06, + "loss": 61.5938, + "step": 3226 + }, + { + "epoch": 0.03054685207447866, + "grad_norm": 3.3232784271240234, + "learning_rate": 1.9999984730760693e-06, + "loss": 0.8687, + "step": 3227 + }, + { + "epoch": 0.03055631809619371, + "grad_norm": 307.9361877441406, + "learning_rate": 1.9999984190298374e-06, + "loss": 25.6406, + "step": 3228 + }, + { + "epoch": 0.030565784117908768, + "grad_norm": 431.9600524902344, + "learning_rate": 1.999998364043673e-06, + "loss": 36.0312, + "step": 3229 + }, + { + "epoch": 0.03057525013962382, + "grad_norm": 214.1068878173828, + "learning_rate": 1.9999983081175747e-06, + "loss": 28.7188, + "step": 3230 + }, + { + "epoch": 0.030584716161338874, + "grad_norm": 478.6322326660156, + "learning_rate": 1.999998251251543e-06, + "loss": 31.9531, + "step": 3231 + }, + { + "epoch": 0.030594182183053927, + "grad_norm": 517.6741333007812, + "learning_rate": 1.999998193445578e-06, + "loss": 54.8125, + "step": 3232 + }, + { + "epoch": 0.03060364820476898, + "grad_norm": 840.2664794921875, + "learning_rate": 1.99999813469968e-06, + "loss": 63.5625, + "step": 3233 + }, + { + "epoch": 0.030613114226484037, + "grad_norm": 407.71148681640625, + "learning_rate": 1.9999980750138484e-06, + "loss": 28.1875, + "step": 3234 + }, + { + "epoch": 0.03062258024819909, + "grad_norm": 1142.6806640625, + "learning_rate": 1.999998014388084e-06, + "loss": 72.9453, + "step": 3235 + }, + { + "epoch": 0.030632046269914143, + "grad_norm": 384.70233154296875, + "learning_rate": 1.9999979528223865e-06, + "loss": 52.0781, + "step": 3236 + }, + { + "epoch": 0.030641512291629196, + "grad_norm": 435.7012634277344, + "learning_rate": 1.9999978903167557e-06, + "loss": 24.6641, + "step": 3237 + }, + { + "epoch": 0.030650978313344252, + "grad_norm": 437.2834167480469, + "learning_rate": 1.9999978268711923e-06, + "loss": 33.4531, + "step": 3238 + }, + { + "epoch": 0.030660444335059305, + "grad_norm": 639.8961791992188, + "learning_rate": 1.999997762485696e-06, + "loss": 30.1562, + "step": 3239 + }, + { + "epoch": 0.030669910356774358, + "grad_norm": 449.9952087402344, + "learning_rate": 1.9999976971602666e-06, + "loss": 48.8281, + "step": 3240 + }, + { + "epoch": 0.03067937637848941, + "grad_norm": 205.69735717773438, + "learning_rate": 1.9999976308949047e-06, + "loss": 24.4844, + "step": 3241 + }, + { + "epoch": 0.030688842400204468, + "grad_norm": 281.23614501953125, + "learning_rate": 1.99999756368961e-06, + "loss": 35.9844, + "step": 3242 + }, + { + "epoch": 0.03069830842191952, + "grad_norm": 245.3309783935547, + "learning_rate": 1.9999974955443824e-06, + "loss": 28.4062, + "step": 3243 + }, + { + "epoch": 0.030707774443634574, + "grad_norm": 453.17840576171875, + "learning_rate": 1.9999974264592224e-06, + "loss": 52.5625, + "step": 3244 + }, + { + "epoch": 0.030717240465349627, + "grad_norm": 248.95689392089844, + "learning_rate": 1.99999735643413e-06, + "loss": 23.6094, + "step": 3245 + }, + { + "epoch": 0.03072670648706468, + "grad_norm": 190.487060546875, + "learning_rate": 1.9999972854691048e-06, + "loss": 27.7812, + "step": 3246 + }, + { + "epoch": 0.030736172508779736, + "grad_norm": 3.1487772464752197, + "learning_rate": 1.999997213564147e-06, + "loss": 0.9062, + "step": 3247 + }, + { + "epoch": 0.03074563853049479, + "grad_norm": 359.7401123046875, + "learning_rate": 1.9999971407192573e-06, + "loss": 24.375, + "step": 3248 + }, + { + "epoch": 0.030755104552209842, + "grad_norm": 169.2506866455078, + "learning_rate": 1.9999970669344354e-06, + "loss": 27.2031, + "step": 3249 + }, + { + "epoch": 0.030764570573924895, + "grad_norm": 623.527099609375, + "learning_rate": 1.9999969922096813e-06, + "loss": 60.375, + "step": 3250 + }, + { + "epoch": 0.03077403659563995, + "grad_norm": 1009.948486328125, + "learning_rate": 1.9999969165449947e-06, + "loss": 83.3125, + "step": 3251 + }, + { + "epoch": 0.030783502617355005, + "grad_norm": 623.0900268554688, + "learning_rate": 1.999996839940377e-06, + "loss": 51.1562, + "step": 3252 + }, + { + "epoch": 0.030792968639070058, + "grad_norm": 2.6727352142333984, + "learning_rate": 1.9999967623958264e-06, + "loss": 0.9238, + "step": 3253 + }, + { + "epoch": 0.03080243466078511, + "grad_norm": 721.450439453125, + "learning_rate": 1.999996683911344e-06, + "loss": 40.4688, + "step": 3254 + }, + { + "epoch": 0.030811900682500167, + "grad_norm": 2.913428783416748, + "learning_rate": 1.99999660448693e-06, + "loss": 0.854, + "step": 3255 + }, + { + "epoch": 0.03082136670421522, + "grad_norm": 1240.0350341796875, + "learning_rate": 1.9999965241225843e-06, + "loss": 46.7812, + "step": 3256 + }, + { + "epoch": 0.030830832725930273, + "grad_norm": 896.8504638671875, + "learning_rate": 1.9999964428183066e-06, + "loss": 59.8125, + "step": 3257 + }, + { + "epoch": 0.030840298747645326, + "grad_norm": 219.51719665527344, + "learning_rate": 1.9999963605740976e-06, + "loss": 29.2656, + "step": 3258 + }, + { + "epoch": 0.03084976476936038, + "grad_norm": 500.43133544921875, + "learning_rate": 1.999996277389957e-06, + "loss": 28.9922, + "step": 3259 + }, + { + "epoch": 0.030859230791075436, + "grad_norm": 537.9011840820312, + "learning_rate": 1.999996193265885e-06, + "loss": 54.4531, + "step": 3260 + }, + { + "epoch": 0.03086869681279049, + "grad_norm": 184.24314880371094, + "learning_rate": 1.9999961082018816e-06, + "loss": 26.4531, + "step": 3261 + }, + { + "epoch": 0.03087816283450554, + "grad_norm": 273.1463317871094, + "learning_rate": 1.9999960221979466e-06, + "loss": 33.4531, + "step": 3262 + }, + { + "epoch": 0.030887628856220595, + "grad_norm": 652.9874877929688, + "learning_rate": 1.9999959352540808e-06, + "loss": 74.25, + "step": 3263 + }, + { + "epoch": 0.03089709487793565, + "grad_norm": 987.5709228515625, + "learning_rate": 1.9999958473702836e-06, + "loss": 42.7344, + "step": 3264 + }, + { + "epoch": 0.030906560899650704, + "grad_norm": 492.3360900878906, + "learning_rate": 1.9999957585465556e-06, + "loss": 54.1562, + "step": 3265 + }, + { + "epoch": 0.030916026921365757, + "grad_norm": 1432.8507080078125, + "learning_rate": 1.9999956687828963e-06, + "loss": 27.5469, + "step": 3266 + }, + { + "epoch": 0.03092549294308081, + "grad_norm": 1779.567138671875, + "learning_rate": 1.9999955780793066e-06, + "loss": 36.25, + "step": 3267 + }, + { + "epoch": 0.030934958964795867, + "grad_norm": 905.7508544921875, + "learning_rate": 1.9999954864357856e-06, + "loss": 26.3125, + "step": 3268 + }, + { + "epoch": 0.03094442498651092, + "grad_norm": 330.67169189453125, + "learning_rate": 1.9999953938523346e-06, + "loss": 41.1406, + "step": 3269 + }, + { + "epoch": 0.030953891008225973, + "grad_norm": 620.3714599609375, + "learning_rate": 1.9999953003289522e-06, + "loss": 70.5781, + "step": 3270 + }, + { + "epoch": 0.030963357029941026, + "grad_norm": 459.8768615722656, + "learning_rate": 1.99999520586564e-06, + "loss": 28.7578, + "step": 3271 + }, + { + "epoch": 0.030972823051656082, + "grad_norm": 312.7322998046875, + "learning_rate": 1.9999951104623967e-06, + "loss": 28.1094, + "step": 3272 + }, + { + "epoch": 0.030982289073371135, + "grad_norm": 251.69932556152344, + "learning_rate": 1.999995014119223e-06, + "loss": 27.7031, + "step": 3273 + }, + { + "epoch": 0.030991755095086188, + "grad_norm": 670.822509765625, + "learning_rate": 1.9999949168361195e-06, + "loss": 52.4219, + "step": 3274 + }, + { + "epoch": 0.03100122111680124, + "grad_norm": 512.6666259765625, + "learning_rate": 1.999994818613086e-06, + "loss": 45.1562, + "step": 3275 + }, + { + "epoch": 0.031010687138516294, + "grad_norm": 291.78076171875, + "learning_rate": 1.999994719450122e-06, + "loss": 23.9375, + "step": 3276 + }, + { + "epoch": 0.03102015316023135, + "grad_norm": 406.6192321777344, + "learning_rate": 1.999994619347228e-06, + "loss": 39.5781, + "step": 3277 + }, + { + "epoch": 0.031029619181946404, + "grad_norm": 327.3577880859375, + "learning_rate": 1.9999945183044043e-06, + "loss": 26.7812, + "step": 3278 + }, + { + "epoch": 0.031039085203661457, + "grad_norm": 256.3595886230469, + "learning_rate": 1.999994416321651e-06, + "loss": 27.4062, + "step": 3279 + }, + { + "epoch": 0.03104855122537651, + "grad_norm": 556.26220703125, + "learning_rate": 1.9999943133989677e-06, + "loss": 38.9844, + "step": 3280 + }, + { + "epoch": 0.031058017247091566, + "grad_norm": 293.7099304199219, + "learning_rate": 1.9999942095363548e-06, + "loss": 28.6719, + "step": 3281 + }, + { + "epoch": 0.03106748326880662, + "grad_norm": 356.8780822753906, + "learning_rate": 1.9999941047338123e-06, + "loss": 36.7344, + "step": 3282 + }, + { + "epoch": 0.031076949290521672, + "grad_norm": 186.85328674316406, + "learning_rate": 1.9999939989913406e-06, + "loss": 28.9688, + "step": 3283 + }, + { + "epoch": 0.031086415312236725, + "grad_norm": 768.3455200195312, + "learning_rate": 1.99999389230894e-06, + "loss": 51.9922, + "step": 3284 + }, + { + "epoch": 0.03109588133395178, + "grad_norm": 2.9011898040771484, + "learning_rate": 1.9999937846866094e-06, + "loss": 0.8628, + "step": 3285 + }, + { + "epoch": 0.031105347355666835, + "grad_norm": 928.253662109375, + "learning_rate": 1.99999367612435e-06, + "loss": 40.8672, + "step": 3286 + }, + { + "epoch": 0.031114813377381888, + "grad_norm": 709.6005249023438, + "learning_rate": 1.999993566622162e-06, + "loss": 49.4844, + "step": 3287 + }, + { + "epoch": 0.03112427939909694, + "grad_norm": 417.0143737792969, + "learning_rate": 1.9999934561800444e-06, + "loss": 51.5469, + "step": 3288 + }, + { + "epoch": 0.031133745420811994, + "grad_norm": 385.7736511230469, + "learning_rate": 1.9999933447979985e-06, + "loss": 25.4141, + "step": 3289 + }, + { + "epoch": 0.03114321144252705, + "grad_norm": 233.8036651611328, + "learning_rate": 1.9999932324760235e-06, + "loss": 28.125, + "step": 3290 + }, + { + "epoch": 0.031152677464242103, + "grad_norm": 524.3990478515625, + "learning_rate": 1.9999931192141202e-06, + "loss": 23.707, + "step": 3291 + }, + { + "epoch": 0.031162143485957156, + "grad_norm": 314.5956115722656, + "learning_rate": 1.9999930050122886e-06, + "loss": 35.3516, + "step": 3292 + }, + { + "epoch": 0.03117160950767221, + "grad_norm": 1412.33251953125, + "learning_rate": 1.9999928898705286e-06, + "loss": 53.4531, + "step": 3293 + }, + { + "epoch": 0.031181075529387266, + "grad_norm": 363.5382080078125, + "learning_rate": 1.99999277378884e-06, + "loss": 25.4375, + "step": 3294 + }, + { + "epoch": 0.03119054155110232, + "grad_norm": 517.1614379882812, + "learning_rate": 1.9999926567672233e-06, + "loss": 83.625, + "step": 3295 + }, + { + "epoch": 0.031200007572817372, + "grad_norm": 676.931396484375, + "learning_rate": 1.999992538805679e-06, + "loss": 41.2422, + "step": 3296 + }, + { + "epoch": 0.031209473594532425, + "grad_norm": 289.19940185546875, + "learning_rate": 1.999992419904206e-06, + "loss": 25.5547, + "step": 3297 + }, + { + "epoch": 0.03121893961624748, + "grad_norm": 716.4273071289062, + "learning_rate": 1.9999923000628057e-06, + "loss": 49.5156, + "step": 3298 + }, + { + "epoch": 0.031228405637962534, + "grad_norm": 251.3394012451172, + "learning_rate": 1.999992179281478e-06, + "loss": 32.2031, + "step": 3299 + }, + { + "epoch": 0.031237871659677587, + "grad_norm": 612.5974731445312, + "learning_rate": 1.999992057560222e-06, + "loss": 39.5625, + "step": 3300 + }, + { + "epoch": 0.03124733768139264, + "grad_norm": 194.70318603515625, + "learning_rate": 1.9999919348990387e-06, + "loss": 28.5156, + "step": 3301 + }, + { + "epoch": 0.0312568037031077, + "grad_norm": 314.84722900390625, + "learning_rate": 1.9999918112979285e-06, + "loss": 39.9375, + "step": 3302 + }, + { + "epoch": 0.03126626972482275, + "grad_norm": 277.33990478515625, + "learning_rate": 1.9999916867568905e-06, + "loss": 26.1328, + "step": 3303 + }, + { + "epoch": 0.0312757357465378, + "grad_norm": 389.2458801269531, + "learning_rate": 1.9999915612759257e-06, + "loss": 25.7266, + "step": 3304 + }, + { + "epoch": 0.031285201768252856, + "grad_norm": 604.673828125, + "learning_rate": 1.9999914348550336e-06, + "loss": 32.6484, + "step": 3305 + }, + { + "epoch": 0.03129466778996791, + "grad_norm": 457.7138366699219, + "learning_rate": 1.999991307494215e-06, + "loss": 34.4453, + "step": 3306 + }, + { + "epoch": 0.03130413381168296, + "grad_norm": 426.0896911621094, + "learning_rate": 1.9999911791934693e-06, + "loss": 66.5781, + "step": 3307 + }, + { + "epoch": 0.031313599833398015, + "grad_norm": 365.23931884765625, + "learning_rate": 1.999991049952797e-06, + "loss": 33.7969, + "step": 3308 + }, + { + "epoch": 0.031323065855113075, + "grad_norm": 600.200439453125, + "learning_rate": 1.999990919772198e-06, + "loss": 46.5625, + "step": 3309 + }, + { + "epoch": 0.03133253187682813, + "grad_norm": 202.21177673339844, + "learning_rate": 1.9999907886516732e-06, + "loss": 29.8125, + "step": 3310 + }, + { + "epoch": 0.03134199789854318, + "grad_norm": 303.09619140625, + "learning_rate": 1.9999906565912217e-06, + "loss": 29.0312, + "step": 3311 + }, + { + "epoch": 0.031351463920258234, + "grad_norm": 895.576904296875, + "learning_rate": 1.999990523590844e-06, + "loss": 60.7344, + "step": 3312 + }, + { + "epoch": 0.03136092994197329, + "grad_norm": 513.9495239257812, + "learning_rate": 1.99999038965054e-06, + "loss": 30.375, + "step": 3313 + }, + { + "epoch": 0.03137039596368834, + "grad_norm": 1219.282958984375, + "learning_rate": 1.9999902547703103e-06, + "loss": 51.7344, + "step": 3314 + }, + { + "epoch": 0.03137986198540339, + "grad_norm": 292.9039611816406, + "learning_rate": 1.9999901189501552e-06, + "loss": 28.7344, + "step": 3315 + }, + { + "epoch": 0.031389328007118446, + "grad_norm": 706.609130859375, + "learning_rate": 1.9999899821900744e-06, + "loss": 71.3438, + "step": 3316 + }, + { + "epoch": 0.0313987940288335, + "grad_norm": 510.2052917480469, + "learning_rate": 1.9999898444900678e-06, + "loss": 57.6406, + "step": 3317 + }, + { + "epoch": 0.03140826005054856, + "grad_norm": 673.063720703125, + "learning_rate": 1.9999897058501358e-06, + "loss": 51.3438, + "step": 3318 + }, + { + "epoch": 0.03141772607226361, + "grad_norm": 303.8896789550781, + "learning_rate": 1.9999895662702784e-06, + "loss": 27.1094, + "step": 3319 + }, + { + "epoch": 0.031427192093978665, + "grad_norm": 435.636474609375, + "learning_rate": 1.9999894257504957e-06, + "loss": 61.75, + "step": 3320 + }, + { + "epoch": 0.03143665811569372, + "grad_norm": 607.21435546875, + "learning_rate": 1.9999892842907885e-06, + "loss": 59.7344, + "step": 3321 + }, + { + "epoch": 0.03144612413740877, + "grad_norm": 178.88523864746094, + "learning_rate": 1.9999891418911564e-06, + "loss": 26.7031, + "step": 3322 + }, + { + "epoch": 0.031455590159123824, + "grad_norm": 1023.7301025390625, + "learning_rate": 1.9999889985515993e-06, + "loss": 46.4453, + "step": 3323 + }, + { + "epoch": 0.03146505618083888, + "grad_norm": 848.8565673828125, + "learning_rate": 1.9999888542721177e-06, + "loss": 52.5, + "step": 3324 + }, + { + "epoch": 0.03147452220255393, + "grad_norm": 799.8582153320312, + "learning_rate": 1.9999887090527116e-06, + "loss": 72.1875, + "step": 3325 + }, + { + "epoch": 0.03148398822426899, + "grad_norm": 364.5196228027344, + "learning_rate": 1.9999885628933815e-06, + "loss": 32.2031, + "step": 3326 + }, + { + "epoch": 0.03149345424598404, + "grad_norm": 245.7971649169922, + "learning_rate": 1.9999884157941268e-06, + "loss": 25.6172, + "step": 3327 + }, + { + "epoch": 0.031502920267699096, + "grad_norm": 429.31134033203125, + "learning_rate": 1.999988267754948e-06, + "loss": 44.75, + "step": 3328 + }, + { + "epoch": 0.03151238628941415, + "grad_norm": 1012.7095336914062, + "learning_rate": 1.9999881187758456e-06, + "loss": 58.2344, + "step": 3329 + }, + { + "epoch": 0.0315218523111292, + "grad_norm": 474.3479309082031, + "learning_rate": 1.999987968856819e-06, + "loss": 38.8047, + "step": 3330 + }, + { + "epoch": 0.031531318332844255, + "grad_norm": 456.90093994140625, + "learning_rate": 1.9999878179978693e-06, + "loss": 32.1875, + "step": 3331 + }, + { + "epoch": 0.03154078435455931, + "grad_norm": 299.2099304199219, + "learning_rate": 1.9999876661989954e-06, + "loss": 30.7188, + "step": 3332 + }, + { + "epoch": 0.03155025037627436, + "grad_norm": 306.9521789550781, + "learning_rate": 1.9999875134601988e-06, + "loss": 28.4219, + "step": 3333 + }, + { + "epoch": 0.031559716397989414, + "grad_norm": 603.1840209960938, + "learning_rate": 1.999987359781479e-06, + "loss": 59.4375, + "step": 3334 + }, + { + "epoch": 0.031569182419704474, + "grad_norm": 387.1522521972656, + "learning_rate": 1.9999872051628353e-06, + "loss": 41.7031, + "step": 3335 + }, + { + "epoch": 0.03157864844141953, + "grad_norm": 705.6858520507812, + "learning_rate": 1.9999870496042693e-06, + "loss": 44.8125, + "step": 3336 + }, + { + "epoch": 0.03158811446313458, + "grad_norm": 1911.96484375, + "learning_rate": 1.9999868931057806e-06, + "loss": 44.0625, + "step": 3337 + }, + { + "epoch": 0.03159758048484963, + "grad_norm": 664.0446166992188, + "learning_rate": 1.9999867356673694e-06, + "loss": 33.2812, + "step": 3338 + }, + { + "epoch": 0.031607046506564686, + "grad_norm": 408.93328857421875, + "learning_rate": 1.9999865772890354e-06, + "loss": 32.625, + "step": 3339 + }, + { + "epoch": 0.03161651252827974, + "grad_norm": 276.5694885253906, + "learning_rate": 1.999986417970779e-06, + "loss": 25.7344, + "step": 3340 + }, + { + "epoch": 0.03162597854999479, + "grad_norm": 439.4228820800781, + "learning_rate": 1.9999862577126007e-06, + "loss": 48.4531, + "step": 3341 + }, + { + "epoch": 0.031635444571709845, + "grad_norm": 647.5966796875, + "learning_rate": 1.9999860965145e-06, + "loss": 75.5469, + "step": 3342 + }, + { + "epoch": 0.031644910593424905, + "grad_norm": 363.9642639160156, + "learning_rate": 1.999985934376478e-06, + "loss": 28.4219, + "step": 3343 + }, + { + "epoch": 0.03165437661513996, + "grad_norm": 1019.212646484375, + "learning_rate": 1.999985771298534e-06, + "loss": 67.875, + "step": 3344 + }, + { + "epoch": 0.03166384263685501, + "grad_norm": 348.827392578125, + "learning_rate": 1.999985607280668e-06, + "loss": 29.5625, + "step": 3345 + }, + { + "epoch": 0.031673308658570064, + "grad_norm": 332.645263671875, + "learning_rate": 1.999985442322881e-06, + "loss": 33.9062, + "step": 3346 + }, + { + "epoch": 0.03168277468028512, + "grad_norm": 260.5066223144531, + "learning_rate": 1.999985276425173e-06, + "loss": 24.375, + "step": 3347 + }, + { + "epoch": 0.03169224070200017, + "grad_norm": 191.41416931152344, + "learning_rate": 1.9999851095875436e-06, + "loss": 31.0312, + "step": 3348 + }, + { + "epoch": 0.03170170672371522, + "grad_norm": 658.6119995117188, + "learning_rate": 1.999984941809993e-06, + "loss": 33.2266, + "step": 3349 + }, + { + "epoch": 0.031711172745430276, + "grad_norm": 205.93592834472656, + "learning_rate": 1.999984773092522e-06, + "loss": 23.2812, + "step": 3350 + }, + { + "epoch": 0.03172063876714533, + "grad_norm": 240.97244262695312, + "learning_rate": 1.99998460343513e-06, + "loss": 29.5781, + "step": 3351 + }, + { + "epoch": 0.03173010478886039, + "grad_norm": 1157.7183837890625, + "learning_rate": 1.999984432837818e-06, + "loss": 42.75, + "step": 3352 + }, + { + "epoch": 0.03173957081057544, + "grad_norm": 526.8160400390625, + "learning_rate": 1.999984261300585e-06, + "loss": 30.0078, + "step": 3353 + }, + { + "epoch": 0.031749036832290495, + "grad_norm": 276.1238098144531, + "learning_rate": 1.9999840888234327e-06, + "loss": 30.2344, + "step": 3354 + }, + { + "epoch": 0.03175850285400555, + "grad_norm": 204.40716552734375, + "learning_rate": 1.99998391540636e-06, + "loss": 29.9219, + "step": 3355 + }, + { + "epoch": 0.0317679688757206, + "grad_norm": 208.9193878173828, + "learning_rate": 1.9999837410493673e-06, + "loss": 27.6094, + "step": 3356 + }, + { + "epoch": 0.031777434897435654, + "grad_norm": 904.1305541992188, + "learning_rate": 1.9999835657524553e-06, + "loss": 59.8906, + "step": 3357 + }, + { + "epoch": 0.03178690091915071, + "grad_norm": 334.5029602050781, + "learning_rate": 1.9999833895156234e-06, + "loss": 24.2891, + "step": 3358 + }, + { + "epoch": 0.03179636694086576, + "grad_norm": 221.40673828125, + "learning_rate": 1.9999832123388725e-06, + "loss": 24.25, + "step": 3359 + }, + { + "epoch": 0.03180583296258081, + "grad_norm": 585.0147705078125, + "learning_rate": 1.999983034222202e-06, + "loss": 58.3672, + "step": 3360 + }, + { + "epoch": 0.03181529898429587, + "grad_norm": 562.0262451171875, + "learning_rate": 1.9999828551656132e-06, + "loss": 61.0312, + "step": 3361 + }, + { + "epoch": 0.031824765006010926, + "grad_norm": 1074.15283203125, + "learning_rate": 1.999982675169105e-06, + "loss": 76.8516, + "step": 3362 + }, + { + "epoch": 0.03183423102772598, + "grad_norm": 467.15045166015625, + "learning_rate": 1.999982494232678e-06, + "loss": 55.9375, + "step": 3363 + }, + { + "epoch": 0.03184369704944103, + "grad_norm": 341.1269836425781, + "learning_rate": 1.999982312356333e-06, + "loss": 28.2031, + "step": 3364 + }, + { + "epoch": 0.031853163071156085, + "grad_norm": 390.0650329589844, + "learning_rate": 1.9999821295400693e-06, + "loss": 33.1016, + "step": 3365 + }, + { + "epoch": 0.03186262909287114, + "grad_norm": 556.1158447265625, + "learning_rate": 1.999981945783888e-06, + "loss": 50.9062, + "step": 3366 + }, + { + "epoch": 0.03187209511458619, + "grad_norm": 653.0689697265625, + "learning_rate": 1.9999817610877883e-06, + "loss": 53.6406, + "step": 3367 + }, + { + "epoch": 0.031881561136301244, + "grad_norm": 3.1060891151428223, + "learning_rate": 1.9999815754517706e-06, + "loss": 0.8911, + "step": 3368 + }, + { + "epoch": 0.031891027158016304, + "grad_norm": 477.9071044921875, + "learning_rate": 1.999981388875836e-06, + "loss": 53.4375, + "step": 3369 + }, + { + "epoch": 0.03190049317973136, + "grad_norm": 336.6730041503906, + "learning_rate": 1.9999812013599833e-06, + "loss": 29.9844, + "step": 3370 + }, + { + "epoch": 0.03190995920144641, + "grad_norm": 681.0098876953125, + "learning_rate": 1.9999810129042132e-06, + "loss": 44.7812, + "step": 3371 + }, + { + "epoch": 0.03191942522316146, + "grad_norm": 694.3992919921875, + "learning_rate": 1.9999808235085263e-06, + "loss": 42.2969, + "step": 3372 + }, + { + "epoch": 0.031928891244876516, + "grad_norm": 1285.9647216796875, + "learning_rate": 1.999980633172923e-06, + "loss": 77.5312, + "step": 3373 + }, + { + "epoch": 0.03193835726659157, + "grad_norm": 3.225764036178589, + "learning_rate": 1.999980441897402e-06, + "loss": 1.0474, + "step": 3374 + }, + { + "epoch": 0.03194782328830662, + "grad_norm": 406.3236389160156, + "learning_rate": 1.999980249681965e-06, + "loss": 28.2812, + "step": 3375 + }, + { + "epoch": 0.031957289310021675, + "grad_norm": 3.220310926437378, + "learning_rate": 1.999980056526612e-06, + "loss": 0.8965, + "step": 3376 + }, + { + "epoch": 0.03196675533173673, + "grad_norm": 592.7985229492188, + "learning_rate": 1.999979862431342e-06, + "loss": 48.5625, + "step": 3377 + }, + { + "epoch": 0.03197622135345179, + "grad_norm": 870.5581665039062, + "learning_rate": 1.9999796673961564e-06, + "loss": 57.1719, + "step": 3378 + }, + { + "epoch": 0.03198568737516684, + "grad_norm": 225.9832000732422, + "learning_rate": 1.9999794714210547e-06, + "loss": 28.9531, + "step": 3379 + }, + { + "epoch": 0.031995153396881894, + "grad_norm": 194.03749084472656, + "learning_rate": 1.9999792745060377e-06, + "loss": 21.8906, + "step": 3380 + }, + { + "epoch": 0.03200461941859695, + "grad_norm": 402.4746398925781, + "learning_rate": 1.9999790766511047e-06, + "loss": 33.9531, + "step": 3381 + }, + { + "epoch": 0.032014085440312, + "grad_norm": 397.0271301269531, + "learning_rate": 1.9999788778562565e-06, + "loss": 35.8906, + "step": 3382 + }, + { + "epoch": 0.03202355146202705, + "grad_norm": 746.3742065429688, + "learning_rate": 1.9999786781214936e-06, + "loss": 62.7969, + "step": 3383 + }, + { + "epoch": 0.032033017483742106, + "grad_norm": 539.602783203125, + "learning_rate": 1.999978477446816e-06, + "loss": 40.2031, + "step": 3384 + }, + { + "epoch": 0.03204248350545716, + "grad_norm": 737.9983520507812, + "learning_rate": 1.9999782758322234e-06, + "loss": 48.4375, + "step": 3385 + }, + { + "epoch": 0.03205194952717222, + "grad_norm": 917.9995727539062, + "learning_rate": 1.9999780732777157e-06, + "loss": 73.0078, + "step": 3386 + }, + { + "epoch": 0.03206141554888727, + "grad_norm": 855.3569946289062, + "learning_rate": 1.9999778697832946e-06, + "loss": 44.3828, + "step": 3387 + }, + { + "epoch": 0.032070881570602325, + "grad_norm": 794.7740478515625, + "learning_rate": 1.999977665348959e-06, + "loss": 63.125, + "step": 3388 + }, + { + "epoch": 0.03208034759231738, + "grad_norm": 218.09326171875, + "learning_rate": 1.999977459974709e-06, + "loss": 29.2969, + "step": 3389 + }, + { + "epoch": 0.03208981361403243, + "grad_norm": 424.3377380371094, + "learning_rate": 1.999977253660546e-06, + "loss": 57.7031, + "step": 3390 + }, + { + "epoch": 0.032099279635747484, + "grad_norm": 1058.726806640625, + "learning_rate": 1.9999770464064685e-06, + "loss": 55.0938, + "step": 3391 + }, + { + "epoch": 0.03210874565746254, + "grad_norm": 3.0147533416748047, + "learning_rate": 1.9999768382124786e-06, + "loss": 0.8356, + "step": 3392 + }, + { + "epoch": 0.03211821167917759, + "grad_norm": 3.0228869915008545, + "learning_rate": 1.999976629078575e-06, + "loss": 0.917, + "step": 3393 + }, + { + "epoch": 0.03212767770089264, + "grad_norm": 379.7904357910156, + "learning_rate": 1.9999764190047587e-06, + "loss": 54.0938, + "step": 3394 + }, + { + "epoch": 0.0321371437226077, + "grad_norm": 3.2490735054016113, + "learning_rate": 1.9999762079910295e-06, + "loss": 0.9688, + "step": 3395 + }, + { + "epoch": 0.032146609744322756, + "grad_norm": 403.87677001953125, + "learning_rate": 1.9999759960373877e-06, + "loss": 32.9375, + "step": 3396 + }, + { + "epoch": 0.03215607576603781, + "grad_norm": 313.8369140625, + "learning_rate": 1.9999757831438332e-06, + "loss": 30.1719, + "step": 3397 + }, + { + "epoch": 0.03216554178775286, + "grad_norm": 539.1091918945312, + "learning_rate": 1.999975569310367e-06, + "loss": 54.7734, + "step": 3398 + }, + { + "epoch": 0.032175007809467915, + "grad_norm": 536.8291015625, + "learning_rate": 1.9999753545369885e-06, + "loss": 33.6719, + "step": 3399 + }, + { + "epoch": 0.03218447383118297, + "grad_norm": 281.17779541015625, + "learning_rate": 1.999975138823698e-06, + "loss": 26.6406, + "step": 3400 + }, + { + "epoch": 0.03219393985289802, + "grad_norm": 411.66943359375, + "learning_rate": 1.9999749221704965e-06, + "loss": 36.7578, + "step": 3401 + }, + { + "epoch": 0.032203405874613074, + "grad_norm": 869.3652954101562, + "learning_rate": 1.9999747045773834e-06, + "loss": 48.1875, + "step": 3402 + }, + { + "epoch": 0.03221287189632813, + "grad_norm": 392.89324951171875, + "learning_rate": 1.9999744860443594e-06, + "loss": 33.1172, + "step": 3403 + }, + { + "epoch": 0.03222233791804319, + "grad_norm": 3.7795960903167725, + "learning_rate": 1.999974266571424e-06, + "loss": 0.9243, + "step": 3404 + }, + { + "epoch": 0.03223180393975824, + "grad_norm": 2.7467689514160156, + "learning_rate": 1.999974046158578e-06, + "loss": 0.7791, + "step": 3405 + }, + { + "epoch": 0.03224126996147329, + "grad_norm": 3.111161947250366, + "learning_rate": 1.9999738248058217e-06, + "loss": 1.0098, + "step": 3406 + }, + { + "epoch": 0.032250735983188346, + "grad_norm": 441.5636291503906, + "learning_rate": 1.9999736025131547e-06, + "loss": 38.625, + "step": 3407 + }, + { + "epoch": 0.0322602020049034, + "grad_norm": 559.6317138671875, + "learning_rate": 1.9999733792805776e-06, + "loss": 69.4609, + "step": 3408 + }, + { + "epoch": 0.03226966802661845, + "grad_norm": 3.6285147666931152, + "learning_rate": 1.9999731551080903e-06, + "loss": 1.0176, + "step": 3409 + }, + { + "epoch": 0.032279134048333505, + "grad_norm": 458.4759826660156, + "learning_rate": 1.999972929995694e-06, + "loss": 27.0625, + "step": 3410 + }, + { + "epoch": 0.03228860007004856, + "grad_norm": 3.017141580581665, + "learning_rate": 1.9999727039433877e-06, + "loss": 0.9155, + "step": 3411 + }, + { + "epoch": 0.03229806609176362, + "grad_norm": 264.9696350097656, + "learning_rate": 1.9999724769511723e-06, + "loss": 28.1406, + "step": 3412 + }, + { + "epoch": 0.03230753211347867, + "grad_norm": 522.0541381835938, + "learning_rate": 1.9999722490190476e-06, + "loss": 44.0781, + "step": 3413 + }, + { + "epoch": 0.032316998135193724, + "grad_norm": 745.7404174804688, + "learning_rate": 1.9999720201470145e-06, + "loss": 35.9062, + "step": 3414 + }, + { + "epoch": 0.03232646415690878, + "grad_norm": 2.5280985832214355, + "learning_rate": 1.9999717903350725e-06, + "loss": 0.8511, + "step": 3415 + }, + { + "epoch": 0.03233593017862383, + "grad_norm": 213.4480743408203, + "learning_rate": 1.9999715595832218e-06, + "loss": 30.9688, + "step": 3416 + }, + { + "epoch": 0.03234539620033888, + "grad_norm": 566.2525634765625, + "learning_rate": 1.999971327891463e-06, + "loss": 46.4062, + "step": 3417 + }, + { + "epoch": 0.032354862222053936, + "grad_norm": 724.3072509765625, + "learning_rate": 1.999971095259796e-06, + "loss": 62.0, + "step": 3418 + }, + { + "epoch": 0.03236432824376899, + "grad_norm": 336.5927734375, + "learning_rate": 1.999970861688222e-06, + "loss": 27.375, + "step": 3419 + }, + { + "epoch": 0.03237379426548404, + "grad_norm": 463.91888427734375, + "learning_rate": 1.99997062717674e-06, + "loss": 32.6562, + "step": 3420 + }, + { + "epoch": 0.0323832602871991, + "grad_norm": 254.52938842773438, + "learning_rate": 1.9999703917253505e-06, + "loss": 31.2812, + "step": 3421 + }, + { + "epoch": 0.032392726308914155, + "grad_norm": 293.4938659667969, + "learning_rate": 1.9999701553340543e-06, + "loss": 30.0156, + "step": 3422 + }, + { + "epoch": 0.03240219233062921, + "grad_norm": 1044.8099365234375, + "learning_rate": 1.9999699180028505e-06, + "loss": 38.3906, + "step": 3423 + }, + { + "epoch": 0.03241165835234426, + "grad_norm": 280.5499572753906, + "learning_rate": 1.999969679731741e-06, + "loss": 28.0938, + "step": 3424 + }, + { + "epoch": 0.032421124374059314, + "grad_norm": 1304.0301513671875, + "learning_rate": 1.9999694405207245e-06, + "loss": 87.6094, + "step": 3425 + }, + { + "epoch": 0.03243059039577437, + "grad_norm": 1077.2945556640625, + "learning_rate": 1.9999692003698014e-06, + "loss": 58.5859, + "step": 3426 + }, + { + "epoch": 0.03244005641748942, + "grad_norm": 952.1422729492188, + "learning_rate": 1.999968959278973e-06, + "loss": 30.5781, + "step": 3427 + }, + { + "epoch": 0.03244952243920447, + "grad_norm": 397.8121337890625, + "learning_rate": 1.9999687172482385e-06, + "loss": 26.7812, + "step": 3428 + }, + { + "epoch": 0.032458988460919526, + "grad_norm": 269.05853271484375, + "learning_rate": 1.9999684742775982e-06, + "loss": 29.4219, + "step": 3429 + }, + { + "epoch": 0.032468454482634586, + "grad_norm": 742.05859375, + "learning_rate": 1.999968230367053e-06, + "loss": 31.5469, + "step": 3430 + }, + { + "epoch": 0.03247792050434964, + "grad_norm": 462.35284423828125, + "learning_rate": 1.9999679855166026e-06, + "loss": 64.0, + "step": 3431 + }, + { + "epoch": 0.03248738652606469, + "grad_norm": 265.41192626953125, + "learning_rate": 1.999967739726247e-06, + "loss": 26.1719, + "step": 3432 + }, + { + "epoch": 0.032496852547779745, + "grad_norm": 3.029388904571533, + "learning_rate": 1.9999674929959872e-06, + "loss": 0.8696, + "step": 3433 + }, + { + "epoch": 0.0325063185694948, + "grad_norm": 974.12841796875, + "learning_rate": 1.999967245325823e-06, + "loss": 34.2188, + "step": 3434 + }, + { + "epoch": 0.03251578459120985, + "grad_norm": 274.8855285644531, + "learning_rate": 1.9999669967157547e-06, + "loss": 30.2344, + "step": 3435 + }, + { + "epoch": 0.032525250612924904, + "grad_norm": 213.17384338378906, + "learning_rate": 1.9999667471657822e-06, + "loss": 20.3047, + "step": 3436 + }, + { + "epoch": 0.03253471663463996, + "grad_norm": 855.0426025390625, + "learning_rate": 1.9999664966759064e-06, + "loss": 82.8828, + "step": 3437 + }, + { + "epoch": 0.03254418265635502, + "grad_norm": 175.62197875976562, + "learning_rate": 1.999966245246127e-06, + "loss": 26.2031, + "step": 3438 + }, + { + "epoch": 0.03255364867807007, + "grad_norm": 303.38800048828125, + "learning_rate": 1.9999659928764443e-06, + "loss": 34.3125, + "step": 3439 + }, + { + "epoch": 0.03256311469978512, + "grad_norm": 271.7328796386719, + "learning_rate": 1.9999657395668584e-06, + "loss": 30.7969, + "step": 3440 + }, + { + "epoch": 0.032572580721500176, + "grad_norm": 357.66839599609375, + "learning_rate": 1.99996548531737e-06, + "loss": 27.9375, + "step": 3441 + }, + { + "epoch": 0.03258204674321523, + "grad_norm": 297.5777587890625, + "learning_rate": 1.999965230127979e-06, + "loss": 28.375, + "step": 3442 + }, + { + "epoch": 0.03259151276493028, + "grad_norm": 777.15283203125, + "learning_rate": 1.999964973998686e-06, + "loss": 57.1562, + "step": 3443 + }, + { + "epoch": 0.032600978786645335, + "grad_norm": 1041.4718017578125, + "learning_rate": 1.9999647169294906e-06, + "loss": 89.0625, + "step": 3444 + }, + { + "epoch": 0.03261044480836039, + "grad_norm": 288.6856384277344, + "learning_rate": 1.9999644589203936e-06, + "loss": 32.4062, + "step": 3445 + }, + { + "epoch": 0.03261991083007544, + "grad_norm": 824.8536987304688, + "learning_rate": 1.999964199971395e-06, + "loss": 67.25, + "step": 3446 + }, + { + "epoch": 0.0326293768517905, + "grad_norm": 458.56988525390625, + "learning_rate": 1.999963940082495e-06, + "loss": 45.1719, + "step": 3447 + }, + { + "epoch": 0.032638842873505554, + "grad_norm": 294.4161682128906, + "learning_rate": 1.9999636792536944e-06, + "loss": 32.5469, + "step": 3448 + }, + { + "epoch": 0.03264830889522061, + "grad_norm": 833.5234985351562, + "learning_rate": 1.9999634174849925e-06, + "loss": 72.9219, + "step": 3449 + }, + { + "epoch": 0.03265777491693566, + "grad_norm": 3.1974027156829834, + "learning_rate": 1.9999631547763903e-06, + "loss": 0.9175, + "step": 3450 + }, + { + "epoch": 0.03266724093865071, + "grad_norm": 268.6354064941406, + "learning_rate": 1.9999628911278877e-06, + "loss": 35.7812, + "step": 3451 + }, + { + "epoch": 0.032676706960365766, + "grad_norm": 394.9869384765625, + "learning_rate": 1.999962626539485e-06, + "loss": 22.4219, + "step": 3452 + }, + { + "epoch": 0.03268617298208082, + "grad_norm": 245.78648376464844, + "learning_rate": 1.9999623610111826e-06, + "loss": 24.5547, + "step": 3453 + }, + { + "epoch": 0.03269563900379587, + "grad_norm": 700.4307861328125, + "learning_rate": 1.9999620945429806e-06, + "loss": 50.5938, + "step": 3454 + }, + { + "epoch": 0.03270510502551093, + "grad_norm": 505.1499328613281, + "learning_rate": 1.999961827134879e-06, + "loss": 69.3438, + "step": 3455 + }, + { + "epoch": 0.032714571047225985, + "grad_norm": 620.43603515625, + "learning_rate": 1.999961558786879e-06, + "loss": 60.1562, + "step": 3456 + }, + { + "epoch": 0.03272403706894104, + "grad_norm": 400.2000427246094, + "learning_rate": 1.9999612894989795e-06, + "loss": 31.7812, + "step": 3457 + }, + { + "epoch": 0.03273350309065609, + "grad_norm": 182.91452026367188, + "learning_rate": 1.999961019271182e-06, + "loss": 27.2969, + "step": 3458 + }, + { + "epoch": 0.032742969112371144, + "grad_norm": 467.5648498535156, + "learning_rate": 1.9999607481034857e-06, + "loss": 27.25, + "step": 3459 + }, + { + "epoch": 0.0327524351340862, + "grad_norm": 315.907470703125, + "learning_rate": 1.999960475995891e-06, + "loss": 24.8203, + "step": 3460 + }, + { + "epoch": 0.03276190115580125, + "grad_norm": 3.5324161052703857, + "learning_rate": 1.9999602029483997e-06, + "loss": 0.8428, + "step": 3461 + }, + { + "epoch": 0.0327713671775163, + "grad_norm": 307.3232421875, + "learning_rate": 1.99995992896101e-06, + "loss": 26.7656, + "step": 3462 + }, + { + "epoch": 0.032780833199231356, + "grad_norm": 226.28004455566406, + "learning_rate": 1.9999596540337232e-06, + "loss": 30.7812, + "step": 3463 + }, + { + "epoch": 0.032790299220946416, + "grad_norm": 464.5082092285156, + "learning_rate": 1.9999593781665395e-06, + "loss": 47.5625, + "step": 3464 + }, + { + "epoch": 0.03279976524266147, + "grad_norm": 504.5574035644531, + "learning_rate": 1.999959101359459e-06, + "loss": 37.9219, + "step": 3465 + }, + { + "epoch": 0.03280923126437652, + "grad_norm": 764.0435180664062, + "learning_rate": 1.9999588236124816e-06, + "loss": 58.6953, + "step": 3466 + }, + { + "epoch": 0.032818697286091575, + "grad_norm": 934.8278198242188, + "learning_rate": 1.9999585449256083e-06, + "loss": 33.3438, + "step": 3467 + }, + { + "epoch": 0.03282816330780663, + "grad_norm": 1059.745849609375, + "learning_rate": 1.999958265298839e-06, + "loss": 33.1172, + "step": 3468 + }, + { + "epoch": 0.03283762932952168, + "grad_norm": 483.8174743652344, + "learning_rate": 1.999957984732174e-06, + "loss": 39.2734, + "step": 3469 + }, + { + "epoch": 0.032847095351236734, + "grad_norm": 668.516845703125, + "learning_rate": 1.999957703225614e-06, + "loss": 39.2969, + "step": 3470 + }, + { + "epoch": 0.03285656137295179, + "grad_norm": 388.02362060546875, + "learning_rate": 1.999957420779158e-06, + "loss": 31.4375, + "step": 3471 + }, + { + "epoch": 0.03286602739466684, + "grad_norm": 746.9232177734375, + "learning_rate": 1.9999571373928075e-06, + "loss": 35.6953, + "step": 3472 + }, + { + "epoch": 0.0328754934163819, + "grad_norm": 685.4097900390625, + "learning_rate": 1.9999568530665624e-06, + "loss": 47.4062, + "step": 3473 + }, + { + "epoch": 0.03288495943809695, + "grad_norm": 353.22528076171875, + "learning_rate": 1.999956567800423e-06, + "loss": 39.9219, + "step": 3474 + }, + { + "epoch": 0.032894425459812006, + "grad_norm": 406.44207763671875, + "learning_rate": 1.999956281594389e-06, + "loss": 28.0312, + "step": 3475 + }, + { + "epoch": 0.03290389148152706, + "grad_norm": 860.3763427734375, + "learning_rate": 1.9999559944484613e-06, + "loss": 55.1484, + "step": 3476 + }, + { + "epoch": 0.03291335750324211, + "grad_norm": 391.5418701171875, + "learning_rate": 1.99995570636264e-06, + "loss": 30.1094, + "step": 3477 + }, + { + "epoch": 0.032922823524957165, + "grad_norm": 210.795166015625, + "learning_rate": 1.999955417336926e-06, + "loss": 25.2188, + "step": 3478 + }, + { + "epoch": 0.03293228954667222, + "grad_norm": 240.2946319580078, + "learning_rate": 1.9999551273713184e-06, + "loss": 30.2031, + "step": 3479 + }, + { + "epoch": 0.03294175556838727, + "grad_norm": 456.6177062988281, + "learning_rate": 1.9999548364658177e-06, + "loss": 22.9844, + "step": 3480 + }, + { + "epoch": 0.03295122159010233, + "grad_norm": 1050.611328125, + "learning_rate": 1.999954544620425e-06, + "loss": 58.3828, + "step": 3481 + }, + { + "epoch": 0.032960687611817384, + "grad_norm": 271.2879638671875, + "learning_rate": 1.99995425183514e-06, + "loss": 27.4688, + "step": 3482 + }, + { + "epoch": 0.03297015363353244, + "grad_norm": 528.7586669921875, + "learning_rate": 1.999953958109963e-06, + "loss": 45.8047, + "step": 3483 + }, + { + "epoch": 0.03297961965524749, + "grad_norm": 228.1358184814453, + "learning_rate": 1.9999536634448944e-06, + "loss": 20.75, + "step": 3484 + }, + { + "epoch": 0.03298908567696254, + "grad_norm": 369.1788635253906, + "learning_rate": 1.9999533678399346e-06, + "loss": 28.7188, + "step": 3485 + }, + { + "epoch": 0.032998551698677596, + "grad_norm": 419.4502258300781, + "learning_rate": 1.999953071295083e-06, + "loss": 50.9062, + "step": 3486 + }, + { + "epoch": 0.03300801772039265, + "grad_norm": 310.33221435546875, + "learning_rate": 1.9999527738103416e-06, + "loss": 28.2812, + "step": 3487 + }, + { + "epoch": 0.0330174837421077, + "grad_norm": 762.44189453125, + "learning_rate": 1.9999524753857087e-06, + "loss": 51.0391, + "step": 3488 + }, + { + "epoch": 0.033026949763822755, + "grad_norm": 329.18975830078125, + "learning_rate": 1.999952176021186e-06, + "loss": 41.875, + "step": 3489 + }, + { + "epoch": 0.033036415785537815, + "grad_norm": 234.03919982910156, + "learning_rate": 1.999951875716773e-06, + "loss": 25.2188, + "step": 3490 + }, + { + "epoch": 0.03304588180725287, + "grad_norm": 3.6411685943603516, + "learning_rate": 1.9999515744724705e-06, + "loss": 0.8638, + "step": 3491 + }, + { + "epoch": 0.03305534782896792, + "grad_norm": 1404.9874267578125, + "learning_rate": 1.999951272288279e-06, + "loss": 70.0586, + "step": 3492 + }, + { + "epoch": 0.033064813850682974, + "grad_norm": 673.1476440429688, + "learning_rate": 1.999950969164198e-06, + "loss": 54.2031, + "step": 3493 + }, + { + "epoch": 0.03307427987239803, + "grad_norm": 1212.3482666015625, + "learning_rate": 1.9999506651002277e-06, + "loss": 64.0469, + "step": 3494 + }, + { + "epoch": 0.03308374589411308, + "grad_norm": 562.3330688476562, + "learning_rate": 1.9999503600963693e-06, + "loss": 60.0781, + "step": 3495 + }, + { + "epoch": 0.03309321191582813, + "grad_norm": 2.981330394744873, + "learning_rate": 1.9999500541526224e-06, + "loss": 0.918, + "step": 3496 + }, + { + "epoch": 0.033102677937543186, + "grad_norm": 310.5069885253906, + "learning_rate": 1.999949747268988e-06, + "loss": 31.3281, + "step": 3497 + }, + { + "epoch": 0.033112143959258246, + "grad_norm": 254.0532989501953, + "learning_rate": 1.999949439445465e-06, + "loss": 27.7031, + "step": 3498 + }, + { + "epoch": 0.0331216099809733, + "grad_norm": 228.4735107421875, + "learning_rate": 1.9999491306820553e-06, + "loss": 27.3594, + "step": 3499 + }, + { + "epoch": 0.03313107600268835, + "grad_norm": 524.8206787109375, + "learning_rate": 1.999948820978758e-06, + "loss": 31.1719, + "step": 3500 + }, + { + "epoch": 0.033140542024403405, + "grad_norm": 295.1111145019531, + "learning_rate": 1.9999485103355742e-06, + "loss": 31.625, + "step": 3501 + }, + { + "epoch": 0.03315000804611846, + "grad_norm": 2.845839023590088, + "learning_rate": 1.999948198752504e-06, + "loss": 0.9683, + "step": 3502 + }, + { + "epoch": 0.03315947406783351, + "grad_norm": 398.02520751953125, + "learning_rate": 1.999947886229547e-06, + "loss": 38.1875, + "step": 3503 + }, + { + "epoch": 0.033168940089548564, + "grad_norm": 2246.5, + "learning_rate": 1.9999475727667045e-06, + "loss": 70.875, + "step": 3504 + }, + { + "epoch": 0.03317840611126362, + "grad_norm": 2.946030616760254, + "learning_rate": 1.9999472583639762e-06, + "loss": 0.8477, + "step": 3505 + }, + { + "epoch": 0.03318787213297867, + "grad_norm": 815.18994140625, + "learning_rate": 1.9999469430213624e-06, + "loss": 44.5938, + "step": 3506 + }, + { + "epoch": 0.03319733815469373, + "grad_norm": 349.56695556640625, + "learning_rate": 1.9999466267388635e-06, + "loss": 52.9062, + "step": 3507 + }, + { + "epoch": 0.03320680417640878, + "grad_norm": 348.4049072265625, + "learning_rate": 1.99994630951648e-06, + "loss": 37.2344, + "step": 3508 + }, + { + "epoch": 0.033216270198123836, + "grad_norm": 366.2416687011719, + "learning_rate": 1.999945991354212e-06, + "loss": 33.9531, + "step": 3509 + }, + { + "epoch": 0.03322573621983889, + "grad_norm": 1768.76904296875, + "learning_rate": 1.9999456722520594e-06, + "loss": 76.8438, + "step": 3510 + }, + { + "epoch": 0.03323520224155394, + "grad_norm": 843.784423828125, + "learning_rate": 1.9999453522100234e-06, + "loss": 58.6719, + "step": 3511 + }, + { + "epoch": 0.033244668263268995, + "grad_norm": 426.2342224121094, + "learning_rate": 1.999945031228104e-06, + "loss": 34.9688, + "step": 3512 + }, + { + "epoch": 0.03325413428498405, + "grad_norm": 3.3563289642333984, + "learning_rate": 1.999944709306301e-06, + "loss": 0.8264, + "step": 3513 + }, + { + "epoch": 0.0332636003066991, + "grad_norm": 1592.547119140625, + "learning_rate": 1.999944386444615e-06, + "loss": 62.3906, + "step": 3514 + }, + { + "epoch": 0.033273066328414154, + "grad_norm": 373.6781005859375, + "learning_rate": 1.9999440626430463e-06, + "loss": 37.6406, + "step": 3515 + }, + { + "epoch": 0.033282532350129214, + "grad_norm": 608.7158203125, + "learning_rate": 1.9999437379015953e-06, + "loss": 43.8906, + "step": 3516 + }, + { + "epoch": 0.03329199837184427, + "grad_norm": 249.36276245117188, + "learning_rate": 1.9999434122202625e-06, + "loss": 25.5, + "step": 3517 + }, + { + "epoch": 0.03330146439355932, + "grad_norm": 1094.357177734375, + "learning_rate": 1.999943085599047e-06, + "loss": 40.5312, + "step": 3518 + }, + { + "epoch": 0.03331093041527437, + "grad_norm": 920.9158325195312, + "learning_rate": 1.9999427580379513e-06, + "loss": 30.1484, + "step": 3519 + }, + { + "epoch": 0.033320396436989426, + "grad_norm": 249.7315216064453, + "learning_rate": 1.9999424295369738e-06, + "loss": 32.1094, + "step": 3520 + }, + { + "epoch": 0.03332986245870448, + "grad_norm": 347.0660095214844, + "learning_rate": 1.9999421000961157e-06, + "loss": 27.1094, + "step": 3521 + }, + { + "epoch": 0.03333932848041953, + "grad_norm": 621.0184936523438, + "learning_rate": 1.999941769715377e-06, + "loss": 58.8984, + "step": 3522 + }, + { + "epoch": 0.033348794502134585, + "grad_norm": 603.5217895507812, + "learning_rate": 1.999941438394758e-06, + "loss": 61.1406, + "step": 3523 + }, + { + "epoch": 0.033358260523849645, + "grad_norm": 276.02044677734375, + "learning_rate": 1.999941106134259e-06, + "loss": 34.6875, + "step": 3524 + }, + { + "epoch": 0.0333677265455647, + "grad_norm": 299.7690124511719, + "learning_rate": 1.9999407729338803e-06, + "loss": 31.7344, + "step": 3525 + }, + { + "epoch": 0.03337719256727975, + "grad_norm": 502.2052307128906, + "learning_rate": 1.999940438793623e-06, + "loss": 34.6016, + "step": 3526 + }, + { + "epoch": 0.033386658588994804, + "grad_norm": 178.7254180908203, + "learning_rate": 1.9999401037134863e-06, + "loss": 25.2031, + "step": 3527 + }, + { + "epoch": 0.03339612461070986, + "grad_norm": 336.6799621582031, + "learning_rate": 1.999939767693471e-06, + "loss": 27.6016, + "step": 3528 + }, + { + "epoch": 0.03340559063242491, + "grad_norm": 864.380615234375, + "learning_rate": 1.999939430733577e-06, + "loss": 21.6016, + "step": 3529 + }, + { + "epoch": 0.03341505665413996, + "grad_norm": 242.75253295898438, + "learning_rate": 1.9999390928338053e-06, + "loss": 30.4062, + "step": 3530 + }, + { + "epoch": 0.033424522675855016, + "grad_norm": 493.6935729980469, + "learning_rate": 1.999938753994156e-06, + "loss": 38.0938, + "step": 3531 + }, + { + "epoch": 0.03343398869757007, + "grad_norm": 1510.447998046875, + "learning_rate": 1.9999384142146296e-06, + "loss": 64.6641, + "step": 3532 + }, + { + "epoch": 0.03344345471928513, + "grad_norm": 359.5534362792969, + "learning_rate": 1.9999380734952255e-06, + "loss": 32.375, + "step": 3533 + }, + { + "epoch": 0.03345292074100018, + "grad_norm": 212.59149169921875, + "learning_rate": 1.9999377318359447e-06, + "loss": 26.7188, + "step": 3534 + }, + { + "epoch": 0.033462386762715235, + "grad_norm": 561.9942626953125, + "learning_rate": 1.9999373892367877e-06, + "loss": 74.375, + "step": 3535 + }, + { + "epoch": 0.03347185278443029, + "grad_norm": 693.9342651367188, + "learning_rate": 1.999937045697755e-06, + "loss": 53.3906, + "step": 3536 + }, + { + "epoch": 0.03348131880614534, + "grad_norm": 250.42579650878906, + "learning_rate": 1.999936701218846e-06, + "loss": 31.4062, + "step": 3537 + }, + { + "epoch": 0.033490784827860394, + "grad_norm": 396.32916259765625, + "learning_rate": 1.9999363558000615e-06, + "loss": 48.1875, + "step": 3538 + }, + { + "epoch": 0.03350025084957545, + "grad_norm": 687.023681640625, + "learning_rate": 1.999936009441402e-06, + "loss": 42.5859, + "step": 3539 + }, + { + "epoch": 0.0335097168712905, + "grad_norm": 180.9481658935547, + "learning_rate": 1.999935662142868e-06, + "loss": 29.2656, + "step": 3540 + }, + { + "epoch": 0.03351918289300556, + "grad_norm": 344.0596008300781, + "learning_rate": 1.9999353139044594e-06, + "loss": 58.4375, + "step": 3541 + }, + { + "epoch": 0.03352864891472061, + "grad_norm": 3.174522876739502, + "learning_rate": 1.9999349647261765e-06, + "loss": 0.8989, + "step": 3542 + }, + { + "epoch": 0.033538114936435666, + "grad_norm": 430.52044677734375, + "learning_rate": 1.99993461460802e-06, + "loss": 38.6953, + "step": 3543 + }, + { + "epoch": 0.03354758095815072, + "grad_norm": 424.6161193847656, + "learning_rate": 1.99993426354999e-06, + "loss": 30.1406, + "step": 3544 + }, + { + "epoch": 0.03355704697986577, + "grad_norm": 559.4381713867188, + "learning_rate": 1.9999339115520863e-06, + "loss": 30.4844, + "step": 3545 + }, + { + "epoch": 0.033566513001580825, + "grad_norm": 816.8515625, + "learning_rate": 1.9999335586143103e-06, + "loss": 65.6562, + "step": 3546 + }, + { + "epoch": 0.03357597902329588, + "grad_norm": 550.7627563476562, + "learning_rate": 1.999933204736662e-06, + "loss": 51.6406, + "step": 3547 + }, + { + "epoch": 0.03358544504501093, + "grad_norm": 308.96734619140625, + "learning_rate": 1.9999328499191406e-06, + "loss": 27.4219, + "step": 3548 + }, + { + "epoch": 0.033594911066725984, + "grad_norm": 442.1771545410156, + "learning_rate": 1.999932494161748e-06, + "loss": 45.4219, + "step": 3549 + }, + { + "epoch": 0.033604377088441044, + "grad_norm": 770.6197509765625, + "learning_rate": 1.9999321374644838e-06, + "loss": 50.0469, + "step": 3550 + }, + { + "epoch": 0.0336138431101561, + "grad_norm": 786.3685302734375, + "learning_rate": 1.999931779827349e-06, + "loss": 63.2812, + "step": 3551 + }, + { + "epoch": 0.03362330913187115, + "grad_norm": 658.7329711914062, + "learning_rate": 1.9999314212503428e-06, + "loss": 55.7812, + "step": 3552 + }, + { + "epoch": 0.0336327751535862, + "grad_norm": 496.18408203125, + "learning_rate": 1.999931061733466e-06, + "loss": 26.5312, + "step": 3553 + }, + { + "epoch": 0.033642241175301256, + "grad_norm": 537.0125122070312, + "learning_rate": 1.9999307012767193e-06, + "loss": 30.9922, + "step": 3554 + }, + { + "epoch": 0.03365170719701631, + "grad_norm": 636.0557250976562, + "learning_rate": 1.999930339880103e-06, + "loss": 50.0156, + "step": 3555 + }, + { + "epoch": 0.03366117321873136, + "grad_norm": 958.938720703125, + "learning_rate": 1.9999299775436168e-06, + "loss": 34.8906, + "step": 3556 + }, + { + "epoch": 0.033670639240446415, + "grad_norm": 335.8818054199219, + "learning_rate": 1.9999296142672615e-06, + "loss": 35.8281, + "step": 3557 + }, + { + "epoch": 0.03368010526216147, + "grad_norm": 304.2779846191406, + "learning_rate": 1.9999292500510377e-06, + "loss": 29.5625, + "step": 3558 + }, + { + "epoch": 0.03368957128387653, + "grad_norm": 832.9119873046875, + "learning_rate": 1.9999288848949452e-06, + "loss": 65.3906, + "step": 3559 + }, + { + "epoch": 0.03369903730559158, + "grad_norm": 777.8338623046875, + "learning_rate": 1.999928518798985e-06, + "loss": 36.3281, + "step": 3560 + }, + { + "epoch": 0.033708503327306634, + "grad_norm": 511.39398193359375, + "learning_rate": 1.9999281517631566e-06, + "loss": 31.8906, + "step": 3561 + }, + { + "epoch": 0.03371796934902169, + "grad_norm": 563.2837524414062, + "learning_rate": 1.999927783787461e-06, + "loss": 49.1719, + "step": 3562 + }, + { + "epoch": 0.03372743537073674, + "grad_norm": 360.48883056640625, + "learning_rate": 1.9999274148718987e-06, + "loss": 30.25, + "step": 3563 + }, + { + "epoch": 0.03373690139245179, + "grad_norm": 436.39898681640625, + "learning_rate": 1.999927045016469e-06, + "loss": 44.375, + "step": 3564 + }, + { + "epoch": 0.033746367414166846, + "grad_norm": 332.16668701171875, + "learning_rate": 1.9999266742211735e-06, + "loss": 24.082, + "step": 3565 + }, + { + "epoch": 0.0337558334358819, + "grad_norm": 674.393310546875, + "learning_rate": 1.9999263024860114e-06, + "loss": 53.0312, + "step": 3566 + }, + { + "epoch": 0.03376529945759696, + "grad_norm": 3.50730299949646, + "learning_rate": 1.9999259298109837e-06, + "loss": 0.8396, + "step": 3567 + }, + { + "epoch": 0.03377476547931201, + "grad_norm": 349.655517578125, + "learning_rate": 1.999925556196091e-06, + "loss": 26.7266, + "step": 3568 + }, + { + "epoch": 0.033784231501027065, + "grad_norm": 683.425537109375, + "learning_rate": 1.999925181641333e-06, + "loss": 44.0781, + "step": 3569 + }, + { + "epoch": 0.03379369752274212, + "grad_norm": 562.1268310546875, + "learning_rate": 1.999924806146711e-06, + "loss": 32.0859, + "step": 3570 + }, + { + "epoch": 0.03380316354445717, + "grad_norm": 279.72198486328125, + "learning_rate": 1.9999244297122244e-06, + "loss": 31.8906, + "step": 3571 + }, + { + "epoch": 0.033812629566172224, + "grad_norm": 457.8304443359375, + "learning_rate": 1.9999240523378735e-06, + "loss": 61.75, + "step": 3572 + }, + { + "epoch": 0.03382209558788728, + "grad_norm": 266.1946716308594, + "learning_rate": 1.9999236740236596e-06, + "loss": 30.4375, + "step": 3573 + }, + { + "epoch": 0.03383156160960233, + "grad_norm": 204.20361328125, + "learning_rate": 1.999923294769582e-06, + "loss": 30.4375, + "step": 3574 + }, + { + "epoch": 0.03384102763131738, + "grad_norm": 1044.079345703125, + "learning_rate": 1.999922914575642e-06, + "loss": 34.4219, + "step": 3575 + }, + { + "epoch": 0.03385049365303244, + "grad_norm": 538.369873046875, + "learning_rate": 1.999922533441839e-06, + "loss": 49.0625, + "step": 3576 + }, + { + "epoch": 0.033859959674747496, + "grad_norm": 397.9170227050781, + "learning_rate": 1.9999221513681744e-06, + "loss": 44.0938, + "step": 3577 + }, + { + "epoch": 0.03386942569646255, + "grad_norm": 341.1197509765625, + "learning_rate": 1.9999217683546475e-06, + "loss": 31.1406, + "step": 3578 + }, + { + "epoch": 0.0338788917181776, + "grad_norm": 1179.20263671875, + "learning_rate": 1.9999213844012596e-06, + "loss": 54.5859, + "step": 3579 + }, + { + "epoch": 0.033888357739892655, + "grad_norm": 329.9576416015625, + "learning_rate": 1.9999209995080107e-06, + "loss": 30.8281, + "step": 3580 + }, + { + "epoch": 0.03389782376160771, + "grad_norm": 227.19229125976562, + "learning_rate": 1.999920613674901e-06, + "loss": 26.8906, + "step": 3581 + }, + { + "epoch": 0.03390728978332276, + "grad_norm": 1775.5140380859375, + "learning_rate": 1.999920226901931e-06, + "loss": 36.0938, + "step": 3582 + }, + { + "epoch": 0.033916755805037814, + "grad_norm": 588.108154296875, + "learning_rate": 1.9999198391891007e-06, + "loss": 56.5469, + "step": 3583 + }, + { + "epoch": 0.033926221826752874, + "grad_norm": 402.7490234375, + "learning_rate": 1.999919450536411e-06, + "loss": 28.4844, + "step": 3584 + }, + { + "epoch": 0.03393568784846793, + "grad_norm": 189.6348876953125, + "learning_rate": 1.999919060943862e-06, + "loss": 22.9688, + "step": 3585 + }, + { + "epoch": 0.03394515387018298, + "grad_norm": 446.5243835449219, + "learning_rate": 1.999918670411454e-06, + "loss": 46.0781, + "step": 3586 + }, + { + "epoch": 0.03395461989189803, + "grad_norm": 434.6544494628906, + "learning_rate": 1.999918278939188e-06, + "loss": 23.1016, + "step": 3587 + }, + { + "epoch": 0.033964085913613086, + "grad_norm": 518.2042846679688, + "learning_rate": 1.9999178865270637e-06, + "loss": 40.4219, + "step": 3588 + }, + { + "epoch": 0.03397355193532814, + "grad_norm": 760.6729736328125, + "learning_rate": 1.9999174931750816e-06, + "loss": 50.7969, + "step": 3589 + }, + { + "epoch": 0.03398301795704319, + "grad_norm": 787.82666015625, + "learning_rate": 1.999917098883242e-06, + "loss": 51.7969, + "step": 3590 + }, + { + "epoch": 0.033992483978758245, + "grad_norm": 3.397653102874756, + "learning_rate": 1.9999167036515456e-06, + "loss": 0.9446, + "step": 3591 + }, + { + "epoch": 0.0340019500004733, + "grad_norm": 392.83538818359375, + "learning_rate": 1.9999163074799925e-06, + "loss": 44.7812, + "step": 3592 + }, + { + "epoch": 0.03401141602218836, + "grad_norm": 455.41912841796875, + "learning_rate": 1.9999159103685826e-06, + "loss": 40.0781, + "step": 3593 + }, + { + "epoch": 0.03402088204390341, + "grad_norm": 1039.4443359375, + "learning_rate": 1.9999155123173172e-06, + "loss": 77.9375, + "step": 3594 + }, + { + "epoch": 0.034030348065618464, + "grad_norm": 246.38002014160156, + "learning_rate": 1.9999151133261964e-06, + "loss": 27.9062, + "step": 3595 + }, + { + "epoch": 0.03403981408733352, + "grad_norm": 631.3529052734375, + "learning_rate": 1.99991471339522e-06, + "loss": 27.2109, + "step": 3596 + }, + { + "epoch": 0.03404928010904857, + "grad_norm": 656.7232666015625, + "learning_rate": 1.9999143125243896e-06, + "loss": 68.3125, + "step": 3597 + }, + { + "epoch": 0.03405874613076362, + "grad_norm": 3.0064713954925537, + "learning_rate": 1.999913910713704e-06, + "loss": 0.9326, + "step": 3598 + }, + { + "epoch": 0.034068212152478676, + "grad_norm": 780.62939453125, + "learning_rate": 1.9999135079631646e-06, + "loss": 25.8594, + "step": 3599 + }, + { + "epoch": 0.03407767817419373, + "grad_norm": 397.7160949707031, + "learning_rate": 1.9999131042727715e-06, + "loss": 42.125, + "step": 3600 + }, + { + "epoch": 0.03408714419590878, + "grad_norm": 765.478271484375, + "learning_rate": 1.9999126996425254e-06, + "loss": 59.0, + "step": 3601 + }, + { + "epoch": 0.03409661021762384, + "grad_norm": 411.4880676269531, + "learning_rate": 1.9999122940724264e-06, + "loss": 57.875, + "step": 3602 + }, + { + "epoch": 0.034106076239338895, + "grad_norm": 268.0469970703125, + "learning_rate": 1.999911887562475e-06, + "loss": 28.5625, + "step": 3603 + }, + { + "epoch": 0.03411554226105395, + "grad_norm": 848.6107177734375, + "learning_rate": 1.999911480112671e-06, + "loss": 27.2031, + "step": 3604 + }, + { + "epoch": 0.034125008282769, + "grad_norm": 766.35693359375, + "learning_rate": 1.9999110717230156e-06, + "loss": 31.8125, + "step": 3605 + }, + { + "epoch": 0.034134474304484054, + "grad_norm": 380.7010498046875, + "learning_rate": 1.9999106623935087e-06, + "loss": 45.8906, + "step": 3606 + }, + { + "epoch": 0.03414394032619911, + "grad_norm": 231.3485565185547, + "learning_rate": 1.9999102521241505e-06, + "loss": 35.2656, + "step": 3607 + }, + { + "epoch": 0.03415340634791416, + "grad_norm": 303.0037841796875, + "learning_rate": 1.9999098409149424e-06, + "loss": 33.4062, + "step": 3608 + }, + { + "epoch": 0.03416287236962921, + "grad_norm": 1324.03662109375, + "learning_rate": 1.999909428765884e-06, + "loss": 73.0469, + "step": 3609 + }, + { + "epoch": 0.03417233839134427, + "grad_norm": 708.4049682617188, + "learning_rate": 1.9999090156769753e-06, + "loss": 51.5938, + "step": 3610 + }, + { + "epoch": 0.034181804413059326, + "grad_norm": 2235.0634765625, + "learning_rate": 1.9999086016482172e-06, + "loss": 35.8438, + "step": 3611 + }, + { + "epoch": 0.03419127043477438, + "grad_norm": 346.41668701171875, + "learning_rate": 1.9999081866796105e-06, + "loss": 29.0469, + "step": 3612 + }, + { + "epoch": 0.03420073645648943, + "grad_norm": 176.13023376464844, + "learning_rate": 1.999907770771155e-06, + "loss": 27.2969, + "step": 3613 + }, + { + "epoch": 0.034210202478204485, + "grad_norm": 4.402038097381592, + "learning_rate": 1.9999073539228513e-06, + "loss": 0.9004, + "step": 3614 + }, + { + "epoch": 0.03421966849991954, + "grad_norm": 190.398193359375, + "learning_rate": 1.9999069361346997e-06, + "loss": 27.0469, + "step": 3615 + }, + { + "epoch": 0.03422913452163459, + "grad_norm": 780.4185180664062, + "learning_rate": 1.9999065174067003e-06, + "loss": 43.0859, + "step": 3616 + }, + { + "epoch": 0.034238600543349644, + "grad_norm": 487.3522644042969, + "learning_rate": 1.999906097738854e-06, + "loss": 61.6562, + "step": 3617 + }, + { + "epoch": 0.0342480665650647, + "grad_norm": 254.7242431640625, + "learning_rate": 1.999905677131161e-06, + "loss": 27.4219, + "step": 3618 + }, + { + "epoch": 0.03425753258677976, + "grad_norm": 1969.132080078125, + "learning_rate": 1.999905255583622e-06, + "loss": 49.9531, + "step": 3619 + }, + { + "epoch": 0.03426699860849481, + "grad_norm": 570.471923828125, + "learning_rate": 1.999904833096237e-06, + "loss": 66.3281, + "step": 3620 + }, + { + "epoch": 0.03427646463020986, + "grad_norm": 1204.76025390625, + "learning_rate": 1.9999044096690064e-06, + "loss": 53.5781, + "step": 3621 + }, + { + "epoch": 0.034285930651924916, + "grad_norm": 466.4956359863281, + "learning_rate": 1.999903985301931e-06, + "loss": 28.3125, + "step": 3622 + }, + { + "epoch": 0.03429539667363997, + "grad_norm": 285.6156921386719, + "learning_rate": 1.9999035599950105e-06, + "loss": 26.3125, + "step": 3623 + }, + { + "epoch": 0.03430486269535502, + "grad_norm": 460.23260498046875, + "learning_rate": 1.9999031337482456e-06, + "loss": 34.8438, + "step": 3624 + }, + { + "epoch": 0.034314328717070075, + "grad_norm": 843.3438720703125, + "learning_rate": 1.999902706561637e-06, + "loss": 22.7422, + "step": 3625 + }, + { + "epoch": 0.03432379473878513, + "grad_norm": 630.142822265625, + "learning_rate": 1.9999022784351853e-06, + "loss": 51.7344, + "step": 3626 + }, + { + "epoch": 0.03433326076050019, + "grad_norm": 441.0840759277344, + "learning_rate": 1.9999018493688896e-06, + "loss": 30.9453, + "step": 3627 + }, + { + "epoch": 0.03434272678221524, + "grad_norm": 191.78948974609375, + "learning_rate": 1.999901419362752e-06, + "loss": 27.4219, + "step": 3628 + }, + { + "epoch": 0.034352192803930294, + "grad_norm": 282.5002136230469, + "learning_rate": 1.999900988416772e-06, + "loss": 30.4219, + "step": 3629 + }, + { + "epoch": 0.03436165882564535, + "grad_norm": 205.9770050048828, + "learning_rate": 1.99990055653095e-06, + "loss": 30.1094, + "step": 3630 + }, + { + "epoch": 0.0343711248473604, + "grad_norm": 173.59698486328125, + "learning_rate": 1.999900123705286e-06, + "loss": 29.1562, + "step": 3631 + }, + { + "epoch": 0.03438059086907545, + "grad_norm": 413.0645446777344, + "learning_rate": 1.999899689939782e-06, + "loss": 23.4219, + "step": 3632 + }, + { + "epoch": 0.034390056890790506, + "grad_norm": 996.1834716796875, + "learning_rate": 1.9998992552344363e-06, + "loss": 27.2188, + "step": 3633 + }, + { + "epoch": 0.03439952291250556, + "grad_norm": 795.5692749023438, + "learning_rate": 1.999898819589251e-06, + "loss": 35.0547, + "step": 3634 + }, + { + "epoch": 0.03440898893422061, + "grad_norm": 729.8290405273438, + "learning_rate": 1.9998983830042257e-06, + "loss": 53.3594, + "step": 3635 + }, + { + "epoch": 0.03441845495593567, + "grad_norm": 530.6171264648438, + "learning_rate": 1.999897945479361e-06, + "loss": 28.6094, + "step": 3636 + }, + { + "epoch": 0.034427920977650725, + "grad_norm": 2.7295055389404297, + "learning_rate": 1.999897507014657e-06, + "loss": 0.8506, + "step": 3637 + }, + { + "epoch": 0.03443738699936578, + "grad_norm": 614.6702270507812, + "learning_rate": 1.9998970676101146e-06, + "loss": 70.6562, + "step": 3638 + }, + { + "epoch": 0.03444685302108083, + "grad_norm": 505.9822082519531, + "learning_rate": 1.9998966272657344e-06, + "loss": 26.3438, + "step": 3639 + }, + { + "epoch": 0.034456319042795884, + "grad_norm": 918.8512573242188, + "learning_rate": 1.999896185981516e-06, + "loss": 91.7656, + "step": 3640 + }, + { + "epoch": 0.03446578506451094, + "grad_norm": 746.1356811523438, + "learning_rate": 1.99989574375746e-06, + "loss": 66.375, + "step": 3641 + }, + { + "epoch": 0.03447525108622599, + "grad_norm": 299.886962890625, + "learning_rate": 1.9998953005935677e-06, + "loss": 34.0781, + "step": 3642 + }, + { + "epoch": 0.03448471710794104, + "grad_norm": 353.05755615234375, + "learning_rate": 1.9998948564898384e-06, + "loss": 23.7031, + "step": 3643 + }, + { + "epoch": 0.034494183129656096, + "grad_norm": 406.9053039550781, + "learning_rate": 1.9998944114462727e-06, + "loss": 32.4375, + "step": 3644 + }, + { + "epoch": 0.034503649151371156, + "grad_norm": 320.47393798828125, + "learning_rate": 1.999893965462872e-06, + "loss": 31.1875, + "step": 3645 + }, + { + "epoch": 0.03451311517308621, + "grad_norm": 585.9266357421875, + "learning_rate": 1.999893518539636e-06, + "loss": 55.8594, + "step": 3646 + }, + { + "epoch": 0.03452258119480126, + "grad_norm": 513.8401489257812, + "learning_rate": 1.9998930706765644e-06, + "loss": 45.0938, + "step": 3647 + }, + { + "epoch": 0.034532047216516315, + "grad_norm": 285.8667907714844, + "learning_rate": 1.9998926218736585e-06, + "loss": 31.9531, + "step": 3648 + }, + { + "epoch": 0.03454151323823137, + "grad_norm": 198.36863708496094, + "learning_rate": 1.999892172130919e-06, + "loss": 26.9688, + "step": 3649 + }, + { + "epoch": 0.03455097925994642, + "grad_norm": 877.510009765625, + "learning_rate": 1.999891721448346e-06, + "loss": 42.5938, + "step": 3650 + }, + { + "epoch": 0.034560445281661474, + "grad_norm": 673.474853515625, + "learning_rate": 1.9998912698259394e-06, + "loss": 57.2656, + "step": 3651 + }, + { + "epoch": 0.03456991130337653, + "grad_norm": 3.5064213275909424, + "learning_rate": 1.9998908172637e-06, + "loss": 0.9863, + "step": 3652 + }, + { + "epoch": 0.03457937732509159, + "grad_norm": 597.4368896484375, + "learning_rate": 1.9998903637616287e-06, + "loss": 49.9844, + "step": 3653 + }, + { + "epoch": 0.03458884334680664, + "grad_norm": 667.8759155273438, + "learning_rate": 1.9998899093197255e-06, + "loss": 64.2109, + "step": 3654 + }, + { + "epoch": 0.03459830936852169, + "grad_norm": 517.8374633789062, + "learning_rate": 1.9998894539379906e-06, + "loss": 68.6406, + "step": 3655 + }, + { + "epoch": 0.034607775390236746, + "grad_norm": 889.1074829101562, + "learning_rate": 1.9998889976164247e-06, + "loss": 75.4688, + "step": 3656 + }, + { + "epoch": 0.0346172414119518, + "grad_norm": 475.484619140625, + "learning_rate": 1.9998885403550284e-06, + "loss": 27.75, + "step": 3657 + }, + { + "epoch": 0.03462670743366685, + "grad_norm": 284.7806091308594, + "learning_rate": 1.9998880821538016e-06, + "loss": 24.5156, + "step": 3658 + }, + { + "epoch": 0.034636173455381905, + "grad_norm": 611.3776245117188, + "learning_rate": 1.9998876230127455e-06, + "loss": 46.875, + "step": 3659 + }, + { + "epoch": 0.03464563947709696, + "grad_norm": 274.3036804199219, + "learning_rate": 1.99988716293186e-06, + "loss": 32.5469, + "step": 3660 + }, + { + "epoch": 0.03465510549881201, + "grad_norm": 519.9882202148438, + "learning_rate": 1.9998867019111453e-06, + "loss": 32.6172, + "step": 3661 + }, + { + "epoch": 0.03466457152052707, + "grad_norm": 186.0958709716797, + "learning_rate": 1.999886239950602e-06, + "loss": 26.6406, + "step": 3662 + }, + { + "epoch": 0.034674037542242124, + "grad_norm": 179.64280700683594, + "learning_rate": 1.999885777050231e-06, + "loss": 30.0469, + "step": 3663 + }, + { + "epoch": 0.03468350356395718, + "grad_norm": 459.8203430175781, + "learning_rate": 1.9998853132100325e-06, + "loss": 61.8594, + "step": 3664 + }, + { + "epoch": 0.03469296958567223, + "grad_norm": 1351.68359375, + "learning_rate": 1.9998848484300062e-06, + "loss": 42.6406, + "step": 3665 + }, + { + "epoch": 0.03470243560738728, + "grad_norm": 160.0842742919922, + "learning_rate": 1.999884382710154e-06, + "loss": 24.9219, + "step": 3666 + }, + { + "epoch": 0.034711901629102336, + "grad_norm": 277.4002990722656, + "learning_rate": 1.999883916050475e-06, + "loss": 29.2188, + "step": 3667 + }, + { + "epoch": 0.03472136765081739, + "grad_norm": 304.5913391113281, + "learning_rate": 1.9998834484509707e-06, + "loss": 26.2188, + "step": 3668 + }, + { + "epoch": 0.03473083367253244, + "grad_norm": 217.33444213867188, + "learning_rate": 1.9998829799116407e-06, + "loss": 28.9688, + "step": 3669 + }, + { + "epoch": 0.0347402996942475, + "grad_norm": 422.07440185546875, + "learning_rate": 1.9998825104324853e-06, + "loss": 26.0625, + "step": 3670 + }, + { + "epoch": 0.034749765715962555, + "grad_norm": 321.44134521484375, + "learning_rate": 1.999882040013506e-06, + "loss": 43.8906, + "step": 3671 + }, + { + "epoch": 0.03475923173767761, + "grad_norm": 546.3546142578125, + "learning_rate": 1.9998815686547024e-06, + "loss": 56.0, + "step": 3672 + }, + { + "epoch": 0.03476869775939266, + "grad_norm": 345.3348083496094, + "learning_rate": 1.9998810963560754e-06, + "loss": 24.375, + "step": 3673 + }, + { + "epoch": 0.034778163781107714, + "grad_norm": 222.86767578125, + "learning_rate": 1.9998806231176247e-06, + "loss": 27.7812, + "step": 3674 + }, + { + "epoch": 0.03478762980282277, + "grad_norm": 331.78265380859375, + "learning_rate": 1.9998801489393516e-06, + "loss": 26.1406, + "step": 3675 + }, + { + "epoch": 0.03479709582453782, + "grad_norm": 598.6629638671875, + "learning_rate": 1.9998796738212564e-06, + "loss": 46.2188, + "step": 3676 + }, + { + "epoch": 0.03480656184625287, + "grad_norm": 231.94032287597656, + "learning_rate": 1.9998791977633392e-06, + "loss": 21.7578, + "step": 3677 + }, + { + "epoch": 0.034816027867967926, + "grad_norm": 210.1803436279297, + "learning_rate": 1.9998787207656005e-06, + "loss": 27.8594, + "step": 3678 + }, + { + "epoch": 0.034825493889682986, + "grad_norm": 241.201904296875, + "learning_rate": 1.999878242828041e-06, + "loss": 32.25, + "step": 3679 + }, + { + "epoch": 0.03483495991139804, + "grad_norm": 295.5946960449219, + "learning_rate": 1.9998777639506607e-06, + "loss": 33.4844, + "step": 3680 + }, + { + "epoch": 0.03484442593311309, + "grad_norm": 3.016669273376465, + "learning_rate": 1.9998772841334605e-06, + "loss": 0.9214, + "step": 3681 + }, + { + "epoch": 0.034853891954828145, + "grad_norm": 783.1906127929688, + "learning_rate": 1.999876803376441e-06, + "loss": 76.0, + "step": 3682 + }, + { + "epoch": 0.0348633579765432, + "grad_norm": 2.7287237644195557, + "learning_rate": 1.999876321679602e-06, + "loss": 0.9014, + "step": 3683 + }, + { + "epoch": 0.03487282399825825, + "grad_norm": 674.77587890625, + "learning_rate": 1.9998758390429444e-06, + "loss": 46.3672, + "step": 3684 + }, + { + "epoch": 0.034882290019973304, + "grad_norm": 941.4509887695312, + "learning_rate": 1.9998753554664684e-06, + "loss": 27.1016, + "step": 3685 + }, + { + "epoch": 0.03489175604168836, + "grad_norm": 390.5943603515625, + "learning_rate": 1.9998748709501747e-06, + "loss": 44.1562, + "step": 3686 + }, + { + "epoch": 0.03490122206340341, + "grad_norm": 339.7454528808594, + "learning_rate": 1.999874385494064e-06, + "loss": 42.5938, + "step": 3687 + }, + { + "epoch": 0.03491068808511847, + "grad_norm": 295.8238830566406, + "learning_rate": 1.999873899098136e-06, + "loss": 30.75, + "step": 3688 + }, + { + "epoch": 0.03492015410683352, + "grad_norm": 313.1690979003906, + "learning_rate": 1.999873411762392e-06, + "loss": 29.6562, + "step": 3689 + }, + { + "epoch": 0.034929620128548576, + "grad_norm": 495.5023498535156, + "learning_rate": 1.9998729234868316e-06, + "loss": 41.0312, + "step": 3690 + }, + { + "epoch": 0.03493908615026363, + "grad_norm": 589.53125, + "learning_rate": 1.9998724342714557e-06, + "loss": 46.8594, + "step": 3691 + }, + { + "epoch": 0.03494855217197868, + "grad_norm": 3.277158498764038, + "learning_rate": 1.999871944116265e-06, + "loss": 0.7588, + "step": 3692 + }, + { + "epoch": 0.034958018193693735, + "grad_norm": 718.3163452148438, + "learning_rate": 1.99987145302126e-06, + "loss": 36.0, + "step": 3693 + }, + { + "epoch": 0.03496748421540879, + "grad_norm": 156.0826416015625, + "learning_rate": 1.99987096098644e-06, + "loss": 24.7812, + "step": 3694 + }, + { + "epoch": 0.03497695023712384, + "grad_norm": 302.34320068359375, + "learning_rate": 1.999870468011807e-06, + "loss": 28.8672, + "step": 3695 + }, + { + "epoch": 0.0349864162588389, + "grad_norm": 239.62107849121094, + "learning_rate": 1.9998699740973608e-06, + "loss": 25.6406, + "step": 3696 + }, + { + "epoch": 0.034995882280553954, + "grad_norm": 1100.376953125, + "learning_rate": 1.999869479243102e-06, + "loss": 85.4844, + "step": 3697 + }, + { + "epoch": 0.03500534830226901, + "grad_norm": 683.9441528320312, + "learning_rate": 1.9998689834490303e-06, + "loss": 28.3828, + "step": 3698 + }, + { + "epoch": 0.03501481432398406, + "grad_norm": 1305.52734375, + "learning_rate": 1.9998684867151473e-06, + "loss": 54.2656, + "step": 3699 + }, + { + "epoch": 0.03502428034569911, + "grad_norm": 358.50189208984375, + "learning_rate": 1.999867989041453e-06, + "loss": 47.0781, + "step": 3700 + }, + { + "epoch": 0.035033746367414166, + "grad_norm": 634.4055786132812, + "learning_rate": 1.9998674904279474e-06, + "loss": 32.5938, + "step": 3701 + }, + { + "epoch": 0.03504321238912922, + "grad_norm": 268.000244140625, + "learning_rate": 1.999866990874632e-06, + "loss": 32.2031, + "step": 3702 + }, + { + "epoch": 0.03505267841084427, + "grad_norm": 438.3104248046875, + "learning_rate": 1.9998664903815065e-06, + "loss": 62.3906, + "step": 3703 + }, + { + "epoch": 0.035062144432559325, + "grad_norm": 526.880615234375, + "learning_rate": 1.999865988948571e-06, + "loss": 36.3438, + "step": 3704 + }, + { + "epoch": 0.035071610454274385, + "grad_norm": 3.197324752807617, + "learning_rate": 1.999865486575827e-06, + "loss": 0.9087, + "step": 3705 + }, + { + "epoch": 0.03508107647598944, + "grad_norm": 371.7588806152344, + "learning_rate": 1.999864983263274e-06, + "loss": 20.0703, + "step": 3706 + }, + { + "epoch": 0.03509054249770449, + "grad_norm": 900.829833984375, + "learning_rate": 1.9998644790109137e-06, + "loss": 44.0781, + "step": 3707 + }, + { + "epoch": 0.035100008519419544, + "grad_norm": 745.3340454101562, + "learning_rate": 1.9998639738187456e-06, + "loss": 66.2891, + "step": 3708 + }, + { + "epoch": 0.0351094745411346, + "grad_norm": 255.58889770507812, + "learning_rate": 1.9998634676867703e-06, + "loss": 28.8906, + "step": 3709 + }, + { + "epoch": 0.03511894056284965, + "grad_norm": 604.2584228515625, + "learning_rate": 1.9998629606149887e-06, + "loss": 67.9062, + "step": 3710 + }, + { + "epoch": 0.0351284065845647, + "grad_norm": 408.2257080078125, + "learning_rate": 1.9998624526034003e-06, + "loss": 35.3516, + "step": 3711 + }, + { + "epoch": 0.035137872606279756, + "grad_norm": 433.74017333984375, + "learning_rate": 1.999861943652007e-06, + "loss": 31.9688, + "step": 3712 + }, + { + "epoch": 0.035147338627994816, + "grad_norm": 276.6903991699219, + "learning_rate": 1.999861433760808e-06, + "loss": 31.0469, + "step": 3713 + }, + { + "epoch": 0.03515680464970987, + "grad_norm": 327.39892578125, + "learning_rate": 1.9998609229298046e-06, + "loss": 33.375, + "step": 3714 + }, + { + "epoch": 0.03516627067142492, + "grad_norm": 474.4043884277344, + "learning_rate": 1.9998604111589963e-06, + "loss": 31.9688, + "step": 3715 + }, + { + "epoch": 0.035175736693139975, + "grad_norm": 962.20361328125, + "learning_rate": 1.999859898448385e-06, + "loss": 90.9219, + "step": 3716 + }, + { + "epoch": 0.03518520271485503, + "grad_norm": 996.0723266601562, + "learning_rate": 1.99985938479797e-06, + "loss": 66.0234, + "step": 3717 + }, + { + "epoch": 0.03519466873657008, + "grad_norm": 300.168212890625, + "learning_rate": 1.9998588702077524e-06, + "loss": 34.2188, + "step": 3718 + }, + { + "epoch": 0.035204134758285134, + "grad_norm": 322.2218017578125, + "learning_rate": 1.9998583546777327e-06, + "loss": 26.7344, + "step": 3719 + }, + { + "epoch": 0.03521360078000019, + "grad_norm": 735.347900390625, + "learning_rate": 1.999857838207911e-06, + "loss": 48.9844, + "step": 3720 + }, + { + "epoch": 0.03522306680171524, + "grad_norm": 443.6658935546875, + "learning_rate": 1.999857320798288e-06, + "loss": 29.7656, + "step": 3721 + }, + { + "epoch": 0.0352325328234303, + "grad_norm": 524.095458984375, + "learning_rate": 1.9998568024488643e-06, + "loss": 31.7812, + "step": 3722 + }, + { + "epoch": 0.03524199884514535, + "grad_norm": 507.543701171875, + "learning_rate": 1.99985628315964e-06, + "loss": 52.6094, + "step": 3723 + }, + { + "epoch": 0.035251464866860406, + "grad_norm": 3.3106565475463867, + "learning_rate": 1.999855762930616e-06, + "loss": 0.8916, + "step": 3724 + }, + { + "epoch": 0.03526093088857546, + "grad_norm": 649.0613403320312, + "learning_rate": 1.9998552417617925e-06, + "loss": 21.4414, + "step": 3725 + }, + { + "epoch": 0.03527039691029051, + "grad_norm": 623.6490478515625, + "learning_rate": 1.9998547196531703e-06, + "loss": 28.2656, + "step": 3726 + }, + { + "epoch": 0.035279862932005565, + "grad_norm": 3.064885139465332, + "learning_rate": 1.9998541966047497e-06, + "loss": 0.8857, + "step": 3727 + }, + { + "epoch": 0.03528932895372062, + "grad_norm": 329.5775146484375, + "learning_rate": 1.9998536726165313e-06, + "loss": 28.7188, + "step": 3728 + }, + { + "epoch": 0.03529879497543567, + "grad_norm": 243.40049743652344, + "learning_rate": 1.999853147688515e-06, + "loss": 26.4609, + "step": 3729 + }, + { + "epoch": 0.035308260997150724, + "grad_norm": 527.098388671875, + "learning_rate": 1.999852621820702e-06, + "loss": 34.1094, + "step": 3730 + }, + { + "epoch": 0.035317727018865784, + "grad_norm": 292.61505126953125, + "learning_rate": 1.9998520950130927e-06, + "loss": 25.8906, + "step": 3731 + }, + { + "epoch": 0.03532719304058084, + "grad_norm": 375.8937683105469, + "learning_rate": 1.999851567265688e-06, + "loss": 29.7656, + "step": 3732 + }, + { + "epoch": 0.03533665906229589, + "grad_norm": 482.8108215332031, + "learning_rate": 1.999851038578487e-06, + "loss": 28.1172, + "step": 3733 + }, + { + "epoch": 0.03534612508401094, + "grad_norm": 383.53125, + "learning_rate": 1.9998505089514913e-06, + "loss": 14.668, + "step": 3734 + }, + { + "epoch": 0.035355591105725996, + "grad_norm": 462.1753845214844, + "learning_rate": 1.9998499783847013e-06, + "loss": 26.9688, + "step": 3735 + }, + { + "epoch": 0.03536505712744105, + "grad_norm": 224.41970825195312, + "learning_rate": 1.9998494468781173e-06, + "loss": 26.9375, + "step": 3736 + }, + { + "epoch": 0.0353745231491561, + "grad_norm": 319.31756591796875, + "learning_rate": 1.99984891443174e-06, + "loss": 30.4336, + "step": 3737 + }, + { + "epoch": 0.035383989170871155, + "grad_norm": 743.8668823242188, + "learning_rate": 1.9998483810455695e-06, + "loss": 70.8281, + "step": 3738 + }, + { + "epoch": 0.035393455192586215, + "grad_norm": 599.521240234375, + "learning_rate": 1.9998478467196066e-06, + "loss": 31.6562, + "step": 3739 + }, + { + "epoch": 0.03540292121430127, + "grad_norm": 870.069580078125, + "learning_rate": 1.9998473114538518e-06, + "loss": 37.625, + "step": 3740 + }, + { + "epoch": 0.03541238723601632, + "grad_norm": 189.01417541503906, + "learning_rate": 1.999846775248306e-06, + "loss": 27.1094, + "step": 3741 + }, + { + "epoch": 0.035421853257731374, + "grad_norm": 837.0806884765625, + "learning_rate": 1.9998462381029684e-06, + "loss": 28.2031, + "step": 3742 + }, + { + "epoch": 0.03543131927944643, + "grad_norm": 699.5179443359375, + "learning_rate": 1.999845700017841e-06, + "loss": 38.7734, + "step": 3743 + }, + { + "epoch": 0.03544078530116148, + "grad_norm": 498.6907653808594, + "learning_rate": 1.999845160992924e-06, + "loss": 63.3438, + "step": 3744 + }, + { + "epoch": 0.03545025132287653, + "grad_norm": 463.3176574707031, + "learning_rate": 1.999844621028217e-06, + "loss": 43.6406, + "step": 3745 + }, + { + "epoch": 0.035459717344591586, + "grad_norm": 652.0436401367188, + "learning_rate": 1.999844080123721e-06, + "loss": 44.0977, + "step": 3746 + }, + { + "epoch": 0.03546918336630664, + "grad_norm": 245.450439453125, + "learning_rate": 1.999843538279437e-06, + "loss": 28.2188, + "step": 3747 + }, + { + "epoch": 0.0354786493880217, + "grad_norm": 1018.388671875, + "learning_rate": 1.9998429954953653e-06, + "loss": 45.0938, + "step": 3748 + }, + { + "epoch": 0.03548811540973675, + "grad_norm": 145.6427764892578, + "learning_rate": 1.999842451771506e-06, + "loss": 23.5938, + "step": 3749 + }, + { + "epoch": 0.035497581431451805, + "grad_norm": 814.7943725585938, + "learning_rate": 1.99984190710786e-06, + "loss": 69.5156, + "step": 3750 + }, + { + "epoch": 0.03550704745316686, + "grad_norm": 3.715550184249878, + "learning_rate": 1.999841361504427e-06, + "loss": 0.9067, + "step": 3751 + }, + { + "epoch": 0.03551651347488191, + "grad_norm": 2.680546998977661, + "learning_rate": 1.9998408149612087e-06, + "loss": 0.8301, + "step": 3752 + }, + { + "epoch": 0.035525979496596964, + "grad_norm": 3.027709722518921, + "learning_rate": 1.999840267478205e-06, + "loss": 1.0186, + "step": 3753 + }, + { + "epoch": 0.03553544551831202, + "grad_norm": 728.4012451171875, + "learning_rate": 1.999839719055417e-06, + "loss": 55.9688, + "step": 3754 + }, + { + "epoch": 0.03554491154002707, + "grad_norm": 532.959228515625, + "learning_rate": 1.9998391696928437e-06, + "loss": 41.5742, + "step": 3755 + }, + { + "epoch": 0.03555437756174213, + "grad_norm": 501.513427734375, + "learning_rate": 1.999838619390487e-06, + "loss": 46.3906, + "step": 3756 + }, + { + "epoch": 0.03556384358345718, + "grad_norm": 546.4718017578125, + "learning_rate": 1.999838068148347e-06, + "loss": 60.4062, + "step": 3757 + }, + { + "epoch": 0.035573309605172236, + "grad_norm": 470.2256164550781, + "learning_rate": 1.9998375159664245e-06, + "loss": 29.0, + "step": 3758 + }, + { + "epoch": 0.03558277562688729, + "grad_norm": 707.3917236328125, + "learning_rate": 1.9998369628447196e-06, + "loss": 41.0781, + "step": 3759 + }, + { + "epoch": 0.03559224164860234, + "grad_norm": 735.7433471679688, + "learning_rate": 1.999836408783233e-06, + "loss": 42.0781, + "step": 3760 + }, + { + "epoch": 0.035601707670317395, + "grad_norm": 465.1251220703125, + "learning_rate": 1.9998358537819654e-06, + "loss": 53.625, + "step": 3761 + }, + { + "epoch": 0.03561117369203245, + "grad_norm": 251.24496459960938, + "learning_rate": 1.9998352978409173e-06, + "loss": 27.5938, + "step": 3762 + }, + { + "epoch": 0.0356206397137475, + "grad_norm": 499.8003845214844, + "learning_rate": 1.9998347409600887e-06, + "loss": 47.9375, + "step": 3763 + }, + { + "epoch": 0.035630105735462554, + "grad_norm": 1228.9794921875, + "learning_rate": 1.999834183139481e-06, + "loss": 76.3047, + "step": 3764 + }, + { + "epoch": 0.035639571757177614, + "grad_norm": 184.41067504882812, + "learning_rate": 1.9998336243790933e-06, + "loss": 26.2344, + "step": 3765 + }, + { + "epoch": 0.03564903777889267, + "grad_norm": 673.4531860351562, + "learning_rate": 1.999833064678928e-06, + "loss": 48.5859, + "step": 3766 + }, + { + "epoch": 0.03565850380060772, + "grad_norm": 212.01304626464844, + "learning_rate": 1.999832504038984e-06, + "loss": 29.7344, + "step": 3767 + }, + { + "epoch": 0.03566796982232277, + "grad_norm": 539.410888671875, + "learning_rate": 1.9998319424592625e-06, + "loss": 54.4844, + "step": 3768 + }, + { + "epoch": 0.035677435844037826, + "grad_norm": 2037.66015625, + "learning_rate": 1.9998313799397643e-06, + "loss": 56.3672, + "step": 3769 + }, + { + "epoch": 0.03568690186575288, + "grad_norm": 403.28643798828125, + "learning_rate": 1.9998308164804895e-06, + "loss": 34.3906, + "step": 3770 + }, + { + "epoch": 0.03569636788746793, + "grad_norm": 273.6322326660156, + "learning_rate": 1.9998302520814388e-06, + "loss": 28.0, + "step": 3771 + }, + { + "epoch": 0.035705833909182985, + "grad_norm": 315.01910400390625, + "learning_rate": 1.9998296867426126e-06, + "loss": 25.2188, + "step": 3772 + }, + { + "epoch": 0.03571529993089804, + "grad_norm": 552.8152465820312, + "learning_rate": 1.999829120464012e-06, + "loss": 53.5, + "step": 3773 + }, + { + "epoch": 0.0357247659526131, + "grad_norm": 307.8646240234375, + "learning_rate": 1.9998285532456366e-06, + "loss": 27.6094, + "step": 3774 + }, + { + "epoch": 0.03573423197432815, + "grad_norm": 1011.8936157226562, + "learning_rate": 1.9998279850874876e-06, + "loss": 74.4688, + "step": 3775 + }, + { + "epoch": 0.035743697996043204, + "grad_norm": 231.0419464111328, + "learning_rate": 1.9998274159895652e-06, + "loss": 24.7188, + "step": 3776 + }, + { + "epoch": 0.03575316401775826, + "grad_norm": 285.2471008300781, + "learning_rate": 1.99982684595187e-06, + "loss": 24.3438, + "step": 3777 + }, + { + "epoch": 0.03576263003947331, + "grad_norm": 448.3897705078125, + "learning_rate": 1.9998262749744027e-06, + "loss": 30.7812, + "step": 3778 + }, + { + "epoch": 0.03577209606118836, + "grad_norm": 265.532470703125, + "learning_rate": 1.9998257030571638e-06, + "loss": 29.0312, + "step": 3779 + }, + { + "epoch": 0.035781562082903416, + "grad_norm": 911.9351806640625, + "learning_rate": 1.999825130200154e-06, + "loss": 51.4688, + "step": 3780 + }, + { + "epoch": 0.03579102810461847, + "grad_norm": 477.2528381347656, + "learning_rate": 1.9998245564033732e-06, + "loss": 28.0312, + "step": 3781 + }, + { + "epoch": 0.03580049412633353, + "grad_norm": 694.262939453125, + "learning_rate": 1.999823981666823e-06, + "loss": 61.4688, + "step": 3782 + }, + { + "epoch": 0.03580996014804858, + "grad_norm": 434.4980773925781, + "learning_rate": 1.9998234059905025e-06, + "loss": 37.6875, + "step": 3783 + }, + { + "epoch": 0.035819426169763635, + "grad_norm": 181.52154541015625, + "learning_rate": 1.9998228293744136e-06, + "loss": 26.5, + "step": 3784 + }, + { + "epoch": 0.03582889219147869, + "grad_norm": 203.62960815429688, + "learning_rate": 1.999822251818556e-06, + "loss": 25.5312, + "step": 3785 + }, + { + "epoch": 0.03583835821319374, + "grad_norm": 523.548583984375, + "learning_rate": 1.9998216733229305e-06, + "loss": 55.6562, + "step": 3786 + }, + { + "epoch": 0.035847824234908794, + "grad_norm": 458.91632080078125, + "learning_rate": 1.999821093887538e-06, + "loss": 26.0078, + "step": 3787 + }, + { + "epoch": 0.03585729025662385, + "grad_norm": 517.0496215820312, + "learning_rate": 1.9998205135123784e-06, + "loss": 35.4062, + "step": 3788 + }, + { + "epoch": 0.0358667562783389, + "grad_norm": 1582.4830322265625, + "learning_rate": 1.999819932197453e-06, + "loss": 43.3828, + "step": 3789 + }, + { + "epoch": 0.03587622230005395, + "grad_norm": 1118.1650390625, + "learning_rate": 1.9998193499427613e-06, + "loss": 55.7344, + "step": 3790 + }, + { + "epoch": 0.03588568832176901, + "grad_norm": 3.0942821502685547, + "learning_rate": 1.9998187667483045e-06, + "loss": 0.9614, + "step": 3791 + }, + { + "epoch": 0.035895154343484066, + "grad_norm": 363.1468811035156, + "learning_rate": 1.9998181826140836e-06, + "loss": 23.3984, + "step": 3792 + }, + { + "epoch": 0.03590462036519912, + "grad_norm": 611.9802856445312, + "learning_rate": 1.9998175975400984e-06, + "loss": 22.8828, + "step": 3793 + }, + { + "epoch": 0.03591408638691417, + "grad_norm": 540.023193359375, + "learning_rate": 1.9998170115263495e-06, + "loss": 43.1328, + "step": 3794 + }, + { + "epoch": 0.035923552408629225, + "grad_norm": 480.7174377441406, + "learning_rate": 1.999816424572838e-06, + "loss": 38.9844, + "step": 3795 + }, + { + "epoch": 0.03593301843034428, + "grad_norm": 252.84548950195312, + "learning_rate": 1.999815836679564e-06, + "loss": 32.75, + "step": 3796 + }, + { + "epoch": 0.03594248445205933, + "grad_norm": 368.3194885253906, + "learning_rate": 1.999815247846528e-06, + "loss": 25.1562, + "step": 3797 + }, + { + "epoch": 0.035951950473774384, + "grad_norm": 3.242469072341919, + "learning_rate": 1.9998146580737307e-06, + "loss": 0.8354, + "step": 3798 + }, + { + "epoch": 0.03596141649548944, + "grad_norm": 503.243408203125, + "learning_rate": 1.999814067361173e-06, + "loss": 48.4531, + "step": 3799 + }, + { + "epoch": 0.0359708825172045, + "grad_norm": 336.7393798828125, + "learning_rate": 1.999813475708855e-06, + "loss": 31.8281, + "step": 3800 + }, + { + "epoch": 0.03598034853891955, + "grad_norm": 273.2824401855469, + "learning_rate": 1.999812883116777e-06, + "loss": 32.5312, + "step": 3801 + }, + { + "epoch": 0.0359898145606346, + "grad_norm": 620.4681396484375, + "learning_rate": 1.99981228958494e-06, + "loss": 65.1562, + "step": 3802 + }, + { + "epoch": 0.035999280582349656, + "grad_norm": 457.119384765625, + "learning_rate": 1.999811695113345e-06, + "loss": 41.125, + "step": 3803 + }, + { + "epoch": 0.03600874660406471, + "grad_norm": 581.7626953125, + "learning_rate": 1.9998110997019917e-06, + "loss": 36.5078, + "step": 3804 + }, + { + "epoch": 0.03601821262577976, + "grad_norm": 200.71902465820312, + "learning_rate": 1.999810503350881e-06, + "loss": 24.2188, + "step": 3805 + }, + { + "epoch": 0.036027678647494815, + "grad_norm": 519.7050170898438, + "learning_rate": 1.9998099060600136e-06, + "loss": 57.5938, + "step": 3806 + }, + { + "epoch": 0.03603714466920987, + "grad_norm": 321.192138671875, + "learning_rate": 1.99980930782939e-06, + "loss": 32.7969, + "step": 3807 + }, + { + "epoch": 0.03604661069092493, + "grad_norm": 345.9913330078125, + "learning_rate": 1.9998087086590104e-06, + "loss": 25.5156, + "step": 3808 + }, + { + "epoch": 0.03605607671263998, + "grad_norm": 2.711761474609375, + "learning_rate": 1.9998081085488757e-06, + "loss": 0.8521, + "step": 3809 + }, + { + "epoch": 0.036065542734355034, + "grad_norm": 552.8751220703125, + "learning_rate": 1.9998075074989864e-06, + "loss": 45.3438, + "step": 3810 + }, + { + "epoch": 0.03607500875607009, + "grad_norm": 306.6983337402344, + "learning_rate": 1.999806905509343e-06, + "loss": 27.6562, + "step": 3811 + }, + { + "epoch": 0.03608447477778514, + "grad_norm": 180.33729553222656, + "learning_rate": 1.9998063025799466e-06, + "loss": 26.0938, + "step": 3812 + }, + { + "epoch": 0.03609394079950019, + "grad_norm": 1974.910888671875, + "learning_rate": 1.999805698710797e-06, + "loss": 39.5312, + "step": 3813 + }, + { + "epoch": 0.036103406821215246, + "grad_norm": 737.060302734375, + "learning_rate": 1.9998050939018953e-06, + "loss": 60.0625, + "step": 3814 + }, + { + "epoch": 0.0361128728429303, + "grad_norm": 236.17564392089844, + "learning_rate": 1.9998044881532415e-06, + "loss": 25.8594, + "step": 3815 + }, + { + "epoch": 0.03612233886464535, + "grad_norm": 466.0262756347656, + "learning_rate": 1.9998038814648367e-06, + "loss": 35.3125, + "step": 3816 + }, + { + "epoch": 0.03613180488636041, + "grad_norm": 211.784423828125, + "learning_rate": 1.999803273836681e-06, + "loss": 27.7969, + "step": 3817 + }, + { + "epoch": 0.036141270908075465, + "grad_norm": 1029.55029296875, + "learning_rate": 1.999802665268776e-06, + "loss": 57.6094, + "step": 3818 + }, + { + "epoch": 0.03615073692979052, + "grad_norm": 291.9998779296875, + "learning_rate": 1.999802055761121e-06, + "loss": 30.0156, + "step": 3819 + }, + { + "epoch": 0.03616020295150557, + "grad_norm": 532.6057739257812, + "learning_rate": 1.9998014453137172e-06, + "loss": 50.3906, + "step": 3820 + }, + { + "epoch": 0.036169668973220624, + "grad_norm": 242.8946990966797, + "learning_rate": 1.999800833926565e-06, + "loss": 33.4219, + "step": 3821 + }, + { + "epoch": 0.03617913499493568, + "grad_norm": 368.21966552734375, + "learning_rate": 1.999800221599665e-06, + "loss": 26.0234, + "step": 3822 + }, + { + "epoch": 0.03618860101665073, + "grad_norm": 186.35597229003906, + "learning_rate": 1.9997996083330184e-06, + "loss": 31.6719, + "step": 3823 + }, + { + "epoch": 0.03619806703836578, + "grad_norm": 3.0203752517700195, + "learning_rate": 1.9997989941266245e-06, + "loss": 0.818, + "step": 3824 + }, + { + "epoch": 0.03620753306008084, + "grad_norm": 514.2568359375, + "learning_rate": 1.9997983789804853e-06, + "loss": 32.0781, + "step": 3825 + }, + { + "epoch": 0.036216999081795896, + "grad_norm": 275.585205078125, + "learning_rate": 1.9997977628946002e-06, + "loss": 24.5156, + "step": 3826 + }, + { + "epoch": 0.03622646510351095, + "grad_norm": 2.9704911708831787, + "learning_rate": 1.9997971458689703e-06, + "loss": 0.7378, + "step": 3827 + }, + { + "epoch": 0.036235931125226, + "grad_norm": 280.3058166503906, + "learning_rate": 1.999796527903596e-06, + "loss": 36.3906, + "step": 3828 + }, + { + "epoch": 0.036245397146941055, + "grad_norm": 594.6834716796875, + "learning_rate": 1.999795908998478e-06, + "loss": 54.4531, + "step": 3829 + }, + { + "epoch": 0.03625486316865611, + "grad_norm": 457.7345275878906, + "learning_rate": 1.999795289153617e-06, + "loss": 52.2812, + "step": 3830 + }, + { + "epoch": 0.03626432919037116, + "grad_norm": 250.76795959472656, + "learning_rate": 1.9997946683690137e-06, + "loss": 28.6094, + "step": 3831 + }, + { + "epoch": 0.036273795212086214, + "grad_norm": 180.85812377929688, + "learning_rate": 1.999794046644668e-06, + "loss": 26.2031, + "step": 3832 + }, + { + "epoch": 0.03628326123380127, + "grad_norm": 745.2251586914062, + "learning_rate": 1.9997934239805815e-06, + "loss": 55.4062, + "step": 3833 + }, + { + "epoch": 0.03629272725551633, + "grad_norm": 374.36993408203125, + "learning_rate": 1.999792800376754e-06, + "loss": 27.7969, + "step": 3834 + }, + { + "epoch": 0.03630219327723138, + "grad_norm": 376.31781005859375, + "learning_rate": 1.999792175833186e-06, + "loss": 35.5312, + "step": 3835 + }, + { + "epoch": 0.03631165929894643, + "grad_norm": 3.223672866821289, + "learning_rate": 1.9997915503498788e-06, + "loss": 0.8635, + "step": 3836 + }, + { + "epoch": 0.036321125320661486, + "grad_norm": 3.3429481983184814, + "learning_rate": 1.999790923926832e-06, + "loss": 1.0273, + "step": 3837 + }, + { + "epoch": 0.03633059134237654, + "grad_norm": 2911.716552734375, + "learning_rate": 1.9997902965640476e-06, + "loss": 34.6836, + "step": 3838 + }, + { + "epoch": 0.03634005736409159, + "grad_norm": 749.7904052734375, + "learning_rate": 1.9997896682615245e-06, + "loss": 57.5156, + "step": 3839 + }, + { + "epoch": 0.036349523385806645, + "grad_norm": 660.9031372070312, + "learning_rate": 1.9997890390192645e-06, + "loss": 27.375, + "step": 3840 + }, + { + "epoch": 0.0363589894075217, + "grad_norm": 3.061295509338379, + "learning_rate": 1.999788408837268e-06, + "loss": 0.9614, + "step": 3841 + }, + { + "epoch": 0.03636845542923675, + "grad_norm": 226.6290283203125, + "learning_rate": 1.9997877777155352e-06, + "loss": 25.0078, + "step": 3842 + }, + { + "epoch": 0.03637792145095181, + "grad_norm": 537.0454711914062, + "learning_rate": 1.9997871456540676e-06, + "loss": 39.1094, + "step": 3843 + }, + { + "epoch": 0.036387387472666864, + "grad_norm": 177.8740234375, + "learning_rate": 1.9997865126528643e-06, + "loss": 26.7969, + "step": 3844 + }, + { + "epoch": 0.03639685349438192, + "grad_norm": 706.8685302734375, + "learning_rate": 1.999785878711927e-06, + "loss": 38.0312, + "step": 3845 + }, + { + "epoch": 0.03640631951609697, + "grad_norm": 1075.628173828125, + "learning_rate": 1.999785243831256e-06, + "loss": 70.3359, + "step": 3846 + }, + { + "epoch": 0.03641578553781202, + "grad_norm": 265.3126525878906, + "learning_rate": 1.999784608010852e-06, + "loss": 25.2188, + "step": 3847 + }, + { + "epoch": 0.036425251559527076, + "grad_norm": 2.9868297576904297, + "learning_rate": 1.9997839712507157e-06, + "loss": 0.8403, + "step": 3848 + }, + { + "epoch": 0.03643471758124213, + "grad_norm": 353.4396057128906, + "learning_rate": 1.999783333550847e-06, + "loss": 27.4688, + "step": 3849 + }, + { + "epoch": 0.03644418360295718, + "grad_norm": 538.0933227539062, + "learning_rate": 1.9997826949112474e-06, + "loss": 22.6172, + "step": 3850 + }, + { + "epoch": 0.03645364962467224, + "grad_norm": 509.90850830078125, + "learning_rate": 1.9997820553319168e-06, + "loss": 37.6875, + "step": 3851 + }, + { + "epoch": 0.036463115646387295, + "grad_norm": 436.5459289550781, + "learning_rate": 1.9997814148128565e-06, + "loss": 31.1328, + "step": 3852 + }, + { + "epoch": 0.03647258166810235, + "grad_norm": 265.6065979003906, + "learning_rate": 1.9997807733540665e-06, + "loss": 31.4844, + "step": 3853 + }, + { + "epoch": 0.0364820476898174, + "grad_norm": 222.5419921875, + "learning_rate": 1.9997801309555476e-06, + "loss": 27.8906, + "step": 3854 + }, + { + "epoch": 0.036491513711532454, + "grad_norm": 224.63125610351562, + "learning_rate": 1.9997794876173004e-06, + "loss": 21.8125, + "step": 3855 + }, + { + "epoch": 0.03650097973324751, + "grad_norm": 710.0217895507812, + "learning_rate": 1.9997788433393255e-06, + "loss": 53.3906, + "step": 3856 + }, + { + "epoch": 0.03651044575496256, + "grad_norm": 1082.8370361328125, + "learning_rate": 1.9997781981216236e-06, + "loss": 58.9688, + "step": 3857 + }, + { + "epoch": 0.03651991177667761, + "grad_norm": 313.231689453125, + "learning_rate": 1.9997775519641953e-06, + "loss": 27.1719, + "step": 3858 + }, + { + "epoch": 0.036529377798392666, + "grad_norm": 215.9691619873047, + "learning_rate": 1.999776904867041e-06, + "loss": 25.875, + "step": 3859 + }, + { + "epoch": 0.036538843820107726, + "grad_norm": 475.8743591308594, + "learning_rate": 1.999776256830161e-06, + "loss": 33.6094, + "step": 3860 + }, + { + "epoch": 0.03654830984182278, + "grad_norm": 448.6815490722656, + "learning_rate": 1.9997756078535572e-06, + "loss": 63.9453, + "step": 3861 + }, + { + "epoch": 0.03655777586353783, + "grad_norm": 598.0234375, + "learning_rate": 1.999774957937229e-06, + "loss": 52.6719, + "step": 3862 + }, + { + "epoch": 0.036567241885252885, + "grad_norm": 2.6018099784851074, + "learning_rate": 1.9997743070811775e-06, + "loss": 0.8071, + "step": 3863 + }, + { + "epoch": 0.03657670790696794, + "grad_norm": 359.6696472167969, + "learning_rate": 1.999773655285403e-06, + "loss": 34.1719, + "step": 3864 + }, + { + "epoch": 0.03658617392868299, + "grad_norm": 473.6632385253906, + "learning_rate": 1.9997730025499065e-06, + "loss": 27.8281, + "step": 3865 + }, + { + "epoch": 0.036595639950398044, + "grad_norm": 563.3773803710938, + "learning_rate": 1.999772348874688e-06, + "loss": 54.4531, + "step": 3866 + }, + { + "epoch": 0.0366051059721131, + "grad_norm": 300.0921325683594, + "learning_rate": 1.999771694259749e-06, + "loss": 25.1562, + "step": 3867 + }, + { + "epoch": 0.03661457199382816, + "grad_norm": 436.8417053222656, + "learning_rate": 1.9997710387050898e-06, + "loss": 41.6797, + "step": 3868 + }, + { + "epoch": 0.03662403801554321, + "grad_norm": 245.89419555664062, + "learning_rate": 1.9997703822107103e-06, + "loss": 22.7734, + "step": 3869 + }, + { + "epoch": 0.03663350403725826, + "grad_norm": 362.1257629394531, + "learning_rate": 1.999769724776612e-06, + "loss": 27.3594, + "step": 3870 + }, + { + "epoch": 0.036642970058973316, + "grad_norm": 661.2514038085938, + "learning_rate": 1.999769066402795e-06, + "loss": 43.125, + "step": 3871 + }, + { + "epoch": 0.03665243608068837, + "grad_norm": 664.4239501953125, + "learning_rate": 1.9997684070892605e-06, + "loss": 35.8906, + "step": 3872 + }, + { + "epoch": 0.03666190210240342, + "grad_norm": 526.2892456054688, + "learning_rate": 1.9997677468360084e-06, + "loss": 59.0625, + "step": 3873 + }, + { + "epoch": 0.036671368124118475, + "grad_norm": 727.5333251953125, + "learning_rate": 1.9997670856430397e-06, + "loss": 31.5391, + "step": 3874 + }, + { + "epoch": 0.03668083414583353, + "grad_norm": 443.4952087402344, + "learning_rate": 1.999766423510355e-06, + "loss": 23.25, + "step": 3875 + }, + { + "epoch": 0.03669030016754858, + "grad_norm": 676.1340942382812, + "learning_rate": 1.9997657604379548e-06, + "loss": 44.3438, + "step": 3876 + }, + { + "epoch": 0.03669976618926364, + "grad_norm": 694.0687255859375, + "learning_rate": 1.9997650964258402e-06, + "loss": 78.75, + "step": 3877 + }, + { + "epoch": 0.036709232210978694, + "grad_norm": 807.38330078125, + "learning_rate": 1.999764431474011e-06, + "loss": 69.375, + "step": 3878 + }, + { + "epoch": 0.03671869823269375, + "grad_norm": 265.9314880371094, + "learning_rate": 1.9997637655824686e-06, + "loss": 30.375, + "step": 3879 + }, + { + "epoch": 0.0367281642544088, + "grad_norm": 215.1320037841797, + "learning_rate": 1.9997630987512133e-06, + "loss": 26.6719, + "step": 3880 + }, + { + "epoch": 0.03673763027612385, + "grad_norm": 420.2221984863281, + "learning_rate": 1.9997624309802456e-06, + "loss": 29.2812, + "step": 3881 + }, + { + "epoch": 0.036747096297838906, + "grad_norm": 995.6256713867188, + "learning_rate": 1.999761762269566e-06, + "loss": 67.6406, + "step": 3882 + }, + { + "epoch": 0.03675656231955396, + "grad_norm": 230.09864807128906, + "learning_rate": 1.999761092619176e-06, + "loss": 22.3359, + "step": 3883 + }, + { + "epoch": 0.03676602834126901, + "grad_norm": 509.7961120605469, + "learning_rate": 1.999760422029075e-06, + "loss": 31.3203, + "step": 3884 + }, + { + "epoch": 0.036775494362984065, + "grad_norm": 542.6958618164062, + "learning_rate": 1.9997597504992643e-06, + "loss": 49.6406, + "step": 3885 + }, + { + "epoch": 0.036784960384699125, + "grad_norm": 296.7717590332031, + "learning_rate": 1.9997590780297447e-06, + "loss": 36.4219, + "step": 3886 + }, + { + "epoch": 0.03679442640641418, + "grad_norm": 2.9542412757873535, + "learning_rate": 1.9997584046205166e-06, + "loss": 0.8999, + "step": 3887 + }, + { + "epoch": 0.03680389242812923, + "grad_norm": 275.5627746582031, + "learning_rate": 1.999757730271581e-06, + "loss": 39.2969, + "step": 3888 + }, + { + "epoch": 0.036813358449844284, + "grad_norm": 536.58984375, + "learning_rate": 1.9997570549829374e-06, + "loss": 44.6094, + "step": 3889 + }, + { + "epoch": 0.03682282447155934, + "grad_norm": 597.2581787109375, + "learning_rate": 1.9997563787545876e-06, + "loss": 55.9688, + "step": 3890 + }, + { + "epoch": 0.03683229049327439, + "grad_norm": 261.741455078125, + "learning_rate": 1.999755701586532e-06, + "loss": 28.9219, + "step": 3891 + }, + { + "epoch": 0.03684175651498944, + "grad_norm": 282.5636901855469, + "learning_rate": 1.999755023478771e-06, + "loss": 34.7344, + "step": 3892 + }, + { + "epoch": 0.036851222536704496, + "grad_norm": 1203.3533935546875, + "learning_rate": 1.999754344431305e-06, + "loss": 73.2656, + "step": 3893 + }, + { + "epoch": 0.036860688558419556, + "grad_norm": 186.94210815429688, + "learning_rate": 1.9997536644441353e-06, + "loss": 26.2344, + "step": 3894 + }, + { + "epoch": 0.03687015458013461, + "grad_norm": 282.1566162109375, + "learning_rate": 1.999752983517262e-06, + "loss": 25.0781, + "step": 3895 + }, + { + "epoch": 0.03687962060184966, + "grad_norm": 478.47918701171875, + "learning_rate": 1.999752301650686e-06, + "loss": 39.6875, + "step": 3896 + }, + { + "epoch": 0.036889086623564715, + "grad_norm": 404.4712219238281, + "learning_rate": 1.999751618844408e-06, + "loss": 39.4375, + "step": 3897 + }, + { + "epoch": 0.03689855264527977, + "grad_norm": 597.1972045898438, + "learning_rate": 1.999750935098428e-06, + "loss": 62.2188, + "step": 3898 + }, + { + "epoch": 0.03690801866699482, + "grad_norm": 321.4482727050781, + "learning_rate": 1.9997502504127478e-06, + "loss": 28.9688, + "step": 3899 + }, + { + "epoch": 0.036917484688709874, + "grad_norm": 352.9864196777344, + "learning_rate": 1.999749564787367e-06, + "loss": 25.0781, + "step": 3900 + }, + { + "epoch": 0.03692695071042493, + "grad_norm": 224.22055053710938, + "learning_rate": 1.999748878222287e-06, + "loss": 31.7812, + "step": 3901 + }, + { + "epoch": 0.03693641673213998, + "grad_norm": 253.4858856201172, + "learning_rate": 1.999748190717508e-06, + "loss": 25.9375, + "step": 3902 + }, + { + "epoch": 0.03694588275385504, + "grad_norm": 511.7886047363281, + "learning_rate": 1.9997475022730303e-06, + "loss": 54.1562, + "step": 3903 + }, + { + "epoch": 0.03695534877557009, + "grad_norm": 588.7847900390625, + "learning_rate": 1.9997468128888557e-06, + "loss": 54.4609, + "step": 3904 + }, + { + "epoch": 0.036964814797285146, + "grad_norm": 338.9720458984375, + "learning_rate": 1.9997461225649836e-06, + "loss": 19.7969, + "step": 3905 + }, + { + "epoch": 0.0369742808190002, + "grad_norm": 723.777099609375, + "learning_rate": 1.9997454313014152e-06, + "loss": 74.4062, + "step": 3906 + }, + { + "epoch": 0.03698374684071525, + "grad_norm": 449.1311340332031, + "learning_rate": 1.999744739098151e-06, + "loss": 38.0156, + "step": 3907 + }, + { + "epoch": 0.036993212862430305, + "grad_norm": 204.79257202148438, + "learning_rate": 1.9997440459551924e-06, + "loss": 24.5938, + "step": 3908 + }, + { + "epoch": 0.03700267888414536, + "grad_norm": 239.16940307617188, + "learning_rate": 1.999743351872539e-06, + "loss": 32.0469, + "step": 3909 + }, + { + "epoch": 0.03701214490586041, + "grad_norm": 714.3076171875, + "learning_rate": 1.999742656850192e-06, + "loss": 34.0195, + "step": 3910 + }, + { + "epoch": 0.03702161092757547, + "grad_norm": 237.63482666015625, + "learning_rate": 1.999741960888152e-06, + "loss": 23.7266, + "step": 3911 + }, + { + "epoch": 0.037031076949290524, + "grad_norm": 261.95404052734375, + "learning_rate": 1.9997412639864194e-06, + "loss": 30.5, + "step": 3912 + }, + { + "epoch": 0.03704054297100558, + "grad_norm": 209.9373016357422, + "learning_rate": 1.999740566144995e-06, + "loss": 21.0703, + "step": 3913 + }, + { + "epoch": 0.03705000899272063, + "grad_norm": 2.9716081619262695, + "learning_rate": 1.99973986736388e-06, + "loss": 0.8838, + "step": 3914 + }, + { + "epoch": 0.03705947501443568, + "grad_norm": 731.6339111328125, + "learning_rate": 1.9997391676430737e-06, + "loss": 49.6562, + "step": 3915 + }, + { + "epoch": 0.037068941036150736, + "grad_norm": 843.6861572265625, + "learning_rate": 1.9997384669825786e-06, + "loss": 49.2344, + "step": 3916 + }, + { + "epoch": 0.03707840705786579, + "grad_norm": 254.0948486328125, + "learning_rate": 1.999737765382394e-06, + "loss": 26.5312, + "step": 3917 + }, + { + "epoch": 0.03708787307958084, + "grad_norm": 408.58837890625, + "learning_rate": 1.999737062842521e-06, + "loss": 63.7656, + "step": 3918 + }, + { + "epoch": 0.037097339101295895, + "grad_norm": 252.3920440673828, + "learning_rate": 1.9997363593629595e-06, + "loss": 27.3047, + "step": 3919 + }, + { + "epoch": 0.037106805123010955, + "grad_norm": 337.0741882324219, + "learning_rate": 1.9997356549437116e-06, + "loss": 23.6875, + "step": 3920 + }, + { + "epoch": 0.03711627114472601, + "grad_norm": 3.3655593395233154, + "learning_rate": 1.999734949584777e-06, + "loss": 0.9087, + "step": 3921 + }, + { + "epoch": 0.03712573716644106, + "grad_norm": 241.12258911132812, + "learning_rate": 1.9997342432861564e-06, + "loss": 29.2188, + "step": 3922 + }, + { + "epoch": 0.037135203188156114, + "grad_norm": 336.6657409667969, + "learning_rate": 1.999733536047851e-06, + "loss": 27.5, + "step": 3923 + }, + { + "epoch": 0.03714466920987117, + "grad_norm": 528.3167724609375, + "learning_rate": 1.999732827869861e-06, + "loss": 66.0234, + "step": 3924 + }, + { + "epoch": 0.03715413523158622, + "grad_norm": 470.5655212402344, + "learning_rate": 1.999732118752187e-06, + "loss": 49.2188, + "step": 3925 + }, + { + "epoch": 0.03716360125330127, + "grad_norm": 267.2923889160156, + "learning_rate": 1.9997314086948305e-06, + "loss": 24.9766, + "step": 3926 + }, + { + "epoch": 0.037173067275016326, + "grad_norm": 765.3546752929688, + "learning_rate": 1.999730697697791e-06, + "loss": 41.2422, + "step": 3927 + }, + { + "epoch": 0.03718253329673138, + "grad_norm": 610.8989868164062, + "learning_rate": 1.9997299857610696e-06, + "loss": 63.875, + "step": 3928 + }, + { + "epoch": 0.03719199931844644, + "grad_norm": 430.7619934082031, + "learning_rate": 1.999729272884667e-06, + "loss": 28.9453, + "step": 3929 + }, + { + "epoch": 0.03720146534016149, + "grad_norm": 645.8624267578125, + "learning_rate": 1.9997285590685843e-06, + "loss": 36.4961, + "step": 3930 + }, + { + "epoch": 0.037210931361876545, + "grad_norm": 226.1885986328125, + "learning_rate": 1.9997278443128214e-06, + "loss": 24.4688, + "step": 3931 + }, + { + "epoch": 0.0372203973835916, + "grad_norm": 557.6259765625, + "learning_rate": 1.9997271286173797e-06, + "loss": 39.875, + "step": 3932 + }, + { + "epoch": 0.03722986340530665, + "grad_norm": 3.5745017528533936, + "learning_rate": 1.999726411982259e-06, + "loss": 1.0464, + "step": 3933 + }, + { + "epoch": 0.037239329427021704, + "grad_norm": 3.126005172729492, + "learning_rate": 1.999725694407461e-06, + "loss": 0.8364, + "step": 3934 + }, + { + "epoch": 0.03724879544873676, + "grad_norm": 672.6364135742188, + "learning_rate": 1.999724975892986e-06, + "loss": 46.1406, + "step": 3935 + }, + { + "epoch": 0.03725826147045181, + "grad_norm": 422.240966796875, + "learning_rate": 1.9997242564388342e-06, + "loss": 63.75, + "step": 3936 + }, + { + "epoch": 0.03726772749216687, + "grad_norm": 229.31187438964844, + "learning_rate": 1.999723536045007e-06, + "loss": 19.3438, + "step": 3937 + }, + { + "epoch": 0.03727719351388192, + "grad_norm": 445.7690124511719, + "learning_rate": 1.9997228147115043e-06, + "loss": 41.4844, + "step": 3938 + }, + { + "epoch": 0.037286659535596976, + "grad_norm": 353.1162414550781, + "learning_rate": 1.9997220924383275e-06, + "loss": 30.3281, + "step": 3939 + }, + { + "epoch": 0.03729612555731203, + "grad_norm": 835.0104370117188, + "learning_rate": 1.9997213692254764e-06, + "loss": 37.7578, + "step": 3940 + }, + { + "epoch": 0.03730559157902708, + "grad_norm": 517.5938110351562, + "learning_rate": 1.9997206450729528e-06, + "loss": 36.5234, + "step": 3941 + }, + { + "epoch": 0.037315057600742135, + "grad_norm": 288.86279296875, + "learning_rate": 1.9997199199807566e-06, + "loss": 28.8125, + "step": 3942 + }, + { + "epoch": 0.03732452362245719, + "grad_norm": 272.73089599609375, + "learning_rate": 1.9997191939488892e-06, + "loss": 45.625, + "step": 3943 + }, + { + "epoch": 0.03733398964417224, + "grad_norm": 2.424023389816284, + "learning_rate": 1.99971846697735e-06, + "loss": 0.74, + "step": 3944 + }, + { + "epoch": 0.037343455665887294, + "grad_norm": 618.109619140625, + "learning_rate": 1.999717739066141e-06, + "loss": 33.7266, + "step": 3945 + }, + { + "epoch": 0.037352921687602354, + "grad_norm": 516.643798828125, + "learning_rate": 1.999717010215262e-06, + "loss": 33.4844, + "step": 3946 + }, + { + "epoch": 0.03736238770931741, + "grad_norm": 355.71246337890625, + "learning_rate": 1.9997162804247145e-06, + "loss": 28.1719, + "step": 3947 + }, + { + "epoch": 0.03737185373103246, + "grad_norm": 200.2396240234375, + "learning_rate": 1.9997155496944985e-06, + "loss": 22.2969, + "step": 3948 + }, + { + "epoch": 0.03738131975274751, + "grad_norm": 367.6573181152344, + "learning_rate": 1.999714818024615e-06, + "loss": 29.25, + "step": 3949 + }, + { + "epoch": 0.037390785774462566, + "grad_norm": 437.60101318359375, + "learning_rate": 1.9997140854150646e-06, + "loss": 30.8906, + "step": 3950 + }, + { + "epoch": 0.03740025179617762, + "grad_norm": 1148.0882568359375, + "learning_rate": 1.9997133518658476e-06, + "loss": 55.5547, + "step": 3951 + }, + { + "epoch": 0.03740971781789267, + "grad_norm": 353.47894287109375, + "learning_rate": 1.9997126173769657e-06, + "loss": 25.875, + "step": 3952 + }, + { + "epoch": 0.037419183839607725, + "grad_norm": 3.892012357711792, + "learning_rate": 1.999711881948419e-06, + "loss": 1.0361, + "step": 3953 + }, + { + "epoch": 0.037428649861322785, + "grad_norm": 728.363037109375, + "learning_rate": 1.9997111455802076e-06, + "loss": 40.4141, + "step": 3954 + }, + { + "epoch": 0.03743811588303784, + "grad_norm": 187.90052795410156, + "learning_rate": 1.999710408272333e-06, + "loss": 17.1719, + "step": 3955 + }, + { + "epoch": 0.03744758190475289, + "grad_norm": 424.6152038574219, + "learning_rate": 1.999709670024796e-06, + "loss": 34.8281, + "step": 3956 + }, + { + "epoch": 0.037457047926467944, + "grad_norm": 750.8565673828125, + "learning_rate": 1.9997089308375962e-06, + "loss": 81.4062, + "step": 3957 + }, + { + "epoch": 0.037466513948183, + "grad_norm": 175.21832275390625, + "learning_rate": 1.9997081907107356e-06, + "loss": 25.4062, + "step": 3958 + }, + { + "epoch": 0.03747597996989805, + "grad_norm": 952.3078002929688, + "learning_rate": 1.999707449644214e-06, + "loss": 49.4062, + "step": 3959 + }, + { + "epoch": 0.0374854459916131, + "grad_norm": 350.1123046875, + "learning_rate": 1.9997067076380326e-06, + "loss": 33.5938, + "step": 3960 + }, + { + "epoch": 0.037494912013328156, + "grad_norm": 652.3302612304688, + "learning_rate": 1.9997059646921925e-06, + "loss": 31.6641, + "step": 3961 + }, + { + "epoch": 0.03750437803504321, + "grad_norm": 1013.5723876953125, + "learning_rate": 1.999705220806693e-06, + "loss": 36.5312, + "step": 3962 + }, + { + "epoch": 0.03751384405675827, + "grad_norm": 638.8154907226562, + "learning_rate": 1.9997044759815358e-06, + "loss": 27.7344, + "step": 3963 + }, + { + "epoch": 0.03752331007847332, + "grad_norm": 314.95135498046875, + "learning_rate": 1.9997037302167217e-06, + "loss": 29.6016, + "step": 3964 + }, + { + "epoch": 0.037532776100188375, + "grad_norm": 482.0820617675781, + "learning_rate": 1.999702983512251e-06, + "loss": 44.3906, + "step": 3965 + }, + { + "epoch": 0.03754224212190343, + "grad_norm": 390.84503173828125, + "learning_rate": 1.9997022358681248e-06, + "loss": 34.7969, + "step": 3966 + }, + { + "epoch": 0.03755170814361848, + "grad_norm": 285.4569396972656, + "learning_rate": 1.999701487284343e-06, + "loss": 23.0156, + "step": 3967 + }, + { + "epoch": 0.037561174165333534, + "grad_norm": 275.0978088378906, + "learning_rate": 1.999700737760907e-06, + "loss": 21.7188, + "step": 3968 + }, + { + "epoch": 0.03757064018704859, + "grad_norm": 428.83599853515625, + "learning_rate": 1.9996999872978174e-06, + "loss": 34.0312, + "step": 3969 + }, + { + "epoch": 0.03758010620876364, + "grad_norm": 4.7636895179748535, + "learning_rate": 1.999699235895075e-06, + "loss": 1.0586, + "step": 3970 + }, + { + "epoch": 0.03758957223047869, + "grad_norm": 225.14913940429688, + "learning_rate": 1.99969848355268e-06, + "loss": 25.0, + "step": 3971 + }, + { + "epoch": 0.03759903825219375, + "grad_norm": 3.120903491973877, + "learning_rate": 1.999697730270634e-06, + "loss": 0.897, + "step": 3972 + }, + { + "epoch": 0.037608504273908806, + "grad_norm": 547.7289428710938, + "learning_rate": 1.9996969760489366e-06, + "loss": 49.9688, + "step": 3973 + }, + { + "epoch": 0.03761797029562386, + "grad_norm": 216.97418212890625, + "learning_rate": 1.9996962208875892e-06, + "loss": 24.0938, + "step": 3974 + }, + { + "epoch": 0.03762743631733891, + "grad_norm": 387.2430114746094, + "learning_rate": 1.9996954647865926e-06, + "loss": 27.3438, + "step": 3975 + }, + { + "epoch": 0.037636902339053965, + "grad_norm": 292.34246826171875, + "learning_rate": 1.9996947077459473e-06, + "loss": 25.5, + "step": 3976 + }, + { + "epoch": 0.03764636836076902, + "grad_norm": 571.392333984375, + "learning_rate": 1.999693949765654e-06, + "loss": 46.875, + "step": 3977 + }, + { + "epoch": 0.03765583438248407, + "grad_norm": 724.0386962890625, + "learning_rate": 1.999693190845713e-06, + "loss": 61.125, + "step": 3978 + }, + { + "epoch": 0.037665300404199124, + "grad_norm": 230.63526916503906, + "learning_rate": 1.999692430986126e-06, + "loss": 33.0156, + "step": 3979 + }, + { + "epoch": 0.037674766425914184, + "grad_norm": 721.3485717773438, + "learning_rate": 1.999691670186893e-06, + "loss": 73.125, + "step": 3980 + }, + { + "epoch": 0.03768423244762924, + "grad_norm": 686.45556640625, + "learning_rate": 1.9996909084480145e-06, + "loss": 55.2812, + "step": 3981 + }, + { + "epoch": 0.03769369846934429, + "grad_norm": 146.12081909179688, + "learning_rate": 1.999690145769492e-06, + "loss": 21.4375, + "step": 3982 + }, + { + "epoch": 0.03770316449105934, + "grad_norm": 391.4363098144531, + "learning_rate": 1.9996893821513257e-06, + "loss": 58.1562, + "step": 3983 + }, + { + "epoch": 0.037712630512774396, + "grad_norm": 610.6328125, + "learning_rate": 1.999688617593516e-06, + "loss": 52.625, + "step": 3984 + }, + { + "epoch": 0.03772209653448945, + "grad_norm": 417.68218994140625, + "learning_rate": 1.999687852096065e-06, + "loss": 44.0625, + "step": 3985 + }, + { + "epoch": 0.0377315625562045, + "grad_norm": 1586.3619384765625, + "learning_rate": 1.9996870856589715e-06, + "loss": 81.9531, + "step": 3986 + }, + { + "epoch": 0.037741028577919555, + "grad_norm": 570.1763916015625, + "learning_rate": 1.999686318282238e-06, + "loss": 67.9688, + "step": 3987 + }, + { + "epoch": 0.03775049459963461, + "grad_norm": 151.45359802246094, + "learning_rate": 1.9996855499658637e-06, + "loss": 26.375, + "step": 3988 + }, + { + "epoch": 0.03775996062134967, + "grad_norm": 634.9603881835938, + "learning_rate": 1.9996847807098504e-06, + "loss": 30.6406, + "step": 3989 + }, + { + "epoch": 0.03776942664306472, + "grad_norm": 2.9788503646850586, + "learning_rate": 1.999684010514198e-06, + "loss": 0.8584, + "step": 3990 + }, + { + "epoch": 0.037778892664779774, + "grad_norm": 465.9812927246094, + "learning_rate": 1.999683239378908e-06, + "loss": 52.7188, + "step": 3991 + }, + { + "epoch": 0.03778835868649483, + "grad_norm": 531.2736206054688, + "learning_rate": 1.9996824673039808e-06, + "loss": 47.625, + "step": 3992 + }, + { + "epoch": 0.03779782470820988, + "grad_norm": 1159.8486328125, + "learning_rate": 1.999681694289417e-06, + "loss": 54.5547, + "step": 3993 + }, + { + "epoch": 0.03780729072992493, + "grad_norm": 760.0477905273438, + "learning_rate": 1.9996809203352174e-06, + "loss": 48.0, + "step": 3994 + }, + { + "epoch": 0.037816756751639986, + "grad_norm": 457.92926025390625, + "learning_rate": 1.999680145441383e-06, + "loss": 24.3047, + "step": 3995 + }, + { + "epoch": 0.03782622277335504, + "grad_norm": 347.4511413574219, + "learning_rate": 1.999679369607914e-06, + "loss": 41.7812, + "step": 3996 + }, + { + "epoch": 0.0378356887950701, + "grad_norm": 462.1757507324219, + "learning_rate": 1.9996785928348115e-06, + "loss": 45.5156, + "step": 3997 + }, + { + "epoch": 0.03784515481678515, + "grad_norm": 413.759521484375, + "learning_rate": 1.9996778151220767e-06, + "loss": 43.5469, + "step": 3998 + }, + { + "epoch": 0.037854620838500205, + "grad_norm": 210.07760620117188, + "learning_rate": 1.9996770364697092e-06, + "loss": 22.3281, + "step": 3999 + }, + { + "epoch": 0.03786408686021526, + "grad_norm": 517.9163818359375, + "learning_rate": 1.9996762568777107e-06, + "loss": 38.3125, + "step": 4000 + }, + { + "epoch": 0.03787355288193031, + "grad_norm": 359.9374084472656, + "learning_rate": 1.9996754763460812e-06, + "loss": 33.7344, + "step": 4001 + }, + { + "epoch": 0.037883018903645364, + "grad_norm": 348.8034362792969, + "learning_rate": 1.999674694874822e-06, + "loss": 34.1875, + "step": 4002 + }, + { + "epoch": 0.03789248492536042, + "grad_norm": 682.2151489257812, + "learning_rate": 1.9996739124639334e-06, + "loss": 34.0547, + "step": 4003 + }, + { + "epoch": 0.03790195094707547, + "grad_norm": 642.5120849609375, + "learning_rate": 1.9996731291134164e-06, + "loss": 25.4688, + "step": 4004 + }, + { + "epoch": 0.03791141696879052, + "grad_norm": 236.0697784423828, + "learning_rate": 1.9996723448232717e-06, + "loss": 23.2266, + "step": 4005 + }, + { + "epoch": 0.03792088299050558, + "grad_norm": 374.46002197265625, + "learning_rate": 1.9996715595935003e-06, + "loss": 44.9062, + "step": 4006 + }, + { + "epoch": 0.037930349012220636, + "grad_norm": 187.06724548339844, + "learning_rate": 1.999670773424102e-06, + "loss": 25.4688, + "step": 4007 + }, + { + "epoch": 0.03793981503393569, + "grad_norm": 306.43218994140625, + "learning_rate": 1.9996699863150787e-06, + "loss": 21.7031, + "step": 4008 + }, + { + "epoch": 0.03794928105565074, + "grad_norm": 2.5986990928649902, + "learning_rate": 1.9996691982664308e-06, + "loss": 0.9175, + "step": 4009 + }, + { + "epoch": 0.037958747077365795, + "grad_norm": 422.97735595703125, + "learning_rate": 1.9996684092781586e-06, + "loss": 27.1875, + "step": 4010 + }, + { + "epoch": 0.03796821309908085, + "grad_norm": 367.4107360839844, + "learning_rate": 1.9996676193502634e-06, + "loss": 32.0938, + "step": 4011 + }, + { + "epoch": 0.0379776791207959, + "grad_norm": 154.57363891601562, + "learning_rate": 1.9996668284827453e-06, + "loss": 23.6406, + "step": 4012 + }, + { + "epoch": 0.037987145142510954, + "grad_norm": 3.1948070526123047, + "learning_rate": 1.9996660366756055e-06, + "loss": 0.9961, + "step": 4013 + }, + { + "epoch": 0.03799661116422601, + "grad_norm": 866.84716796875, + "learning_rate": 1.999665243928845e-06, + "loss": 55.3125, + "step": 4014 + }, + { + "epoch": 0.03800607718594107, + "grad_norm": 1062.6695556640625, + "learning_rate": 1.9996644502424637e-06, + "loss": 65.8125, + "step": 4015 + }, + { + "epoch": 0.03801554320765612, + "grad_norm": 1309.7294921875, + "learning_rate": 1.999663655616463e-06, + "loss": 45.9688, + "step": 4016 + }, + { + "epoch": 0.03802500922937117, + "grad_norm": 2.63024640083313, + "learning_rate": 1.999662860050844e-06, + "loss": 0.8809, + "step": 4017 + }, + { + "epoch": 0.038034475251086226, + "grad_norm": 663.9353637695312, + "learning_rate": 1.9996620635456063e-06, + "loss": 43.3906, + "step": 4018 + }, + { + "epoch": 0.03804394127280128, + "grad_norm": 488.9586181640625, + "learning_rate": 1.9996612661007516e-06, + "loss": 43.5312, + "step": 4019 + }, + { + "epoch": 0.03805340729451633, + "grad_norm": 340.201904296875, + "learning_rate": 1.9996604677162802e-06, + "loss": 39.1797, + "step": 4020 + }, + { + "epoch": 0.038062873316231385, + "grad_norm": 702.12109375, + "learning_rate": 1.9996596683921935e-06, + "loss": 67.6484, + "step": 4021 + }, + { + "epoch": 0.03807233933794644, + "grad_norm": 441.03387451171875, + "learning_rate": 1.999658868128491e-06, + "loss": 52.7344, + "step": 4022 + }, + { + "epoch": 0.0380818053596615, + "grad_norm": 746.7889404296875, + "learning_rate": 1.999658066925175e-06, + "loss": 46.5625, + "step": 4023 + }, + { + "epoch": 0.03809127138137655, + "grad_norm": 313.63427734375, + "learning_rate": 1.9996572647822443e-06, + "loss": 30.1562, + "step": 4024 + }, + { + "epoch": 0.038100737403091604, + "grad_norm": 324.4981384277344, + "learning_rate": 1.9996564616997018e-06, + "loss": 17.0469, + "step": 4025 + }, + { + "epoch": 0.03811020342480666, + "grad_norm": 252.85865783691406, + "learning_rate": 1.999655657677547e-06, + "loss": 27.8438, + "step": 4026 + }, + { + "epoch": 0.03811966944652171, + "grad_norm": 204.45559692382812, + "learning_rate": 1.9996548527157808e-06, + "loss": 25.4219, + "step": 4027 + }, + { + "epoch": 0.03812913546823676, + "grad_norm": 664.2033081054688, + "learning_rate": 1.999654046814404e-06, + "loss": 31.7422, + "step": 4028 + }, + { + "epoch": 0.038138601489951816, + "grad_norm": 270.58209228515625, + "learning_rate": 1.9996532399734174e-06, + "loss": 22.4531, + "step": 4029 + }, + { + "epoch": 0.03814806751166687, + "grad_norm": 441.8704833984375, + "learning_rate": 1.999652432192822e-06, + "loss": 49.3906, + "step": 4030 + }, + { + "epoch": 0.03815753353338192, + "grad_norm": 3.2485833168029785, + "learning_rate": 1.9996516234726186e-06, + "loss": 0.8062, + "step": 4031 + }, + { + "epoch": 0.03816699955509698, + "grad_norm": 1031.78369140625, + "learning_rate": 1.999650813812807e-06, + "loss": 29.4375, + "step": 4032 + }, + { + "epoch": 0.038176465576812035, + "grad_norm": 764.5628662109375, + "learning_rate": 1.9996500032133893e-06, + "loss": 37.8789, + "step": 4033 + }, + { + "epoch": 0.03818593159852709, + "grad_norm": 219.71282958984375, + "learning_rate": 1.9996491916743654e-06, + "loss": 29.6719, + "step": 4034 + }, + { + "epoch": 0.03819539762024214, + "grad_norm": 533.7113647460938, + "learning_rate": 1.9996483791957363e-06, + "loss": 45.9219, + "step": 4035 + }, + { + "epoch": 0.038204863641957194, + "grad_norm": 136.07557678222656, + "learning_rate": 1.9996475657775025e-06, + "loss": 24.1094, + "step": 4036 + }, + { + "epoch": 0.03821432966367225, + "grad_norm": 1543.44189453125, + "learning_rate": 1.999646751419665e-06, + "loss": 34.8203, + "step": 4037 + }, + { + "epoch": 0.0382237956853873, + "grad_norm": 218.6106414794922, + "learning_rate": 1.9996459361222252e-06, + "loss": 26.1406, + "step": 4038 + }, + { + "epoch": 0.03823326170710235, + "grad_norm": 340.34344482421875, + "learning_rate": 1.999645119885183e-06, + "loss": 27.8125, + "step": 4039 + }, + { + "epoch": 0.03824272772881741, + "grad_norm": 255.4000701904297, + "learning_rate": 1.999644302708539e-06, + "loss": 23.0781, + "step": 4040 + }, + { + "epoch": 0.038252193750532466, + "grad_norm": 764.486083984375, + "learning_rate": 1.999643484592295e-06, + "loss": 97.9922, + "step": 4041 + }, + { + "epoch": 0.03826165977224752, + "grad_norm": 380.1627502441406, + "learning_rate": 1.9996426655364505e-06, + "loss": 23.5625, + "step": 4042 + }, + { + "epoch": 0.03827112579396257, + "grad_norm": 289.04022216796875, + "learning_rate": 1.9996418455410075e-06, + "loss": 29.9297, + "step": 4043 + }, + { + "epoch": 0.038280591815677625, + "grad_norm": 4.1252121925354, + "learning_rate": 1.999641024605966e-06, + "loss": 0.9768, + "step": 4044 + }, + { + "epoch": 0.03829005783739268, + "grad_norm": 229.93797302246094, + "learning_rate": 1.9996402027313268e-06, + "loss": 25.3594, + "step": 4045 + }, + { + "epoch": 0.03829952385910773, + "grad_norm": 448.14874267578125, + "learning_rate": 1.999639379917091e-06, + "loss": 40.7031, + "step": 4046 + }, + { + "epoch": 0.038308989880822784, + "grad_norm": 170.48074340820312, + "learning_rate": 1.9996385561632592e-06, + "loss": 24.4062, + "step": 4047 + }, + { + "epoch": 0.03831845590253784, + "grad_norm": 510.7287902832031, + "learning_rate": 1.999637731469832e-06, + "loss": 53.6484, + "step": 4048 + }, + { + "epoch": 0.0383279219242529, + "grad_norm": 611.3519897460938, + "learning_rate": 1.9996369058368103e-06, + "loss": 56.5156, + "step": 4049 + }, + { + "epoch": 0.03833738794596795, + "grad_norm": 569.4838256835938, + "learning_rate": 1.9996360792641955e-06, + "loss": 31.0312, + "step": 4050 + }, + { + "epoch": 0.038346853967683, + "grad_norm": 255.91615295410156, + "learning_rate": 1.9996352517519878e-06, + "loss": 26.4062, + "step": 4051 + }, + { + "epoch": 0.038356319989398056, + "grad_norm": 911.7078857421875, + "learning_rate": 1.9996344233001875e-06, + "loss": 45.1094, + "step": 4052 + }, + { + "epoch": 0.03836578601111311, + "grad_norm": 350.1571960449219, + "learning_rate": 1.999633593908796e-06, + "loss": 26.1094, + "step": 4053 + }, + { + "epoch": 0.03837525203282816, + "grad_norm": 251.94821166992188, + "learning_rate": 1.999632763577814e-06, + "loss": 28.1406, + "step": 4054 + }, + { + "epoch": 0.038384718054543215, + "grad_norm": 417.5997314453125, + "learning_rate": 1.9996319323072422e-06, + "loss": 39.0938, + "step": 4055 + }, + { + "epoch": 0.03839418407625827, + "grad_norm": 350.8194274902344, + "learning_rate": 1.9996311000970814e-06, + "loss": 34.7812, + "step": 4056 + }, + { + "epoch": 0.03840365009797332, + "grad_norm": 255.92897033691406, + "learning_rate": 1.9996302669473324e-06, + "loss": 27.0312, + "step": 4057 + }, + { + "epoch": 0.03841311611968838, + "grad_norm": 1600.46826171875, + "learning_rate": 1.999629432857996e-06, + "loss": 32.625, + "step": 4058 + }, + { + "epoch": 0.038422582141403434, + "grad_norm": 193.08352661132812, + "learning_rate": 1.999628597829073e-06, + "loss": 28.9688, + "step": 4059 + }, + { + "epoch": 0.03843204816311849, + "grad_norm": 3.2147865295410156, + "learning_rate": 1.9996277618605644e-06, + "loss": 1.0015, + "step": 4060 + }, + { + "epoch": 0.03844151418483354, + "grad_norm": 266.9574890136719, + "learning_rate": 1.9996269249524705e-06, + "loss": 29.2031, + "step": 4061 + }, + { + "epoch": 0.03845098020654859, + "grad_norm": 285.736328125, + "learning_rate": 1.999626087104792e-06, + "loss": 29.9688, + "step": 4062 + }, + { + "epoch": 0.038460446228263646, + "grad_norm": 746.8775634765625, + "learning_rate": 1.9996252483175306e-06, + "loss": 49.4219, + "step": 4063 + }, + { + "epoch": 0.0384699122499787, + "grad_norm": 281.80279541015625, + "learning_rate": 1.999624408590686e-06, + "loss": 29.2031, + "step": 4064 + }, + { + "epoch": 0.03847937827169375, + "grad_norm": 182.1901092529297, + "learning_rate": 1.99962356792426e-06, + "loss": 18.7812, + "step": 4065 + }, + { + "epoch": 0.03848884429340881, + "grad_norm": 305.5857849121094, + "learning_rate": 1.9996227263182522e-06, + "loss": 29.4375, + "step": 4066 + }, + { + "epoch": 0.038498310315123865, + "grad_norm": 323.11883544921875, + "learning_rate": 1.9996218837726645e-06, + "loss": 24.9531, + "step": 4067 + }, + { + "epoch": 0.03850777633683892, + "grad_norm": 556.8881225585938, + "learning_rate": 1.999621040287497e-06, + "loss": 37.8594, + "step": 4068 + }, + { + "epoch": 0.03851724235855397, + "grad_norm": 372.77874755859375, + "learning_rate": 1.9996201958627513e-06, + "loss": 33.4844, + "step": 4069 + }, + { + "epoch": 0.038526708380269024, + "grad_norm": 905.8208618164062, + "learning_rate": 1.999619350498427e-06, + "loss": 34.9922, + "step": 4070 + }, + { + "epoch": 0.03853617440198408, + "grad_norm": 153.20530700683594, + "learning_rate": 1.999618504194526e-06, + "loss": 24.2812, + "step": 4071 + }, + { + "epoch": 0.03854564042369913, + "grad_norm": 730.8421630859375, + "learning_rate": 1.999617656951048e-06, + "loss": 34.9297, + "step": 4072 + }, + { + "epoch": 0.03855510644541418, + "grad_norm": 187.9532470703125, + "learning_rate": 1.9996168087679953e-06, + "loss": 29.3906, + "step": 4073 + }, + { + "epoch": 0.038564572467129236, + "grad_norm": 2.7699813842773438, + "learning_rate": 1.9996159596453674e-06, + "loss": 0.8574, + "step": 4074 + }, + { + "epoch": 0.038574038488844296, + "grad_norm": 384.8652038574219, + "learning_rate": 1.9996151095831656e-06, + "loss": 23.7422, + "step": 4075 + }, + { + "epoch": 0.03858350451055935, + "grad_norm": 564.9271240234375, + "learning_rate": 1.999614258581391e-06, + "loss": 52.7969, + "step": 4076 + }, + { + "epoch": 0.0385929705322744, + "grad_norm": 380.8847961425781, + "learning_rate": 1.999613406640043e-06, + "loss": 25.2266, + "step": 4077 + }, + { + "epoch": 0.038602436553989455, + "grad_norm": 522.300048828125, + "learning_rate": 1.999612553759124e-06, + "loss": 47.2812, + "step": 4078 + }, + { + "epoch": 0.03861190257570451, + "grad_norm": 300.40521240234375, + "learning_rate": 1.9996116999386346e-06, + "loss": 24.5156, + "step": 4079 + }, + { + "epoch": 0.03862136859741956, + "grad_norm": 185.93521118164062, + "learning_rate": 1.9996108451785746e-06, + "loss": 26.4531, + "step": 4080 + }, + { + "epoch": 0.038630834619134614, + "grad_norm": 773.1136474609375, + "learning_rate": 1.999609989478946e-06, + "loss": 47.8438, + "step": 4081 + }, + { + "epoch": 0.03864030064084967, + "grad_norm": 3.478426456451416, + "learning_rate": 1.9996091328397484e-06, + "loss": 0.9595, + "step": 4082 + }, + { + "epoch": 0.03864976666256473, + "grad_norm": 329.6343688964844, + "learning_rate": 1.999608275260984e-06, + "loss": 28.6094, + "step": 4083 + }, + { + "epoch": 0.03865923268427978, + "grad_norm": 392.0374755859375, + "learning_rate": 1.9996074167426523e-06, + "loss": 28.5625, + "step": 4084 + }, + { + "epoch": 0.03866869870599483, + "grad_norm": 931.4622802734375, + "learning_rate": 1.999606557284755e-06, + "loss": 64.0156, + "step": 4085 + }, + { + "epoch": 0.038678164727709886, + "grad_norm": 412.564697265625, + "learning_rate": 1.9996056968872922e-06, + "loss": 44.0469, + "step": 4086 + }, + { + "epoch": 0.03868763074942494, + "grad_norm": 303.4408264160156, + "learning_rate": 1.9996048355502654e-06, + "loss": 36.5, + "step": 4087 + }, + { + "epoch": 0.03869709677113999, + "grad_norm": 152.13156127929688, + "learning_rate": 1.999603973273675e-06, + "loss": 23.2812, + "step": 4088 + }, + { + "epoch": 0.038706562792855045, + "grad_norm": 553.5531616210938, + "learning_rate": 1.9996031100575215e-06, + "loss": 40.8281, + "step": 4089 + }, + { + "epoch": 0.0387160288145701, + "grad_norm": 666.1461181640625, + "learning_rate": 1.9996022459018066e-06, + "loss": 61.7344, + "step": 4090 + }, + { + "epoch": 0.03872549483628515, + "grad_norm": 357.75823974609375, + "learning_rate": 1.9996013808065306e-06, + "loss": 24.9844, + "step": 4091 + }, + { + "epoch": 0.03873496085800021, + "grad_norm": 367.1924133300781, + "learning_rate": 1.9996005147716943e-06, + "loss": 47.25, + "step": 4092 + }, + { + "epoch": 0.038744426879715264, + "grad_norm": 583.586181640625, + "learning_rate": 1.999599647797298e-06, + "loss": 64.7656, + "step": 4093 + }, + { + "epoch": 0.03875389290143032, + "grad_norm": 421.32550048828125, + "learning_rate": 1.9995987798833438e-06, + "loss": 62.0312, + "step": 4094 + }, + { + "epoch": 0.03876335892314537, + "grad_norm": 193.2220458984375, + "learning_rate": 1.9995979110298313e-06, + "loss": 26.0312, + "step": 4095 + }, + { + "epoch": 0.03877282494486042, + "grad_norm": 681.11669921875, + "learning_rate": 1.999597041236762e-06, + "loss": 67.9375, + "step": 4096 + }, + { + "epoch": 0.038782290966575476, + "grad_norm": 3.226996898651123, + "learning_rate": 1.9995961705041364e-06, + "loss": 0.9541, + "step": 4097 + }, + { + "epoch": 0.03879175698829053, + "grad_norm": 3.3791606426239014, + "learning_rate": 1.9995952988319557e-06, + "loss": 0.8025, + "step": 4098 + }, + { + "epoch": 0.03880122301000558, + "grad_norm": 520.2837524414062, + "learning_rate": 1.99959442622022e-06, + "loss": 40.9375, + "step": 4099 + }, + { + "epoch": 0.038810689031720635, + "grad_norm": 392.1064147949219, + "learning_rate": 1.999593552668931e-06, + "loss": 41.5781, + "step": 4100 + }, + { + "epoch": 0.038820155053435695, + "grad_norm": 1083.9539794921875, + "learning_rate": 1.999592678178089e-06, + "loss": 46.3711, + "step": 4101 + }, + { + "epoch": 0.03882962107515075, + "grad_norm": 1068.4798583984375, + "learning_rate": 1.9995918027476946e-06, + "loss": 33.2188, + "step": 4102 + }, + { + "epoch": 0.0388390870968658, + "grad_norm": 408.39105224609375, + "learning_rate": 1.999590926377749e-06, + "loss": 28.375, + "step": 4103 + }, + { + "epoch": 0.038848553118580854, + "grad_norm": 505.2362365722656, + "learning_rate": 1.999590049068253e-06, + "loss": 29.7266, + "step": 4104 + }, + { + "epoch": 0.03885801914029591, + "grad_norm": 654.9423217773438, + "learning_rate": 1.9995891708192077e-06, + "loss": 58.7031, + "step": 4105 + }, + { + "epoch": 0.03886748516201096, + "grad_norm": 157.72303771972656, + "learning_rate": 1.9995882916306134e-06, + "loss": 25.4531, + "step": 4106 + }, + { + "epoch": 0.03887695118372601, + "grad_norm": 478.8612976074219, + "learning_rate": 1.999587411502471e-06, + "loss": 28.5625, + "step": 4107 + }, + { + "epoch": 0.038886417205441066, + "grad_norm": 1775.6209716796875, + "learning_rate": 1.9995865304347815e-06, + "loss": 37.2578, + "step": 4108 + }, + { + "epoch": 0.038895883227156126, + "grad_norm": 248.40603637695312, + "learning_rate": 1.999585648427546e-06, + "loss": 23.8672, + "step": 4109 + }, + { + "epoch": 0.03890534924887118, + "grad_norm": 728.8482666015625, + "learning_rate": 1.9995847654807645e-06, + "loss": 43.0938, + "step": 4110 + }, + { + "epoch": 0.03891481527058623, + "grad_norm": 292.21759033203125, + "learning_rate": 1.999583881594439e-06, + "loss": 39.7812, + "step": 4111 + }, + { + "epoch": 0.038924281292301285, + "grad_norm": 203.60977172851562, + "learning_rate": 1.999582996768569e-06, + "loss": 25.3906, + "step": 4112 + }, + { + "epoch": 0.03893374731401634, + "grad_norm": 362.2775573730469, + "learning_rate": 1.9995821110031564e-06, + "loss": 22.7266, + "step": 4113 + }, + { + "epoch": 0.03894321333573139, + "grad_norm": 198.60812377929688, + "learning_rate": 1.9995812242982017e-06, + "loss": 24.5625, + "step": 4114 + }, + { + "epoch": 0.038952679357446444, + "grad_norm": 215.81008911132812, + "learning_rate": 1.9995803366537054e-06, + "loss": 26.8281, + "step": 4115 + }, + { + "epoch": 0.0389621453791615, + "grad_norm": 269.4090270996094, + "learning_rate": 1.9995794480696687e-06, + "loss": 13.7422, + "step": 4116 + }, + { + "epoch": 0.03897161140087655, + "grad_norm": 3.0936386585235596, + "learning_rate": 1.9995785585460925e-06, + "loss": 0.8953, + "step": 4117 + }, + { + "epoch": 0.03898107742259161, + "grad_norm": 215.70358276367188, + "learning_rate": 1.9995776680829772e-06, + "loss": 24.2578, + "step": 4118 + }, + { + "epoch": 0.03899054344430666, + "grad_norm": 192.2714080810547, + "learning_rate": 1.999576776680324e-06, + "loss": 31.2969, + "step": 4119 + }, + { + "epoch": 0.039000009466021716, + "grad_norm": 633.2465209960938, + "learning_rate": 1.999575884338134e-06, + "loss": 50.5156, + "step": 4120 + }, + { + "epoch": 0.03900947548773677, + "grad_norm": 334.0137939453125, + "learning_rate": 1.9995749910564076e-06, + "loss": 21.8984, + "step": 4121 + }, + { + "epoch": 0.03901894150945182, + "grad_norm": 580.3615112304688, + "learning_rate": 1.9995740968351456e-06, + "loss": 48.2188, + "step": 4122 + }, + { + "epoch": 0.039028407531166875, + "grad_norm": 440.4383239746094, + "learning_rate": 1.9995732016743487e-06, + "loss": 46.3906, + "step": 4123 + }, + { + "epoch": 0.03903787355288193, + "grad_norm": 1218.3585205078125, + "learning_rate": 1.9995723055740182e-06, + "loss": 57.6719, + "step": 4124 + }, + { + "epoch": 0.03904733957459698, + "grad_norm": 719.4497680664062, + "learning_rate": 1.999571408534155e-06, + "loss": 48.4844, + "step": 4125 + }, + { + "epoch": 0.03905680559631204, + "grad_norm": 177.07229614257812, + "learning_rate": 1.9995705105547594e-06, + "loss": 29.8125, + "step": 4126 + }, + { + "epoch": 0.039066271618027094, + "grad_norm": 954.8977661132812, + "learning_rate": 1.9995696116358328e-06, + "loss": 60.0664, + "step": 4127 + }, + { + "epoch": 0.03907573763974215, + "grad_norm": 196.576904296875, + "learning_rate": 1.9995687117773755e-06, + "loss": 23.8281, + "step": 4128 + }, + { + "epoch": 0.0390852036614572, + "grad_norm": 289.41314697265625, + "learning_rate": 1.999567810979389e-06, + "loss": 35.4844, + "step": 4129 + }, + { + "epoch": 0.03909466968317225, + "grad_norm": 428.90985107421875, + "learning_rate": 1.9995669092418732e-06, + "loss": 42.0938, + "step": 4130 + }, + { + "epoch": 0.039104135704887306, + "grad_norm": 610.4267578125, + "learning_rate": 1.9995660065648304e-06, + "loss": 26.5664, + "step": 4131 + }, + { + "epoch": 0.03911360172660236, + "grad_norm": 436.2392578125, + "learning_rate": 1.9995651029482603e-06, + "loss": 68.2031, + "step": 4132 + }, + { + "epoch": 0.03912306774831741, + "grad_norm": 279.411376953125, + "learning_rate": 1.9995641983921634e-06, + "loss": 25.2188, + "step": 4133 + }, + { + "epoch": 0.039132533770032465, + "grad_norm": 436.38671875, + "learning_rate": 1.9995632928965417e-06, + "loss": 41.5703, + "step": 4134 + }, + { + "epoch": 0.039141999791747525, + "grad_norm": 379.0028991699219, + "learning_rate": 1.9995623864613954e-06, + "loss": 42.2344, + "step": 4135 + }, + { + "epoch": 0.03915146581346258, + "grad_norm": 245.71591186523438, + "learning_rate": 1.9995614790867256e-06, + "loss": 29.1406, + "step": 4136 + }, + { + "epoch": 0.03916093183517763, + "grad_norm": 210.48085021972656, + "learning_rate": 1.999560570772533e-06, + "loss": 22.8594, + "step": 4137 + }, + { + "epoch": 0.039170397856892684, + "grad_norm": 701.0407104492188, + "learning_rate": 1.9995596615188182e-06, + "loss": 45.7266, + "step": 4138 + }, + { + "epoch": 0.03917986387860774, + "grad_norm": 458.08837890625, + "learning_rate": 1.999558751325583e-06, + "loss": 53.1562, + "step": 4139 + }, + { + "epoch": 0.03918932990032279, + "grad_norm": 603.677734375, + "learning_rate": 1.999557840192827e-06, + "loss": 60.0, + "step": 4140 + }, + { + "epoch": 0.03919879592203784, + "grad_norm": 668.92724609375, + "learning_rate": 1.999556928120552e-06, + "loss": 25.3477, + "step": 4141 + }, + { + "epoch": 0.039208261943752896, + "grad_norm": 450.9991760253906, + "learning_rate": 1.9995560151087583e-06, + "loss": 38.0469, + "step": 4142 + }, + { + "epoch": 0.03921772796546795, + "grad_norm": 313.8189697265625, + "learning_rate": 1.999555101157447e-06, + "loss": 25.375, + "step": 4143 + }, + { + "epoch": 0.03922719398718301, + "grad_norm": 414.12823486328125, + "learning_rate": 1.9995541862666188e-06, + "loss": 53.5234, + "step": 4144 + }, + { + "epoch": 0.03923666000889806, + "grad_norm": 532.7650756835938, + "learning_rate": 1.999553270436275e-06, + "loss": 41.4414, + "step": 4145 + }, + { + "epoch": 0.039246126030613115, + "grad_norm": 440.55364990234375, + "learning_rate": 1.999552353666416e-06, + "loss": 34.2031, + "step": 4146 + }, + { + "epoch": 0.03925559205232817, + "grad_norm": 322.4089050292969, + "learning_rate": 1.9995514359570426e-06, + "loss": 21.0938, + "step": 4147 + }, + { + "epoch": 0.03926505807404322, + "grad_norm": 460.5558776855469, + "learning_rate": 1.9995505173081564e-06, + "loss": 23.6523, + "step": 4148 + }, + { + "epoch": 0.039274524095758274, + "grad_norm": 2.789703607559204, + "learning_rate": 1.9995495977197572e-06, + "loss": 0.875, + "step": 4149 + }, + { + "epoch": 0.03928399011747333, + "grad_norm": 383.8794860839844, + "learning_rate": 1.999548677191847e-06, + "loss": 29.0781, + "step": 4150 + }, + { + "epoch": 0.03929345613918838, + "grad_norm": 177.30848693847656, + "learning_rate": 1.9995477557244256e-06, + "loss": 25.5625, + "step": 4151 + }, + { + "epoch": 0.03930292216090344, + "grad_norm": 448.502197265625, + "learning_rate": 1.9995468333174942e-06, + "loss": 29.6875, + "step": 4152 + }, + { + "epoch": 0.03931238818261849, + "grad_norm": 592.1004028320312, + "learning_rate": 1.9995459099710543e-06, + "loss": 44.625, + "step": 4153 + }, + { + "epoch": 0.039321854204333546, + "grad_norm": 240.15240478515625, + "learning_rate": 1.9995449856851058e-06, + "loss": 29.9062, + "step": 4154 + }, + { + "epoch": 0.0393313202260486, + "grad_norm": 628.1829223632812, + "learning_rate": 1.9995440604596503e-06, + "loss": 37.6445, + "step": 4155 + }, + { + "epoch": 0.03934078624776365, + "grad_norm": 459.3580322265625, + "learning_rate": 1.9995431342946883e-06, + "loss": 49.2031, + "step": 4156 + }, + { + "epoch": 0.039350252269478705, + "grad_norm": 609.18408203125, + "learning_rate": 1.9995422071902207e-06, + "loss": 46.3594, + "step": 4157 + }, + { + "epoch": 0.03935971829119376, + "grad_norm": 501.2630310058594, + "learning_rate": 1.999541279146249e-06, + "loss": 32.1094, + "step": 4158 + }, + { + "epoch": 0.03936918431290881, + "grad_norm": 379.45849609375, + "learning_rate": 1.999540350162773e-06, + "loss": 25.5859, + "step": 4159 + }, + { + "epoch": 0.039378650334623864, + "grad_norm": 414.5447082519531, + "learning_rate": 1.999539420239794e-06, + "loss": 23.3203, + "step": 4160 + }, + { + "epoch": 0.039388116356338924, + "grad_norm": 454.16644287109375, + "learning_rate": 1.999538489377313e-06, + "loss": 25.0117, + "step": 4161 + }, + { + "epoch": 0.03939758237805398, + "grad_norm": 466.9858703613281, + "learning_rate": 1.9995375575753313e-06, + "loss": 41.0625, + "step": 4162 + }, + { + "epoch": 0.03940704839976903, + "grad_norm": 762.39794921875, + "learning_rate": 1.999536624833849e-06, + "loss": 57.8125, + "step": 4163 + }, + { + "epoch": 0.03941651442148408, + "grad_norm": 319.0615539550781, + "learning_rate": 1.9995356911528675e-06, + "loss": 25.0938, + "step": 4164 + }, + { + "epoch": 0.039425980443199136, + "grad_norm": 550.6720581054688, + "learning_rate": 1.9995347565323873e-06, + "loss": 51.3125, + "step": 4165 + }, + { + "epoch": 0.03943544646491419, + "grad_norm": 880.4418334960938, + "learning_rate": 1.9995338209724094e-06, + "loss": 33.1094, + "step": 4166 + }, + { + "epoch": 0.03944491248662924, + "grad_norm": 3.1767024993896484, + "learning_rate": 1.999532884472935e-06, + "loss": 0.6909, + "step": 4167 + }, + { + "epoch": 0.039454378508344295, + "grad_norm": 336.6927185058594, + "learning_rate": 1.9995319470339644e-06, + "loss": 41.3281, + "step": 4168 + }, + { + "epoch": 0.03946384453005935, + "grad_norm": 376.623046875, + "learning_rate": 1.999531008655499e-06, + "loss": 25.8516, + "step": 4169 + }, + { + "epoch": 0.03947331055177441, + "grad_norm": 717.5015258789062, + "learning_rate": 1.999530069337539e-06, + "loss": 38.5, + "step": 4170 + }, + { + "epoch": 0.03948277657348946, + "grad_norm": 268.05059814453125, + "learning_rate": 1.9995291290800863e-06, + "loss": 36.2969, + "step": 4171 + }, + { + "epoch": 0.039492242595204514, + "grad_norm": 301.7518310546875, + "learning_rate": 1.999528187883141e-06, + "loss": 23.1875, + "step": 4172 + }, + { + "epoch": 0.03950170861691957, + "grad_norm": 242.39727783203125, + "learning_rate": 1.999527245746704e-06, + "loss": 28.3281, + "step": 4173 + }, + { + "epoch": 0.03951117463863462, + "grad_norm": 265.41217041015625, + "learning_rate": 1.9995263026707772e-06, + "loss": 30.0156, + "step": 4174 + }, + { + "epoch": 0.039520640660349673, + "grad_norm": 984.6447143554688, + "learning_rate": 1.9995253586553604e-06, + "loss": 55.7031, + "step": 4175 + }, + { + "epoch": 0.039530106682064726, + "grad_norm": 622.18505859375, + "learning_rate": 1.9995244137004543e-06, + "loss": 53.7812, + "step": 4176 + }, + { + "epoch": 0.03953957270377978, + "grad_norm": 522.2833251953125, + "learning_rate": 1.9995234678060605e-06, + "loss": 29.5156, + "step": 4177 + }, + { + "epoch": 0.03954903872549484, + "grad_norm": 407.25677490234375, + "learning_rate": 1.9995225209721796e-06, + "loss": 30.0156, + "step": 4178 + }, + { + "epoch": 0.03955850474720989, + "grad_norm": 255.40220642089844, + "learning_rate": 1.999521573198813e-06, + "loss": 35.9609, + "step": 4179 + }, + { + "epoch": 0.039567970768924945, + "grad_norm": 275.59222412109375, + "learning_rate": 1.9995206244859605e-06, + "loss": 22.9219, + "step": 4180 + }, + { + "epoch": 0.03957743679064, + "grad_norm": 426.9004821777344, + "learning_rate": 1.999519674833624e-06, + "loss": 32.9375, + "step": 4181 + }, + { + "epoch": 0.03958690281235505, + "grad_norm": 236.41775512695312, + "learning_rate": 1.9995187242418044e-06, + "loss": 24.5469, + "step": 4182 + }, + { + "epoch": 0.039596368834070104, + "grad_norm": 462.0470886230469, + "learning_rate": 1.9995177727105016e-06, + "loss": 36.4922, + "step": 4183 + }, + { + "epoch": 0.03960583485578516, + "grad_norm": 525.508056640625, + "learning_rate": 1.9995168202397172e-06, + "loss": 39.7266, + "step": 4184 + }, + { + "epoch": 0.03961530087750021, + "grad_norm": 545.737060546875, + "learning_rate": 1.999515866829452e-06, + "loss": 60.2344, + "step": 4185 + }, + { + "epoch": 0.039624766899215264, + "grad_norm": 212.68032836914062, + "learning_rate": 1.9995149124797073e-06, + "loss": 28.25, + "step": 4186 + }, + { + "epoch": 0.03963423292093032, + "grad_norm": 247.36636352539062, + "learning_rate": 1.9995139571904835e-06, + "loss": 27.1094, + "step": 4187 + }, + { + "epoch": 0.039643698942645376, + "grad_norm": 255.08346557617188, + "learning_rate": 1.9995130009617816e-06, + "loss": 26.3281, + "step": 4188 + }, + { + "epoch": 0.03965316496436043, + "grad_norm": 1396.5218505859375, + "learning_rate": 1.9995120437936024e-06, + "loss": 87.7188, + "step": 4189 + }, + { + "epoch": 0.03966263098607548, + "grad_norm": 224.7928924560547, + "learning_rate": 1.9995110856859467e-06, + "loss": 21.0781, + "step": 4190 + }, + { + "epoch": 0.039672097007790535, + "grad_norm": 240.50411987304688, + "learning_rate": 1.9995101266388155e-06, + "loss": 27.875, + "step": 4191 + }, + { + "epoch": 0.03968156302950559, + "grad_norm": 516.4314575195312, + "learning_rate": 1.9995091666522104e-06, + "loss": 66.3125, + "step": 4192 + }, + { + "epoch": 0.03969102905122064, + "grad_norm": 305.9889831542969, + "learning_rate": 1.9995082057261314e-06, + "loss": 38.125, + "step": 4193 + }, + { + "epoch": 0.039700495072935695, + "grad_norm": 1036.724853515625, + "learning_rate": 1.9995072438605798e-06, + "loss": 45.8438, + "step": 4194 + }, + { + "epoch": 0.039709961094650754, + "grad_norm": 167.87290954589844, + "learning_rate": 1.999506281055556e-06, + "loss": 23.25, + "step": 4195 + }, + { + "epoch": 0.03971942711636581, + "grad_norm": 465.1285705566406, + "learning_rate": 1.999505317311062e-06, + "loss": 53.3281, + "step": 4196 + }, + { + "epoch": 0.03972889313808086, + "grad_norm": 502.166748046875, + "learning_rate": 1.9995043526270972e-06, + "loss": 26.2578, + "step": 4197 + }, + { + "epoch": 0.039738359159795913, + "grad_norm": 469.6846618652344, + "learning_rate": 1.999503387003664e-06, + "loss": 38.5156, + "step": 4198 + }, + { + "epoch": 0.039747825181510966, + "grad_norm": 518.8941040039062, + "learning_rate": 1.999502420440762e-06, + "loss": 62.9375, + "step": 4199 + }, + { + "epoch": 0.03975729120322602, + "grad_norm": 414.495849609375, + "learning_rate": 1.9995014529383938e-06, + "loss": 24.3438, + "step": 4200 + }, + { + "epoch": 0.03976675722494107, + "grad_norm": 343.53009033203125, + "learning_rate": 1.999500484496558e-06, + "loss": 26.375, + "step": 4201 + }, + { + "epoch": 0.039776223246656126, + "grad_norm": 278.9809875488281, + "learning_rate": 1.9994995151152576e-06, + "loss": 23.6562, + "step": 4202 + }, + { + "epoch": 0.03978568926837118, + "grad_norm": 638.329345703125, + "learning_rate": 1.9994985447944926e-06, + "loss": 38.5781, + "step": 4203 + }, + { + "epoch": 0.03979515529008624, + "grad_norm": 577.7850341796875, + "learning_rate": 1.999497573534264e-06, + "loss": 57.0938, + "step": 4204 + }, + { + "epoch": 0.03980462131180129, + "grad_norm": 1127.9384765625, + "learning_rate": 1.9994966013345724e-06, + "loss": 55.1406, + "step": 4205 + }, + { + "epoch": 0.039814087333516344, + "grad_norm": 210.2228240966797, + "learning_rate": 1.9994956281954193e-06, + "loss": 20.5156, + "step": 4206 + }, + { + "epoch": 0.0398235533552314, + "grad_norm": 786.2171630859375, + "learning_rate": 1.999494654116805e-06, + "loss": 43.8594, + "step": 4207 + }, + { + "epoch": 0.03983301937694645, + "grad_norm": 307.8618469238281, + "learning_rate": 1.999493679098731e-06, + "loss": 37.4531, + "step": 4208 + }, + { + "epoch": 0.039842485398661504, + "grad_norm": 320.9873046875, + "learning_rate": 1.999492703141198e-06, + "loss": 25.5781, + "step": 4209 + }, + { + "epoch": 0.039851951420376557, + "grad_norm": 291.4000244140625, + "learning_rate": 1.9994917262442067e-06, + "loss": 27.3594, + "step": 4210 + }, + { + "epoch": 0.03986141744209161, + "grad_norm": 270.46466064453125, + "learning_rate": 1.9994907484077583e-06, + "loss": 32.0938, + "step": 4211 + }, + { + "epoch": 0.03987088346380666, + "grad_norm": 560.9937744140625, + "learning_rate": 1.9994897696318537e-06, + "loss": 27.1094, + "step": 4212 + }, + { + "epoch": 0.03988034948552172, + "grad_norm": 181.7898712158203, + "learning_rate": 1.9994887899164934e-06, + "loss": 23.7344, + "step": 4213 + }, + { + "epoch": 0.039889815507236775, + "grad_norm": 244.48843383789062, + "learning_rate": 1.999487809261679e-06, + "loss": 23.7812, + "step": 4214 + }, + { + "epoch": 0.03989928152895183, + "grad_norm": 1049.4578857421875, + "learning_rate": 1.999486827667411e-06, + "loss": 22.8906, + "step": 4215 + }, + { + "epoch": 0.03990874755066688, + "grad_norm": 290.2458190917969, + "learning_rate": 1.9994858451336905e-06, + "loss": 33.6094, + "step": 4216 + }, + { + "epoch": 0.039918213572381935, + "grad_norm": 446.4381103515625, + "learning_rate": 1.999484861660518e-06, + "loss": 26.4062, + "step": 4217 + }, + { + "epoch": 0.03992767959409699, + "grad_norm": 260.1158752441406, + "learning_rate": 1.999483877247895e-06, + "loss": 37.9062, + "step": 4218 + }, + { + "epoch": 0.03993714561581204, + "grad_norm": 1259.6085205078125, + "learning_rate": 1.999482891895822e-06, + "loss": 40.8281, + "step": 4219 + }, + { + "epoch": 0.039946611637527094, + "grad_norm": 596.5026245117188, + "learning_rate": 1.999481905604301e-06, + "loss": 59.2188, + "step": 4220 + }, + { + "epoch": 0.039956077659242153, + "grad_norm": 743.5089111328125, + "learning_rate": 1.9994809183733307e-06, + "loss": 51.9531, + "step": 4221 + }, + { + "epoch": 0.039965543680957206, + "grad_norm": 3.3426012992858887, + "learning_rate": 1.999479930202914e-06, + "loss": 0.8433, + "step": 4222 + }, + { + "epoch": 0.03997500970267226, + "grad_norm": 594.6080322265625, + "learning_rate": 1.999478941093051e-06, + "loss": 53.7812, + "step": 4223 + }, + { + "epoch": 0.03998447572438731, + "grad_norm": 369.2522888183594, + "learning_rate": 1.999477951043743e-06, + "loss": 28.4141, + "step": 4224 + }, + { + "epoch": 0.039993941746102366, + "grad_norm": 810.8480224609375, + "learning_rate": 1.9994769600549906e-06, + "loss": 52.3438, + "step": 4225 + }, + { + "epoch": 0.04000340776781742, + "grad_norm": 349.6993103027344, + "learning_rate": 1.999475968126795e-06, + "loss": 29.0938, + "step": 4226 + }, + { + "epoch": 0.04001287378953247, + "grad_norm": 356.4378356933594, + "learning_rate": 1.999474975259157e-06, + "loss": 25.6484, + "step": 4227 + }, + { + "epoch": 0.040022339811247525, + "grad_norm": 469.2930908203125, + "learning_rate": 1.9994739814520776e-06, + "loss": 28.7188, + "step": 4228 + }, + { + "epoch": 0.04003180583296258, + "grad_norm": 297.62274169921875, + "learning_rate": 1.9994729867055574e-06, + "loss": 21.6641, + "step": 4229 + }, + { + "epoch": 0.04004127185467764, + "grad_norm": 249.31524658203125, + "learning_rate": 1.999471991019598e-06, + "loss": 22.9062, + "step": 4230 + }, + { + "epoch": 0.04005073787639269, + "grad_norm": 231.9744873046875, + "learning_rate": 1.9994709943941995e-06, + "loss": 25.8281, + "step": 4231 + }, + { + "epoch": 0.040060203898107744, + "grad_norm": 349.85498046875, + "learning_rate": 1.999469996829364e-06, + "loss": 28.0781, + "step": 4232 + }, + { + "epoch": 0.040069669919822797, + "grad_norm": 594.4486694335938, + "learning_rate": 1.999468998325091e-06, + "loss": 67.6875, + "step": 4233 + }, + { + "epoch": 0.04007913594153785, + "grad_norm": 491.7440185546875, + "learning_rate": 1.9994679988813825e-06, + "loss": 35.2109, + "step": 4234 + }, + { + "epoch": 0.0400886019632529, + "grad_norm": 381.0583801269531, + "learning_rate": 1.9994669984982388e-06, + "loss": 30.9766, + "step": 4235 + }, + { + "epoch": 0.040098067984967956, + "grad_norm": 539.0879516601562, + "learning_rate": 1.9994659971756614e-06, + "loss": 29.6797, + "step": 4236 + }, + { + "epoch": 0.04010753400668301, + "grad_norm": 168.40057373046875, + "learning_rate": 1.999464994913651e-06, + "loss": 30.9375, + "step": 4237 + }, + { + "epoch": 0.04011700002839807, + "grad_norm": 325.1187438964844, + "learning_rate": 1.9994639917122086e-06, + "loss": 48.0469, + "step": 4238 + }, + { + "epoch": 0.04012646605011312, + "grad_norm": 540.296142578125, + "learning_rate": 1.9994629875713345e-06, + "loss": 47.8906, + "step": 4239 + }, + { + "epoch": 0.040135932071828175, + "grad_norm": 379.606201171875, + "learning_rate": 1.999461982491031e-06, + "loss": 24.9062, + "step": 4240 + }, + { + "epoch": 0.04014539809354323, + "grad_norm": 448.7223205566406, + "learning_rate": 1.999460976471298e-06, + "loss": 25.5, + "step": 4241 + }, + { + "epoch": 0.04015486411525828, + "grad_norm": 505.912353515625, + "learning_rate": 1.9994599695121365e-06, + "loss": 27.7266, + "step": 4242 + }, + { + "epoch": 0.040164330136973334, + "grad_norm": 254.7139892578125, + "learning_rate": 1.9994589616135477e-06, + "loss": 26.75, + "step": 4243 + }, + { + "epoch": 0.04017379615868839, + "grad_norm": 2.907914876937866, + "learning_rate": 1.999457952775533e-06, + "loss": 0.8037, + "step": 4244 + }, + { + "epoch": 0.04018326218040344, + "grad_norm": 2.5216822624206543, + "learning_rate": 1.999456942998092e-06, + "loss": 0.7498, + "step": 4245 + }, + { + "epoch": 0.04019272820211849, + "grad_norm": 864.3170166015625, + "learning_rate": 1.9994559322812267e-06, + "loss": 44.3438, + "step": 4246 + }, + { + "epoch": 0.04020219422383355, + "grad_norm": 238.27125549316406, + "learning_rate": 1.999454920624938e-06, + "loss": 27.3906, + "step": 4247 + }, + { + "epoch": 0.040211660245548606, + "grad_norm": 306.498046875, + "learning_rate": 1.9994539080292266e-06, + "loss": 27.0312, + "step": 4248 + }, + { + "epoch": 0.04022112626726366, + "grad_norm": 381.7127990722656, + "learning_rate": 1.999452894494094e-06, + "loss": 28.5625, + "step": 4249 + }, + { + "epoch": 0.04023059228897871, + "grad_norm": 725.77099609375, + "learning_rate": 1.9994518800195402e-06, + "loss": 43.9219, + "step": 4250 + }, + { + "epoch": 0.040240058310693765, + "grad_norm": 438.6523742675781, + "learning_rate": 1.999450864605567e-06, + "loss": 40.4531, + "step": 4251 + }, + { + "epoch": 0.04024952433240882, + "grad_norm": 603.806396484375, + "learning_rate": 1.9994498482521747e-06, + "loss": 47.3438, + "step": 4252 + }, + { + "epoch": 0.04025899035412387, + "grad_norm": 398.61273193359375, + "learning_rate": 1.9994488309593645e-06, + "loss": 32.25, + "step": 4253 + }, + { + "epoch": 0.040268456375838924, + "grad_norm": 289.6053466796875, + "learning_rate": 1.9994478127271376e-06, + "loss": 28.1953, + "step": 4254 + }, + { + "epoch": 0.04027792239755398, + "grad_norm": 599.3291625976562, + "learning_rate": 1.999446793555495e-06, + "loss": 39.7344, + "step": 4255 + }, + { + "epoch": 0.040287388419269037, + "grad_norm": 550.9402465820312, + "learning_rate": 1.999445773444437e-06, + "loss": 43.375, + "step": 4256 + }, + { + "epoch": 0.04029685444098409, + "grad_norm": 240.4545135498047, + "learning_rate": 1.999444752393965e-06, + "loss": 24.9531, + "step": 4257 + }, + { + "epoch": 0.04030632046269914, + "grad_norm": 270.84912109375, + "learning_rate": 1.9994437304040803e-06, + "loss": 27.1094, + "step": 4258 + }, + { + "epoch": 0.040315786484414196, + "grad_norm": 217.29348754882812, + "learning_rate": 1.9994427074747834e-06, + "loss": 24.4844, + "step": 4259 + }, + { + "epoch": 0.04032525250612925, + "grad_norm": 476.41644287109375, + "learning_rate": 1.9994416836060753e-06, + "loss": 36.8711, + "step": 4260 + }, + { + "epoch": 0.0403347185278443, + "grad_norm": 317.719482421875, + "learning_rate": 1.9994406587979573e-06, + "loss": 23.6797, + "step": 4261 + }, + { + "epoch": 0.040344184549559355, + "grad_norm": 3.473365545272827, + "learning_rate": 1.9994396330504298e-06, + "loss": 0.9214, + "step": 4262 + }, + { + "epoch": 0.04035365057127441, + "grad_norm": 746.1533203125, + "learning_rate": 1.999438606363494e-06, + "loss": 26.9766, + "step": 4263 + }, + { + "epoch": 0.04036311659298947, + "grad_norm": 818.9674682617188, + "learning_rate": 1.999437578737151e-06, + "loss": 25.5781, + "step": 4264 + }, + { + "epoch": 0.04037258261470452, + "grad_norm": 3.1164755821228027, + "learning_rate": 1.999436550171402e-06, + "loss": 0.9507, + "step": 4265 + }, + { + "epoch": 0.040382048636419574, + "grad_norm": 396.6611633300781, + "learning_rate": 1.9994355206662473e-06, + "loss": 42.3594, + "step": 4266 + }, + { + "epoch": 0.04039151465813463, + "grad_norm": 487.7198791503906, + "learning_rate": 1.9994344902216883e-06, + "loss": 32.4609, + "step": 4267 + }, + { + "epoch": 0.04040098067984968, + "grad_norm": 518.678466796875, + "learning_rate": 1.999433458837726e-06, + "loss": 19.7578, + "step": 4268 + }, + { + "epoch": 0.04041044670156473, + "grad_norm": 329.0465087890625, + "learning_rate": 1.999432426514361e-06, + "loss": 21.4375, + "step": 4269 + }, + { + "epoch": 0.040419912723279786, + "grad_norm": 774.3861083984375, + "learning_rate": 1.9994313932515948e-06, + "loss": 34.5625, + "step": 4270 + }, + { + "epoch": 0.04042937874499484, + "grad_norm": 427.5816650390625, + "learning_rate": 1.999430359049428e-06, + "loss": 54.8125, + "step": 4271 + }, + { + "epoch": 0.04043884476670989, + "grad_norm": 418.9459533691406, + "learning_rate": 1.999429323907862e-06, + "loss": 22.0, + "step": 4272 + }, + { + "epoch": 0.04044831078842495, + "grad_norm": 730.1962280273438, + "learning_rate": 1.999428287826897e-06, + "loss": 49.6445, + "step": 4273 + }, + { + "epoch": 0.040457776810140005, + "grad_norm": 276.2663879394531, + "learning_rate": 1.9994272508065344e-06, + "loss": 25.9375, + "step": 4274 + }, + { + "epoch": 0.04046724283185506, + "grad_norm": 166.65969848632812, + "learning_rate": 1.9994262128467755e-06, + "loss": 23.3281, + "step": 4275 + }, + { + "epoch": 0.04047670885357011, + "grad_norm": 3.2345097064971924, + "learning_rate": 1.999425173947621e-06, + "loss": 1.0698, + "step": 4276 + }, + { + "epoch": 0.040486174875285164, + "grad_norm": 183.41107177734375, + "learning_rate": 1.9994241341090715e-06, + "loss": 28.0625, + "step": 4277 + }, + { + "epoch": 0.04049564089700022, + "grad_norm": 2.9851529598236084, + "learning_rate": 1.9994230933311286e-06, + "loss": 0.8945, + "step": 4278 + }, + { + "epoch": 0.04050510691871527, + "grad_norm": 690.9226684570312, + "learning_rate": 1.999422051613793e-06, + "loss": 26.9531, + "step": 4279 + }, + { + "epoch": 0.04051457294043032, + "grad_norm": 290.13714599609375, + "learning_rate": 1.999421008957066e-06, + "loss": 16.8789, + "step": 4280 + }, + { + "epoch": 0.04052403896214538, + "grad_norm": 267.4854431152344, + "learning_rate": 1.9994199653609476e-06, + "loss": 28.2031, + "step": 4281 + }, + { + "epoch": 0.040533504983860436, + "grad_norm": 267.61773681640625, + "learning_rate": 1.99941892082544e-06, + "loss": 27.0312, + "step": 4282 + }, + { + "epoch": 0.04054297100557549, + "grad_norm": 361.3482971191406, + "learning_rate": 1.9994178753505433e-06, + "loss": 20.7266, + "step": 4283 + }, + { + "epoch": 0.04055243702729054, + "grad_norm": 455.2206115722656, + "learning_rate": 1.999416828936259e-06, + "loss": 47.8047, + "step": 4284 + }, + { + "epoch": 0.040561903049005595, + "grad_norm": 447.7338562011719, + "learning_rate": 1.999415781582588e-06, + "loss": 46.7656, + "step": 4285 + }, + { + "epoch": 0.04057136907072065, + "grad_norm": 846.3236694335938, + "learning_rate": 1.999414733289531e-06, + "loss": 40.2812, + "step": 4286 + }, + { + "epoch": 0.0405808350924357, + "grad_norm": 818.1011962890625, + "learning_rate": 1.999413684057089e-06, + "loss": 17.5703, + "step": 4287 + }, + { + "epoch": 0.040590301114150754, + "grad_norm": 395.9693298339844, + "learning_rate": 1.999412633885263e-06, + "loss": 27.8438, + "step": 4288 + }, + { + "epoch": 0.04059976713586581, + "grad_norm": 484.3722229003906, + "learning_rate": 1.999411582774055e-06, + "loss": 42.0156, + "step": 4289 + }, + { + "epoch": 0.04060923315758087, + "grad_norm": 279.8345031738281, + "learning_rate": 1.999410530723464e-06, + "loss": 35.6562, + "step": 4290 + }, + { + "epoch": 0.04061869917929592, + "grad_norm": 437.7147216796875, + "learning_rate": 1.999409477733493e-06, + "loss": 26.2656, + "step": 4291 + }, + { + "epoch": 0.04062816520101097, + "grad_norm": 225.82614135742188, + "learning_rate": 1.9994084238041418e-06, + "loss": 25.9531, + "step": 4292 + }, + { + "epoch": 0.040637631222726026, + "grad_norm": 380.1561279296875, + "learning_rate": 1.999407368935412e-06, + "loss": 41.6875, + "step": 4293 + }, + { + "epoch": 0.04064709724444108, + "grad_norm": 311.6032409667969, + "learning_rate": 1.9994063131273038e-06, + "loss": 24.6172, + "step": 4294 + }, + { + "epoch": 0.04065656326615613, + "grad_norm": 828.9896850585938, + "learning_rate": 1.9994052563798188e-06, + "loss": 29.6562, + "step": 4295 + }, + { + "epoch": 0.040666029287871185, + "grad_norm": 279.74530029296875, + "learning_rate": 1.999404198692958e-06, + "loss": 27.6641, + "step": 4296 + }, + { + "epoch": 0.04067549530958624, + "grad_norm": 261.7665710449219, + "learning_rate": 1.9994031400667223e-06, + "loss": 25.3906, + "step": 4297 + }, + { + "epoch": 0.04068496133130129, + "grad_norm": 445.43133544921875, + "learning_rate": 1.9994020805011125e-06, + "loss": 39.3281, + "step": 4298 + }, + { + "epoch": 0.04069442735301635, + "grad_norm": 424.6083984375, + "learning_rate": 1.9994010199961297e-06, + "loss": 36.5156, + "step": 4299 + }, + { + "epoch": 0.040703893374731404, + "grad_norm": 3.1747403144836426, + "learning_rate": 1.9993999585517754e-06, + "loss": 0.9341, + "step": 4300 + }, + { + "epoch": 0.04071335939644646, + "grad_norm": 198.1969757080078, + "learning_rate": 1.9993988961680497e-06, + "loss": 26.9062, + "step": 4301 + }, + { + "epoch": 0.04072282541816151, + "grad_norm": 232.04188537597656, + "learning_rate": 1.9993978328449544e-06, + "loss": 31.0625, + "step": 4302 + }, + { + "epoch": 0.04073229143987656, + "grad_norm": 723.4873657226562, + "learning_rate": 1.99939676858249e-06, + "loss": 30.1719, + "step": 4303 + }, + { + "epoch": 0.040741757461591616, + "grad_norm": 1181.4915771484375, + "learning_rate": 1.9993957033806575e-06, + "loss": 50.1484, + "step": 4304 + }, + { + "epoch": 0.04075122348330667, + "grad_norm": 443.60736083984375, + "learning_rate": 1.999394637239458e-06, + "loss": 44.5938, + "step": 4305 + }, + { + "epoch": 0.04076068950502172, + "grad_norm": 514.5774536132812, + "learning_rate": 1.999393570158893e-06, + "loss": 37.2812, + "step": 4306 + }, + { + "epoch": 0.04077015552673678, + "grad_norm": 181.84347534179688, + "learning_rate": 1.9993925021389625e-06, + "loss": 22.2578, + "step": 4307 + }, + { + "epoch": 0.040779621548451835, + "grad_norm": 761.321044921875, + "learning_rate": 1.999391433179669e-06, + "loss": 33.3828, + "step": 4308 + }, + { + "epoch": 0.04078908757016689, + "grad_norm": 405.2384033203125, + "learning_rate": 1.9993903632810114e-06, + "loss": 31.25, + "step": 4309 + }, + { + "epoch": 0.04079855359188194, + "grad_norm": 764.4010620117188, + "learning_rate": 1.9993892924429923e-06, + "loss": 47.625, + "step": 4310 + }, + { + "epoch": 0.040808019613596994, + "grad_norm": 490.5711669921875, + "learning_rate": 1.9993882206656124e-06, + "loss": 56.5156, + "step": 4311 + }, + { + "epoch": 0.04081748563531205, + "grad_norm": 3.4029462337493896, + "learning_rate": 1.9993871479488725e-06, + "loss": 1.0498, + "step": 4312 + }, + { + "epoch": 0.0408269516570271, + "grad_norm": 2.9011237621307373, + "learning_rate": 1.999386074292774e-06, + "loss": 0.8789, + "step": 4313 + }, + { + "epoch": 0.04083641767874215, + "grad_norm": 1028.3140869140625, + "learning_rate": 1.9993849996973175e-06, + "loss": 37.1836, + "step": 4314 + }, + { + "epoch": 0.040845883700457206, + "grad_norm": 303.9735412597656, + "learning_rate": 1.9993839241625037e-06, + "loss": 29.4531, + "step": 4315 + }, + { + "epoch": 0.040855349722172266, + "grad_norm": 540.9823608398438, + "learning_rate": 1.999382847688334e-06, + "loss": 37.1719, + "step": 4316 + }, + { + "epoch": 0.04086481574388732, + "grad_norm": 854.4536743164062, + "learning_rate": 1.99938177027481e-06, + "loss": 26.375, + "step": 4317 + }, + { + "epoch": 0.04087428176560237, + "grad_norm": 3.248136520385742, + "learning_rate": 1.999380691921932e-06, + "loss": 0.8896, + "step": 4318 + }, + { + "epoch": 0.040883747787317425, + "grad_norm": 212.701171875, + "learning_rate": 1.999379612629701e-06, + "loss": 26.4531, + "step": 4319 + }, + { + "epoch": 0.04089321380903248, + "grad_norm": 279.94635009765625, + "learning_rate": 1.999378532398118e-06, + "loss": 27.8594, + "step": 4320 + }, + { + "epoch": 0.04090267983074753, + "grad_norm": 269.3084411621094, + "learning_rate": 1.9993774512271847e-06, + "loss": 27.8906, + "step": 4321 + }, + { + "epoch": 0.040912145852462584, + "grad_norm": 555.3165283203125, + "learning_rate": 1.999376369116901e-06, + "loss": 37.7148, + "step": 4322 + }, + { + "epoch": 0.04092161187417764, + "grad_norm": 282.8454895019531, + "learning_rate": 1.999375286067269e-06, + "loss": 23.4609, + "step": 4323 + }, + { + "epoch": 0.0409310778958927, + "grad_norm": 2.6049394607543945, + "learning_rate": 1.9993742020782892e-06, + "loss": 0.8857, + "step": 4324 + }, + { + "epoch": 0.04094054391760775, + "grad_norm": 464.0082702636719, + "learning_rate": 1.9993731171499626e-06, + "loss": 37.0, + "step": 4325 + }, + { + "epoch": 0.0409500099393228, + "grad_norm": 599.52587890625, + "learning_rate": 1.99937203128229e-06, + "loss": 41.9375, + "step": 4326 + }, + { + "epoch": 0.040959475961037856, + "grad_norm": 847.5388793945312, + "learning_rate": 1.9993709444752735e-06, + "loss": 51.6172, + "step": 4327 + }, + { + "epoch": 0.04096894198275291, + "grad_norm": 220.5702362060547, + "learning_rate": 1.9993698567289123e-06, + "loss": 30.1094, + "step": 4328 + }, + { + "epoch": 0.04097840800446796, + "grad_norm": 246.2312774658203, + "learning_rate": 1.999368768043209e-06, + "loss": 26.8125, + "step": 4329 + }, + { + "epoch": 0.040987874026183015, + "grad_norm": 298.9996337890625, + "learning_rate": 1.9993676784181642e-06, + "loss": 24.8828, + "step": 4330 + }, + { + "epoch": 0.04099734004789807, + "grad_norm": 316.9929504394531, + "learning_rate": 1.9993665878537786e-06, + "loss": 26.5469, + "step": 4331 + }, + { + "epoch": 0.04100680606961312, + "grad_norm": 301.24517822265625, + "learning_rate": 1.9993654963500534e-06, + "loss": 34.3594, + "step": 4332 + }, + { + "epoch": 0.04101627209132818, + "grad_norm": 270.9178466796875, + "learning_rate": 1.9993644039069893e-06, + "loss": 25.1406, + "step": 4333 + }, + { + "epoch": 0.041025738113043234, + "grad_norm": 503.1211242675781, + "learning_rate": 1.9993633105245882e-06, + "loss": 57.2188, + "step": 4334 + }, + { + "epoch": 0.04103520413475829, + "grad_norm": 324.36163330078125, + "learning_rate": 1.9993622162028504e-06, + "loss": 27.125, + "step": 4335 + }, + { + "epoch": 0.04104467015647334, + "grad_norm": 282.72357177734375, + "learning_rate": 1.9993611209417773e-06, + "loss": 35.8594, + "step": 4336 + }, + { + "epoch": 0.04105413617818839, + "grad_norm": 2.889517307281494, + "learning_rate": 1.9993600247413695e-06, + "loss": 0.9834, + "step": 4337 + }, + { + "epoch": 0.041063602199903446, + "grad_norm": 532.3208618164062, + "learning_rate": 1.9993589276016285e-06, + "loss": 30.3555, + "step": 4338 + }, + { + "epoch": 0.0410730682216185, + "grad_norm": 276.74212646484375, + "learning_rate": 1.999357829522555e-06, + "loss": 29.9688, + "step": 4339 + }, + { + "epoch": 0.04108253424333355, + "grad_norm": 734.364013671875, + "learning_rate": 1.99935673050415e-06, + "loss": 32.9922, + "step": 4340 + }, + { + "epoch": 0.041092000265048605, + "grad_norm": 767.2222290039062, + "learning_rate": 1.999355630546415e-06, + "loss": 42.5781, + "step": 4341 + }, + { + "epoch": 0.041101466286763665, + "grad_norm": 572.6967163085938, + "learning_rate": 1.9993545296493507e-06, + "loss": 24.8047, + "step": 4342 + }, + { + "epoch": 0.04111093230847872, + "grad_norm": 329.046630859375, + "learning_rate": 1.999353427812958e-06, + "loss": 29.75, + "step": 4343 + }, + { + "epoch": 0.04112039833019377, + "grad_norm": 2.8309812545776367, + "learning_rate": 1.999352325037238e-06, + "loss": 0.9062, + "step": 4344 + }, + { + "epoch": 0.041129864351908824, + "grad_norm": 360.21795654296875, + "learning_rate": 1.999351221322192e-06, + "loss": 24.8516, + "step": 4345 + }, + { + "epoch": 0.04113933037362388, + "grad_norm": 451.837646484375, + "learning_rate": 1.999350116667821e-06, + "loss": 52.6094, + "step": 4346 + }, + { + "epoch": 0.04114879639533893, + "grad_norm": 572.2239990234375, + "learning_rate": 1.9993490110741256e-06, + "loss": 33.3008, + "step": 4347 + }, + { + "epoch": 0.04115826241705398, + "grad_norm": 855.2001953125, + "learning_rate": 1.9993479045411073e-06, + "loss": 69.1562, + "step": 4348 + }, + { + "epoch": 0.041167728438769036, + "grad_norm": 593.4892578125, + "learning_rate": 1.999346797068767e-06, + "loss": 32.0938, + "step": 4349 + }, + { + "epoch": 0.041177194460484096, + "grad_norm": 367.2267761230469, + "learning_rate": 1.9993456886571055e-06, + "loss": 36.7578, + "step": 4350 + }, + { + "epoch": 0.04118666048219915, + "grad_norm": 734.198486328125, + "learning_rate": 1.9993445793061246e-06, + "loss": 33.0, + "step": 4351 + }, + { + "epoch": 0.0411961265039142, + "grad_norm": 343.13177490234375, + "learning_rate": 1.9993434690158244e-06, + "loss": 31.0625, + "step": 4352 + }, + { + "epoch": 0.041205592525629255, + "grad_norm": 310.1025390625, + "learning_rate": 1.999342357786206e-06, + "loss": 28.5156, + "step": 4353 + }, + { + "epoch": 0.04121505854734431, + "grad_norm": 3.1348001956939697, + "learning_rate": 1.9993412456172715e-06, + "loss": 0.8623, + "step": 4354 + }, + { + "epoch": 0.04122452456905936, + "grad_norm": 239.41494750976562, + "learning_rate": 1.999340132509021e-06, + "loss": 24.7812, + "step": 4355 + }, + { + "epoch": 0.041233990590774414, + "grad_norm": 372.00067138671875, + "learning_rate": 1.9993390184614554e-06, + "loss": 15.4844, + "step": 4356 + }, + { + "epoch": 0.04124345661248947, + "grad_norm": 558.8966674804688, + "learning_rate": 1.9993379034745765e-06, + "loss": 31.25, + "step": 4357 + }, + { + "epoch": 0.04125292263420452, + "grad_norm": 479.0018310546875, + "learning_rate": 1.999336787548385e-06, + "loss": 29.4844, + "step": 4358 + }, + { + "epoch": 0.04126238865591958, + "grad_norm": 542.1004638671875, + "learning_rate": 1.999335670682882e-06, + "loss": 57.3438, + "step": 4359 + }, + { + "epoch": 0.04127185467763463, + "grad_norm": 300.1023254394531, + "learning_rate": 1.9993345528780683e-06, + "loss": 32.7656, + "step": 4360 + }, + { + "epoch": 0.041281320699349686, + "grad_norm": 309.31231689453125, + "learning_rate": 1.999333434133945e-06, + "loss": 23.9844, + "step": 4361 + }, + { + "epoch": 0.04129078672106474, + "grad_norm": 268.0833740234375, + "learning_rate": 1.9993323144505137e-06, + "loss": 14.6328, + "step": 4362 + }, + { + "epoch": 0.04130025274277979, + "grad_norm": 528.9525146484375, + "learning_rate": 1.999331193827775e-06, + "loss": 44.625, + "step": 4363 + }, + { + "epoch": 0.041309718764494845, + "grad_norm": 354.7735290527344, + "learning_rate": 1.9993300722657294e-06, + "loss": 29.75, + "step": 4364 + }, + { + "epoch": 0.0413191847862099, + "grad_norm": 805.229248046875, + "learning_rate": 1.999328949764379e-06, + "loss": 79.1562, + "step": 4365 + }, + { + "epoch": 0.04132865080792495, + "grad_norm": 354.97613525390625, + "learning_rate": 1.999327826323724e-06, + "loss": 30.8594, + "step": 4366 + }, + { + "epoch": 0.04133811682964001, + "grad_norm": 407.6391296386719, + "learning_rate": 1.9993267019437665e-06, + "loss": 48.3906, + "step": 4367 + }, + { + "epoch": 0.041347582851355064, + "grad_norm": 412.95086669921875, + "learning_rate": 1.9993255766245065e-06, + "loss": 25.1484, + "step": 4368 + }, + { + "epoch": 0.04135704887307012, + "grad_norm": 349.15826416015625, + "learning_rate": 1.999324450365946e-06, + "loss": 23.6406, + "step": 4369 + }, + { + "epoch": 0.04136651489478517, + "grad_norm": 440.78814697265625, + "learning_rate": 1.9993233231680844e-06, + "loss": 37.2656, + "step": 4370 + }, + { + "epoch": 0.04137598091650022, + "grad_norm": 401.6226501464844, + "learning_rate": 1.999322195030925e-06, + "loss": 47.2344, + "step": 4371 + }, + { + "epoch": 0.041385446938215276, + "grad_norm": 167.1709442138672, + "learning_rate": 1.9993210659544674e-06, + "loss": 28.5625, + "step": 4372 + }, + { + "epoch": 0.04139491295993033, + "grad_norm": 344.2583312988281, + "learning_rate": 1.999319935938713e-06, + "loss": 38.9062, + "step": 4373 + }, + { + "epoch": 0.04140437898164538, + "grad_norm": 2.8705453872680664, + "learning_rate": 1.9993188049836624e-06, + "loss": 0.6968, + "step": 4374 + }, + { + "epoch": 0.041413845003360435, + "grad_norm": 3.5838558673858643, + "learning_rate": 1.9993176730893176e-06, + "loss": 0.9438, + "step": 4375 + }, + { + "epoch": 0.041423311025075495, + "grad_norm": 453.57855224609375, + "learning_rate": 1.999316540255679e-06, + "loss": 46.1094, + "step": 4376 + }, + { + "epoch": 0.04143277704679055, + "grad_norm": 3.1955060958862305, + "learning_rate": 1.999315406482748e-06, + "loss": 0.8608, + "step": 4377 + }, + { + "epoch": 0.0414422430685056, + "grad_norm": 627.7334594726562, + "learning_rate": 1.9993142717705254e-06, + "loss": 33.5859, + "step": 4378 + }, + { + "epoch": 0.041451709090220654, + "grad_norm": 610.233154296875, + "learning_rate": 1.9993131361190126e-06, + "loss": 51.7656, + "step": 4379 + }, + { + "epoch": 0.04146117511193571, + "grad_norm": 220.92617797851562, + "learning_rate": 1.9993119995282107e-06, + "loss": 33.9219, + "step": 4380 + }, + { + "epoch": 0.04147064113365076, + "grad_norm": 866.8368530273438, + "learning_rate": 1.9993108619981203e-06, + "loss": 60.1328, + "step": 4381 + }, + { + "epoch": 0.04148010715536581, + "grad_norm": 1408.882568359375, + "learning_rate": 1.9993097235287424e-06, + "loss": 54.7031, + "step": 4382 + }, + { + "epoch": 0.041489573177080866, + "grad_norm": 519.2298583984375, + "learning_rate": 1.9993085841200786e-06, + "loss": 38.7656, + "step": 4383 + }, + { + "epoch": 0.04149903919879592, + "grad_norm": 313.7239685058594, + "learning_rate": 1.9993074437721295e-06, + "loss": 22.9609, + "step": 4384 + }, + { + "epoch": 0.04150850522051098, + "grad_norm": 325.1258544921875, + "learning_rate": 1.999306302484897e-06, + "loss": 33.3984, + "step": 4385 + }, + { + "epoch": 0.04151797124222603, + "grad_norm": 469.89154052734375, + "learning_rate": 1.999305160258381e-06, + "loss": 43.3281, + "step": 4386 + }, + { + "epoch": 0.041527437263941085, + "grad_norm": 392.89080810546875, + "learning_rate": 1.9993040170925834e-06, + "loss": 17.6953, + "step": 4387 + }, + { + "epoch": 0.04153690328565614, + "grad_norm": 491.6268310546875, + "learning_rate": 1.999302872987505e-06, + "loss": 40.4062, + "step": 4388 + }, + { + "epoch": 0.04154636930737119, + "grad_norm": 770.7777709960938, + "learning_rate": 1.999301727943147e-06, + "loss": 63.4766, + "step": 4389 + }, + { + "epoch": 0.041555835329086244, + "grad_norm": 192.2635040283203, + "learning_rate": 1.99930058195951e-06, + "loss": 25.1328, + "step": 4390 + }, + { + "epoch": 0.0415653013508013, + "grad_norm": 3.407944679260254, + "learning_rate": 1.999299435036596e-06, + "loss": 0.9165, + "step": 4391 + }, + { + "epoch": 0.04157476737251635, + "grad_norm": 650.1852416992188, + "learning_rate": 1.9992982871744052e-06, + "loss": 43.9219, + "step": 4392 + }, + { + "epoch": 0.04158423339423141, + "grad_norm": 872.5902709960938, + "learning_rate": 1.999297138372939e-06, + "loss": 51.125, + "step": 4393 + }, + { + "epoch": 0.04159369941594646, + "grad_norm": 456.514892578125, + "learning_rate": 1.9992959886321986e-06, + "loss": 26.6094, + "step": 4394 + }, + { + "epoch": 0.041603165437661516, + "grad_norm": 222.48138427734375, + "learning_rate": 1.999294837952185e-06, + "loss": 26.8359, + "step": 4395 + }, + { + "epoch": 0.04161263145937657, + "grad_norm": 1133.62060546875, + "learning_rate": 1.9992936863328994e-06, + "loss": 47.5078, + "step": 4396 + }, + { + "epoch": 0.04162209748109162, + "grad_norm": 381.217041015625, + "learning_rate": 1.9992925337743427e-06, + "loss": 25.125, + "step": 4397 + }, + { + "epoch": 0.041631563502806675, + "grad_norm": 803.9599609375, + "learning_rate": 1.9992913802765157e-06, + "loss": 47.6016, + "step": 4398 + }, + { + "epoch": 0.04164102952452173, + "grad_norm": 216.14227294921875, + "learning_rate": 1.9992902258394203e-06, + "loss": 23.5938, + "step": 4399 + }, + { + "epoch": 0.04165049554623678, + "grad_norm": 364.2700500488281, + "learning_rate": 1.999289070463057e-06, + "loss": 31.75, + "step": 4400 + }, + { + "epoch": 0.041659961567951834, + "grad_norm": 294.52203369140625, + "learning_rate": 1.9992879141474266e-06, + "loss": 28.5938, + "step": 4401 + }, + { + "epoch": 0.041669427589666894, + "grad_norm": 187.77114868164062, + "learning_rate": 1.9992867568925308e-06, + "loss": 25.8906, + "step": 4402 + }, + { + "epoch": 0.04167889361138195, + "grad_norm": 682.9649658203125, + "learning_rate": 1.9992855986983703e-06, + "loss": 43.7891, + "step": 4403 + }, + { + "epoch": 0.041688359633097, + "grad_norm": 222.41531372070312, + "learning_rate": 1.9992844395649465e-06, + "loss": 30.2891, + "step": 4404 + }, + { + "epoch": 0.04169782565481205, + "grad_norm": 179.41566467285156, + "learning_rate": 1.9992832794922605e-06, + "loss": 23.8594, + "step": 4405 + }, + { + "epoch": 0.041707291676527106, + "grad_norm": 459.00335693359375, + "learning_rate": 1.9992821184803128e-06, + "loss": 68.4062, + "step": 4406 + }, + { + "epoch": 0.04171675769824216, + "grad_norm": 264.3018798828125, + "learning_rate": 1.9992809565291055e-06, + "loss": 26.2344, + "step": 4407 + }, + { + "epoch": 0.04172622371995721, + "grad_norm": 496.1560363769531, + "learning_rate": 1.999279793638638e-06, + "loss": 26.375, + "step": 4408 + }, + { + "epoch": 0.041735689741672265, + "grad_norm": 663.3277587890625, + "learning_rate": 1.999278629808914e-06, + "loss": 40.6562, + "step": 4409 + }, + { + "epoch": 0.041745155763387325, + "grad_norm": 230.322021484375, + "learning_rate": 1.999277465039932e-06, + "loss": 23.8047, + "step": 4410 + }, + { + "epoch": 0.04175462178510238, + "grad_norm": 344.6085510253906, + "learning_rate": 1.9992762993316945e-06, + "loss": 47.4688, + "step": 4411 + }, + { + "epoch": 0.04176408780681743, + "grad_norm": 639.6849365234375, + "learning_rate": 1.9992751326842024e-06, + "loss": 43.4648, + "step": 4412 + }, + { + "epoch": 0.041773553828532484, + "grad_norm": 1151.350341796875, + "learning_rate": 1.9992739650974566e-06, + "loss": 79.6406, + "step": 4413 + }, + { + "epoch": 0.04178301985024754, + "grad_norm": 554.1392822265625, + "learning_rate": 1.9992727965714585e-06, + "loss": 64.5781, + "step": 4414 + }, + { + "epoch": 0.04179248587196259, + "grad_norm": 505.5003662109375, + "learning_rate": 1.9992716271062088e-06, + "loss": 54.3281, + "step": 4415 + }, + { + "epoch": 0.04180195189367764, + "grad_norm": 247.5537567138672, + "learning_rate": 1.9992704567017084e-06, + "loss": 26.2812, + "step": 4416 + }, + { + "epoch": 0.041811417915392696, + "grad_norm": 267.4849853515625, + "learning_rate": 1.9992692853579594e-06, + "loss": 27.8594, + "step": 4417 + }, + { + "epoch": 0.04182088393710775, + "grad_norm": 1203.4268798828125, + "learning_rate": 1.999268113074962e-06, + "loss": 29.4102, + "step": 4418 + }, + { + "epoch": 0.04183034995882281, + "grad_norm": 1704.06103515625, + "learning_rate": 1.9992669398527175e-06, + "loss": 32.1484, + "step": 4419 + }, + { + "epoch": 0.04183981598053786, + "grad_norm": 185.63137817382812, + "learning_rate": 1.9992657656912275e-06, + "loss": 24.6094, + "step": 4420 + }, + { + "epoch": 0.041849282002252915, + "grad_norm": 862.5624389648438, + "learning_rate": 1.9992645905904922e-06, + "loss": 59.4922, + "step": 4421 + }, + { + "epoch": 0.04185874802396797, + "grad_norm": 217.8831329345703, + "learning_rate": 1.9992634145505135e-06, + "loss": 30.0625, + "step": 4422 + }, + { + "epoch": 0.04186821404568302, + "grad_norm": 259.8060607910156, + "learning_rate": 1.9992622375712917e-06, + "loss": 24.3984, + "step": 4423 + }, + { + "epoch": 0.041877680067398074, + "grad_norm": 427.56866455078125, + "learning_rate": 1.999261059652829e-06, + "loss": 26.3125, + "step": 4424 + }, + { + "epoch": 0.04188714608911313, + "grad_norm": 418.6398620605469, + "learning_rate": 1.9992598807951257e-06, + "loss": 33.3203, + "step": 4425 + }, + { + "epoch": 0.04189661211082818, + "grad_norm": 516.4111328125, + "learning_rate": 1.9992587009981828e-06, + "loss": 24.0938, + "step": 4426 + }, + { + "epoch": 0.04190607813254323, + "grad_norm": 643.7626953125, + "learning_rate": 1.9992575202620022e-06, + "loss": 38.7422, + "step": 4427 + }, + { + "epoch": 0.04191554415425829, + "grad_norm": 473.3150634765625, + "learning_rate": 1.9992563385865846e-06, + "loss": 27.0469, + "step": 4428 + }, + { + "epoch": 0.041925010175973346, + "grad_norm": 786.8938598632812, + "learning_rate": 1.99925515597193e-06, + "loss": 42.9141, + "step": 4429 + }, + { + "epoch": 0.0419344761976884, + "grad_norm": 210.9734344482422, + "learning_rate": 1.999253972418042e-06, + "loss": 26.625, + "step": 4430 + }, + { + "epoch": 0.04194394221940345, + "grad_norm": 612.1520385742188, + "learning_rate": 1.9992527879249193e-06, + "loss": 30.1094, + "step": 4431 + }, + { + "epoch": 0.041953408241118505, + "grad_norm": 298.0303039550781, + "learning_rate": 1.999251602492564e-06, + "loss": 28.3438, + "step": 4432 + }, + { + "epoch": 0.04196287426283356, + "grad_norm": 823.0400390625, + "learning_rate": 1.999250416120978e-06, + "loss": 53.0, + "step": 4433 + }, + { + "epoch": 0.04197234028454861, + "grad_norm": 454.8136901855469, + "learning_rate": 1.9992492288101613e-06, + "loss": 47.6719, + "step": 4434 + }, + { + "epoch": 0.041981806306263664, + "grad_norm": 473.9920959472656, + "learning_rate": 1.999248040560115e-06, + "loss": 21.2578, + "step": 4435 + }, + { + "epoch": 0.041991272327978724, + "grad_norm": 350.37896728515625, + "learning_rate": 1.999246851370841e-06, + "loss": 26.1406, + "step": 4436 + }, + { + "epoch": 0.04200073834969378, + "grad_norm": 360.17376708984375, + "learning_rate": 1.9992456612423393e-06, + "loss": 48.6562, + "step": 4437 + }, + { + "epoch": 0.04201020437140883, + "grad_norm": 285.65875244140625, + "learning_rate": 1.999244470174612e-06, + "loss": 31.5781, + "step": 4438 + }, + { + "epoch": 0.04201967039312388, + "grad_norm": 445.0261535644531, + "learning_rate": 1.99924327816766e-06, + "loss": 49.8438, + "step": 4439 + }, + { + "epoch": 0.042029136414838936, + "grad_norm": 452.4782409667969, + "learning_rate": 1.9992420852214842e-06, + "loss": 34.9375, + "step": 4440 + }, + { + "epoch": 0.04203860243655399, + "grad_norm": 674.9003295898438, + "learning_rate": 1.9992408913360862e-06, + "loss": 49.4688, + "step": 4441 + }, + { + "epoch": 0.04204806845826904, + "grad_norm": 331.1766052246094, + "learning_rate": 1.9992396965114663e-06, + "loss": 21.5859, + "step": 4442 + }, + { + "epoch": 0.042057534479984095, + "grad_norm": 910.8904418945312, + "learning_rate": 1.999238500747626e-06, + "loss": 67.2188, + "step": 4443 + }, + { + "epoch": 0.04206700050169915, + "grad_norm": 3.47652006149292, + "learning_rate": 1.999237304044567e-06, + "loss": 0.9858, + "step": 4444 + }, + { + "epoch": 0.04207646652341421, + "grad_norm": 302.6944580078125, + "learning_rate": 1.99923610640229e-06, + "loss": 32.3906, + "step": 4445 + }, + { + "epoch": 0.04208593254512926, + "grad_norm": 652.419677734375, + "learning_rate": 1.9992349078207956e-06, + "loss": 24.5781, + "step": 4446 + }, + { + "epoch": 0.042095398566844314, + "grad_norm": 265.12969970703125, + "learning_rate": 1.9992337083000853e-06, + "loss": 25.6875, + "step": 4447 + }, + { + "epoch": 0.04210486458855937, + "grad_norm": 355.6609802246094, + "learning_rate": 1.9992325078401607e-06, + "loss": 21.4375, + "step": 4448 + }, + { + "epoch": 0.04211433061027442, + "grad_norm": 395.69952392578125, + "learning_rate": 1.9992313064410226e-06, + "loss": 17.7578, + "step": 4449 + }, + { + "epoch": 0.04212379663198947, + "grad_norm": 262.7294921875, + "learning_rate": 1.999230104102672e-06, + "loss": 21.7266, + "step": 4450 + }, + { + "epoch": 0.042133262653704526, + "grad_norm": 964.8975219726562, + "learning_rate": 1.99922890082511e-06, + "loss": 49.25, + "step": 4451 + }, + { + "epoch": 0.04214272867541958, + "grad_norm": 362.2992858886719, + "learning_rate": 1.9992276966083377e-06, + "loss": 46.4688, + "step": 4452 + }, + { + "epoch": 0.04215219469713464, + "grad_norm": 250.45574951171875, + "learning_rate": 1.9992264914523566e-06, + "loss": 23.7344, + "step": 4453 + }, + { + "epoch": 0.04216166071884969, + "grad_norm": 1058.556884765625, + "learning_rate": 1.9992252853571675e-06, + "loss": 61.6328, + "step": 4454 + }, + { + "epoch": 0.042171126740564745, + "grad_norm": 3.6030843257904053, + "learning_rate": 1.999224078322772e-06, + "loss": 0.8398, + "step": 4455 + }, + { + "epoch": 0.0421805927622798, + "grad_norm": 254.81800842285156, + "learning_rate": 1.9992228703491703e-06, + "loss": 24.5781, + "step": 4456 + }, + { + "epoch": 0.04219005878399485, + "grad_norm": 448.5887451171875, + "learning_rate": 1.9992216614363642e-06, + "loss": 54.2031, + "step": 4457 + }, + { + "epoch": 0.042199524805709904, + "grad_norm": 408.8059997558594, + "learning_rate": 1.9992204515843554e-06, + "loss": 22.5469, + "step": 4458 + }, + { + "epoch": 0.04220899082742496, + "grad_norm": 397.5091247558594, + "learning_rate": 1.9992192407931436e-06, + "loss": 43.1719, + "step": 4459 + }, + { + "epoch": 0.04221845684914001, + "grad_norm": 444.2850341796875, + "learning_rate": 1.9992180290627308e-06, + "loss": 21.875, + "step": 4460 + }, + { + "epoch": 0.04222792287085506, + "grad_norm": 278.0747985839844, + "learning_rate": 1.9992168163931184e-06, + "loss": 37.2812, + "step": 4461 + }, + { + "epoch": 0.04223738889257012, + "grad_norm": 176.15139770507812, + "learning_rate": 1.999215602784307e-06, + "loss": 24.875, + "step": 4462 + }, + { + "epoch": 0.042246854914285176, + "grad_norm": 968.7682495117188, + "learning_rate": 1.999214388236298e-06, + "loss": 50.5781, + "step": 4463 + }, + { + "epoch": 0.04225632093600023, + "grad_norm": 243.7685546875, + "learning_rate": 1.9992131727490924e-06, + "loss": 25.7578, + "step": 4464 + }, + { + "epoch": 0.04226578695771528, + "grad_norm": 321.9754333496094, + "learning_rate": 1.9992119563226916e-06, + "loss": 24.2031, + "step": 4465 + }, + { + "epoch": 0.042275252979430335, + "grad_norm": 218.00486755371094, + "learning_rate": 1.9992107389570965e-06, + "loss": 30.4531, + "step": 4466 + }, + { + "epoch": 0.04228471900114539, + "grad_norm": 1358.9815673828125, + "learning_rate": 1.999209520652308e-06, + "loss": 59.9805, + "step": 4467 + }, + { + "epoch": 0.04229418502286044, + "grad_norm": 431.9957275390625, + "learning_rate": 1.9992083014083277e-06, + "loss": 35.7812, + "step": 4468 + }, + { + "epoch": 0.042303651044575494, + "grad_norm": 362.8274841308594, + "learning_rate": 1.999207081225157e-06, + "loss": 17.2969, + "step": 4469 + }, + { + "epoch": 0.04231311706629055, + "grad_norm": 236.18557739257812, + "learning_rate": 1.9992058601027963e-06, + "loss": 21.8906, + "step": 4470 + }, + { + "epoch": 0.04232258308800561, + "grad_norm": 777.0045166015625, + "learning_rate": 1.999204638041247e-06, + "loss": 41.5703, + "step": 4471 + }, + { + "epoch": 0.04233204910972066, + "grad_norm": 255.174072265625, + "learning_rate": 1.9992034150405107e-06, + "loss": 25.6719, + "step": 4472 + }, + { + "epoch": 0.04234151513143571, + "grad_norm": 547.6278686523438, + "learning_rate": 1.999202191100588e-06, + "loss": 37.7344, + "step": 4473 + }, + { + "epoch": 0.042350981153150766, + "grad_norm": 542.1714477539062, + "learning_rate": 1.9992009662214802e-06, + "loss": 26.2305, + "step": 4474 + }, + { + "epoch": 0.04236044717486582, + "grad_norm": 228.31687927246094, + "learning_rate": 1.9991997404031883e-06, + "loss": 23.125, + "step": 4475 + }, + { + "epoch": 0.04236991319658087, + "grad_norm": 504.2222595214844, + "learning_rate": 1.9991985136457142e-06, + "loss": 52.9219, + "step": 4476 + }, + { + "epoch": 0.042379379218295925, + "grad_norm": 3.0417490005493164, + "learning_rate": 1.999197285949058e-06, + "loss": 0.9863, + "step": 4477 + }, + { + "epoch": 0.04238884524001098, + "grad_norm": 571.474365234375, + "learning_rate": 1.9991960573132216e-06, + "loss": 47.4844, + "step": 4478 + }, + { + "epoch": 0.04239831126172604, + "grad_norm": 453.37255859375, + "learning_rate": 1.999194827738206e-06, + "loss": 57.1094, + "step": 4479 + }, + { + "epoch": 0.04240777728344109, + "grad_norm": 296.50775146484375, + "learning_rate": 1.999193597224012e-06, + "loss": 21.6875, + "step": 4480 + }, + { + "epoch": 0.042417243305156144, + "grad_norm": 245.28614807128906, + "learning_rate": 1.999192365770641e-06, + "loss": 22.5938, + "step": 4481 + }, + { + "epoch": 0.0424267093268712, + "grad_norm": 155.96205139160156, + "learning_rate": 1.9991911333780945e-06, + "loss": 23.875, + "step": 4482 + }, + { + "epoch": 0.04243617534858625, + "grad_norm": 271.29742431640625, + "learning_rate": 1.9991899000463726e-06, + "loss": 31.3125, + "step": 4483 + }, + { + "epoch": 0.0424456413703013, + "grad_norm": 456.900146484375, + "learning_rate": 1.999188665775478e-06, + "loss": 29.1406, + "step": 4484 + }, + { + "epoch": 0.042455107392016356, + "grad_norm": 233.85520935058594, + "learning_rate": 1.999187430565411e-06, + "loss": 22.6797, + "step": 4485 + }, + { + "epoch": 0.04246457341373141, + "grad_norm": 400.45684814453125, + "learning_rate": 1.9991861944161722e-06, + "loss": 24.9141, + "step": 4486 + }, + { + "epoch": 0.04247403943544646, + "grad_norm": 350.6846923828125, + "learning_rate": 1.999184957327764e-06, + "loss": 38.6406, + "step": 4487 + }, + { + "epoch": 0.04248350545716152, + "grad_norm": 701.9118041992188, + "learning_rate": 1.999183719300187e-06, + "loss": 31.6875, + "step": 4488 + }, + { + "epoch": 0.042492971478876575, + "grad_norm": 1477.4775390625, + "learning_rate": 1.9991824803334415e-06, + "loss": 89.9844, + "step": 4489 + }, + { + "epoch": 0.04250243750059163, + "grad_norm": 922.721923828125, + "learning_rate": 1.9991812404275305e-06, + "loss": 52.2344, + "step": 4490 + }, + { + "epoch": 0.04251190352230668, + "grad_norm": 3.14371395111084, + "learning_rate": 1.9991799995824536e-06, + "loss": 0.9141, + "step": 4491 + }, + { + "epoch": 0.042521369544021734, + "grad_norm": 503.524658203125, + "learning_rate": 1.9991787577982123e-06, + "loss": 43.4219, + "step": 4492 + }, + { + "epoch": 0.04253083556573679, + "grad_norm": 159.06707763671875, + "learning_rate": 1.999177515074808e-06, + "loss": 23.2344, + "step": 4493 + }, + { + "epoch": 0.04254030158745184, + "grad_norm": 889.537353515625, + "learning_rate": 1.9991762714122423e-06, + "loss": 25.5625, + "step": 4494 + }, + { + "epoch": 0.04254976760916689, + "grad_norm": 267.9351501464844, + "learning_rate": 1.9991750268105158e-06, + "loss": 23.2344, + "step": 4495 + }, + { + "epoch": 0.042559233630881946, + "grad_norm": 356.55194091796875, + "learning_rate": 1.999173781269629e-06, + "loss": 30.9531, + "step": 4496 + }, + { + "epoch": 0.042568699652597006, + "grad_norm": 1323.8984375, + "learning_rate": 1.9991725347895846e-06, + "loss": 68.8438, + "step": 4497 + }, + { + "epoch": 0.04257816567431206, + "grad_norm": 408.9671325683594, + "learning_rate": 1.999171287370383e-06, + "loss": 32.5391, + "step": 4498 + }, + { + "epoch": 0.04258763169602711, + "grad_norm": 337.346923828125, + "learning_rate": 1.9991700390120254e-06, + "loss": 36.1875, + "step": 4499 + }, + { + "epoch": 0.042597097717742165, + "grad_norm": 288.4118347167969, + "learning_rate": 1.999168789714513e-06, + "loss": 21.8984, + "step": 4500 + }, + { + "epoch": 0.04260656373945722, + "grad_norm": 326.4237060546875, + "learning_rate": 1.9991675394778464e-06, + "loss": 42.3594, + "step": 4501 + }, + { + "epoch": 0.04261602976117227, + "grad_norm": 581.9338989257812, + "learning_rate": 1.9991662883020278e-06, + "loss": 28.2031, + "step": 4502 + }, + { + "epoch": 0.042625495782887324, + "grad_norm": 529.5408935546875, + "learning_rate": 1.999165036187058e-06, + "loss": 55.9141, + "step": 4503 + }, + { + "epoch": 0.04263496180460238, + "grad_norm": 276.9686584472656, + "learning_rate": 1.9991637831329375e-06, + "loss": 30.625, + "step": 4504 + }, + { + "epoch": 0.04264442782631744, + "grad_norm": 301.5123596191406, + "learning_rate": 1.9991625291396687e-06, + "loss": 34.1328, + "step": 4505 + }, + { + "epoch": 0.04265389384803249, + "grad_norm": 323.87310791015625, + "learning_rate": 1.9991612742072514e-06, + "loss": 21.5781, + "step": 4506 + }, + { + "epoch": 0.04266335986974754, + "grad_norm": 580.73291015625, + "learning_rate": 1.999160018335688e-06, + "loss": 32.2812, + "step": 4507 + }, + { + "epoch": 0.042672825891462596, + "grad_norm": 276.14825439453125, + "learning_rate": 1.9991587615249792e-06, + "loss": 29.2188, + "step": 4508 + }, + { + "epoch": 0.04268229191317765, + "grad_norm": 1146.98779296875, + "learning_rate": 1.999157503775126e-06, + "loss": 67.6094, + "step": 4509 + }, + { + "epoch": 0.0426917579348927, + "grad_norm": 361.0499267578125, + "learning_rate": 1.9991562450861295e-06, + "loss": 21.3125, + "step": 4510 + }, + { + "epoch": 0.042701223956607755, + "grad_norm": 233.27719116210938, + "learning_rate": 1.9991549854579915e-06, + "loss": 25.8906, + "step": 4511 + }, + { + "epoch": 0.04271068997832281, + "grad_norm": 393.32611083984375, + "learning_rate": 1.999153724890713e-06, + "loss": 28.5938, + "step": 4512 + }, + { + "epoch": 0.04272015600003786, + "grad_norm": 577.8670654296875, + "learning_rate": 1.9991524633842945e-06, + "loss": 54.4219, + "step": 4513 + }, + { + "epoch": 0.04272962202175292, + "grad_norm": 338.1861877441406, + "learning_rate": 1.999151200938738e-06, + "loss": 24.9375, + "step": 4514 + }, + { + "epoch": 0.042739088043467974, + "grad_norm": 207.7797088623047, + "learning_rate": 1.9991499375540445e-06, + "loss": 27.2969, + "step": 4515 + }, + { + "epoch": 0.04274855406518303, + "grad_norm": 442.0766296386719, + "learning_rate": 1.9991486732302146e-06, + "loss": 64.5156, + "step": 4516 + }, + { + "epoch": 0.04275802008689808, + "grad_norm": 546.8529052734375, + "learning_rate": 1.9991474079672504e-06, + "loss": 30.0781, + "step": 4517 + }, + { + "epoch": 0.04276748610861313, + "grad_norm": 631.74609375, + "learning_rate": 1.9991461417651524e-06, + "loss": 51.2812, + "step": 4518 + }, + { + "epoch": 0.042776952130328186, + "grad_norm": 3.7441279888153076, + "learning_rate": 1.999144874623922e-06, + "loss": 0.9434, + "step": 4519 + }, + { + "epoch": 0.04278641815204324, + "grad_norm": 238.37525939941406, + "learning_rate": 1.9991436065435606e-06, + "loss": 24.8281, + "step": 4520 + }, + { + "epoch": 0.04279588417375829, + "grad_norm": 272.838623046875, + "learning_rate": 1.999142337524069e-06, + "loss": 26.4453, + "step": 4521 + }, + { + "epoch": 0.04280535019547335, + "grad_norm": 325.722900390625, + "learning_rate": 1.9991410675654483e-06, + "loss": 27.4844, + "step": 4522 + }, + { + "epoch": 0.042814816217188405, + "grad_norm": 208.0537567138672, + "learning_rate": 1.9991397966677004e-06, + "loss": 22.4531, + "step": 4523 + }, + { + "epoch": 0.04282428223890346, + "grad_norm": 236.81649780273438, + "learning_rate": 1.9991385248308263e-06, + "loss": 24.8125, + "step": 4524 + }, + { + "epoch": 0.04283374826061851, + "grad_norm": 322.5990295410156, + "learning_rate": 1.999137252054827e-06, + "loss": 27.0, + "step": 4525 + }, + { + "epoch": 0.042843214282333564, + "grad_norm": 403.4289855957031, + "learning_rate": 1.9991359783397033e-06, + "loss": 40.0547, + "step": 4526 + }, + { + "epoch": 0.04285268030404862, + "grad_norm": 296.2005615234375, + "learning_rate": 1.999134703685457e-06, + "loss": 21.3164, + "step": 4527 + }, + { + "epoch": 0.04286214632576367, + "grad_norm": 417.1848449707031, + "learning_rate": 1.9991334280920888e-06, + "loss": 20.5156, + "step": 4528 + }, + { + "epoch": 0.04287161234747872, + "grad_norm": 481.565673828125, + "learning_rate": 1.9991321515596003e-06, + "loss": 23.9766, + "step": 4529 + }, + { + "epoch": 0.042881078369193776, + "grad_norm": 867.62890625, + "learning_rate": 1.9991308740879928e-06, + "loss": 62.8594, + "step": 4530 + }, + { + "epoch": 0.042890544390908836, + "grad_norm": 208.0439910888672, + "learning_rate": 1.9991295956772667e-06, + "loss": 31.25, + "step": 4531 + }, + { + "epoch": 0.04290001041262389, + "grad_norm": 424.4955139160156, + "learning_rate": 1.9991283163274243e-06, + "loss": 47.1406, + "step": 4532 + }, + { + "epoch": 0.04290947643433894, + "grad_norm": 368.1078796386719, + "learning_rate": 1.9991270360384666e-06, + "loss": 23.6914, + "step": 4533 + }, + { + "epoch": 0.042918942456053995, + "grad_norm": 919.9414672851562, + "learning_rate": 1.9991257548103938e-06, + "loss": 93.2734, + "step": 4534 + }, + { + "epoch": 0.04292840847776905, + "grad_norm": 260.6031494140625, + "learning_rate": 1.9991244726432083e-06, + "loss": 26.5, + "step": 4535 + }, + { + "epoch": 0.0429378744994841, + "grad_norm": 534.1109008789062, + "learning_rate": 1.9991231895369107e-06, + "loss": 27.6406, + "step": 4536 + }, + { + "epoch": 0.042947340521199154, + "grad_norm": 682.5186767578125, + "learning_rate": 1.9991219054915017e-06, + "loss": 46.2578, + "step": 4537 + }, + { + "epoch": 0.04295680654291421, + "grad_norm": 397.214111328125, + "learning_rate": 1.9991206205069834e-06, + "loss": 42.375, + "step": 4538 + }, + { + "epoch": 0.04296627256462926, + "grad_norm": 249.9923858642578, + "learning_rate": 1.999119334583357e-06, + "loss": 25.7344, + "step": 4539 + }, + { + "epoch": 0.04297573858634432, + "grad_norm": 791.0979614257812, + "learning_rate": 1.9991180477206234e-06, + "loss": 50.0469, + "step": 4540 + }, + { + "epoch": 0.04298520460805937, + "grad_norm": 510.8815002441406, + "learning_rate": 1.999116759918784e-06, + "loss": 51.5625, + "step": 4541 + }, + { + "epoch": 0.042994670629774426, + "grad_norm": 360.553955078125, + "learning_rate": 1.999115471177839e-06, + "loss": 30.1719, + "step": 4542 + }, + { + "epoch": 0.04300413665148948, + "grad_norm": 197.01373291015625, + "learning_rate": 1.9991141814977912e-06, + "loss": 25.4219, + "step": 4543 + }, + { + "epoch": 0.04301360267320453, + "grad_norm": 791.6771850585938, + "learning_rate": 1.999112890878641e-06, + "loss": 42.6094, + "step": 4544 + }, + { + "epoch": 0.043023068694919585, + "grad_norm": 2.9372646808624268, + "learning_rate": 1.9991115993203892e-06, + "loss": 0.9067, + "step": 4545 + }, + { + "epoch": 0.04303253471663464, + "grad_norm": 414.1605529785156, + "learning_rate": 1.9991103068230377e-06, + "loss": 31.9219, + "step": 4546 + }, + { + "epoch": 0.04304200073834969, + "grad_norm": 600.2257080078125, + "learning_rate": 1.999109013386588e-06, + "loss": 58.7969, + "step": 4547 + }, + { + "epoch": 0.04305146676006475, + "grad_norm": 321.1924133300781, + "learning_rate": 1.9991077190110404e-06, + "loss": 22.3125, + "step": 4548 + }, + { + "epoch": 0.043060932781779804, + "grad_norm": 1358.6087646484375, + "learning_rate": 1.9991064236963968e-06, + "loss": 57.3125, + "step": 4549 + }, + { + "epoch": 0.04307039880349486, + "grad_norm": 1433.7969970703125, + "learning_rate": 1.999105127442658e-06, + "loss": 72.375, + "step": 4550 + }, + { + "epoch": 0.04307986482520991, + "grad_norm": 311.6475830078125, + "learning_rate": 1.999103830249825e-06, + "loss": 32.7734, + "step": 4551 + }, + { + "epoch": 0.04308933084692496, + "grad_norm": 1732.8831787109375, + "learning_rate": 1.9991025321179e-06, + "loss": 51.6797, + "step": 4552 + }, + { + "epoch": 0.043098796868640016, + "grad_norm": 814.1118774414062, + "learning_rate": 1.9991012330468835e-06, + "loss": 38.9844, + "step": 4553 + }, + { + "epoch": 0.04310826289035507, + "grad_norm": 930.5918579101562, + "learning_rate": 1.999099933036776e-06, + "loss": 25.8984, + "step": 4554 + }, + { + "epoch": 0.04311772891207012, + "grad_norm": 299.3908386230469, + "learning_rate": 1.999098632087581e-06, + "loss": 20.8672, + "step": 4555 + }, + { + "epoch": 0.043127194933785175, + "grad_norm": 283.8505554199219, + "learning_rate": 1.9990973301992974e-06, + "loss": 29.0312, + "step": 4556 + }, + { + "epoch": 0.043136660955500235, + "grad_norm": 279.1064453125, + "learning_rate": 1.9990960273719277e-06, + "loss": 23.625, + "step": 4557 + }, + { + "epoch": 0.04314612697721529, + "grad_norm": 812.0074462890625, + "learning_rate": 1.9990947236054724e-06, + "loss": 60.8906, + "step": 4558 + }, + { + "epoch": 0.04315559299893034, + "grad_norm": 1061.6507568359375, + "learning_rate": 1.999093418899933e-06, + "loss": 45.5469, + "step": 4559 + }, + { + "epoch": 0.043165059020645394, + "grad_norm": 336.2097473144531, + "learning_rate": 1.999092113255311e-06, + "loss": 33.375, + "step": 4560 + }, + { + "epoch": 0.04317452504236045, + "grad_norm": 393.3794250488281, + "learning_rate": 1.999090806671607e-06, + "loss": 29.1797, + "step": 4561 + }, + { + "epoch": 0.0431839910640755, + "grad_norm": 423.390625, + "learning_rate": 1.999089499148823e-06, + "loss": 26.2109, + "step": 4562 + }, + { + "epoch": 0.04319345708579055, + "grad_norm": 404.48382568359375, + "learning_rate": 1.99908819068696e-06, + "loss": 24.7812, + "step": 4563 + }, + { + "epoch": 0.043202923107505606, + "grad_norm": 609.75390625, + "learning_rate": 1.9990868812860187e-06, + "loss": 42.7031, + "step": 4564 + }, + { + "epoch": 0.043212389129220666, + "grad_norm": 647.4332275390625, + "learning_rate": 1.999085570946001e-06, + "loss": 54.6562, + "step": 4565 + }, + { + "epoch": 0.04322185515093572, + "grad_norm": 409.1749267578125, + "learning_rate": 1.999084259666908e-06, + "loss": 41.0938, + "step": 4566 + }, + { + "epoch": 0.04323132117265077, + "grad_norm": 793.9755859375, + "learning_rate": 1.9990829474487404e-06, + "loss": 67.4375, + "step": 4567 + }, + { + "epoch": 0.043240787194365825, + "grad_norm": 938.6893920898438, + "learning_rate": 1.9990816342915e-06, + "loss": 43.3125, + "step": 4568 + }, + { + "epoch": 0.04325025321608088, + "grad_norm": 171.53858947753906, + "learning_rate": 1.999080320195188e-06, + "loss": 24.7656, + "step": 4569 + }, + { + "epoch": 0.04325971923779593, + "grad_norm": 181.8933563232422, + "learning_rate": 1.9990790051598055e-06, + "loss": 22.6875, + "step": 4570 + }, + { + "epoch": 0.043269185259510984, + "grad_norm": 524.4183349609375, + "learning_rate": 1.999077689185353e-06, + "loss": 36.0469, + "step": 4571 + }, + { + "epoch": 0.04327865128122604, + "grad_norm": 1159.501953125, + "learning_rate": 1.9990763722718335e-06, + "loss": 29.5312, + "step": 4572 + }, + { + "epoch": 0.04328811730294109, + "grad_norm": 3.701143264770508, + "learning_rate": 1.9990750544192466e-06, + "loss": 0.9385, + "step": 4573 + }, + { + "epoch": 0.04329758332465615, + "grad_norm": 345.0570373535156, + "learning_rate": 1.9990737356275944e-06, + "loss": 28.1094, + "step": 4574 + }, + { + "epoch": 0.0433070493463712, + "grad_norm": 2.64680814743042, + "learning_rate": 1.9990724158968775e-06, + "loss": 0.772, + "step": 4575 + }, + { + "epoch": 0.043316515368086256, + "grad_norm": 292.7619323730469, + "learning_rate": 1.9990710952270983e-06, + "loss": 34.5, + "step": 4576 + }, + { + "epoch": 0.04332598138980131, + "grad_norm": 1110.42724609375, + "learning_rate": 1.9990697736182565e-06, + "loss": 79.5156, + "step": 4577 + }, + { + "epoch": 0.04333544741151636, + "grad_norm": 291.5158996582031, + "learning_rate": 1.9990684510703546e-06, + "loss": 23.6094, + "step": 4578 + }, + { + "epoch": 0.043344913433231415, + "grad_norm": 310.3685302734375, + "learning_rate": 1.999067127583393e-06, + "loss": 32.7188, + "step": 4579 + }, + { + "epoch": 0.04335437945494647, + "grad_norm": 405.0980224609375, + "learning_rate": 1.9990658031573734e-06, + "loss": 49.9062, + "step": 4580 + }, + { + "epoch": 0.04336384547666152, + "grad_norm": 667.5455932617188, + "learning_rate": 1.999064477792297e-06, + "loss": 27.6016, + "step": 4581 + }, + { + "epoch": 0.043373311498376574, + "grad_norm": 279.98175048828125, + "learning_rate": 1.999063151488165e-06, + "loss": 31.8438, + "step": 4582 + }, + { + "epoch": 0.043382777520091634, + "grad_norm": 625.1840209960938, + "learning_rate": 1.9990618242449785e-06, + "loss": 63.7344, + "step": 4583 + }, + { + "epoch": 0.04339224354180669, + "grad_norm": 1308.4615478515625, + "learning_rate": 1.999060496062739e-06, + "loss": 60.75, + "step": 4584 + }, + { + "epoch": 0.04340170956352174, + "grad_norm": 305.14569091796875, + "learning_rate": 1.9990591669414474e-06, + "loss": 21.8828, + "step": 4585 + }, + { + "epoch": 0.04341117558523679, + "grad_norm": 508.3394470214844, + "learning_rate": 1.9990578368811057e-06, + "loss": 41.7812, + "step": 4586 + }, + { + "epoch": 0.043420641606951846, + "grad_norm": 643.6275024414062, + "learning_rate": 1.999056505881714e-06, + "loss": 56.1094, + "step": 4587 + }, + { + "epoch": 0.0434301076286669, + "grad_norm": 356.34478759765625, + "learning_rate": 1.9990551739432745e-06, + "loss": 47.5312, + "step": 4588 + }, + { + "epoch": 0.04343957365038195, + "grad_norm": 423.7814636230469, + "learning_rate": 1.9990538410657884e-06, + "loss": 22.5859, + "step": 4589 + }, + { + "epoch": 0.043449039672097005, + "grad_norm": 483.5542297363281, + "learning_rate": 1.9990525072492562e-06, + "loss": 28.7109, + "step": 4590 + }, + { + "epoch": 0.043458505693812065, + "grad_norm": 780.8350219726562, + "learning_rate": 1.99905117249368e-06, + "loss": 19.8203, + "step": 4591 + }, + { + "epoch": 0.04346797171552712, + "grad_norm": 533.1849975585938, + "learning_rate": 1.9990498367990606e-06, + "loss": 42.7812, + "step": 4592 + }, + { + "epoch": 0.04347743773724217, + "grad_norm": 487.14422607421875, + "learning_rate": 1.999048500165399e-06, + "loss": 46.75, + "step": 4593 + }, + { + "epoch": 0.043486903758957224, + "grad_norm": 324.9021301269531, + "learning_rate": 1.999047162592697e-06, + "loss": 33.9844, + "step": 4594 + }, + { + "epoch": 0.04349636978067228, + "grad_norm": 2.7335622310638428, + "learning_rate": 1.9990458240809556e-06, + "loss": 1.0059, + "step": 4595 + }, + { + "epoch": 0.04350583580238733, + "grad_norm": 2.9918503761291504, + "learning_rate": 1.9990444846301763e-06, + "loss": 0.8687, + "step": 4596 + }, + { + "epoch": 0.04351530182410238, + "grad_norm": 290.5342102050781, + "learning_rate": 1.9990431442403605e-06, + "loss": 20.7188, + "step": 4597 + }, + { + "epoch": 0.043524767845817436, + "grad_norm": 387.83148193359375, + "learning_rate": 1.9990418029115083e-06, + "loss": 24.1484, + "step": 4598 + }, + { + "epoch": 0.04353423386753249, + "grad_norm": 202.5797576904297, + "learning_rate": 1.9990404606436223e-06, + "loss": 22.1719, + "step": 4599 + }, + { + "epoch": 0.04354369988924755, + "grad_norm": 203.2494659423828, + "learning_rate": 1.9990391174367033e-06, + "loss": 29.5078, + "step": 4600 + }, + { + "epoch": 0.0435531659109626, + "grad_norm": 497.4156188964844, + "learning_rate": 1.9990377732907525e-06, + "loss": 45.2812, + "step": 4601 + }, + { + "epoch": 0.043562631932677655, + "grad_norm": 349.16070556640625, + "learning_rate": 1.999036428205771e-06, + "loss": 25.0156, + "step": 4602 + }, + { + "epoch": 0.04357209795439271, + "grad_norm": 283.7783203125, + "learning_rate": 1.9990350821817604e-06, + "loss": 22.3281, + "step": 4603 + }, + { + "epoch": 0.04358156397610776, + "grad_norm": 602.7009887695312, + "learning_rate": 1.9990337352187216e-06, + "loss": 41.4141, + "step": 4604 + }, + { + "epoch": 0.043591029997822814, + "grad_norm": 611.7061157226562, + "learning_rate": 1.9990323873166566e-06, + "loss": 42.75, + "step": 4605 + }, + { + "epoch": 0.04360049601953787, + "grad_norm": 188.66864013671875, + "learning_rate": 1.9990310384755658e-06, + "loss": 23.0625, + "step": 4606 + }, + { + "epoch": 0.04360996204125292, + "grad_norm": 706.3943481445312, + "learning_rate": 1.9990296886954504e-06, + "loss": 65.7344, + "step": 4607 + }, + { + "epoch": 0.04361942806296798, + "grad_norm": 282.3215637207031, + "learning_rate": 1.9990283379763127e-06, + "loss": 25.0781, + "step": 4608 + }, + { + "epoch": 0.04362889408468303, + "grad_norm": 376.2720031738281, + "learning_rate": 1.9990269863181533e-06, + "loss": 41.875, + "step": 4609 + }, + { + "epoch": 0.043638360106398086, + "grad_norm": 421.55670166015625, + "learning_rate": 1.9990256337209732e-06, + "loss": 28.125, + "step": 4610 + }, + { + "epoch": 0.04364782612811314, + "grad_norm": 485.9654235839844, + "learning_rate": 1.999024280184774e-06, + "loss": 40.8594, + "step": 4611 + }, + { + "epoch": 0.04365729214982819, + "grad_norm": 716.7913208007812, + "learning_rate": 1.9990229257095573e-06, + "loss": 54.5156, + "step": 4612 + }, + { + "epoch": 0.043666758171543245, + "grad_norm": 825.0210571289062, + "learning_rate": 1.9990215702953243e-06, + "loss": 27.6172, + "step": 4613 + }, + { + "epoch": 0.0436762241932583, + "grad_norm": 1013.9514770507812, + "learning_rate": 1.9990202139420753e-06, + "loss": 74.9531, + "step": 4614 + }, + { + "epoch": 0.04368569021497335, + "grad_norm": 450.6729736328125, + "learning_rate": 1.999018856649813e-06, + "loss": 39.5938, + "step": 4615 + }, + { + "epoch": 0.043695156236688404, + "grad_norm": 513.8416748046875, + "learning_rate": 1.999017498418537e-06, + "loss": 61.0156, + "step": 4616 + }, + { + "epoch": 0.043704622258403464, + "grad_norm": 216.2947235107422, + "learning_rate": 1.99901613924825e-06, + "loss": 21.2656, + "step": 4617 + }, + { + "epoch": 0.04371408828011852, + "grad_norm": 394.2510070800781, + "learning_rate": 1.999014779138953e-06, + "loss": 47.1641, + "step": 4618 + }, + { + "epoch": 0.04372355430183357, + "grad_norm": 3.6238012313842773, + "learning_rate": 1.999013418090647e-06, + "loss": 1.0161, + "step": 4619 + }, + { + "epoch": 0.04373302032354862, + "grad_norm": 209.38743591308594, + "learning_rate": 1.9990120561033333e-06, + "loss": 25.2031, + "step": 4620 + }, + { + "epoch": 0.043742486345263676, + "grad_norm": 236.1110382080078, + "learning_rate": 1.9990106931770138e-06, + "loss": 24.4844, + "step": 4621 + }, + { + "epoch": 0.04375195236697873, + "grad_norm": 796.129638671875, + "learning_rate": 1.9990093293116883e-06, + "loss": 30.75, + "step": 4622 + }, + { + "epoch": 0.04376141838869378, + "grad_norm": 266.05755615234375, + "learning_rate": 1.99900796450736e-06, + "loss": 26.1875, + "step": 4623 + }, + { + "epoch": 0.043770884410408835, + "grad_norm": 1245.3095703125, + "learning_rate": 1.9990065987640286e-06, + "loss": 86.2812, + "step": 4624 + }, + { + "epoch": 0.04378035043212389, + "grad_norm": 213.57850646972656, + "learning_rate": 1.999005232081696e-06, + "loss": 25.0625, + "step": 4625 + }, + { + "epoch": 0.04378981645383895, + "grad_norm": 257.6995849609375, + "learning_rate": 1.9990038644603636e-06, + "loss": 26.5703, + "step": 4626 + }, + { + "epoch": 0.043799282475554, + "grad_norm": 2.8526198863983154, + "learning_rate": 1.9990024959000323e-06, + "loss": 0.9038, + "step": 4627 + }, + { + "epoch": 0.043808748497269054, + "grad_norm": 189.022705078125, + "learning_rate": 1.9990011264007037e-06, + "loss": 27.9219, + "step": 4628 + }, + { + "epoch": 0.04381821451898411, + "grad_norm": 747.3338012695312, + "learning_rate": 1.9989997559623797e-06, + "loss": 30.3281, + "step": 4629 + }, + { + "epoch": 0.04382768054069916, + "grad_norm": 3.5929980278015137, + "learning_rate": 1.9989983845850604e-06, + "loss": 0.8794, + "step": 4630 + }, + { + "epoch": 0.04383714656241421, + "grad_norm": 639.0078735351562, + "learning_rate": 1.9989970122687475e-06, + "loss": 37.7734, + "step": 4631 + }, + { + "epoch": 0.043846612584129266, + "grad_norm": 220.7720184326172, + "learning_rate": 1.998995639013442e-06, + "loss": 28.75, + "step": 4632 + }, + { + "epoch": 0.04385607860584432, + "grad_norm": 543.9453735351562, + "learning_rate": 1.9989942648191467e-06, + "loss": 45.9805, + "step": 4633 + }, + { + "epoch": 0.04386554462755938, + "grad_norm": 695.1509399414062, + "learning_rate": 1.998992889685861e-06, + "loss": 24.2109, + "step": 4634 + }, + { + "epoch": 0.04387501064927443, + "grad_norm": 291.5687561035156, + "learning_rate": 1.998991513613587e-06, + "loss": 23.3906, + "step": 4635 + }, + { + "epoch": 0.043884476670989485, + "grad_norm": 1378.4710693359375, + "learning_rate": 1.9989901366023265e-06, + "loss": 44.5938, + "step": 4636 + }, + { + "epoch": 0.04389394269270454, + "grad_norm": 407.33343505859375, + "learning_rate": 1.9989887586520798e-06, + "loss": 53.5, + "step": 4637 + }, + { + "epoch": 0.04390340871441959, + "grad_norm": 2.8792850971221924, + "learning_rate": 1.9989873797628488e-06, + "loss": 0.8442, + "step": 4638 + }, + { + "epoch": 0.043912874736134644, + "grad_norm": 3.3069965839385986, + "learning_rate": 1.9989859999346343e-06, + "loss": 1.0176, + "step": 4639 + }, + { + "epoch": 0.0439223407578497, + "grad_norm": 745.7233276367188, + "learning_rate": 1.998984619167438e-06, + "loss": 75.9844, + "step": 4640 + }, + { + "epoch": 0.04393180677956475, + "grad_norm": 413.80999755859375, + "learning_rate": 1.9989832374612614e-06, + "loss": 54.625, + "step": 4641 + }, + { + "epoch": 0.0439412728012798, + "grad_norm": 2.845954179763794, + "learning_rate": 1.998981854816106e-06, + "loss": 0.8521, + "step": 4642 + }, + { + "epoch": 0.04395073882299486, + "grad_norm": 479.3583984375, + "learning_rate": 1.998980471231972e-06, + "loss": 36.3438, + "step": 4643 + }, + { + "epoch": 0.043960204844709916, + "grad_norm": 459.5681457519531, + "learning_rate": 1.9989790867088615e-06, + "loss": 24.8594, + "step": 4644 + }, + { + "epoch": 0.04396967086642497, + "grad_norm": 521.2114868164062, + "learning_rate": 1.9989777012467756e-06, + "loss": 26.2344, + "step": 4645 + }, + { + "epoch": 0.04397913688814002, + "grad_norm": 344.7895202636719, + "learning_rate": 1.998976314845716e-06, + "loss": 34.8906, + "step": 4646 + }, + { + "epoch": 0.043988602909855075, + "grad_norm": 430.0755615234375, + "learning_rate": 1.9989749275056834e-06, + "loss": 49.1719, + "step": 4647 + }, + { + "epoch": 0.04399806893157013, + "grad_norm": 284.45013427734375, + "learning_rate": 1.9989735392266792e-06, + "loss": 28.0312, + "step": 4648 + }, + { + "epoch": 0.04400753495328518, + "grad_norm": 261.3016357421875, + "learning_rate": 1.9989721500087048e-06, + "loss": 31.7969, + "step": 4649 + }, + { + "epoch": 0.044017000975000234, + "grad_norm": 787.9575805664062, + "learning_rate": 1.9989707598517617e-06, + "loss": 35.3828, + "step": 4650 + }, + { + "epoch": 0.044026466996715294, + "grad_norm": 267.0346374511719, + "learning_rate": 1.9989693687558512e-06, + "loss": 23.625, + "step": 4651 + }, + { + "epoch": 0.04403593301843035, + "grad_norm": 2.9852328300476074, + "learning_rate": 1.9989679767209747e-06, + "loss": 0.7939, + "step": 4652 + }, + { + "epoch": 0.0440453990401454, + "grad_norm": 3.899380683898926, + "learning_rate": 1.998966583747133e-06, + "loss": 0.8748, + "step": 4653 + }, + { + "epoch": 0.04405486506186045, + "grad_norm": 357.708984375, + "learning_rate": 1.9989651898343275e-06, + "loss": 47.8281, + "step": 4654 + }, + { + "epoch": 0.044064331083575506, + "grad_norm": 233.02635192871094, + "learning_rate": 1.99896379498256e-06, + "loss": 24.5938, + "step": 4655 + }, + { + "epoch": 0.04407379710529056, + "grad_norm": 300.4920654296875, + "learning_rate": 1.9989623991918313e-06, + "loss": 25.3906, + "step": 4656 + }, + { + "epoch": 0.04408326312700561, + "grad_norm": 662.3690795898438, + "learning_rate": 1.998961002462143e-06, + "loss": 40.7031, + "step": 4657 + }, + { + "epoch": 0.044092729148720665, + "grad_norm": 238.47482299804688, + "learning_rate": 1.9989596047934965e-06, + "loss": 29.5156, + "step": 4658 + }, + { + "epoch": 0.04410219517043572, + "grad_norm": 217.72573852539062, + "learning_rate": 1.998958206185893e-06, + "loss": 20.5938, + "step": 4659 + }, + { + "epoch": 0.04411166119215078, + "grad_norm": 236.90574645996094, + "learning_rate": 1.9989568066393333e-06, + "loss": 25.4844, + "step": 4660 + }, + { + "epoch": 0.04412112721386583, + "grad_norm": 185.92510986328125, + "learning_rate": 1.9989554061538196e-06, + "loss": 24.8359, + "step": 4661 + }, + { + "epoch": 0.044130593235580884, + "grad_norm": 444.87554931640625, + "learning_rate": 1.998954004729353e-06, + "loss": 19.0352, + "step": 4662 + }, + { + "epoch": 0.04414005925729594, + "grad_norm": 2.925123691558838, + "learning_rate": 1.998952602365934e-06, + "loss": 0.9385, + "step": 4663 + }, + { + "epoch": 0.04414952527901099, + "grad_norm": 979.5089721679688, + "learning_rate": 1.998951199063565e-06, + "loss": 44.1641, + "step": 4664 + }, + { + "epoch": 0.04415899130072604, + "grad_norm": 268.52423095703125, + "learning_rate": 1.9989497948222467e-06, + "loss": 29.9688, + "step": 4665 + }, + { + "epoch": 0.044168457322441096, + "grad_norm": 460.07366943359375, + "learning_rate": 1.998948389641981e-06, + "loss": 41.0, + "step": 4666 + }, + { + "epoch": 0.04417792334415615, + "grad_norm": 625.9583740234375, + "learning_rate": 1.9989469835227683e-06, + "loss": 41.3594, + "step": 4667 + }, + { + "epoch": 0.0441873893658712, + "grad_norm": 773.0640258789062, + "learning_rate": 1.99894557646461e-06, + "loss": 37.8359, + "step": 4668 + }, + { + "epoch": 0.04419685538758626, + "grad_norm": 397.5743713378906, + "learning_rate": 1.9989441684675086e-06, + "loss": 20.8711, + "step": 4669 + }, + { + "epoch": 0.044206321409301315, + "grad_norm": 472.1509704589844, + "learning_rate": 1.9989427595314647e-06, + "loss": 58.3438, + "step": 4670 + }, + { + "epoch": 0.04421578743101637, + "grad_norm": 715.871826171875, + "learning_rate": 1.998941349656479e-06, + "loss": 23.3281, + "step": 4671 + }, + { + "epoch": 0.04422525345273142, + "grad_norm": 583.1936645507812, + "learning_rate": 1.9989399388425537e-06, + "loss": 43.9609, + "step": 4672 + }, + { + "epoch": 0.044234719474446474, + "grad_norm": 275.5933837890625, + "learning_rate": 1.99893852708969e-06, + "loss": 22.2188, + "step": 4673 + }, + { + "epoch": 0.04424418549616153, + "grad_norm": 771.736572265625, + "learning_rate": 1.998937114397889e-06, + "loss": 19.5078, + "step": 4674 + }, + { + "epoch": 0.04425365151787658, + "grad_norm": 463.2862854003906, + "learning_rate": 1.998935700767152e-06, + "loss": 29.0469, + "step": 4675 + }, + { + "epoch": 0.04426311753959163, + "grad_norm": 207.92591857910156, + "learning_rate": 1.9989342861974808e-06, + "loss": 20.4219, + "step": 4676 + }, + { + "epoch": 0.04427258356130669, + "grad_norm": 332.8160400390625, + "learning_rate": 1.998932870688876e-06, + "loss": 24.3203, + "step": 4677 + }, + { + "epoch": 0.044282049583021746, + "grad_norm": 312.20440673828125, + "learning_rate": 1.998931454241339e-06, + "loss": 21.5703, + "step": 4678 + }, + { + "epoch": 0.0442915156047368, + "grad_norm": 279.5443115234375, + "learning_rate": 1.998930036854872e-06, + "loss": 35.7031, + "step": 4679 + }, + { + "epoch": 0.04430098162645185, + "grad_norm": 261.0059814453125, + "learning_rate": 1.9989286185294755e-06, + "loss": 31.0469, + "step": 4680 + }, + { + "epoch": 0.044310447648166905, + "grad_norm": 228.44863891601562, + "learning_rate": 1.998927199265151e-06, + "loss": 27.6875, + "step": 4681 + }, + { + "epoch": 0.04431991366988196, + "grad_norm": 313.5419616699219, + "learning_rate": 1.9989257790619e-06, + "loss": 27.875, + "step": 4682 + }, + { + "epoch": 0.04432937969159701, + "grad_norm": 431.52392578125, + "learning_rate": 1.998924357919724e-06, + "loss": 28.3203, + "step": 4683 + }, + { + "epoch": 0.044338845713312064, + "grad_norm": 3.209986448287964, + "learning_rate": 1.9989229358386235e-06, + "loss": 0.8555, + "step": 4684 + }, + { + "epoch": 0.04434831173502712, + "grad_norm": 187.57301330566406, + "learning_rate": 1.998921512818601e-06, + "loss": 28.3281, + "step": 4685 + }, + { + "epoch": 0.04435777775674218, + "grad_norm": 711.5709228515625, + "learning_rate": 1.9989200888596568e-06, + "loss": 58.0312, + "step": 4686 + }, + { + "epoch": 0.04436724377845723, + "grad_norm": 409.4300231933594, + "learning_rate": 1.9989186639617933e-06, + "loss": 36.1562, + "step": 4687 + }, + { + "epoch": 0.04437670980017228, + "grad_norm": 254.3749542236328, + "learning_rate": 1.9989172381250107e-06, + "loss": 25.3906, + "step": 4688 + }, + { + "epoch": 0.044386175821887336, + "grad_norm": 413.6383361816406, + "learning_rate": 1.9989158113493108e-06, + "loss": 19.3438, + "step": 4689 + }, + { + "epoch": 0.04439564184360239, + "grad_norm": 689.552001953125, + "learning_rate": 1.9989143836346956e-06, + "loss": 25.1016, + "step": 4690 + }, + { + "epoch": 0.04440510786531744, + "grad_norm": 530.51513671875, + "learning_rate": 1.9989129549811655e-06, + "loss": 25.8125, + "step": 4691 + }, + { + "epoch": 0.044414573887032495, + "grad_norm": 708.2060546875, + "learning_rate": 1.9989115253887223e-06, + "loss": 48.8047, + "step": 4692 + }, + { + "epoch": 0.04442403990874755, + "grad_norm": 408.46923828125, + "learning_rate": 1.998910094857367e-06, + "loss": 31.0312, + "step": 4693 + }, + { + "epoch": 0.04443350593046261, + "grad_norm": 383.8424072265625, + "learning_rate": 1.9989086633871015e-06, + "loss": 25.5781, + "step": 4694 + }, + { + "epoch": 0.04444297195217766, + "grad_norm": 625.19482421875, + "learning_rate": 1.998907230977927e-06, + "loss": 41.6094, + "step": 4695 + }, + { + "epoch": 0.044452437973892714, + "grad_norm": 437.831298828125, + "learning_rate": 1.998905797629844e-06, + "loss": 26.0469, + "step": 4696 + }, + { + "epoch": 0.04446190399560777, + "grad_norm": 264.7362060546875, + "learning_rate": 1.998904363342855e-06, + "loss": 24.8594, + "step": 4697 + }, + { + "epoch": 0.04447137001732282, + "grad_norm": 1333.9796142578125, + "learning_rate": 1.998902928116961e-06, + "loss": 42.2422, + "step": 4698 + }, + { + "epoch": 0.04448083603903787, + "grad_norm": 402.5457763671875, + "learning_rate": 1.998901491952163e-06, + "loss": 36.5469, + "step": 4699 + }, + { + "epoch": 0.044490302060752926, + "grad_norm": 184.3919677734375, + "learning_rate": 1.9989000548484628e-06, + "loss": 28.5859, + "step": 4700 + }, + { + "epoch": 0.04449976808246798, + "grad_norm": 275.09710693359375, + "learning_rate": 1.9988986168058613e-06, + "loss": 30.9219, + "step": 4701 + }, + { + "epoch": 0.04450923410418303, + "grad_norm": 523.4938354492188, + "learning_rate": 1.99889717782436e-06, + "loss": 44.875, + "step": 4702 + }, + { + "epoch": 0.04451870012589809, + "grad_norm": 259.0872802734375, + "learning_rate": 1.9988957379039605e-06, + "loss": 27.4531, + "step": 4703 + }, + { + "epoch": 0.044528166147613145, + "grad_norm": 522.5665893554688, + "learning_rate": 1.998894297044664e-06, + "loss": 35.0, + "step": 4704 + }, + { + "epoch": 0.0445376321693282, + "grad_norm": 580.13623046875, + "learning_rate": 1.998892855246472e-06, + "loss": 62.2188, + "step": 4705 + }, + { + "epoch": 0.04454709819104325, + "grad_norm": 321.9450378417969, + "learning_rate": 1.9988914125093855e-06, + "loss": 23.9766, + "step": 4706 + }, + { + "epoch": 0.044556564212758304, + "grad_norm": 289.5340881347656, + "learning_rate": 1.998889968833406e-06, + "loss": 24.8438, + "step": 4707 + }, + { + "epoch": 0.04456603023447336, + "grad_norm": 270.19000244140625, + "learning_rate": 1.998888524218535e-06, + "loss": 11.4258, + "step": 4708 + }, + { + "epoch": 0.04457549625618841, + "grad_norm": 1326.96337890625, + "learning_rate": 1.9988870786647734e-06, + "loss": 103.8125, + "step": 4709 + }, + { + "epoch": 0.04458496227790346, + "grad_norm": 204.0852813720703, + "learning_rate": 1.9988856321721235e-06, + "loss": 19.0859, + "step": 4710 + }, + { + "epoch": 0.044594428299618516, + "grad_norm": 278.0, + "learning_rate": 1.998884184740586e-06, + "loss": 29.375, + "step": 4711 + }, + { + "epoch": 0.044603894321333576, + "grad_norm": 306.1171569824219, + "learning_rate": 1.998882736370162e-06, + "loss": 30.4297, + "step": 4712 + }, + { + "epoch": 0.04461336034304863, + "grad_norm": 509.03497314453125, + "learning_rate": 1.9988812870608534e-06, + "loss": 60.25, + "step": 4713 + }, + { + "epoch": 0.04462282636476368, + "grad_norm": 360.24713134765625, + "learning_rate": 1.9988798368126618e-06, + "loss": 23.3594, + "step": 4714 + }, + { + "epoch": 0.044632292386478735, + "grad_norm": 845.513916015625, + "learning_rate": 1.9988783856255875e-06, + "loss": 46.2383, + "step": 4715 + }, + { + "epoch": 0.04464175840819379, + "grad_norm": 394.376220703125, + "learning_rate": 1.9988769334996327e-06, + "loss": 38.4531, + "step": 4716 + }, + { + "epoch": 0.04465122442990884, + "grad_norm": 728.5464477539062, + "learning_rate": 1.9988754804347986e-06, + "loss": 56.8438, + "step": 4717 + }, + { + "epoch": 0.044660690451623894, + "grad_norm": 427.53985595703125, + "learning_rate": 1.9988740264310865e-06, + "loss": 30.5938, + "step": 4718 + }, + { + "epoch": 0.04467015647333895, + "grad_norm": 386.4418640136719, + "learning_rate": 1.9988725714884976e-06, + "loss": 48.8594, + "step": 4719 + }, + { + "epoch": 0.04467962249505401, + "grad_norm": 1181.9637451171875, + "learning_rate": 1.9988711156070338e-06, + "loss": 39.6172, + "step": 4720 + }, + { + "epoch": 0.04468908851676906, + "grad_norm": 596.3143310546875, + "learning_rate": 1.998869658786696e-06, + "loss": 31.9844, + "step": 4721 + }, + { + "epoch": 0.04469855453848411, + "grad_norm": 383.9720458984375, + "learning_rate": 1.998868201027486e-06, + "loss": 31.1406, + "step": 4722 + }, + { + "epoch": 0.044708020560199166, + "grad_norm": 404.9472961425781, + "learning_rate": 1.9988667423294046e-06, + "loss": 37.5312, + "step": 4723 + }, + { + "epoch": 0.04471748658191422, + "grad_norm": 962.7823486328125, + "learning_rate": 1.9988652826924533e-06, + "loss": 68.7812, + "step": 4724 + }, + { + "epoch": 0.04472695260362927, + "grad_norm": 267.6839599609375, + "learning_rate": 1.9988638221166338e-06, + "loss": 32.9531, + "step": 4725 + }, + { + "epoch": 0.044736418625344325, + "grad_norm": 583.6217041015625, + "learning_rate": 1.9988623606019472e-06, + "loss": 79.7812, + "step": 4726 + }, + { + "epoch": 0.04474588464705938, + "grad_norm": 902.021484375, + "learning_rate": 1.998860898148395e-06, + "loss": 25.0, + "step": 4727 + }, + { + "epoch": 0.04475535066877443, + "grad_norm": 256.37774658203125, + "learning_rate": 1.9988594347559787e-06, + "loss": 25.9297, + "step": 4728 + }, + { + "epoch": 0.04476481669048949, + "grad_norm": 351.54974365234375, + "learning_rate": 1.998857970424699e-06, + "loss": 28.7969, + "step": 4729 + }, + { + "epoch": 0.044774282712204544, + "grad_norm": 741.7042846679688, + "learning_rate": 1.9988565051545582e-06, + "loss": 51.7031, + "step": 4730 + }, + { + "epoch": 0.0447837487339196, + "grad_norm": 269.14385986328125, + "learning_rate": 1.998855038945557e-06, + "loss": 27.8281, + "step": 4731 + }, + { + "epoch": 0.04479321475563465, + "grad_norm": 247.0953826904297, + "learning_rate": 1.9988535717976974e-06, + "loss": 27.3906, + "step": 4732 + }, + { + "epoch": 0.0448026807773497, + "grad_norm": 688.0090942382812, + "learning_rate": 1.9988521037109804e-06, + "loss": 20.5391, + "step": 4733 + }, + { + "epoch": 0.044812146799064756, + "grad_norm": 400.0714111328125, + "learning_rate": 1.998850634685407e-06, + "loss": 28.2891, + "step": 4734 + }, + { + "epoch": 0.04482161282077981, + "grad_norm": 394.7759704589844, + "learning_rate": 1.9988491647209795e-06, + "loss": 26.5625, + "step": 4735 + }, + { + "epoch": 0.04483107884249486, + "grad_norm": 204.69766235351562, + "learning_rate": 1.9988476938176984e-06, + "loss": 28.6406, + "step": 4736 + }, + { + "epoch": 0.04484054486420992, + "grad_norm": 245.25743103027344, + "learning_rate": 1.9988462219755656e-06, + "loss": 25.2031, + "step": 4737 + }, + { + "epoch": 0.044850010885924975, + "grad_norm": 705.1973266601562, + "learning_rate": 1.9988447491945824e-06, + "loss": 58.125, + "step": 4738 + }, + { + "epoch": 0.04485947690764003, + "grad_norm": 298.877197265625, + "learning_rate": 1.99884327547475e-06, + "loss": 22.0156, + "step": 4739 + }, + { + "epoch": 0.04486894292935508, + "grad_norm": 330.7499084472656, + "learning_rate": 1.99884180081607e-06, + "loss": 20.332, + "step": 4740 + }, + { + "epoch": 0.044878408951070134, + "grad_norm": 408.70806884765625, + "learning_rate": 1.9988403252185437e-06, + "loss": 35.1484, + "step": 4741 + }, + { + "epoch": 0.04488787497278519, + "grad_norm": 493.42620849609375, + "learning_rate": 1.998838848682172e-06, + "loss": 37.6875, + "step": 4742 + }, + { + "epoch": 0.04489734099450024, + "grad_norm": 2.699302911758423, + "learning_rate": 1.9988373712069572e-06, + "loss": 0.8174, + "step": 4743 + }, + { + "epoch": 0.04490680701621529, + "grad_norm": 481.3215637207031, + "learning_rate": 1.9988358927929003e-06, + "loss": 50.4688, + "step": 4744 + }, + { + "epoch": 0.044916273037930346, + "grad_norm": 426.9627685546875, + "learning_rate": 1.9988344134400027e-06, + "loss": 34.7031, + "step": 4745 + }, + { + "epoch": 0.044925739059645406, + "grad_norm": 344.6164245605469, + "learning_rate": 1.9988329331482652e-06, + "loss": 27.4922, + "step": 4746 + }, + { + "epoch": 0.04493520508136046, + "grad_norm": 422.5010070800781, + "learning_rate": 1.9988314519176904e-06, + "loss": 41.9297, + "step": 4747 + }, + { + "epoch": 0.04494467110307551, + "grad_norm": 394.2560729980469, + "learning_rate": 1.9988299697482787e-06, + "loss": 49.9531, + "step": 4748 + }, + { + "epoch": 0.044954137124790565, + "grad_norm": 255.1030731201172, + "learning_rate": 1.9988284866400317e-06, + "loss": 29.7031, + "step": 4749 + }, + { + "epoch": 0.04496360314650562, + "grad_norm": 820.7769775390625, + "learning_rate": 1.998827002592951e-06, + "loss": 24.7344, + "step": 4750 + }, + { + "epoch": 0.04497306916822067, + "grad_norm": 292.0220031738281, + "learning_rate": 1.9988255176070375e-06, + "loss": 26.0312, + "step": 4751 + }, + { + "epoch": 0.044982535189935724, + "grad_norm": 1141.03173828125, + "learning_rate": 1.9988240316822934e-06, + "loss": 59.3438, + "step": 4752 + }, + { + "epoch": 0.04499200121165078, + "grad_norm": 969.8206176757812, + "learning_rate": 1.99882254481872e-06, + "loss": 37.3828, + "step": 4753 + }, + { + "epoch": 0.04500146723336583, + "grad_norm": 457.51177978515625, + "learning_rate": 1.998821057016318e-06, + "loss": 54.9688, + "step": 4754 + }, + { + "epoch": 0.04501093325508089, + "grad_norm": 223.54428100585938, + "learning_rate": 1.9988195682750893e-06, + "loss": 28.125, + "step": 4755 + }, + { + "epoch": 0.04502039927679594, + "grad_norm": 597.4866943359375, + "learning_rate": 1.9988180785950347e-06, + "loss": 62.3125, + "step": 4756 + }, + { + "epoch": 0.045029865298510996, + "grad_norm": 262.5660095214844, + "learning_rate": 1.9988165879761567e-06, + "loss": 26.0938, + "step": 4757 + }, + { + "epoch": 0.04503933132022605, + "grad_norm": 466.81292724609375, + "learning_rate": 1.998815096418456e-06, + "loss": 48.5781, + "step": 4758 + }, + { + "epoch": 0.0450487973419411, + "grad_norm": 671.2261352539062, + "learning_rate": 1.9988136039219337e-06, + "loss": 27.7188, + "step": 4759 + }, + { + "epoch": 0.045058263363656155, + "grad_norm": 2.7984611988067627, + "learning_rate": 1.9988121104865924e-06, + "loss": 0.9058, + "step": 4760 + }, + { + "epoch": 0.04506772938537121, + "grad_norm": 404.8177185058594, + "learning_rate": 1.998810616112432e-06, + "loss": 60.875, + "step": 4761 + }, + { + "epoch": 0.04507719540708626, + "grad_norm": 686.510498046875, + "learning_rate": 1.9988091207994546e-06, + "loss": 43.4062, + "step": 4762 + }, + { + "epoch": 0.04508666142880132, + "grad_norm": 334.0715637207031, + "learning_rate": 1.998807624547662e-06, + "loss": 19.8555, + "step": 4763 + }, + { + "epoch": 0.045096127450516374, + "grad_norm": 227.77944946289062, + "learning_rate": 1.9988061273570546e-06, + "loss": 27.4531, + "step": 4764 + }, + { + "epoch": 0.04510559347223143, + "grad_norm": 651.4144897460938, + "learning_rate": 1.998804629227635e-06, + "loss": 32.375, + "step": 4765 + }, + { + "epoch": 0.04511505949394648, + "grad_norm": 526.6581420898438, + "learning_rate": 1.998803130159404e-06, + "loss": 25.2344, + "step": 4766 + }, + { + "epoch": 0.04512452551566153, + "grad_norm": 2.8971078395843506, + "learning_rate": 1.9988016301523626e-06, + "loss": 0.8555, + "step": 4767 + }, + { + "epoch": 0.045133991537376586, + "grad_norm": 466.20703125, + "learning_rate": 1.9988001292065127e-06, + "loss": 37.5312, + "step": 4768 + }, + { + "epoch": 0.04514345755909164, + "grad_norm": 310.5910339355469, + "learning_rate": 1.998798627321856e-06, + "loss": 29.875, + "step": 4769 + }, + { + "epoch": 0.04515292358080669, + "grad_norm": 173.25942993164062, + "learning_rate": 1.9987971244983933e-06, + "loss": 21.9531, + "step": 4770 + }, + { + "epoch": 0.045162389602521745, + "grad_norm": 500.57537841796875, + "learning_rate": 1.9987956207361267e-06, + "loss": 21.8828, + "step": 4771 + }, + { + "epoch": 0.045171855624236805, + "grad_norm": 218.7486572265625, + "learning_rate": 1.9987941160350567e-06, + "loss": 24.0156, + "step": 4772 + }, + { + "epoch": 0.04518132164595186, + "grad_norm": 627.61474609375, + "learning_rate": 1.9987926103951852e-06, + "loss": 28.3242, + "step": 4773 + }, + { + "epoch": 0.04519078766766691, + "grad_norm": 550.72705078125, + "learning_rate": 1.9987911038165138e-06, + "loss": 29.3672, + "step": 4774 + }, + { + "epoch": 0.045200253689381964, + "grad_norm": 256.69903564453125, + "learning_rate": 1.998789596299044e-06, + "loss": 23.625, + "step": 4775 + }, + { + "epoch": 0.04520971971109702, + "grad_norm": 420.2906494140625, + "learning_rate": 1.9987880878427765e-06, + "loss": 54.9062, + "step": 4776 + }, + { + "epoch": 0.04521918573281207, + "grad_norm": 368.5379943847656, + "learning_rate": 1.9987865784477137e-06, + "loss": 29.1406, + "step": 4777 + }, + { + "epoch": 0.04522865175452712, + "grad_norm": 230.60482788085938, + "learning_rate": 1.998785068113856e-06, + "loss": 29.0312, + "step": 4778 + }, + { + "epoch": 0.045238117776242176, + "grad_norm": 387.39471435546875, + "learning_rate": 1.9987835568412057e-06, + "loss": 16.6406, + "step": 4779 + }, + { + "epoch": 0.045247583797957236, + "grad_norm": 180.14146423339844, + "learning_rate": 1.9987820446297634e-06, + "loss": 21.1641, + "step": 4780 + }, + { + "epoch": 0.04525704981967229, + "grad_norm": 607.1328125, + "learning_rate": 1.998780531479531e-06, + "loss": 43.6719, + "step": 4781 + }, + { + "epoch": 0.04526651584138734, + "grad_norm": 392.7657165527344, + "learning_rate": 1.99877901739051e-06, + "loss": 39.875, + "step": 4782 + }, + { + "epoch": 0.045275981863102395, + "grad_norm": 483.6501159667969, + "learning_rate": 1.9987775023627018e-06, + "loss": 25.5, + "step": 4783 + }, + { + "epoch": 0.04528544788481745, + "grad_norm": 3.2879798412323, + "learning_rate": 1.9987759863961075e-06, + "loss": 0.9214, + "step": 4784 + }, + { + "epoch": 0.0452949139065325, + "grad_norm": 498.51123046875, + "learning_rate": 1.998774469490729e-06, + "loss": 34.4688, + "step": 4785 + }, + { + "epoch": 0.045304379928247554, + "grad_norm": 334.985595703125, + "learning_rate": 1.9987729516465674e-06, + "loss": 28.5, + "step": 4786 + }, + { + "epoch": 0.04531384594996261, + "grad_norm": 914.745361328125, + "learning_rate": 1.998771432863624e-06, + "loss": 52.875, + "step": 4787 + }, + { + "epoch": 0.04532331197167766, + "grad_norm": 1428.537353515625, + "learning_rate": 1.9987699131419007e-06, + "loss": 87.9375, + "step": 4788 + }, + { + "epoch": 0.04533277799339272, + "grad_norm": 892.7255249023438, + "learning_rate": 1.998768392481399e-06, + "loss": 29.3359, + "step": 4789 + }, + { + "epoch": 0.04534224401510777, + "grad_norm": 690.4173583984375, + "learning_rate": 1.9987668708821193e-06, + "loss": 56.3438, + "step": 4790 + }, + { + "epoch": 0.045351710036822826, + "grad_norm": 218.13441467285156, + "learning_rate": 1.998765348344064e-06, + "loss": 25.7344, + "step": 4791 + }, + { + "epoch": 0.04536117605853788, + "grad_norm": 240.02182006835938, + "learning_rate": 1.998763824867234e-06, + "loss": 20.2266, + "step": 4792 + }, + { + "epoch": 0.04537064208025293, + "grad_norm": 476.3431701660156, + "learning_rate": 1.998762300451631e-06, + "loss": 34.7422, + "step": 4793 + }, + { + "epoch": 0.045380108101967985, + "grad_norm": 457.1602783203125, + "learning_rate": 1.9987607750972567e-06, + "loss": 48.6875, + "step": 4794 + }, + { + "epoch": 0.04538957412368304, + "grad_norm": 2.759016752243042, + "learning_rate": 1.9987592488041123e-06, + "loss": 0.8271, + "step": 4795 + }, + { + "epoch": 0.04539904014539809, + "grad_norm": 182.53892517089844, + "learning_rate": 1.9987577215721992e-06, + "loss": 24.2656, + "step": 4796 + }, + { + "epoch": 0.045408506167113144, + "grad_norm": 282.9500732421875, + "learning_rate": 1.9987561934015187e-06, + "loss": 26.5469, + "step": 4797 + }, + { + "epoch": 0.045417972188828204, + "grad_norm": 295.39178466796875, + "learning_rate": 1.998754664292072e-06, + "loss": 22.9219, + "step": 4798 + }, + { + "epoch": 0.04542743821054326, + "grad_norm": 597.5474853515625, + "learning_rate": 1.998753134243861e-06, + "loss": 60.3125, + "step": 4799 + }, + { + "epoch": 0.04543690423225831, + "grad_norm": 261.8683776855469, + "learning_rate": 1.9987516032568874e-06, + "loss": 27.2188, + "step": 4800 + }, + { + "epoch": 0.04544637025397336, + "grad_norm": 240.6015625, + "learning_rate": 1.9987500713311523e-06, + "loss": 28.9219, + "step": 4801 + }, + { + "epoch": 0.045455836275688416, + "grad_norm": 667.9628295898438, + "learning_rate": 1.998748538466657e-06, + "loss": 31.8047, + "step": 4802 + }, + { + "epoch": 0.04546530229740347, + "grad_norm": 2.2086353302001953, + "learning_rate": 1.998747004663403e-06, + "loss": 0.7109, + "step": 4803 + }, + { + "epoch": 0.04547476831911852, + "grad_norm": 445.2358093261719, + "learning_rate": 1.9987454699213915e-06, + "loss": 23.6797, + "step": 4804 + }, + { + "epoch": 0.045484234340833575, + "grad_norm": 276.59246826171875, + "learning_rate": 1.9987439342406242e-06, + "loss": 23.2031, + "step": 4805 + }, + { + "epoch": 0.045493700362548635, + "grad_norm": 245.97760009765625, + "learning_rate": 1.998742397621103e-06, + "loss": 23.4062, + "step": 4806 + }, + { + "epoch": 0.04550316638426369, + "grad_norm": 483.75946044921875, + "learning_rate": 1.9987408600628287e-06, + "loss": 32.0781, + "step": 4807 + }, + { + "epoch": 0.04551263240597874, + "grad_norm": 894.851318359375, + "learning_rate": 1.998739321565803e-06, + "loss": 38.25, + "step": 4808 + }, + { + "epoch": 0.045522098427693794, + "grad_norm": 2.996805191040039, + "learning_rate": 1.9987377821300273e-06, + "loss": 0.917, + "step": 4809 + }, + { + "epoch": 0.04553156444940885, + "grad_norm": 411.85321044921875, + "learning_rate": 1.998736241755503e-06, + "loss": 26.9141, + "step": 4810 + }, + { + "epoch": 0.0455410304711239, + "grad_norm": 191.93630981445312, + "learning_rate": 1.9987347004422318e-06, + "loss": 24.3047, + "step": 4811 + }, + { + "epoch": 0.04555049649283895, + "grad_norm": 1233.132568359375, + "learning_rate": 1.9987331581902146e-06, + "loss": 28.1875, + "step": 4812 + }, + { + "epoch": 0.045559962514554006, + "grad_norm": 785.2987060546875, + "learning_rate": 1.9987316149994537e-06, + "loss": 73.8125, + "step": 4813 + }, + { + "epoch": 0.04556942853626906, + "grad_norm": 240.22434997558594, + "learning_rate": 1.9987300708699495e-06, + "loss": 25.875, + "step": 4814 + }, + { + "epoch": 0.04557889455798412, + "grad_norm": 365.6331481933594, + "learning_rate": 1.9987285258017045e-06, + "loss": 24.6562, + "step": 4815 + }, + { + "epoch": 0.04558836057969917, + "grad_norm": 2.4800925254821777, + "learning_rate": 1.9987269797947195e-06, + "loss": 0.8179, + "step": 4816 + }, + { + "epoch": 0.045597826601414225, + "grad_norm": 476.07958984375, + "learning_rate": 1.998725432848996e-06, + "loss": 43.7969, + "step": 4817 + }, + { + "epoch": 0.04560729262312928, + "grad_norm": 402.21630859375, + "learning_rate": 1.9987238849645353e-06, + "loss": 22.2656, + "step": 4818 + }, + { + "epoch": 0.04561675864484433, + "grad_norm": 315.5611572265625, + "learning_rate": 1.9987223361413395e-06, + "loss": 24.6484, + "step": 4819 + }, + { + "epoch": 0.045626224666559384, + "grad_norm": 1065.3551025390625, + "learning_rate": 1.9987207863794096e-06, + "loss": 51.3906, + "step": 4820 + }, + { + "epoch": 0.04563569068827444, + "grad_norm": 619.24755859375, + "learning_rate": 1.9987192356787474e-06, + "loss": 44.2656, + "step": 4821 + }, + { + "epoch": 0.04564515670998949, + "grad_norm": 836.4766235351562, + "learning_rate": 1.998717684039354e-06, + "loss": 46.125, + "step": 4822 + }, + { + "epoch": 0.04565462273170455, + "grad_norm": 3.0622551441192627, + "learning_rate": 1.998716131461231e-06, + "loss": 0.8967, + "step": 4823 + }, + { + "epoch": 0.0456640887534196, + "grad_norm": 634.5376586914062, + "learning_rate": 1.998714577944379e-06, + "loss": 39.6953, + "step": 4824 + }, + { + "epoch": 0.045673554775134656, + "grad_norm": 374.5390930175781, + "learning_rate": 1.998713023488801e-06, + "loss": 21.5625, + "step": 4825 + }, + { + "epoch": 0.04568302079684971, + "grad_norm": 745.7342529296875, + "learning_rate": 1.9987114680944972e-06, + "loss": 70.7812, + "step": 4826 + }, + { + "epoch": 0.04569248681856476, + "grad_norm": 858.5900268554688, + "learning_rate": 1.9987099117614704e-06, + "loss": 36.2031, + "step": 4827 + }, + { + "epoch": 0.045701952840279815, + "grad_norm": 318.5217590332031, + "learning_rate": 1.9987083544897206e-06, + "loss": 32.4219, + "step": 4828 + }, + { + "epoch": 0.04571141886199487, + "grad_norm": 915.539306640625, + "learning_rate": 1.9987067962792504e-06, + "loss": 24.8281, + "step": 4829 + }, + { + "epoch": 0.04572088488370992, + "grad_norm": 197.02090454101562, + "learning_rate": 1.9987052371300605e-06, + "loss": 26.0, + "step": 4830 + }, + { + "epoch": 0.045730350905424974, + "grad_norm": 588.902587890625, + "learning_rate": 1.9987036770421527e-06, + "loss": 49.5312, + "step": 4831 + }, + { + "epoch": 0.045739816927140034, + "grad_norm": 232.7539825439453, + "learning_rate": 1.9987021160155283e-06, + "loss": 24.2031, + "step": 4832 + }, + { + "epoch": 0.04574928294885509, + "grad_norm": 223.51902770996094, + "learning_rate": 1.998700554050189e-06, + "loss": 23.4688, + "step": 4833 + }, + { + "epoch": 0.04575874897057014, + "grad_norm": 354.3069152832031, + "learning_rate": 1.9986989911461363e-06, + "loss": 33.6875, + "step": 4834 + }, + { + "epoch": 0.04576821499228519, + "grad_norm": 253.83914184570312, + "learning_rate": 1.9986974273033713e-06, + "loss": 26.0781, + "step": 4835 + }, + { + "epoch": 0.045777681014000246, + "grad_norm": 196.713623046875, + "learning_rate": 1.9986958625218956e-06, + "loss": 21.1406, + "step": 4836 + }, + { + "epoch": 0.0457871470357153, + "grad_norm": 409.67352294921875, + "learning_rate": 1.998694296801711e-06, + "loss": 34.8047, + "step": 4837 + }, + { + "epoch": 0.04579661305743035, + "grad_norm": 1933.611328125, + "learning_rate": 1.9986927301428183e-06, + "loss": 34.3906, + "step": 4838 + }, + { + "epoch": 0.045806079079145405, + "grad_norm": 926.9720458984375, + "learning_rate": 1.99869116254522e-06, + "loss": 44.6406, + "step": 4839 + }, + { + "epoch": 0.04581554510086046, + "grad_norm": 506.21929931640625, + "learning_rate": 1.9986895940089165e-06, + "loss": 48.3125, + "step": 4840 + }, + { + "epoch": 0.04582501112257552, + "grad_norm": 901.0478515625, + "learning_rate": 1.99868802453391e-06, + "loss": 63.8281, + "step": 4841 + }, + { + "epoch": 0.04583447714429057, + "grad_norm": 400.96240234375, + "learning_rate": 1.9986864541202014e-06, + "loss": 30.2109, + "step": 4842 + }, + { + "epoch": 0.045843943166005624, + "grad_norm": 547.9796142578125, + "learning_rate": 1.998684882767793e-06, + "loss": 56.5469, + "step": 4843 + }, + { + "epoch": 0.04585340918772068, + "grad_norm": 681.2125244140625, + "learning_rate": 1.9986833104766854e-06, + "loss": 55.5469, + "step": 4844 + }, + { + "epoch": 0.04586287520943573, + "grad_norm": 328.3799133300781, + "learning_rate": 1.9986817372468806e-06, + "loss": 30.4297, + "step": 4845 + }, + { + "epoch": 0.04587234123115078, + "grad_norm": 291.5192565917969, + "learning_rate": 1.9986801630783804e-06, + "loss": 14.3633, + "step": 4846 + }, + { + "epoch": 0.045881807252865836, + "grad_norm": 376.7858581542969, + "learning_rate": 1.998678587971185e-06, + "loss": 16.7656, + "step": 4847 + }, + { + "epoch": 0.04589127327458089, + "grad_norm": 438.61810302734375, + "learning_rate": 1.9986770119252973e-06, + "loss": 21.75, + "step": 4848 + }, + { + "epoch": 0.04590073929629595, + "grad_norm": 3.145371913909912, + "learning_rate": 1.998675434940718e-06, + "loss": 0.7842, + "step": 4849 + }, + { + "epoch": 0.045910205318011, + "grad_norm": 196.5342254638672, + "learning_rate": 1.9986738570174484e-06, + "loss": 24.6016, + "step": 4850 + }, + { + "epoch": 0.045919671339726055, + "grad_norm": 409.3076171875, + "learning_rate": 1.9986722781554907e-06, + "loss": 40.2891, + "step": 4851 + }, + { + "epoch": 0.04592913736144111, + "grad_norm": 482.9364318847656, + "learning_rate": 1.998670698354846e-06, + "loss": 23.875, + "step": 4852 + }, + { + "epoch": 0.04593860338315616, + "grad_norm": 581.1954345703125, + "learning_rate": 1.998669117615516e-06, + "loss": 53.9062, + "step": 4853 + }, + { + "epoch": 0.045948069404871214, + "grad_norm": 1145.2845458984375, + "learning_rate": 1.9986675359375017e-06, + "loss": 59.0234, + "step": 4854 + }, + { + "epoch": 0.04595753542658627, + "grad_norm": 166.15089416503906, + "learning_rate": 1.998665953320805e-06, + "loss": 32.0156, + "step": 4855 + }, + { + "epoch": 0.04596700144830132, + "grad_norm": 646.8658447265625, + "learning_rate": 1.998664369765427e-06, + "loss": 53.3594, + "step": 4856 + }, + { + "epoch": 0.04597646747001637, + "grad_norm": 3.5358567237854004, + "learning_rate": 1.9986627852713695e-06, + "loss": 1.0269, + "step": 4857 + }, + { + "epoch": 0.04598593349173143, + "grad_norm": 278.1242370605469, + "learning_rate": 1.998661199838634e-06, + "loss": 22.4141, + "step": 4858 + }, + { + "epoch": 0.045995399513446486, + "grad_norm": 756.1561279296875, + "learning_rate": 1.9986596134672225e-06, + "loss": 37.6289, + "step": 4859 + }, + { + "epoch": 0.04600486553516154, + "grad_norm": 212.04299926757812, + "learning_rate": 1.9986580261571352e-06, + "loss": 22.2656, + "step": 4860 + }, + { + "epoch": 0.04601433155687659, + "grad_norm": 427.7985534667969, + "learning_rate": 1.998656437908375e-06, + "loss": 47.4062, + "step": 4861 + }, + { + "epoch": 0.046023797578591645, + "grad_norm": 459.7383117675781, + "learning_rate": 1.998654848720942e-06, + "loss": 50.8906, + "step": 4862 + }, + { + "epoch": 0.0460332636003067, + "grad_norm": 337.75225830078125, + "learning_rate": 1.9986532585948387e-06, + "loss": 31.8125, + "step": 4863 + }, + { + "epoch": 0.04604272962202175, + "grad_norm": 280.203857421875, + "learning_rate": 1.9986516675300666e-06, + "loss": 21.4375, + "step": 4864 + }, + { + "epoch": 0.046052195643736804, + "grad_norm": 168.9491424560547, + "learning_rate": 1.9986500755266265e-06, + "loss": 22.5156, + "step": 4865 + }, + { + "epoch": 0.04606166166545186, + "grad_norm": 567.06396484375, + "learning_rate": 1.9986484825845206e-06, + "loss": 25.5234, + "step": 4866 + }, + { + "epoch": 0.04607112768716692, + "grad_norm": 221.18203735351562, + "learning_rate": 1.99864688870375e-06, + "loss": 21.9375, + "step": 4867 + }, + { + "epoch": 0.04608059370888197, + "grad_norm": 690.2825927734375, + "learning_rate": 1.998645293884316e-06, + "loss": 55.6094, + "step": 4868 + }, + { + "epoch": 0.04609005973059702, + "grad_norm": 1215.45654296875, + "learning_rate": 1.9986436981262207e-06, + "loss": 78.7266, + "step": 4869 + }, + { + "epoch": 0.046099525752312076, + "grad_norm": 329.5827941894531, + "learning_rate": 1.9986421014294656e-06, + "loss": 36.1719, + "step": 4870 + }, + { + "epoch": 0.04610899177402713, + "grad_norm": 585.6376342773438, + "learning_rate": 1.9986405037940513e-06, + "loss": 28.7969, + "step": 4871 + }, + { + "epoch": 0.04611845779574218, + "grad_norm": 236.2150115966797, + "learning_rate": 1.9986389052199798e-06, + "loss": 24.2422, + "step": 4872 + }, + { + "epoch": 0.046127923817457235, + "grad_norm": 390.70440673828125, + "learning_rate": 1.9986373057072534e-06, + "loss": 24.1406, + "step": 4873 + }, + { + "epoch": 0.04613738983917229, + "grad_norm": 260.5181579589844, + "learning_rate": 1.998635705255872e-06, + "loss": 22.4141, + "step": 4874 + }, + { + "epoch": 0.04614685586088735, + "grad_norm": 287.16168212890625, + "learning_rate": 1.9986341038658387e-06, + "loss": 30.3281, + "step": 4875 + }, + { + "epoch": 0.0461563218826024, + "grad_norm": 502.43182373046875, + "learning_rate": 1.9986325015371543e-06, + "loss": 26.1562, + "step": 4876 + }, + { + "epoch": 0.046165787904317454, + "grad_norm": 442.6324462890625, + "learning_rate": 1.99863089826982e-06, + "loss": 22.9844, + "step": 4877 + }, + { + "epoch": 0.04617525392603251, + "grad_norm": 677.4999389648438, + "learning_rate": 1.998629294063838e-06, + "loss": 40.125, + "step": 4878 + }, + { + "epoch": 0.04618471994774756, + "grad_norm": 195.59942626953125, + "learning_rate": 1.998627688919209e-06, + "loss": 22.4531, + "step": 4879 + }, + { + "epoch": 0.04619418596946261, + "grad_norm": 666.4011840820312, + "learning_rate": 1.998626082835935e-06, + "loss": 42.4219, + "step": 4880 + }, + { + "epoch": 0.046203651991177666, + "grad_norm": 221.5003204345703, + "learning_rate": 1.9986244758140175e-06, + "loss": 22.2812, + "step": 4881 + }, + { + "epoch": 0.04621311801289272, + "grad_norm": 538.1617431640625, + "learning_rate": 1.998622867853458e-06, + "loss": 40.2812, + "step": 4882 + }, + { + "epoch": 0.04622258403460777, + "grad_norm": 307.6631164550781, + "learning_rate": 1.998621258954258e-06, + "loss": 25.9922, + "step": 4883 + }, + { + "epoch": 0.04623205005632283, + "grad_norm": 416.6775207519531, + "learning_rate": 1.998619649116419e-06, + "loss": 46.0781, + "step": 4884 + }, + { + "epoch": 0.046241516078037885, + "grad_norm": 517.3224487304688, + "learning_rate": 1.998618038339942e-06, + "loss": 19.7031, + "step": 4885 + }, + { + "epoch": 0.04625098209975294, + "grad_norm": 489.37139892578125, + "learning_rate": 1.99861642662483e-06, + "loss": 31.9375, + "step": 4886 + }, + { + "epoch": 0.04626044812146799, + "grad_norm": 1272.1761474609375, + "learning_rate": 1.9986148139710827e-06, + "loss": 28.5156, + "step": 4887 + }, + { + "epoch": 0.046269914143183044, + "grad_norm": 228.46556091308594, + "learning_rate": 1.9986132003787028e-06, + "loss": 25.7734, + "step": 4888 + }, + { + "epoch": 0.0462793801648981, + "grad_norm": 720.1585083007812, + "learning_rate": 1.9986115858476913e-06, + "loss": 35.9531, + "step": 4889 + }, + { + "epoch": 0.04628884618661315, + "grad_norm": 372.1622314453125, + "learning_rate": 1.99860997037805e-06, + "loss": 24.6562, + "step": 4890 + }, + { + "epoch": 0.0462983122083282, + "grad_norm": 169.2445526123047, + "learning_rate": 1.9986083539697807e-06, + "loss": 20.4062, + "step": 4891 + }, + { + "epoch": 0.04630777823004326, + "grad_norm": 686.4298095703125, + "learning_rate": 1.9986067366228836e-06, + "loss": 58.3828, + "step": 4892 + }, + { + "epoch": 0.046317244251758316, + "grad_norm": 194.55233764648438, + "learning_rate": 1.9986051183373617e-06, + "loss": 31.1406, + "step": 4893 + }, + { + "epoch": 0.04632671027347337, + "grad_norm": 845.1546630859375, + "learning_rate": 1.9986034991132156e-06, + "loss": 30.3594, + "step": 4894 + }, + { + "epoch": 0.04633617629518842, + "grad_norm": 475.3451232910156, + "learning_rate": 1.9986018789504477e-06, + "loss": 29.3359, + "step": 4895 + }, + { + "epoch": 0.046345642316903475, + "grad_norm": 281.85797119140625, + "learning_rate": 1.9986002578490584e-06, + "loss": 39.3984, + "step": 4896 + }, + { + "epoch": 0.04635510833861853, + "grad_norm": 3.2843613624572754, + "learning_rate": 1.9985986358090503e-06, + "loss": 0.7766, + "step": 4897 + }, + { + "epoch": 0.04636457436033358, + "grad_norm": 3.4960224628448486, + "learning_rate": 1.9985970128304243e-06, + "loss": 0.9668, + "step": 4898 + }, + { + "epoch": 0.046374040382048634, + "grad_norm": 916.4220581054688, + "learning_rate": 1.998595388913182e-06, + "loss": 59.375, + "step": 4899 + }, + { + "epoch": 0.04638350640376369, + "grad_norm": 261.834716796875, + "learning_rate": 1.998593764057325e-06, + "loss": 43.7969, + "step": 4900 + }, + { + "epoch": 0.04639297242547875, + "grad_norm": 278.4112854003906, + "learning_rate": 1.998592138262855e-06, + "loss": 33.25, + "step": 4901 + }, + { + "epoch": 0.0464024384471938, + "grad_norm": 673.2586059570312, + "learning_rate": 1.998590511529773e-06, + "loss": 34.2344, + "step": 4902 + }, + { + "epoch": 0.04641190446890885, + "grad_norm": 254.03070068359375, + "learning_rate": 1.998588883858081e-06, + "loss": 25.2969, + "step": 4903 + }, + { + "epoch": 0.046421370490623906, + "grad_norm": 369.7664489746094, + "learning_rate": 1.9985872552477807e-06, + "loss": 26.5625, + "step": 4904 + }, + { + "epoch": 0.04643083651233896, + "grad_norm": 577.559814453125, + "learning_rate": 1.9985856256988734e-06, + "loss": 44.2422, + "step": 4905 + }, + { + "epoch": 0.04644030253405401, + "grad_norm": 3.018941640853882, + "learning_rate": 1.99858399521136e-06, + "loss": 0.8472, + "step": 4906 + }, + { + "epoch": 0.046449768555769065, + "grad_norm": 3.3745815753936768, + "learning_rate": 1.998582363785243e-06, + "loss": 0.957, + "step": 4907 + }, + { + "epoch": 0.04645923457748412, + "grad_norm": 530.7095336914062, + "learning_rate": 1.998580731420524e-06, + "loss": 53.9375, + "step": 4908 + }, + { + "epoch": 0.04646870059919917, + "grad_norm": 458.37310791015625, + "learning_rate": 1.998579098117203e-06, + "loss": 47.5938, + "step": 4909 + }, + { + "epoch": 0.04647816662091423, + "grad_norm": 806.6177368164062, + "learning_rate": 1.998577463875283e-06, + "loss": 56.0469, + "step": 4910 + }, + { + "epoch": 0.046487632642629284, + "grad_norm": 263.3856201171875, + "learning_rate": 1.9985758286947654e-06, + "loss": 27.0938, + "step": 4911 + }, + { + "epoch": 0.04649709866434434, + "grad_norm": 2.7006289958953857, + "learning_rate": 1.9985741925756514e-06, + "loss": 0.8325, + "step": 4912 + }, + { + "epoch": 0.04650656468605939, + "grad_norm": 2365.465576171875, + "learning_rate": 1.9985725555179427e-06, + "loss": 64.0312, + "step": 4913 + }, + { + "epoch": 0.04651603070777444, + "grad_norm": 647.1509399414062, + "learning_rate": 1.9985709175216407e-06, + "loss": 20.1172, + "step": 4914 + }, + { + "epoch": 0.046525496729489496, + "grad_norm": 302.82196044921875, + "learning_rate": 1.998569278586747e-06, + "loss": 30.25, + "step": 4915 + }, + { + "epoch": 0.04653496275120455, + "grad_norm": 3.0986053943634033, + "learning_rate": 1.998567638713263e-06, + "loss": 1.0054, + "step": 4916 + }, + { + "epoch": 0.0465444287729196, + "grad_norm": 192.00648498535156, + "learning_rate": 1.9985659979011906e-06, + "loss": 23.9609, + "step": 4917 + }, + { + "epoch": 0.04655389479463466, + "grad_norm": 3.328106164932251, + "learning_rate": 1.998564356150531e-06, + "loss": 0.9902, + "step": 4918 + }, + { + "epoch": 0.046563360816349715, + "grad_norm": 474.21759033203125, + "learning_rate": 1.9985627134612856e-06, + "loss": 38.5312, + "step": 4919 + }, + { + "epoch": 0.04657282683806477, + "grad_norm": 538.9546508789062, + "learning_rate": 1.9985610698334566e-06, + "loss": 49.3359, + "step": 4920 + }, + { + "epoch": 0.04658229285977982, + "grad_norm": 269.71673583984375, + "learning_rate": 1.9985594252670452e-06, + "loss": 25.3359, + "step": 4921 + }, + { + "epoch": 0.046591758881494874, + "grad_norm": 436.94061279296875, + "learning_rate": 1.9985577797620527e-06, + "loss": 49.0625, + "step": 4922 + }, + { + "epoch": 0.04660122490320993, + "grad_norm": 644.2550048828125, + "learning_rate": 1.998556133318481e-06, + "loss": 48.9219, + "step": 4923 + }, + { + "epoch": 0.04661069092492498, + "grad_norm": 233.2210693359375, + "learning_rate": 1.998554485936331e-06, + "loss": 25.4531, + "step": 4924 + }, + { + "epoch": 0.04662015694664003, + "grad_norm": 201.16781616210938, + "learning_rate": 1.9985528376156055e-06, + "loss": 27.4375, + "step": 4925 + }, + { + "epoch": 0.046629622968355086, + "grad_norm": 184.18301391601562, + "learning_rate": 1.998551188356305e-06, + "loss": 20.8281, + "step": 4926 + }, + { + "epoch": 0.046639088990070146, + "grad_norm": 463.6669006347656, + "learning_rate": 1.998549538158431e-06, + "loss": 56.0781, + "step": 4927 + }, + { + "epoch": 0.0466485550117852, + "grad_norm": 2.787112236022949, + "learning_rate": 1.9985478870219858e-06, + "loss": 1.0176, + "step": 4928 + }, + { + "epoch": 0.04665802103350025, + "grad_norm": 410.17059326171875, + "learning_rate": 1.9985462349469704e-06, + "loss": 30.5625, + "step": 4929 + }, + { + "epoch": 0.046667487055215305, + "grad_norm": 1069.431396484375, + "learning_rate": 1.9985445819333865e-06, + "loss": 42.9688, + "step": 4930 + }, + { + "epoch": 0.04667695307693036, + "grad_norm": 945.3935546875, + "learning_rate": 1.9985429279812356e-06, + "loss": 31.4844, + "step": 4931 + }, + { + "epoch": 0.04668641909864541, + "grad_norm": 612.7189331054688, + "learning_rate": 1.998541273090519e-06, + "loss": 56.5781, + "step": 4932 + }, + { + "epoch": 0.046695885120360464, + "grad_norm": 658.8267211914062, + "learning_rate": 1.9985396172612394e-06, + "loss": 80.6562, + "step": 4933 + }, + { + "epoch": 0.04670535114207552, + "grad_norm": 424.18438720703125, + "learning_rate": 1.9985379604933968e-06, + "loss": 25.4531, + "step": 4934 + }, + { + "epoch": 0.04671481716379058, + "grad_norm": 277.6717224121094, + "learning_rate": 1.9985363027869937e-06, + "loss": 26.6875, + "step": 4935 + }, + { + "epoch": 0.04672428318550563, + "grad_norm": 676.702392578125, + "learning_rate": 1.9985346441420317e-06, + "loss": 32.75, + "step": 4936 + }, + { + "epoch": 0.04673374920722068, + "grad_norm": 395.3481750488281, + "learning_rate": 1.9985329845585117e-06, + "loss": 29.2578, + "step": 4937 + }, + { + "epoch": 0.046743215228935736, + "grad_norm": 245.13829040527344, + "learning_rate": 1.9985313240364357e-06, + "loss": 25.7344, + "step": 4938 + }, + { + "epoch": 0.04675268125065079, + "grad_norm": 344.7325439453125, + "learning_rate": 1.9985296625758056e-06, + "loss": 31.8281, + "step": 4939 + }, + { + "epoch": 0.04676214727236584, + "grad_norm": 399.8819885253906, + "learning_rate": 1.998528000176622e-06, + "loss": 26.625, + "step": 4940 + }, + { + "epoch": 0.046771613294080895, + "grad_norm": 406.46124267578125, + "learning_rate": 1.9985263368388873e-06, + "loss": 41.9531, + "step": 4941 + }, + { + "epoch": 0.04678107931579595, + "grad_norm": 356.9564514160156, + "learning_rate": 1.9985246725626032e-06, + "loss": 28.5781, + "step": 4942 + }, + { + "epoch": 0.046790545337511, + "grad_norm": 161.81138610839844, + "learning_rate": 1.9985230073477703e-06, + "loss": 25.7812, + "step": 4943 + }, + { + "epoch": 0.04680001135922606, + "grad_norm": 2.849299669265747, + "learning_rate": 1.998521341194391e-06, + "loss": 0.7217, + "step": 4944 + }, + { + "epoch": 0.046809477380941114, + "grad_norm": 252.18081665039062, + "learning_rate": 1.9985196741024664e-06, + "loss": 26.1562, + "step": 4945 + }, + { + "epoch": 0.04681894340265617, + "grad_norm": 199.55926513671875, + "learning_rate": 1.9985180060719985e-06, + "loss": 24.5078, + "step": 4946 + }, + { + "epoch": 0.04682840942437122, + "grad_norm": 345.8384094238281, + "learning_rate": 1.9985163371029885e-06, + "loss": 26.2812, + "step": 4947 + }, + { + "epoch": 0.04683787544608627, + "grad_norm": 318.3001708984375, + "learning_rate": 1.998514667195438e-06, + "loss": 33.3125, + "step": 4948 + }, + { + "epoch": 0.046847341467801326, + "grad_norm": 335.01953125, + "learning_rate": 1.998512996349349e-06, + "loss": 48.7031, + "step": 4949 + }, + { + "epoch": 0.04685680748951638, + "grad_norm": 475.6780700683594, + "learning_rate": 1.9985113245647224e-06, + "loss": 52.9375, + "step": 4950 + }, + { + "epoch": 0.04686627351123143, + "grad_norm": 443.92999267578125, + "learning_rate": 1.9985096518415602e-06, + "loss": 46.9688, + "step": 4951 + }, + { + "epoch": 0.046875739532946485, + "grad_norm": 853.9534301757812, + "learning_rate": 1.998507978179864e-06, + "loss": 69.3828, + "step": 4952 + }, + { + "epoch": 0.046885205554661545, + "grad_norm": 441.2580261230469, + "learning_rate": 1.9985063035796353e-06, + "loss": 24.1172, + "step": 4953 + }, + { + "epoch": 0.0468946715763766, + "grad_norm": 538.1654663085938, + "learning_rate": 1.9985046280408755e-06, + "loss": 33.1797, + "step": 4954 + }, + { + "epoch": 0.04690413759809165, + "grad_norm": 3.1755077838897705, + "learning_rate": 1.9985029515635863e-06, + "loss": 0.9346, + "step": 4955 + }, + { + "epoch": 0.046913603619806704, + "grad_norm": 195.17095947265625, + "learning_rate": 1.9985012741477693e-06, + "loss": 23.6094, + "step": 4956 + }, + { + "epoch": 0.04692306964152176, + "grad_norm": 535.0870361328125, + "learning_rate": 1.9984995957934263e-06, + "loss": 60.0938, + "step": 4957 + }, + { + "epoch": 0.04693253566323681, + "grad_norm": 227.46014404296875, + "learning_rate": 1.998497916500558e-06, + "loss": 23.6641, + "step": 4958 + }, + { + "epoch": 0.04694200168495186, + "grad_norm": 176.2168731689453, + "learning_rate": 1.998496236269167e-06, + "loss": 21.2031, + "step": 4959 + }, + { + "epoch": 0.046951467706666916, + "grad_norm": 309.41607666015625, + "learning_rate": 1.9984945550992547e-06, + "loss": 31.9844, + "step": 4960 + }, + { + "epoch": 0.046960933728381976, + "grad_norm": 2.762362241744995, + "learning_rate": 1.9984928729908223e-06, + "loss": 0.8433, + "step": 4961 + }, + { + "epoch": 0.04697039975009703, + "grad_norm": 519.140380859375, + "learning_rate": 1.998491189943872e-06, + "loss": 22.1914, + "step": 4962 + }, + { + "epoch": 0.04697986577181208, + "grad_norm": 600.2186279296875, + "learning_rate": 1.998489505958404e-06, + "loss": 55.8516, + "step": 4963 + }, + { + "epoch": 0.046989331793527135, + "grad_norm": 339.1544494628906, + "learning_rate": 1.9984878210344215e-06, + "loss": 30.2969, + "step": 4964 + }, + { + "epoch": 0.04699879781524219, + "grad_norm": 319.9688720703125, + "learning_rate": 1.998486135171925e-06, + "loss": 29.9688, + "step": 4965 + }, + { + "epoch": 0.04700826383695724, + "grad_norm": 218.00880432128906, + "learning_rate": 1.9984844483709165e-06, + "loss": 24.8438, + "step": 4966 + }, + { + "epoch": 0.047017729858672294, + "grad_norm": 234.2946319580078, + "learning_rate": 1.998482760631398e-06, + "loss": 20.7422, + "step": 4967 + }, + { + "epoch": 0.04702719588038735, + "grad_norm": 1051.26904296875, + "learning_rate": 1.9984810719533704e-06, + "loss": 44.9219, + "step": 4968 + }, + { + "epoch": 0.0470366619021024, + "grad_norm": 367.2789611816406, + "learning_rate": 1.9984793823368357e-06, + "loss": 12.3633, + "step": 4969 + }, + { + "epoch": 0.04704612792381746, + "grad_norm": 484.18829345703125, + "learning_rate": 1.998477691781795e-06, + "loss": 36.8438, + "step": 4970 + }, + { + "epoch": 0.04705559394553251, + "grad_norm": 188.30006408691406, + "learning_rate": 1.9984760002882506e-06, + "loss": 23.4219, + "step": 4971 + }, + { + "epoch": 0.047065059967247566, + "grad_norm": 404.5779113769531, + "learning_rate": 1.9984743078562034e-06, + "loss": 42.9531, + "step": 4972 + }, + { + "epoch": 0.04707452598896262, + "grad_norm": 447.71270751953125, + "learning_rate": 1.9984726144856556e-06, + "loss": 27.2188, + "step": 4973 + }, + { + "epoch": 0.04708399201067767, + "grad_norm": 340.4766845703125, + "learning_rate": 1.9984709201766085e-06, + "loss": 15.0, + "step": 4974 + }, + { + "epoch": 0.047093458032392725, + "grad_norm": 351.8328552246094, + "learning_rate": 1.998469224929063e-06, + "loss": 29.1172, + "step": 4975 + }, + { + "epoch": 0.04710292405410778, + "grad_norm": 3.1212470531463623, + "learning_rate": 1.9984675287430223e-06, + "loss": 0.9121, + "step": 4976 + }, + { + "epoch": 0.04711239007582283, + "grad_norm": 253.04283142089844, + "learning_rate": 1.9984658316184868e-06, + "loss": 23.7891, + "step": 4977 + }, + { + "epoch": 0.04712185609753789, + "grad_norm": 267.0601806640625, + "learning_rate": 1.998464133555458e-06, + "loss": 24.4688, + "step": 4978 + }, + { + "epoch": 0.047131322119252944, + "grad_norm": 312.1460876464844, + "learning_rate": 1.9984624345539383e-06, + "loss": 31.1562, + "step": 4979 + }, + { + "epoch": 0.047140788140968, + "grad_norm": 343.7242126464844, + "learning_rate": 1.9984607346139287e-06, + "loss": 24.125, + "step": 4980 + }, + { + "epoch": 0.04715025416268305, + "grad_norm": 501.09521484375, + "learning_rate": 1.9984590337354312e-06, + "loss": 35.2422, + "step": 4981 + }, + { + "epoch": 0.0471597201843981, + "grad_norm": 636.5592651367188, + "learning_rate": 1.9984573319184466e-06, + "loss": 42.5781, + "step": 4982 + }, + { + "epoch": 0.047169186206113156, + "grad_norm": 243.40309143066406, + "learning_rate": 1.9984556291629775e-06, + "loss": 22.6719, + "step": 4983 + }, + { + "epoch": 0.04717865222782821, + "grad_norm": 616.3126220703125, + "learning_rate": 1.998453925469025e-06, + "loss": 29.0156, + "step": 4984 + }, + { + "epoch": 0.04718811824954326, + "grad_norm": 279.968994140625, + "learning_rate": 1.9984522208365906e-06, + "loss": 29.2109, + "step": 4985 + }, + { + "epoch": 0.047197584271258315, + "grad_norm": 711.9099731445312, + "learning_rate": 1.9984505152656763e-06, + "loss": 36.4062, + "step": 4986 + }, + { + "epoch": 0.047207050292973375, + "grad_norm": 3.1869430541992188, + "learning_rate": 1.9984488087562833e-06, + "loss": 0.999, + "step": 4987 + }, + { + "epoch": 0.04721651631468843, + "grad_norm": 355.35211181640625, + "learning_rate": 1.9984471013084135e-06, + "loss": 27.8438, + "step": 4988 + }, + { + "epoch": 0.04722598233640348, + "grad_norm": 318.0705261230469, + "learning_rate": 1.9984453929220683e-06, + "loss": 28.5234, + "step": 4989 + }, + { + "epoch": 0.047235448358118534, + "grad_norm": 367.4602355957031, + "learning_rate": 1.9984436835972497e-06, + "loss": 28.1875, + "step": 4990 + }, + { + "epoch": 0.04724491437983359, + "grad_norm": 512.8790893554688, + "learning_rate": 1.9984419733339587e-06, + "loss": 20.7266, + "step": 4991 + }, + { + "epoch": 0.04725438040154864, + "grad_norm": 3.0510165691375732, + "learning_rate": 1.9984402621321972e-06, + "loss": 0.9121, + "step": 4992 + }, + { + "epoch": 0.04726384642326369, + "grad_norm": 377.33428955078125, + "learning_rate": 1.998438549991967e-06, + "loss": 24.1875, + "step": 4993 + }, + { + "epoch": 0.047273312444978746, + "grad_norm": 472.1523132324219, + "learning_rate": 1.998436836913269e-06, + "loss": 37.9062, + "step": 4994 + }, + { + "epoch": 0.0472827784666938, + "grad_norm": 620.4907836914062, + "learning_rate": 1.9984351228961057e-06, + "loss": 44.1875, + "step": 4995 + }, + { + "epoch": 0.04729224448840886, + "grad_norm": 374.70501708984375, + "learning_rate": 1.998433407940478e-06, + "loss": 46.7188, + "step": 4996 + }, + { + "epoch": 0.04730171051012391, + "grad_norm": 1979.1954345703125, + "learning_rate": 1.998431692046388e-06, + "loss": 46.4062, + "step": 4997 + }, + { + "epoch": 0.047311176531838965, + "grad_norm": 237.84727478027344, + "learning_rate": 1.9984299752138375e-06, + "loss": 22.1016, + "step": 4998 + }, + { + "epoch": 0.04732064255355402, + "grad_norm": 772.0655517578125, + "learning_rate": 1.9984282574428277e-06, + "loss": 84.1562, + "step": 4999 + }, + { + "epoch": 0.04733010857526907, + "grad_norm": 544.4893188476562, + "learning_rate": 1.99842653873336e-06, + "loss": 48.0469, + "step": 5000 + }, + { + "epoch": 0.047339574596984124, + "grad_norm": 417.3282165527344, + "learning_rate": 1.9984248190854365e-06, + "loss": 51.4844, + "step": 5001 + }, + { + "epoch": 0.04734904061869918, + "grad_norm": 472.72613525390625, + "learning_rate": 1.9984230984990586e-06, + "loss": 54.9609, + "step": 5002 + }, + { + "epoch": 0.04735850664041423, + "grad_norm": 179.93121337890625, + "learning_rate": 1.998421376974228e-06, + "loss": 22.7344, + "step": 5003 + }, + { + "epoch": 0.04736797266212929, + "grad_norm": 330.06158447265625, + "learning_rate": 1.998419654510946e-06, + "loss": 24.3984, + "step": 5004 + }, + { + "epoch": 0.04737743868384434, + "grad_norm": 847.3718872070312, + "learning_rate": 1.998417931109215e-06, + "loss": 46.1484, + "step": 5005 + }, + { + "epoch": 0.047386904705559396, + "grad_norm": 603.53076171875, + "learning_rate": 1.9984162067690355e-06, + "loss": 49.4844, + "step": 5006 + }, + { + "epoch": 0.04739637072727445, + "grad_norm": 778.5750732421875, + "learning_rate": 1.99841448149041e-06, + "loss": 61.2578, + "step": 5007 + }, + { + "epoch": 0.0474058367489895, + "grad_norm": 265.6759948730469, + "learning_rate": 1.9984127552733397e-06, + "loss": 28.3438, + "step": 5008 + }, + { + "epoch": 0.047415302770704555, + "grad_norm": 224.01828002929688, + "learning_rate": 1.9984110281178265e-06, + "loss": 23.6641, + "step": 5009 + }, + { + "epoch": 0.04742476879241961, + "grad_norm": 419.162109375, + "learning_rate": 1.998409300023872e-06, + "loss": 59.2812, + "step": 5010 + }, + { + "epoch": 0.04743423481413466, + "grad_norm": 591.21875, + "learning_rate": 1.9984075709914774e-06, + "loss": 30.9062, + "step": 5011 + }, + { + "epoch": 0.047443700835849714, + "grad_norm": 200.12530517578125, + "learning_rate": 1.998405841020645e-06, + "loss": 23.9531, + "step": 5012 + }, + { + "epoch": 0.047453166857564774, + "grad_norm": 406.57696533203125, + "learning_rate": 1.9984041101113756e-06, + "loss": 30.6719, + "step": 5013 + }, + { + "epoch": 0.04746263287927983, + "grad_norm": 232.7919921875, + "learning_rate": 1.9984023782636718e-06, + "loss": 26.0625, + "step": 5014 + }, + { + "epoch": 0.04747209890099488, + "grad_norm": 1282.079833984375, + "learning_rate": 1.998400645477534e-06, + "loss": 66.3906, + "step": 5015 + }, + { + "epoch": 0.04748156492270993, + "grad_norm": 690.8966064453125, + "learning_rate": 1.9983989117529655e-06, + "loss": 48.4531, + "step": 5016 + }, + { + "epoch": 0.047491030944424986, + "grad_norm": 2.6194405555725098, + "learning_rate": 1.9983971770899664e-06, + "loss": 0.8286, + "step": 5017 + }, + { + "epoch": 0.04750049696614004, + "grad_norm": 296.7707214355469, + "learning_rate": 1.998395441488539e-06, + "loss": 21.1094, + "step": 5018 + }, + { + "epoch": 0.04750996298785509, + "grad_norm": 326.5553894042969, + "learning_rate": 1.9983937049486848e-06, + "loss": 22.9219, + "step": 5019 + }, + { + "epoch": 0.047519429009570145, + "grad_norm": 1084.9110107421875, + "learning_rate": 1.9983919674704052e-06, + "loss": 90.7266, + "step": 5020 + }, + { + "epoch": 0.047528895031285205, + "grad_norm": 2.9455926418304443, + "learning_rate": 1.9983902290537026e-06, + "loss": 0.9492, + "step": 5021 + }, + { + "epoch": 0.04753836105300026, + "grad_norm": 2.646332263946533, + "learning_rate": 1.998388489698578e-06, + "loss": 0.7515, + "step": 5022 + }, + { + "epoch": 0.04754782707471531, + "grad_norm": 662.1357421875, + "learning_rate": 1.998386749405033e-06, + "loss": 58.8125, + "step": 5023 + }, + { + "epoch": 0.047557293096430364, + "grad_norm": 376.13946533203125, + "learning_rate": 1.9983850081730696e-06, + "loss": 23.7891, + "step": 5024 + }, + { + "epoch": 0.04756675911814542, + "grad_norm": 279.9977111816406, + "learning_rate": 1.9983832660026888e-06, + "loss": 28.125, + "step": 5025 + }, + { + "epoch": 0.04757622513986047, + "grad_norm": 346.9885559082031, + "learning_rate": 1.9983815228938932e-06, + "loss": 35.0625, + "step": 5026 + }, + { + "epoch": 0.04758569116157552, + "grad_norm": 383.2882995605469, + "learning_rate": 1.9983797788466835e-06, + "loss": 34.9375, + "step": 5027 + }, + { + "epoch": 0.047595157183290576, + "grad_norm": 190.07388305664062, + "learning_rate": 1.998378033861062e-06, + "loss": 23.6562, + "step": 5028 + }, + { + "epoch": 0.04760462320500563, + "grad_norm": 701.0388793945312, + "learning_rate": 1.99837628793703e-06, + "loss": 26.7734, + "step": 5029 + }, + { + "epoch": 0.04761408922672069, + "grad_norm": 218.05516052246094, + "learning_rate": 1.9983745410745894e-06, + "loss": 29.0, + "step": 5030 + }, + { + "epoch": 0.04762355524843574, + "grad_norm": 291.8755187988281, + "learning_rate": 1.9983727932737417e-06, + "loss": 27.7266, + "step": 5031 + }, + { + "epoch": 0.047633021270150795, + "grad_norm": 751.7029418945312, + "learning_rate": 1.9983710445344886e-06, + "loss": 55.2227, + "step": 5032 + }, + { + "epoch": 0.04764248729186585, + "grad_norm": 357.5185546875, + "learning_rate": 1.998369294856831e-06, + "loss": 52.7812, + "step": 5033 + }, + { + "epoch": 0.0476519533135809, + "grad_norm": 312.5255126953125, + "learning_rate": 1.998367544240772e-06, + "loss": 30.2188, + "step": 5034 + }, + { + "epoch": 0.047661419335295954, + "grad_norm": 235.6238250732422, + "learning_rate": 1.998365792686312e-06, + "loss": 26.4609, + "step": 5035 + }, + { + "epoch": 0.04767088535701101, + "grad_norm": 349.7823181152344, + "learning_rate": 1.9983640401934532e-06, + "loss": 38.4219, + "step": 5036 + }, + { + "epoch": 0.04768035137872606, + "grad_norm": 279.1169128417969, + "learning_rate": 1.9983622867621975e-06, + "loss": 22.2109, + "step": 5037 + }, + { + "epoch": 0.04768981740044111, + "grad_norm": 567.4696655273438, + "learning_rate": 1.9983605323925458e-06, + "loss": 26.6953, + "step": 5038 + }, + { + "epoch": 0.04769928342215617, + "grad_norm": 574.74169921875, + "learning_rate": 1.9983587770845005e-06, + "loss": 22.3047, + "step": 5039 + }, + { + "epoch": 0.047708749443871226, + "grad_norm": 579.5692138671875, + "learning_rate": 1.9983570208380626e-06, + "loss": 25.0547, + "step": 5040 + }, + { + "epoch": 0.04771821546558628, + "grad_norm": 203.68820190429688, + "learning_rate": 1.9983552636532337e-06, + "loss": 25.8516, + "step": 5041 + }, + { + "epoch": 0.04772768148730133, + "grad_norm": 636.0509643554688, + "learning_rate": 1.998353505530016e-06, + "loss": 29.9219, + "step": 5042 + }, + { + "epoch": 0.047737147509016385, + "grad_norm": 1583.306640625, + "learning_rate": 1.9983517464684116e-06, + "loss": 43.6406, + "step": 5043 + }, + { + "epoch": 0.04774661353073144, + "grad_norm": 383.0928649902344, + "learning_rate": 1.998349986468421e-06, + "loss": 27.5625, + "step": 5044 + }, + { + "epoch": 0.04775607955244649, + "grad_norm": 351.5042419433594, + "learning_rate": 1.9983482255300464e-06, + "loss": 52.875, + "step": 5045 + }, + { + "epoch": 0.047765545574161544, + "grad_norm": 1133.0323486328125, + "learning_rate": 1.9983464636532895e-06, + "loss": 56.4297, + "step": 5046 + }, + { + "epoch": 0.047775011595876604, + "grad_norm": 163.3997039794922, + "learning_rate": 1.9983447008381517e-06, + "loss": 22.4141, + "step": 5047 + }, + { + "epoch": 0.04778447761759166, + "grad_norm": 368.10888671875, + "learning_rate": 1.998342937084635e-06, + "loss": 25.8203, + "step": 5048 + }, + { + "epoch": 0.04779394363930671, + "grad_norm": 196.52105712890625, + "learning_rate": 1.9983411723927406e-06, + "loss": 23.0781, + "step": 5049 + }, + { + "epoch": 0.04780340966102176, + "grad_norm": 451.6856384277344, + "learning_rate": 1.9983394067624707e-06, + "loss": 27.0703, + "step": 5050 + }, + { + "epoch": 0.047812875682736816, + "grad_norm": 1076.276611328125, + "learning_rate": 1.9983376401938267e-06, + "loss": 48.5781, + "step": 5051 + }, + { + "epoch": 0.04782234170445187, + "grad_norm": 368.2643127441406, + "learning_rate": 1.9983358726868104e-06, + "loss": 32.3516, + "step": 5052 + }, + { + "epoch": 0.04783180772616692, + "grad_norm": 343.957275390625, + "learning_rate": 1.998334104241423e-06, + "loss": 39.9688, + "step": 5053 + }, + { + "epoch": 0.047841273747881975, + "grad_norm": 222.6621551513672, + "learning_rate": 1.9983323348576668e-06, + "loss": 22.6094, + "step": 5054 + }, + { + "epoch": 0.04785073976959703, + "grad_norm": 347.5470275878906, + "learning_rate": 1.998330564535543e-06, + "loss": 47.9844, + "step": 5055 + }, + { + "epoch": 0.04786020579131209, + "grad_norm": 223.65870666503906, + "learning_rate": 1.9983287932750533e-06, + "loss": 22.2812, + "step": 5056 + }, + { + "epoch": 0.04786967181302714, + "grad_norm": 716.675537109375, + "learning_rate": 1.9983270210761996e-06, + "loss": 45.875, + "step": 5057 + }, + { + "epoch": 0.047879137834742194, + "grad_norm": 210.7848358154297, + "learning_rate": 1.9983252479389832e-06, + "loss": 20.8594, + "step": 5058 + }, + { + "epoch": 0.04788860385645725, + "grad_norm": 2.929103374481201, + "learning_rate": 1.9983234738634064e-06, + "loss": 0.8599, + "step": 5059 + }, + { + "epoch": 0.0478980698781723, + "grad_norm": 389.3626708984375, + "learning_rate": 1.9983216988494704e-06, + "loss": 52.375, + "step": 5060 + }, + { + "epoch": 0.04790753589988735, + "grad_norm": 201.16757202148438, + "learning_rate": 1.998319922897177e-06, + "loss": 23.0938, + "step": 5061 + }, + { + "epoch": 0.047917001921602406, + "grad_norm": 459.6940002441406, + "learning_rate": 1.9983181460065276e-06, + "loss": 17.5391, + "step": 5062 + }, + { + "epoch": 0.04792646794331746, + "grad_norm": 460.77978515625, + "learning_rate": 1.998316368177524e-06, + "loss": 28.5078, + "step": 5063 + }, + { + "epoch": 0.04793593396503252, + "grad_norm": 854.8928833007812, + "learning_rate": 1.9983145894101684e-06, + "loss": 67.9688, + "step": 5064 + }, + { + "epoch": 0.04794539998674757, + "grad_norm": 558.1221313476562, + "learning_rate": 1.9983128097044614e-06, + "loss": 22.4844, + "step": 5065 + }, + { + "epoch": 0.047954866008462625, + "grad_norm": 466.6468505859375, + "learning_rate": 1.998311029060406e-06, + "loss": 35.3594, + "step": 5066 + }, + { + "epoch": 0.04796433203017768, + "grad_norm": 223.0441436767578, + "learning_rate": 1.998309247478003e-06, + "loss": 25.2031, + "step": 5067 + }, + { + "epoch": 0.04797379805189273, + "grad_norm": 185.67276000976562, + "learning_rate": 1.998307464957254e-06, + "loss": 19.6328, + "step": 5068 + }, + { + "epoch": 0.047983264073607784, + "grad_norm": 393.2005920410156, + "learning_rate": 1.9983056814981606e-06, + "loss": 51.625, + "step": 5069 + }, + { + "epoch": 0.04799273009532284, + "grad_norm": 290.8404541015625, + "learning_rate": 1.9983038971007254e-06, + "loss": 36.25, + "step": 5070 + }, + { + "epoch": 0.04800219611703789, + "grad_norm": 333.7303466796875, + "learning_rate": 1.9983021117649497e-06, + "loss": 23.8008, + "step": 5071 + }, + { + "epoch": 0.04801166213875294, + "grad_norm": 249.88063049316406, + "learning_rate": 1.9983003254908342e-06, + "loss": 24.3672, + "step": 5072 + }, + { + "epoch": 0.048021128160468, + "grad_norm": 440.74273681640625, + "learning_rate": 1.9982985382783816e-06, + "loss": 27.7969, + "step": 5073 + }, + { + "epoch": 0.048030594182183056, + "grad_norm": 578.466064453125, + "learning_rate": 1.9982967501275935e-06, + "loss": 51.9062, + "step": 5074 + }, + { + "epoch": 0.04804006020389811, + "grad_norm": 2.999251365661621, + "learning_rate": 1.998294961038471e-06, + "loss": 0.8257, + "step": 5075 + }, + { + "epoch": 0.04804952622561316, + "grad_norm": 443.05322265625, + "learning_rate": 1.998293171011017e-06, + "loss": 22.0781, + "step": 5076 + }, + { + "epoch": 0.048058992247328215, + "grad_norm": 1261.7855224609375, + "learning_rate": 1.9982913800452316e-06, + "loss": 92.4531, + "step": 5077 + }, + { + "epoch": 0.04806845826904327, + "grad_norm": 381.1056823730469, + "learning_rate": 1.9982895881411177e-06, + "loss": 26.5938, + "step": 5078 + }, + { + "epoch": 0.04807792429075832, + "grad_norm": 446.94903564453125, + "learning_rate": 1.9982877952986764e-06, + "loss": 33.3281, + "step": 5079 + }, + { + "epoch": 0.048087390312473374, + "grad_norm": 404.22857666015625, + "learning_rate": 1.9982860015179093e-06, + "loss": 42.0391, + "step": 5080 + }, + { + "epoch": 0.04809685633418843, + "grad_norm": 490.7121887207031, + "learning_rate": 1.9982842067988182e-06, + "loss": 38.4219, + "step": 5081 + }, + { + "epoch": 0.04810632235590349, + "grad_norm": 239.42745971679688, + "learning_rate": 1.998282411141405e-06, + "loss": 19.25, + "step": 5082 + }, + { + "epoch": 0.04811578837761854, + "grad_norm": 608.2982177734375, + "learning_rate": 1.998280614545672e-06, + "loss": 19.9219, + "step": 5083 + }, + { + "epoch": 0.04812525439933359, + "grad_norm": 334.5447692871094, + "learning_rate": 1.9982788170116197e-06, + "loss": 32.2031, + "step": 5084 + }, + { + "epoch": 0.048134720421048646, + "grad_norm": 730.7213745117188, + "learning_rate": 1.99827701853925e-06, + "loss": 48.4375, + "step": 5085 + }, + { + "epoch": 0.0481441864427637, + "grad_norm": 196.80300903320312, + "learning_rate": 1.998275219128565e-06, + "loss": 21.9297, + "step": 5086 + }, + { + "epoch": 0.04815365246447875, + "grad_norm": 699.2928466796875, + "learning_rate": 1.9982734187795663e-06, + "loss": 30.1484, + "step": 5087 + }, + { + "epoch": 0.048163118486193805, + "grad_norm": 779.814208984375, + "learning_rate": 1.998271617492255e-06, + "loss": 65.125, + "step": 5088 + }, + { + "epoch": 0.04817258450790886, + "grad_norm": 209.43508911132812, + "learning_rate": 1.998269815266634e-06, + "loss": 22.5938, + "step": 5089 + }, + { + "epoch": 0.04818205052962392, + "grad_norm": 478.05975341796875, + "learning_rate": 1.998268012102704e-06, + "loss": 28.6562, + "step": 5090 + }, + { + "epoch": 0.04819151655133897, + "grad_norm": 281.19329833984375, + "learning_rate": 1.998266208000467e-06, + "loss": 26.7656, + "step": 5091 + }, + { + "epoch": 0.048200982573054024, + "grad_norm": 583.8187866210938, + "learning_rate": 1.998264402959925e-06, + "loss": 53.1094, + "step": 5092 + }, + { + "epoch": 0.04821044859476908, + "grad_norm": 333.410400390625, + "learning_rate": 1.998262596981079e-06, + "loss": 36.3672, + "step": 5093 + }, + { + "epoch": 0.04821991461648413, + "grad_norm": 198.68142700195312, + "learning_rate": 1.9982607900639314e-06, + "loss": 23.9922, + "step": 5094 + }, + { + "epoch": 0.04822938063819918, + "grad_norm": 814.661376953125, + "learning_rate": 1.9982589822084837e-06, + "loss": 51.3125, + "step": 5095 + }, + { + "epoch": 0.048238846659914236, + "grad_norm": 258.6363525390625, + "learning_rate": 1.9982571734147374e-06, + "loss": 26.9297, + "step": 5096 + }, + { + "epoch": 0.04824831268162929, + "grad_norm": 956.1966552734375, + "learning_rate": 1.998255363682694e-06, + "loss": 49.9766, + "step": 5097 + }, + { + "epoch": 0.04825777870334434, + "grad_norm": 336.2134094238281, + "learning_rate": 1.9982535530123556e-06, + "loss": 29.7656, + "step": 5098 + }, + { + "epoch": 0.0482672447250594, + "grad_norm": 227.8562469482422, + "learning_rate": 1.998251741403724e-06, + "loss": 26.625, + "step": 5099 + }, + { + "epoch": 0.048276710746774455, + "grad_norm": 268.8437805175781, + "learning_rate": 1.9982499288568006e-06, + "loss": 37.3594, + "step": 5100 + }, + { + "epoch": 0.04828617676848951, + "grad_norm": 208.97576904296875, + "learning_rate": 1.998248115371587e-06, + "loss": 19.7188, + "step": 5101 + }, + { + "epoch": 0.04829564279020456, + "grad_norm": 4144.763671875, + "learning_rate": 1.9982463009480853e-06, + "loss": 27.875, + "step": 5102 + }, + { + "epoch": 0.048305108811919614, + "grad_norm": 407.7375183105469, + "learning_rate": 1.9982444855862972e-06, + "loss": 33.8125, + "step": 5103 + }, + { + "epoch": 0.04831457483363467, + "grad_norm": 601.0133056640625, + "learning_rate": 1.998242669286224e-06, + "loss": 65.6875, + "step": 5104 + }, + { + "epoch": 0.04832404085534972, + "grad_norm": 415.3020324707031, + "learning_rate": 1.9982408520478677e-06, + "loss": 67.7812, + "step": 5105 + }, + { + "epoch": 0.04833350687706477, + "grad_norm": 231.29164123535156, + "learning_rate": 1.99823903387123e-06, + "loss": 24.6562, + "step": 5106 + }, + { + "epoch": 0.04834297289877983, + "grad_norm": 204.03456115722656, + "learning_rate": 1.9982372147563126e-06, + "loss": 19.6562, + "step": 5107 + }, + { + "epoch": 0.048352438920494886, + "grad_norm": 3.257467746734619, + "learning_rate": 1.998235394703117e-06, + "loss": 0.8982, + "step": 5108 + }, + { + "epoch": 0.04836190494220994, + "grad_norm": 646.3291015625, + "learning_rate": 1.9982335737116452e-06, + "loss": 59.2031, + "step": 5109 + }, + { + "epoch": 0.04837137096392499, + "grad_norm": 3.1203408241271973, + "learning_rate": 1.9982317517818985e-06, + "loss": 1.021, + "step": 5110 + }, + { + "epoch": 0.048380836985640045, + "grad_norm": 2.680015802383423, + "learning_rate": 1.998229928913879e-06, + "loss": 0.9419, + "step": 5111 + }, + { + "epoch": 0.0483903030073551, + "grad_norm": 251.99136352539062, + "learning_rate": 1.9982281051075884e-06, + "loss": 27.5, + "step": 5112 + }, + { + "epoch": 0.04839976902907015, + "grad_norm": 334.4241027832031, + "learning_rate": 1.9982262803630284e-06, + "loss": 22.8359, + "step": 5113 + }, + { + "epoch": 0.048409235050785204, + "grad_norm": 453.2379455566406, + "learning_rate": 1.9982244546802007e-06, + "loss": 36.0781, + "step": 5114 + }, + { + "epoch": 0.04841870107250026, + "grad_norm": 207.59730529785156, + "learning_rate": 1.9982226280591065e-06, + "loss": 25.1797, + "step": 5115 + }, + { + "epoch": 0.04842816709421532, + "grad_norm": 1027.565185546875, + "learning_rate": 1.9982208004997484e-06, + "loss": 68.1875, + "step": 5116 + }, + { + "epoch": 0.04843763311593037, + "grad_norm": 310.982666015625, + "learning_rate": 1.9982189720021277e-06, + "loss": 27.8867, + "step": 5117 + }, + { + "epoch": 0.04844709913764542, + "grad_norm": 213.55662536621094, + "learning_rate": 1.9982171425662457e-06, + "loss": 24.6719, + "step": 5118 + }, + { + "epoch": 0.048456565159360476, + "grad_norm": 202.9219970703125, + "learning_rate": 1.9982153121921052e-06, + "loss": 25.9688, + "step": 5119 + }, + { + "epoch": 0.04846603118107553, + "grad_norm": 365.7906799316406, + "learning_rate": 1.9982134808797064e-06, + "loss": 28.875, + "step": 5120 + }, + { + "epoch": 0.04847549720279058, + "grad_norm": 336.6467590332031, + "learning_rate": 1.9982116486290525e-06, + "loss": 27.9844, + "step": 5121 + }, + { + "epoch": 0.048484963224505635, + "grad_norm": 492.2422180175781, + "learning_rate": 1.9982098154401445e-06, + "loss": 25.3438, + "step": 5122 + }, + { + "epoch": 0.04849442924622069, + "grad_norm": 266.2247619628906, + "learning_rate": 1.998207981312984e-06, + "loss": 24.6406, + "step": 5123 + }, + { + "epoch": 0.04850389526793574, + "grad_norm": 519.4354248046875, + "learning_rate": 1.9982061462475727e-06, + "loss": 38.3438, + "step": 5124 + }, + { + "epoch": 0.0485133612896508, + "grad_norm": 2.8549959659576416, + "learning_rate": 1.998204310243913e-06, + "loss": 0.8242, + "step": 5125 + }, + { + "epoch": 0.048522827311365854, + "grad_norm": 678.2728881835938, + "learning_rate": 1.998202473302006e-06, + "loss": 35.8281, + "step": 5126 + }, + { + "epoch": 0.04853229333308091, + "grad_norm": 223.77845764160156, + "learning_rate": 1.998200635421854e-06, + "loss": 24.6641, + "step": 5127 + }, + { + "epoch": 0.04854175935479596, + "grad_norm": 405.7039794921875, + "learning_rate": 1.9981987966034577e-06, + "loss": 58.7188, + "step": 5128 + }, + { + "epoch": 0.04855122537651101, + "grad_norm": 378.7366027832031, + "learning_rate": 1.99819695684682e-06, + "loss": 30.6484, + "step": 5129 + }, + { + "epoch": 0.048560691398226066, + "grad_norm": 806.5079956054688, + "learning_rate": 1.998195116151942e-06, + "loss": 25.9688, + "step": 5130 + }, + { + "epoch": 0.04857015741994112, + "grad_norm": 444.58526611328125, + "learning_rate": 1.998193274518825e-06, + "loss": 25.9688, + "step": 5131 + }, + { + "epoch": 0.04857962344165617, + "grad_norm": 420.381103515625, + "learning_rate": 1.9981914319474717e-06, + "loss": 47.8203, + "step": 5132 + }, + { + "epoch": 0.04858908946337123, + "grad_norm": 559.9047241210938, + "learning_rate": 1.9981895884378835e-06, + "loss": 54.0, + "step": 5133 + }, + { + "epoch": 0.048598555485086285, + "grad_norm": 250.37710571289062, + "learning_rate": 1.998187743990062e-06, + "loss": 18.1289, + "step": 5134 + }, + { + "epoch": 0.04860802150680134, + "grad_norm": 375.27105712890625, + "learning_rate": 1.9981858986040087e-06, + "loss": 22.9844, + "step": 5135 + }, + { + "epoch": 0.04861748752851639, + "grad_norm": 424.8392333984375, + "learning_rate": 1.998184052279726e-06, + "loss": 41.4688, + "step": 5136 + }, + { + "epoch": 0.048626953550231444, + "grad_norm": 810.2687377929688, + "learning_rate": 1.9981822050172146e-06, + "loss": 50.8203, + "step": 5137 + }, + { + "epoch": 0.0486364195719465, + "grad_norm": 832.7623291015625, + "learning_rate": 1.9981803568164776e-06, + "loss": 48.4609, + "step": 5138 + }, + { + "epoch": 0.04864588559366155, + "grad_norm": 3.305624008178711, + "learning_rate": 1.9981785076775152e-06, + "loss": 0.9136, + "step": 5139 + }, + { + "epoch": 0.0486553516153766, + "grad_norm": 332.3863525390625, + "learning_rate": 1.9981766576003305e-06, + "loss": 25.6406, + "step": 5140 + }, + { + "epoch": 0.048664817637091656, + "grad_norm": 1004.7781372070312, + "learning_rate": 1.9981748065849246e-06, + "loss": 33.3125, + "step": 5141 + }, + { + "epoch": 0.048674283658806716, + "grad_norm": 577.759033203125, + "learning_rate": 1.9981729546312993e-06, + "loss": 35.7656, + "step": 5142 + }, + { + "epoch": 0.04868374968052177, + "grad_norm": 245.04685974121094, + "learning_rate": 1.9981711017394567e-06, + "loss": 27.2969, + "step": 5143 + }, + { + "epoch": 0.04869321570223682, + "grad_norm": 765.3406372070312, + "learning_rate": 1.9981692479093976e-06, + "loss": 67.8438, + "step": 5144 + }, + { + "epoch": 0.048702681723951875, + "grad_norm": 481.549072265625, + "learning_rate": 1.9981673931411246e-06, + "loss": 22.957, + "step": 5145 + }, + { + "epoch": 0.04871214774566693, + "grad_norm": 428.3376770019531, + "learning_rate": 1.998165537434639e-06, + "loss": 28.8906, + "step": 5146 + }, + { + "epoch": 0.04872161376738198, + "grad_norm": 515.824951171875, + "learning_rate": 1.998163680789943e-06, + "loss": 25.4609, + "step": 5147 + }, + { + "epoch": 0.048731079789097034, + "grad_norm": 447.50732421875, + "learning_rate": 1.9981618232070376e-06, + "loss": 50.4062, + "step": 5148 + }, + { + "epoch": 0.04874054581081209, + "grad_norm": 357.427001953125, + "learning_rate": 1.9981599646859254e-06, + "loss": 25.8516, + "step": 5149 + }, + { + "epoch": 0.04875001183252715, + "grad_norm": 531.6966552734375, + "learning_rate": 1.9981581052266077e-06, + "loss": 43.0, + "step": 5150 + }, + { + "epoch": 0.0487594778542422, + "grad_norm": 335.1936950683594, + "learning_rate": 1.9981562448290863e-06, + "loss": 34.4531, + "step": 5151 + }, + { + "epoch": 0.04876894387595725, + "grad_norm": 590.9857788085938, + "learning_rate": 1.9981543834933632e-06, + "loss": 31.9219, + "step": 5152 + }, + { + "epoch": 0.048778409897672306, + "grad_norm": 385.0715026855469, + "learning_rate": 1.99815252121944e-06, + "loss": 26.5703, + "step": 5153 + }, + { + "epoch": 0.04878787591938736, + "grad_norm": 573.6887817382812, + "learning_rate": 1.9981506580073177e-06, + "loss": 49.7656, + "step": 5154 + }, + { + "epoch": 0.04879734194110241, + "grad_norm": 427.3918762207031, + "learning_rate": 1.9981487938569994e-06, + "loss": 29.9062, + "step": 5155 + }, + { + "epoch": 0.048806807962817465, + "grad_norm": 458.49200439453125, + "learning_rate": 1.998146928768486e-06, + "loss": 42.1406, + "step": 5156 + }, + { + "epoch": 0.04881627398453252, + "grad_norm": 278.4169921875, + "learning_rate": 1.998145062741779e-06, + "loss": 30.0469, + "step": 5157 + }, + { + "epoch": 0.04882574000624757, + "grad_norm": 357.3337707519531, + "learning_rate": 1.998143195776881e-06, + "loss": 42.8281, + "step": 5158 + }, + { + "epoch": 0.04883520602796263, + "grad_norm": 211.1962127685547, + "learning_rate": 1.998141327873793e-06, + "loss": 21.7031, + "step": 5159 + }, + { + "epoch": 0.048844672049677684, + "grad_norm": 476.93682861328125, + "learning_rate": 1.9981394590325172e-06, + "loss": 38.8281, + "step": 5160 + }, + { + "epoch": 0.04885413807139274, + "grad_norm": 808.9371337890625, + "learning_rate": 1.9981375892530556e-06, + "loss": 36.4297, + "step": 5161 + }, + { + "epoch": 0.04886360409310779, + "grad_norm": 322.5860900878906, + "learning_rate": 1.9981357185354093e-06, + "loss": 19.1406, + "step": 5162 + }, + { + "epoch": 0.04887307011482284, + "grad_norm": 517.3023681640625, + "learning_rate": 1.9981338468795803e-06, + "loss": 28.5859, + "step": 5163 + }, + { + "epoch": 0.048882536136537896, + "grad_norm": 1200.0032958984375, + "learning_rate": 1.9981319742855705e-06, + "loss": 48.6406, + "step": 5164 + }, + { + "epoch": 0.04889200215825295, + "grad_norm": 1156.256591796875, + "learning_rate": 1.998130100753382e-06, + "loss": 64.7734, + "step": 5165 + }, + { + "epoch": 0.048901468179968, + "grad_norm": 347.3496398925781, + "learning_rate": 1.9981282262830154e-06, + "loss": 37.4531, + "step": 5166 + }, + { + "epoch": 0.048910934201683055, + "grad_norm": 853.1897583007812, + "learning_rate": 1.9981263508744735e-06, + "loss": 28.125, + "step": 5167 + }, + { + "epoch": 0.048920400223398115, + "grad_norm": 425.0667724609375, + "learning_rate": 1.998124474527758e-06, + "loss": 39.2891, + "step": 5168 + }, + { + "epoch": 0.04892986624511317, + "grad_norm": 529.8400268554688, + "learning_rate": 1.9981225972428703e-06, + "loss": 47.3906, + "step": 5169 + }, + { + "epoch": 0.04893933226682822, + "grad_norm": 526.0480346679688, + "learning_rate": 1.998120719019812e-06, + "loss": 30.625, + "step": 5170 + }, + { + "epoch": 0.048948798288543274, + "grad_norm": 219.2708740234375, + "learning_rate": 1.998118839858585e-06, + "loss": 28.0469, + "step": 5171 + }, + { + "epoch": 0.04895826431025833, + "grad_norm": 289.1834716796875, + "learning_rate": 1.998116959759192e-06, + "loss": 26.8438, + "step": 5172 + }, + { + "epoch": 0.04896773033197338, + "grad_norm": 3.072312831878662, + "learning_rate": 1.9981150787216334e-06, + "loss": 0.7971, + "step": 5173 + }, + { + "epoch": 0.04897719635368843, + "grad_norm": 691.4142456054688, + "learning_rate": 1.9981131967459116e-06, + "loss": 58.3906, + "step": 5174 + }, + { + "epoch": 0.048986662375403486, + "grad_norm": 2.5461976528167725, + "learning_rate": 1.9981113138320284e-06, + "loss": 0.717, + "step": 5175 + }, + { + "epoch": 0.048996128397118546, + "grad_norm": 5285.74267578125, + "learning_rate": 1.9981094299799856e-06, + "loss": 58.4688, + "step": 5176 + }, + { + "epoch": 0.0490055944188336, + "grad_norm": 721.47314453125, + "learning_rate": 1.998107545189785e-06, + "loss": 55.4062, + "step": 5177 + }, + { + "epoch": 0.04901506044054865, + "grad_norm": 779.8312377929688, + "learning_rate": 1.998105659461428e-06, + "loss": 22.3125, + "step": 5178 + }, + { + "epoch": 0.049024526462263705, + "grad_norm": 596.8756103515625, + "learning_rate": 1.9981037727949164e-06, + "loss": 59.625, + "step": 5179 + }, + { + "epoch": 0.04903399248397876, + "grad_norm": 321.33233642578125, + "learning_rate": 1.998101885190253e-06, + "loss": 24.6875, + "step": 5180 + }, + { + "epoch": 0.04904345850569381, + "grad_norm": 422.54559326171875, + "learning_rate": 1.998099996647438e-06, + "loss": 38.5625, + "step": 5181 + }, + { + "epoch": 0.049052924527408864, + "grad_norm": 644.8276977539062, + "learning_rate": 1.998098107166474e-06, + "loss": 29.6836, + "step": 5182 + }, + { + "epoch": 0.04906239054912392, + "grad_norm": 236.41900634765625, + "learning_rate": 1.998096216747363e-06, + "loss": 25.1406, + "step": 5183 + }, + { + "epoch": 0.04907185657083897, + "grad_norm": 1430.39306640625, + "learning_rate": 1.9980943253901066e-06, + "loss": 53.4062, + "step": 5184 + }, + { + "epoch": 0.04908132259255403, + "grad_norm": 3.0889763832092285, + "learning_rate": 1.998092433094706e-06, + "loss": 0.9243, + "step": 5185 + }, + { + "epoch": 0.04909078861426908, + "grad_norm": 475.5988464355469, + "learning_rate": 1.9980905398611636e-06, + "loss": 47.4531, + "step": 5186 + }, + { + "epoch": 0.049100254635984136, + "grad_norm": 258.5972900390625, + "learning_rate": 1.998088645689481e-06, + "loss": 23.7891, + "step": 5187 + }, + { + "epoch": 0.04910972065769919, + "grad_norm": 514.62939453125, + "learning_rate": 1.9980867505796598e-06, + "loss": 31.5, + "step": 5188 + }, + { + "epoch": 0.04911918667941424, + "grad_norm": 380.1419372558594, + "learning_rate": 1.998084854531703e-06, + "loss": 20.8828, + "step": 5189 + }, + { + "epoch": 0.049128652701129295, + "grad_norm": 215.22946166992188, + "learning_rate": 1.99808295754561e-06, + "loss": 21.2812, + "step": 5190 + }, + { + "epoch": 0.04913811872284435, + "grad_norm": 517.4558715820312, + "learning_rate": 1.9980810596213846e-06, + "loss": 54.3516, + "step": 5191 + }, + { + "epoch": 0.0491475847445594, + "grad_norm": 633.379150390625, + "learning_rate": 1.998079160759028e-06, + "loss": 31.9609, + "step": 5192 + }, + { + "epoch": 0.04915705076627446, + "grad_norm": 354.2154235839844, + "learning_rate": 1.9980772609585415e-06, + "loss": 23.8906, + "step": 5193 + }, + { + "epoch": 0.049166516787989514, + "grad_norm": 332.4269714355469, + "learning_rate": 1.9980753602199283e-06, + "loss": 21.0391, + "step": 5194 + }, + { + "epoch": 0.04917598280970457, + "grad_norm": 525.1338500976562, + "learning_rate": 1.9980734585431883e-06, + "loss": 51.1172, + "step": 5195 + }, + { + "epoch": 0.04918544883141962, + "grad_norm": 418.5876159667969, + "learning_rate": 1.998071555928324e-06, + "loss": 38.875, + "step": 5196 + }, + { + "epoch": 0.04919491485313467, + "grad_norm": 333.3221435546875, + "learning_rate": 1.998069652375338e-06, + "loss": 25.3125, + "step": 5197 + }, + { + "epoch": 0.049204380874849726, + "grad_norm": 492.2866516113281, + "learning_rate": 1.9980677478842312e-06, + "loss": 36.7969, + "step": 5198 + }, + { + "epoch": 0.04921384689656478, + "grad_norm": 1126.69384765625, + "learning_rate": 1.9980658424550057e-06, + "loss": 41.9219, + "step": 5199 + }, + { + "epoch": 0.04922331291827983, + "grad_norm": 265.4482727050781, + "learning_rate": 1.9980639360876633e-06, + "loss": 31.4531, + "step": 5200 + }, + { + "epoch": 0.049232778939994885, + "grad_norm": 241.1100616455078, + "learning_rate": 1.9980620287822056e-06, + "loss": 24.7578, + "step": 5201 + }, + { + "epoch": 0.049242244961709945, + "grad_norm": 680.2837524414062, + "learning_rate": 1.9980601205386343e-06, + "loss": 22.7188, + "step": 5202 + }, + { + "epoch": 0.049251710983425, + "grad_norm": 1710.6566162109375, + "learning_rate": 1.9980582113569517e-06, + "loss": 45.1094, + "step": 5203 + }, + { + "epoch": 0.04926117700514005, + "grad_norm": 261.5258483886719, + "learning_rate": 1.9980563012371592e-06, + "loss": 26.9531, + "step": 5204 + }, + { + "epoch": 0.049270643026855104, + "grad_norm": 409.3571472167969, + "learning_rate": 1.998054390179259e-06, + "loss": 30.4609, + "step": 5205 + }, + { + "epoch": 0.04928010904857016, + "grad_norm": 993.267822265625, + "learning_rate": 1.9980524781832524e-06, + "loss": 56.0312, + "step": 5206 + }, + { + "epoch": 0.04928957507028521, + "grad_norm": 1005.1253051757812, + "learning_rate": 1.9980505652491413e-06, + "loss": 42.7969, + "step": 5207 + }, + { + "epoch": 0.04929904109200026, + "grad_norm": 587.9796142578125, + "learning_rate": 1.9980486513769277e-06, + "loss": 51.0547, + "step": 5208 + }, + { + "epoch": 0.049308507113715316, + "grad_norm": 554.8817749023438, + "learning_rate": 1.9980467365666133e-06, + "loss": 52.5156, + "step": 5209 + }, + { + "epoch": 0.04931797313543037, + "grad_norm": 392.9414367675781, + "learning_rate": 1.9980448208182e-06, + "loss": 33.2891, + "step": 5210 + }, + { + "epoch": 0.04932743915714543, + "grad_norm": 226.76649475097656, + "learning_rate": 1.9980429041316894e-06, + "loss": 20.5234, + "step": 5211 + }, + { + "epoch": 0.04933690517886048, + "grad_norm": 989.1110229492188, + "learning_rate": 1.998040986507083e-06, + "loss": 36.8906, + "step": 5212 + }, + { + "epoch": 0.049346371200575535, + "grad_norm": 609.2036743164062, + "learning_rate": 1.998039067944384e-06, + "loss": 51.8125, + "step": 5213 + }, + { + "epoch": 0.04935583722229059, + "grad_norm": 2.7856369018554688, + "learning_rate": 1.9980371484435923e-06, + "loss": 0.8857, + "step": 5214 + }, + { + "epoch": 0.04936530324400564, + "grad_norm": 390.9459228515625, + "learning_rate": 1.998035228004711e-06, + "loss": 32.4531, + "step": 5215 + }, + { + "epoch": 0.049374769265720694, + "grad_norm": 2.906804084777832, + "learning_rate": 1.998033306627741e-06, + "loss": 0.7136, + "step": 5216 + }, + { + "epoch": 0.04938423528743575, + "grad_norm": 598.6974487304688, + "learning_rate": 1.998031384312685e-06, + "loss": 34.375, + "step": 5217 + }, + { + "epoch": 0.0493937013091508, + "grad_norm": 691.2288208007812, + "learning_rate": 1.9980294610595445e-06, + "loss": 56.3438, + "step": 5218 + }, + { + "epoch": 0.04940316733086586, + "grad_norm": 579.7899780273438, + "learning_rate": 1.998027536868321e-06, + "loss": 20.2812, + "step": 5219 + }, + { + "epoch": 0.04941263335258091, + "grad_norm": 375.57330322265625, + "learning_rate": 1.9980256117390168e-06, + "loss": 29.1562, + "step": 5220 + }, + { + "epoch": 0.049422099374295966, + "grad_norm": 232.38436889648438, + "learning_rate": 1.9980236856716336e-06, + "loss": 24.3047, + "step": 5221 + }, + { + "epoch": 0.04943156539601102, + "grad_norm": 3.0018670558929443, + "learning_rate": 1.9980217586661725e-06, + "loss": 1.02, + "step": 5222 + }, + { + "epoch": 0.04944103141772607, + "grad_norm": 323.08966064453125, + "learning_rate": 1.9980198307226363e-06, + "loss": 26.5156, + "step": 5223 + }, + { + "epoch": 0.049450497439441125, + "grad_norm": 634.68994140625, + "learning_rate": 1.998017901841026e-06, + "loss": 58.2031, + "step": 5224 + }, + { + "epoch": 0.04945996346115618, + "grad_norm": 1589.779052734375, + "learning_rate": 1.9980159720213445e-06, + "loss": 48.8516, + "step": 5225 + }, + { + "epoch": 0.04946942948287123, + "grad_norm": 580.04248046875, + "learning_rate": 1.9980140412635926e-06, + "loss": 45.0469, + "step": 5226 + }, + { + "epoch": 0.049478895504586284, + "grad_norm": 552.8474731445312, + "learning_rate": 1.998012109567772e-06, + "loss": 38.8906, + "step": 5227 + }, + { + "epoch": 0.049488361526301344, + "grad_norm": 493.69024658203125, + "learning_rate": 1.998010176933885e-06, + "loss": 30.8281, + "step": 5228 + }, + { + "epoch": 0.0494978275480164, + "grad_norm": 314.4693603515625, + "learning_rate": 1.998008243361934e-06, + "loss": 33.3906, + "step": 5229 + }, + { + "epoch": 0.04950729356973145, + "grad_norm": 642.1728515625, + "learning_rate": 1.9980063088519197e-06, + "loss": 24.1055, + "step": 5230 + }, + { + "epoch": 0.0495167595914465, + "grad_norm": 434.8253173828125, + "learning_rate": 1.9980043734038444e-06, + "loss": 35.6719, + "step": 5231 + }, + { + "epoch": 0.049526225613161556, + "grad_norm": 471.2936096191406, + "learning_rate": 1.9980024370177098e-06, + "loss": 36.125, + "step": 5232 + }, + { + "epoch": 0.04953569163487661, + "grad_norm": 3.1551895141601562, + "learning_rate": 1.9980004996935184e-06, + "loss": 0.8281, + "step": 5233 + }, + { + "epoch": 0.04954515765659166, + "grad_norm": 488.4020080566406, + "learning_rate": 1.997998561431271e-06, + "loss": 32.4219, + "step": 5234 + }, + { + "epoch": 0.049554623678306715, + "grad_norm": 326.4213562011719, + "learning_rate": 1.99799662223097e-06, + "loss": 24.3906, + "step": 5235 + }, + { + "epoch": 0.04956408970002177, + "grad_norm": 2.9514169692993164, + "learning_rate": 1.997994682092617e-06, + "loss": 0.8796, + "step": 5236 + }, + { + "epoch": 0.04957355572173683, + "grad_norm": 916.8401489257812, + "learning_rate": 1.997992741016214e-06, + "loss": 40.2188, + "step": 5237 + }, + { + "epoch": 0.04958302174345188, + "grad_norm": 151.92575073242188, + "learning_rate": 1.9979907990017626e-06, + "loss": 20.7266, + "step": 5238 + }, + { + "epoch": 0.049592487765166934, + "grad_norm": 245.1997528076172, + "learning_rate": 1.9979888560492647e-06, + "loss": 28.4688, + "step": 5239 + }, + { + "epoch": 0.04960195378688199, + "grad_norm": 743.9102172851562, + "learning_rate": 1.9979869121587227e-06, + "loss": 58.9219, + "step": 5240 + }, + { + "epoch": 0.04961141980859704, + "grad_norm": 245.05882263183594, + "learning_rate": 1.9979849673301374e-06, + "loss": 21.1562, + "step": 5241 + }, + { + "epoch": 0.04962088583031209, + "grad_norm": 367.3678894042969, + "learning_rate": 1.9979830215635114e-06, + "loss": 25.0469, + "step": 5242 + }, + { + "epoch": 0.049630351852027146, + "grad_norm": 356.7420654296875, + "learning_rate": 1.9979810748588464e-06, + "loss": 21.7344, + "step": 5243 + }, + { + "epoch": 0.0496398178737422, + "grad_norm": 400.4414978027344, + "learning_rate": 1.9979791272161437e-06, + "loss": 28.1562, + "step": 5244 + }, + { + "epoch": 0.04964928389545726, + "grad_norm": 614.1138305664062, + "learning_rate": 1.997977178635406e-06, + "loss": 24.3125, + "step": 5245 + }, + { + "epoch": 0.04965874991717231, + "grad_norm": 884.127685546875, + "learning_rate": 1.9979752291166343e-06, + "loss": 50.2969, + "step": 5246 + }, + { + "epoch": 0.049668215938887365, + "grad_norm": 214.02169799804688, + "learning_rate": 1.997973278659831e-06, + "loss": 34.7812, + "step": 5247 + }, + { + "epoch": 0.04967768196060242, + "grad_norm": 256.9007568359375, + "learning_rate": 1.9979713272649977e-06, + "loss": 23.6875, + "step": 5248 + }, + { + "epoch": 0.04968714798231747, + "grad_norm": 700.6049194335938, + "learning_rate": 1.9979693749321363e-06, + "loss": 62.4375, + "step": 5249 + }, + { + "epoch": 0.049696614004032524, + "grad_norm": 243.4370574951172, + "learning_rate": 1.9979674216612486e-06, + "loss": 23.5234, + "step": 5250 + }, + { + "epoch": 0.04970608002574758, + "grad_norm": 721.7056274414062, + "learning_rate": 1.9979654674523367e-06, + "loss": 39.8125, + "step": 5251 + }, + { + "epoch": 0.04971554604746263, + "grad_norm": 176.00653076171875, + "learning_rate": 1.997963512305402e-06, + "loss": 21.9609, + "step": 5252 + }, + { + "epoch": 0.04972501206917768, + "grad_norm": 398.43841552734375, + "learning_rate": 1.9979615562204463e-06, + "loss": 34.8125, + "step": 5253 + }, + { + "epoch": 0.04973447809089274, + "grad_norm": 3.1930084228515625, + "learning_rate": 1.9979595991974716e-06, + "loss": 0.8975, + "step": 5254 + }, + { + "epoch": 0.049743944112607796, + "grad_norm": 361.5784912109375, + "learning_rate": 1.9979576412364803e-06, + "loss": 19.8125, + "step": 5255 + }, + { + "epoch": 0.04975341013432285, + "grad_norm": 1539.3179931640625, + "learning_rate": 1.9979556823374733e-06, + "loss": 51.9062, + "step": 5256 + }, + { + "epoch": 0.0497628761560379, + "grad_norm": 432.0285949707031, + "learning_rate": 1.997953722500453e-06, + "loss": 34.375, + "step": 5257 + }, + { + "epoch": 0.049772342177752955, + "grad_norm": 608.7377319335938, + "learning_rate": 1.9979517617254215e-06, + "loss": 44.3906, + "step": 5258 + }, + { + "epoch": 0.04978180819946801, + "grad_norm": 283.7597351074219, + "learning_rate": 1.9979498000123796e-06, + "loss": 34.3281, + "step": 5259 + }, + { + "epoch": 0.04979127422118306, + "grad_norm": 1748.102783203125, + "learning_rate": 1.9979478373613304e-06, + "loss": 67.8984, + "step": 5260 + }, + { + "epoch": 0.049800740242898114, + "grad_norm": 511.7937316894531, + "learning_rate": 1.997945873772275e-06, + "loss": 20.4844, + "step": 5261 + }, + { + "epoch": 0.049810206264613174, + "grad_norm": 567.5361938476562, + "learning_rate": 1.997943909245215e-06, + "loss": 31.5234, + "step": 5262 + }, + { + "epoch": 0.04981967228632823, + "grad_norm": 477.16253662109375, + "learning_rate": 1.997941943780153e-06, + "loss": 49.9688, + "step": 5263 + }, + { + "epoch": 0.04982913830804328, + "grad_norm": 392.032958984375, + "learning_rate": 1.99793997737709e-06, + "loss": 25.3906, + "step": 5264 + }, + { + "epoch": 0.04983860432975833, + "grad_norm": 412.4135437011719, + "learning_rate": 1.9979380100360294e-06, + "loss": 27.3672, + "step": 5265 + }, + { + "epoch": 0.049848070351473386, + "grad_norm": 203.4495391845703, + "learning_rate": 1.9979360417569715e-06, + "loss": 26.4219, + "step": 5266 + }, + { + "epoch": 0.04985753637318844, + "grad_norm": 331.6919860839844, + "learning_rate": 1.997934072539918e-06, + "loss": 28.4453, + "step": 5267 + }, + { + "epoch": 0.04986700239490349, + "grad_norm": 682.936279296875, + "learning_rate": 1.997932102384872e-06, + "loss": 37.0781, + "step": 5268 + }, + { + "epoch": 0.049876468416618545, + "grad_norm": 345.3226623535156, + "learning_rate": 1.9979301312918347e-06, + "loss": 32.3594, + "step": 5269 + }, + { + "epoch": 0.0498859344383336, + "grad_norm": 168.68975830078125, + "learning_rate": 1.997928159260808e-06, + "loss": 23.6719, + "step": 5270 + }, + { + "epoch": 0.04989540046004866, + "grad_norm": 440.9328918457031, + "learning_rate": 1.9979261862917934e-06, + "loss": 46.0312, + "step": 5271 + }, + { + "epoch": 0.04990486648176371, + "grad_norm": 996.1240844726562, + "learning_rate": 1.9979242123847936e-06, + "loss": 22.6875, + "step": 5272 + }, + { + "epoch": 0.049914332503478764, + "grad_norm": 1153.6104736328125, + "learning_rate": 1.99792223753981e-06, + "loss": 37.6484, + "step": 5273 + }, + { + "epoch": 0.04992379852519382, + "grad_norm": 413.0576171875, + "learning_rate": 1.997920261756844e-06, + "loss": 35.9062, + "step": 5274 + }, + { + "epoch": 0.04993326454690887, + "grad_norm": 552.7859497070312, + "learning_rate": 1.997918285035898e-06, + "loss": 52.625, + "step": 5275 + }, + { + "epoch": 0.04994273056862392, + "grad_norm": 296.2982177734375, + "learning_rate": 1.9979163073769736e-06, + "loss": 27.5312, + "step": 5276 + }, + { + "epoch": 0.049952196590338976, + "grad_norm": 561.613525390625, + "learning_rate": 1.997914328780073e-06, + "loss": 21.9648, + "step": 5277 + }, + { + "epoch": 0.04996166261205403, + "grad_norm": 573.782958984375, + "learning_rate": 1.997912349245198e-06, + "loss": 43.625, + "step": 5278 + }, + { + "epoch": 0.04997112863376908, + "grad_norm": 371.718505859375, + "learning_rate": 1.99791036877235e-06, + "loss": 27.0781, + "step": 5279 + }, + { + "epoch": 0.04998059465548414, + "grad_norm": 309.628173828125, + "learning_rate": 1.997908387361531e-06, + "loss": 28.2734, + "step": 5280 + }, + { + "epoch": 0.049990060677199195, + "grad_norm": 391.4361877441406, + "learning_rate": 1.997906405012744e-06, + "loss": 24.5938, + "step": 5281 + }, + { + "epoch": 0.04999952669891425, + "grad_norm": 3.188462018966675, + "learning_rate": 1.9979044217259888e-06, + "loss": 0.8979, + "step": 5282 + }, + { + "epoch": 0.0500089927206293, + "grad_norm": 245.1901397705078, + "learning_rate": 1.997902437501269e-06, + "loss": 26.4062, + "step": 5283 + }, + { + "epoch": 0.050018458742344354, + "grad_norm": 1036.894775390625, + "learning_rate": 1.9979004523385852e-06, + "loss": 82.7344, + "step": 5284 + }, + { + "epoch": 0.05002792476405941, + "grad_norm": 427.4485168457031, + "learning_rate": 1.99789846623794e-06, + "loss": 28.0977, + "step": 5285 + }, + { + "epoch": 0.05003739078577446, + "grad_norm": 546.472412109375, + "learning_rate": 1.997896479199336e-06, + "loss": 36.3438, + "step": 5286 + }, + { + "epoch": 0.05004685680748951, + "grad_norm": 416.75482177734375, + "learning_rate": 1.9978944912227735e-06, + "loss": 50.5625, + "step": 5287 + }, + { + "epoch": 0.05005632282920457, + "grad_norm": 428.3577575683594, + "learning_rate": 1.9978925023082553e-06, + "loss": 58.8125, + "step": 5288 + }, + { + "epoch": 0.050065788850919626, + "grad_norm": 621.1582641601562, + "learning_rate": 1.997890512455783e-06, + "loss": 27.5, + "step": 5289 + }, + { + "epoch": 0.05007525487263468, + "grad_norm": 322.2375183105469, + "learning_rate": 1.9978885216653585e-06, + "loss": 23.1562, + "step": 5290 + }, + { + "epoch": 0.05008472089434973, + "grad_norm": 461.04229736328125, + "learning_rate": 1.997886529936984e-06, + "loss": 37.6016, + "step": 5291 + }, + { + "epoch": 0.050094186916064785, + "grad_norm": 312.9264831542969, + "learning_rate": 1.9978845372706606e-06, + "loss": 24.625, + "step": 5292 + }, + { + "epoch": 0.05010365293777984, + "grad_norm": 544.4048461914062, + "learning_rate": 1.997882543666391e-06, + "loss": 29.0156, + "step": 5293 + }, + { + "epoch": 0.05011311895949489, + "grad_norm": 797.8146362304688, + "learning_rate": 1.9978805491241768e-06, + "loss": 55.4219, + "step": 5294 + }, + { + "epoch": 0.050122584981209944, + "grad_norm": 332.91827392578125, + "learning_rate": 1.99787855364402e-06, + "loss": 29.7266, + "step": 5295 + }, + { + "epoch": 0.050132051002925, + "grad_norm": 416.5557861328125, + "learning_rate": 1.9978765572259214e-06, + "loss": 29.0781, + "step": 5296 + }, + { + "epoch": 0.05014151702464006, + "grad_norm": 283.68731689453125, + "learning_rate": 1.9978745598698843e-06, + "loss": 23.0625, + "step": 5297 + }, + { + "epoch": 0.05015098304635511, + "grad_norm": 753.0687255859375, + "learning_rate": 1.9978725615759103e-06, + "loss": 34.9375, + "step": 5298 + }, + { + "epoch": 0.05016044906807016, + "grad_norm": 186.77285766601562, + "learning_rate": 1.9978705623440005e-06, + "loss": 26.7734, + "step": 5299 + }, + { + "epoch": 0.050169915089785216, + "grad_norm": 432.09796142578125, + "learning_rate": 1.9978685621741577e-06, + "loss": 23.1797, + "step": 5300 + }, + { + "epoch": 0.05017938111150027, + "grad_norm": 939.197998046875, + "learning_rate": 1.9978665610663833e-06, + "loss": 47.2969, + "step": 5301 + }, + { + "epoch": 0.05018884713321532, + "grad_norm": 273.6300964355469, + "learning_rate": 1.997864559020679e-06, + "loss": 25.0625, + "step": 5302 + }, + { + "epoch": 0.050198313154930375, + "grad_norm": 315.52984619140625, + "learning_rate": 1.997862556037047e-06, + "loss": 22.2656, + "step": 5303 + }, + { + "epoch": 0.05020777917664543, + "grad_norm": 893.2252807617188, + "learning_rate": 1.997860552115489e-06, + "loss": 48.5312, + "step": 5304 + }, + { + "epoch": 0.05021724519836049, + "grad_norm": 342.21270751953125, + "learning_rate": 1.9978585472560073e-06, + "loss": 23.2969, + "step": 5305 + }, + { + "epoch": 0.05022671122007554, + "grad_norm": 471.5636901855469, + "learning_rate": 1.9978565414586033e-06, + "loss": 34.9062, + "step": 5306 + }, + { + "epoch": 0.050236177241790594, + "grad_norm": 594.0134887695312, + "learning_rate": 1.9978545347232793e-06, + "loss": 35.5, + "step": 5307 + }, + { + "epoch": 0.05024564326350565, + "grad_norm": 246.68966674804688, + "learning_rate": 1.997852527050036e-06, + "loss": 21.2969, + "step": 5308 + }, + { + "epoch": 0.0502551092852207, + "grad_norm": 632.1905517578125, + "learning_rate": 1.9978505184388777e-06, + "loss": 27.6875, + "step": 5309 + }, + { + "epoch": 0.05026457530693575, + "grad_norm": 299.65582275390625, + "learning_rate": 1.997848508889804e-06, + "loss": 37.8125, + "step": 5310 + }, + { + "epoch": 0.050274041328650806, + "grad_norm": 255.455078125, + "learning_rate": 1.9978464984028176e-06, + "loss": 28.0234, + "step": 5311 + }, + { + "epoch": 0.05028350735036586, + "grad_norm": 3.099949359893799, + "learning_rate": 1.9978444869779207e-06, + "loss": 0.9731, + "step": 5312 + }, + { + "epoch": 0.05029297337208091, + "grad_norm": 193.1591033935547, + "learning_rate": 1.9978424746151148e-06, + "loss": 20.5156, + "step": 5313 + }, + { + "epoch": 0.05030243939379597, + "grad_norm": 582.0731201171875, + "learning_rate": 1.9978404613144016e-06, + "loss": 31.4492, + "step": 5314 + }, + { + "epoch": 0.050311905415511025, + "grad_norm": 392.0903625488281, + "learning_rate": 1.9978384470757836e-06, + "loss": 28.8594, + "step": 5315 + }, + { + "epoch": 0.05032137143722608, + "grad_norm": 341.2526550292969, + "learning_rate": 1.997836431899262e-06, + "loss": 17.3164, + "step": 5316 + }, + { + "epoch": 0.05033083745894113, + "grad_norm": 2.517918586730957, + "learning_rate": 1.9978344157848393e-06, + "loss": 0.8789, + "step": 5317 + }, + { + "epoch": 0.050340303480656184, + "grad_norm": 3.3505351543426514, + "learning_rate": 1.9978323987325172e-06, + "loss": 1.0996, + "step": 5318 + }, + { + "epoch": 0.05034976950237124, + "grad_norm": 492.8443298339844, + "learning_rate": 1.997830380742298e-06, + "loss": 56.125, + "step": 5319 + }, + { + "epoch": 0.05035923552408629, + "grad_norm": 540.8740844726562, + "learning_rate": 1.9978283618141825e-06, + "loss": 32.625, + "step": 5320 + }, + { + "epoch": 0.05036870154580134, + "grad_norm": 343.8959655761719, + "learning_rate": 1.9978263419481737e-06, + "loss": 40.7109, + "step": 5321 + }, + { + "epoch": 0.050378167567516396, + "grad_norm": 337.7358703613281, + "learning_rate": 1.9978243211442728e-06, + "loss": 17.4922, + "step": 5322 + }, + { + "epoch": 0.050387633589231456, + "grad_norm": 361.9353332519531, + "learning_rate": 1.997822299402482e-06, + "loss": 22.1328, + "step": 5323 + }, + { + "epoch": 0.05039709961094651, + "grad_norm": 202.66880798339844, + "learning_rate": 1.997820276722803e-06, + "loss": 23.7891, + "step": 5324 + }, + { + "epoch": 0.05040656563266156, + "grad_norm": 2.9936141967773438, + "learning_rate": 1.997818253105238e-06, + "loss": 0.9814, + "step": 5325 + }, + { + "epoch": 0.050416031654376615, + "grad_norm": 548.0919189453125, + "learning_rate": 1.9978162285497885e-06, + "loss": 53.9375, + "step": 5326 + }, + { + "epoch": 0.05042549767609167, + "grad_norm": 267.9744567871094, + "learning_rate": 1.997814203056457e-06, + "loss": 25.4375, + "step": 5327 + }, + { + "epoch": 0.05043496369780672, + "grad_norm": 585.6610107421875, + "learning_rate": 1.9978121766252453e-06, + "loss": 29.1406, + "step": 5328 + }, + { + "epoch": 0.050444429719521774, + "grad_norm": 233.8960418701172, + "learning_rate": 1.9978101492561542e-06, + "loss": 28.1094, + "step": 5329 + }, + { + "epoch": 0.05045389574123683, + "grad_norm": 431.9930114746094, + "learning_rate": 1.9978081209491872e-06, + "loss": 29.5312, + "step": 5330 + }, + { + "epoch": 0.05046336176295189, + "grad_norm": 528.4657592773438, + "learning_rate": 1.9978060917043455e-06, + "loss": 69.125, + "step": 5331 + }, + { + "epoch": 0.05047282778466694, + "grad_norm": 353.7773742675781, + "learning_rate": 1.9978040615216304e-06, + "loss": 38.2812, + "step": 5332 + }, + { + "epoch": 0.05048229380638199, + "grad_norm": 645.0875854492188, + "learning_rate": 1.997802030401045e-06, + "loss": 62.9844, + "step": 5333 + }, + { + "epoch": 0.050491759828097046, + "grad_norm": 3.1926827430725098, + "learning_rate": 1.9977999983425905e-06, + "loss": 0.9255, + "step": 5334 + }, + { + "epoch": 0.0505012258498121, + "grad_norm": 591.0059204101562, + "learning_rate": 1.9977979653462686e-06, + "loss": 31.9688, + "step": 5335 + }, + { + "epoch": 0.05051069187152715, + "grad_norm": 433.75018310546875, + "learning_rate": 1.9977959314120814e-06, + "loss": 36.6953, + "step": 5336 + }, + { + "epoch": 0.050520157893242205, + "grad_norm": 253.76461791992188, + "learning_rate": 1.9977938965400314e-06, + "loss": 20.4844, + "step": 5337 + }, + { + "epoch": 0.05052962391495726, + "grad_norm": 694.1417846679688, + "learning_rate": 1.99779186073012e-06, + "loss": 46.0625, + "step": 5338 + }, + { + "epoch": 0.05053908993667231, + "grad_norm": 172.93321228027344, + "learning_rate": 1.9977898239823488e-06, + "loss": 23.4766, + "step": 5339 + }, + { + "epoch": 0.05054855595838737, + "grad_norm": 295.9559020996094, + "learning_rate": 1.9977877862967204e-06, + "loss": 23.0938, + "step": 5340 + }, + { + "epoch": 0.050558021980102424, + "grad_norm": 442.8326721191406, + "learning_rate": 1.9977857476732364e-06, + "loss": 25.5156, + "step": 5341 + }, + { + "epoch": 0.05056748800181748, + "grad_norm": 172.93882751464844, + "learning_rate": 1.9977837081118985e-06, + "loss": 24.8438, + "step": 5342 + }, + { + "epoch": 0.05057695402353253, + "grad_norm": 170.6522216796875, + "learning_rate": 1.9977816676127087e-06, + "loss": 22.5469, + "step": 5343 + }, + { + "epoch": 0.05058642004524758, + "grad_norm": 570.1143798828125, + "learning_rate": 1.9977796261756693e-06, + "loss": 25.375, + "step": 5344 + }, + { + "epoch": 0.050595886066962636, + "grad_norm": 434.2060241699219, + "learning_rate": 1.9977775838007823e-06, + "loss": 10.5508, + "step": 5345 + }, + { + "epoch": 0.05060535208867769, + "grad_norm": 382.55718994140625, + "learning_rate": 1.997775540488049e-06, + "loss": 31.7969, + "step": 5346 + }, + { + "epoch": 0.05061481811039274, + "grad_norm": 429.2646179199219, + "learning_rate": 1.9977734962374714e-06, + "loss": 32.6406, + "step": 5347 + }, + { + "epoch": 0.0506242841321078, + "grad_norm": 328.7024230957031, + "learning_rate": 1.9977714510490517e-06, + "loss": 42.25, + "step": 5348 + }, + { + "epoch": 0.050633750153822855, + "grad_norm": 667.7344970703125, + "learning_rate": 1.9977694049227917e-06, + "loss": 33.2891, + "step": 5349 + }, + { + "epoch": 0.05064321617553791, + "grad_norm": 428.8728332519531, + "learning_rate": 1.9977673578586934e-06, + "loss": 33.1562, + "step": 5350 + }, + { + "epoch": 0.05065268219725296, + "grad_norm": 208.6680450439453, + "learning_rate": 1.997765309856759e-06, + "loss": 27.8906, + "step": 5351 + }, + { + "epoch": 0.050662148218968014, + "grad_norm": 144.12509155273438, + "learning_rate": 1.9977632609169897e-06, + "loss": 22.3281, + "step": 5352 + }, + { + "epoch": 0.05067161424068307, + "grad_norm": 3.376955986022949, + "learning_rate": 1.997761211039388e-06, + "loss": 0.8599, + "step": 5353 + }, + { + "epoch": 0.05068108026239812, + "grad_norm": 407.7967834472656, + "learning_rate": 1.9977591602239558e-06, + "loss": 19.7344, + "step": 5354 + }, + { + "epoch": 0.050690546284113173, + "grad_norm": 297.9488525390625, + "learning_rate": 1.9977571084706945e-06, + "loss": 31.75, + "step": 5355 + }, + { + "epoch": 0.050700012305828226, + "grad_norm": 1532.1309814453125, + "learning_rate": 1.997755055779607e-06, + "loss": 37.0781, + "step": 5356 + }, + { + "epoch": 0.050709478327543286, + "grad_norm": 624.8471069335938, + "learning_rate": 1.997753002150694e-06, + "loss": 24.9531, + "step": 5357 + }, + { + "epoch": 0.05071894434925834, + "grad_norm": 415.9170227050781, + "learning_rate": 1.9977509475839587e-06, + "loss": 25.4219, + "step": 5358 + }, + { + "epoch": 0.05072841037097339, + "grad_norm": 384.15435791015625, + "learning_rate": 1.997748892079402e-06, + "loss": 28.1523, + "step": 5359 + }, + { + "epoch": 0.050737876392688445, + "grad_norm": 284.5003967285156, + "learning_rate": 1.9977468356370265e-06, + "loss": 20.1875, + "step": 5360 + }, + { + "epoch": 0.0507473424144035, + "grad_norm": 239.40980529785156, + "learning_rate": 1.997744778256834e-06, + "loss": 23.5469, + "step": 5361 + }, + { + "epoch": 0.05075680843611855, + "grad_norm": 679.1757202148438, + "learning_rate": 1.997742719938826e-06, + "loss": 26.7188, + "step": 5362 + }, + { + "epoch": 0.050766274457833604, + "grad_norm": 313.8531188964844, + "learning_rate": 1.997740660683005e-06, + "loss": 23.0938, + "step": 5363 + }, + { + "epoch": 0.05077574047954866, + "grad_norm": 466.4092102050781, + "learning_rate": 1.9977386004893722e-06, + "loss": 13.7969, + "step": 5364 + }, + { + "epoch": 0.05078520650126371, + "grad_norm": 341.6548156738281, + "learning_rate": 1.9977365393579303e-06, + "loss": 26.75, + "step": 5365 + }, + { + "epoch": 0.05079467252297877, + "grad_norm": 438.5145568847656, + "learning_rate": 1.997734477288681e-06, + "loss": 39.2344, + "step": 5366 + }, + { + "epoch": 0.05080413854469382, + "grad_norm": 483.2239074707031, + "learning_rate": 1.997732414281626e-06, + "loss": 44.7266, + "step": 5367 + }, + { + "epoch": 0.050813604566408876, + "grad_norm": 289.5129089355469, + "learning_rate": 1.997730350336768e-06, + "loss": 11.4648, + "step": 5368 + }, + { + "epoch": 0.05082307058812393, + "grad_norm": 751.6818237304688, + "learning_rate": 1.9977282854541083e-06, + "loss": 77.3359, + "step": 5369 + }, + { + "epoch": 0.05083253660983898, + "grad_norm": 310.1125183105469, + "learning_rate": 1.9977262196336487e-06, + "loss": 27.5, + "step": 5370 + }, + { + "epoch": 0.050842002631554035, + "grad_norm": 437.0325012207031, + "learning_rate": 1.997724152875391e-06, + "loss": 25.3203, + "step": 5371 + }, + { + "epoch": 0.05085146865326909, + "grad_norm": 266.029541015625, + "learning_rate": 1.997722085179338e-06, + "loss": 26.3125, + "step": 5372 + }, + { + "epoch": 0.05086093467498414, + "grad_norm": 277.61279296875, + "learning_rate": 1.9977200165454906e-06, + "loss": 28.9531, + "step": 5373 + }, + { + "epoch": 0.0508704006966992, + "grad_norm": 379.05401611328125, + "learning_rate": 1.997717946973852e-06, + "loss": 24.375, + "step": 5374 + }, + { + "epoch": 0.050879866718414254, + "grad_norm": 471.94110107421875, + "learning_rate": 1.997715876464423e-06, + "loss": 28.5156, + "step": 5375 + }, + { + "epoch": 0.05088933274012931, + "grad_norm": 288.6314697265625, + "learning_rate": 1.997713805017206e-06, + "loss": 23.3672, + "step": 5376 + }, + { + "epoch": 0.05089879876184436, + "grad_norm": 230.88404846191406, + "learning_rate": 1.997711732632203e-06, + "loss": 27.4375, + "step": 5377 + }, + { + "epoch": 0.050908264783559413, + "grad_norm": 408.583740234375, + "learning_rate": 1.997709659309416e-06, + "loss": 27.0078, + "step": 5378 + }, + { + "epoch": 0.050917730805274466, + "grad_norm": 302.6225280761719, + "learning_rate": 1.9977075850488465e-06, + "loss": 26.6016, + "step": 5379 + }, + { + "epoch": 0.05092719682698952, + "grad_norm": 247.5236358642578, + "learning_rate": 1.9977055098504972e-06, + "loss": 21.7812, + "step": 5380 + }, + { + "epoch": 0.05093666284870457, + "grad_norm": 444.846435546875, + "learning_rate": 1.9977034337143694e-06, + "loss": 34.875, + "step": 5381 + }, + { + "epoch": 0.050946128870419626, + "grad_norm": 250.2587432861328, + "learning_rate": 1.997701356640465e-06, + "loss": 25.3984, + "step": 5382 + }, + { + "epoch": 0.050955594892134685, + "grad_norm": 169.0071563720703, + "learning_rate": 1.997699278628787e-06, + "loss": 17.0391, + "step": 5383 + }, + { + "epoch": 0.05096506091384974, + "grad_norm": 192.25213623046875, + "learning_rate": 1.9976971996793357e-06, + "loss": 25.5859, + "step": 5384 + }, + { + "epoch": 0.05097452693556479, + "grad_norm": 381.0699157714844, + "learning_rate": 1.9976951197921144e-06, + "loss": 20.6094, + "step": 5385 + }, + { + "epoch": 0.050983992957279844, + "grad_norm": 898.5232543945312, + "learning_rate": 1.9976930389671247e-06, + "loss": 57.5156, + "step": 5386 + }, + { + "epoch": 0.0509934589789949, + "grad_norm": 270.2722473144531, + "learning_rate": 1.9976909572043684e-06, + "loss": 26.6172, + "step": 5387 + }, + { + "epoch": 0.05100292500070995, + "grad_norm": 782.7325439453125, + "learning_rate": 1.997688874503847e-06, + "loss": 53.0469, + "step": 5388 + }, + { + "epoch": 0.051012391022425004, + "grad_norm": 376.7284851074219, + "learning_rate": 1.9976867908655636e-06, + "loss": 42.6719, + "step": 5389 + }, + { + "epoch": 0.051021857044140057, + "grad_norm": 324.336181640625, + "learning_rate": 1.9976847062895194e-06, + "loss": 24.9062, + "step": 5390 + }, + { + "epoch": 0.051031323065855116, + "grad_norm": 285.356689453125, + "learning_rate": 1.997682620775716e-06, + "loss": 27.2344, + "step": 5391 + }, + { + "epoch": 0.05104078908757017, + "grad_norm": 146.22853088378906, + "learning_rate": 1.9976805343241564e-06, + "loss": 18.9531, + "step": 5392 + }, + { + "epoch": 0.05105025510928522, + "grad_norm": 3.0180344581604004, + "learning_rate": 1.997678446934842e-06, + "loss": 0.8267, + "step": 5393 + }, + { + "epoch": 0.051059721131000275, + "grad_norm": 195.66775512695312, + "learning_rate": 1.9976763586077745e-06, + "loss": 24.8203, + "step": 5394 + }, + { + "epoch": 0.05106918715271533, + "grad_norm": 433.33331298828125, + "learning_rate": 1.997674269342956e-06, + "loss": 25.9766, + "step": 5395 + }, + { + "epoch": 0.05107865317443038, + "grad_norm": 3.14933180809021, + "learning_rate": 1.997672179140389e-06, + "loss": 0.9961, + "step": 5396 + }, + { + "epoch": 0.051088119196145435, + "grad_norm": 156.66604614257812, + "learning_rate": 1.9976700880000744e-06, + "loss": 21.9219, + "step": 5397 + }, + { + "epoch": 0.05109758521786049, + "grad_norm": 300.00494384765625, + "learning_rate": 1.9976679959220153e-06, + "loss": 24.4766, + "step": 5398 + }, + { + "epoch": 0.05110705123957554, + "grad_norm": 377.45318603515625, + "learning_rate": 1.997665902906213e-06, + "loss": 34.5391, + "step": 5399 + }, + { + "epoch": 0.0511165172612906, + "grad_norm": 195.10887145996094, + "learning_rate": 1.99766380895267e-06, + "loss": 31.5781, + "step": 5400 + }, + { + "epoch": 0.051125983283005653, + "grad_norm": 552.6152954101562, + "learning_rate": 1.9976617140613876e-06, + "loss": 56.5625, + "step": 5401 + }, + { + "epoch": 0.051135449304720706, + "grad_norm": 241.9661102294922, + "learning_rate": 1.9976596182323685e-06, + "loss": 33.0469, + "step": 5402 + }, + { + "epoch": 0.05114491532643576, + "grad_norm": 370.9960021972656, + "learning_rate": 1.997657521465614e-06, + "loss": 41.1875, + "step": 5403 + }, + { + "epoch": 0.05115438134815081, + "grad_norm": 461.7136535644531, + "learning_rate": 1.997655423761126e-06, + "loss": 38.5781, + "step": 5404 + }, + { + "epoch": 0.051163847369865866, + "grad_norm": 3.225895404815674, + "learning_rate": 1.9976533251189073e-06, + "loss": 0.8521, + "step": 5405 + }, + { + "epoch": 0.05117331339158092, + "grad_norm": 501.816162109375, + "learning_rate": 1.997651225538959e-06, + "loss": 33.8984, + "step": 5406 + }, + { + "epoch": 0.05118277941329597, + "grad_norm": 864.1810913085938, + "learning_rate": 1.9976491250212833e-06, + "loss": 94.5391, + "step": 5407 + }, + { + "epoch": 0.051192245435011025, + "grad_norm": 397.95654296875, + "learning_rate": 1.997647023565883e-06, + "loss": 30.9844, + "step": 5408 + }, + { + "epoch": 0.051201711456726084, + "grad_norm": 208.98770141601562, + "learning_rate": 1.997644921172759e-06, + "loss": 17.0547, + "step": 5409 + }, + { + "epoch": 0.05121117747844114, + "grad_norm": 296.62060546875, + "learning_rate": 1.9976428178419134e-06, + "loss": 52.0469, + "step": 5410 + }, + { + "epoch": 0.05122064350015619, + "grad_norm": 513.4114990234375, + "learning_rate": 1.997640713573349e-06, + "loss": 19.5117, + "step": 5411 + }, + { + "epoch": 0.051230109521871244, + "grad_norm": 286.5925598144531, + "learning_rate": 1.9976386083670673e-06, + "loss": 22.375, + "step": 5412 + }, + { + "epoch": 0.051239575543586297, + "grad_norm": 404.5957946777344, + "learning_rate": 1.9976365022230697e-06, + "loss": 49.3906, + "step": 5413 + }, + { + "epoch": 0.05124904156530135, + "grad_norm": 468.12152099609375, + "learning_rate": 1.997634395141359e-06, + "loss": 45.375, + "step": 5414 + }, + { + "epoch": 0.0512585075870164, + "grad_norm": 557.3435668945312, + "learning_rate": 1.997632287121937e-06, + "loss": 36.0625, + "step": 5415 + }, + { + "epoch": 0.051267973608731456, + "grad_norm": 513.0791015625, + "learning_rate": 1.997630178164805e-06, + "loss": 25.875, + "step": 5416 + }, + { + "epoch": 0.051277439630446515, + "grad_norm": 294.3284606933594, + "learning_rate": 1.997628068269966e-06, + "loss": 30.4062, + "step": 5417 + }, + { + "epoch": 0.05128690565216157, + "grad_norm": 494.54364013671875, + "learning_rate": 1.997625957437421e-06, + "loss": 31.4766, + "step": 5418 + }, + { + "epoch": 0.05129637167387662, + "grad_norm": 323.341064453125, + "learning_rate": 1.997623845667173e-06, + "loss": 23.9766, + "step": 5419 + }, + { + "epoch": 0.051305837695591675, + "grad_norm": 488.78948974609375, + "learning_rate": 1.9976217329592235e-06, + "loss": 43.125, + "step": 5420 + }, + { + "epoch": 0.05131530371730673, + "grad_norm": 2.6845717430114746, + "learning_rate": 1.997619619313574e-06, + "loss": 0.8047, + "step": 5421 + }, + { + "epoch": 0.05132476973902178, + "grad_norm": 605.0252075195312, + "learning_rate": 1.9976175047302272e-06, + "loss": 33.1016, + "step": 5422 + }, + { + "epoch": 0.051334235760736834, + "grad_norm": 2.4544153213500977, + "learning_rate": 1.997615389209185e-06, + "loss": 0.854, + "step": 5423 + }, + { + "epoch": 0.05134370178245189, + "grad_norm": 462.503173828125, + "learning_rate": 1.997613272750449e-06, + "loss": 54.125, + "step": 5424 + }, + { + "epoch": 0.05135316780416694, + "grad_norm": 750.3469848632812, + "learning_rate": 1.9976111553540217e-06, + "loss": 57.6719, + "step": 5425 + }, + { + "epoch": 0.051362633825882, + "grad_norm": 237.41331481933594, + "learning_rate": 1.9976090370199046e-06, + "loss": 26.6641, + "step": 5426 + }, + { + "epoch": 0.05137209984759705, + "grad_norm": 245.6708984375, + "learning_rate": 1.9976069177481005e-06, + "loss": 25.2188, + "step": 5427 + }, + { + "epoch": 0.051381565869312106, + "grad_norm": 214.9398651123047, + "learning_rate": 1.9976047975386097e-06, + "loss": 24.1562, + "step": 5428 + }, + { + "epoch": 0.05139103189102716, + "grad_norm": 269.2427673339844, + "learning_rate": 1.997602676391436e-06, + "loss": 30.0625, + "step": 5429 + }, + { + "epoch": 0.05140049791274221, + "grad_norm": 675.1229858398438, + "learning_rate": 1.997600554306581e-06, + "loss": 27.6875, + "step": 5430 + }, + { + "epoch": 0.051409963934457265, + "grad_norm": 218.0255889892578, + "learning_rate": 1.997598431284046e-06, + "loss": 24.1719, + "step": 5431 + }, + { + "epoch": 0.05141942995617232, + "grad_norm": 601.4052734375, + "learning_rate": 1.997596307323833e-06, + "loss": 22.6172, + "step": 5432 + }, + { + "epoch": 0.05142889597788737, + "grad_norm": 395.4801025390625, + "learning_rate": 1.9975941824259445e-06, + "loss": 48.9688, + "step": 5433 + }, + { + "epoch": 0.05143836199960243, + "grad_norm": 278.6930847167969, + "learning_rate": 1.9975920565903825e-06, + "loss": 22.7188, + "step": 5434 + }, + { + "epoch": 0.051447828021317484, + "grad_norm": 427.4809265136719, + "learning_rate": 1.997589929817149e-06, + "loss": 21.5703, + "step": 5435 + }, + { + "epoch": 0.051457294043032537, + "grad_norm": 227.16278076171875, + "learning_rate": 1.9975878021062456e-06, + "loss": 23.5859, + "step": 5436 + }, + { + "epoch": 0.05146676006474759, + "grad_norm": 664.8355102539062, + "learning_rate": 1.997585673457675e-06, + "loss": 37.4141, + "step": 5437 + }, + { + "epoch": 0.05147622608646264, + "grad_norm": 562.3089599609375, + "learning_rate": 1.9975835438714384e-06, + "loss": 42.9141, + "step": 5438 + }, + { + "epoch": 0.051485692108177696, + "grad_norm": 302.0076904296875, + "learning_rate": 1.997581413347538e-06, + "loss": 34.6953, + "step": 5439 + }, + { + "epoch": 0.05149515812989275, + "grad_norm": 435.7312927246094, + "learning_rate": 1.9975792818859764e-06, + "loss": 24.0625, + "step": 5440 + }, + { + "epoch": 0.0515046241516078, + "grad_norm": 499.3035888671875, + "learning_rate": 1.997577149486755e-06, + "loss": 15.5703, + "step": 5441 + }, + { + "epoch": 0.051514090173322855, + "grad_norm": 228.8357696533203, + "learning_rate": 1.9975750161498763e-06, + "loss": 29.5469, + "step": 5442 + }, + { + "epoch": 0.051523556195037915, + "grad_norm": 345.34661865234375, + "learning_rate": 1.9975728818753416e-06, + "loss": 26.125, + "step": 5443 + }, + { + "epoch": 0.05153302221675297, + "grad_norm": 464.826171875, + "learning_rate": 1.9975707466631532e-06, + "loss": 44.9688, + "step": 5444 + }, + { + "epoch": 0.05154248823846802, + "grad_norm": 652.1891479492188, + "learning_rate": 1.9975686105133134e-06, + "loss": 66.2812, + "step": 5445 + }, + { + "epoch": 0.051551954260183074, + "grad_norm": 214.3760223388672, + "learning_rate": 1.9975664734258238e-06, + "loss": 23.25, + "step": 5446 + }, + { + "epoch": 0.05156142028189813, + "grad_norm": 706.2904663085938, + "learning_rate": 1.997564335400687e-06, + "loss": 44.375, + "step": 5447 + }, + { + "epoch": 0.05157088630361318, + "grad_norm": 677.34765625, + "learning_rate": 1.997562196437904e-06, + "loss": 44.2812, + "step": 5448 + }, + { + "epoch": 0.05158035232532823, + "grad_norm": 1045.811767578125, + "learning_rate": 1.997560056537478e-06, + "loss": 26.6641, + "step": 5449 + }, + { + "epoch": 0.051589818347043286, + "grad_norm": 280.4783020019531, + "learning_rate": 1.9975579156994104e-06, + "loss": 24.4062, + "step": 5450 + }, + { + "epoch": 0.05159928436875834, + "grad_norm": 1245.4703369140625, + "learning_rate": 1.997555773923703e-06, + "loss": 89.5312, + "step": 5451 + }, + { + "epoch": 0.0516087503904734, + "grad_norm": 768.4357299804688, + "learning_rate": 1.997553631210358e-06, + "loss": 64.875, + "step": 5452 + }, + { + "epoch": 0.05161821641218845, + "grad_norm": 651.9760131835938, + "learning_rate": 1.9975514875593776e-06, + "loss": 51.4844, + "step": 5453 + }, + { + "epoch": 0.051627682433903505, + "grad_norm": 310.3631896972656, + "learning_rate": 1.997549342970764e-06, + "loss": 24.0703, + "step": 5454 + }, + { + "epoch": 0.05163714845561856, + "grad_norm": 357.3893127441406, + "learning_rate": 1.9975471974445186e-06, + "loss": 24.375, + "step": 5455 + }, + { + "epoch": 0.05164661447733361, + "grad_norm": 341.1490478515625, + "learning_rate": 1.9975450509806436e-06, + "loss": 24.4688, + "step": 5456 + }, + { + "epoch": 0.051656080499048664, + "grad_norm": 382.71124267578125, + "learning_rate": 1.9975429035791412e-06, + "loss": 51.0625, + "step": 5457 + }, + { + "epoch": 0.05166554652076372, + "grad_norm": 166.29067993164062, + "learning_rate": 1.997540755240014e-06, + "loss": 22.875, + "step": 5458 + }, + { + "epoch": 0.05167501254247877, + "grad_norm": 457.2251281738281, + "learning_rate": 1.9975386059632625e-06, + "loss": 50.2812, + "step": 5459 + }, + { + "epoch": 0.05168447856419383, + "grad_norm": 559.280029296875, + "learning_rate": 1.9975364557488903e-06, + "loss": 49.6094, + "step": 5460 + }, + { + "epoch": 0.05169394458590888, + "grad_norm": 726.8652954101562, + "learning_rate": 1.997534304596898e-06, + "loss": 46.2344, + "step": 5461 + }, + { + "epoch": 0.051703410607623936, + "grad_norm": 772.1675415039062, + "learning_rate": 1.997532152507289e-06, + "loss": 15.832, + "step": 5462 + }, + { + "epoch": 0.05171287662933899, + "grad_norm": 546.2420043945312, + "learning_rate": 1.9975299994800644e-06, + "loss": 21.8203, + "step": 5463 + }, + { + "epoch": 0.05172234265105404, + "grad_norm": 317.6883239746094, + "learning_rate": 1.9975278455152265e-06, + "loss": 25.5, + "step": 5464 + }, + { + "epoch": 0.051731808672769095, + "grad_norm": 686.9398803710938, + "learning_rate": 1.9975256906127777e-06, + "loss": 47.7031, + "step": 5465 + }, + { + "epoch": 0.05174127469448415, + "grad_norm": 504.7706298828125, + "learning_rate": 1.997523534772719e-06, + "loss": 25.75, + "step": 5466 + }, + { + "epoch": 0.0517507407161992, + "grad_norm": 469.89007568359375, + "learning_rate": 1.997521377995053e-06, + "loss": 24.1797, + "step": 5467 + }, + { + "epoch": 0.051760206737914254, + "grad_norm": 362.7875671386719, + "learning_rate": 1.9975192202797824e-06, + "loss": 32.4609, + "step": 5468 + }, + { + "epoch": 0.051769672759629314, + "grad_norm": 1310.3465576171875, + "learning_rate": 1.9975170616269084e-06, + "loss": 39.8438, + "step": 5469 + }, + { + "epoch": 0.05177913878134437, + "grad_norm": 212.76202392578125, + "learning_rate": 1.9975149020364334e-06, + "loss": 21.3047, + "step": 5470 + }, + { + "epoch": 0.05178860480305942, + "grad_norm": 478.8789367675781, + "learning_rate": 1.997512741508359e-06, + "loss": 51.0781, + "step": 5471 + }, + { + "epoch": 0.05179807082477447, + "grad_norm": 215.00352478027344, + "learning_rate": 1.9975105800426877e-06, + "loss": 25.4062, + "step": 5472 + }, + { + "epoch": 0.051807536846489526, + "grad_norm": 275.618896484375, + "learning_rate": 1.9975084176394217e-06, + "loss": 26.6328, + "step": 5473 + }, + { + "epoch": 0.05181700286820458, + "grad_norm": 646.673583984375, + "learning_rate": 1.9975062542985617e-06, + "loss": 16.5703, + "step": 5474 + }, + { + "epoch": 0.05182646888991963, + "grad_norm": 522.8369140625, + "learning_rate": 1.9975040900201113e-06, + "loss": 23.8438, + "step": 5475 + }, + { + "epoch": 0.051835934911634685, + "grad_norm": 734.3524169921875, + "learning_rate": 1.997501924804072e-06, + "loss": 46.75, + "step": 5476 + }, + { + "epoch": 0.051845400933349745, + "grad_norm": 746.54833984375, + "learning_rate": 1.997499758650446e-06, + "loss": 37.3594, + "step": 5477 + }, + { + "epoch": 0.0518548669550648, + "grad_norm": 716.3026123046875, + "learning_rate": 1.9974975915592346e-06, + "loss": 41.0938, + "step": 5478 + }, + { + "epoch": 0.05186433297677985, + "grad_norm": 639.9254150390625, + "learning_rate": 1.9974954235304408e-06, + "loss": 56.2031, + "step": 5479 + }, + { + "epoch": 0.051873798998494904, + "grad_norm": 762.0792846679688, + "learning_rate": 1.9974932545640657e-06, + "loss": 66.375, + "step": 5480 + }, + { + "epoch": 0.05188326502020996, + "grad_norm": 535.5723876953125, + "learning_rate": 1.9974910846601125e-06, + "loss": 35.2422, + "step": 5481 + }, + { + "epoch": 0.05189273104192501, + "grad_norm": 2.8963236808776855, + "learning_rate": 1.997488913818582e-06, + "loss": 0.8838, + "step": 5482 + }, + { + "epoch": 0.05190219706364006, + "grad_norm": 516.295166015625, + "learning_rate": 1.997486742039477e-06, + "loss": 69.6094, + "step": 5483 + }, + { + "epoch": 0.051911663085355116, + "grad_norm": 224.4370880126953, + "learning_rate": 1.9974845693227994e-06, + "loss": 21.1172, + "step": 5484 + }, + { + "epoch": 0.05192112910707017, + "grad_norm": 274.4236755371094, + "learning_rate": 1.997482395668551e-06, + "loss": 25.2031, + "step": 5485 + }, + { + "epoch": 0.05193059512878523, + "grad_norm": 593.9751586914062, + "learning_rate": 1.997480221076734e-06, + "loss": 22.8516, + "step": 5486 + }, + { + "epoch": 0.05194006115050028, + "grad_norm": 350.78460693359375, + "learning_rate": 1.997478045547351e-06, + "loss": 44.9375, + "step": 5487 + }, + { + "epoch": 0.051949527172215335, + "grad_norm": 594.5233154296875, + "learning_rate": 1.9974758690804033e-06, + "loss": 36.9688, + "step": 5488 + }, + { + "epoch": 0.05195899319393039, + "grad_norm": 426.91009521484375, + "learning_rate": 1.997473691675893e-06, + "loss": 43.125, + "step": 5489 + }, + { + "epoch": 0.05196845921564544, + "grad_norm": 661.0267333984375, + "learning_rate": 1.9974715133338223e-06, + "loss": 32.9141, + "step": 5490 + }, + { + "epoch": 0.051977925237360494, + "grad_norm": 375.51519775390625, + "learning_rate": 1.9974693340541935e-06, + "loss": 32.9375, + "step": 5491 + }, + { + "epoch": 0.05198739125907555, + "grad_norm": 3.8487746715545654, + "learning_rate": 1.997467153837008e-06, + "loss": 0.7783, + "step": 5492 + }, + { + "epoch": 0.0519968572807906, + "grad_norm": 293.6549072265625, + "learning_rate": 1.9974649726822684e-06, + "loss": 43.8398, + "step": 5493 + }, + { + "epoch": 0.05200632330250565, + "grad_norm": 197.20999145507812, + "learning_rate": 1.997462790589977e-06, + "loss": 21.6172, + "step": 5494 + }, + { + "epoch": 0.05201578932422071, + "grad_norm": 474.3670654296875, + "learning_rate": 1.997460607560135e-06, + "loss": 33.4766, + "step": 5495 + }, + { + "epoch": 0.052025255345935766, + "grad_norm": 388.3454895019531, + "learning_rate": 1.997458423592745e-06, + "loss": 19.7031, + "step": 5496 + }, + { + "epoch": 0.05203472136765082, + "grad_norm": 311.4346923828125, + "learning_rate": 1.997456238687809e-06, + "loss": 26.875, + "step": 5497 + }, + { + "epoch": 0.05204418738936587, + "grad_norm": 163.6356658935547, + "learning_rate": 1.9974540528453297e-06, + "loss": 22.875, + "step": 5498 + }, + { + "epoch": 0.052053653411080925, + "grad_norm": 453.92156982421875, + "learning_rate": 1.9974518660653075e-06, + "loss": 19.2656, + "step": 5499 + }, + { + "epoch": 0.05206311943279598, + "grad_norm": 514.9556884765625, + "learning_rate": 1.9974496783477457e-06, + "loss": 36.3281, + "step": 5500 + }, + { + "epoch": 0.05207258545451103, + "grad_norm": 183.66847229003906, + "learning_rate": 1.9974474896926467e-06, + "loss": 20.8906, + "step": 5501 + }, + { + "epoch": 0.052082051476226084, + "grad_norm": 261.4858093261719, + "learning_rate": 1.9974453001000114e-06, + "loss": 23.8516, + "step": 5502 + }, + { + "epoch": 0.052091517497941144, + "grad_norm": 328.94427490234375, + "learning_rate": 1.9974431095698424e-06, + "loss": 25.7812, + "step": 5503 + }, + { + "epoch": 0.0521009835196562, + "grad_norm": 432.4238586425781, + "learning_rate": 1.9974409181021417e-06, + "loss": 26.7031, + "step": 5504 + }, + { + "epoch": 0.05211044954137125, + "grad_norm": 671.0927734375, + "learning_rate": 1.9974387256969115e-06, + "loss": 48.0234, + "step": 5505 + }, + { + "epoch": 0.0521199155630863, + "grad_norm": 506.3154602050781, + "learning_rate": 1.997436532354154e-06, + "loss": 25.6328, + "step": 5506 + }, + { + "epoch": 0.052129381584801356, + "grad_norm": 293.83465576171875, + "learning_rate": 1.997434338073871e-06, + "loss": 25.3125, + "step": 5507 + }, + { + "epoch": 0.05213884760651641, + "grad_norm": 649.0279541015625, + "learning_rate": 1.9974321428560645e-06, + "loss": 44.1875, + "step": 5508 + }, + { + "epoch": 0.05214831362823146, + "grad_norm": 315.30029296875, + "learning_rate": 1.9974299467007365e-06, + "loss": 26.6797, + "step": 5509 + }, + { + "epoch": 0.052157779649946515, + "grad_norm": 378.3651123046875, + "learning_rate": 1.9974277496078896e-06, + "loss": 30.6719, + "step": 5510 + }, + { + "epoch": 0.05216724567166157, + "grad_norm": 2.7674808502197266, + "learning_rate": 1.9974255515775255e-06, + "loss": 0.939, + "step": 5511 + }, + { + "epoch": 0.05217671169337663, + "grad_norm": 965.9395751953125, + "learning_rate": 1.997423352609646e-06, + "loss": 58.6875, + "step": 5512 + }, + { + "epoch": 0.05218617771509168, + "grad_norm": 206.8462371826172, + "learning_rate": 1.9974211527042535e-06, + "loss": 24.1094, + "step": 5513 + }, + { + "epoch": 0.052195643736806734, + "grad_norm": 480.78521728515625, + "learning_rate": 1.99741895186135e-06, + "loss": 47.4062, + "step": 5514 + }, + { + "epoch": 0.05220510975852179, + "grad_norm": 787.5599975585938, + "learning_rate": 1.9974167500809375e-06, + "loss": 59.6094, + "step": 5515 + }, + { + "epoch": 0.05221457578023684, + "grad_norm": 494.20697021484375, + "learning_rate": 1.9974145473630185e-06, + "loss": 26.5938, + "step": 5516 + }, + { + "epoch": 0.05222404180195189, + "grad_norm": 639.6586303710938, + "learning_rate": 1.9974123437075945e-06, + "loss": 36.7422, + "step": 5517 + }, + { + "epoch": 0.052233507823666946, + "grad_norm": 713.6619262695312, + "learning_rate": 1.9974101391146677e-06, + "loss": 54.0312, + "step": 5518 + }, + { + "epoch": 0.052242973845382, + "grad_norm": 765.6390380859375, + "learning_rate": 1.99740793358424e-06, + "loss": 20.6797, + "step": 5519 + }, + { + "epoch": 0.05225243986709706, + "grad_norm": 452.2580261230469, + "learning_rate": 1.9974057271163144e-06, + "loss": 27.4453, + "step": 5520 + }, + { + "epoch": 0.05226190588881211, + "grad_norm": 483.1461486816406, + "learning_rate": 1.9974035197108918e-06, + "loss": 43.4844, + "step": 5521 + }, + { + "epoch": 0.052271371910527165, + "grad_norm": 286.26123046875, + "learning_rate": 1.9974013113679747e-06, + "loss": 26.125, + "step": 5522 + }, + { + "epoch": 0.05228083793224222, + "grad_norm": 706.8688354492188, + "learning_rate": 1.9973991020875655e-06, + "loss": 41.7344, + "step": 5523 + }, + { + "epoch": 0.05229030395395727, + "grad_norm": 260.1729736328125, + "learning_rate": 1.997396891869666e-06, + "loss": 23.3906, + "step": 5524 + }, + { + "epoch": 0.052299769975672324, + "grad_norm": 1195.005615234375, + "learning_rate": 1.9973946807142786e-06, + "loss": 22.6562, + "step": 5525 + }, + { + "epoch": 0.05230923599738738, + "grad_norm": 245.28445434570312, + "learning_rate": 1.9973924686214045e-06, + "loss": 19.5312, + "step": 5526 + }, + { + "epoch": 0.05231870201910243, + "grad_norm": 239.15138244628906, + "learning_rate": 1.9973902555910465e-06, + "loss": 21.5469, + "step": 5527 + }, + { + "epoch": 0.05232816804081748, + "grad_norm": 201.17465209960938, + "learning_rate": 1.997388041623207e-06, + "loss": 27.5156, + "step": 5528 + }, + { + "epoch": 0.05233763406253254, + "grad_norm": 396.5793762207031, + "learning_rate": 1.9973858267178873e-06, + "loss": 28.9531, + "step": 5529 + }, + { + "epoch": 0.052347100084247596, + "grad_norm": 1195.634765625, + "learning_rate": 1.99738361087509e-06, + "loss": 22.4922, + "step": 5530 + }, + { + "epoch": 0.05235656610596265, + "grad_norm": 215.78939819335938, + "learning_rate": 1.9973813940948166e-06, + "loss": 25.6562, + "step": 5531 + }, + { + "epoch": 0.0523660321276777, + "grad_norm": 326.3107604980469, + "learning_rate": 1.99737917637707e-06, + "loss": 24.7344, + "step": 5532 + }, + { + "epoch": 0.052375498149392755, + "grad_norm": 462.03851318359375, + "learning_rate": 1.9973769577218515e-06, + "loss": 50.3594, + "step": 5533 + }, + { + "epoch": 0.05238496417110781, + "grad_norm": 1375.3004150390625, + "learning_rate": 1.997374738129164e-06, + "loss": 22.2969, + "step": 5534 + }, + { + "epoch": 0.05239443019282286, + "grad_norm": 619.94970703125, + "learning_rate": 1.9973725175990084e-06, + "loss": 31.5469, + "step": 5535 + }, + { + "epoch": 0.052403896214537914, + "grad_norm": 425.4481201171875, + "learning_rate": 1.997370296131388e-06, + "loss": 45.9531, + "step": 5536 + }, + { + "epoch": 0.05241336223625297, + "grad_norm": 593.907958984375, + "learning_rate": 1.9973680737263043e-06, + "loss": 55.293, + "step": 5537 + }, + { + "epoch": 0.05242282825796803, + "grad_norm": 2.9831011295318604, + "learning_rate": 1.9973658503837594e-06, + "loss": 0.8804, + "step": 5538 + }, + { + "epoch": 0.05243229427968308, + "grad_norm": 565.964599609375, + "learning_rate": 1.9973636261037557e-06, + "loss": 47.0625, + "step": 5539 + }, + { + "epoch": 0.05244176030139813, + "grad_norm": 360.53277587890625, + "learning_rate": 1.997361400886295e-06, + "loss": 32.5938, + "step": 5540 + }, + { + "epoch": 0.052451226323113186, + "grad_norm": 233.06103515625, + "learning_rate": 1.9973591747313796e-06, + "loss": 23.9141, + "step": 5541 + }, + { + "epoch": 0.05246069234482824, + "grad_norm": 216.13095092773438, + "learning_rate": 1.997356947639011e-06, + "loss": 22.5469, + "step": 5542 + }, + { + "epoch": 0.05247015836654329, + "grad_norm": 287.75848388671875, + "learning_rate": 1.9973547196091923e-06, + "loss": 26.375, + "step": 5543 + }, + { + "epoch": 0.052479624388258345, + "grad_norm": 399.1507263183594, + "learning_rate": 1.9973524906419247e-06, + "loss": 35.6406, + "step": 5544 + }, + { + "epoch": 0.0524890904099734, + "grad_norm": 874.6475219726562, + "learning_rate": 1.9973502607372106e-06, + "loss": 52.3125, + "step": 5545 + }, + { + "epoch": 0.05249855643168846, + "grad_norm": 222.24952697753906, + "learning_rate": 1.9973480298950525e-06, + "loss": 21.375, + "step": 5546 + }, + { + "epoch": 0.05250802245340351, + "grad_norm": 384.72698974609375, + "learning_rate": 1.997345798115452e-06, + "loss": 25.6562, + "step": 5547 + }, + { + "epoch": 0.052517488475118564, + "grad_norm": 390.6571044921875, + "learning_rate": 1.997343565398411e-06, + "loss": 27.0312, + "step": 5548 + }, + { + "epoch": 0.05252695449683362, + "grad_norm": 262.771728515625, + "learning_rate": 1.997341331743932e-06, + "loss": 29.4375, + "step": 5549 + }, + { + "epoch": 0.05253642051854867, + "grad_norm": 392.3255920410156, + "learning_rate": 1.9973390971520176e-06, + "loss": 30.375, + "step": 5550 + }, + { + "epoch": 0.05254588654026372, + "grad_norm": 3.223918914794922, + "learning_rate": 1.997336861622669e-06, + "loss": 0.8367, + "step": 5551 + }, + { + "epoch": 0.052555352561978776, + "grad_norm": 399.49517822265625, + "learning_rate": 1.9973346251558884e-06, + "loss": 50.9531, + "step": 5552 + }, + { + "epoch": 0.05256481858369383, + "grad_norm": 459.0816345214844, + "learning_rate": 1.9973323877516784e-06, + "loss": 59.1016, + "step": 5553 + }, + { + "epoch": 0.05257428460540888, + "grad_norm": 514.27783203125, + "learning_rate": 1.9973301494100405e-06, + "loss": 25.2578, + "step": 5554 + }, + { + "epoch": 0.05258375062712394, + "grad_norm": 3.054666757583618, + "learning_rate": 1.9973279101309773e-06, + "loss": 0.8315, + "step": 5555 + }, + { + "epoch": 0.052593216648838995, + "grad_norm": 281.6676940917969, + "learning_rate": 1.997325669914491e-06, + "loss": 24.3281, + "step": 5556 + }, + { + "epoch": 0.05260268267055405, + "grad_norm": 500.47015380859375, + "learning_rate": 1.9973234287605833e-06, + "loss": 27.6562, + "step": 5557 + }, + { + "epoch": 0.0526121486922691, + "grad_norm": 534.552001953125, + "learning_rate": 1.9973211866692567e-06, + "loss": 26.4922, + "step": 5558 + }, + { + "epoch": 0.052621614713984154, + "grad_norm": 411.9308776855469, + "learning_rate": 1.9973189436405125e-06, + "loss": 20.7109, + "step": 5559 + }, + { + "epoch": 0.05263108073569921, + "grad_norm": 757.9192504882812, + "learning_rate": 1.997316699674354e-06, + "loss": 55.5938, + "step": 5560 + }, + { + "epoch": 0.05264054675741426, + "grad_norm": 447.719970703125, + "learning_rate": 1.9973144547707822e-06, + "loss": 35.6875, + "step": 5561 + }, + { + "epoch": 0.05265001277912931, + "grad_norm": 428.537841796875, + "learning_rate": 1.9973122089297997e-06, + "loss": 31.4844, + "step": 5562 + }, + { + "epoch": 0.05265947880084437, + "grad_norm": 448.26104736328125, + "learning_rate": 1.9973099621514087e-06, + "loss": 36.6719, + "step": 5563 + }, + { + "epoch": 0.052668944822559426, + "grad_norm": 823.0311889648438, + "learning_rate": 1.9973077144356114e-06, + "loss": 48.7656, + "step": 5564 + }, + { + "epoch": 0.05267841084427448, + "grad_norm": 163.4945526123047, + "learning_rate": 1.9973054657824096e-06, + "loss": 21.6484, + "step": 5565 + }, + { + "epoch": 0.05268787686598953, + "grad_norm": 902.5891723632812, + "learning_rate": 1.9973032161918057e-06, + "loss": 55.25, + "step": 5566 + }, + { + "epoch": 0.052697342887704585, + "grad_norm": 3.163367986679077, + "learning_rate": 1.9973009656638014e-06, + "loss": 0.9688, + "step": 5567 + }, + { + "epoch": 0.05270680890941964, + "grad_norm": 374.59722900390625, + "learning_rate": 1.997298714198399e-06, + "loss": 27.9844, + "step": 5568 + }, + { + "epoch": 0.05271627493113469, + "grad_norm": 583.3342895507812, + "learning_rate": 1.9972964617956008e-06, + "loss": 40.5781, + "step": 5569 + }, + { + "epoch": 0.052725740952849744, + "grad_norm": 3.309891939163208, + "learning_rate": 1.997294208455409e-06, + "loss": 0.874, + "step": 5570 + }, + { + "epoch": 0.0527352069745648, + "grad_norm": 435.5919494628906, + "learning_rate": 1.9972919541778254e-06, + "loss": 40.7812, + "step": 5571 + }, + { + "epoch": 0.05274467299627986, + "grad_norm": 179.5215606689453, + "learning_rate": 1.9972896989628524e-06, + "loss": 20.0234, + "step": 5572 + }, + { + "epoch": 0.05275413901799491, + "grad_norm": 336.3832092285156, + "learning_rate": 1.9972874428104914e-06, + "loss": 31.6797, + "step": 5573 + }, + { + "epoch": 0.05276360503970996, + "grad_norm": 271.18096923828125, + "learning_rate": 1.9972851857207457e-06, + "loss": 20.9297, + "step": 5574 + }, + { + "epoch": 0.052773071061425016, + "grad_norm": 702.805419921875, + "learning_rate": 1.997282927693616e-06, + "loss": 52.7656, + "step": 5575 + }, + { + "epoch": 0.05278253708314007, + "grad_norm": 247.8122100830078, + "learning_rate": 1.997280668729106e-06, + "loss": 19.7734, + "step": 5576 + }, + { + "epoch": 0.05279200310485512, + "grad_norm": 3.1653003692626953, + "learning_rate": 1.997278408827217e-06, + "loss": 1.082, + "step": 5577 + }, + { + "epoch": 0.052801469126570175, + "grad_norm": 254.62632751464844, + "learning_rate": 1.997276147987951e-06, + "loss": 25.5156, + "step": 5578 + }, + { + "epoch": 0.05281093514828523, + "grad_norm": 293.65850830078125, + "learning_rate": 1.9972738862113104e-06, + "loss": 25.0859, + "step": 5579 + }, + { + "epoch": 0.05282040117000028, + "grad_norm": 319.0703430175781, + "learning_rate": 1.997271623497297e-06, + "loss": 12.3516, + "step": 5580 + }, + { + "epoch": 0.05282986719171534, + "grad_norm": 3.65259051322937, + "learning_rate": 1.9972693598459134e-06, + "loss": 1.0288, + "step": 5581 + }, + { + "epoch": 0.052839333213430394, + "grad_norm": 878.1048583984375, + "learning_rate": 1.9972670952571614e-06, + "loss": 49.8906, + "step": 5582 + }, + { + "epoch": 0.05284879923514545, + "grad_norm": 3.148484230041504, + "learning_rate": 1.9972648297310434e-06, + "loss": 0.9395, + "step": 5583 + }, + { + "epoch": 0.0528582652568605, + "grad_norm": 345.7774658203125, + "learning_rate": 1.997262563267561e-06, + "loss": 27.9219, + "step": 5584 + }, + { + "epoch": 0.05286773127857555, + "grad_norm": 376.03863525390625, + "learning_rate": 1.9972602958667167e-06, + "loss": 50.4375, + "step": 5585 + }, + { + "epoch": 0.052877197300290606, + "grad_norm": 167.8245391845703, + "learning_rate": 1.997258027528513e-06, + "loss": 24.25, + "step": 5586 + }, + { + "epoch": 0.05288666332200566, + "grad_norm": 3.790109872817993, + "learning_rate": 1.997255758252951e-06, + "loss": 1.0063, + "step": 5587 + }, + { + "epoch": 0.05289612934372071, + "grad_norm": 591.8297119140625, + "learning_rate": 1.9972534880400335e-06, + "loss": 28.4844, + "step": 5588 + }, + { + "epoch": 0.05290559536543577, + "grad_norm": 495.22894287109375, + "learning_rate": 1.9972512168897628e-06, + "loss": 24.6406, + "step": 5589 + }, + { + "epoch": 0.052915061387150825, + "grad_norm": 287.3546142578125, + "learning_rate": 1.997248944802141e-06, + "loss": 22.2344, + "step": 5590 + }, + { + "epoch": 0.05292452740886588, + "grad_norm": 410.7136535644531, + "learning_rate": 1.99724667177717e-06, + "loss": 39.3594, + "step": 5591 + }, + { + "epoch": 0.05293399343058093, + "grad_norm": 508.54766845703125, + "learning_rate": 1.9972443978148518e-06, + "loss": 24.2305, + "step": 5592 + }, + { + "epoch": 0.052943459452295984, + "grad_norm": 262.8753967285156, + "learning_rate": 1.997242122915189e-06, + "loss": 22.8984, + "step": 5593 + }, + { + "epoch": 0.05295292547401104, + "grad_norm": 175.68719482421875, + "learning_rate": 1.9972398470781836e-06, + "loss": 23.0469, + "step": 5594 + }, + { + "epoch": 0.05296239149572609, + "grad_norm": 309.2794189453125, + "learning_rate": 1.997237570303837e-06, + "loss": 26.2188, + "step": 5595 + }, + { + "epoch": 0.05297185751744114, + "grad_norm": 331.76654052734375, + "learning_rate": 1.9972352925921525e-06, + "loss": 32.4609, + "step": 5596 + }, + { + "epoch": 0.052981323539156196, + "grad_norm": 492.26025390625, + "learning_rate": 1.9972330139431317e-06, + "loss": 34.75, + "step": 5597 + }, + { + "epoch": 0.052990789560871256, + "grad_norm": 674.7601928710938, + "learning_rate": 1.9972307343567766e-06, + "loss": 24.9375, + "step": 5598 + }, + { + "epoch": 0.05300025558258631, + "grad_norm": 188.34445190429688, + "learning_rate": 1.9972284538330894e-06, + "loss": 19.1172, + "step": 5599 + }, + { + "epoch": 0.05300972160430136, + "grad_norm": 1034.451904296875, + "learning_rate": 1.9972261723720725e-06, + "loss": 46.8438, + "step": 5600 + }, + { + "epoch": 0.053019187626016415, + "grad_norm": 500.3156433105469, + "learning_rate": 1.9972238899737278e-06, + "loss": 59.9062, + "step": 5601 + }, + { + "epoch": 0.05302865364773147, + "grad_norm": 219.652099609375, + "learning_rate": 1.9972216066380576e-06, + "loss": 24.4375, + "step": 5602 + }, + { + "epoch": 0.05303811966944652, + "grad_norm": 225.13917541503906, + "learning_rate": 1.9972193223650637e-06, + "loss": 26.9297, + "step": 5603 + }, + { + "epoch": 0.053047585691161574, + "grad_norm": 281.7875061035156, + "learning_rate": 1.9972170371547487e-06, + "loss": 22.3203, + "step": 5604 + }, + { + "epoch": 0.05305705171287663, + "grad_norm": 409.88055419921875, + "learning_rate": 1.9972147510071146e-06, + "loss": 30.3125, + "step": 5605 + }, + { + "epoch": 0.05306651773459168, + "grad_norm": 333.03515625, + "learning_rate": 1.9972124639221636e-06, + "loss": 26.4219, + "step": 5606 + }, + { + "epoch": 0.05307598375630674, + "grad_norm": 646.8379516601562, + "learning_rate": 1.997210175899897e-06, + "loss": 42.2852, + "step": 5607 + }, + { + "epoch": 0.05308544977802179, + "grad_norm": 228.169921875, + "learning_rate": 1.9972078869403185e-06, + "loss": 21.9609, + "step": 5608 + }, + { + "epoch": 0.053094915799736846, + "grad_norm": 270.39483642578125, + "learning_rate": 1.9972055970434295e-06, + "loss": 16.082, + "step": 5609 + }, + { + "epoch": 0.0531043818214519, + "grad_norm": 3.187453508377075, + "learning_rate": 1.9972033062092316e-06, + "loss": 0.8818, + "step": 5610 + }, + { + "epoch": 0.05311384784316695, + "grad_norm": 229.49549865722656, + "learning_rate": 1.9972010144377278e-06, + "loss": 25.5703, + "step": 5611 + }, + { + "epoch": 0.053123313864882005, + "grad_norm": 168.6467742919922, + "learning_rate": 1.9971987217289197e-06, + "loss": 21.6719, + "step": 5612 + }, + { + "epoch": 0.05313277988659706, + "grad_norm": 428.873779296875, + "learning_rate": 1.99719642808281e-06, + "loss": 23.2656, + "step": 5613 + }, + { + "epoch": 0.05314224590831211, + "grad_norm": 1571.4398193359375, + "learning_rate": 1.9971941334994004e-06, + "loss": 59.2109, + "step": 5614 + }, + { + "epoch": 0.05315171193002717, + "grad_norm": 302.4323425292969, + "learning_rate": 1.9971918379786933e-06, + "loss": 29.8203, + "step": 5615 + }, + { + "epoch": 0.053161177951742224, + "grad_norm": 573.5867919921875, + "learning_rate": 1.9971895415206904e-06, + "loss": 20.6836, + "step": 5616 + }, + { + "epoch": 0.05317064397345728, + "grad_norm": 434.8702697753906, + "learning_rate": 1.9971872441253943e-06, + "loss": 20.5625, + "step": 5617 + }, + { + "epoch": 0.05318010999517233, + "grad_norm": 205.18508911132812, + "learning_rate": 1.997184945792807e-06, + "loss": 23.5391, + "step": 5618 + }, + { + "epoch": 0.05318957601688738, + "grad_norm": 885.43212890625, + "learning_rate": 1.997182646522931e-06, + "loss": 28.1875, + "step": 5619 + }, + { + "epoch": 0.053199042038602436, + "grad_norm": 500.51983642578125, + "learning_rate": 1.997180346315768e-06, + "loss": 57.1875, + "step": 5620 + }, + { + "epoch": 0.05320850806031749, + "grad_norm": 2.845707416534424, + "learning_rate": 1.997178045171321e-06, + "loss": 0.9316, + "step": 5621 + }, + { + "epoch": 0.05321797408203254, + "grad_norm": 398.90057373046875, + "learning_rate": 1.9971757430895906e-06, + "loss": 29.8281, + "step": 5622 + }, + { + "epoch": 0.053227440103747595, + "grad_norm": 313.4096374511719, + "learning_rate": 1.99717344007058e-06, + "loss": 36.1875, + "step": 5623 + }, + { + "epoch": 0.053236906125462655, + "grad_norm": 245.70513916015625, + "learning_rate": 1.9971711361142915e-06, + "loss": 23.7188, + "step": 5624 + }, + { + "epoch": 0.05324637214717771, + "grad_norm": 777.8829345703125, + "learning_rate": 1.9971688312207273e-06, + "loss": 45.5312, + "step": 5625 + }, + { + "epoch": 0.05325583816889276, + "grad_norm": 345.7402038574219, + "learning_rate": 1.9971665253898886e-06, + "loss": 19.3281, + "step": 5626 + }, + { + "epoch": 0.053265304190607814, + "grad_norm": 194.63369750976562, + "learning_rate": 1.9971642186217786e-06, + "loss": 17.7578, + "step": 5627 + }, + { + "epoch": 0.05327477021232287, + "grad_norm": 628.17431640625, + "learning_rate": 1.997161910916399e-06, + "loss": 40.9805, + "step": 5628 + }, + { + "epoch": 0.05328423623403792, + "grad_norm": 551.1390991210938, + "learning_rate": 1.997159602273752e-06, + "loss": 29.3516, + "step": 5629 + }, + { + "epoch": 0.05329370225575297, + "grad_norm": 328.1981201171875, + "learning_rate": 1.99715729269384e-06, + "loss": 22.2422, + "step": 5630 + }, + { + "epoch": 0.053303168277468026, + "grad_norm": 485.4477233886719, + "learning_rate": 1.997154982176665e-06, + "loss": 36.4766, + "step": 5631 + }, + { + "epoch": 0.053312634299183086, + "grad_norm": 312.98065185546875, + "learning_rate": 1.997152670722229e-06, + "loss": 24.2578, + "step": 5632 + }, + { + "epoch": 0.05332210032089814, + "grad_norm": 913.0411987304688, + "learning_rate": 1.9971503583305345e-06, + "loss": 74.0078, + "step": 5633 + }, + { + "epoch": 0.05333156634261319, + "grad_norm": 639.6564331054688, + "learning_rate": 1.9971480450015835e-06, + "loss": 24.7188, + "step": 5634 + }, + { + "epoch": 0.053341032364328245, + "grad_norm": 3.2123684883117676, + "learning_rate": 1.9971457307353783e-06, + "loss": 0.918, + "step": 5635 + }, + { + "epoch": 0.0533504983860433, + "grad_norm": 267.2029113769531, + "learning_rate": 1.997143415531921e-06, + "loss": 22.9297, + "step": 5636 + }, + { + "epoch": 0.05335996440775835, + "grad_norm": 1565.9306640625, + "learning_rate": 1.9971410993912136e-06, + "loss": 50.75, + "step": 5637 + }, + { + "epoch": 0.053369430429473404, + "grad_norm": 330.43438720703125, + "learning_rate": 1.997138782313259e-06, + "loss": 17.1953, + "step": 5638 + }, + { + "epoch": 0.05337889645118846, + "grad_norm": 377.5650939941406, + "learning_rate": 1.997136464298058e-06, + "loss": 28.375, + "step": 5639 + }, + { + "epoch": 0.05338836247290351, + "grad_norm": 370.7406311035156, + "learning_rate": 1.997134145345614e-06, + "loss": 25.1211, + "step": 5640 + }, + { + "epoch": 0.05339782849461857, + "grad_norm": 862.9140625, + "learning_rate": 1.9971318254559287e-06, + "loss": 63.9531, + "step": 5641 + }, + { + "epoch": 0.05340729451633362, + "grad_norm": 280.1409912109375, + "learning_rate": 1.9971295046290043e-06, + "loss": 28.7266, + "step": 5642 + }, + { + "epoch": 0.053416760538048676, + "grad_norm": 371.5050964355469, + "learning_rate": 1.997127182864843e-06, + "loss": 19.4961, + "step": 5643 + }, + { + "epoch": 0.05342622655976373, + "grad_norm": 201.5992889404297, + "learning_rate": 1.997124860163447e-06, + "loss": 24.3125, + "step": 5644 + }, + { + "epoch": 0.05343569258147878, + "grad_norm": 353.76556396484375, + "learning_rate": 1.9971225365248186e-06, + "loss": 27.8906, + "step": 5645 + }, + { + "epoch": 0.053445158603193835, + "grad_norm": 383.3140869140625, + "learning_rate": 1.99712021194896e-06, + "loss": 57.9844, + "step": 5646 + }, + { + "epoch": 0.05345462462490889, + "grad_norm": 552.2037353515625, + "learning_rate": 1.997117886435873e-06, + "loss": 48.7656, + "step": 5647 + }, + { + "epoch": 0.05346409064662394, + "grad_norm": 550.6431274414062, + "learning_rate": 1.9971155599855606e-06, + "loss": 48.5312, + "step": 5648 + }, + { + "epoch": 0.053473556668338994, + "grad_norm": 2.5669212341308594, + "learning_rate": 1.997113232598024e-06, + "loss": 0.9653, + "step": 5649 + }, + { + "epoch": 0.053483022690054054, + "grad_norm": 159.8703155517578, + "learning_rate": 1.9971109042732656e-06, + "loss": 22.2812, + "step": 5650 + }, + { + "epoch": 0.05349248871176911, + "grad_norm": 303.66729736328125, + "learning_rate": 1.997108575011288e-06, + "loss": 21.1328, + "step": 5651 + }, + { + "epoch": 0.05350195473348416, + "grad_norm": 576.9896850585938, + "learning_rate": 1.9971062448120934e-06, + "loss": 54.8984, + "step": 5652 + }, + { + "epoch": 0.05351142075519921, + "grad_norm": 353.2442321777344, + "learning_rate": 1.9971039136756837e-06, + "loss": 23.5703, + "step": 5653 + }, + { + "epoch": 0.053520886776914266, + "grad_norm": 175.97190856933594, + "learning_rate": 1.9971015816020613e-06, + "loss": 23.6875, + "step": 5654 + }, + { + "epoch": 0.05353035279862932, + "grad_norm": 188.78054809570312, + "learning_rate": 1.997099248591228e-06, + "loss": 24.9375, + "step": 5655 + }, + { + "epoch": 0.05353981882034437, + "grad_norm": 340.1147155761719, + "learning_rate": 1.9970969146431864e-06, + "loss": 28.7969, + "step": 5656 + }, + { + "epoch": 0.053549284842059425, + "grad_norm": 388.2791748046875, + "learning_rate": 1.9970945797579385e-06, + "loss": 25.2344, + "step": 5657 + }, + { + "epoch": 0.053558750863774485, + "grad_norm": 495.3157653808594, + "learning_rate": 1.9970922439354867e-06, + "loss": 40.9219, + "step": 5658 + }, + { + "epoch": 0.05356821688548954, + "grad_norm": 394.5104064941406, + "learning_rate": 1.9970899071758333e-06, + "loss": 42.2812, + "step": 5659 + }, + { + "epoch": 0.05357768290720459, + "grad_norm": 749.3380126953125, + "learning_rate": 1.9970875694789798e-06, + "loss": 25.5859, + "step": 5660 + }, + { + "epoch": 0.053587148928919644, + "grad_norm": 388.4966125488281, + "learning_rate": 1.9970852308449292e-06, + "loss": 28.2422, + "step": 5661 + }, + { + "epoch": 0.0535966149506347, + "grad_norm": 292.68475341796875, + "learning_rate": 1.997082891273683e-06, + "loss": 27.2656, + "step": 5662 + }, + { + "epoch": 0.05360608097234975, + "grad_norm": 182.567138671875, + "learning_rate": 1.997080550765244e-06, + "loss": 21.375, + "step": 5663 + }, + { + "epoch": 0.0536155469940648, + "grad_norm": 330.9601135253906, + "learning_rate": 1.997078209319614e-06, + "loss": 28.6406, + "step": 5664 + }, + { + "epoch": 0.053625013015779856, + "grad_norm": 1000.6528930664062, + "learning_rate": 1.9970758669367954e-06, + "loss": 44.0938, + "step": 5665 + }, + { + "epoch": 0.05363447903749491, + "grad_norm": 253.8422393798828, + "learning_rate": 1.99707352361679e-06, + "loss": 20.7227, + "step": 5666 + }, + { + "epoch": 0.05364394505920997, + "grad_norm": 597.3182983398438, + "learning_rate": 1.9970711793596007e-06, + "loss": 37.4219, + "step": 5667 + }, + { + "epoch": 0.05365341108092502, + "grad_norm": 234.28591918945312, + "learning_rate": 1.9970688341652296e-06, + "loss": 18.0234, + "step": 5668 + }, + { + "epoch": 0.053662877102640075, + "grad_norm": 736.8815307617188, + "learning_rate": 1.9970664880336783e-06, + "loss": 48.9531, + "step": 5669 + }, + { + "epoch": 0.05367234312435513, + "grad_norm": 724.9033813476562, + "learning_rate": 1.9970641409649495e-06, + "loss": 45.3281, + "step": 5670 + }, + { + "epoch": 0.05368180914607018, + "grad_norm": 828.4583740234375, + "learning_rate": 1.9970617929590452e-06, + "loss": 51.8906, + "step": 5671 + }, + { + "epoch": 0.053691275167785234, + "grad_norm": 584.1402587890625, + "learning_rate": 1.997059444015968e-06, + "loss": 66.6875, + "step": 5672 + }, + { + "epoch": 0.05370074118950029, + "grad_norm": 367.2530517578125, + "learning_rate": 1.9970570941357196e-06, + "loss": 17.9531, + "step": 5673 + }, + { + "epoch": 0.05371020721121534, + "grad_norm": 418.38555908203125, + "learning_rate": 1.997054743318302e-06, + "loss": 28.4375, + "step": 5674 + }, + { + "epoch": 0.0537196732329304, + "grad_norm": 352.43731689453125, + "learning_rate": 1.9970523915637183e-06, + "loss": 42.0312, + "step": 5675 + }, + { + "epoch": 0.05372913925464545, + "grad_norm": 719.1171264648438, + "learning_rate": 1.9970500388719697e-06, + "loss": 49.3125, + "step": 5676 + }, + { + "epoch": 0.053738605276360506, + "grad_norm": 964.0706176757812, + "learning_rate": 1.9970476852430593e-06, + "loss": 28.6016, + "step": 5677 + }, + { + "epoch": 0.05374807129807556, + "grad_norm": 500.1060791015625, + "learning_rate": 1.9970453306769894e-06, + "loss": 29.1562, + "step": 5678 + }, + { + "epoch": 0.05375753731979061, + "grad_norm": 625.6259765625, + "learning_rate": 1.9970429751737606e-06, + "loss": 40.7266, + "step": 5679 + }, + { + "epoch": 0.053767003341505665, + "grad_norm": 265.9289245605469, + "learning_rate": 1.997040618733377e-06, + "loss": 35.3125, + "step": 5680 + }, + { + "epoch": 0.05377646936322072, + "grad_norm": 405.9252014160156, + "learning_rate": 1.99703826135584e-06, + "loss": 12.8594, + "step": 5681 + }, + { + "epoch": 0.05378593538493577, + "grad_norm": 323.647705078125, + "learning_rate": 1.9970359030411523e-06, + "loss": 23.2891, + "step": 5682 + }, + { + "epoch": 0.053795401406650824, + "grad_norm": 305.0298767089844, + "learning_rate": 1.997033543789315e-06, + "loss": 25.6406, + "step": 5683 + }, + { + "epoch": 0.053804867428365884, + "grad_norm": 310.2297668457031, + "learning_rate": 1.9970311836003315e-06, + "loss": 41.7188, + "step": 5684 + }, + { + "epoch": 0.05381433345008094, + "grad_norm": 398.707275390625, + "learning_rate": 1.997028822474203e-06, + "loss": 45.6875, + "step": 5685 + }, + { + "epoch": 0.05382379947179599, + "grad_norm": 505.1233825683594, + "learning_rate": 1.997026460410933e-06, + "loss": 48.6406, + "step": 5686 + }, + { + "epoch": 0.05383326549351104, + "grad_norm": 192.23684692382812, + "learning_rate": 1.9970240974105223e-06, + "loss": 21.5078, + "step": 5687 + }, + { + "epoch": 0.053842731515226096, + "grad_norm": 295.61651611328125, + "learning_rate": 1.9970217334729744e-06, + "loss": 23.7578, + "step": 5688 + }, + { + "epoch": 0.05385219753694115, + "grad_norm": 353.2547607421875, + "learning_rate": 1.9970193685982908e-06, + "loss": 32.2266, + "step": 5689 + }, + { + "epoch": 0.0538616635586562, + "grad_norm": 422.10052490234375, + "learning_rate": 1.9970170027864736e-06, + "loss": 21.4297, + "step": 5690 + }, + { + "epoch": 0.053871129580371255, + "grad_norm": 949.9984741210938, + "learning_rate": 1.997014636037525e-06, + "loss": 46.5938, + "step": 5691 + }, + { + "epoch": 0.05388059560208631, + "grad_norm": 285.9170227050781, + "learning_rate": 1.9970122683514485e-06, + "loss": 20.9062, + "step": 5692 + }, + { + "epoch": 0.05389006162380137, + "grad_norm": 241.88262939453125, + "learning_rate": 1.9970098997282443e-06, + "loss": 24.9453, + "step": 5693 + }, + { + "epoch": 0.05389952764551642, + "grad_norm": 189.1852569580078, + "learning_rate": 1.997007530167916e-06, + "loss": 23.0156, + "step": 5694 + }, + { + "epoch": 0.053908993667231474, + "grad_norm": 261.3092956542969, + "learning_rate": 1.997005159670466e-06, + "loss": 25.4766, + "step": 5695 + }, + { + "epoch": 0.05391845968894653, + "grad_norm": 152.83155822753906, + "learning_rate": 1.9970027882358953e-06, + "loss": 17.8359, + "step": 5696 + }, + { + "epoch": 0.05392792571066158, + "grad_norm": 5.164846897125244, + "learning_rate": 1.9970004158642074e-06, + "loss": 0.9551, + "step": 5697 + }, + { + "epoch": 0.05393739173237663, + "grad_norm": 485.5975341796875, + "learning_rate": 1.9969980425554037e-06, + "loss": 23.2109, + "step": 5698 + }, + { + "epoch": 0.053946857754091686, + "grad_norm": 405.067626953125, + "learning_rate": 1.9969956683094864e-06, + "loss": 27.4219, + "step": 5699 + }, + { + "epoch": 0.05395632377580674, + "grad_norm": 689.7136840820312, + "learning_rate": 1.9969932931264584e-06, + "loss": 51.375, + "step": 5700 + }, + { + "epoch": 0.0539657897975218, + "grad_norm": 396.62286376953125, + "learning_rate": 1.9969909170063215e-06, + "loss": 32.1562, + "step": 5701 + }, + { + "epoch": 0.05397525581923685, + "grad_norm": 694.4052734375, + "learning_rate": 1.9969885399490777e-06, + "loss": 47.3594, + "step": 5702 + }, + { + "epoch": 0.053984721840951905, + "grad_norm": 171.4988555908203, + "learning_rate": 1.99698616195473e-06, + "loss": 25.4844, + "step": 5703 + }, + { + "epoch": 0.05399418786266696, + "grad_norm": 421.12994384765625, + "learning_rate": 1.99698378302328e-06, + "loss": 32.4141, + "step": 5704 + }, + { + "epoch": 0.05400365388438201, + "grad_norm": 232.99755859375, + "learning_rate": 1.99698140315473e-06, + "loss": 23.8281, + "step": 5705 + }, + { + "epoch": 0.054013119906097064, + "grad_norm": 239.55625915527344, + "learning_rate": 1.9969790223490826e-06, + "loss": 24.8438, + "step": 5706 + }, + { + "epoch": 0.05402258592781212, + "grad_norm": 218.33018493652344, + "learning_rate": 1.9969766406063397e-06, + "loss": 24.375, + "step": 5707 + }, + { + "epoch": 0.05403205194952717, + "grad_norm": 273.0238037109375, + "learning_rate": 1.9969742579265035e-06, + "loss": 22.9141, + "step": 5708 + }, + { + "epoch": 0.05404151797124222, + "grad_norm": 334.6183166503906, + "learning_rate": 1.996971874309576e-06, + "loss": 34.0312, + "step": 5709 + }, + { + "epoch": 0.05405098399295728, + "grad_norm": 217.93284606933594, + "learning_rate": 1.99696948975556e-06, + "loss": 25.8594, + "step": 5710 + }, + { + "epoch": 0.054060450014672336, + "grad_norm": 593.750732421875, + "learning_rate": 1.996967104264458e-06, + "loss": 15.1914, + "step": 5711 + }, + { + "epoch": 0.05406991603638739, + "grad_norm": 997.5277099609375, + "learning_rate": 1.9969647178362713e-06, + "loss": 84.4688, + "step": 5712 + }, + { + "epoch": 0.05407938205810244, + "grad_norm": 661.0353393554688, + "learning_rate": 1.996962330471003e-06, + "loss": 50.25, + "step": 5713 + }, + { + "epoch": 0.054088848079817495, + "grad_norm": 854.0291137695312, + "learning_rate": 1.9969599421686547e-06, + "loss": 39.8281, + "step": 5714 + }, + { + "epoch": 0.05409831410153255, + "grad_norm": 424.1227722167969, + "learning_rate": 1.996957552929229e-06, + "loss": 23.7891, + "step": 5715 + }, + { + "epoch": 0.0541077801232476, + "grad_norm": 779.1250610351562, + "learning_rate": 1.996955162752728e-06, + "loss": 28.5469, + "step": 5716 + }, + { + "epoch": 0.054117246144962654, + "grad_norm": 159.21420288085938, + "learning_rate": 1.996952771639154e-06, + "loss": 21.7812, + "step": 5717 + }, + { + "epoch": 0.054126712166677714, + "grad_norm": 205.27548217773438, + "learning_rate": 1.9969503795885094e-06, + "loss": 23.5078, + "step": 5718 + }, + { + "epoch": 0.05413617818839277, + "grad_norm": 174.98684692382812, + "learning_rate": 1.996947986600796e-06, + "loss": 15.2812, + "step": 5719 + }, + { + "epoch": 0.05414564421010782, + "grad_norm": 418.80621337890625, + "learning_rate": 1.9969455926760167e-06, + "loss": 35.1875, + "step": 5720 + }, + { + "epoch": 0.05415511023182287, + "grad_norm": 417.1382141113281, + "learning_rate": 1.996943197814173e-06, + "loss": 17.5312, + "step": 5721 + }, + { + "epoch": 0.054164576253537926, + "grad_norm": 464.9803466796875, + "learning_rate": 1.9969408020152676e-06, + "loss": 55.6562, + "step": 5722 + }, + { + "epoch": 0.05417404227525298, + "grad_norm": 2.904372453689575, + "learning_rate": 1.996938405279303e-06, + "loss": 0.8608, + "step": 5723 + }, + { + "epoch": 0.05418350829696803, + "grad_norm": 273.75244140625, + "learning_rate": 1.996936007606281e-06, + "loss": 21.4453, + "step": 5724 + }, + { + "epoch": 0.054192974318683085, + "grad_norm": 552.9636840820312, + "learning_rate": 1.996933608996204e-06, + "loss": 52.4219, + "step": 5725 + }, + { + "epoch": 0.05420244034039814, + "grad_norm": 216.53729248046875, + "learning_rate": 1.9969312094490743e-06, + "loss": 22.25, + "step": 5726 + }, + { + "epoch": 0.0542119063621132, + "grad_norm": 969.7297973632812, + "learning_rate": 1.9969288089648944e-06, + "loss": 68.7188, + "step": 5727 + }, + { + "epoch": 0.05422137238382825, + "grad_norm": 3.1679420471191406, + "learning_rate": 1.996926407543666e-06, + "loss": 0.9648, + "step": 5728 + }, + { + "epoch": 0.054230838405543304, + "grad_norm": 836.8701782226562, + "learning_rate": 1.9969240051853914e-06, + "loss": 46.625, + "step": 5729 + }, + { + "epoch": 0.05424030442725836, + "grad_norm": 3.532238245010376, + "learning_rate": 1.9969216018900734e-06, + "loss": 0.9048, + "step": 5730 + }, + { + "epoch": 0.05424977044897341, + "grad_norm": 378.0259094238281, + "learning_rate": 1.996919197657714e-06, + "loss": 31.4922, + "step": 5731 + }, + { + "epoch": 0.05425923647068846, + "grad_norm": 557.8311767578125, + "learning_rate": 1.996916792488315e-06, + "loss": 24.3906, + "step": 5732 + }, + { + "epoch": 0.054268702492403516, + "grad_norm": 402.97491455078125, + "learning_rate": 1.9969143863818795e-06, + "loss": 26.1406, + "step": 5733 + }, + { + "epoch": 0.05427816851411857, + "grad_norm": 677.6795043945312, + "learning_rate": 1.9969119793384094e-06, + "loss": 29.8828, + "step": 5734 + }, + { + "epoch": 0.05428763453583362, + "grad_norm": 505.14886474609375, + "learning_rate": 1.9969095713579066e-06, + "loss": 34.5156, + "step": 5735 + }, + { + "epoch": 0.05429710055754868, + "grad_norm": 659.0269165039062, + "learning_rate": 1.9969071624403735e-06, + "loss": 45.1094, + "step": 5736 + }, + { + "epoch": 0.054306566579263735, + "grad_norm": 422.01129150390625, + "learning_rate": 1.996904752585813e-06, + "loss": 56.6797, + "step": 5737 + }, + { + "epoch": 0.05431603260097879, + "grad_norm": 415.2609558105469, + "learning_rate": 1.9969023417942266e-06, + "loss": 47.625, + "step": 5738 + }, + { + "epoch": 0.05432549862269384, + "grad_norm": 631.7333984375, + "learning_rate": 1.996899930065617e-06, + "loss": 31.1328, + "step": 5739 + }, + { + "epoch": 0.054334964644408894, + "grad_norm": 346.9601135253906, + "learning_rate": 1.996897517399986e-06, + "loss": 24.4375, + "step": 5740 + }, + { + "epoch": 0.05434443066612395, + "grad_norm": 528.3674926757812, + "learning_rate": 1.9968951037973363e-06, + "loss": 30.4844, + "step": 5741 + }, + { + "epoch": 0.054353896687839, + "grad_norm": 657.2178955078125, + "learning_rate": 1.99689268925767e-06, + "loss": 43.0352, + "step": 5742 + }, + { + "epoch": 0.05436336270955405, + "grad_norm": 586.3692626953125, + "learning_rate": 1.9968902737809894e-06, + "loss": 26.4844, + "step": 5743 + }, + { + "epoch": 0.05437282873126911, + "grad_norm": 806.5737915039062, + "learning_rate": 1.996887857367297e-06, + "loss": 22.2969, + "step": 5744 + }, + { + "epoch": 0.054382294752984166, + "grad_norm": 482.68804931640625, + "learning_rate": 1.9968854400165947e-06, + "loss": 51.0781, + "step": 5745 + }, + { + "epoch": 0.05439176077469922, + "grad_norm": 613.7179565429688, + "learning_rate": 1.9968830217288854e-06, + "loss": 56.6875, + "step": 5746 + }, + { + "epoch": 0.05440122679641427, + "grad_norm": 2.7517762184143066, + "learning_rate": 1.9968806025041706e-06, + "loss": 0.8398, + "step": 5747 + }, + { + "epoch": 0.054410692818129325, + "grad_norm": 351.1712646484375, + "learning_rate": 1.9968781823424526e-06, + "loss": 31.1094, + "step": 5748 + }, + { + "epoch": 0.05442015883984438, + "grad_norm": 597.9843139648438, + "learning_rate": 1.9968757612437345e-06, + "loss": 32.7969, + "step": 5749 + }, + { + "epoch": 0.05442962486155943, + "grad_norm": 235.6949920654297, + "learning_rate": 1.9968733392080176e-06, + "loss": 24.4688, + "step": 5750 + }, + { + "epoch": 0.054439090883274484, + "grad_norm": 234.38430786132812, + "learning_rate": 1.9968709162353047e-06, + "loss": 30.0625, + "step": 5751 + }, + { + "epoch": 0.05444855690498954, + "grad_norm": 234.31752014160156, + "learning_rate": 1.996868492325598e-06, + "loss": 19.9141, + "step": 5752 + }, + { + "epoch": 0.0544580229267046, + "grad_norm": 235.2005615234375, + "learning_rate": 1.9968660674789e-06, + "loss": 25.2188, + "step": 5753 + }, + { + "epoch": 0.05446748894841965, + "grad_norm": 401.55242919921875, + "learning_rate": 1.9968636416952125e-06, + "loss": 47.2344, + "step": 5754 + }, + { + "epoch": 0.0544769549701347, + "grad_norm": 837.7681274414062, + "learning_rate": 1.996861214974538e-06, + "loss": 43.4062, + "step": 5755 + }, + { + "epoch": 0.054486420991849756, + "grad_norm": 204.93215942382812, + "learning_rate": 1.9968587873168793e-06, + "loss": 19.4609, + "step": 5756 + }, + { + "epoch": 0.05449588701356481, + "grad_norm": 329.6005554199219, + "learning_rate": 1.9968563587222376e-06, + "loss": 22.125, + "step": 5757 + }, + { + "epoch": 0.05450535303527986, + "grad_norm": 416.6439514160156, + "learning_rate": 1.996853929190616e-06, + "loss": 37.2969, + "step": 5758 + }, + { + "epoch": 0.054514819056994915, + "grad_norm": 245.97755432128906, + "learning_rate": 1.9968514987220163e-06, + "loss": 21.8594, + "step": 5759 + }, + { + "epoch": 0.05452428507870997, + "grad_norm": 420.1456298828125, + "learning_rate": 1.9968490673164412e-06, + "loss": 30.3984, + "step": 5760 + }, + { + "epoch": 0.05453375110042503, + "grad_norm": 426.9433288574219, + "learning_rate": 1.996846634973893e-06, + "loss": 53.2656, + "step": 5761 + }, + { + "epoch": 0.05454321712214008, + "grad_norm": 832.617431640625, + "learning_rate": 1.9968442016943737e-06, + "loss": 59.6875, + "step": 5762 + }, + { + "epoch": 0.054552683143855134, + "grad_norm": 412.642822265625, + "learning_rate": 1.9968417674778855e-06, + "loss": 58.3125, + "step": 5763 + }, + { + "epoch": 0.05456214916557019, + "grad_norm": 199.42921447753906, + "learning_rate": 1.9968393323244315e-06, + "loss": 22.1719, + "step": 5764 + }, + { + "epoch": 0.05457161518728524, + "grad_norm": 3.442962884902954, + "learning_rate": 1.996836896234013e-06, + "loss": 0.9805, + "step": 5765 + }, + { + "epoch": 0.05458108120900029, + "grad_norm": 219.7270965576172, + "learning_rate": 1.9968344592066323e-06, + "loss": 24.4844, + "step": 5766 + }, + { + "epoch": 0.054590547230715346, + "grad_norm": 390.6729736328125, + "learning_rate": 1.9968320212422926e-06, + "loss": 50.2812, + "step": 5767 + }, + { + "epoch": 0.0546000132524304, + "grad_norm": 411.1804504394531, + "learning_rate": 1.996829582340995e-06, + "loss": 60.8281, + "step": 5768 + }, + { + "epoch": 0.05460947927414545, + "grad_norm": 260.2649230957031, + "learning_rate": 1.9968271425027434e-06, + "loss": 21.8203, + "step": 5769 + }, + { + "epoch": 0.05461894529586051, + "grad_norm": 432.86309814453125, + "learning_rate": 1.9968247017275384e-06, + "loss": 47.9688, + "step": 5770 + }, + { + "epoch": 0.054628411317575565, + "grad_norm": 438.3985290527344, + "learning_rate": 1.9968222600153833e-06, + "loss": 43.2734, + "step": 5771 + }, + { + "epoch": 0.05463787733929062, + "grad_norm": 794.7569580078125, + "learning_rate": 1.9968198173662802e-06, + "loss": 80.0, + "step": 5772 + }, + { + "epoch": 0.05464734336100567, + "grad_norm": 486.7784118652344, + "learning_rate": 1.996817373780231e-06, + "loss": 17.9297, + "step": 5773 + }, + { + "epoch": 0.054656809382720724, + "grad_norm": 343.8138732910156, + "learning_rate": 1.9968149292572383e-06, + "loss": 32.9375, + "step": 5774 + }, + { + "epoch": 0.05466627540443578, + "grad_norm": 3.06115460395813, + "learning_rate": 1.9968124837973047e-06, + "loss": 0.853, + "step": 5775 + }, + { + "epoch": 0.05467574142615083, + "grad_norm": 607.7181396484375, + "learning_rate": 1.9968100374004323e-06, + "loss": 51.7344, + "step": 5776 + }, + { + "epoch": 0.05468520744786588, + "grad_norm": 509.88555908203125, + "learning_rate": 1.996807590066623e-06, + "loss": 21.75, + "step": 5777 + }, + { + "epoch": 0.054694673469580936, + "grad_norm": 925.7606201171875, + "learning_rate": 1.9968051417958794e-06, + "loss": 56.5391, + "step": 5778 + }, + { + "epoch": 0.054704139491295996, + "grad_norm": 491.1330871582031, + "learning_rate": 1.9968026925882044e-06, + "loss": 55.375, + "step": 5779 + }, + { + "epoch": 0.05471360551301105, + "grad_norm": 1092.1614990234375, + "learning_rate": 1.9968002424435987e-06, + "loss": 36.2344, + "step": 5780 + }, + { + "epoch": 0.0547230715347261, + "grad_norm": 521.0477905273438, + "learning_rate": 1.9967977913620666e-06, + "loss": 50.0156, + "step": 5781 + }, + { + "epoch": 0.054732537556441155, + "grad_norm": 251.18748474121094, + "learning_rate": 1.996795339343609e-06, + "loss": 20.2969, + "step": 5782 + }, + { + "epoch": 0.05474200357815621, + "grad_norm": 267.9259338378906, + "learning_rate": 1.9967928863882286e-06, + "loss": 29.8125, + "step": 5783 + }, + { + "epoch": 0.05475146959987126, + "grad_norm": 611.448486328125, + "learning_rate": 1.9967904324959277e-06, + "loss": 51.0, + "step": 5784 + }, + { + "epoch": 0.054760935621586314, + "grad_norm": 172.14967346191406, + "learning_rate": 1.9967879776667085e-06, + "loss": 23.9844, + "step": 5785 + }, + { + "epoch": 0.05477040164330137, + "grad_norm": 397.0157165527344, + "learning_rate": 1.996785521900574e-06, + "loss": 28.5, + "step": 5786 + }, + { + "epoch": 0.05477986766501643, + "grad_norm": 280.12872314453125, + "learning_rate": 1.9967830651975255e-06, + "loss": 37.9062, + "step": 5787 + }, + { + "epoch": 0.05478933368673148, + "grad_norm": 401.8898620605469, + "learning_rate": 1.9967806075575655e-06, + "loss": 24.2656, + "step": 5788 + }, + { + "epoch": 0.05479879970844653, + "grad_norm": 815.7952270507812, + "learning_rate": 1.9967781489806973e-06, + "loss": 32.4766, + "step": 5789 + }, + { + "epoch": 0.054808265730161586, + "grad_norm": 394.7861633300781, + "learning_rate": 1.996775689466922e-06, + "loss": 25.8047, + "step": 5790 + }, + { + "epoch": 0.05481773175187664, + "grad_norm": 709.1638793945312, + "learning_rate": 1.9967732290162426e-06, + "loss": 43.9531, + "step": 5791 + }, + { + "epoch": 0.05482719777359169, + "grad_norm": 1125.298828125, + "learning_rate": 1.996770767628661e-06, + "loss": 41.7188, + "step": 5792 + }, + { + "epoch": 0.054836663795306745, + "grad_norm": 222.33689880371094, + "learning_rate": 1.99676830530418e-06, + "loss": 22.2266, + "step": 5793 + }, + { + "epoch": 0.0548461298170218, + "grad_norm": 446.6675109863281, + "learning_rate": 1.9967658420428015e-06, + "loss": 31.1562, + "step": 5794 + }, + { + "epoch": 0.05485559583873685, + "grad_norm": 594.694580078125, + "learning_rate": 1.996763377844528e-06, + "loss": 50.9219, + "step": 5795 + }, + { + "epoch": 0.05486506186045191, + "grad_norm": 430.0958251953125, + "learning_rate": 1.9967609127093615e-06, + "loss": 28.0547, + "step": 5796 + }, + { + "epoch": 0.054874527882166964, + "grad_norm": 678.9195556640625, + "learning_rate": 1.996758446637305e-06, + "loss": 48.125, + "step": 5797 + }, + { + "epoch": 0.05488399390388202, + "grad_norm": 250.9822540283203, + "learning_rate": 1.99675597962836e-06, + "loss": 27.6562, + "step": 5798 + }, + { + "epoch": 0.05489345992559707, + "grad_norm": 3.293403387069702, + "learning_rate": 1.9967535116825295e-06, + "loss": 1.0171, + "step": 5799 + }, + { + "epoch": 0.05490292594731212, + "grad_norm": 217.90586853027344, + "learning_rate": 1.9967510427998157e-06, + "loss": 28.4375, + "step": 5800 + }, + { + "epoch": 0.054912391969027176, + "grad_norm": 180.6394500732422, + "learning_rate": 1.9967485729802203e-06, + "loss": 22.0234, + "step": 5801 + }, + { + "epoch": 0.05492185799074223, + "grad_norm": 510.5787048339844, + "learning_rate": 1.9967461022237463e-06, + "loss": 26.9609, + "step": 5802 + }, + { + "epoch": 0.05493132401245728, + "grad_norm": 3.4474947452545166, + "learning_rate": 1.996743630530396e-06, + "loss": 0.8535, + "step": 5803 + }, + { + "epoch": 0.05494079003417234, + "grad_norm": 294.74273681640625, + "learning_rate": 1.996741157900171e-06, + "loss": 29.0781, + "step": 5804 + }, + { + "epoch": 0.054950256055887395, + "grad_norm": 380.8851318359375, + "learning_rate": 1.9967386843330745e-06, + "loss": 29.2812, + "step": 5805 + }, + { + "epoch": 0.05495972207760245, + "grad_norm": 3.737377882003784, + "learning_rate": 1.996736209829109e-06, + "loss": 0.8579, + "step": 5806 + }, + { + "epoch": 0.0549691880993175, + "grad_norm": 704.2554931640625, + "learning_rate": 1.9967337343882757e-06, + "loss": 31.3359, + "step": 5807 + }, + { + "epoch": 0.054978654121032554, + "grad_norm": 692.1821899414062, + "learning_rate": 1.9967312580105777e-06, + "loss": 42.2344, + "step": 5808 + }, + { + "epoch": 0.05498812014274761, + "grad_norm": 1068.155517578125, + "learning_rate": 1.996728780696017e-06, + "loss": 53.3594, + "step": 5809 + }, + { + "epoch": 0.05499758616446266, + "grad_norm": 345.7562561035156, + "learning_rate": 1.9967263024445964e-06, + "loss": 19.3984, + "step": 5810 + }, + { + "epoch": 0.05500705218617771, + "grad_norm": 946.2713623046875, + "learning_rate": 1.9967238232563177e-06, + "loss": 57.2422, + "step": 5811 + }, + { + "epoch": 0.055016518207892766, + "grad_norm": 200.9973602294922, + "learning_rate": 1.9967213431311833e-06, + "loss": 18.4609, + "step": 5812 + }, + { + "epoch": 0.055025984229607826, + "grad_norm": 894.8177490234375, + "learning_rate": 1.9967188620691953e-06, + "loss": 41.2812, + "step": 5813 + }, + { + "epoch": 0.05503545025132288, + "grad_norm": 267.4609069824219, + "learning_rate": 1.996716380070357e-06, + "loss": 18.2578, + "step": 5814 + }, + { + "epoch": 0.05504491627303793, + "grad_norm": 134.5675048828125, + "learning_rate": 1.9967138971346704e-06, + "loss": 21.1719, + "step": 5815 + }, + { + "epoch": 0.055054382294752985, + "grad_norm": 716.7022094726562, + "learning_rate": 1.9967114132621373e-06, + "loss": 32.3359, + "step": 5816 + }, + { + "epoch": 0.05506384831646804, + "grad_norm": 300.36126708984375, + "learning_rate": 1.99670892845276e-06, + "loss": 19.3203, + "step": 5817 + }, + { + "epoch": 0.05507331433818309, + "grad_norm": 219.91807556152344, + "learning_rate": 1.9967064427065417e-06, + "loss": 25.2344, + "step": 5818 + }, + { + "epoch": 0.055082780359898144, + "grad_norm": 252.606201171875, + "learning_rate": 1.996703956023484e-06, + "loss": 18.0078, + "step": 5819 + }, + { + "epoch": 0.0550922463816132, + "grad_norm": 810.482666015625, + "learning_rate": 1.996701468403589e-06, + "loss": 55.1875, + "step": 5820 + }, + { + "epoch": 0.05510171240332825, + "grad_norm": 404.83282470703125, + "learning_rate": 1.99669897984686e-06, + "loss": 60.0156, + "step": 5821 + }, + { + "epoch": 0.05511117842504331, + "grad_norm": 834.7433471679688, + "learning_rate": 1.9966964903532983e-06, + "loss": 43.7969, + "step": 5822 + }, + { + "epoch": 0.05512064444675836, + "grad_norm": 266.20184326171875, + "learning_rate": 1.996693999922907e-06, + "loss": 18.0703, + "step": 5823 + }, + { + "epoch": 0.055130110468473416, + "grad_norm": 215.77999877929688, + "learning_rate": 1.9966915085556883e-06, + "loss": 26.6719, + "step": 5824 + }, + { + "epoch": 0.05513957649018847, + "grad_norm": 760.3345336914062, + "learning_rate": 1.9966890162516444e-06, + "loss": 67.5625, + "step": 5825 + }, + { + "epoch": 0.05514904251190352, + "grad_norm": 220.17596435546875, + "learning_rate": 1.9966865230107774e-06, + "loss": 32.5469, + "step": 5826 + }, + { + "epoch": 0.055158508533618575, + "grad_norm": 490.73992919921875, + "learning_rate": 1.9966840288330903e-06, + "loss": 26.8672, + "step": 5827 + }, + { + "epoch": 0.05516797455533363, + "grad_norm": 900.350830078125, + "learning_rate": 1.9966815337185843e-06, + "loss": 24.3711, + "step": 5828 + }, + { + "epoch": 0.05517744057704868, + "grad_norm": 1273.1627197265625, + "learning_rate": 1.9966790376672633e-06, + "loss": 47.2188, + "step": 5829 + }, + { + "epoch": 0.05518690659876374, + "grad_norm": 288.6801452636719, + "learning_rate": 1.9966765406791285e-06, + "loss": 21.5156, + "step": 5830 + }, + { + "epoch": 0.055196372620478794, + "grad_norm": 246.97854614257812, + "learning_rate": 1.996674042754183e-06, + "loss": 18.7109, + "step": 5831 + }, + { + "epoch": 0.05520583864219385, + "grad_norm": 377.6014709472656, + "learning_rate": 1.996671543892428e-06, + "loss": 19.2969, + "step": 5832 + }, + { + "epoch": 0.0552153046639089, + "grad_norm": 617.341552734375, + "learning_rate": 1.996669044093867e-06, + "loss": 25.0859, + "step": 5833 + }, + { + "epoch": 0.05522477068562395, + "grad_norm": 1084.240234375, + "learning_rate": 1.9966665433585018e-06, + "loss": 49.0781, + "step": 5834 + }, + { + "epoch": 0.055234236707339006, + "grad_norm": 472.3929748535156, + "learning_rate": 1.9966640416863353e-06, + "loss": 50.4219, + "step": 5835 + }, + { + "epoch": 0.05524370272905406, + "grad_norm": 284.5583801269531, + "learning_rate": 1.996661539077369e-06, + "loss": 27.4219, + "step": 5836 + }, + { + "epoch": 0.05525316875076911, + "grad_norm": 233.41537475585938, + "learning_rate": 1.996659035531606e-06, + "loss": 19.4219, + "step": 5837 + }, + { + "epoch": 0.055262634772484165, + "grad_norm": 604.974365234375, + "learning_rate": 1.996656531049048e-06, + "loss": 80.0312, + "step": 5838 + }, + { + "epoch": 0.055272100794199225, + "grad_norm": 162.1331024169922, + "learning_rate": 1.996654025629698e-06, + "loss": 18.8359, + "step": 5839 + }, + { + "epoch": 0.05528156681591428, + "grad_norm": 485.5374450683594, + "learning_rate": 1.9966515192735576e-06, + "loss": 37.875, + "step": 5840 + }, + { + "epoch": 0.05529103283762933, + "grad_norm": 814.5239868164062, + "learning_rate": 1.9966490119806305e-06, + "loss": 31.5781, + "step": 5841 + }, + { + "epoch": 0.055300498859344384, + "grad_norm": 309.14788818359375, + "learning_rate": 1.9966465037509175e-06, + "loss": 16.9023, + "step": 5842 + }, + { + "epoch": 0.05530996488105944, + "grad_norm": 526.0236206054688, + "learning_rate": 1.9966439945844216e-06, + "loss": 50.9961, + "step": 5843 + }, + { + "epoch": 0.05531943090277449, + "grad_norm": 519.3023071289062, + "learning_rate": 1.9966414844811454e-06, + "loss": 64.3594, + "step": 5844 + }, + { + "epoch": 0.05532889692448954, + "grad_norm": 169.30276489257812, + "learning_rate": 1.9966389734410906e-06, + "loss": 26.9922, + "step": 5845 + }, + { + "epoch": 0.055338362946204596, + "grad_norm": 464.257080078125, + "learning_rate": 1.9966364614642605e-06, + "loss": 20.1406, + "step": 5846 + }, + { + "epoch": 0.055347828967919656, + "grad_norm": 859.1212158203125, + "learning_rate": 1.996633948550657e-06, + "loss": 37.0156, + "step": 5847 + }, + { + "epoch": 0.05535729498963471, + "grad_norm": 606.3424682617188, + "learning_rate": 1.9966314347002823e-06, + "loss": 46.6719, + "step": 5848 + }, + { + "epoch": 0.05536676101134976, + "grad_norm": 295.8800048828125, + "learning_rate": 1.996628919913139e-06, + "loss": 26.2266, + "step": 5849 + }, + { + "epoch": 0.055376227033064815, + "grad_norm": 315.70037841796875, + "learning_rate": 1.9966264041892286e-06, + "loss": 32.25, + "step": 5850 + }, + { + "epoch": 0.05538569305477987, + "grad_norm": 738.1337280273438, + "learning_rate": 1.996623887528555e-06, + "loss": 23.8984, + "step": 5851 + }, + { + "epoch": 0.05539515907649492, + "grad_norm": 304.07891845703125, + "learning_rate": 1.9966213699311196e-06, + "loss": 29.1406, + "step": 5852 + }, + { + "epoch": 0.055404625098209974, + "grad_norm": 550.8828735351562, + "learning_rate": 1.9966188513969247e-06, + "loss": 40.6172, + "step": 5853 + }, + { + "epoch": 0.05541409111992503, + "grad_norm": 226.49148559570312, + "learning_rate": 1.996616331925973e-06, + "loss": 26.6562, + "step": 5854 + }, + { + "epoch": 0.05542355714164008, + "grad_norm": 502.90802001953125, + "learning_rate": 1.996613811518267e-06, + "loss": 53.3125, + "step": 5855 + }, + { + "epoch": 0.05543302316335514, + "grad_norm": 176.33937072753906, + "learning_rate": 1.996611290173809e-06, + "loss": 20.7969, + "step": 5856 + }, + { + "epoch": 0.05544248918507019, + "grad_norm": 2.9278295040130615, + "learning_rate": 1.996608767892601e-06, + "loss": 0.9365, + "step": 5857 + }, + { + "epoch": 0.055451955206785246, + "grad_norm": 224.92710876464844, + "learning_rate": 1.9966062446746455e-06, + "loss": 26.9766, + "step": 5858 + }, + { + "epoch": 0.0554614212285003, + "grad_norm": 478.9147033691406, + "learning_rate": 1.996603720519945e-06, + "loss": 20.4648, + "step": 5859 + }, + { + "epoch": 0.05547088725021535, + "grad_norm": 520.0282592773438, + "learning_rate": 1.996601195428502e-06, + "loss": 45.9062, + "step": 5860 + }, + { + "epoch": 0.055480353271930405, + "grad_norm": 543.88525390625, + "learning_rate": 1.996598669400318e-06, + "loss": 38.1055, + "step": 5861 + }, + { + "epoch": 0.05548981929364546, + "grad_norm": 313.1460266113281, + "learning_rate": 1.996596142435397e-06, + "loss": 34.0469, + "step": 5862 + }, + { + "epoch": 0.05549928531536051, + "grad_norm": 301.33673095703125, + "learning_rate": 1.99659361453374e-06, + "loss": 15.0117, + "step": 5863 + }, + { + "epoch": 0.055508751337075564, + "grad_norm": 388.2138977050781, + "learning_rate": 1.9965910856953503e-06, + "loss": 31.75, + "step": 5864 + }, + { + "epoch": 0.055518217358790624, + "grad_norm": 379.0223083496094, + "learning_rate": 1.9965885559202295e-06, + "loss": 40.2812, + "step": 5865 + }, + { + "epoch": 0.05552768338050568, + "grad_norm": 287.1973571777344, + "learning_rate": 1.99658602520838e-06, + "loss": 28.2188, + "step": 5866 + }, + { + "epoch": 0.05553714940222073, + "grad_norm": 596.8421630859375, + "learning_rate": 1.9965834935598047e-06, + "loss": 37.8828, + "step": 5867 + }, + { + "epoch": 0.05554661542393578, + "grad_norm": 674.7907104492188, + "learning_rate": 1.996580960974506e-06, + "loss": 63.4531, + "step": 5868 + }, + { + "epoch": 0.055556081445650836, + "grad_norm": 334.4691467285156, + "learning_rate": 1.9965784274524857e-06, + "loss": 30.7031, + "step": 5869 + }, + { + "epoch": 0.05556554746736589, + "grad_norm": 442.9364929199219, + "learning_rate": 1.9965758929937465e-06, + "loss": 37.7188, + "step": 5870 + }, + { + "epoch": 0.05557501348908094, + "grad_norm": 192.75999450683594, + "learning_rate": 1.996573357598291e-06, + "loss": 23.2969, + "step": 5871 + }, + { + "epoch": 0.055584479510795995, + "grad_norm": 442.0125732421875, + "learning_rate": 1.9965708212661212e-06, + "loss": 43.9297, + "step": 5872 + }, + { + "epoch": 0.055593945532511055, + "grad_norm": 410.4358215332031, + "learning_rate": 1.9965682839972397e-06, + "loss": 27.6406, + "step": 5873 + }, + { + "epoch": 0.05560341155422611, + "grad_norm": 695.2293701171875, + "learning_rate": 1.996565745791649e-06, + "loss": 43.4141, + "step": 5874 + }, + { + "epoch": 0.05561287757594116, + "grad_norm": 238.68966674804688, + "learning_rate": 1.9965632066493514e-06, + "loss": 26.9375, + "step": 5875 + }, + { + "epoch": 0.055622343597656214, + "grad_norm": 504.7596435546875, + "learning_rate": 1.996560666570349e-06, + "loss": 58.4375, + "step": 5876 + }, + { + "epoch": 0.05563180961937127, + "grad_norm": 432.21221923828125, + "learning_rate": 1.996558125554644e-06, + "loss": 29.9219, + "step": 5877 + }, + { + "epoch": 0.05564127564108632, + "grad_norm": 563.1358032226562, + "learning_rate": 1.99655558360224e-06, + "loss": 58.25, + "step": 5878 + }, + { + "epoch": 0.05565074166280137, + "grad_norm": 464.103759765625, + "learning_rate": 1.996553040713138e-06, + "loss": 34.2695, + "step": 5879 + }, + { + "epoch": 0.055660207684516426, + "grad_norm": 363.8839111328125, + "learning_rate": 1.9965504968873416e-06, + "loss": 27.9141, + "step": 5880 + }, + { + "epoch": 0.05566967370623148, + "grad_norm": 710.8051147460938, + "learning_rate": 1.996547952124852e-06, + "loss": 50.7969, + "step": 5881 + }, + { + "epoch": 0.05567913972794654, + "grad_norm": 298.399169921875, + "learning_rate": 1.996545406425672e-06, + "loss": 20.1016, + "step": 5882 + }, + { + "epoch": 0.05568860574966159, + "grad_norm": 855.5828857421875, + "learning_rate": 1.9965428597898044e-06, + "loss": 39.25, + "step": 5883 + }, + { + "epoch": 0.055698071771376645, + "grad_norm": 253.22251892089844, + "learning_rate": 1.9965403122172516e-06, + "loss": 21.0469, + "step": 5884 + }, + { + "epoch": 0.0557075377930917, + "grad_norm": 249.9491424560547, + "learning_rate": 1.9965377637080155e-06, + "loss": 23.0, + "step": 5885 + }, + { + "epoch": 0.05571700381480675, + "grad_norm": 398.1078796386719, + "learning_rate": 1.996535214262099e-06, + "loss": 44.6719, + "step": 5886 + }, + { + "epoch": 0.055726469836521804, + "grad_norm": 298.50152587890625, + "learning_rate": 1.996532663879504e-06, + "loss": 33.0703, + "step": 5887 + }, + { + "epoch": 0.05573593585823686, + "grad_norm": 598.7393798828125, + "learning_rate": 1.996530112560233e-06, + "loss": 56.3594, + "step": 5888 + }, + { + "epoch": 0.05574540187995191, + "grad_norm": 623.9262084960938, + "learning_rate": 1.9965275603042884e-06, + "loss": 51.4375, + "step": 5889 + }, + { + "epoch": 0.05575486790166697, + "grad_norm": 192.5454559326172, + "learning_rate": 1.996525007111673e-06, + "loss": 24.3984, + "step": 5890 + }, + { + "epoch": 0.05576433392338202, + "grad_norm": 398.5856628417969, + "learning_rate": 1.996522452982389e-06, + "loss": 22.7734, + "step": 5891 + }, + { + "epoch": 0.055773799945097076, + "grad_norm": 519.1632690429688, + "learning_rate": 1.9965198979164388e-06, + "loss": 20.8203, + "step": 5892 + }, + { + "epoch": 0.05578326596681213, + "grad_norm": 428.55865478515625, + "learning_rate": 1.996517341913824e-06, + "loss": 27.0781, + "step": 5893 + }, + { + "epoch": 0.05579273198852718, + "grad_norm": 447.1426086425781, + "learning_rate": 1.9965147849745484e-06, + "loss": 23.9062, + "step": 5894 + }, + { + "epoch": 0.055802198010242235, + "grad_norm": 918.0801391601562, + "learning_rate": 1.9965122270986135e-06, + "loss": 64.7344, + "step": 5895 + }, + { + "epoch": 0.05581166403195729, + "grad_norm": 787.7216186523438, + "learning_rate": 1.996509668286022e-06, + "loss": 46.25, + "step": 5896 + }, + { + "epoch": 0.05582113005367234, + "grad_norm": 196.533447265625, + "learning_rate": 1.9965071085367764e-06, + "loss": 18.2969, + "step": 5897 + }, + { + "epoch": 0.055830596075387394, + "grad_norm": 452.271484375, + "learning_rate": 1.996504547850879e-06, + "loss": 29.3438, + "step": 5898 + }, + { + "epoch": 0.055840062097102454, + "grad_norm": 384.43310546875, + "learning_rate": 1.9965019862283318e-06, + "loss": 24.3125, + "step": 5899 + }, + { + "epoch": 0.05584952811881751, + "grad_norm": 281.0685729980469, + "learning_rate": 1.996499423669138e-06, + "loss": 32.6406, + "step": 5900 + }, + { + "epoch": 0.05585899414053256, + "grad_norm": 492.1231994628906, + "learning_rate": 1.996496860173299e-06, + "loss": 18.0781, + "step": 5901 + }, + { + "epoch": 0.05586846016224761, + "grad_norm": 604.3898315429688, + "learning_rate": 1.996494295740818e-06, + "loss": 48.3281, + "step": 5902 + }, + { + "epoch": 0.055877926183962666, + "grad_norm": 313.40020751953125, + "learning_rate": 1.9964917303716973e-06, + "loss": 24.0312, + "step": 5903 + }, + { + "epoch": 0.05588739220567772, + "grad_norm": 239.508056640625, + "learning_rate": 1.996489164065939e-06, + "loss": 24.0156, + "step": 5904 + }, + { + "epoch": 0.05589685822739277, + "grad_norm": 211.16099548339844, + "learning_rate": 1.996486596823546e-06, + "loss": 22.9531, + "step": 5905 + }, + { + "epoch": 0.055906324249107825, + "grad_norm": 479.8813171386719, + "learning_rate": 1.99648402864452e-06, + "loss": 21.0195, + "step": 5906 + }, + { + "epoch": 0.05591579027082288, + "grad_norm": 564.0578002929688, + "learning_rate": 1.9964814595288645e-06, + "loss": 37.0781, + "step": 5907 + }, + { + "epoch": 0.05592525629253794, + "grad_norm": 173.801513671875, + "learning_rate": 1.996478889476581e-06, + "loss": 15.5625, + "step": 5908 + }, + { + "epoch": 0.05593472231425299, + "grad_norm": 319.0535583496094, + "learning_rate": 1.9964763184876717e-06, + "loss": 42.5391, + "step": 5909 + }, + { + "epoch": 0.055944188335968044, + "grad_norm": 209.28993225097656, + "learning_rate": 1.99647374656214e-06, + "loss": 25.7812, + "step": 5910 + }, + { + "epoch": 0.0559536543576831, + "grad_norm": 357.8801574707031, + "learning_rate": 1.9964711736999874e-06, + "loss": 24.5312, + "step": 5911 + }, + { + "epoch": 0.05596312037939815, + "grad_norm": 3.008938789367676, + "learning_rate": 1.9964685999012168e-06, + "loss": 0.9849, + "step": 5912 + }, + { + "epoch": 0.0559725864011132, + "grad_norm": 311.1399841308594, + "learning_rate": 1.996466025165831e-06, + "loss": 30.5859, + "step": 5913 + }, + { + "epoch": 0.055982052422828256, + "grad_norm": 172.6392059326172, + "learning_rate": 1.9964634494938314e-06, + "loss": 25.4219, + "step": 5914 + }, + { + "epoch": 0.05599151844454331, + "grad_norm": 194.3601837158203, + "learning_rate": 1.9964608728852212e-06, + "loss": 22.0, + "step": 5915 + }, + { + "epoch": 0.05600098446625837, + "grad_norm": 313.2875671386719, + "learning_rate": 1.9964582953400027e-06, + "loss": 36.25, + "step": 5916 + }, + { + "epoch": 0.05601045048797342, + "grad_norm": 312.7739562988281, + "learning_rate": 1.9964557168581784e-06, + "loss": 20.4453, + "step": 5917 + }, + { + "epoch": 0.056019916509688475, + "grad_norm": 3.2622361183166504, + "learning_rate": 1.9964531374397503e-06, + "loss": 0.9829, + "step": 5918 + }, + { + "epoch": 0.05602938253140353, + "grad_norm": 343.8244934082031, + "learning_rate": 1.996450557084721e-06, + "loss": 32.4531, + "step": 5919 + }, + { + "epoch": 0.05603884855311858, + "grad_norm": 242.9092254638672, + "learning_rate": 1.996447975793093e-06, + "loss": 28.8516, + "step": 5920 + }, + { + "epoch": 0.056048314574833634, + "grad_norm": 321.96697998046875, + "learning_rate": 1.9964453935648688e-06, + "loss": 27.2578, + "step": 5921 + }, + { + "epoch": 0.05605778059654869, + "grad_norm": 195.5787353515625, + "learning_rate": 1.996442810400051e-06, + "loss": 19.0781, + "step": 5922 + }, + { + "epoch": 0.05606724661826374, + "grad_norm": 1015.7767944335938, + "learning_rate": 1.9964402262986415e-06, + "loss": 64.4219, + "step": 5923 + }, + { + "epoch": 0.05607671263997879, + "grad_norm": 750.4161987304688, + "learning_rate": 1.9964376412606428e-06, + "loss": 37.8594, + "step": 5924 + }, + { + "epoch": 0.05608617866169385, + "grad_norm": 1129.849853515625, + "learning_rate": 1.9964350552860577e-06, + "loss": 66.25, + "step": 5925 + }, + { + "epoch": 0.056095644683408906, + "grad_norm": 427.5364074707031, + "learning_rate": 1.996432468374889e-06, + "loss": 31.2969, + "step": 5926 + }, + { + "epoch": 0.05610511070512396, + "grad_norm": 406.7435607910156, + "learning_rate": 1.996429880527138e-06, + "loss": 26.2656, + "step": 5927 + }, + { + "epoch": 0.05611457672683901, + "grad_norm": 396.15667724609375, + "learning_rate": 1.9964272917428077e-06, + "loss": 38.8984, + "step": 5928 + }, + { + "epoch": 0.056124042748554065, + "grad_norm": 654.1126098632812, + "learning_rate": 1.996424702021901e-06, + "loss": 56.7344, + "step": 5929 + }, + { + "epoch": 0.05613350877026912, + "grad_norm": 505.5486755371094, + "learning_rate": 1.9964221113644197e-06, + "loss": 27.1016, + "step": 5930 + }, + { + "epoch": 0.05614297479198417, + "grad_norm": 242.86773681640625, + "learning_rate": 1.996419519770366e-06, + "loss": 19.3125, + "step": 5931 + }, + { + "epoch": 0.056152440813699224, + "grad_norm": 275.6515808105469, + "learning_rate": 1.9964169272397435e-06, + "loss": 29.9375, + "step": 5932 + }, + { + "epoch": 0.05616190683541428, + "grad_norm": 369.4269714355469, + "learning_rate": 1.9964143337725536e-06, + "loss": 46.4219, + "step": 5933 + }, + { + "epoch": 0.05617137285712934, + "grad_norm": 3.4647533893585205, + "learning_rate": 1.996411739368799e-06, + "loss": 0.9634, + "step": 5934 + }, + { + "epoch": 0.05618083887884439, + "grad_norm": 368.1000061035156, + "learning_rate": 1.996409144028482e-06, + "loss": 35.7344, + "step": 5935 + }, + { + "epoch": 0.05619030490055944, + "grad_norm": 242.17689514160156, + "learning_rate": 1.9964065477516056e-06, + "loss": 21.1953, + "step": 5936 + }, + { + "epoch": 0.056199770922274496, + "grad_norm": 484.1561279296875, + "learning_rate": 1.996403950538172e-06, + "loss": 26.5078, + "step": 5937 + }, + { + "epoch": 0.05620923694398955, + "grad_norm": 778.7901000976562, + "learning_rate": 1.9964013523881832e-06, + "loss": 50.3906, + "step": 5938 + }, + { + "epoch": 0.0562187029657046, + "grad_norm": 489.4440002441406, + "learning_rate": 1.9963987533016417e-06, + "loss": 34.3281, + "step": 5939 + }, + { + "epoch": 0.056228168987419655, + "grad_norm": 554.1152954101562, + "learning_rate": 1.9963961532785506e-06, + "loss": 43.0312, + "step": 5940 + }, + { + "epoch": 0.05623763500913471, + "grad_norm": 265.2211608886719, + "learning_rate": 1.9963935523189117e-06, + "loss": 21.5, + "step": 5941 + }, + { + "epoch": 0.05624710103084977, + "grad_norm": 767.5594482421875, + "learning_rate": 1.996390950422728e-06, + "loss": 50.875, + "step": 5942 + }, + { + "epoch": 0.05625656705256482, + "grad_norm": 1035.9884033203125, + "learning_rate": 1.996388347590001e-06, + "loss": 54.7969, + "step": 5943 + }, + { + "epoch": 0.056266033074279874, + "grad_norm": 476.7633361816406, + "learning_rate": 1.9963857438207344e-06, + "loss": 53.5625, + "step": 5944 + }, + { + "epoch": 0.05627549909599493, + "grad_norm": 462.16143798828125, + "learning_rate": 1.9963831391149297e-06, + "loss": 37.25, + "step": 5945 + }, + { + "epoch": 0.05628496511770998, + "grad_norm": 222.39834594726562, + "learning_rate": 1.99638053347259e-06, + "loss": 27.9453, + "step": 5946 + }, + { + "epoch": 0.05629443113942503, + "grad_norm": 270.9798889160156, + "learning_rate": 1.996377926893717e-06, + "loss": 21.6641, + "step": 5947 + }, + { + "epoch": 0.056303897161140086, + "grad_norm": 842.5490112304688, + "learning_rate": 1.996375319378314e-06, + "loss": 31.7344, + "step": 5948 + }, + { + "epoch": 0.05631336318285514, + "grad_norm": 233.71734619140625, + "learning_rate": 1.9963727109263825e-06, + "loss": 22.0664, + "step": 5949 + }, + { + "epoch": 0.05632282920457019, + "grad_norm": 501.1098327636719, + "learning_rate": 1.9963701015379256e-06, + "loss": 40.5, + "step": 5950 + }, + { + "epoch": 0.05633229522628525, + "grad_norm": 614.6891479492188, + "learning_rate": 1.996367491212946e-06, + "loss": 51.7734, + "step": 5951 + }, + { + "epoch": 0.056341761248000305, + "grad_norm": 459.8998107910156, + "learning_rate": 1.9963648799514453e-06, + "loss": 22.1719, + "step": 5952 + }, + { + "epoch": 0.05635122726971536, + "grad_norm": 452.5777282714844, + "learning_rate": 1.996362267753427e-06, + "loss": 40.875, + "step": 5953 + }, + { + "epoch": 0.05636069329143041, + "grad_norm": 495.5680847167969, + "learning_rate": 1.9963596546188923e-06, + "loss": 51.7188, + "step": 5954 + }, + { + "epoch": 0.056370159313145464, + "grad_norm": 188.32371520996094, + "learning_rate": 1.996357040547845e-06, + "loss": 25.6328, + "step": 5955 + }, + { + "epoch": 0.05637962533486052, + "grad_norm": 588.9437255859375, + "learning_rate": 1.9963544255402865e-06, + "loss": 34.5781, + "step": 5956 + }, + { + "epoch": 0.05638909135657557, + "grad_norm": 365.0994567871094, + "learning_rate": 1.9963518095962198e-06, + "loss": 20.6523, + "step": 5957 + }, + { + "epoch": 0.05639855737829062, + "grad_norm": 263.25390625, + "learning_rate": 1.996349192715647e-06, + "loss": 30.7188, + "step": 5958 + }, + { + "epoch": 0.05640802340000568, + "grad_norm": 689.253173828125, + "learning_rate": 1.996346574898571e-06, + "loss": 44.8125, + "step": 5959 + }, + { + "epoch": 0.056417489421720736, + "grad_norm": 355.2746276855469, + "learning_rate": 1.9963439561449943e-06, + "loss": 27.0156, + "step": 5960 + }, + { + "epoch": 0.05642695544343579, + "grad_norm": 1856.3203125, + "learning_rate": 1.996341336454919e-06, + "loss": 32.0703, + "step": 5961 + }, + { + "epoch": 0.05643642146515084, + "grad_norm": 400.7957458496094, + "learning_rate": 1.9963387158283468e-06, + "loss": 46.543, + "step": 5962 + }, + { + "epoch": 0.056445887486865895, + "grad_norm": 658.3197021484375, + "learning_rate": 1.996336094265282e-06, + "loss": 55.125, + "step": 5963 + }, + { + "epoch": 0.05645535350858095, + "grad_norm": 737.3098754882812, + "learning_rate": 1.9963334717657252e-06, + "loss": 53.375, + "step": 5964 + }, + { + "epoch": 0.056464819530296, + "grad_norm": 372.07244873046875, + "learning_rate": 1.9963308483296804e-06, + "loss": 26.8359, + "step": 5965 + }, + { + "epoch": 0.056474285552011054, + "grad_norm": 184.05409240722656, + "learning_rate": 1.9963282239571496e-06, + "loss": 26.8281, + "step": 5966 + }, + { + "epoch": 0.05648375157372611, + "grad_norm": 409.6349182128906, + "learning_rate": 1.9963255986481345e-06, + "loss": 46.2969, + "step": 5967 + }, + { + "epoch": 0.05649321759544117, + "grad_norm": 275.34674072265625, + "learning_rate": 1.9963229724026385e-06, + "loss": 24.0234, + "step": 5968 + }, + { + "epoch": 0.05650268361715622, + "grad_norm": 347.6881408691406, + "learning_rate": 1.9963203452206638e-06, + "loss": 20.8672, + "step": 5969 + }, + { + "epoch": 0.05651214963887127, + "grad_norm": 409.62493896484375, + "learning_rate": 1.996317717102213e-06, + "loss": 22.3594, + "step": 5970 + }, + { + "epoch": 0.056521615660586326, + "grad_norm": 191.59320068359375, + "learning_rate": 1.9963150880472877e-06, + "loss": 24.4375, + "step": 5971 + }, + { + "epoch": 0.05653108168230138, + "grad_norm": 701.7164306640625, + "learning_rate": 1.9963124580558914e-06, + "loss": 55.1719, + "step": 5972 + }, + { + "epoch": 0.05654054770401643, + "grad_norm": 233.51388549804688, + "learning_rate": 1.996309827128026e-06, + "loss": 34.1797, + "step": 5973 + }, + { + "epoch": 0.056550013725731485, + "grad_norm": 385.7482604980469, + "learning_rate": 1.9963071952636947e-06, + "loss": 34.9062, + "step": 5974 + }, + { + "epoch": 0.05655947974744654, + "grad_norm": 392.1423034667969, + "learning_rate": 1.996304562462899e-06, + "loss": 30.4375, + "step": 5975 + }, + { + "epoch": 0.05656894576916159, + "grad_norm": 472.9474792480469, + "learning_rate": 1.9963019287256418e-06, + "loss": 51.0781, + "step": 5976 + }, + { + "epoch": 0.05657841179087665, + "grad_norm": 603.6931762695312, + "learning_rate": 1.9962992940519257e-06, + "loss": 53.2812, + "step": 5977 + }, + { + "epoch": 0.056587877812591704, + "grad_norm": 472.1724853515625, + "learning_rate": 1.9962966584417533e-06, + "loss": 27.0469, + "step": 5978 + }, + { + "epoch": 0.05659734383430676, + "grad_norm": 146.69288635253906, + "learning_rate": 1.9962940218951267e-06, + "loss": 19.8047, + "step": 5979 + }, + { + "epoch": 0.05660680985602181, + "grad_norm": 229.28378295898438, + "learning_rate": 1.9962913844120484e-06, + "loss": 20.0703, + "step": 5980 + }, + { + "epoch": 0.05661627587773686, + "grad_norm": 599.288330078125, + "learning_rate": 1.996288745992521e-06, + "loss": 44.3438, + "step": 5981 + }, + { + "epoch": 0.056625741899451916, + "grad_norm": 232.11016845703125, + "learning_rate": 1.996286106636547e-06, + "loss": 22.9922, + "step": 5982 + }, + { + "epoch": 0.05663520792116697, + "grad_norm": 1020.9251098632812, + "learning_rate": 1.996283466344129e-06, + "loss": 27.0371, + "step": 5983 + }, + { + "epoch": 0.05664467394288202, + "grad_norm": 807.2850952148438, + "learning_rate": 1.9962808251152694e-06, + "loss": 68.5469, + "step": 5984 + }, + { + "epoch": 0.05665413996459708, + "grad_norm": 268.6888122558594, + "learning_rate": 1.9962781829499704e-06, + "loss": 24.8672, + "step": 5985 + }, + { + "epoch": 0.056663605986312135, + "grad_norm": 270.078857421875, + "learning_rate": 1.996275539848235e-06, + "loss": 21.7656, + "step": 5986 + }, + { + "epoch": 0.05667307200802719, + "grad_norm": 673.930419921875, + "learning_rate": 1.9962728958100653e-06, + "loss": 30.0625, + "step": 5987 + }, + { + "epoch": 0.05668253802974224, + "grad_norm": 2.5740957260131836, + "learning_rate": 1.996270250835464e-06, + "loss": 0.835, + "step": 5988 + }, + { + "epoch": 0.056692004051457294, + "grad_norm": 339.7935485839844, + "learning_rate": 1.996267604924433e-06, + "loss": 24.7188, + "step": 5989 + }, + { + "epoch": 0.05670147007317235, + "grad_norm": 441.92999267578125, + "learning_rate": 1.996264958076976e-06, + "loss": 27.375, + "step": 5990 + }, + { + "epoch": 0.0567109360948874, + "grad_norm": 1058.60888671875, + "learning_rate": 1.996262310293094e-06, + "loss": 37.0391, + "step": 5991 + }, + { + "epoch": 0.05672040211660245, + "grad_norm": 616.6779174804688, + "learning_rate": 1.9962596615727907e-06, + "loss": 53.0625, + "step": 5992 + }, + { + "epoch": 0.056729868138317506, + "grad_norm": 566.9111938476562, + "learning_rate": 1.996257011916068e-06, + "loss": 26.7656, + "step": 5993 + }, + { + "epoch": 0.056739334160032566, + "grad_norm": 441.0768737792969, + "learning_rate": 1.9962543613229285e-06, + "loss": 41.7812, + "step": 5994 + }, + { + "epoch": 0.05674880018174762, + "grad_norm": 577.2061767578125, + "learning_rate": 1.9962517097933746e-06, + "loss": 30.0859, + "step": 5995 + }, + { + "epoch": 0.05675826620346267, + "grad_norm": 252.3997039794922, + "learning_rate": 1.996249057327409e-06, + "loss": 21.9258, + "step": 5996 + }, + { + "epoch": 0.056767732225177725, + "grad_norm": 172.93797302246094, + "learning_rate": 1.9962464039250344e-06, + "loss": 24.6094, + "step": 5997 + }, + { + "epoch": 0.05677719824689278, + "grad_norm": 197.77999877929688, + "learning_rate": 1.996243749586253e-06, + "loss": 19.6406, + "step": 5998 + }, + { + "epoch": 0.05678666426860783, + "grad_norm": 1617.9158935546875, + "learning_rate": 1.9962410943110667e-06, + "loss": 40.1719, + "step": 5999 + }, + { + "epoch": 0.056796130290322884, + "grad_norm": 543.5609741210938, + "learning_rate": 1.996238438099479e-06, + "loss": 55.8672, + "step": 6000 + }, + { + "epoch": 0.05680559631203794, + "grad_norm": 662.2120361328125, + "learning_rate": 1.996235780951492e-06, + "loss": 25.1875, + "step": 6001 + }, + { + "epoch": 0.056815062333753, + "grad_norm": 344.6439514160156, + "learning_rate": 1.996233122867108e-06, + "loss": 23.3984, + "step": 6002 + }, + { + "epoch": 0.05682452835546805, + "grad_norm": 3.2073068618774414, + "learning_rate": 1.9962304638463296e-06, + "loss": 0.7883, + "step": 6003 + }, + { + "epoch": 0.0568339943771831, + "grad_norm": 736.3690185546875, + "learning_rate": 1.9962278038891595e-06, + "loss": 40.7383, + "step": 6004 + }, + { + "epoch": 0.056843460398898156, + "grad_norm": 448.8374938964844, + "learning_rate": 1.9962251429956e-06, + "loss": 39.8125, + "step": 6005 + }, + { + "epoch": 0.05685292642061321, + "grad_norm": 375.8259582519531, + "learning_rate": 1.996222481165654e-06, + "loss": 13.5664, + "step": 6006 + }, + { + "epoch": 0.05686239244232826, + "grad_norm": 480.1714172363281, + "learning_rate": 1.9962198183993232e-06, + "loss": 27.1484, + "step": 6007 + }, + { + "epoch": 0.056871858464043315, + "grad_norm": 627.6149291992188, + "learning_rate": 1.996217154696611e-06, + "loss": 45.8906, + "step": 6008 + }, + { + "epoch": 0.05688132448575837, + "grad_norm": 487.7811584472656, + "learning_rate": 1.9962144900575193e-06, + "loss": 26.3125, + "step": 6009 + }, + { + "epoch": 0.05689079050747342, + "grad_norm": 415.16302490234375, + "learning_rate": 1.996211824482051e-06, + "loss": 49.7969, + "step": 6010 + }, + { + "epoch": 0.05690025652918848, + "grad_norm": 207.4975128173828, + "learning_rate": 1.9962091579702078e-06, + "loss": 22.4375, + "step": 6011 + }, + { + "epoch": 0.056909722550903534, + "grad_norm": 832.2964477539062, + "learning_rate": 1.9962064905219935e-06, + "loss": 67.5625, + "step": 6012 + }, + { + "epoch": 0.05691918857261859, + "grad_norm": 260.93157958984375, + "learning_rate": 1.9962038221374092e-06, + "loss": 26.8438, + "step": 6013 + }, + { + "epoch": 0.05692865459433364, + "grad_norm": 646.7507934570312, + "learning_rate": 1.9962011528164584e-06, + "loss": 59.375, + "step": 6014 + }, + { + "epoch": 0.05693812061604869, + "grad_norm": 366.0468444824219, + "learning_rate": 1.9961984825591436e-06, + "loss": 22.2891, + "step": 6015 + }, + { + "epoch": 0.056947586637763746, + "grad_norm": 2.9575798511505127, + "learning_rate": 1.996195811365467e-06, + "loss": 0.8589, + "step": 6016 + }, + { + "epoch": 0.0569570526594788, + "grad_norm": 258.05963134765625, + "learning_rate": 1.996193139235431e-06, + "loss": 20.1953, + "step": 6017 + }, + { + "epoch": 0.05696651868119385, + "grad_norm": 541.1207885742188, + "learning_rate": 1.996190466169038e-06, + "loss": 33.6289, + "step": 6018 + }, + { + "epoch": 0.056975984702908905, + "grad_norm": 882.7634887695312, + "learning_rate": 1.996187792166291e-06, + "loss": 52.6719, + "step": 6019 + }, + { + "epoch": 0.056985450724623965, + "grad_norm": 739.6248779296875, + "learning_rate": 1.9961851172271926e-06, + "loss": 45.6562, + "step": 6020 + }, + { + "epoch": 0.05699491674633902, + "grad_norm": 598.9683837890625, + "learning_rate": 1.9961824413517446e-06, + "loss": 45.5312, + "step": 6021 + }, + { + "epoch": 0.05700438276805407, + "grad_norm": 316.1513366699219, + "learning_rate": 1.99617976453995e-06, + "loss": 35.1875, + "step": 6022 + }, + { + "epoch": 0.057013848789769124, + "grad_norm": 355.3245849609375, + "learning_rate": 1.996177086791811e-06, + "loss": 30.7812, + "step": 6023 + }, + { + "epoch": 0.05702331481148418, + "grad_norm": 260.68841552734375, + "learning_rate": 1.996174408107331e-06, + "loss": 24.3281, + "step": 6024 + }, + { + "epoch": 0.05703278083319923, + "grad_norm": 2.6083550453186035, + "learning_rate": 1.9961717284865116e-06, + "loss": 0.8057, + "step": 6025 + }, + { + "epoch": 0.05704224685491428, + "grad_norm": 470.07440185546875, + "learning_rate": 1.9961690479293553e-06, + "loss": 45.9062, + "step": 6026 + }, + { + "epoch": 0.057051712876629336, + "grad_norm": 410.11505126953125, + "learning_rate": 1.9961663664358647e-06, + "loss": 25.9688, + "step": 6027 + }, + { + "epoch": 0.057061178898344396, + "grad_norm": 2.7941408157348633, + "learning_rate": 1.9961636840060428e-06, + "loss": 0.814, + "step": 6028 + }, + { + "epoch": 0.05707064492005945, + "grad_norm": 445.6219482421875, + "learning_rate": 1.996161000639892e-06, + "loss": 28.4609, + "step": 6029 + }, + { + "epoch": 0.0570801109417745, + "grad_norm": 477.0326843261719, + "learning_rate": 1.9961583163374148e-06, + "loss": 37.1562, + "step": 6030 + }, + { + "epoch": 0.057089576963489555, + "grad_norm": 537.1514282226562, + "learning_rate": 1.996155631098613e-06, + "loss": 28.1797, + "step": 6031 + }, + { + "epoch": 0.05709904298520461, + "grad_norm": 545.252197265625, + "learning_rate": 1.99615294492349e-06, + "loss": 46.6094, + "step": 6032 + }, + { + "epoch": 0.05710850900691966, + "grad_norm": 274.0960693359375, + "learning_rate": 1.9961502578120483e-06, + "loss": 29.7969, + "step": 6033 + }, + { + "epoch": 0.057117975028634714, + "grad_norm": 340.5997619628906, + "learning_rate": 1.99614756976429e-06, + "loss": 19.7344, + "step": 6034 + }, + { + "epoch": 0.05712744105034977, + "grad_norm": 3.4164512157440186, + "learning_rate": 1.9961448807802177e-06, + "loss": 0.9756, + "step": 6035 + }, + { + "epoch": 0.05713690707206482, + "grad_norm": 476.407958984375, + "learning_rate": 1.996142190859834e-06, + "loss": 60.5156, + "step": 6036 + }, + { + "epoch": 0.05714637309377988, + "grad_norm": 504.6916809082031, + "learning_rate": 1.996139500003141e-06, + "loss": 53.9375, + "step": 6037 + }, + { + "epoch": 0.05715583911549493, + "grad_norm": 365.3038024902344, + "learning_rate": 1.9961368082101425e-06, + "loss": 50.2188, + "step": 6038 + }, + { + "epoch": 0.057165305137209986, + "grad_norm": 281.85589599609375, + "learning_rate": 1.99613411548084e-06, + "loss": 23.8828, + "step": 6039 + }, + { + "epoch": 0.05717477115892504, + "grad_norm": 262.01751708984375, + "learning_rate": 1.996131421815236e-06, + "loss": 26.9766, + "step": 6040 + }, + { + "epoch": 0.05718423718064009, + "grad_norm": 281.5522766113281, + "learning_rate": 1.9961287272133337e-06, + "loss": 22.8594, + "step": 6041 + }, + { + "epoch": 0.057193703202355145, + "grad_norm": 2.9328384399414062, + "learning_rate": 1.9961260316751347e-06, + "loss": 0.8245, + "step": 6042 + }, + { + "epoch": 0.0572031692240702, + "grad_norm": 424.8170166015625, + "learning_rate": 1.996123335200642e-06, + "loss": 30.2344, + "step": 6043 + }, + { + "epoch": 0.05721263524578525, + "grad_norm": 320.5901184082031, + "learning_rate": 1.9961206377898587e-06, + "loss": 29.5078, + "step": 6044 + }, + { + "epoch": 0.05722210126750031, + "grad_norm": 552.461181640625, + "learning_rate": 1.9961179394427863e-06, + "loss": 32.9297, + "step": 6045 + }, + { + "epoch": 0.057231567289215364, + "grad_norm": 246.4752655029297, + "learning_rate": 1.9961152401594277e-06, + "loss": 19.0859, + "step": 6046 + }, + { + "epoch": 0.05724103331093042, + "grad_norm": 574.8311767578125, + "learning_rate": 1.996112539939786e-06, + "loss": 57.9375, + "step": 6047 + }, + { + "epoch": 0.05725049933264547, + "grad_norm": 466.3529968261719, + "learning_rate": 1.9961098387838635e-06, + "loss": 33.7656, + "step": 6048 + }, + { + "epoch": 0.05725996535436052, + "grad_norm": 2.738769054412842, + "learning_rate": 1.996107136691662e-06, + "loss": 0.855, + "step": 6049 + }, + { + "epoch": 0.057269431376075576, + "grad_norm": 629.6502075195312, + "learning_rate": 1.996104433663185e-06, + "loss": 71.4688, + "step": 6050 + }, + { + "epoch": 0.05727889739779063, + "grad_norm": 205.0482177734375, + "learning_rate": 1.996101729698434e-06, + "loss": 25.0703, + "step": 6051 + }, + { + "epoch": 0.05728836341950568, + "grad_norm": 581.24755859375, + "learning_rate": 1.996099024797413e-06, + "loss": 52.1719, + "step": 6052 + }, + { + "epoch": 0.057297829441220735, + "grad_norm": 258.4392395019531, + "learning_rate": 1.9960963189601235e-06, + "loss": 22.8047, + "step": 6053 + }, + { + "epoch": 0.057307295462935795, + "grad_norm": 2308.922119140625, + "learning_rate": 1.9960936121865683e-06, + "loss": 30.0625, + "step": 6054 + }, + { + "epoch": 0.05731676148465085, + "grad_norm": 575.9912109375, + "learning_rate": 1.9960909044767494e-06, + "loss": 49.75, + "step": 6055 + }, + { + "epoch": 0.0573262275063659, + "grad_norm": 560.9868774414062, + "learning_rate": 1.9960881958306703e-06, + "loss": 41.3281, + "step": 6056 + }, + { + "epoch": 0.057335693528080954, + "grad_norm": 443.5106201171875, + "learning_rate": 1.996085486248333e-06, + "loss": 44.9688, + "step": 6057 + }, + { + "epoch": 0.05734515954979601, + "grad_norm": 521.419189453125, + "learning_rate": 1.99608277572974e-06, + "loss": 42.1875, + "step": 6058 + }, + { + "epoch": 0.05735462557151106, + "grad_norm": 324.8987121582031, + "learning_rate": 1.9960800642748943e-06, + "loss": 32.5625, + "step": 6059 + }, + { + "epoch": 0.05736409159322611, + "grad_norm": 309.31011962890625, + "learning_rate": 1.9960773518837975e-06, + "loss": 29.3125, + "step": 6060 + }, + { + "epoch": 0.057373557614941166, + "grad_norm": 487.1173400878906, + "learning_rate": 1.9960746385564536e-06, + "loss": 28.0391, + "step": 6061 + }, + { + "epoch": 0.05738302363665622, + "grad_norm": 816.8126831054688, + "learning_rate": 1.9960719242928642e-06, + "loss": 42.6094, + "step": 6062 + }, + { + "epoch": 0.05739248965837128, + "grad_norm": 359.6516418457031, + "learning_rate": 1.9960692090930316e-06, + "loss": 25.0312, + "step": 6063 + }, + { + "epoch": 0.05740195568008633, + "grad_norm": 820.0055541992188, + "learning_rate": 1.996066492956959e-06, + "loss": 50.5781, + "step": 6064 + }, + { + "epoch": 0.057411421701801385, + "grad_norm": 189.71844482421875, + "learning_rate": 1.9960637758846482e-06, + "loss": 19.375, + "step": 6065 + }, + { + "epoch": 0.05742088772351644, + "grad_norm": 493.0027770996094, + "learning_rate": 1.996061057876103e-06, + "loss": 28.9844, + "step": 6066 + }, + { + "epoch": 0.05743035374523149, + "grad_norm": 619.5789184570312, + "learning_rate": 1.9960583389313247e-06, + "loss": 28.3047, + "step": 6067 + }, + { + "epoch": 0.057439819766946544, + "grad_norm": 570.5950927734375, + "learning_rate": 1.9960556190503166e-06, + "loss": 32.8672, + "step": 6068 + }, + { + "epoch": 0.0574492857886616, + "grad_norm": 557.8634643554688, + "learning_rate": 1.996052898233081e-06, + "loss": 19.5078, + "step": 6069 + }, + { + "epoch": 0.05745875181037665, + "grad_norm": 350.1029052734375, + "learning_rate": 1.99605017647962e-06, + "loss": 43.8125, + "step": 6070 + }, + { + "epoch": 0.05746821783209171, + "grad_norm": 265.56988525390625, + "learning_rate": 1.9960474537899375e-06, + "loss": 23.25, + "step": 6071 + }, + { + "epoch": 0.05747768385380676, + "grad_norm": 475.6175537109375, + "learning_rate": 1.9960447301640346e-06, + "loss": 69.1406, + "step": 6072 + }, + { + "epoch": 0.057487149875521816, + "grad_norm": 233.93174743652344, + "learning_rate": 1.9960420056019146e-06, + "loss": 24.2266, + "step": 6073 + }, + { + "epoch": 0.05749661589723687, + "grad_norm": 343.20147705078125, + "learning_rate": 1.99603928010358e-06, + "loss": 26.9531, + "step": 6074 + }, + { + "epoch": 0.05750608191895192, + "grad_norm": 795.4180908203125, + "learning_rate": 1.9960365536690327e-06, + "loss": 35.2734, + "step": 6075 + }, + { + "epoch": 0.057515547940666975, + "grad_norm": 472.4461364746094, + "learning_rate": 1.9960338262982764e-06, + "loss": 27.3203, + "step": 6076 + }, + { + "epoch": 0.05752501396238203, + "grad_norm": 484.9478759765625, + "learning_rate": 1.9960310979913132e-06, + "loss": 21.5078, + "step": 6077 + }, + { + "epoch": 0.05753447998409708, + "grad_norm": 149.27859497070312, + "learning_rate": 1.9960283687481453e-06, + "loss": 20.5312, + "step": 6078 + }, + { + "epoch": 0.057543946005812134, + "grad_norm": 622.5048828125, + "learning_rate": 1.9960256385687755e-06, + "loss": 24.6211, + "step": 6079 + }, + { + "epoch": 0.057553412027527194, + "grad_norm": 340.9183044433594, + "learning_rate": 1.9960229074532065e-06, + "loss": 25.0312, + "step": 6080 + }, + { + "epoch": 0.05756287804924225, + "grad_norm": 494.12725830078125, + "learning_rate": 1.996020175401441e-06, + "loss": 41.375, + "step": 6081 + }, + { + "epoch": 0.0575723440709573, + "grad_norm": 733.7509155273438, + "learning_rate": 1.996017442413481e-06, + "loss": 26.6406, + "step": 6082 + }, + { + "epoch": 0.05758181009267235, + "grad_norm": 179.0654296875, + "learning_rate": 1.9960147084893297e-06, + "loss": 20.3203, + "step": 6083 + }, + { + "epoch": 0.057591276114387406, + "grad_norm": 421.48834228515625, + "learning_rate": 1.996011973628989e-06, + "loss": 29.9844, + "step": 6084 + }, + { + "epoch": 0.05760074213610246, + "grad_norm": 612.7059326171875, + "learning_rate": 1.996009237832462e-06, + "loss": 30.9922, + "step": 6085 + }, + { + "epoch": 0.05761020815781751, + "grad_norm": 294.0478515625, + "learning_rate": 1.996006501099751e-06, + "loss": 19.2891, + "step": 6086 + }, + { + "epoch": 0.057619674179532565, + "grad_norm": 296.74542236328125, + "learning_rate": 1.996003763430859e-06, + "loss": 25.2734, + "step": 6087 + }, + { + "epoch": 0.057629140201247625, + "grad_norm": 305.5855407714844, + "learning_rate": 1.9960010248257876e-06, + "loss": 16.8672, + "step": 6088 + }, + { + "epoch": 0.05763860622296268, + "grad_norm": 445.32537841796875, + "learning_rate": 1.995998285284541e-06, + "loss": 53.625, + "step": 6089 + }, + { + "epoch": 0.05764807224467773, + "grad_norm": 356.7520446777344, + "learning_rate": 1.99599554480712e-06, + "loss": 23.1641, + "step": 6090 + }, + { + "epoch": 0.057657538266392784, + "grad_norm": 463.4225158691406, + "learning_rate": 1.9959928033935284e-06, + "loss": 30.7188, + "step": 6091 + }, + { + "epoch": 0.05766700428810784, + "grad_norm": 884.9656372070312, + "learning_rate": 1.995990061043768e-06, + "loss": 36.2266, + "step": 6092 + }, + { + "epoch": 0.05767647030982289, + "grad_norm": 436.34881591796875, + "learning_rate": 1.995987317757842e-06, + "loss": 43.2031, + "step": 6093 + }, + { + "epoch": 0.05768593633153794, + "grad_norm": 437.49847412109375, + "learning_rate": 1.9959845735357524e-06, + "loss": 32.5, + "step": 6094 + }, + { + "epoch": 0.057695402353252996, + "grad_norm": 1481.4610595703125, + "learning_rate": 1.9959818283775024e-06, + "loss": 30.6875, + "step": 6095 + }, + { + "epoch": 0.05770486837496805, + "grad_norm": 371.42626953125, + "learning_rate": 1.9959790822830943e-06, + "loss": 27.7188, + "step": 6096 + }, + { + "epoch": 0.05771433439668311, + "grad_norm": 496.5184326171875, + "learning_rate": 1.995976335252531e-06, + "loss": 44.7188, + "step": 6097 + }, + { + "epoch": 0.05772380041839816, + "grad_norm": 601.0010375976562, + "learning_rate": 1.995973587285814e-06, + "loss": 42.2969, + "step": 6098 + }, + { + "epoch": 0.057733266440113215, + "grad_norm": 206.8031463623047, + "learning_rate": 1.995970838382947e-06, + "loss": 30.9141, + "step": 6099 + }, + { + "epoch": 0.05774273246182827, + "grad_norm": 451.1141357421875, + "learning_rate": 1.9959680885439323e-06, + "loss": 21.1797, + "step": 6100 + }, + { + "epoch": 0.05775219848354332, + "grad_norm": 214.62611389160156, + "learning_rate": 1.995965337768772e-06, + "loss": 18.0547, + "step": 6101 + }, + { + "epoch": 0.057761664505258374, + "grad_norm": 253.42626953125, + "learning_rate": 1.9959625860574695e-06, + "loss": 32.4219, + "step": 6102 + }, + { + "epoch": 0.05777113052697343, + "grad_norm": 422.7759704589844, + "learning_rate": 1.9959598334100265e-06, + "loss": 22.6328, + "step": 6103 + }, + { + "epoch": 0.05778059654868848, + "grad_norm": 319.71099853515625, + "learning_rate": 1.9959570798264465e-06, + "loss": 24.125, + "step": 6104 + }, + { + "epoch": 0.05779006257040353, + "grad_norm": 466.8223571777344, + "learning_rate": 1.9959543253067316e-06, + "loss": 21.2617, + "step": 6105 + }, + { + "epoch": 0.05779952859211859, + "grad_norm": 437.90771484375, + "learning_rate": 1.9959515698508844e-06, + "loss": 45.4453, + "step": 6106 + }, + { + "epoch": 0.057808994613833646, + "grad_norm": 734.5560913085938, + "learning_rate": 1.995948813458907e-06, + "loss": 75.7812, + "step": 6107 + }, + { + "epoch": 0.0578184606355487, + "grad_norm": 521.664794921875, + "learning_rate": 1.995946056130803e-06, + "loss": 55.0312, + "step": 6108 + }, + { + "epoch": 0.05782792665726375, + "grad_norm": 543.9450073242188, + "learning_rate": 1.9959432978665745e-06, + "loss": 40.8281, + "step": 6109 + }, + { + "epoch": 0.057837392678978805, + "grad_norm": 294.8009338378906, + "learning_rate": 1.995940538666224e-06, + "loss": 14.8398, + "step": 6110 + }, + { + "epoch": 0.05784685870069386, + "grad_norm": 629.453369140625, + "learning_rate": 1.995937778529754e-06, + "loss": 46.6484, + "step": 6111 + }, + { + "epoch": 0.05785632472240891, + "grad_norm": 564.6889038085938, + "learning_rate": 1.9959350174571675e-06, + "loss": 54.5625, + "step": 6112 + }, + { + "epoch": 0.057865790744123964, + "grad_norm": 1076.77197265625, + "learning_rate": 1.995932255448467e-06, + "loss": 66.2188, + "step": 6113 + }, + { + "epoch": 0.057875256765839024, + "grad_norm": 152.33499145507812, + "learning_rate": 1.9959294925036548e-06, + "loss": 22.8438, + "step": 6114 + }, + { + "epoch": 0.05788472278755408, + "grad_norm": 187.46165466308594, + "learning_rate": 1.9959267286227333e-06, + "loss": 20.25, + "step": 6115 + }, + { + "epoch": 0.05789418880926913, + "grad_norm": 506.64862060546875, + "learning_rate": 1.9959239638057062e-06, + "loss": 16.2852, + "step": 6116 + }, + { + "epoch": 0.05790365483098418, + "grad_norm": 597.6337280273438, + "learning_rate": 1.995921198052575e-06, + "loss": 51.2422, + "step": 6117 + }, + { + "epoch": 0.057913120852699236, + "grad_norm": 452.3586730957031, + "learning_rate": 1.9959184313633427e-06, + "loss": 36.3125, + "step": 6118 + }, + { + "epoch": 0.05792258687441429, + "grad_norm": 250.21087646484375, + "learning_rate": 1.9959156637380117e-06, + "loss": 22.5156, + "step": 6119 + }, + { + "epoch": 0.05793205289612934, + "grad_norm": 619.9223022460938, + "learning_rate": 1.995912895176585e-06, + "loss": 47.6719, + "step": 6120 + }, + { + "epoch": 0.057941518917844395, + "grad_norm": 155.6558380126953, + "learning_rate": 1.9959101256790646e-06, + "loss": 26.4531, + "step": 6121 + }, + { + "epoch": 0.05795098493955945, + "grad_norm": 351.5205383300781, + "learning_rate": 1.995907355245454e-06, + "loss": 47.8594, + "step": 6122 + }, + { + "epoch": 0.05796045096127451, + "grad_norm": 939.7988891601562, + "learning_rate": 1.995904583875755e-06, + "loss": 25.0977, + "step": 6123 + }, + { + "epoch": 0.05796991698298956, + "grad_norm": 462.1754455566406, + "learning_rate": 1.9959018115699703e-06, + "loss": 32.9766, + "step": 6124 + }, + { + "epoch": 0.057979383004704614, + "grad_norm": 3.2234838008880615, + "learning_rate": 1.995899038328103e-06, + "loss": 0.8818, + "step": 6125 + }, + { + "epoch": 0.05798884902641967, + "grad_norm": 368.183837890625, + "learning_rate": 1.995896264150155e-06, + "loss": 27.5, + "step": 6126 + }, + { + "epoch": 0.05799831504813472, + "grad_norm": 179.4762420654297, + "learning_rate": 1.99589348903613e-06, + "loss": 18.8984, + "step": 6127 + }, + { + "epoch": 0.05800778106984977, + "grad_norm": 465.8351745605469, + "learning_rate": 1.9958907129860294e-06, + "loss": 44.625, + "step": 6128 + }, + { + "epoch": 0.058017247091564826, + "grad_norm": 410.76513671875, + "learning_rate": 1.995887935999856e-06, + "loss": 36.0781, + "step": 6129 + }, + { + "epoch": 0.05802671311327988, + "grad_norm": 502.5469970703125, + "learning_rate": 1.995885158077613e-06, + "loss": 41.0938, + "step": 6130 + }, + { + "epoch": 0.05803617913499494, + "grad_norm": 701.6943969726562, + "learning_rate": 1.995882379219303e-06, + "loss": 33.6484, + "step": 6131 + }, + { + "epoch": 0.05804564515670999, + "grad_norm": 300.2444152832031, + "learning_rate": 1.995879599424928e-06, + "loss": 28.6094, + "step": 6132 + }, + { + "epoch": 0.058055111178425045, + "grad_norm": 454.6190490722656, + "learning_rate": 1.9958768186944914e-06, + "loss": 19.6562, + "step": 6133 + }, + { + "epoch": 0.0580645772001401, + "grad_norm": 2.9063475131988525, + "learning_rate": 1.9958740370279953e-06, + "loss": 0.9331, + "step": 6134 + }, + { + "epoch": 0.05807404322185515, + "grad_norm": 432.82568359375, + "learning_rate": 1.995871254425442e-06, + "loss": 43.2812, + "step": 6135 + }, + { + "epoch": 0.058083509243570204, + "grad_norm": 327.11181640625, + "learning_rate": 1.995868470886835e-06, + "loss": 29.0781, + "step": 6136 + }, + { + "epoch": 0.05809297526528526, + "grad_norm": 319.7499084472656, + "learning_rate": 1.995865686412176e-06, + "loss": 26.7656, + "step": 6137 + }, + { + "epoch": 0.05810244128700031, + "grad_norm": 492.23907470703125, + "learning_rate": 1.995862901001468e-06, + "loss": 33.9961, + "step": 6138 + }, + { + "epoch": 0.05811190730871536, + "grad_norm": 265.81634521484375, + "learning_rate": 1.995860114654714e-06, + "loss": 19.4375, + "step": 6139 + }, + { + "epoch": 0.05812137333043042, + "grad_norm": 260.2234191894531, + "learning_rate": 1.995857327371916e-06, + "loss": 26.6484, + "step": 6140 + }, + { + "epoch": 0.058130839352145476, + "grad_norm": 330.59149169921875, + "learning_rate": 1.9958545391530772e-06, + "loss": 49.7344, + "step": 6141 + }, + { + "epoch": 0.05814030537386053, + "grad_norm": 163.14785766601562, + "learning_rate": 1.9958517499981995e-06, + "loss": 22.8594, + "step": 6142 + }, + { + "epoch": 0.05814977139557558, + "grad_norm": 200.0049591064453, + "learning_rate": 1.9958489599072865e-06, + "loss": 24.4688, + "step": 6143 + }, + { + "epoch": 0.058159237417290635, + "grad_norm": 704.4409790039062, + "learning_rate": 1.9958461688803398e-06, + "loss": 23.1094, + "step": 6144 + }, + { + "epoch": 0.05816870343900569, + "grad_norm": 238.40333557128906, + "learning_rate": 1.9958433769173628e-06, + "loss": 18.1875, + "step": 6145 + }, + { + "epoch": 0.05817816946072074, + "grad_norm": 2.998922348022461, + "learning_rate": 1.9958405840183576e-06, + "loss": 0.8958, + "step": 6146 + }, + { + "epoch": 0.058187635482435794, + "grad_norm": 382.3394775390625, + "learning_rate": 1.995837790183327e-06, + "loss": 22.4609, + "step": 6147 + }, + { + "epoch": 0.05819710150415085, + "grad_norm": 566.8423461914062, + "learning_rate": 1.995834995412274e-06, + "loss": 37.3867, + "step": 6148 + }, + { + "epoch": 0.05820656752586591, + "grad_norm": 490.437744140625, + "learning_rate": 1.9958321997052003e-06, + "loss": 25.5625, + "step": 6149 + }, + { + "epoch": 0.05821603354758096, + "grad_norm": 419.26678466796875, + "learning_rate": 1.9958294030621096e-06, + "loss": 15.8555, + "step": 6150 + }, + { + "epoch": 0.05822549956929601, + "grad_norm": 765.2266845703125, + "learning_rate": 1.995826605483004e-06, + "loss": 35.7656, + "step": 6151 + }, + { + "epoch": 0.058234965591011066, + "grad_norm": 576.5387573242188, + "learning_rate": 1.9958238069678857e-06, + "loss": 39.8203, + "step": 6152 + }, + { + "epoch": 0.05824443161272612, + "grad_norm": 599.8582763671875, + "learning_rate": 1.9958210075167584e-06, + "loss": 33.5781, + "step": 6153 + }, + { + "epoch": 0.05825389763444117, + "grad_norm": 308.6522216796875, + "learning_rate": 1.995818207129624e-06, + "loss": 25.2578, + "step": 6154 + }, + { + "epoch": 0.058263363656156225, + "grad_norm": 718.4147338867188, + "learning_rate": 1.995815405806485e-06, + "loss": 48.1875, + "step": 6155 + }, + { + "epoch": 0.05827282967787128, + "grad_norm": 178.73890686035156, + "learning_rate": 1.9958126035473443e-06, + "loss": 26.3203, + "step": 6156 + }, + { + "epoch": 0.05828229569958634, + "grad_norm": 295.66571044921875, + "learning_rate": 1.9958098003522048e-06, + "loss": 42.0781, + "step": 6157 + }, + { + "epoch": 0.05829176172130139, + "grad_norm": 385.95257568359375, + "learning_rate": 1.995806996221069e-06, + "loss": 43.5625, + "step": 6158 + }, + { + "epoch": 0.058301227743016444, + "grad_norm": 1058.294677734375, + "learning_rate": 1.995804191153939e-06, + "loss": 66.7031, + "step": 6159 + }, + { + "epoch": 0.0583106937647315, + "grad_norm": 411.1528625488281, + "learning_rate": 1.995801385150818e-06, + "loss": 24.8906, + "step": 6160 + }, + { + "epoch": 0.05832015978644655, + "grad_norm": 534.4652099609375, + "learning_rate": 1.9957985782117084e-06, + "loss": 31.625, + "step": 6161 + }, + { + "epoch": 0.0583296258081616, + "grad_norm": 444.7084045410156, + "learning_rate": 1.995795770336613e-06, + "loss": 24.3047, + "step": 6162 + }, + { + "epoch": 0.058339091829876656, + "grad_norm": 195.21707153320312, + "learning_rate": 1.9957929615255346e-06, + "loss": 20.6797, + "step": 6163 + }, + { + "epoch": 0.05834855785159171, + "grad_norm": 1075.02001953125, + "learning_rate": 1.9957901517784753e-06, + "loss": 45.4141, + "step": 6164 + }, + { + "epoch": 0.05835802387330676, + "grad_norm": 3.8759095668792725, + "learning_rate": 1.995787341095438e-06, + "loss": 0.9312, + "step": 6165 + }, + { + "epoch": 0.05836748989502182, + "grad_norm": 313.6600036621094, + "learning_rate": 1.9957845294764257e-06, + "loss": 27.8594, + "step": 6166 + }, + { + "epoch": 0.058376955916736875, + "grad_norm": 686.0399780273438, + "learning_rate": 1.9957817169214403e-06, + "loss": 53.2188, + "step": 6167 + }, + { + "epoch": 0.05838642193845193, + "grad_norm": 608.9328002929688, + "learning_rate": 1.995778903430485e-06, + "loss": 20.7031, + "step": 6168 + }, + { + "epoch": 0.05839588796016698, + "grad_norm": 3.335322141647339, + "learning_rate": 1.9957760890035626e-06, + "loss": 0.9502, + "step": 6169 + }, + { + "epoch": 0.058405353981882034, + "grad_norm": 413.2901916503906, + "learning_rate": 1.9957732736406753e-06, + "loss": 26.7656, + "step": 6170 + }, + { + "epoch": 0.05841482000359709, + "grad_norm": 437.28759765625, + "learning_rate": 1.9957704573418255e-06, + "loss": 41.5625, + "step": 6171 + }, + { + "epoch": 0.05842428602531214, + "grad_norm": 237.4875030517578, + "learning_rate": 1.9957676401070166e-06, + "loss": 22.9688, + "step": 6172 + }, + { + "epoch": 0.05843375204702719, + "grad_norm": 1336.7020263671875, + "learning_rate": 1.9957648219362507e-06, + "loss": 85.8281, + "step": 6173 + }, + { + "epoch": 0.05844321806874225, + "grad_norm": 490.8914794921875, + "learning_rate": 1.995762002829531e-06, + "loss": 43.1406, + "step": 6174 + }, + { + "epoch": 0.058452684090457306, + "grad_norm": 266.9607849121094, + "learning_rate": 1.9957591827868597e-06, + "loss": 39.2812, + "step": 6175 + }, + { + "epoch": 0.05846215011217236, + "grad_norm": 936.501220703125, + "learning_rate": 1.9957563618082393e-06, + "loss": 36.0703, + "step": 6176 + }, + { + "epoch": 0.05847161613388741, + "grad_norm": 1127.9208984375, + "learning_rate": 1.995753539893673e-06, + "loss": 47.7969, + "step": 6177 + }, + { + "epoch": 0.058481082155602465, + "grad_norm": 3.464064836502075, + "learning_rate": 1.995750717043163e-06, + "loss": 0.8115, + "step": 6178 + }, + { + "epoch": 0.05849054817731752, + "grad_norm": 433.02545166015625, + "learning_rate": 1.995747893256712e-06, + "loss": 15.0234, + "step": 6179 + }, + { + "epoch": 0.05850001419903257, + "grad_norm": 3.4729361534118652, + "learning_rate": 1.995745068534323e-06, + "loss": 0.9966, + "step": 6180 + }, + { + "epoch": 0.058509480220747624, + "grad_norm": 308.822509765625, + "learning_rate": 1.9957422428759984e-06, + "loss": 35.5625, + "step": 6181 + }, + { + "epoch": 0.05851894624246268, + "grad_norm": 670.1209106445312, + "learning_rate": 1.9957394162817406e-06, + "loss": 36.2344, + "step": 6182 + }, + { + "epoch": 0.05852841226417774, + "grad_norm": 550.752685546875, + "learning_rate": 1.995736588751553e-06, + "loss": 43.6562, + "step": 6183 + }, + { + "epoch": 0.05853787828589279, + "grad_norm": 175.62399291992188, + "learning_rate": 1.9957337602854373e-06, + "loss": 25.3672, + "step": 6184 + }, + { + "epoch": 0.05854734430760784, + "grad_norm": 293.2277526855469, + "learning_rate": 1.995730930883397e-06, + "loss": 20.8047, + "step": 6185 + }, + { + "epoch": 0.058556810329322896, + "grad_norm": 342.6553955078125, + "learning_rate": 1.9957281005454342e-06, + "loss": 19.4219, + "step": 6186 + }, + { + "epoch": 0.05856627635103795, + "grad_norm": 273.6746520996094, + "learning_rate": 1.9957252692715514e-06, + "loss": 18.6758, + "step": 6187 + }, + { + "epoch": 0.058575742372753, + "grad_norm": 265.65087890625, + "learning_rate": 1.9957224370617523e-06, + "loss": 23.5469, + "step": 6188 + }, + { + "epoch": 0.058585208394468055, + "grad_norm": 206.0570068359375, + "learning_rate": 1.9957196039160385e-06, + "loss": 19.6719, + "step": 6189 + }, + { + "epoch": 0.05859467441618311, + "grad_norm": 942.96484375, + "learning_rate": 1.9957167698344134e-06, + "loss": 54.6328, + "step": 6190 + }, + { + "epoch": 0.05860414043789816, + "grad_norm": 253.86111450195312, + "learning_rate": 1.9957139348168787e-06, + "loss": 22.5234, + "step": 6191 + }, + { + "epoch": 0.05861360645961322, + "grad_norm": 566.4148559570312, + "learning_rate": 1.9957110988634383e-06, + "loss": 55.9062, + "step": 6192 + }, + { + "epoch": 0.058623072481328274, + "grad_norm": 555.9630737304688, + "learning_rate": 1.995708261974094e-06, + "loss": 36.6719, + "step": 6193 + }, + { + "epoch": 0.05863253850304333, + "grad_norm": 479.6463623046875, + "learning_rate": 1.9957054241488487e-06, + "loss": 16.3438, + "step": 6194 + }, + { + "epoch": 0.05864200452475838, + "grad_norm": 3.297760009765625, + "learning_rate": 1.995702585387705e-06, + "loss": 0.9038, + "step": 6195 + }, + { + "epoch": 0.05865147054647343, + "grad_norm": 154.11647033691406, + "learning_rate": 1.9956997456906656e-06, + "loss": 23.1797, + "step": 6196 + }, + { + "epoch": 0.058660936568188486, + "grad_norm": 397.64306640625, + "learning_rate": 1.9956969050577332e-06, + "loss": 23.3672, + "step": 6197 + }, + { + "epoch": 0.05867040258990354, + "grad_norm": 281.5032653808594, + "learning_rate": 1.9956940634889108e-06, + "loss": 23.3789, + "step": 6198 + }, + { + "epoch": 0.05867986861161859, + "grad_norm": 1884.361572265625, + "learning_rate": 1.9956912209842003e-06, + "loss": 73.7812, + "step": 6199 + }, + { + "epoch": 0.05868933463333365, + "grad_norm": 429.50872802734375, + "learning_rate": 1.995688377543605e-06, + "loss": 49.5625, + "step": 6200 + }, + { + "epoch": 0.058698800655048705, + "grad_norm": 187.98019409179688, + "learning_rate": 1.995685533167128e-06, + "loss": 18.0469, + "step": 6201 + }, + { + "epoch": 0.05870826667676376, + "grad_norm": 578.8253784179688, + "learning_rate": 1.9956826878547704e-06, + "loss": 48.4375, + "step": 6202 + }, + { + "epoch": 0.05871773269847881, + "grad_norm": 299.3757629394531, + "learning_rate": 1.9956798416065364e-06, + "loss": 26.7969, + "step": 6203 + }, + { + "epoch": 0.058727198720193864, + "grad_norm": 368.16339111328125, + "learning_rate": 1.995676994422428e-06, + "loss": 31.7891, + "step": 6204 + }, + { + "epoch": 0.05873666474190892, + "grad_norm": 460.57525634765625, + "learning_rate": 1.995674146302448e-06, + "loss": 30.8047, + "step": 6205 + }, + { + "epoch": 0.05874613076362397, + "grad_norm": 531.7388916015625, + "learning_rate": 1.995671297246599e-06, + "loss": 34.0, + "step": 6206 + }, + { + "epoch": 0.05875559678533902, + "grad_norm": 311.6280517578125, + "learning_rate": 1.995668447254884e-06, + "loss": 22.1562, + "step": 6207 + }, + { + "epoch": 0.058765062807054076, + "grad_norm": 473.8367004394531, + "learning_rate": 1.9956655963273053e-06, + "loss": 53.8594, + "step": 6208 + }, + { + "epoch": 0.058774528828769136, + "grad_norm": 231.59571838378906, + "learning_rate": 1.9956627444638655e-06, + "loss": 19.8438, + "step": 6209 + }, + { + "epoch": 0.05878399485048419, + "grad_norm": 377.1076965332031, + "learning_rate": 1.9956598916645677e-06, + "loss": 42.5312, + "step": 6210 + }, + { + "epoch": 0.05879346087219924, + "grad_norm": 637.999755859375, + "learning_rate": 1.9956570379294142e-06, + "loss": 17.6016, + "step": 6211 + }, + { + "epoch": 0.058802926893914295, + "grad_norm": 298.9466857910156, + "learning_rate": 1.9956541832584083e-06, + "loss": 30.6016, + "step": 6212 + }, + { + "epoch": 0.05881239291562935, + "grad_norm": 537.0502319335938, + "learning_rate": 1.9956513276515517e-06, + "loss": 23.9531, + "step": 6213 + }, + { + "epoch": 0.0588218589373444, + "grad_norm": 2.71209454536438, + "learning_rate": 1.9956484711088477e-06, + "loss": 0.8457, + "step": 6214 + }, + { + "epoch": 0.058831324959059454, + "grad_norm": 291.3915100097656, + "learning_rate": 1.9956456136302994e-06, + "loss": 19.0, + "step": 6215 + }, + { + "epoch": 0.05884079098077451, + "grad_norm": 567.547607421875, + "learning_rate": 1.995642755215909e-06, + "loss": 23.2891, + "step": 6216 + }, + { + "epoch": 0.05885025700248957, + "grad_norm": 2.9538023471832275, + "learning_rate": 1.9956398958656787e-06, + "loss": 0.6895, + "step": 6217 + }, + { + "epoch": 0.05885972302420462, + "grad_norm": 695.7349243164062, + "learning_rate": 1.9956370355796117e-06, + "loss": 32.5312, + "step": 6218 + }, + { + "epoch": 0.05886918904591967, + "grad_norm": 229.80381774902344, + "learning_rate": 1.9956341743577106e-06, + "loss": 30.2812, + "step": 6219 + }, + { + "epoch": 0.058878655067634726, + "grad_norm": 359.1683654785156, + "learning_rate": 1.9956313121999786e-06, + "loss": 25.2656, + "step": 6220 + }, + { + "epoch": 0.05888812108934978, + "grad_norm": 334.2339782714844, + "learning_rate": 1.9956284491064176e-06, + "loss": 24.0234, + "step": 6221 + }, + { + "epoch": 0.05889758711106483, + "grad_norm": 436.1961669921875, + "learning_rate": 1.995625585077031e-06, + "loss": 44.0781, + "step": 6222 + }, + { + "epoch": 0.058907053132779885, + "grad_norm": 316.7265930175781, + "learning_rate": 1.9956227201118205e-06, + "loss": 19.1016, + "step": 6223 + }, + { + "epoch": 0.05891651915449494, + "grad_norm": 518.1422729492188, + "learning_rate": 1.9956198542107895e-06, + "loss": 19.7891, + "step": 6224 + }, + { + "epoch": 0.05892598517620999, + "grad_norm": 436.12835693359375, + "learning_rate": 1.9956169873739412e-06, + "loss": 24.8438, + "step": 6225 + }, + { + "epoch": 0.05893545119792505, + "grad_norm": 300.46630859375, + "learning_rate": 1.995614119601277e-06, + "loss": 19.7734, + "step": 6226 + }, + { + "epoch": 0.058944917219640104, + "grad_norm": 263.4805908203125, + "learning_rate": 1.995611250892801e-06, + "loss": 31.4141, + "step": 6227 + }, + { + "epoch": 0.05895438324135516, + "grad_norm": 937.9735107421875, + "learning_rate": 1.9956083812485146e-06, + "loss": 37.6328, + "step": 6228 + }, + { + "epoch": 0.05896384926307021, + "grad_norm": 626.0648193359375, + "learning_rate": 1.995605510668421e-06, + "loss": 28.375, + "step": 6229 + }, + { + "epoch": 0.05897331528478526, + "grad_norm": 461.75750732421875, + "learning_rate": 1.9956026391525235e-06, + "loss": 45.9141, + "step": 6230 + }, + { + "epoch": 0.058982781306500316, + "grad_norm": 3.1103546619415283, + "learning_rate": 1.9955997667008243e-06, + "loss": 0.9331, + "step": 6231 + }, + { + "epoch": 0.05899224732821537, + "grad_norm": 460.9446716308594, + "learning_rate": 1.995596893313326e-06, + "loss": 46.8906, + "step": 6232 + }, + { + "epoch": 0.05900171334993042, + "grad_norm": 238.0462188720703, + "learning_rate": 1.9955940189900313e-06, + "loss": 28.0625, + "step": 6233 + }, + { + "epoch": 0.059011179371645475, + "grad_norm": 363.671142578125, + "learning_rate": 1.9955911437309426e-06, + "loss": 49.0312, + "step": 6234 + }, + { + "epoch": 0.059020645393360535, + "grad_norm": 797.9710693359375, + "learning_rate": 1.9955882675360638e-06, + "loss": 28.4844, + "step": 6235 + }, + { + "epoch": 0.05903011141507559, + "grad_norm": 361.6568908691406, + "learning_rate": 1.995585390405396e-06, + "loss": 58.375, + "step": 6236 + }, + { + "epoch": 0.05903957743679064, + "grad_norm": 883.0831909179688, + "learning_rate": 1.995582512338943e-06, + "loss": 72.3438, + "step": 6237 + }, + { + "epoch": 0.059049043458505694, + "grad_norm": 353.953125, + "learning_rate": 1.995579633336707e-06, + "loss": 52.4688, + "step": 6238 + }, + { + "epoch": 0.05905850948022075, + "grad_norm": 293.3305358886719, + "learning_rate": 1.9955767533986915e-06, + "loss": 28.9062, + "step": 6239 + }, + { + "epoch": 0.0590679755019358, + "grad_norm": 435.7214660644531, + "learning_rate": 1.9955738725248984e-06, + "loss": 17.8828, + "step": 6240 + }, + { + "epoch": 0.05907744152365085, + "grad_norm": 193.99864196777344, + "learning_rate": 1.9955709907153303e-06, + "loss": 21.6484, + "step": 6241 + }, + { + "epoch": 0.059086907545365906, + "grad_norm": 244.4300079345703, + "learning_rate": 1.9955681079699904e-06, + "loss": 30.7969, + "step": 6242 + }, + { + "epoch": 0.059096373567080966, + "grad_norm": 186.37278747558594, + "learning_rate": 1.995565224288881e-06, + "loss": 18.5, + "step": 6243 + }, + { + "epoch": 0.05910583958879602, + "grad_norm": 513.6492919921875, + "learning_rate": 1.9955623396720057e-06, + "loss": 29.6953, + "step": 6244 + }, + { + "epoch": 0.05911530561051107, + "grad_norm": 335.8236083984375, + "learning_rate": 1.995559454119366e-06, + "loss": 54.875, + "step": 6245 + }, + { + "epoch": 0.059124771632226125, + "grad_norm": 286.6195983886719, + "learning_rate": 1.9955565676309655e-06, + "loss": 20.25, + "step": 6246 + }, + { + "epoch": 0.05913423765394118, + "grad_norm": 676.4180297851562, + "learning_rate": 1.9955536802068066e-06, + "loss": 49.4219, + "step": 6247 + }, + { + "epoch": 0.05914370367565623, + "grad_norm": 241.76443481445312, + "learning_rate": 1.995550791846892e-06, + "loss": 27.8281, + "step": 6248 + }, + { + "epoch": 0.059153169697371284, + "grad_norm": 308.77996826171875, + "learning_rate": 1.995547902551224e-06, + "loss": 32.875, + "step": 6249 + }, + { + "epoch": 0.05916263571908634, + "grad_norm": 519.9591064453125, + "learning_rate": 1.995545012319806e-06, + "loss": 48.2812, + "step": 6250 + }, + { + "epoch": 0.05917210174080139, + "grad_norm": 372.7762451171875, + "learning_rate": 1.995542121152641e-06, + "loss": 25.2969, + "step": 6251 + }, + { + "epoch": 0.05918156776251645, + "grad_norm": 216.79339599609375, + "learning_rate": 1.9955392290497303e-06, + "loss": 22.8906, + "step": 6252 + }, + { + "epoch": 0.0591910337842315, + "grad_norm": 715.3034057617188, + "learning_rate": 1.995536336011078e-06, + "loss": 72.3125, + "step": 6253 + }, + { + "epoch": 0.059200499805946556, + "grad_norm": 381.89251708984375, + "learning_rate": 1.995533442036686e-06, + "loss": 22.2422, + "step": 6254 + }, + { + "epoch": 0.05920996582766161, + "grad_norm": 616.2096557617188, + "learning_rate": 1.9955305471265575e-06, + "loss": 47.2031, + "step": 6255 + }, + { + "epoch": 0.05921943184937666, + "grad_norm": 855.9620971679688, + "learning_rate": 1.995527651280695e-06, + "loss": 64.3281, + "step": 6256 + }, + { + "epoch": 0.059228897871091715, + "grad_norm": 3.5289416313171387, + "learning_rate": 1.9955247544991015e-06, + "loss": 0.8943, + "step": 6257 + }, + { + "epoch": 0.05923836389280677, + "grad_norm": 368.30712890625, + "learning_rate": 1.995521856781779e-06, + "loss": 37.0469, + "step": 6258 + }, + { + "epoch": 0.05924782991452182, + "grad_norm": 461.75677490234375, + "learning_rate": 1.995518958128731e-06, + "loss": 41.7812, + "step": 6259 + }, + { + "epoch": 0.05925729593623688, + "grad_norm": 674.7532958984375, + "learning_rate": 1.99551605853996e-06, + "loss": 54.5234, + "step": 6260 + }, + { + "epoch": 0.059266761957951934, + "grad_norm": 456.91595458984375, + "learning_rate": 1.995513158015469e-06, + "loss": 53.2656, + "step": 6261 + }, + { + "epoch": 0.05927622797966699, + "grad_norm": 217.6236572265625, + "learning_rate": 1.99551025655526e-06, + "loss": 11.8125, + "step": 6262 + }, + { + "epoch": 0.05928569400138204, + "grad_norm": 459.2733459472656, + "learning_rate": 1.9955073541593358e-06, + "loss": 50.2188, + "step": 6263 + }, + { + "epoch": 0.05929516002309709, + "grad_norm": 302.5071716308594, + "learning_rate": 1.9955044508277e-06, + "loss": 23.5703, + "step": 6264 + }, + { + "epoch": 0.059304626044812146, + "grad_norm": 358.3125, + "learning_rate": 1.9955015465603543e-06, + "loss": 52.6562, + "step": 6265 + }, + { + "epoch": 0.0593140920665272, + "grad_norm": 1009.1815185546875, + "learning_rate": 1.9954986413573025e-06, + "loss": 43.1719, + "step": 6266 + }, + { + "epoch": 0.05932355808824225, + "grad_norm": 243.70458984375, + "learning_rate": 1.995495735218546e-06, + "loss": 18.9062, + "step": 6267 + }, + { + "epoch": 0.059333024109957305, + "grad_norm": 160.16183471679688, + "learning_rate": 1.995492828144089e-06, + "loss": 21.0, + "step": 6268 + }, + { + "epoch": 0.059342490131672365, + "grad_norm": 164.35496520996094, + "learning_rate": 1.995489920133933e-06, + "loss": 19.6406, + "step": 6269 + }, + { + "epoch": 0.05935195615338742, + "grad_norm": 220.97389221191406, + "learning_rate": 1.9954870111880813e-06, + "loss": 27.5781, + "step": 6270 + }, + { + "epoch": 0.05936142217510247, + "grad_norm": 386.0002746582031, + "learning_rate": 1.995484101306537e-06, + "loss": 29.4766, + "step": 6271 + }, + { + "epoch": 0.059370888196817524, + "grad_norm": 328.4129333496094, + "learning_rate": 1.995481190489302e-06, + "loss": 22.625, + "step": 6272 + }, + { + "epoch": 0.05938035421853258, + "grad_norm": 313.8253173828125, + "learning_rate": 1.9954782787363795e-06, + "loss": 21.3906, + "step": 6273 + }, + { + "epoch": 0.05938982024024763, + "grad_norm": 471.78759765625, + "learning_rate": 1.9954753660477722e-06, + "loss": 20.6719, + "step": 6274 + }, + { + "epoch": 0.05939928626196268, + "grad_norm": 631.31396484375, + "learning_rate": 1.995472452423483e-06, + "loss": 40.8125, + "step": 6275 + }, + { + "epoch": 0.059408752283677736, + "grad_norm": 273.0135803222656, + "learning_rate": 1.9954695378635142e-06, + "loss": 21.4766, + "step": 6276 + }, + { + "epoch": 0.05941821830539279, + "grad_norm": 385.3695373535156, + "learning_rate": 1.995466622367869e-06, + "loss": 20.5117, + "step": 6277 + }, + { + "epoch": 0.05942768432710785, + "grad_norm": 352.4319152832031, + "learning_rate": 1.9954637059365497e-06, + "loss": 34.0, + "step": 6278 + }, + { + "epoch": 0.0594371503488229, + "grad_norm": 633.7155151367188, + "learning_rate": 1.995460788569559e-06, + "loss": 21.0078, + "step": 6279 + }, + { + "epoch": 0.059446616370537955, + "grad_norm": 313.2594909667969, + "learning_rate": 1.9954578702669006e-06, + "loss": 23.2266, + "step": 6280 + }, + { + "epoch": 0.05945608239225301, + "grad_norm": 324.1105041503906, + "learning_rate": 1.9954549510285763e-06, + "loss": 24.0, + "step": 6281 + }, + { + "epoch": 0.05946554841396806, + "grad_norm": 3.2286155223846436, + "learning_rate": 1.9954520308545894e-06, + "loss": 0.9233, + "step": 6282 + }, + { + "epoch": 0.059475014435683114, + "grad_norm": 636.1705932617188, + "learning_rate": 1.995449109744942e-06, + "loss": 62.5625, + "step": 6283 + }, + { + "epoch": 0.05948448045739817, + "grad_norm": 289.4077453613281, + "learning_rate": 1.995446187699637e-06, + "loss": 24.0781, + "step": 6284 + }, + { + "epoch": 0.05949394647911322, + "grad_norm": 258.0048522949219, + "learning_rate": 1.9954432647186776e-06, + "loss": 18.6484, + "step": 6285 + }, + { + "epoch": 0.05950341250082828, + "grad_norm": 2.6121153831481934, + "learning_rate": 1.9954403408020663e-06, + "loss": 0.8503, + "step": 6286 + }, + { + "epoch": 0.05951287852254333, + "grad_norm": 609.2626953125, + "learning_rate": 1.995437415949806e-06, + "loss": 67.1172, + "step": 6287 + }, + { + "epoch": 0.059522344544258386, + "grad_norm": 457.699951171875, + "learning_rate": 1.995434490161899e-06, + "loss": 43.9531, + "step": 6288 + }, + { + "epoch": 0.05953181056597344, + "grad_norm": 259.5176086425781, + "learning_rate": 1.9954315634383483e-06, + "loss": 23.6484, + "step": 6289 + }, + { + "epoch": 0.05954127658768849, + "grad_norm": 430.31390380859375, + "learning_rate": 1.9954286357791566e-06, + "loss": 33.5312, + "step": 6290 + }, + { + "epoch": 0.059550742609403545, + "grad_norm": 423.831787109375, + "learning_rate": 1.995425707184327e-06, + "loss": 25.9922, + "step": 6291 + }, + { + "epoch": 0.0595602086311186, + "grad_norm": 154.39988708496094, + "learning_rate": 1.9954227776538623e-06, + "loss": 21.3828, + "step": 6292 + }, + { + "epoch": 0.05956967465283365, + "grad_norm": 796.2849731445312, + "learning_rate": 1.9954198471877644e-06, + "loss": 51.125, + "step": 6293 + }, + { + "epoch": 0.059579140674548704, + "grad_norm": 414.5036926269531, + "learning_rate": 1.995416915786037e-06, + "loss": 21.4609, + "step": 6294 + }, + { + "epoch": 0.059588606696263764, + "grad_norm": 2.8346567153930664, + "learning_rate": 1.995413983448682e-06, + "loss": 0.8853, + "step": 6295 + }, + { + "epoch": 0.05959807271797882, + "grad_norm": 263.94537353515625, + "learning_rate": 1.9954110501757026e-06, + "loss": 22.4062, + "step": 6296 + }, + { + "epoch": 0.05960753873969387, + "grad_norm": 637.360107421875, + "learning_rate": 1.995408115967102e-06, + "loss": 57.2344, + "step": 6297 + }, + { + "epoch": 0.05961700476140892, + "grad_norm": 291.7852783203125, + "learning_rate": 1.995405180822882e-06, + "loss": 30.6562, + "step": 6298 + }, + { + "epoch": 0.059626470783123976, + "grad_norm": 494.1409606933594, + "learning_rate": 1.9954022447430466e-06, + "loss": 44.1172, + "step": 6299 + }, + { + "epoch": 0.05963593680483903, + "grad_norm": 762.5299072265625, + "learning_rate": 1.9953993077275976e-06, + "loss": 45.0938, + "step": 6300 + }, + { + "epoch": 0.05964540282655408, + "grad_norm": 373.4777526855469, + "learning_rate": 1.9953963697765376e-06, + "loss": 26.1719, + "step": 6301 + }, + { + "epoch": 0.059654868848269135, + "grad_norm": 316.7348937988281, + "learning_rate": 1.99539343088987e-06, + "loss": 25.0859, + "step": 6302 + }, + { + "epoch": 0.05966433486998419, + "grad_norm": 905.0464477539062, + "learning_rate": 1.9953904910675974e-06, + "loss": 31.2969, + "step": 6303 + }, + { + "epoch": 0.05967380089169925, + "grad_norm": 444.50921630859375, + "learning_rate": 1.9953875503097226e-06, + "loss": 25.1719, + "step": 6304 + }, + { + "epoch": 0.0596832669134143, + "grad_norm": 313.89306640625, + "learning_rate": 1.995384608616248e-06, + "loss": 23.6719, + "step": 6305 + }, + { + "epoch": 0.059692732935129354, + "grad_norm": 286.1103210449219, + "learning_rate": 1.9953816659871765e-06, + "loss": 33.0312, + "step": 6306 + }, + { + "epoch": 0.05970219895684441, + "grad_norm": 276.51068115234375, + "learning_rate": 1.9953787224225116e-06, + "loss": 21.3359, + "step": 6307 + }, + { + "epoch": 0.05971166497855946, + "grad_norm": 3.033092498779297, + "learning_rate": 1.995375777922255e-06, + "loss": 0.8501, + "step": 6308 + }, + { + "epoch": 0.05972113100027451, + "grad_norm": 732.479248046875, + "learning_rate": 1.99537283248641e-06, + "loss": 53.4375, + "step": 6309 + }, + { + "epoch": 0.059730597021989566, + "grad_norm": 757.01025390625, + "learning_rate": 1.9953698861149794e-06, + "loss": 50.7188, + "step": 6310 + }, + { + "epoch": 0.05974006304370462, + "grad_norm": 260.27593994140625, + "learning_rate": 1.995366938807966e-06, + "loss": 23.9922, + "step": 6311 + }, + { + "epoch": 0.05974952906541968, + "grad_norm": 271.4865417480469, + "learning_rate": 1.995363990565372e-06, + "loss": 20.3828, + "step": 6312 + }, + { + "epoch": 0.05975899508713473, + "grad_norm": 812.9490966796875, + "learning_rate": 1.995361041387201e-06, + "loss": 19.8438, + "step": 6313 + }, + { + "epoch": 0.059768461108849785, + "grad_norm": 4.351327896118164, + "learning_rate": 1.995358091273455e-06, + "loss": 0.8496, + "step": 6314 + }, + { + "epoch": 0.05977792713056484, + "grad_norm": 485.9508972167969, + "learning_rate": 1.995355140224137e-06, + "loss": 24.8828, + "step": 6315 + }, + { + "epoch": 0.05978739315227989, + "grad_norm": 561.2383422851562, + "learning_rate": 1.99535218823925e-06, + "loss": 42.7578, + "step": 6316 + }, + { + "epoch": 0.059796859173994944, + "grad_norm": 2.6409003734588623, + "learning_rate": 1.995349235318797e-06, + "loss": 0.7812, + "step": 6317 + }, + { + "epoch": 0.05980632519571, + "grad_norm": 297.78875732421875, + "learning_rate": 1.9953462814627805e-06, + "loss": 27.1328, + "step": 6318 + }, + { + "epoch": 0.05981579121742505, + "grad_norm": 2.9294204711914062, + "learning_rate": 1.9953433266712033e-06, + "loss": 1.0103, + "step": 6319 + }, + { + "epoch": 0.0598252572391401, + "grad_norm": 955.739990234375, + "learning_rate": 1.995340370944068e-06, + "loss": 20.8828, + "step": 6320 + }, + { + "epoch": 0.05983472326085516, + "grad_norm": 201.77471923828125, + "learning_rate": 1.9953374142813773e-06, + "loss": 22.5, + "step": 6321 + }, + { + "epoch": 0.059844189282570216, + "grad_norm": 819.0091552734375, + "learning_rate": 1.9953344566831344e-06, + "loss": 38.4766, + "step": 6322 + }, + { + "epoch": 0.05985365530428527, + "grad_norm": 346.6968994140625, + "learning_rate": 1.995331498149342e-06, + "loss": 51.3281, + "step": 6323 + }, + { + "epoch": 0.05986312132600032, + "grad_norm": 222.225830078125, + "learning_rate": 1.9953285386800024e-06, + "loss": 20.6406, + "step": 6324 + }, + { + "epoch": 0.059872587347715375, + "grad_norm": 288.9809265136719, + "learning_rate": 1.995325578275119e-06, + "loss": 22.7578, + "step": 6325 + }, + { + "epoch": 0.05988205336943043, + "grad_norm": 267.2344665527344, + "learning_rate": 1.995322616934694e-06, + "loss": 21.0156, + "step": 6326 + }, + { + "epoch": 0.05989151939114548, + "grad_norm": 336.25592041015625, + "learning_rate": 1.995319654658731e-06, + "loss": 23.5469, + "step": 6327 + }, + { + "epoch": 0.059900985412860534, + "grad_norm": 706.9424438476562, + "learning_rate": 1.9953166914472316e-06, + "loss": 53.2344, + "step": 6328 + }, + { + "epoch": 0.059910451434575594, + "grad_norm": 174.7903289794922, + "learning_rate": 1.9953137273002e-06, + "loss": 19.1094, + "step": 6329 + }, + { + "epoch": 0.05991991745629065, + "grad_norm": 303.8824157714844, + "learning_rate": 1.9953107622176375e-06, + "loss": 30.5312, + "step": 6330 + }, + { + "epoch": 0.0599293834780057, + "grad_norm": 305.72406005859375, + "learning_rate": 1.9953077961995484e-06, + "loss": 25.75, + "step": 6331 + }, + { + "epoch": 0.05993884949972075, + "grad_norm": 462.84222412109375, + "learning_rate": 1.995304829245934e-06, + "loss": 20.0508, + "step": 6332 + }, + { + "epoch": 0.059948315521435806, + "grad_norm": 353.6441650390625, + "learning_rate": 1.9953018613567983e-06, + "loss": 27.5312, + "step": 6333 + }, + { + "epoch": 0.05995778154315086, + "grad_norm": 2.6646080017089844, + "learning_rate": 1.9952988925321434e-06, + "loss": 0.8789, + "step": 6334 + }, + { + "epoch": 0.05996724756486591, + "grad_norm": 912.95458984375, + "learning_rate": 1.9952959227719723e-06, + "loss": 93.0859, + "step": 6335 + }, + { + "epoch": 0.059976713586580965, + "grad_norm": 246.05361938476562, + "learning_rate": 1.9952929520762876e-06, + "loss": 38.8672, + "step": 6336 + }, + { + "epoch": 0.05998617960829602, + "grad_norm": 253.16567993164062, + "learning_rate": 1.9952899804450928e-06, + "loss": 20.0781, + "step": 6337 + }, + { + "epoch": 0.05999564563001108, + "grad_norm": 399.10382080078125, + "learning_rate": 1.99528700787839e-06, + "loss": 25.1484, + "step": 6338 + }, + { + "epoch": 0.06000511165172613, + "grad_norm": 366.8915100097656, + "learning_rate": 1.995284034376182e-06, + "loss": 19.8633, + "step": 6339 + }, + { + "epoch": 0.060014577673441184, + "grad_norm": 332.15069580078125, + "learning_rate": 1.995281059938471e-06, + "loss": 17.7266, + "step": 6340 + }, + { + "epoch": 0.06002404369515624, + "grad_norm": 1136.94677734375, + "learning_rate": 1.995278084565262e-06, + "loss": 38.5312, + "step": 6341 + }, + { + "epoch": 0.06003350971687129, + "grad_norm": 532.398681640625, + "learning_rate": 1.995275108256555e-06, + "loss": 47.9844, + "step": 6342 + }, + { + "epoch": 0.06004297573858634, + "grad_norm": 155.5275421142578, + "learning_rate": 1.995272131012355e-06, + "loss": 24.9375, + "step": 6343 + }, + { + "epoch": 0.060052441760301396, + "grad_norm": 423.2911682128906, + "learning_rate": 1.9952691528326636e-06, + "loss": 40.3906, + "step": 6344 + }, + { + "epoch": 0.06006190778201645, + "grad_norm": 818.2544555664062, + "learning_rate": 1.995266173717484e-06, + "loss": 28.0547, + "step": 6345 + }, + { + "epoch": 0.0600713738037315, + "grad_norm": 401.2441711425781, + "learning_rate": 1.9952631936668195e-06, + "loss": 24.9766, + "step": 6346 + }, + { + "epoch": 0.06008083982544656, + "grad_norm": 785.6786499023438, + "learning_rate": 1.9952602126806717e-06, + "loss": 66.3594, + "step": 6347 + }, + { + "epoch": 0.060090305847161615, + "grad_norm": 839.36767578125, + "learning_rate": 1.9952572307590443e-06, + "loss": 41.1406, + "step": 6348 + }, + { + "epoch": 0.06009977186887667, + "grad_norm": 383.2018737792969, + "learning_rate": 1.9952542479019392e-06, + "loss": 15.168, + "step": 6349 + }, + { + "epoch": 0.06010923789059172, + "grad_norm": 214.7924346923828, + "learning_rate": 1.9952512641093604e-06, + "loss": 24.4766, + "step": 6350 + }, + { + "epoch": 0.060118703912306774, + "grad_norm": 351.66668701171875, + "learning_rate": 1.9952482793813104e-06, + "loss": 26.0938, + "step": 6351 + }, + { + "epoch": 0.06012816993402183, + "grad_norm": 542.2760620117188, + "learning_rate": 1.9952452937177917e-06, + "loss": 48.1328, + "step": 6352 + }, + { + "epoch": 0.06013763595573688, + "grad_norm": 207.5268096923828, + "learning_rate": 1.9952423071188068e-06, + "loss": 26.4297, + "step": 6353 + }, + { + "epoch": 0.06014710197745193, + "grad_norm": 589.1365966796875, + "learning_rate": 1.995239319584359e-06, + "loss": 50.0938, + "step": 6354 + }, + { + "epoch": 0.06015656799916699, + "grad_norm": 229.51954650878906, + "learning_rate": 1.9952363311144512e-06, + "loss": 26.9375, + "step": 6355 + }, + { + "epoch": 0.060166034020882046, + "grad_norm": 310.8518371582031, + "learning_rate": 1.995233341709086e-06, + "loss": 15.6406, + "step": 6356 + }, + { + "epoch": 0.0601755000425971, + "grad_norm": 673.6285400390625, + "learning_rate": 1.995230351368266e-06, + "loss": 50.8516, + "step": 6357 + }, + { + "epoch": 0.06018496606431215, + "grad_norm": 574.564208984375, + "learning_rate": 1.9952273600919942e-06, + "loss": 48.4805, + "step": 6358 + }, + { + "epoch": 0.060194432086027205, + "grad_norm": 200.95123291015625, + "learning_rate": 1.9952243678802734e-06, + "loss": 16.8672, + "step": 6359 + }, + { + "epoch": 0.06020389810774226, + "grad_norm": 147.7655029296875, + "learning_rate": 1.9952213747331064e-06, + "loss": 23.8594, + "step": 6360 + }, + { + "epoch": 0.06021336412945731, + "grad_norm": 536.0313110351562, + "learning_rate": 1.9952183806504965e-06, + "loss": 44.8984, + "step": 6361 + }, + { + "epoch": 0.060222830151172364, + "grad_norm": 389.5287780761719, + "learning_rate": 1.995215385632446e-06, + "loss": 20.1641, + "step": 6362 + }, + { + "epoch": 0.06023229617288742, + "grad_norm": 191.95663452148438, + "learning_rate": 1.9952123896789574e-06, + "loss": 23.2031, + "step": 6363 + }, + { + "epoch": 0.06024176219460248, + "grad_norm": 202.5122528076172, + "learning_rate": 1.9952093927900337e-06, + "loss": 26.8516, + "step": 6364 + }, + { + "epoch": 0.06025122821631753, + "grad_norm": 317.7578125, + "learning_rate": 1.995206394965678e-06, + "loss": 22.6406, + "step": 6365 + }, + { + "epoch": 0.06026069423803258, + "grad_norm": 414.0502624511719, + "learning_rate": 1.995203396205894e-06, + "loss": 31.375, + "step": 6366 + }, + { + "epoch": 0.060270160259747636, + "grad_norm": 421.12939453125, + "learning_rate": 1.9952003965106827e-06, + "loss": 43.3906, + "step": 6367 + }, + { + "epoch": 0.06027962628146269, + "grad_norm": 239.49139404296875, + "learning_rate": 1.9951973958800477e-06, + "loss": 22.5469, + "step": 6368 + }, + { + "epoch": 0.06028909230317774, + "grad_norm": 213.24456787109375, + "learning_rate": 1.9951943943139923e-06, + "loss": 23.2344, + "step": 6369 + }, + { + "epoch": 0.060298558324892795, + "grad_norm": 614.9152221679688, + "learning_rate": 1.9951913918125187e-06, + "loss": 44.2969, + "step": 6370 + }, + { + "epoch": 0.06030802434660785, + "grad_norm": 614.640869140625, + "learning_rate": 1.9951883883756297e-06, + "loss": 63.1562, + "step": 6371 + }, + { + "epoch": 0.06031749036832291, + "grad_norm": 327.3179016113281, + "learning_rate": 1.9951853840033287e-06, + "loss": 17.6797, + "step": 6372 + }, + { + "epoch": 0.06032695639003796, + "grad_norm": 299.9538879394531, + "learning_rate": 1.995182378695618e-06, + "loss": 25.8594, + "step": 6373 + }, + { + "epoch": 0.060336422411753014, + "grad_norm": 484.2112121582031, + "learning_rate": 1.9951793724525004e-06, + "loss": 57.9531, + "step": 6374 + }, + { + "epoch": 0.06034588843346807, + "grad_norm": 447.0586242675781, + "learning_rate": 1.9951763652739793e-06, + "loss": 54.5781, + "step": 6375 + }, + { + "epoch": 0.06035535445518312, + "grad_norm": 1075.6336669921875, + "learning_rate": 1.9951733571600572e-06, + "loss": 47.2031, + "step": 6376 + }, + { + "epoch": 0.06036482047689817, + "grad_norm": 189.13645935058594, + "learning_rate": 1.995170348110737e-06, + "loss": 27.0781, + "step": 6377 + }, + { + "epoch": 0.060374286498613226, + "grad_norm": 264.81591796875, + "learning_rate": 1.9951673381260206e-06, + "loss": 27.9297, + "step": 6378 + }, + { + "epoch": 0.06038375252032828, + "grad_norm": 1292.2562255859375, + "learning_rate": 1.9951643272059123e-06, + "loss": 36.6875, + "step": 6379 + }, + { + "epoch": 0.06039321854204333, + "grad_norm": 433.38092041015625, + "learning_rate": 1.9951613153504145e-06, + "loss": 40.2188, + "step": 6380 + }, + { + "epoch": 0.06040268456375839, + "grad_norm": 190.37326049804688, + "learning_rate": 1.9951583025595293e-06, + "loss": 29.6562, + "step": 6381 + }, + { + "epoch": 0.060412150585473445, + "grad_norm": 593.2374877929688, + "learning_rate": 1.99515528883326e-06, + "loss": 38.1328, + "step": 6382 + }, + { + "epoch": 0.0604216166071885, + "grad_norm": 261.79833984375, + "learning_rate": 1.99515227417161e-06, + "loss": 23.3438, + "step": 6383 + }, + { + "epoch": 0.06043108262890355, + "grad_norm": 391.11749267578125, + "learning_rate": 1.9951492585745813e-06, + "loss": 30.5391, + "step": 6384 + }, + { + "epoch": 0.060440548650618604, + "grad_norm": 309.815185546875, + "learning_rate": 1.995146242042177e-06, + "loss": 29.7031, + "step": 6385 + }, + { + "epoch": 0.06045001467233366, + "grad_norm": 1234.6427001953125, + "learning_rate": 1.9951432245743996e-06, + "loss": 31.4141, + "step": 6386 + }, + { + "epoch": 0.06045948069404871, + "grad_norm": 614.6873779296875, + "learning_rate": 1.995140206171253e-06, + "loss": 47.1328, + "step": 6387 + }, + { + "epoch": 0.06046894671576376, + "grad_norm": 353.2185363769531, + "learning_rate": 1.995137186832739e-06, + "loss": 20.8828, + "step": 6388 + }, + { + "epoch": 0.060478412737478816, + "grad_norm": 2.6680829524993896, + "learning_rate": 1.9951341665588607e-06, + "loss": 0.9287, + "step": 6389 + }, + { + "epoch": 0.060487878759193876, + "grad_norm": 392.48675537109375, + "learning_rate": 1.9951311453496213e-06, + "loss": 50.7188, + "step": 6390 + }, + { + "epoch": 0.06049734478090893, + "grad_norm": 313.3567810058594, + "learning_rate": 1.995128123205023e-06, + "loss": 21.3906, + "step": 6391 + }, + { + "epoch": 0.06050681080262398, + "grad_norm": 310.4812316894531, + "learning_rate": 1.9951251001250692e-06, + "loss": 20.8359, + "step": 6392 + }, + { + "epoch": 0.060516276824339035, + "grad_norm": 3.3456785678863525, + "learning_rate": 1.9951220761097626e-06, + "loss": 1.0337, + "step": 6393 + }, + { + "epoch": 0.06052574284605409, + "grad_norm": 215.39596557617188, + "learning_rate": 1.995119051159106e-06, + "loss": 23.2969, + "step": 6394 + }, + { + "epoch": 0.06053520886776914, + "grad_norm": 659.859375, + "learning_rate": 1.9951160252731022e-06, + "loss": 59.0156, + "step": 6395 + }, + { + "epoch": 0.060544674889484194, + "grad_norm": 408.6947326660156, + "learning_rate": 1.9951129984517537e-06, + "loss": 50.2188, + "step": 6396 + }, + { + "epoch": 0.06055414091119925, + "grad_norm": 313.92816162109375, + "learning_rate": 1.9951099706950644e-06, + "loss": 26.2344, + "step": 6397 + }, + { + "epoch": 0.06056360693291431, + "grad_norm": 702.7360229492188, + "learning_rate": 1.995106942003036e-06, + "loss": 32.8672, + "step": 6398 + }, + { + "epoch": 0.06057307295462936, + "grad_norm": 3.30771803855896, + "learning_rate": 1.9951039123756718e-06, + "loss": 0.9116, + "step": 6399 + }, + { + "epoch": 0.06058253897634441, + "grad_norm": 251.19798278808594, + "learning_rate": 1.995100881812975e-06, + "loss": 19.3359, + "step": 6400 + }, + { + "epoch": 0.060592004998059466, + "grad_norm": 383.645263671875, + "learning_rate": 1.9950978503149474e-06, + "loss": 29.4062, + "step": 6401 + }, + { + "epoch": 0.06060147101977452, + "grad_norm": 743.726318359375, + "learning_rate": 1.9950948178815935e-06, + "loss": 30.5273, + "step": 6402 + }, + { + "epoch": 0.06061093704148957, + "grad_norm": 586.4777221679688, + "learning_rate": 1.9950917845129145e-06, + "loss": 56.1406, + "step": 6403 + }, + { + "epoch": 0.060620403063204625, + "grad_norm": 387.6602783203125, + "learning_rate": 1.9950887502089146e-06, + "loss": 20.9844, + "step": 6404 + }, + { + "epoch": 0.06062986908491968, + "grad_norm": 384.4565124511719, + "learning_rate": 1.9950857149695955e-06, + "loss": 42.2812, + "step": 6405 + }, + { + "epoch": 0.06063933510663473, + "grad_norm": 314.5341796875, + "learning_rate": 1.9950826787949606e-06, + "loss": 41.7969, + "step": 6406 + }, + { + "epoch": 0.06064880112834979, + "grad_norm": 219.42608642578125, + "learning_rate": 1.995079641685013e-06, + "loss": 19.9219, + "step": 6407 + }, + { + "epoch": 0.060658267150064844, + "grad_norm": 381.53900146484375, + "learning_rate": 1.9950766036397548e-06, + "loss": 35.8438, + "step": 6408 + }, + { + "epoch": 0.0606677331717799, + "grad_norm": 221.1123809814453, + "learning_rate": 1.99507356465919e-06, + "loss": 19.8203, + "step": 6409 + }, + { + "epoch": 0.06067719919349495, + "grad_norm": 133.74465942382812, + "learning_rate": 1.99507052474332e-06, + "loss": 20.6562, + "step": 6410 + }, + { + "epoch": 0.06068666521521, + "grad_norm": 261.4531555175781, + "learning_rate": 1.9950674838921485e-06, + "loss": 21.0156, + "step": 6411 + }, + { + "epoch": 0.060696131236925056, + "grad_norm": 3.1523284912109375, + "learning_rate": 1.995064442105679e-06, + "loss": 0.8457, + "step": 6412 + }, + { + "epoch": 0.06070559725864011, + "grad_norm": 444.3761291503906, + "learning_rate": 1.995061399383913e-06, + "loss": 27.6328, + "step": 6413 + }, + { + "epoch": 0.06071506328035516, + "grad_norm": 180.69541931152344, + "learning_rate": 1.995058355726854e-06, + "loss": 20.0547, + "step": 6414 + }, + { + "epoch": 0.06072452930207022, + "grad_norm": 250.25909423828125, + "learning_rate": 1.9950553111345054e-06, + "loss": 27.1484, + "step": 6415 + }, + { + "epoch": 0.060733995323785275, + "grad_norm": 669.0018310546875, + "learning_rate": 1.9950522656068696e-06, + "loss": 54.3984, + "step": 6416 + }, + { + "epoch": 0.06074346134550033, + "grad_norm": 311.34735107421875, + "learning_rate": 1.995049219143949e-06, + "loss": 24.875, + "step": 6417 + }, + { + "epoch": 0.06075292736721538, + "grad_norm": 381.7377014160156, + "learning_rate": 1.995046171745747e-06, + "loss": 27.3438, + "step": 6418 + }, + { + "epoch": 0.060762393388930434, + "grad_norm": 457.1264953613281, + "learning_rate": 1.9950431234122664e-06, + "loss": 40.25, + "step": 6419 + }, + { + "epoch": 0.06077185941064549, + "grad_norm": 300.4454345703125, + "learning_rate": 1.9950400741435097e-06, + "loss": 20.0703, + "step": 6420 + }, + { + "epoch": 0.06078132543236054, + "grad_norm": 697.9312133789062, + "learning_rate": 1.9950370239394803e-06, + "loss": 24.4844, + "step": 6421 + }, + { + "epoch": 0.06079079145407559, + "grad_norm": 381.72711181640625, + "learning_rate": 1.995033972800181e-06, + "loss": 21.7812, + "step": 6422 + }, + { + "epoch": 0.060800257475790646, + "grad_norm": 192.69778442382812, + "learning_rate": 1.995030920725614e-06, + "loss": 23.125, + "step": 6423 + }, + { + "epoch": 0.060809723497505706, + "grad_norm": 681.3165283203125, + "learning_rate": 1.995027867715783e-06, + "loss": 80.25, + "step": 6424 + }, + { + "epoch": 0.06081918951922076, + "grad_norm": 498.2503967285156, + "learning_rate": 1.995024813770691e-06, + "loss": 44.4844, + "step": 6425 + }, + { + "epoch": 0.06082865554093581, + "grad_norm": 319.45953369140625, + "learning_rate": 1.995021758890339e-06, + "loss": 20.5, + "step": 6426 + }, + { + "epoch": 0.060838121562650865, + "grad_norm": 393.1156311035156, + "learning_rate": 1.9950187030747326e-06, + "loss": 30.1328, + "step": 6427 + }, + { + "epoch": 0.06084758758436592, + "grad_norm": 230.73439025878906, + "learning_rate": 1.995015646323873e-06, + "loss": 23.4219, + "step": 6428 + }, + { + "epoch": 0.06085705360608097, + "grad_norm": 455.84832763671875, + "learning_rate": 1.995012588637763e-06, + "loss": 26.6094, + "step": 6429 + }, + { + "epoch": 0.060866519627796024, + "grad_norm": 373.9718017578125, + "learning_rate": 1.9950095300164066e-06, + "loss": 25.0078, + "step": 6430 + }, + { + "epoch": 0.06087598564951108, + "grad_norm": 502.9765625, + "learning_rate": 1.9950064704598054e-06, + "loss": 21.1016, + "step": 6431 + }, + { + "epoch": 0.06088545167122613, + "grad_norm": 819.8850708007812, + "learning_rate": 1.995003409967963e-06, + "loss": 60.4922, + "step": 6432 + }, + { + "epoch": 0.06089491769294119, + "grad_norm": 3.2293081283569336, + "learning_rate": 1.9950003485408826e-06, + "loss": 0.8218, + "step": 6433 + }, + { + "epoch": 0.06090438371465624, + "grad_norm": 536.1895141601562, + "learning_rate": 1.9949972861785656e-06, + "loss": 26.5938, + "step": 6434 + }, + { + "epoch": 0.060913849736371296, + "grad_norm": 666.0015258789062, + "learning_rate": 1.9949942228810167e-06, + "loss": 27.0938, + "step": 6435 + }, + { + "epoch": 0.06092331575808635, + "grad_norm": 498.92669677734375, + "learning_rate": 1.9949911586482376e-06, + "loss": 39.25, + "step": 6436 + }, + { + "epoch": 0.0609327817798014, + "grad_norm": 157.0077667236328, + "learning_rate": 1.994988093480232e-06, + "loss": 18.0391, + "step": 6437 + }, + { + "epoch": 0.060942247801516455, + "grad_norm": 354.0731201171875, + "learning_rate": 1.9949850273770016e-06, + "loss": 27.9062, + "step": 6438 + }, + { + "epoch": 0.06095171382323151, + "grad_norm": 3.666430950164795, + "learning_rate": 1.9949819603385503e-06, + "loss": 0.7993, + "step": 6439 + }, + { + "epoch": 0.06096117984494656, + "grad_norm": 401.12969970703125, + "learning_rate": 1.994978892364881e-06, + "loss": 47.5938, + "step": 6440 + }, + { + "epoch": 0.06097064586666162, + "grad_norm": 897.57421875, + "learning_rate": 1.994975823455996e-06, + "loss": 26.1406, + "step": 6441 + }, + { + "epoch": 0.060980111888376674, + "grad_norm": 226.2285919189453, + "learning_rate": 1.994972753611899e-06, + "loss": 25.1484, + "step": 6442 + }, + { + "epoch": 0.06098957791009173, + "grad_norm": 190.5459747314453, + "learning_rate": 1.9949696828325913e-06, + "loss": 22.0312, + "step": 6443 + }, + { + "epoch": 0.06099904393180678, + "grad_norm": 1363.632080078125, + "learning_rate": 1.9949666111180777e-06, + "loss": 77.1641, + "step": 6444 + }, + { + "epoch": 0.06100850995352183, + "grad_norm": 359.1639709472656, + "learning_rate": 1.9949635384683597e-06, + "loss": 23.6875, + "step": 6445 + }, + { + "epoch": 0.061017975975236886, + "grad_norm": 797.7765502929688, + "learning_rate": 1.9949604648834407e-06, + "loss": 44.3516, + "step": 6446 + }, + { + "epoch": 0.06102744199695194, + "grad_norm": 308.7940368652344, + "learning_rate": 1.994957390363324e-06, + "loss": 37.3281, + "step": 6447 + }, + { + "epoch": 0.06103690801866699, + "grad_norm": 918.5250854492188, + "learning_rate": 1.9949543149080117e-06, + "loss": 69.1641, + "step": 6448 + }, + { + "epoch": 0.061046374040382045, + "grad_norm": 222.4664306640625, + "learning_rate": 1.9949512385175074e-06, + "loss": 26.6719, + "step": 6449 + }, + { + "epoch": 0.061055840062097105, + "grad_norm": 189.8463592529297, + "learning_rate": 1.9949481611918134e-06, + "loss": 20.6562, + "step": 6450 + }, + { + "epoch": 0.06106530608381216, + "grad_norm": 332.1270751953125, + "learning_rate": 1.994945082930933e-06, + "loss": 37.3594, + "step": 6451 + }, + { + "epoch": 0.06107477210552721, + "grad_norm": 566.46337890625, + "learning_rate": 1.9949420037348686e-06, + "loss": 42.0391, + "step": 6452 + }, + { + "epoch": 0.061084238127242264, + "grad_norm": 721.6871948242188, + "learning_rate": 1.9949389236036236e-06, + "loss": 28.5625, + "step": 6453 + }, + { + "epoch": 0.06109370414895732, + "grad_norm": 353.58795166015625, + "learning_rate": 1.994935842537201e-06, + "loss": 26.9219, + "step": 6454 + }, + { + "epoch": 0.06110317017067237, + "grad_norm": 257.77740478515625, + "learning_rate": 1.994932760535603e-06, + "loss": 23.7188, + "step": 6455 + }, + { + "epoch": 0.06111263619238742, + "grad_norm": 286.45233154296875, + "learning_rate": 1.9949296775988334e-06, + "loss": 21.7344, + "step": 6456 + }, + { + "epoch": 0.061122102214102476, + "grad_norm": 606.9606323242188, + "learning_rate": 1.994926593726895e-06, + "loss": 45.7188, + "step": 6457 + }, + { + "epoch": 0.061131568235817536, + "grad_norm": 721.2406616210938, + "learning_rate": 1.994923508919789e-06, + "loss": 10.5664, + "step": 6458 + }, + { + "epoch": 0.06114103425753259, + "grad_norm": 500.7311096191406, + "learning_rate": 1.9949204231775206e-06, + "loss": 31.4531, + "step": 6459 + }, + { + "epoch": 0.06115050027924764, + "grad_norm": 1330.616943359375, + "learning_rate": 1.9949173365000916e-06, + "loss": 57.9219, + "step": 6460 + }, + { + "epoch": 0.061159966300962695, + "grad_norm": 559.162841796875, + "learning_rate": 1.994914248887505e-06, + "loss": 48.0938, + "step": 6461 + }, + { + "epoch": 0.06116943232267775, + "grad_norm": 1057.144775390625, + "learning_rate": 1.9949111603397635e-06, + "loss": 51.8438, + "step": 6462 + }, + { + "epoch": 0.0611788983443928, + "grad_norm": 205.02252197265625, + "learning_rate": 1.994908070856871e-06, + "loss": 24.2188, + "step": 6463 + }, + { + "epoch": 0.061188364366107854, + "grad_norm": 434.7136535644531, + "learning_rate": 1.9949049804388287e-06, + "loss": 60.2812, + "step": 6464 + }, + { + "epoch": 0.06119783038782291, + "grad_norm": 814.7069091796875, + "learning_rate": 1.9949018890856407e-06, + "loss": 39.9766, + "step": 6465 + }, + { + "epoch": 0.06120729640953796, + "grad_norm": 3.391071081161499, + "learning_rate": 1.99489879679731e-06, + "loss": 0.9766, + "step": 6466 + }, + { + "epoch": 0.06121676243125302, + "grad_norm": 546.2738647460938, + "learning_rate": 1.994895703573839e-06, + "loss": 45.4062, + "step": 6467 + }, + { + "epoch": 0.06122622845296807, + "grad_norm": 518.6997680664062, + "learning_rate": 1.9948926094152303e-06, + "loss": 56.8086, + "step": 6468 + }, + { + "epoch": 0.061235694474683126, + "grad_norm": 1354.518310546875, + "learning_rate": 1.9948895143214876e-06, + "loss": 48.5781, + "step": 6469 + }, + { + "epoch": 0.06124516049639818, + "grad_norm": 278.5951843261719, + "learning_rate": 1.9948864182926137e-06, + "loss": 8.3828, + "step": 6470 + }, + { + "epoch": 0.06125462651811323, + "grad_norm": 222.98486328125, + "learning_rate": 1.9948833213286108e-06, + "loss": 23.4219, + "step": 6471 + }, + { + "epoch": 0.061264092539828285, + "grad_norm": 3.3132994174957275, + "learning_rate": 1.9948802234294827e-06, + "loss": 0.9746, + "step": 6472 + }, + { + "epoch": 0.06127355856154334, + "grad_norm": 345.7066955566406, + "learning_rate": 1.994877124595232e-06, + "loss": 24.0391, + "step": 6473 + }, + { + "epoch": 0.06128302458325839, + "grad_norm": 339.7080993652344, + "learning_rate": 1.994874024825861e-06, + "loss": 40.25, + "step": 6474 + }, + { + "epoch": 0.061292490604973444, + "grad_norm": 658.904052734375, + "learning_rate": 1.9948709241213736e-06, + "loss": 53.4062, + "step": 6475 + }, + { + "epoch": 0.061301956626688504, + "grad_norm": 273.4133605957031, + "learning_rate": 1.994867822481772e-06, + "loss": 28.0078, + "step": 6476 + }, + { + "epoch": 0.06131142264840356, + "grad_norm": 437.7381286621094, + "learning_rate": 1.9948647199070594e-06, + "loss": 25.7969, + "step": 6477 + }, + { + "epoch": 0.06132088867011861, + "grad_norm": 833.6455078125, + "learning_rate": 1.994861616397239e-06, + "loss": 57.1719, + "step": 6478 + }, + { + "epoch": 0.06133035469183366, + "grad_norm": 376.00579833984375, + "learning_rate": 1.9948585119523133e-06, + "loss": 22.5469, + "step": 6479 + }, + { + "epoch": 0.061339820713548716, + "grad_norm": 3.658550500869751, + "learning_rate": 1.994855406572285e-06, + "loss": 0.8784, + "step": 6480 + }, + { + "epoch": 0.06134928673526377, + "grad_norm": 503.05621337890625, + "learning_rate": 1.9948523002571574e-06, + "loss": 28.4531, + "step": 6481 + }, + { + "epoch": 0.06135875275697882, + "grad_norm": 603.5861206054688, + "learning_rate": 1.9948491930069334e-06, + "loss": 43.1875, + "step": 6482 + }, + { + "epoch": 0.061368218778693875, + "grad_norm": 302.04583740234375, + "learning_rate": 1.9948460848216163e-06, + "loss": 38.0469, + "step": 6483 + }, + { + "epoch": 0.061377684800408935, + "grad_norm": 297.0077209472656, + "learning_rate": 1.994842975701208e-06, + "loss": 31.3906, + "step": 6484 + }, + { + "epoch": 0.06138715082212399, + "grad_norm": 413.8563537597656, + "learning_rate": 1.9948398656457123e-06, + "loss": 27.8438, + "step": 6485 + }, + { + "epoch": 0.06139661684383904, + "grad_norm": 253.59701538085938, + "learning_rate": 1.9948367546551318e-06, + "loss": 24.0156, + "step": 6486 + }, + { + "epoch": 0.061406082865554094, + "grad_norm": 160.904052734375, + "learning_rate": 1.9948336427294692e-06, + "loss": 18.6719, + "step": 6487 + }, + { + "epoch": 0.06141554888726915, + "grad_norm": 595.7402954101562, + "learning_rate": 1.994830529868728e-06, + "loss": 40.7109, + "step": 6488 + }, + { + "epoch": 0.0614250149089842, + "grad_norm": 3.0041253566741943, + "learning_rate": 1.994827416072911e-06, + "loss": 0.9175, + "step": 6489 + }, + { + "epoch": 0.06143448093069925, + "grad_norm": 936.2440795898438, + "learning_rate": 1.994824301342021e-06, + "loss": 46.6797, + "step": 6490 + }, + { + "epoch": 0.061443946952414306, + "grad_norm": 244.76747131347656, + "learning_rate": 1.9948211856760602e-06, + "loss": 23.7188, + "step": 6491 + }, + { + "epoch": 0.06145341297412936, + "grad_norm": 221.97462463378906, + "learning_rate": 1.994818069075033e-06, + "loss": 21.25, + "step": 6492 + }, + { + "epoch": 0.06146287899584442, + "grad_norm": 392.2120361328125, + "learning_rate": 1.994814951538941e-06, + "loss": 10.5039, + "step": 6493 + }, + { + "epoch": 0.06147234501755947, + "grad_norm": 3.0751254558563232, + "learning_rate": 1.994811833067788e-06, + "loss": 0.8506, + "step": 6494 + }, + { + "epoch": 0.061481811039274525, + "grad_norm": 772.3035888671875, + "learning_rate": 1.9948087136615765e-06, + "loss": 43.9219, + "step": 6495 + }, + { + "epoch": 0.06149127706098958, + "grad_norm": 569.3221435546875, + "learning_rate": 1.994805593320309e-06, + "loss": 56.0312, + "step": 6496 + }, + { + "epoch": 0.06150074308270463, + "grad_norm": 309.8354797363281, + "learning_rate": 1.9948024720439894e-06, + "loss": 35.625, + "step": 6497 + }, + { + "epoch": 0.061510209104419684, + "grad_norm": 689.0972900390625, + "learning_rate": 1.9947993498326203e-06, + "loss": 26.0234, + "step": 6498 + }, + { + "epoch": 0.06151967512613474, + "grad_norm": 404.84326171875, + "learning_rate": 1.9947962266862043e-06, + "loss": 21.4922, + "step": 6499 + }, + { + "epoch": 0.06152914114784979, + "grad_norm": 369.83660888671875, + "learning_rate": 1.994793102604745e-06, + "loss": 28.9922, + "step": 6500 + }, + { + "epoch": 0.06153860716956485, + "grad_norm": 213.2903289794922, + "learning_rate": 1.9947899775882444e-06, + "loss": 23.5156, + "step": 6501 + }, + { + "epoch": 0.0615480731912799, + "grad_norm": 357.76177978515625, + "learning_rate": 1.994786851636706e-06, + "loss": 48.6094, + "step": 6502 + }, + { + "epoch": 0.061557539212994956, + "grad_norm": 346.45849609375, + "learning_rate": 1.994783724750133e-06, + "loss": 39.8594, + "step": 6503 + }, + { + "epoch": 0.06156700523471001, + "grad_norm": 283.2763366699219, + "learning_rate": 1.994780596928528e-06, + "loss": 23.2734, + "step": 6504 + }, + { + "epoch": 0.06157647125642506, + "grad_norm": 548.2725219726562, + "learning_rate": 1.9947774681718934e-06, + "loss": 14.082, + "step": 6505 + }, + { + "epoch": 0.061585937278140115, + "grad_norm": 263.8515625, + "learning_rate": 1.994774338480233e-06, + "loss": 24.4531, + "step": 6506 + }, + { + "epoch": 0.06159540329985517, + "grad_norm": 3.179226875305176, + "learning_rate": 1.9947712078535494e-06, + "loss": 0.8589, + "step": 6507 + }, + { + "epoch": 0.06160486932157022, + "grad_norm": 638.1685791015625, + "learning_rate": 1.994768076291846e-06, + "loss": 55.9609, + "step": 6508 + }, + { + "epoch": 0.061614335343285274, + "grad_norm": 285.19183349609375, + "learning_rate": 1.994764943795125e-06, + "loss": 27.7344, + "step": 6509 + }, + { + "epoch": 0.061623801365000334, + "grad_norm": 252.89337158203125, + "learning_rate": 1.9947618103633895e-06, + "loss": 23.9453, + "step": 6510 + }, + { + "epoch": 0.06163326738671539, + "grad_norm": 268.19171142578125, + "learning_rate": 1.9947586759966427e-06, + "loss": 25.625, + "step": 6511 + }, + { + "epoch": 0.06164273340843044, + "grad_norm": 263.1337890625, + "learning_rate": 1.9947555406948876e-06, + "loss": 21.2891, + "step": 6512 + }, + { + "epoch": 0.06165219943014549, + "grad_norm": 182.66665649414062, + "learning_rate": 1.994752404458127e-06, + "loss": 18.625, + "step": 6513 + }, + { + "epoch": 0.061661665451860546, + "grad_norm": 166.4856414794922, + "learning_rate": 1.994749267286364e-06, + "loss": 18.1172, + "step": 6514 + }, + { + "epoch": 0.0616711314735756, + "grad_norm": 506.74273681640625, + "learning_rate": 1.9947461291796007e-06, + "loss": 44.0625, + "step": 6515 + }, + { + "epoch": 0.06168059749529065, + "grad_norm": 563.1497802734375, + "learning_rate": 1.9947429901378415e-06, + "loss": 57.6562, + "step": 6516 + }, + { + "epoch": 0.061690063517005705, + "grad_norm": 486.1155700683594, + "learning_rate": 1.994739850161088e-06, + "loss": 22.5938, + "step": 6517 + }, + { + "epoch": 0.06169952953872076, + "grad_norm": 299.6109924316406, + "learning_rate": 1.9947367092493443e-06, + "loss": 30.7969, + "step": 6518 + }, + { + "epoch": 0.06170899556043582, + "grad_norm": 522.7945556640625, + "learning_rate": 1.9947335674026126e-06, + "loss": 50.7422, + "step": 6519 + }, + { + "epoch": 0.06171846158215087, + "grad_norm": 269.0880432128906, + "learning_rate": 1.9947304246208963e-06, + "loss": 22.5781, + "step": 6520 + }, + { + "epoch": 0.061727927603865924, + "grad_norm": 249.52621459960938, + "learning_rate": 1.994727280904198e-06, + "loss": 24.7031, + "step": 6521 + }, + { + "epoch": 0.06173739362558098, + "grad_norm": 459.7331848144531, + "learning_rate": 1.9947241362525205e-06, + "loss": 26.3516, + "step": 6522 + }, + { + "epoch": 0.06174685964729603, + "grad_norm": 4.190451145172119, + "learning_rate": 1.9947209906658673e-06, + "loss": 1.0054, + "step": 6523 + }, + { + "epoch": 0.06175632566901108, + "grad_norm": 271.57879638671875, + "learning_rate": 1.994717844144241e-06, + "loss": 26.9766, + "step": 6524 + }, + { + "epoch": 0.061765791690726136, + "grad_norm": 298.4772644042969, + "learning_rate": 1.994714696687644e-06, + "loss": 15.2656, + "step": 6525 + }, + { + "epoch": 0.06177525771244119, + "grad_norm": 323.3160095214844, + "learning_rate": 1.9947115482960805e-06, + "loss": 35.7422, + "step": 6526 + }, + { + "epoch": 0.06178472373415625, + "grad_norm": 2.8903868198394775, + "learning_rate": 1.994708398969553e-06, + "loss": 0.8906, + "step": 6527 + }, + { + "epoch": 0.0617941897558713, + "grad_norm": 2.8263099193573, + "learning_rate": 1.994705248708064e-06, + "loss": 0.8003, + "step": 6528 + }, + { + "epoch": 0.061803655777586355, + "grad_norm": 200.31028747558594, + "learning_rate": 1.9947020975116172e-06, + "loss": 19.7109, + "step": 6529 + }, + { + "epoch": 0.06181312179930141, + "grad_norm": 303.52020263671875, + "learning_rate": 1.9946989453802145e-06, + "loss": 15.3047, + "step": 6530 + }, + { + "epoch": 0.06182258782101646, + "grad_norm": 295.1498718261719, + "learning_rate": 1.9946957923138597e-06, + "loss": 21.7188, + "step": 6531 + }, + { + "epoch": 0.061832053842731514, + "grad_norm": 267.7174377441406, + "learning_rate": 1.994692638312556e-06, + "loss": 35.3906, + "step": 6532 + }, + { + "epoch": 0.06184151986444657, + "grad_norm": 318.5130615234375, + "learning_rate": 1.9946894833763056e-06, + "loss": 29.1094, + "step": 6533 + }, + { + "epoch": 0.06185098588616162, + "grad_norm": 332.28472900390625, + "learning_rate": 1.994686327505112e-06, + "loss": 31.7344, + "step": 6534 + }, + { + "epoch": 0.061860451907876673, + "grad_norm": 333.67144775390625, + "learning_rate": 1.994683170698978e-06, + "loss": 29.3594, + "step": 6535 + }, + { + "epoch": 0.06186991792959173, + "grad_norm": 371.3656311035156, + "learning_rate": 1.994680012957906e-06, + "loss": 24.8516, + "step": 6536 + }, + { + "epoch": 0.061879383951306786, + "grad_norm": 205.0545654296875, + "learning_rate": 1.9946768542819e-06, + "loss": 18.2578, + "step": 6537 + }, + { + "epoch": 0.06188884997302184, + "grad_norm": 256.2228698730469, + "learning_rate": 1.994673694670962e-06, + "loss": 20.2188, + "step": 6538 + }, + { + "epoch": 0.06189831599473689, + "grad_norm": 317.9997863769531, + "learning_rate": 1.994670534125096e-06, + "loss": 27.3672, + "step": 6539 + }, + { + "epoch": 0.061907782016451945, + "grad_norm": 168.1892547607422, + "learning_rate": 1.994667372644304e-06, + "loss": 21.9375, + "step": 6540 + }, + { + "epoch": 0.061917248038167, + "grad_norm": 530.1842041015625, + "learning_rate": 1.9946642102285898e-06, + "loss": 32.6953, + "step": 6541 + }, + { + "epoch": 0.06192671405988205, + "grad_norm": 567.0525512695312, + "learning_rate": 1.9946610468779552e-06, + "loss": 41.6641, + "step": 6542 + }, + { + "epoch": 0.061936180081597104, + "grad_norm": 321.6238098144531, + "learning_rate": 1.9946578825924047e-06, + "loss": 22.5703, + "step": 6543 + }, + { + "epoch": 0.061945646103312164, + "grad_norm": 468.16094970703125, + "learning_rate": 1.9946547173719403e-06, + "loss": 49.125, + "step": 6544 + }, + { + "epoch": 0.06195511212502722, + "grad_norm": 509.3931884765625, + "learning_rate": 1.994651551216565e-06, + "loss": 32.4141, + "step": 6545 + }, + { + "epoch": 0.06196457814674227, + "grad_norm": 244.98410034179688, + "learning_rate": 1.9946483841262817e-06, + "loss": 19.2266, + "step": 6546 + }, + { + "epoch": 0.06197404416845732, + "grad_norm": 448.8028869628906, + "learning_rate": 1.994645216101094e-06, + "loss": 39.0938, + "step": 6547 + }, + { + "epoch": 0.061983510190172376, + "grad_norm": 401.77398681640625, + "learning_rate": 1.9946420471410044e-06, + "loss": 40.5938, + "step": 6548 + }, + { + "epoch": 0.06199297621188743, + "grad_norm": 222.12440490722656, + "learning_rate": 1.994638877246016e-06, + "loss": 25.2891, + "step": 6549 + }, + { + "epoch": 0.06200244223360248, + "grad_norm": 262.3053894042969, + "learning_rate": 1.9946357064161317e-06, + "loss": 22.4805, + "step": 6550 + }, + { + "epoch": 0.062011908255317535, + "grad_norm": 551.2672119140625, + "learning_rate": 1.9946325346513548e-06, + "loss": 44.5625, + "step": 6551 + }, + { + "epoch": 0.06202137427703259, + "grad_norm": 421.7435302734375, + "learning_rate": 1.9946293619516877e-06, + "loss": 34.8906, + "step": 6552 + }, + { + "epoch": 0.06203084029874765, + "grad_norm": 299.5782470703125, + "learning_rate": 1.994626188317134e-06, + "loss": 37.4844, + "step": 6553 + }, + { + "epoch": 0.0620403063204627, + "grad_norm": 303.88323974609375, + "learning_rate": 1.994623013747696e-06, + "loss": 18.5547, + "step": 6554 + }, + { + "epoch": 0.062049772342177754, + "grad_norm": 488.7344055175781, + "learning_rate": 1.9946198382433772e-06, + "loss": 24.7969, + "step": 6555 + }, + { + "epoch": 0.06205923836389281, + "grad_norm": 338.234375, + "learning_rate": 1.994616661804181e-06, + "loss": 43.9375, + "step": 6556 + }, + { + "epoch": 0.06206870438560786, + "grad_norm": 248.6620635986328, + "learning_rate": 1.994613484430109e-06, + "loss": 20.9141, + "step": 6557 + }, + { + "epoch": 0.062078170407322913, + "grad_norm": 303.9061279296875, + "learning_rate": 1.9946103061211654e-06, + "loss": 25.3438, + "step": 6558 + }, + { + "epoch": 0.062087636429037966, + "grad_norm": 214.03416442871094, + "learning_rate": 1.994607126877353e-06, + "loss": 25.9766, + "step": 6559 + }, + { + "epoch": 0.06209710245075302, + "grad_norm": 429.9839782714844, + "learning_rate": 1.9946039466986743e-06, + "loss": 36.0156, + "step": 6560 + }, + { + "epoch": 0.06210656847246807, + "grad_norm": 183.0668182373047, + "learning_rate": 1.994600765585133e-06, + "loss": 23.9766, + "step": 6561 + }, + { + "epoch": 0.06211603449418313, + "grad_norm": 2.3982865810394287, + "learning_rate": 1.994597583536731e-06, + "loss": 0.7388, + "step": 6562 + }, + { + "epoch": 0.062125500515898185, + "grad_norm": 602.2152709960938, + "learning_rate": 1.994594400553473e-06, + "loss": 62.7812, + "step": 6563 + }, + { + "epoch": 0.06213496653761324, + "grad_norm": 499.3941650390625, + "learning_rate": 1.99459121663536e-06, + "loss": 60.4219, + "step": 6564 + }, + { + "epoch": 0.06214443255932829, + "grad_norm": 237.35263061523438, + "learning_rate": 1.9945880317823965e-06, + "loss": 22.875, + "step": 6565 + }, + { + "epoch": 0.062153898581043344, + "grad_norm": 237.043701171875, + "learning_rate": 1.9945848459945847e-06, + "loss": 17.6016, + "step": 6566 + }, + { + "epoch": 0.0621633646027584, + "grad_norm": 581.7696533203125, + "learning_rate": 1.9945816592719276e-06, + "loss": 24.9688, + "step": 6567 + }, + { + "epoch": 0.06217283062447345, + "grad_norm": 347.5903015136719, + "learning_rate": 1.994578471614429e-06, + "loss": 19.332, + "step": 6568 + }, + { + "epoch": 0.062182296646188503, + "grad_norm": 302.53082275390625, + "learning_rate": 1.994575283022091e-06, + "loss": 31.3125, + "step": 6569 + }, + { + "epoch": 0.06219176266790356, + "grad_norm": 444.3642883300781, + "learning_rate": 1.9945720934949173e-06, + "loss": 22.8828, + "step": 6570 + }, + { + "epoch": 0.062201228689618616, + "grad_norm": 181.65528869628906, + "learning_rate": 1.99456890303291e-06, + "loss": 20.3828, + "step": 6571 + }, + { + "epoch": 0.06221069471133367, + "grad_norm": 327.5412902832031, + "learning_rate": 1.994565711636073e-06, + "loss": 21.9219, + "step": 6572 + }, + { + "epoch": 0.06222016073304872, + "grad_norm": 366.6715087890625, + "learning_rate": 1.994562519304409e-06, + "loss": 54.2969, + "step": 6573 + }, + { + "epoch": 0.062229626754763775, + "grad_norm": 311.2423095703125, + "learning_rate": 1.9945593260379204e-06, + "loss": 27.25, + "step": 6574 + }, + { + "epoch": 0.06223909277647883, + "grad_norm": 232.86087036132812, + "learning_rate": 1.994556131836611e-06, + "loss": 20.6484, + "step": 6575 + }, + { + "epoch": 0.06224855879819388, + "grad_norm": 424.0889587402344, + "learning_rate": 1.994552936700484e-06, + "loss": 29.2656, + "step": 6576 + }, + { + "epoch": 0.062258024819908934, + "grad_norm": 358.9302673339844, + "learning_rate": 1.9945497406295417e-06, + "loss": 39.2344, + "step": 6577 + }, + { + "epoch": 0.06226749084162399, + "grad_norm": 281.44281005859375, + "learning_rate": 1.994546543623787e-06, + "loss": 30.3125, + "step": 6578 + }, + { + "epoch": 0.06227695686333905, + "grad_norm": 331.20196533203125, + "learning_rate": 1.9945433456832233e-06, + "loss": 28.6953, + "step": 6579 + }, + { + "epoch": 0.0622864228850541, + "grad_norm": 316.4100646972656, + "learning_rate": 1.994540146807854e-06, + "loss": 27.7188, + "step": 6580 + }, + { + "epoch": 0.062295888906769153, + "grad_norm": 523.3636474609375, + "learning_rate": 1.9945369469976813e-06, + "loss": 51.2188, + "step": 6581 + }, + { + "epoch": 0.062305354928484206, + "grad_norm": 900.6707153320312, + "learning_rate": 1.9945337462527084e-06, + "loss": 101.9375, + "step": 6582 + }, + { + "epoch": 0.06231482095019926, + "grad_norm": 212.4016571044922, + "learning_rate": 1.994530544572939e-06, + "loss": 27.1016, + "step": 6583 + }, + { + "epoch": 0.06232428697191431, + "grad_norm": 1079.9776611328125, + "learning_rate": 1.994527341958375e-06, + "loss": 58.6016, + "step": 6584 + }, + { + "epoch": 0.062333752993629366, + "grad_norm": 542.1168823242188, + "learning_rate": 1.9945241384090206e-06, + "loss": 27.3438, + "step": 6585 + }, + { + "epoch": 0.06234321901534442, + "grad_norm": 915.556396484375, + "learning_rate": 1.994520933924878e-06, + "loss": 16.3516, + "step": 6586 + }, + { + "epoch": 0.06235268503705948, + "grad_norm": 289.3629455566406, + "learning_rate": 1.99451772850595e-06, + "loss": 25.6953, + "step": 6587 + }, + { + "epoch": 0.06236215105877453, + "grad_norm": 283.9156188964844, + "learning_rate": 1.9945145221522407e-06, + "loss": 19.7656, + "step": 6588 + }, + { + "epoch": 0.062371617080489584, + "grad_norm": 489.14520263671875, + "learning_rate": 1.994511314863752e-06, + "loss": 32.8281, + "step": 6589 + }, + { + "epoch": 0.06238108310220464, + "grad_norm": 608.2365112304688, + "learning_rate": 1.9945081066404875e-06, + "loss": 30.0547, + "step": 6590 + }, + { + "epoch": 0.06239054912391969, + "grad_norm": 180.87991333007812, + "learning_rate": 1.99450489748245e-06, + "loss": 21.6094, + "step": 6591 + }, + { + "epoch": 0.062400015145634743, + "grad_norm": 609.9078369140625, + "learning_rate": 1.9945016873896424e-06, + "loss": 34.1953, + "step": 6592 + }, + { + "epoch": 0.062409481167349797, + "grad_norm": 214.10850524902344, + "learning_rate": 1.9944984763620686e-06, + "loss": 22.3906, + "step": 6593 + }, + { + "epoch": 0.06241894718906485, + "grad_norm": 655.64990234375, + "learning_rate": 1.9944952643997304e-06, + "loss": 43.9844, + "step": 6594 + }, + { + "epoch": 0.0624284132107799, + "grad_norm": 229.20582580566406, + "learning_rate": 1.9944920515026312e-06, + "loss": 19.0938, + "step": 6595 + }, + { + "epoch": 0.06243787923249496, + "grad_norm": 551.6954345703125, + "learning_rate": 1.9944888376707745e-06, + "loss": 34.4531, + "step": 6596 + }, + { + "epoch": 0.062447345254210015, + "grad_norm": 265.348388671875, + "learning_rate": 1.994485622904163e-06, + "loss": 32.2656, + "step": 6597 + }, + { + "epoch": 0.06245681127592507, + "grad_norm": 587.7361450195312, + "learning_rate": 1.9944824072027995e-06, + "loss": 24.7266, + "step": 6598 + }, + { + "epoch": 0.06246627729764012, + "grad_norm": 3.1008243560791016, + "learning_rate": 1.9944791905666873e-06, + "loss": 0.8882, + "step": 6599 + }, + { + "epoch": 0.062475743319355174, + "grad_norm": 414.8764953613281, + "learning_rate": 1.9944759729958297e-06, + "loss": 17.4688, + "step": 6600 + }, + { + "epoch": 0.06248520934107023, + "grad_norm": 403.3591613769531, + "learning_rate": 1.994472754490229e-06, + "loss": 41.6406, + "step": 6601 + }, + { + "epoch": 0.06249467536278528, + "grad_norm": 279.6030578613281, + "learning_rate": 1.9944695350498884e-06, + "loss": 21.4453, + "step": 6602 + }, + { + "epoch": 0.06250414138450033, + "grad_norm": 898.2662353515625, + "learning_rate": 1.9944663146748114e-06, + "loss": 49.1953, + "step": 6603 + }, + { + "epoch": 0.0625136074062154, + "grad_norm": 323.9005432128906, + "learning_rate": 1.994463093365001e-06, + "loss": 23.1328, + "step": 6604 + }, + { + "epoch": 0.06252307342793044, + "grad_norm": 497.93505859375, + "learning_rate": 1.9944598711204595e-06, + "loss": 49.4375, + "step": 6605 + }, + { + "epoch": 0.0625325394496455, + "grad_norm": 3.4791600704193115, + "learning_rate": 1.994456647941191e-06, + "loss": 0.957, + "step": 6606 + }, + { + "epoch": 0.06254200547136055, + "grad_norm": 665.6234741210938, + "learning_rate": 1.9944534238271974e-06, + "loss": 47.9609, + "step": 6607 + }, + { + "epoch": 0.0625514714930756, + "grad_norm": 277.54290771484375, + "learning_rate": 1.9944501987784827e-06, + "loss": 28.1562, + "step": 6608 + }, + { + "epoch": 0.06256093751479067, + "grad_norm": 463.15435791015625, + "learning_rate": 1.9944469727950493e-06, + "loss": 59.7031, + "step": 6609 + }, + { + "epoch": 0.06257040353650571, + "grad_norm": 500.3600769042969, + "learning_rate": 1.9944437458769003e-06, + "loss": 44.4844, + "step": 6610 + }, + { + "epoch": 0.06257986955822077, + "grad_norm": 329.5493469238281, + "learning_rate": 1.994440518024039e-06, + "loss": 28.1094, + "step": 6611 + }, + { + "epoch": 0.06258933557993582, + "grad_norm": 565.6698608398438, + "learning_rate": 1.9944372892364683e-06, + "loss": 25.0, + "step": 6612 + }, + { + "epoch": 0.06259880160165088, + "grad_norm": 170.04965209960938, + "learning_rate": 1.9944340595141913e-06, + "loss": 16.9141, + "step": 6613 + }, + { + "epoch": 0.06260826762336592, + "grad_norm": 374.5801696777344, + "learning_rate": 1.994430828857211e-06, + "loss": 62.7266, + "step": 6614 + }, + { + "epoch": 0.06261773364508098, + "grad_norm": 275.47491455078125, + "learning_rate": 1.9944275972655304e-06, + "loss": 29.5938, + "step": 6615 + }, + { + "epoch": 0.06262719966679603, + "grad_norm": 334.5172119140625, + "learning_rate": 1.9944243647391525e-06, + "loss": 20.7031, + "step": 6616 + }, + { + "epoch": 0.06263666568851109, + "grad_norm": 885.1593017578125, + "learning_rate": 1.9944211312780805e-06, + "loss": 31.4375, + "step": 6617 + }, + { + "epoch": 0.06264613171022615, + "grad_norm": 282.8595886230469, + "learning_rate": 1.994417896882317e-06, + "loss": 22.7812, + "step": 6618 + }, + { + "epoch": 0.0626555977319412, + "grad_norm": 354.2593994140625, + "learning_rate": 1.9944146615518657e-06, + "loss": 27.75, + "step": 6619 + }, + { + "epoch": 0.06266506375365626, + "grad_norm": 393.98016357421875, + "learning_rate": 1.994411425286729e-06, + "loss": 33.5469, + "step": 6620 + }, + { + "epoch": 0.0626745297753713, + "grad_norm": 776.6715087890625, + "learning_rate": 1.99440818808691e-06, + "loss": 22.4453, + "step": 6621 + }, + { + "epoch": 0.06268399579708636, + "grad_norm": 382.8057556152344, + "learning_rate": 1.994404949952413e-06, + "loss": 29.8359, + "step": 6622 + }, + { + "epoch": 0.06269346181880141, + "grad_norm": 349.7347717285156, + "learning_rate": 1.9944017108832393e-06, + "loss": 28.0078, + "step": 6623 + }, + { + "epoch": 0.06270292784051647, + "grad_norm": 939.6514282226562, + "learning_rate": 1.9943984708793927e-06, + "loss": 52.3438, + "step": 6624 + }, + { + "epoch": 0.06271239386223151, + "grad_norm": 511.7294616699219, + "learning_rate": 1.9943952299408764e-06, + "loss": 33.0391, + "step": 6625 + }, + { + "epoch": 0.06272185988394657, + "grad_norm": 704.9512939453125, + "learning_rate": 1.9943919880676933e-06, + "loss": 37.0156, + "step": 6626 + }, + { + "epoch": 0.06273132590566163, + "grad_norm": 545.3443603515625, + "learning_rate": 1.9943887452598465e-06, + "loss": 28.7266, + "step": 6627 + }, + { + "epoch": 0.06274079192737668, + "grad_norm": 684.4176635742188, + "learning_rate": 1.9943855015173387e-06, + "loss": 45.4688, + "step": 6628 + }, + { + "epoch": 0.06275025794909174, + "grad_norm": 546.9070434570312, + "learning_rate": 1.994382256840173e-06, + "loss": 48.7969, + "step": 6629 + }, + { + "epoch": 0.06275972397080679, + "grad_norm": 382.7539367675781, + "learning_rate": 1.9943790112283534e-06, + "loss": 42.0312, + "step": 6630 + }, + { + "epoch": 0.06276918999252185, + "grad_norm": 146.4769287109375, + "learning_rate": 1.994375764681882e-06, + "loss": 17.7656, + "step": 6631 + }, + { + "epoch": 0.06277865601423689, + "grad_norm": 269.55206298828125, + "learning_rate": 1.9943725172007617e-06, + "loss": 31.0469, + "step": 6632 + }, + { + "epoch": 0.06278812203595195, + "grad_norm": 473.8993225097656, + "learning_rate": 1.994369268784996e-06, + "loss": 15.2148, + "step": 6633 + }, + { + "epoch": 0.062797588057667, + "grad_norm": 647.9005737304688, + "learning_rate": 1.994366019434588e-06, + "loss": 53.75, + "step": 6634 + }, + { + "epoch": 0.06280705407938206, + "grad_norm": 3.618703603744507, + "learning_rate": 1.994362769149541e-06, + "loss": 0.9863, + "step": 6635 + }, + { + "epoch": 0.06281652010109712, + "grad_norm": 602.4993286132812, + "learning_rate": 1.994359517929857e-06, + "loss": 30.7109, + "step": 6636 + }, + { + "epoch": 0.06282598612281216, + "grad_norm": 272.9553527832031, + "learning_rate": 1.9943562657755404e-06, + "loss": 25.7344, + "step": 6637 + }, + { + "epoch": 0.06283545214452722, + "grad_norm": 508.6067810058594, + "learning_rate": 1.994353012686593e-06, + "loss": 57.3125, + "step": 6638 + }, + { + "epoch": 0.06284491816624227, + "grad_norm": 3.6155178546905518, + "learning_rate": 1.994349758663019e-06, + "loss": 0.9292, + "step": 6639 + }, + { + "epoch": 0.06285438418795733, + "grad_norm": 1027.609375, + "learning_rate": 1.9943465037048203e-06, + "loss": 64.3555, + "step": 6640 + }, + { + "epoch": 0.06286385020967238, + "grad_norm": 428.33770751953125, + "learning_rate": 1.994343247812001e-06, + "loss": 33.6641, + "step": 6641 + }, + { + "epoch": 0.06287331623138744, + "grad_norm": 867.6676635742188, + "learning_rate": 1.9943399909845637e-06, + "loss": 47.7344, + "step": 6642 + }, + { + "epoch": 0.0628827822531025, + "grad_norm": 312.7489929199219, + "learning_rate": 1.9943367332225116e-06, + "loss": 28.2031, + "step": 6643 + }, + { + "epoch": 0.06289224827481754, + "grad_norm": 192.0654754638672, + "learning_rate": 1.994333474525847e-06, + "loss": 27.0703, + "step": 6644 + }, + { + "epoch": 0.0629017142965326, + "grad_norm": 386.3223571777344, + "learning_rate": 1.9943302148945746e-06, + "loss": 20.9609, + "step": 6645 + }, + { + "epoch": 0.06291118031824765, + "grad_norm": 360.61907958984375, + "learning_rate": 1.9943269543286956e-06, + "loss": 21.0859, + "step": 6646 + }, + { + "epoch": 0.06292064633996271, + "grad_norm": 461.377197265625, + "learning_rate": 1.9943236928282145e-06, + "loss": 30.6055, + "step": 6647 + }, + { + "epoch": 0.06293011236167775, + "grad_norm": 2.7292845249176025, + "learning_rate": 1.9943204303931337e-06, + "loss": 0.8647, + "step": 6648 + }, + { + "epoch": 0.06293957838339281, + "grad_norm": 342.566162109375, + "learning_rate": 1.9943171670234563e-06, + "loss": 28.3047, + "step": 6649 + }, + { + "epoch": 0.06294904440510786, + "grad_norm": 408.58258056640625, + "learning_rate": 1.9943139027191855e-06, + "loss": 18.4297, + "step": 6650 + }, + { + "epoch": 0.06295851042682292, + "grad_norm": 352.99078369140625, + "learning_rate": 1.994310637480324e-06, + "loss": 11.1719, + "step": 6651 + }, + { + "epoch": 0.06296797644853798, + "grad_norm": 345.1142883300781, + "learning_rate": 1.994307371306876e-06, + "loss": 22.0234, + "step": 6652 + }, + { + "epoch": 0.06297744247025303, + "grad_norm": 373.28070068359375, + "learning_rate": 1.994304104198843e-06, + "loss": 28.1641, + "step": 6653 + }, + { + "epoch": 0.06298690849196809, + "grad_norm": 232.0203857421875, + "learning_rate": 1.994300836156229e-06, + "loss": 24.8906, + "step": 6654 + }, + { + "epoch": 0.06299637451368313, + "grad_norm": 273.58642578125, + "learning_rate": 1.994297567179037e-06, + "loss": 18.9297, + "step": 6655 + }, + { + "epoch": 0.06300584053539819, + "grad_norm": 199.5651397705078, + "learning_rate": 1.99429429726727e-06, + "loss": 22.2891, + "step": 6656 + }, + { + "epoch": 0.06301530655711324, + "grad_norm": 455.1708984375, + "learning_rate": 1.994291026420931e-06, + "loss": 43.9062, + "step": 6657 + }, + { + "epoch": 0.0630247725788283, + "grad_norm": 553.3795776367188, + "learning_rate": 1.994287754640023e-06, + "loss": 39.2656, + "step": 6658 + }, + { + "epoch": 0.06303423860054334, + "grad_norm": 452.1964416503906, + "learning_rate": 1.994284481924549e-06, + "loss": 19.8789, + "step": 6659 + }, + { + "epoch": 0.0630437046222584, + "grad_norm": 133.99713134765625, + "learning_rate": 1.994281208274513e-06, + "loss": 15.1797, + "step": 6660 + }, + { + "epoch": 0.06305317064397346, + "grad_norm": 228.34942626953125, + "learning_rate": 1.994277933689917e-06, + "loss": 16.1328, + "step": 6661 + }, + { + "epoch": 0.06306263666568851, + "grad_norm": 1012.8562622070312, + "learning_rate": 1.994274658170764e-06, + "loss": 67.2188, + "step": 6662 + }, + { + "epoch": 0.06307210268740357, + "grad_norm": 222.49501037597656, + "learning_rate": 1.994271381717058e-06, + "loss": 23.2031, + "step": 6663 + }, + { + "epoch": 0.06308156870911862, + "grad_norm": 746.7752075195312, + "learning_rate": 1.9942681043288013e-06, + "loss": 54.0391, + "step": 6664 + }, + { + "epoch": 0.06309103473083368, + "grad_norm": 2.6500465869903564, + "learning_rate": 1.994264826005997e-06, + "loss": 0.8252, + "step": 6665 + }, + { + "epoch": 0.06310050075254872, + "grad_norm": 216.68743896484375, + "learning_rate": 1.9942615467486494e-06, + "loss": 22.8281, + "step": 6666 + }, + { + "epoch": 0.06310996677426378, + "grad_norm": 588.1904907226562, + "learning_rate": 1.99425826655676e-06, + "loss": 19.2656, + "step": 6667 + }, + { + "epoch": 0.06311943279597883, + "grad_norm": 515.8344116210938, + "learning_rate": 1.9942549854303327e-06, + "loss": 36.5938, + "step": 6668 + }, + { + "epoch": 0.06312889881769389, + "grad_norm": 297.4466247558594, + "learning_rate": 1.99425170336937e-06, + "loss": 27.6406, + "step": 6669 + }, + { + "epoch": 0.06313836483940895, + "grad_norm": 550.3272094726562, + "learning_rate": 1.9942484203738755e-06, + "loss": 25.7422, + "step": 6670 + }, + { + "epoch": 0.063147830861124, + "grad_norm": 367.9181213378906, + "learning_rate": 1.994245136443852e-06, + "loss": 46.2188, + "step": 6671 + }, + { + "epoch": 0.06315729688283905, + "grad_norm": 603.8129272460938, + "learning_rate": 1.9942418515793034e-06, + "loss": 35.1484, + "step": 6672 + }, + { + "epoch": 0.0631667629045541, + "grad_norm": 387.41595458984375, + "learning_rate": 1.9942385657802316e-06, + "loss": 24.125, + "step": 6673 + }, + { + "epoch": 0.06317622892626916, + "grad_norm": 242.595703125, + "learning_rate": 1.9942352790466407e-06, + "loss": 21.6797, + "step": 6674 + }, + { + "epoch": 0.0631856949479842, + "grad_norm": 321.00048828125, + "learning_rate": 1.994231991378533e-06, + "loss": 22.9609, + "step": 6675 + }, + { + "epoch": 0.06319516096969927, + "grad_norm": 697.1222534179688, + "learning_rate": 1.994228702775912e-06, + "loss": 53.6875, + "step": 6676 + }, + { + "epoch": 0.06320462699141431, + "grad_norm": 256.42822265625, + "learning_rate": 1.9942254132387808e-06, + "loss": 20.7031, + "step": 6677 + }, + { + "epoch": 0.06321409301312937, + "grad_norm": 335.9339294433594, + "learning_rate": 1.994222122767142e-06, + "loss": 22.0234, + "step": 6678 + }, + { + "epoch": 0.06322355903484443, + "grad_norm": 1188.4654541015625, + "learning_rate": 1.9942188313609992e-06, + "loss": 41.9453, + "step": 6679 + }, + { + "epoch": 0.06323302505655948, + "grad_norm": 209.3517608642578, + "learning_rate": 1.9942155390203555e-06, + "loss": 20.4141, + "step": 6680 + }, + { + "epoch": 0.06324249107827454, + "grad_norm": 292.9600524902344, + "learning_rate": 1.994212245745214e-06, + "loss": 16.3086, + "step": 6681 + }, + { + "epoch": 0.06325195709998958, + "grad_norm": 338.4494323730469, + "learning_rate": 1.9942089515355775e-06, + "loss": 30.3438, + "step": 6682 + }, + { + "epoch": 0.06326142312170464, + "grad_norm": 832.0562133789062, + "learning_rate": 1.994205656391449e-06, + "loss": 43.4297, + "step": 6683 + }, + { + "epoch": 0.06327088914341969, + "grad_norm": 254.0127716064453, + "learning_rate": 1.9942023603128325e-06, + "loss": 20.3125, + "step": 6684 + }, + { + "epoch": 0.06328035516513475, + "grad_norm": 316.4044189453125, + "learning_rate": 1.99419906329973e-06, + "loss": 21.5156, + "step": 6685 + }, + { + "epoch": 0.06328982118684981, + "grad_norm": 297.1955261230469, + "learning_rate": 1.994195765352145e-06, + "loss": 27.5078, + "step": 6686 + }, + { + "epoch": 0.06329928720856486, + "grad_norm": 203.60964965820312, + "learning_rate": 1.9941924664700807e-06, + "loss": 19.8047, + "step": 6687 + }, + { + "epoch": 0.06330875323027992, + "grad_norm": 353.3258972167969, + "learning_rate": 1.9941891666535407e-06, + "loss": 18.2422, + "step": 6688 + }, + { + "epoch": 0.06331821925199496, + "grad_norm": 598.8959350585938, + "learning_rate": 1.994185865902527e-06, + "loss": 14.7812, + "step": 6689 + }, + { + "epoch": 0.06332768527371002, + "grad_norm": 218.29974365234375, + "learning_rate": 1.9941825642170435e-06, + "loss": 18.5, + "step": 6690 + }, + { + "epoch": 0.06333715129542507, + "grad_norm": 936.3271484375, + "learning_rate": 1.9941792615970925e-06, + "loss": 77.5625, + "step": 6691 + }, + { + "epoch": 0.06334661731714013, + "grad_norm": 533.9515991210938, + "learning_rate": 1.9941759580426783e-06, + "loss": 48.0938, + "step": 6692 + }, + { + "epoch": 0.06335608333885517, + "grad_norm": 268.0434265136719, + "learning_rate": 1.9941726535538034e-06, + "loss": 16.7344, + "step": 6693 + }, + { + "epoch": 0.06336554936057023, + "grad_norm": 2.9863734245300293, + "learning_rate": 1.9941693481304707e-06, + "loss": 0.9204, + "step": 6694 + }, + { + "epoch": 0.0633750153822853, + "grad_norm": 610.7343139648438, + "learning_rate": 1.994166041772683e-06, + "loss": 50.1719, + "step": 6695 + }, + { + "epoch": 0.06338448140400034, + "grad_norm": 3.437141180038452, + "learning_rate": 1.9941627344804447e-06, + "loss": 0.9609, + "step": 6696 + }, + { + "epoch": 0.0633939474257154, + "grad_norm": 409.10595703125, + "learning_rate": 1.994159426253758e-06, + "loss": 25.3359, + "step": 6697 + }, + { + "epoch": 0.06340341344743045, + "grad_norm": 399.8074951171875, + "learning_rate": 1.994156117092626e-06, + "loss": 26.8516, + "step": 6698 + }, + { + "epoch": 0.0634128794691455, + "grad_norm": 605.4617309570312, + "learning_rate": 1.9941528069970514e-06, + "loss": 36.7422, + "step": 6699 + }, + { + "epoch": 0.06342234549086055, + "grad_norm": 561.4711303710938, + "learning_rate": 1.9941494959670382e-06, + "loss": 64.2812, + "step": 6700 + }, + { + "epoch": 0.06343181151257561, + "grad_norm": 456.05322265625, + "learning_rate": 1.9941461840025893e-06, + "loss": 25.9062, + "step": 6701 + }, + { + "epoch": 0.06344127753429066, + "grad_norm": 450.1174011230469, + "learning_rate": 1.9941428711037076e-06, + "loss": 27.9141, + "step": 6702 + }, + { + "epoch": 0.06345074355600572, + "grad_norm": 3.881624460220337, + "learning_rate": 1.994139557270396e-06, + "loss": 0.8594, + "step": 6703 + }, + { + "epoch": 0.06346020957772078, + "grad_norm": 971.6464233398438, + "learning_rate": 1.9941362425026586e-06, + "loss": 41.0547, + "step": 6704 + }, + { + "epoch": 0.06346967559943582, + "grad_norm": 187.8203582763672, + "learning_rate": 1.9941329268004973e-06, + "loss": 24.0156, + "step": 6705 + }, + { + "epoch": 0.06347914162115088, + "grad_norm": 532.7601928710938, + "learning_rate": 1.994129610163916e-06, + "loss": 37.3984, + "step": 6706 + }, + { + "epoch": 0.06348860764286593, + "grad_norm": 295.7820739746094, + "learning_rate": 1.994126292592917e-06, + "loss": 23.6328, + "step": 6707 + }, + { + "epoch": 0.06349807366458099, + "grad_norm": 1261.7890625, + "learning_rate": 1.9941229740875042e-06, + "loss": 70.3047, + "step": 6708 + }, + { + "epoch": 0.06350753968629604, + "grad_norm": 411.4134521484375, + "learning_rate": 1.9941196546476804e-06, + "loss": 23.5859, + "step": 6709 + }, + { + "epoch": 0.0635170057080111, + "grad_norm": 618.9721069335938, + "learning_rate": 1.994116334273449e-06, + "loss": 38.2891, + "step": 6710 + }, + { + "epoch": 0.06352647172972614, + "grad_norm": 309.77227783203125, + "learning_rate": 1.9941130129648127e-06, + "loss": 29.3984, + "step": 6711 + }, + { + "epoch": 0.0635359377514412, + "grad_norm": 1241.9078369140625, + "learning_rate": 1.9941096907217753e-06, + "loss": 105.0625, + "step": 6712 + }, + { + "epoch": 0.06354540377315626, + "grad_norm": 2.5295939445495605, + "learning_rate": 1.994106367544339e-06, + "loss": 0.8105, + "step": 6713 + }, + { + "epoch": 0.06355486979487131, + "grad_norm": 205.5342559814453, + "learning_rate": 1.9941030434325075e-06, + "loss": 30.9922, + "step": 6714 + }, + { + "epoch": 0.06356433581658637, + "grad_norm": 569.7839965820312, + "learning_rate": 1.9940997183862836e-06, + "loss": 54.0312, + "step": 6715 + }, + { + "epoch": 0.06357380183830141, + "grad_norm": 466.4731140136719, + "learning_rate": 1.994096392405671e-06, + "loss": 58.2812, + "step": 6716 + }, + { + "epoch": 0.06358326786001647, + "grad_norm": 828.9198608398438, + "learning_rate": 1.994093065490672e-06, + "loss": 60.0469, + "step": 6717 + }, + { + "epoch": 0.06359273388173152, + "grad_norm": 427.0306396484375, + "learning_rate": 1.994089737641291e-06, + "loss": 53.6875, + "step": 6718 + }, + { + "epoch": 0.06360219990344658, + "grad_norm": 296.9304504394531, + "learning_rate": 1.9940864088575297e-06, + "loss": 38.1719, + "step": 6719 + }, + { + "epoch": 0.06361166592516163, + "grad_norm": 292.32415771484375, + "learning_rate": 1.9940830791393922e-06, + "loss": 18.2109, + "step": 6720 + }, + { + "epoch": 0.06362113194687669, + "grad_norm": 189.47280883789062, + "learning_rate": 1.994079748486881e-06, + "loss": 27.5, + "step": 6721 + }, + { + "epoch": 0.06363059796859175, + "grad_norm": 425.1897888183594, + "learning_rate": 1.9940764168999997e-06, + "loss": 18.0391, + "step": 6722 + }, + { + "epoch": 0.06364006399030679, + "grad_norm": 511.52850341796875, + "learning_rate": 1.9940730843787507e-06, + "loss": 44.9375, + "step": 6723 + }, + { + "epoch": 0.06364953001202185, + "grad_norm": 502.69390869140625, + "learning_rate": 1.9940697509231383e-06, + "loss": 37.8594, + "step": 6724 + }, + { + "epoch": 0.0636589960337369, + "grad_norm": 426.2101135253906, + "learning_rate": 1.9940664165331647e-06, + "loss": 40.875, + "step": 6725 + }, + { + "epoch": 0.06366846205545196, + "grad_norm": 568.5416259765625, + "learning_rate": 1.9940630812088333e-06, + "loss": 24.2734, + "step": 6726 + }, + { + "epoch": 0.063677928077167, + "grad_norm": 691.176513671875, + "learning_rate": 1.9940597449501475e-06, + "loss": 18.0547, + "step": 6727 + }, + { + "epoch": 0.06368739409888206, + "grad_norm": 357.111328125, + "learning_rate": 1.99405640775711e-06, + "loss": 20.75, + "step": 6728 + }, + { + "epoch": 0.06369686012059712, + "grad_norm": 324.0435791015625, + "learning_rate": 1.994053069629724e-06, + "loss": 26.2969, + "step": 6729 + }, + { + "epoch": 0.06370632614231217, + "grad_norm": 352.7721862792969, + "learning_rate": 1.9940497305679933e-06, + "loss": 20.1797, + "step": 6730 + }, + { + "epoch": 0.06371579216402723, + "grad_norm": 342.6409606933594, + "learning_rate": 1.9940463905719203e-06, + "loss": 22.0469, + "step": 6731 + }, + { + "epoch": 0.06372525818574228, + "grad_norm": 275.0731201171875, + "learning_rate": 1.994043049641508e-06, + "loss": 26.3047, + "step": 6732 + }, + { + "epoch": 0.06373472420745734, + "grad_norm": 3.5069103240966797, + "learning_rate": 1.9940397077767606e-06, + "loss": 0.9619, + "step": 6733 + }, + { + "epoch": 0.06374419022917238, + "grad_norm": 376.8876037597656, + "learning_rate": 1.99403636497768e-06, + "loss": 28.3906, + "step": 6734 + }, + { + "epoch": 0.06375365625088744, + "grad_norm": 3.182755947113037, + "learning_rate": 1.99403302124427e-06, + "loss": 0.9131, + "step": 6735 + }, + { + "epoch": 0.06376312227260249, + "grad_norm": 505.15789794921875, + "learning_rate": 1.9940296765765338e-06, + "loss": 18.875, + "step": 6736 + }, + { + "epoch": 0.06377258829431755, + "grad_norm": 268.4615173339844, + "learning_rate": 1.994026330974474e-06, + "loss": 21.9844, + "step": 6737 + }, + { + "epoch": 0.06378205431603261, + "grad_norm": 753.83984375, + "learning_rate": 1.994022984438094e-06, + "loss": 27.2656, + "step": 6738 + }, + { + "epoch": 0.06379152033774765, + "grad_norm": 239.44686889648438, + "learning_rate": 1.9940196369673976e-06, + "loss": 24.0625, + "step": 6739 + }, + { + "epoch": 0.06380098635946271, + "grad_norm": 348.17950439453125, + "learning_rate": 1.9940162885623876e-06, + "loss": 47.4219, + "step": 6740 + }, + { + "epoch": 0.06381045238117776, + "grad_norm": 383.3602600097656, + "learning_rate": 1.994012939223066e-06, + "loss": 24.3281, + "step": 6741 + }, + { + "epoch": 0.06381991840289282, + "grad_norm": 337.5190734863281, + "learning_rate": 1.994009588949438e-06, + "loss": 13.2891, + "step": 6742 + }, + { + "epoch": 0.06382938442460787, + "grad_norm": 616.780517578125, + "learning_rate": 1.994006237741505e-06, + "loss": 23.3164, + "step": 6743 + }, + { + "epoch": 0.06383885044632293, + "grad_norm": 467.5712585449219, + "learning_rate": 1.994002885599271e-06, + "loss": 47.2969, + "step": 6744 + }, + { + "epoch": 0.06384831646803797, + "grad_norm": 454.1737365722656, + "learning_rate": 1.9939995325227386e-06, + "loss": 40.0938, + "step": 6745 + }, + { + "epoch": 0.06385778248975303, + "grad_norm": 226.8302764892578, + "learning_rate": 1.9939961785119113e-06, + "loss": 26.6875, + "step": 6746 + }, + { + "epoch": 0.06386724851146809, + "grad_norm": 194.16737365722656, + "learning_rate": 1.9939928235667927e-06, + "loss": 24.4375, + "step": 6747 + }, + { + "epoch": 0.06387671453318314, + "grad_norm": 558.4127197265625, + "learning_rate": 1.9939894676873853e-06, + "loss": 50.7969, + "step": 6748 + }, + { + "epoch": 0.0638861805548982, + "grad_norm": 290.07257080078125, + "learning_rate": 1.9939861108736926e-06, + "loss": 18.6094, + "step": 6749 + }, + { + "epoch": 0.06389564657661324, + "grad_norm": 348.2750244140625, + "learning_rate": 1.9939827531257174e-06, + "loss": 43.4688, + "step": 6750 + }, + { + "epoch": 0.0639051125983283, + "grad_norm": 716.6906127929688, + "learning_rate": 1.993979394443463e-06, + "loss": 29.4961, + "step": 6751 + }, + { + "epoch": 0.06391457862004335, + "grad_norm": 942.3460083007812, + "learning_rate": 1.993976034826933e-06, + "loss": 42.8516, + "step": 6752 + }, + { + "epoch": 0.06392404464175841, + "grad_norm": 285.1839904785156, + "learning_rate": 1.99397267427613e-06, + "loss": 18.2734, + "step": 6753 + }, + { + "epoch": 0.06393351066347346, + "grad_norm": 221.7361297607422, + "learning_rate": 1.993969312791057e-06, + "loss": 24.3516, + "step": 6754 + }, + { + "epoch": 0.06394297668518852, + "grad_norm": 359.666259765625, + "learning_rate": 1.9939659503717185e-06, + "loss": 31.7891, + "step": 6755 + }, + { + "epoch": 0.06395244270690358, + "grad_norm": 612.9922485351562, + "learning_rate": 1.9939625870181157e-06, + "loss": 30.2969, + "step": 6756 + }, + { + "epoch": 0.06396190872861862, + "grad_norm": 261.7985534667969, + "learning_rate": 1.993959222730253e-06, + "loss": 25.1094, + "step": 6757 + }, + { + "epoch": 0.06397137475033368, + "grad_norm": 194.78700256347656, + "learning_rate": 1.9939558575081335e-06, + "loss": 19.7227, + "step": 6758 + }, + { + "epoch": 0.06398084077204873, + "grad_norm": 558.3529052734375, + "learning_rate": 1.9939524913517597e-06, + "loss": 52.6875, + "step": 6759 + }, + { + "epoch": 0.06399030679376379, + "grad_norm": 420.88134765625, + "learning_rate": 1.9939491242611356e-06, + "loss": 25.1562, + "step": 6760 + }, + { + "epoch": 0.06399977281547883, + "grad_norm": 511.2099304199219, + "learning_rate": 1.9939457562362642e-06, + "loss": 21.3438, + "step": 6761 + }, + { + "epoch": 0.0640092388371939, + "grad_norm": 226.14028930664062, + "learning_rate": 1.993942387277148e-06, + "loss": 19.7031, + "step": 6762 + }, + { + "epoch": 0.06401870485890894, + "grad_norm": 164.95016479492188, + "learning_rate": 1.9939390173837907e-06, + "loss": 16.8438, + "step": 6763 + }, + { + "epoch": 0.064028170880624, + "grad_norm": 209.2526397705078, + "learning_rate": 1.9939356465561957e-06, + "loss": 17.7422, + "step": 6764 + }, + { + "epoch": 0.06403763690233906, + "grad_norm": 734.7076416015625, + "learning_rate": 1.9939322747943653e-06, + "loss": 39.2891, + "step": 6765 + }, + { + "epoch": 0.0640471029240541, + "grad_norm": 959.2252807617188, + "learning_rate": 1.9939289020983038e-06, + "loss": 41.2812, + "step": 6766 + }, + { + "epoch": 0.06405656894576917, + "grad_norm": 554.6907958984375, + "learning_rate": 1.9939255284680136e-06, + "loss": 39.625, + "step": 6767 + }, + { + "epoch": 0.06406603496748421, + "grad_norm": 346.40972900390625, + "learning_rate": 1.9939221539034977e-06, + "loss": 29.8828, + "step": 6768 + }, + { + "epoch": 0.06407550098919927, + "grad_norm": 543.5216674804688, + "learning_rate": 1.99391877840476e-06, + "loss": 51.0625, + "step": 6769 + }, + { + "epoch": 0.06408496701091432, + "grad_norm": 537.1832885742188, + "learning_rate": 1.9939154019718034e-06, + "loss": 43.3047, + "step": 6770 + }, + { + "epoch": 0.06409443303262938, + "grad_norm": 631.9224853515625, + "learning_rate": 1.9939120246046304e-06, + "loss": 48.0703, + "step": 6771 + }, + { + "epoch": 0.06410389905434444, + "grad_norm": 451.6214599609375, + "learning_rate": 1.9939086463032453e-06, + "loss": 34.8438, + "step": 6772 + }, + { + "epoch": 0.06411336507605948, + "grad_norm": 3.453562021255493, + "learning_rate": 1.9939052670676506e-06, + "loss": 0.9565, + "step": 6773 + }, + { + "epoch": 0.06412283109777454, + "grad_norm": 664.9557495117188, + "learning_rate": 1.9939018868978498e-06, + "loss": 45.0156, + "step": 6774 + }, + { + "epoch": 0.06413229711948959, + "grad_norm": 432.971923828125, + "learning_rate": 1.9938985057938457e-06, + "loss": 10.3164, + "step": 6775 + }, + { + "epoch": 0.06414176314120465, + "grad_norm": 388.8166809082031, + "learning_rate": 1.9938951237556413e-06, + "loss": 28.1719, + "step": 6776 + }, + { + "epoch": 0.0641512291629197, + "grad_norm": 3.854192018508911, + "learning_rate": 1.9938917407832405e-06, + "loss": 0.9795, + "step": 6777 + }, + { + "epoch": 0.06416069518463476, + "grad_norm": 543.2747192382812, + "learning_rate": 1.9938883568766463e-06, + "loss": 38.4844, + "step": 6778 + }, + { + "epoch": 0.0641701612063498, + "grad_norm": 328.5213928222656, + "learning_rate": 1.9938849720358615e-06, + "loss": 22.4766, + "step": 6779 + }, + { + "epoch": 0.06417962722806486, + "grad_norm": 1238.9642333984375, + "learning_rate": 1.9938815862608895e-06, + "loss": 44.8672, + "step": 6780 + }, + { + "epoch": 0.06418909324977992, + "grad_norm": 186.721923828125, + "learning_rate": 1.9938781995517334e-06, + "loss": 16.9922, + "step": 6781 + }, + { + "epoch": 0.06419855927149497, + "grad_norm": 340.6730651855469, + "learning_rate": 1.9938748119083965e-06, + "loss": 15.9688, + "step": 6782 + }, + { + "epoch": 0.06420802529321003, + "grad_norm": 269.8280029296875, + "learning_rate": 1.993871423330882e-06, + "loss": 22.0391, + "step": 6783 + }, + { + "epoch": 0.06421749131492507, + "grad_norm": 440.0679931640625, + "learning_rate": 1.9938680338191927e-06, + "loss": 50.7188, + "step": 6784 + }, + { + "epoch": 0.06422695733664013, + "grad_norm": 367.6082763671875, + "learning_rate": 1.9938646433733326e-06, + "loss": 16.2383, + "step": 6785 + }, + { + "epoch": 0.06423642335835518, + "grad_norm": 508.3544616699219, + "learning_rate": 1.9938612519933043e-06, + "loss": 46.4531, + "step": 6786 + }, + { + "epoch": 0.06424588938007024, + "grad_norm": 250.93865966796875, + "learning_rate": 1.9938578596791106e-06, + "loss": 22.7031, + "step": 6787 + }, + { + "epoch": 0.06425535540178529, + "grad_norm": 514.9205322265625, + "learning_rate": 1.9938544664307555e-06, + "loss": 30.7031, + "step": 6788 + }, + { + "epoch": 0.06426482142350035, + "grad_norm": 518.8614501953125, + "learning_rate": 1.993851072248242e-06, + "loss": 36.5938, + "step": 6789 + }, + { + "epoch": 0.0642742874452154, + "grad_norm": 231.544921875, + "learning_rate": 1.993847677131573e-06, + "loss": 23.0625, + "step": 6790 + }, + { + "epoch": 0.06428375346693045, + "grad_norm": 418.1140441894531, + "learning_rate": 1.9938442810807518e-06, + "loss": 44.1719, + "step": 6791 + }, + { + "epoch": 0.06429321948864551, + "grad_norm": 200.3647003173828, + "learning_rate": 1.9938408840957816e-06, + "loss": 27.8125, + "step": 6792 + }, + { + "epoch": 0.06430268551036056, + "grad_norm": 263.9759521484375, + "learning_rate": 1.9938374861766658e-06, + "loss": 25.1328, + "step": 6793 + }, + { + "epoch": 0.06431215153207562, + "grad_norm": 3.02940034866333, + "learning_rate": 1.9938340873234073e-06, + "loss": 0.8872, + "step": 6794 + }, + { + "epoch": 0.06432161755379066, + "grad_norm": 246.05703735351562, + "learning_rate": 1.9938306875360096e-06, + "loss": 26.1562, + "step": 6795 + }, + { + "epoch": 0.06433108357550572, + "grad_norm": 3.80065655708313, + "learning_rate": 1.993827286814475e-06, + "loss": 0.9409, + "step": 6796 + }, + { + "epoch": 0.06434054959722077, + "grad_norm": 215.72813415527344, + "learning_rate": 1.993823885158808e-06, + "loss": 21.3047, + "step": 6797 + }, + { + "epoch": 0.06435001561893583, + "grad_norm": 264.819580078125, + "learning_rate": 1.993820482569011e-06, + "loss": 21.875, + "step": 6798 + }, + { + "epoch": 0.06435948164065089, + "grad_norm": 820.5552368164062, + "learning_rate": 1.993817079045088e-06, + "loss": 41.4766, + "step": 6799 + }, + { + "epoch": 0.06436894766236594, + "grad_norm": 3.3449745178222656, + "learning_rate": 1.993813674587041e-06, + "loss": 1.0513, + "step": 6800 + }, + { + "epoch": 0.064378413684081, + "grad_norm": 741.0602416992188, + "learning_rate": 1.9938102691948736e-06, + "loss": 43.5469, + "step": 6801 + }, + { + "epoch": 0.06438787970579604, + "grad_norm": 179.12844848632812, + "learning_rate": 1.99380686286859e-06, + "loss": 25.1094, + "step": 6802 + }, + { + "epoch": 0.0643973457275111, + "grad_norm": 476.2220764160156, + "learning_rate": 1.993803455608192e-06, + "loss": 34.7344, + "step": 6803 + }, + { + "epoch": 0.06440681174922615, + "grad_norm": 203.05166625976562, + "learning_rate": 1.9938000474136834e-06, + "loss": 27.0391, + "step": 6804 + }, + { + "epoch": 0.06441627777094121, + "grad_norm": 1141.520263671875, + "learning_rate": 1.9937966382850674e-06, + "loss": 14.7969, + "step": 6805 + }, + { + "epoch": 0.06442574379265625, + "grad_norm": 511.64569091796875, + "learning_rate": 1.9937932282223477e-06, + "loss": 25.0391, + "step": 6806 + }, + { + "epoch": 0.06443520981437131, + "grad_norm": 1415.902099609375, + "learning_rate": 1.9937898172255264e-06, + "loss": 53.1953, + "step": 6807 + }, + { + "epoch": 0.06444467583608637, + "grad_norm": 193.03933715820312, + "learning_rate": 1.993786405294608e-06, + "loss": 19.8125, + "step": 6808 + }, + { + "epoch": 0.06445414185780142, + "grad_norm": 437.5309143066406, + "learning_rate": 1.9937829924295944e-06, + "loss": 39.2656, + "step": 6809 + }, + { + "epoch": 0.06446360787951648, + "grad_norm": 561.9658203125, + "learning_rate": 1.9937795786304895e-06, + "loss": 58.5938, + "step": 6810 + }, + { + "epoch": 0.06447307390123153, + "grad_norm": 482.51116943359375, + "learning_rate": 1.9937761638972966e-06, + "loss": 27.4219, + "step": 6811 + }, + { + "epoch": 0.06448253992294659, + "grad_norm": 3.623176336288452, + "learning_rate": 1.9937727482300187e-06, + "loss": 0.8447, + "step": 6812 + }, + { + "epoch": 0.06449200594466163, + "grad_norm": 596.6619873046875, + "learning_rate": 1.993769331628659e-06, + "loss": 57.9219, + "step": 6813 + }, + { + "epoch": 0.06450147196637669, + "grad_norm": 583.9107666015625, + "learning_rate": 1.9937659140932207e-06, + "loss": 24.7734, + "step": 6814 + }, + { + "epoch": 0.06451093798809175, + "grad_norm": 3.422287702560425, + "learning_rate": 1.9937624956237075e-06, + "loss": 0.9395, + "step": 6815 + }, + { + "epoch": 0.0645204040098068, + "grad_norm": 439.77764892578125, + "learning_rate": 1.993759076220122e-06, + "loss": 17.8359, + "step": 6816 + }, + { + "epoch": 0.06452987003152186, + "grad_norm": 1175.5050048828125, + "learning_rate": 1.9937556558824673e-06, + "loss": 44.9375, + "step": 6817 + }, + { + "epoch": 0.0645393360532369, + "grad_norm": 3.1925745010375977, + "learning_rate": 1.993752234610747e-06, + "loss": 0.937, + "step": 6818 + }, + { + "epoch": 0.06454880207495196, + "grad_norm": 2.8572919368743896, + "learning_rate": 1.993748812404965e-06, + "loss": 0.9341, + "step": 6819 + }, + { + "epoch": 0.06455826809666701, + "grad_norm": 843.656982421875, + "learning_rate": 1.9937453892651233e-06, + "loss": 42.6094, + "step": 6820 + }, + { + "epoch": 0.06456773411838207, + "grad_norm": 386.87481689453125, + "learning_rate": 1.9937419651912255e-06, + "loss": 34.7969, + "step": 6821 + }, + { + "epoch": 0.06457720014009712, + "grad_norm": 508.1947021484375, + "learning_rate": 1.9937385401832748e-06, + "loss": 28.0469, + "step": 6822 + }, + { + "epoch": 0.06458666616181218, + "grad_norm": 847.3078002929688, + "learning_rate": 1.9937351142412746e-06, + "loss": 36.0312, + "step": 6823 + }, + { + "epoch": 0.06459613218352724, + "grad_norm": 599.9219970703125, + "learning_rate": 1.9937316873652283e-06, + "loss": 51.6094, + "step": 6824 + }, + { + "epoch": 0.06460559820524228, + "grad_norm": 369.3455505371094, + "learning_rate": 1.9937282595551384e-06, + "loss": 20.375, + "step": 6825 + }, + { + "epoch": 0.06461506422695734, + "grad_norm": 332.9977111816406, + "learning_rate": 1.993724830811009e-06, + "loss": 34.0859, + "step": 6826 + }, + { + "epoch": 0.06462453024867239, + "grad_norm": 993.9578247070312, + "learning_rate": 1.993721401132843e-06, + "loss": 27.9844, + "step": 6827 + }, + { + "epoch": 0.06463399627038745, + "grad_norm": 269.75994873046875, + "learning_rate": 1.993717970520643e-06, + "loss": 27.7812, + "step": 6828 + }, + { + "epoch": 0.0646434622921025, + "grad_norm": 396.85467529296875, + "learning_rate": 1.993714538974413e-06, + "loss": 29.625, + "step": 6829 + }, + { + "epoch": 0.06465292831381755, + "grad_norm": 248.7867889404297, + "learning_rate": 1.993711106494156e-06, + "loss": 26.0469, + "step": 6830 + }, + { + "epoch": 0.0646623943355326, + "grad_norm": 510.2934875488281, + "learning_rate": 1.993707673079875e-06, + "loss": 32.6172, + "step": 6831 + }, + { + "epoch": 0.06467186035724766, + "grad_norm": 331.2717590332031, + "learning_rate": 1.993704238731574e-06, + "loss": 31.1172, + "step": 6832 + }, + { + "epoch": 0.06468132637896272, + "grad_norm": 318.83172607421875, + "learning_rate": 1.993700803449255e-06, + "loss": 10.2461, + "step": 6833 + }, + { + "epoch": 0.06469079240067777, + "grad_norm": 415.1157531738281, + "learning_rate": 1.993697367232922e-06, + "loss": 27.6172, + "step": 6834 + }, + { + "epoch": 0.06470025842239283, + "grad_norm": 602.515869140625, + "learning_rate": 1.9936939300825784e-06, + "loss": 58.1719, + "step": 6835 + }, + { + "epoch": 0.06470972444410787, + "grad_norm": 251.56094360351562, + "learning_rate": 1.993690491998227e-06, + "loss": 21.1328, + "step": 6836 + }, + { + "epoch": 0.06471919046582293, + "grad_norm": 335.6182556152344, + "learning_rate": 1.993687052979871e-06, + "loss": 17.3398, + "step": 6837 + }, + { + "epoch": 0.06472865648753798, + "grad_norm": 805.90966796875, + "learning_rate": 1.9936836130275144e-06, + "loss": 57.0156, + "step": 6838 + }, + { + "epoch": 0.06473812250925304, + "grad_norm": 557.9126586914062, + "learning_rate": 1.9936801721411595e-06, + "loss": 20.9062, + "step": 6839 + }, + { + "epoch": 0.06474758853096808, + "grad_norm": 364.9588623046875, + "learning_rate": 1.9936767303208096e-06, + "loss": 21.3906, + "step": 6840 + }, + { + "epoch": 0.06475705455268314, + "grad_norm": 271.81390380859375, + "learning_rate": 1.9936732875664684e-06, + "loss": 21.0156, + "step": 6841 + }, + { + "epoch": 0.0647665205743982, + "grad_norm": 582.1041259765625, + "learning_rate": 1.9936698438781394e-06, + "loss": 21.0, + "step": 6842 + }, + { + "epoch": 0.06477598659611325, + "grad_norm": 309.1112365722656, + "learning_rate": 1.993666399255825e-06, + "loss": 19.5547, + "step": 6843 + }, + { + "epoch": 0.06478545261782831, + "grad_norm": 238.97169494628906, + "learning_rate": 1.9936629536995288e-06, + "loss": 18.2109, + "step": 6844 + }, + { + "epoch": 0.06479491863954336, + "grad_norm": 443.24517822265625, + "learning_rate": 1.9936595072092542e-06, + "loss": 25.625, + "step": 6845 + }, + { + "epoch": 0.06480438466125842, + "grad_norm": 990.4677734375, + "learning_rate": 1.993656059785004e-06, + "loss": 66.4531, + "step": 6846 + }, + { + "epoch": 0.06481385068297346, + "grad_norm": 310.2567443847656, + "learning_rate": 1.993652611426782e-06, + "loss": 20.4453, + "step": 6847 + }, + { + "epoch": 0.06482331670468852, + "grad_norm": 807.7911376953125, + "learning_rate": 1.993649162134591e-06, + "loss": 40.1094, + "step": 6848 + }, + { + "epoch": 0.06483278272640357, + "grad_norm": 295.058837890625, + "learning_rate": 1.9936457119084345e-06, + "loss": 24.4062, + "step": 6849 + }, + { + "epoch": 0.06484224874811863, + "grad_norm": 329.2454833984375, + "learning_rate": 1.993642260748316e-06, + "loss": 37.2656, + "step": 6850 + }, + { + "epoch": 0.06485171476983369, + "grad_norm": 1815.8370361328125, + "learning_rate": 1.993638808654238e-06, + "loss": 43.1719, + "step": 6851 + }, + { + "epoch": 0.06486118079154873, + "grad_norm": 186.4219207763672, + "learning_rate": 1.9936353556262046e-06, + "loss": 22.1562, + "step": 6852 + }, + { + "epoch": 0.0648706468132638, + "grad_norm": 210.10174560546875, + "learning_rate": 1.993631901664218e-06, + "loss": 20.8047, + "step": 6853 + }, + { + "epoch": 0.06488011283497884, + "grad_norm": 360.3117370605469, + "learning_rate": 1.9936284467682825e-06, + "loss": 22.6719, + "step": 6854 + }, + { + "epoch": 0.0648895788566939, + "grad_norm": 664.08056640625, + "learning_rate": 1.9936249909384008e-06, + "loss": 55.875, + "step": 6855 + }, + { + "epoch": 0.06489904487840895, + "grad_norm": 244.22320556640625, + "learning_rate": 1.9936215341745763e-06, + "loss": 24.0938, + "step": 6856 + }, + { + "epoch": 0.064908510900124, + "grad_norm": 387.0411071777344, + "learning_rate": 1.993618076476812e-06, + "loss": 16.293, + "step": 6857 + }, + { + "epoch": 0.06491797692183905, + "grad_norm": 270.5316467285156, + "learning_rate": 1.9936146178451117e-06, + "loss": 23.3281, + "step": 6858 + }, + { + "epoch": 0.06492744294355411, + "grad_norm": 534.1759033203125, + "learning_rate": 1.993611158279478e-06, + "loss": 32.6719, + "step": 6859 + }, + { + "epoch": 0.06493690896526917, + "grad_norm": 3.007585048675537, + "learning_rate": 1.9936076977799146e-06, + "loss": 0.9062, + "step": 6860 + }, + { + "epoch": 0.06494637498698422, + "grad_norm": 484.61279296875, + "learning_rate": 1.9936042363464246e-06, + "loss": 39.1562, + "step": 6861 + }, + { + "epoch": 0.06495584100869928, + "grad_norm": 189.550048828125, + "learning_rate": 1.993600773979011e-06, + "loss": 20.5625, + "step": 6862 + }, + { + "epoch": 0.06496530703041432, + "grad_norm": 401.8994445800781, + "learning_rate": 1.9935973106776777e-06, + "loss": 38.2188, + "step": 6863 + }, + { + "epoch": 0.06497477305212938, + "grad_norm": 236.42018127441406, + "learning_rate": 1.9935938464424275e-06, + "loss": 18.3984, + "step": 6864 + }, + { + "epoch": 0.06498423907384443, + "grad_norm": 622.1006469726562, + "learning_rate": 1.9935903812732634e-06, + "loss": 47.9062, + "step": 6865 + }, + { + "epoch": 0.06499370509555949, + "grad_norm": 1445.9755859375, + "learning_rate": 1.9935869151701888e-06, + "loss": 31.1719, + "step": 6866 + }, + { + "epoch": 0.06500317111727455, + "grad_norm": 881.0568237304688, + "learning_rate": 1.9935834481332077e-06, + "loss": 36.4141, + "step": 6867 + }, + { + "epoch": 0.0650126371389896, + "grad_norm": 305.6645812988281, + "learning_rate": 1.993579980162323e-06, + "loss": 27.6719, + "step": 6868 + }, + { + "epoch": 0.06502210316070466, + "grad_norm": 209.15638732910156, + "learning_rate": 1.9935765112575373e-06, + "loss": 18.7969, + "step": 6869 + }, + { + "epoch": 0.0650315691824197, + "grad_norm": 410.0610046386719, + "learning_rate": 1.993573041418854e-06, + "loss": 40.6562, + "step": 6870 + }, + { + "epoch": 0.06504103520413476, + "grad_norm": 345.05059814453125, + "learning_rate": 1.993569570646277e-06, + "loss": 22.0938, + "step": 6871 + }, + { + "epoch": 0.06505050122584981, + "grad_norm": 184.27740478515625, + "learning_rate": 1.9935660989398096e-06, + "loss": 19.0078, + "step": 6872 + }, + { + "epoch": 0.06505996724756487, + "grad_norm": 329.0332336425781, + "learning_rate": 1.993562626299454e-06, + "loss": 26.1328, + "step": 6873 + }, + { + "epoch": 0.06506943326927991, + "grad_norm": 3.336580276489258, + "learning_rate": 1.9935591527252147e-06, + "loss": 0.8638, + "step": 6874 + }, + { + "epoch": 0.06507889929099497, + "grad_norm": 300.8702087402344, + "learning_rate": 1.9935556782170943e-06, + "loss": 21.9688, + "step": 6875 + }, + { + "epoch": 0.06508836531271003, + "grad_norm": 764.0031127929688, + "learning_rate": 1.993552202775096e-06, + "loss": 46.9375, + "step": 6876 + }, + { + "epoch": 0.06509783133442508, + "grad_norm": 385.5001525878906, + "learning_rate": 1.9935487263992236e-06, + "loss": 22.9922, + "step": 6877 + }, + { + "epoch": 0.06510729735614014, + "grad_norm": 366.28759765625, + "learning_rate": 1.99354524908948e-06, + "loss": 26.4531, + "step": 6878 + }, + { + "epoch": 0.06511676337785519, + "grad_norm": 435.46136474609375, + "learning_rate": 1.9935417708458684e-06, + "loss": 34.9062, + "step": 6879 + }, + { + "epoch": 0.06512622939957025, + "grad_norm": 741.1604614257812, + "learning_rate": 1.993538291668392e-06, + "loss": 23.7734, + "step": 6880 + }, + { + "epoch": 0.06513569542128529, + "grad_norm": 627.2677001953125, + "learning_rate": 1.9935348115570544e-06, + "loss": 28.3125, + "step": 6881 + }, + { + "epoch": 0.06514516144300035, + "grad_norm": 247.19837951660156, + "learning_rate": 1.9935313305118586e-06, + "loss": 26.3594, + "step": 6882 + }, + { + "epoch": 0.0651546274647154, + "grad_norm": 551.6948852539062, + "learning_rate": 1.993527848532808e-06, + "loss": 37.9688, + "step": 6883 + }, + { + "epoch": 0.06516409348643046, + "grad_norm": 548.3753051757812, + "learning_rate": 1.993524365619906e-06, + "loss": 22.1172, + "step": 6884 + }, + { + "epoch": 0.06517355950814552, + "grad_norm": 723.0301513671875, + "learning_rate": 1.993520881773156e-06, + "loss": 57.3594, + "step": 6885 + }, + { + "epoch": 0.06518302552986056, + "grad_norm": 234.8803253173828, + "learning_rate": 1.9935173969925605e-06, + "loss": 20.2422, + "step": 6886 + }, + { + "epoch": 0.06519249155157562, + "grad_norm": 242.2017059326172, + "learning_rate": 1.9935139112781235e-06, + "loss": 30.5, + "step": 6887 + }, + { + "epoch": 0.06520195757329067, + "grad_norm": 461.2847900390625, + "learning_rate": 1.993510424629848e-06, + "loss": 34.7656, + "step": 6888 + }, + { + "epoch": 0.06521142359500573, + "grad_norm": 147.1361846923828, + "learning_rate": 1.993506937047737e-06, + "loss": 22.9844, + "step": 6889 + }, + { + "epoch": 0.06522088961672078, + "grad_norm": 349.9389343261719, + "learning_rate": 1.9935034485317947e-06, + "loss": 22.4805, + "step": 6890 + }, + { + "epoch": 0.06523035563843584, + "grad_norm": 323.0074157714844, + "learning_rate": 1.9934999590820233e-06, + "loss": 21.0781, + "step": 6891 + }, + { + "epoch": 0.06523982166015088, + "grad_norm": 325.5917053222656, + "learning_rate": 1.993496468698427e-06, + "loss": 33.7188, + "step": 6892 + }, + { + "epoch": 0.06524928768186594, + "grad_norm": 553.8457641601562, + "learning_rate": 1.9934929773810084e-06, + "loss": 19.6641, + "step": 6893 + }, + { + "epoch": 0.065258753703581, + "grad_norm": 301.255859375, + "learning_rate": 1.9934894851297713e-06, + "loss": 45.4688, + "step": 6894 + }, + { + "epoch": 0.06526821972529605, + "grad_norm": 543.5813598632812, + "learning_rate": 1.9934859919447185e-06, + "loss": 36.7188, + "step": 6895 + }, + { + "epoch": 0.06527768574701111, + "grad_norm": 3.636300563812256, + "learning_rate": 1.9934824978258537e-06, + "loss": 0.9507, + "step": 6896 + }, + { + "epoch": 0.06528715176872615, + "grad_norm": 381.4173889160156, + "learning_rate": 1.9934790027731796e-06, + "loss": 23.0469, + "step": 6897 + }, + { + "epoch": 0.06529661779044121, + "grad_norm": 435.506591796875, + "learning_rate": 1.9934755067867e-06, + "loss": 44.3203, + "step": 6898 + }, + { + "epoch": 0.06530608381215626, + "grad_norm": 251.72735595703125, + "learning_rate": 1.9934720098664178e-06, + "loss": 31.5156, + "step": 6899 + }, + { + "epoch": 0.06531554983387132, + "grad_norm": 237.38902282714844, + "learning_rate": 1.9934685120123372e-06, + "loss": 17.3828, + "step": 6900 + }, + { + "epoch": 0.06532501585558637, + "grad_norm": 319.1595764160156, + "learning_rate": 1.9934650132244604e-06, + "loss": 29.7891, + "step": 6901 + }, + { + "epoch": 0.06533448187730143, + "grad_norm": 297.1484680175781, + "learning_rate": 1.9934615135027912e-06, + "loss": 18.4922, + "step": 6902 + }, + { + "epoch": 0.06534394789901649, + "grad_norm": 782.753173828125, + "learning_rate": 1.993458012847333e-06, + "loss": 59.8438, + "step": 6903 + }, + { + "epoch": 0.06535341392073153, + "grad_norm": 249.27523803710938, + "learning_rate": 1.9934545112580883e-06, + "loss": 19.6484, + "step": 6904 + }, + { + "epoch": 0.06536287994244659, + "grad_norm": 3.2020819187164307, + "learning_rate": 1.9934510087350618e-06, + "loss": 0.8584, + "step": 6905 + }, + { + "epoch": 0.06537234596416164, + "grad_norm": 442.0157775878906, + "learning_rate": 1.9934475052782555e-06, + "loss": 37.7812, + "step": 6906 + }, + { + "epoch": 0.0653818119858767, + "grad_norm": 480.3522033691406, + "learning_rate": 1.9934440008876734e-06, + "loss": 20.9453, + "step": 6907 + }, + { + "epoch": 0.06539127800759174, + "grad_norm": 882.9713745117188, + "learning_rate": 1.993440495563318e-06, + "loss": 44.0312, + "step": 6908 + }, + { + "epoch": 0.0654007440293068, + "grad_norm": 266.6661682128906, + "learning_rate": 1.9934369893051937e-06, + "loss": 21.9688, + "step": 6909 + }, + { + "epoch": 0.06541021005102186, + "grad_norm": 428.4610595703125, + "learning_rate": 1.993433482113303e-06, + "loss": 35.4688, + "step": 6910 + }, + { + "epoch": 0.06541967607273691, + "grad_norm": 205.68333435058594, + "learning_rate": 1.9934299739876493e-06, + "loss": 21.2109, + "step": 6911 + }, + { + "epoch": 0.06542914209445197, + "grad_norm": 432.5670166015625, + "learning_rate": 1.9934264649282364e-06, + "loss": 21.3125, + "step": 6912 + }, + { + "epoch": 0.06543860811616702, + "grad_norm": 410.7072448730469, + "learning_rate": 1.9934229549350675e-06, + "loss": 60.6328, + "step": 6913 + }, + { + "epoch": 0.06544807413788208, + "grad_norm": 211.5836181640625, + "learning_rate": 1.993419444008145e-06, + "loss": 18.6719, + "step": 6914 + }, + { + "epoch": 0.06545754015959712, + "grad_norm": 391.7228698730469, + "learning_rate": 1.9934159321474736e-06, + "loss": 24.1016, + "step": 6915 + }, + { + "epoch": 0.06546700618131218, + "grad_norm": 156.01390075683594, + "learning_rate": 1.9934124193530553e-06, + "loss": 19.3281, + "step": 6916 + }, + { + "epoch": 0.06547647220302723, + "grad_norm": 391.4064636230469, + "learning_rate": 1.993408905624894e-06, + "loss": 24.6328, + "step": 6917 + }, + { + "epoch": 0.06548593822474229, + "grad_norm": 666.3419189453125, + "learning_rate": 1.993405390962993e-06, + "loss": 33.7188, + "step": 6918 + }, + { + "epoch": 0.06549540424645735, + "grad_norm": 865.1968994140625, + "learning_rate": 1.9934018753673556e-06, + "loss": 45.8906, + "step": 6919 + }, + { + "epoch": 0.0655048702681724, + "grad_norm": 625.274658203125, + "learning_rate": 1.993398358837985e-06, + "loss": 28.0273, + "step": 6920 + }, + { + "epoch": 0.06551433628988745, + "grad_norm": 247.78314208984375, + "learning_rate": 1.9933948413748846e-06, + "loss": 23.2578, + "step": 6921 + }, + { + "epoch": 0.0655238023116025, + "grad_norm": 285.0694274902344, + "learning_rate": 1.993391322978058e-06, + "loss": 21.5469, + "step": 6922 + }, + { + "epoch": 0.06553326833331756, + "grad_norm": 485.5765075683594, + "learning_rate": 1.9933878036475077e-06, + "loss": 49.0547, + "step": 6923 + }, + { + "epoch": 0.0655427343550326, + "grad_norm": 188.48890686035156, + "learning_rate": 1.9933842833832376e-06, + "loss": 19.875, + "step": 6924 + }, + { + "epoch": 0.06555220037674767, + "grad_norm": 672.9385986328125, + "learning_rate": 1.993380762185251e-06, + "loss": 49.0469, + "step": 6925 + }, + { + "epoch": 0.06556166639846271, + "grad_norm": 217.60626220703125, + "learning_rate": 1.993377240053551e-06, + "loss": 22.0547, + "step": 6926 + }, + { + "epoch": 0.06557113242017777, + "grad_norm": 673.9539794921875, + "learning_rate": 1.9933737169881412e-06, + "loss": 66.7969, + "step": 6927 + }, + { + "epoch": 0.06558059844189283, + "grad_norm": 433.0676574707031, + "learning_rate": 1.9933701929890246e-06, + "loss": 31.8438, + "step": 6928 + }, + { + "epoch": 0.06559006446360788, + "grad_norm": 394.1878662109375, + "learning_rate": 1.9933666680562045e-06, + "loss": 25.8672, + "step": 6929 + }, + { + "epoch": 0.06559953048532294, + "grad_norm": 407.73394775390625, + "learning_rate": 1.9933631421896847e-06, + "loss": 25.6172, + "step": 6930 + }, + { + "epoch": 0.06560899650703798, + "grad_norm": 344.2967529296875, + "learning_rate": 1.9933596153894683e-06, + "loss": 22.1641, + "step": 6931 + }, + { + "epoch": 0.06561846252875304, + "grad_norm": 212.59588623046875, + "learning_rate": 1.9933560876555577e-06, + "loss": 21.1094, + "step": 6932 + }, + { + "epoch": 0.06562792855046809, + "grad_norm": 472.0190734863281, + "learning_rate": 1.9933525589879575e-06, + "loss": 33.6016, + "step": 6933 + }, + { + "epoch": 0.06563739457218315, + "grad_norm": 345.7137756347656, + "learning_rate": 1.9933490293866705e-06, + "loss": 28.4844, + "step": 6934 + }, + { + "epoch": 0.0656468605938982, + "grad_norm": 388.1624450683594, + "learning_rate": 1.9933454988517e-06, + "loss": 22.0, + "step": 6935 + }, + { + "epoch": 0.06565632661561326, + "grad_norm": 257.78369140625, + "learning_rate": 1.9933419673830494e-06, + "loss": 23.2109, + "step": 6936 + }, + { + "epoch": 0.06566579263732832, + "grad_norm": 293.7497253417969, + "learning_rate": 1.993338434980722e-06, + "loss": 21.3828, + "step": 6937 + }, + { + "epoch": 0.06567525865904336, + "grad_norm": 372.8025207519531, + "learning_rate": 1.993334901644721e-06, + "loss": 30.1641, + "step": 6938 + }, + { + "epoch": 0.06568472468075842, + "grad_norm": 335.0274963378906, + "learning_rate": 1.9933313673750497e-06, + "loss": 24.3906, + "step": 6939 + }, + { + "epoch": 0.06569419070247347, + "grad_norm": 326.9030456542969, + "learning_rate": 1.9933278321717116e-06, + "loss": 34.625, + "step": 6940 + }, + { + "epoch": 0.06570365672418853, + "grad_norm": 962.4365844726562, + "learning_rate": 1.9933242960347102e-06, + "loss": 22.1953, + "step": 6941 + }, + { + "epoch": 0.06571312274590357, + "grad_norm": 987.5614624023438, + "learning_rate": 1.9933207589640486e-06, + "loss": 54.4922, + "step": 6942 + }, + { + "epoch": 0.06572258876761863, + "grad_norm": 419.3966979980469, + "learning_rate": 1.9933172209597297e-06, + "loss": 25.2969, + "step": 6943 + }, + { + "epoch": 0.06573205478933368, + "grad_norm": 3.0983834266662598, + "learning_rate": 1.9933136820217576e-06, + "loss": 0.9219, + "step": 6944 + }, + { + "epoch": 0.06574152081104874, + "grad_norm": 201.05279541015625, + "learning_rate": 1.993310142150135e-06, + "loss": 20.9141, + "step": 6945 + }, + { + "epoch": 0.0657509868327638, + "grad_norm": 1696.6710205078125, + "learning_rate": 1.993306601344866e-06, + "loss": 53.9688, + "step": 6946 + }, + { + "epoch": 0.06576045285447885, + "grad_norm": 362.0982666015625, + "learning_rate": 1.9933030596059528e-06, + "loss": 34.0938, + "step": 6947 + }, + { + "epoch": 0.0657699188761939, + "grad_norm": 148.44747924804688, + "learning_rate": 1.9932995169333994e-06, + "loss": 15.4219, + "step": 6948 + }, + { + "epoch": 0.06577938489790895, + "grad_norm": 515.4299926757812, + "learning_rate": 1.9932959733272094e-06, + "loss": 36.6328, + "step": 6949 + }, + { + "epoch": 0.06578885091962401, + "grad_norm": 540.5668334960938, + "learning_rate": 1.9932924287873854e-06, + "loss": 38.5859, + "step": 6950 + }, + { + "epoch": 0.06579831694133906, + "grad_norm": 621.9428100585938, + "learning_rate": 1.9932888833139312e-06, + "loss": 48.7969, + "step": 6951 + }, + { + "epoch": 0.06580778296305412, + "grad_norm": 308.141845703125, + "learning_rate": 1.9932853369068506e-06, + "loss": 28.9219, + "step": 6952 + }, + { + "epoch": 0.06581724898476918, + "grad_norm": 491.7665710449219, + "learning_rate": 1.9932817895661458e-06, + "loss": 51.2031, + "step": 6953 + }, + { + "epoch": 0.06582671500648422, + "grad_norm": 422.34124755859375, + "learning_rate": 1.9932782412918213e-06, + "loss": 28.375, + "step": 6954 + }, + { + "epoch": 0.06583618102819928, + "grad_norm": 665.733642578125, + "learning_rate": 1.9932746920838793e-06, + "loss": 63.4062, + "step": 6955 + }, + { + "epoch": 0.06584564704991433, + "grad_norm": 959.4545288085938, + "learning_rate": 1.993271141942324e-06, + "loss": 50.1406, + "step": 6956 + }, + { + "epoch": 0.06585511307162939, + "grad_norm": 432.76641845703125, + "learning_rate": 1.9932675908671585e-06, + "loss": 45.2344, + "step": 6957 + }, + { + "epoch": 0.06586457909334444, + "grad_norm": 312.65838623046875, + "learning_rate": 1.9932640388583856e-06, + "loss": 23.2812, + "step": 6958 + }, + { + "epoch": 0.0658740451150595, + "grad_norm": 690.0021362304688, + "learning_rate": 1.99326048591601e-06, + "loss": 45.4375, + "step": 6959 + }, + { + "epoch": 0.06588351113677454, + "grad_norm": 355.4925537109375, + "learning_rate": 1.9932569320400334e-06, + "loss": 26.7969, + "step": 6960 + }, + { + "epoch": 0.0658929771584896, + "grad_norm": 384.564453125, + "learning_rate": 1.99325337723046e-06, + "loss": 11.6758, + "step": 6961 + }, + { + "epoch": 0.06590244318020466, + "grad_norm": 877.8546142578125, + "learning_rate": 1.9932498214872934e-06, + "loss": 44.5781, + "step": 6962 + }, + { + "epoch": 0.06591190920191971, + "grad_norm": 519.3259887695312, + "learning_rate": 1.9932462648105363e-06, + "loss": 28.6719, + "step": 6963 + }, + { + "epoch": 0.06592137522363477, + "grad_norm": 211.10055541992188, + "learning_rate": 1.9932427072001927e-06, + "loss": 26.0781, + "step": 6964 + }, + { + "epoch": 0.06593084124534981, + "grad_norm": 267.9244689941406, + "learning_rate": 1.993239148656265e-06, + "loss": 23.7578, + "step": 6965 + }, + { + "epoch": 0.06594030726706487, + "grad_norm": 218.55584716796875, + "learning_rate": 1.9932355891787575e-06, + "loss": 21.3281, + "step": 6966 + }, + { + "epoch": 0.06594977328877992, + "grad_norm": 246.0401611328125, + "learning_rate": 1.993232028767673e-06, + "loss": 21.9688, + "step": 6967 + }, + { + "epoch": 0.06595923931049498, + "grad_norm": 912.5130004882812, + "learning_rate": 1.993228467423015e-06, + "loss": 56.1172, + "step": 6968 + }, + { + "epoch": 0.06596870533221003, + "grad_norm": 360.392333984375, + "learning_rate": 1.9932249051447866e-06, + "loss": 26.7812, + "step": 6969 + }, + { + "epoch": 0.06597817135392509, + "grad_norm": 449.2380676269531, + "learning_rate": 1.993221341932992e-06, + "loss": 19.543, + "step": 6970 + }, + { + "epoch": 0.06598763737564015, + "grad_norm": 640.5650024414062, + "learning_rate": 1.9932177777876337e-06, + "loss": 24.2578, + "step": 6971 + }, + { + "epoch": 0.06599710339735519, + "grad_norm": 285.7591552734375, + "learning_rate": 1.993214212708715e-06, + "loss": 27.8125, + "step": 6972 + }, + { + "epoch": 0.06600656941907025, + "grad_norm": 510.09771728515625, + "learning_rate": 1.99321064669624e-06, + "loss": 56.6094, + "step": 6973 + }, + { + "epoch": 0.0660160354407853, + "grad_norm": 347.7625732421875, + "learning_rate": 1.9932070797502113e-06, + "loss": 34.4141, + "step": 6974 + }, + { + "epoch": 0.06602550146250036, + "grad_norm": 369.2689514160156, + "learning_rate": 1.993203511870633e-06, + "loss": 26.9141, + "step": 6975 + }, + { + "epoch": 0.0660349674842154, + "grad_norm": 364.98651123046875, + "learning_rate": 1.9931999430575073e-06, + "loss": 18.2383, + "step": 6976 + }, + { + "epoch": 0.06604443350593046, + "grad_norm": 426.84625244140625, + "learning_rate": 1.9931963733108388e-06, + "loss": 49.1562, + "step": 6977 + }, + { + "epoch": 0.06605389952764551, + "grad_norm": 191.73077392578125, + "learning_rate": 1.99319280263063e-06, + "loss": 22.4922, + "step": 6978 + }, + { + "epoch": 0.06606336554936057, + "grad_norm": 807.384521484375, + "learning_rate": 1.993189231016885e-06, + "loss": 76.25, + "step": 6979 + }, + { + "epoch": 0.06607283157107563, + "grad_norm": 497.777099609375, + "learning_rate": 1.993185658469606e-06, + "loss": 40.4844, + "step": 6980 + }, + { + "epoch": 0.06608229759279068, + "grad_norm": 524.8546752929688, + "learning_rate": 1.993182084988798e-06, + "loss": 43.2031, + "step": 6981 + }, + { + "epoch": 0.06609176361450574, + "grad_norm": 488.0404968261719, + "learning_rate": 1.993178510574463e-06, + "loss": 54.4375, + "step": 6982 + }, + { + "epoch": 0.06610122963622078, + "grad_norm": 308.05523681640625, + "learning_rate": 1.9931749352266047e-06, + "loss": 22.9375, + "step": 6983 + }, + { + "epoch": 0.06611069565793584, + "grad_norm": 254.8191375732422, + "learning_rate": 1.993171358945227e-06, + "loss": 22.3359, + "step": 6984 + }, + { + "epoch": 0.06612016167965089, + "grad_norm": 236.36924743652344, + "learning_rate": 1.9931677817303327e-06, + "loss": 20.5391, + "step": 6985 + }, + { + "epoch": 0.06612962770136595, + "grad_norm": 349.7823791503906, + "learning_rate": 1.993164203581925e-06, + "loss": 11.6426, + "step": 6986 + }, + { + "epoch": 0.066139093723081, + "grad_norm": 150.77593994140625, + "learning_rate": 1.9931606245000075e-06, + "loss": 19.7734, + "step": 6987 + }, + { + "epoch": 0.06614855974479605, + "grad_norm": 494.4589538574219, + "learning_rate": 1.993157044484584e-06, + "loss": 55.6484, + "step": 6988 + }, + { + "epoch": 0.06615802576651111, + "grad_norm": 302.33331298828125, + "learning_rate": 1.9931534635356574e-06, + "loss": 21.0312, + "step": 6989 + }, + { + "epoch": 0.06616749178822616, + "grad_norm": 283.27392578125, + "learning_rate": 1.993149881653231e-06, + "loss": 23.3438, + "step": 6990 + }, + { + "epoch": 0.06617695780994122, + "grad_norm": 387.9388122558594, + "learning_rate": 1.993146298837308e-06, + "loss": 45.2188, + "step": 6991 + }, + { + "epoch": 0.06618642383165627, + "grad_norm": 454.2004089355469, + "learning_rate": 1.9931427150878924e-06, + "loss": 57.4375, + "step": 6992 + }, + { + "epoch": 0.06619588985337133, + "grad_norm": 604.1514282226562, + "learning_rate": 1.9931391304049875e-06, + "loss": 32.4062, + "step": 6993 + }, + { + "epoch": 0.06620535587508637, + "grad_norm": 617.65283203125, + "learning_rate": 1.9931355447885964e-06, + "loss": 55.5625, + "step": 6994 + }, + { + "epoch": 0.06621482189680143, + "grad_norm": 461.3884582519531, + "learning_rate": 1.9931319582387224e-06, + "loss": 23.4844, + "step": 6995 + }, + { + "epoch": 0.06622428791851649, + "grad_norm": 292.4773254394531, + "learning_rate": 1.993128370755369e-06, + "loss": 20.0703, + "step": 6996 + }, + { + "epoch": 0.06623375394023154, + "grad_norm": 518.6929321289062, + "learning_rate": 1.993124782338539e-06, + "loss": 39.5234, + "step": 6997 + }, + { + "epoch": 0.0662432199619466, + "grad_norm": 409.8071594238281, + "learning_rate": 1.993121192988237e-06, + "loss": 31.3125, + "step": 6998 + }, + { + "epoch": 0.06625268598366164, + "grad_norm": 575.6433715820312, + "learning_rate": 1.9931176027044656e-06, + "loss": 58.3125, + "step": 6999 + }, + { + "epoch": 0.0662621520053767, + "grad_norm": 190.38702392578125, + "learning_rate": 1.993114011487228e-06, + "loss": 20.8867, + "step": 7000 + }, + { + "epoch": 0.06627161802709175, + "grad_norm": 3.7568228244781494, + "learning_rate": 1.9931104193365277e-06, + "loss": 0.9053, + "step": 7001 + }, + { + "epoch": 0.06628108404880681, + "grad_norm": 302.6896057128906, + "learning_rate": 1.993106826252369e-06, + "loss": 25.1484, + "step": 7002 + }, + { + "epoch": 0.06629055007052186, + "grad_norm": 251.35060119628906, + "learning_rate": 1.993103232234754e-06, + "loss": 22.2656, + "step": 7003 + }, + { + "epoch": 0.06630001609223692, + "grad_norm": 537.5531616210938, + "learning_rate": 1.993099637283686e-06, + "loss": 35.7891, + "step": 7004 + }, + { + "epoch": 0.06630948211395198, + "grad_norm": 669.38818359375, + "learning_rate": 1.9930960413991697e-06, + "loss": 39.5156, + "step": 7005 + }, + { + "epoch": 0.06631894813566702, + "grad_norm": 265.6765441894531, + "learning_rate": 1.9930924445812075e-06, + "loss": 21.4609, + "step": 7006 + }, + { + "epoch": 0.06632841415738208, + "grad_norm": 587.4703979492188, + "learning_rate": 1.993088846829803e-06, + "loss": 32.1562, + "step": 7007 + }, + { + "epoch": 0.06633788017909713, + "grad_norm": 437.32659912109375, + "learning_rate": 1.9930852481449594e-06, + "loss": 22.0, + "step": 7008 + }, + { + "epoch": 0.06634734620081219, + "grad_norm": 375.4875793457031, + "learning_rate": 1.9930816485266804e-06, + "loss": 18.2656, + "step": 7009 + }, + { + "epoch": 0.06635681222252723, + "grad_norm": 279.2256164550781, + "learning_rate": 1.9930780479749693e-06, + "loss": 38.6953, + "step": 7010 + }, + { + "epoch": 0.0663662782442423, + "grad_norm": 294.8179626464844, + "learning_rate": 1.9930744464898293e-06, + "loss": 11.1641, + "step": 7011 + }, + { + "epoch": 0.06637574426595734, + "grad_norm": 278.2580871582031, + "learning_rate": 1.9930708440712644e-06, + "loss": 27.7891, + "step": 7012 + }, + { + "epoch": 0.0663852102876724, + "grad_norm": 688.4192504882812, + "learning_rate": 1.993067240719277e-06, + "loss": 50.8594, + "step": 7013 + }, + { + "epoch": 0.06639467630938746, + "grad_norm": 257.1264953613281, + "learning_rate": 1.9930636364338713e-06, + "loss": 20.6719, + "step": 7014 + }, + { + "epoch": 0.0664041423311025, + "grad_norm": 389.9971923828125, + "learning_rate": 1.9930600312150502e-06, + "loss": 29.8125, + "step": 7015 + }, + { + "epoch": 0.06641360835281757, + "grad_norm": 377.6248474121094, + "learning_rate": 1.9930564250628175e-06, + "loss": 50.6094, + "step": 7016 + }, + { + "epoch": 0.06642307437453261, + "grad_norm": 617.25634765625, + "learning_rate": 1.993052817977176e-06, + "loss": 29.625, + "step": 7017 + }, + { + "epoch": 0.06643254039624767, + "grad_norm": 274.4179382324219, + "learning_rate": 1.9930492099581297e-06, + "loss": 42.6562, + "step": 7018 + }, + { + "epoch": 0.06644200641796272, + "grad_norm": 1021.3731689453125, + "learning_rate": 1.9930456010056814e-06, + "loss": 22.6992, + "step": 7019 + }, + { + "epoch": 0.06645147243967778, + "grad_norm": 301.7784423828125, + "learning_rate": 1.9930419911198354e-06, + "loss": 25.6172, + "step": 7020 + }, + { + "epoch": 0.06646093846139282, + "grad_norm": 253.46226501464844, + "learning_rate": 1.9930383803005943e-06, + "loss": 17.4141, + "step": 7021 + }, + { + "epoch": 0.06647040448310788, + "grad_norm": 1057.086181640625, + "learning_rate": 1.9930347685479614e-06, + "loss": 28.1602, + "step": 7022 + }, + { + "epoch": 0.06647987050482294, + "grad_norm": 724.939208984375, + "learning_rate": 1.993031155861941e-06, + "loss": 31.9219, + "step": 7023 + }, + { + "epoch": 0.06648933652653799, + "grad_norm": 501.06048583984375, + "learning_rate": 1.9930275422425356e-06, + "loss": 45.7188, + "step": 7024 + }, + { + "epoch": 0.06649880254825305, + "grad_norm": 489.389404296875, + "learning_rate": 1.993023927689749e-06, + "loss": 48.4375, + "step": 7025 + }, + { + "epoch": 0.0665082685699681, + "grad_norm": 3.8224358558654785, + "learning_rate": 1.993020312203584e-06, + "loss": 1.0225, + "step": 7026 + }, + { + "epoch": 0.06651773459168316, + "grad_norm": 569.6651611328125, + "learning_rate": 1.9930166957840454e-06, + "loss": 56.3906, + "step": 7027 + }, + { + "epoch": 0.0665272006133982, + "grad_norm": 430.2263488769531, + "learning_rate": 1.993013078431135e-06, + "loss": 19.7734, + "step": 7028 + }, + { + "epoch": 0.06653666663511326, + "grad_norm": 962.0816040039062, + "learning_rate": 1.9930094601448573e-06, + "loss": 36.6953, + "step": 7029 + }, + { + "epoch": 0.06654613265682831, + "grad_norm": 427.25726318359375, + "learning_rate": 1.993005840925215e-06, + "loss": 30.7422, + "step": 7030 + }, + { + "epoch": 0.06655559867854337, + "grad_norm": 292.204345703125, + "learning_rate": 1.9930022207722122e-06, + "loss": 33.2695, + "step": 7031 + }, + { + "epoch": 0.06656506470025843, + "grad_norm": 904.6544799804688, + "learning_rate": 1.992998599685852e-06, + "loss": 63.2188, + "step": 7032 + }, + { + "epoch": 0.06657453072197347, + "grad_norm": 444.1575927734375, + "learning_rate": 1.992994977666137e-06, + "loss": 25.3438, + "step": 7033 + }, + { + "epoch": 0.06658399674368853, + "grad_norm": 1465.41064453125, + "learning_rate": 1.9929913547130717e-06, + "loss": 35.1641, + "step": 7034 + }, + { + "epoch": 0.06659346276540358, + "grad_norm": 3.2639920711517334, + "learning_rate": 1.9929877308266592e-06, + "loss": 1.0557, + "step": 7035 + }, + { + "epoch": 0.06660292878711864, + "grad_norm": 564.2757568359375, + "learning_rate": 1.9929841060069024e-06, + "loss": 35.8828, + "step": 7036 + }, + { + "epoch": 0.06661239480883369, + "grad_norm": 202.95999145507812, + "learning_rate": 1.9929804802538055e-06, + "loss": 21.0156, + "step": 7037 + }, + { + "epoch": 0.06662186083054875, + "grad_norm": 228.28411865234375, + "learning_rate": 1.992976853567372e-06, + "loss": 18.5078, + "step": 7038 + }, + { + "epoch": 0.0666313268522638, + "grad_norm": 825.8689575195312, + "learning_rate": 1.992973225947604e-06, + "loss": 64.4375, + "step": 7039 + }, + { + "epoch": 0.06664079287397885, + "grad_norm": 208.26629638671875, + "learning_rate": 1.992969597394507e-06, + "loss": 23.875, + "step": 7040 + }, + { + "epoch": 0.06665025889569391, + "grad_norm": 764.2674560546875, + "learning_rate": 1.9929659679080817e-06, + "loss": 47.9766, + "step": 7041 + }, + { + "epoch": 0.06665972491740896, + "grad_norm": 407.3411865234375, + "learning_rate": 1.992962337488334e-06, + "loss": 30.4453, + "step": 7042 + }, + { + "epoch": 0.06666919093912402, + "grad_norm": 507.7070007324219, + "learning_rate": 1.992958706135266e-06, + "loss": 56.4375, + "step": 7043 + }, + { + "epoch": 0.06667865696083906, + "grad_norm": 372.09124755859375, + "learning_rate": 1.992955073848881e-06, + "loss": 29.1953, + "step": 7044 + }, + { + "epoch": 0.06668812298255412, + "grad_norm": 849.21875, + "learning_rate": 1.9929514406291835e-06, + "loss": 42.5, + "step": 7045 + }, + { + "epoch": 0.06669758900426917, + "grad_norm": 603.02587890625, + "learning_rate": 1.9929478064761757e-06, + "loss": 48.3125, + "step": 7046 + }, + { + "epoch": 0.06670705502598423, + "grad_norm": 407.5218200683594, + "learning_rate": 1.992944171389862e-06, + "loss": 65.7344, + "step": 7047 + }, + { + "epoch": 0.06671652104769929, + "grad_norm": 708.0255737304688, + "learning_rate": 1.992940535370245e-06, + "loss": 51.9688, + "step": 7048 + }, + { + "epoch": 0.06672598706941434, + "grad_norm": 2.991180896759033, + "learning_rate": 1.9929368984173287e-06, + "loss": 0.7224, + "step": 7049 + }, + { + "epoch": 0.0667354530911294, + "grad_norm": 2.900747537612915, + "learning_rate": 1.992933260531116e-06, + "loss": 1.0098, + "step": 7050 + }, + { + "epoch": 0.06674491911284444, + "grad_norm": 160.05116271972656, + "learning_rate": 1.9929296217116112e-06, + "loss": 18.4766, + "step": 7051 + }, + { + "epoch": 0.0667543851345595, + "grad_norm": 366.960693359375, + "learning_rate": 1.992925981958817e-06, + "loss": 23.1406, + "step": 7052 + }, + { + "epoch": 0.06676385115627455, + "grad_norm": 187.15057373046875, + "learning_rate": 1.9929223412727366e-06, + "loss": 24.7969, + "step": 7053 + }, + { + "epoch": 0.06677331717798961, + "grad_norm": 226.2719268798828, + "learning_rate": 1.992918699653374e-06, + "loss": 25.3828, + "step": 7054 + }, + { + "epoch": 0.06678278319970465, + "grad_norm": 322.85626220703125, + "learning_rate": 1.9929150571007324e-06, + "loss": 32.8281, + "step": 7055 + }, + { + "epoch": 0.06679224922141971, + "grad_norm": 702.88623046875, + "learning_rate": 1.9929114136148153e-06, + "loss": 60.7031, + "step": 7056 + }, + { + "epoch": 0.06680171524313477, + "grad_norm": 381.409423828125, + "learning_rate": 1.9929077691956258e-06, + "loss": 37.0781, + "step": 7057 + }, + { + "epoch": 0.06681118126484982, + "grad_norm": 383.01165771484375, + "learning_rate": 1.9929041238431678e-06, + "loss": 31.375, + "step": 7058 + }, + { + "epoch": 0.06682064728656488, + "grad_norm": 289.4263610839844, + "learning_rate": 1.9929004775574446e-06, + "loss": 32.875, + "step": 7059 + }, + { + "epoch": 0.06683011330827993, + "grad_norm": 293.668701171875, + "learning_rate": 1.9928968303384593e-06, + "loss": 14.9883, + "step": 7060 + }, + { + "epoch": 0.06683957932999499, + "grad_norm": 249.4683837890625, + "learning_rate": 1.992893182186216e-06, + "loss": 21.6172, + "step": 7061 + }, + { + "epoch": 0.06684904535171003, + "grad_norm": 166.2999267578125, + "learning_rate": 1.9928895331007174e-06, + "loss": 20.4219, + "step": 7062 + }, + { + "epoch": 0.06685851137342509, + "grad_norm": 282.2184753417969, + "learning_rate": 1.992885883081967e-06, + "loss": 22.0391, + "step": 7063 + }, + { + "epoch": 0.06686797739514014, + "grad_norm": 385.16790771484375, + "learning_rate": 1.992882232129969e-06, + "loss": 29.2656, + "step": 7064 + }, + { + "epoch": 0.0668774434168552, + "grad_norm": 520.1531372070312, + "learning_rate": 1.9928785802447263e-06, + "loss": 29.0625, + "step": 7065 + }, + { + "epoch": 0.06688690943857026, + "grad_norm": 1207.67431640625, + "learning_rate": 1.9928749274262416e-06, + "loss": 72.3281, + "step": 7066 + }, + { + "epoch": 0.0668963754602853, + "grad_norm": 181.7728271484375, + "learning_rate": 1.9928712736745197e-06, + "loss": 18.0234, + "step": 7067 + }, + { + "epoch": 0.06690584148200036, + "grad_norm": 626.3968505859375, + "learning_rate": 1.992867618989563e-06, + "loss": 22.375, + "step": 7068 + }, + { + "epoch": 0.06691530750371541, + "grad_norm": 245.33677673339844, + "learning_rate": 1.992863963371376e-06, + "loss": 22.9922, + "step": 7069 + }, + { + "epoch": 0.06692477352543047, + "grad_norm": 877.3176879882812, + "learning_rate": 1.9928603068199607e-06, + "loss": 36.7344, + "step": 7070 + }, + { + "epoch": 0.06693423954714552, + "grad_norm": 309.2678527832031, + "learning_rate": 1.9928566493353215e-06, + "loss": 29.9062, + "step": 7071 + }, + { + "epoch": 0.06694370556886058, + "grad_norm": 229.0395050048828, + "learning_rate": 1.992852990917462e-06, + "loss": 21.1484, + "step": 7072 + }, + { + "epoch": 0.06695317159057562, + "grad_norm": 980.53271484375, + "learning_rate": 1.992849331566385e-06, + "loss": 25.6562, + "step": 7073 + }, + { + "epoch": 0.06696263761229068, + "grad_norm": 831.7014770507812, + "learning_rate": 1.992845671282094e-06, + "loss": 67.8906, + "step": 7074 + }, + { + "epoch": 0.06697210363400574, + "grad_norm": 483.8699035644531, + "learning_rate": 1.992842010064593e-06, + "loss": 45.9062, + "step": 7075 + }, + { + "epoch": 0.06698156965572079, + "grad_norm": 212.92527770996094, + "learning_rate": 1.992838347913885e-06, + "loss": 23.9062, + "step": 7076 + }, + { + "epoch": 0.06699103567743585, + "grad_norm": 241.62460327148438, + "learning_rate": 1.9928346848299735e-06, + "loss": 19.9922, + "step": 7077 + }, + { + "epoch": 0.0670005016991509, + "grad_norm": 220.1125946044922, + "learning_rate": 1.992831020812862e-06, + "loss": 16.4922, + "step": 7078 + }, + { + "epoch": 0.06700996772086595, + "grad_norm": 540.07275390625, + "learning_rate": 1.992827355862554e-06, + "loss": 66.9219, + "step": 7079 + }, + { + "epoch": 0.067019433742581, + "grad_norm": 571.005859375, + "learning_rate": 1.992823689979053e-06, + "loss": 33.25, + "step": 7080 + }, + { + "epoch": 0.06702889976429606, + "grad_norm": 545.5924072265625, + "learning_rate": 1.992820023162362e-06, + "loss": 53.7031, + "step": 7081 + }, + { + "epoch": 0.06703836578601112, + "grad_norm": 301.46270751953125, + "learning_rate": 1.9928163554124846e-06, + "loss": 26.0, + "step": 7082 + }, + { + "epoch": 0.06704783180772617, + "grad_norm": 423.6510314941406, + "learning_rate": 1.9928126867294252e-06, + "loss": 43.6562, + "step": 7083 + }, + { + "epoch": 0.06705729782944123, + "grad_norm": 278.29559326171875, + "learning_rate": 1.992809017113186e-06, + "loss": 22.5625, + "step": 7084 + }, + { + "epoch": 0.06706676385115627, + "grad_norm": 289.5505065917969, + "learning_rate": 1.9928053465637706e-06, + "loss": 30.6484, + "step": 7085 + }, + { + "epoch": 0.06707622987287133, + "grad_norm": 1359.153564453125, + "learning_rate": 1.992801675081183e-06, + "loss": 29.2656, + "step": 7086 + }, + { + "epoch": 0.06708569589458638, + "grad_norm": 370.3383483886719, + "learning_rate": 1.9927980026654265e-06, + "loss": 36.3281, + "step": 7087 + }, + { + "epoch": 0.06709516191630144, + "grad_norm": 3.362656354904175, + "learning_rate": 1.9927943293165043e-06, + "loss": 0.9775, + "step": 7088 + }, + { + "epoch": 0.06710462793801648, + "grad_norm": 275.8749694824219, + "learning_rate": 1.9927906550344203e-06, + "loss": 35.1562, + "step": 7089 + }, + { + "epoch": 0.06711409395973154, + "grad_norm": 411.8811950683594, + "learning_rate": 1.9927869798191775e-06, + "loss": 38.2812, + "step": 7090 + }, + { + "epoch": 0.0671235599814466, + "grad_norm": 655.517333984375, + "learning_rate": 1.9927833036707797e-06, + "loss": 47.8828, + "step": 7091 + }, + { + "epoch": 0.06713302600316165, + "grad_norm": 229.78744506835938, + "learning_rate": 1.99277962658923e-06, + "loss": 26.1719, + "step": 7092 + }, + { + "epoch": 0.06714249202487671, + "grad_norm": 3.9210939407348633, + "learning_rate": 1.9927759485745316e-06, + "loss": 1.0649, + "step": 7093 + }, + { + "epoch": 0.06715195804659176, + "grad_norm": 740.2672119140625, + "learning_rate": 1.992772269626689e-06, + "loss": 38.75, + "step": 7094 + }, + { + "epoch": 0.06716142406830682, + "grad_norm": 190.00653076171875, + "learning_rate": 1.992768589745705e-06, + "loss": 23.3281, + "step": 7095 + }, + { + "epoch": 0.06717089009002186, + "grad_norm": 503.8719787597656, + "learning_rate": 1.992764908931583e-06, + "loss": 29.1875, + "step": 7096 + }, + { + "epoch": 0.06718035611173692, + "grad_norm": 430.98345947265625, + "learning_rate": 1.9927612271843265e-06, + "loss": 29.1797, + "step": 7097 + }, + { + "epoch": 0.06718982213345197, + "grad_norm": 433.6294250488281, + "learning_rate": 1.9927575445039392e-06, + "loss": 51.2812, + "step": 7098 + }, + { + "epoch": 0.06719928815516703, + "grad_norm": 2088.318115234375, + "learning_rate": 1.992753860890424e-06, + "loss": 25.5, + "step": 7099 + }, + { + "epoch": 0.06720875417688209, + "grad_norm": 212.83609008789062, + "learning_rate": 1.992750176343785e-06, + "loss": 21.4453, + "step": 7100 + }, + { + "epoch": 0.06721822019859713, + "grad_norm": 302.50738525390625, + "learning_rate": 1.9927464908640256e-06, + "loss": 21.2969, + "step": 7101 + }, + { + "epoch": 0.0672276862203122, + "grad_norm": 387.2843017578125, + "learning_rate": 1.992742804451149e-06, + "loss": 24.043, + "step": 7102 + }, + { + "epoch": 0.06723715224202724, + "grad_norm": 282.415283203125, + "learning_rate": 1.9927391171051586e-06, + "loss": 20.0352, + "step": 7103 + }, + { + "epoch": 0.0672466182637423, + "grad_norm": 1269.8505859375, + "learning_rate": 1.9927354288260585e-06, + "loss": 39.5156, + "step": 7104 + }, + { + "epoch": 0.06725608428545735, + "grad_norm": 228.5485382080078, + "learning_rate": 1.992731739613851e-06, + "loss": 15.7578, + "step": 7105 + }, + { + "epoch": 0.0672655503071724, + "grad_norm": 312.9607238769531, + "learning_rate": 1.9927280494685405e-06, + "loss": 26.4453, + "step": 7106 + }, + { + "epoch": 0.06727501632888745, + "grad_norm": 2.8089849948883057, + "learning_rate": 1.9927243583901303e-06, + "loss": 0.8828, + "step": 7107 + }, + { + "epoch": 0.06728448235060251, + "grad_norm": 3.514019250869751, + "learning_rate": 1.9927206663786235e-06, + "loss": 0.9204, + "step": 7108 + }, + { + "epoch": 0.06729394837231757, + "grad_norm": 347.05035400390625, + "learning_rate": 1.9927169734340244e-06, + "loss": 29.1875, + "step": 7109 + }, + { + "epoch": 0.06730341439403262, + "grad_norm": 572.0265502929688, + "learning_rate": 1.9927132795563355e-06, + "loss": 20.4609, + "step": 7110 + }, + { + "epoch": 0.06731288041574768, + "grad_norm": 846.1424560546875, + "learning_rate": 1.992709584745561e-06, + "loss": 38.2109, + "step": 7111 + }, + { + "epoch": 0.06732234643746272, + "grad_norm": 235.14089965820312, + "learning_rate": 1.9927058890017035e-06, + "loss": 17.0117, + "step": 7112 + }, + { + "epoch": 0.06733181245917778, + "grad_norm": 803.0020751953125, + "learning_rate": 1.9927021923247676e-06, + "loss": 38.2656, + "step": 7113 + }, + { + "epoch": 0.06734127848089283, + "grad_norm": 781.7423095703125, + "learning_rate": 1.992698494714756e-06, + "loss": 45.6094, + "step": 7114 + }, + { + "epoch": 0.06735074450260789, + "grad_norm": 372.0160217285156, + "learning_rate": 1.9926947961716726e-06, + "loss": 25.1797, + "step": 7115 + }, + { + "epoch": 0.06736021052432294, + "grad_norm": 777.1068115234375, + "learning_rate": 1.9926910966955202e-06, + "loss": 21.3906, + "step": 7116 + }, + { + "epoch": 0.067369676546038, + "grad_norm": 255.51487731933594, + "learning_rate": 1.9926873962863035e-06, + "loss": 19.1094, + "step": 7117 + }, + { + "epoch": 0.06737914256775306, + "grad_norm": 359.28350830078125, + "learning_rate": 1.9926836949440245e-06, + "loss": 24.5625, + "step": 7118 + }, + { + "epoch": 0.0673886085894681, + "grad_norm": 582.5042114257812, + "learning_rate": 1.992679992668688e-06, + "loss": 42.9219, + "step": 7119 + }, + { + "epoch": 0.06739807461118316, + "grad_norm": 533.2239379882812, + "learning_rate": 1.992676289460296e-06, + "loss": 30.7422, + "step": 7120 + }, + { + "epoch": 0.06740754063289821, + "grad_norm": 4.044207572937012, + "learning_rate": 1.9926725853188537e-06, + "loss": 0.8438, + "step": 7121 + }, + { + "epoch": 0.06741700665461327, + "grad_norm": 517.273193359375, + "learning_rate": 1.9926688802443633e-06, + "loss": 52.6719, + "step": 7122 + }, + { + "epoch": 0.06742647267632831, + "grad_norm": 232.94125366210938, + "learning_rate": 1.992665174236829e-06, + "loss": 24.875, + "step": 7123 + }, + { + "epoch": 0.06743593869804337, + "grad_norm": 194.4345703125, + "learning_rate": 1.992661467296254e-06, + "loss": 20.125, + "step": 7124 + }, + { + "epoch": 0.06744540471975843, + "grad_norm": 807.0479125976562, + "learning_rate": 1.9926577594226417e-06, + "loss": 45.6328, + "step": 7125 + }, + { + "epoch": 0.06745487074147348, + "grad_norm": 483.4369812011719, + "learning_rate": 1.9926540506159958e-06, + "loss": 15.3906, + "step": 7126 + }, + { + "epoch": 0.06746433676318854, + "grad_norm": 829.7083129882812, + "learning_rate": 1.9926503408763194e-06, + "loss": 49.2188, + "step": 7127 + }, + { + "epoch": 0.06747380278490359, + "grad_norm": 757.771240234375, + "learning_rate": 1.9926466302036163e-06, + "loss": 38.6758, + "step": 7128 + }, + { + "epoch": 0.06748326880661865, + "grad_norm": 221.250244140625, + "learning_rate": 1.9926429185978903e-06, + "loss": 20.0625, + "step": 7129 + }, + { + "epoch": 0.06749273482833369, + "grad_norm": 540.7989501953125, + "learning_rate": 1.9926392060591444e-06, + "loss": 52.1094, + "step": 7130 + }, + { + "epoch": 0.06750220085004875, + "grad_norm": 609.892333984375, + "learning_rate": 1.992635492587382e-06, + "loss": 44.3281, + "step": 7131 + }, + { + "epoch": 0.0675116668717638, + "grad_norm": 210.81375122070312, + "learning_rate": 1.992631778182607e-06, + "loss": 9.3203, + "step": 7132 + }, + { + "epoch": 0.06752113289347886, + "grad_norm": 683.7275390625, + "learning_rate": 1.9926280628448223e-06, + "loss": 52.4375, + "step": 7133 + }, + { + "epoch": 0.06753059891519392, + "grad_norm": 638.9544067382812, + "learning_rate": 1.9926243465740323e-06, + "loss": 23.4062, + "step": 7134 + }, + { + "epoch": 0.06754006493690896, + "grad_norm": 364.5428161621094, + "learning_rate": 1.9926206293702398e-06, + "loss": 14.2891, + "step": 7135 + }, + { + "epoch": 0.06754953095862402, + "grad_norm": 3.409395694732666, + "learning_rate": 1.9926169112334485e-06, + "loss": 0.8457, + "step": 7136 + }, + { + "epoch": 0.06755899698033907, + "grad_norm": 1235.0084228515625, + "learning_rate": 1.9926131921636618e-06, + "loss": 62.8594, + "step": 7137 + }, + { + "epoch": 0.06756846300205413, + "grad_norm": 361.5188903808594, + "learning_rate": 1.9926094721608836e-06, + "loss": 38.0781, + "step": 7138 + }, + { + "epoch": 0.06757792902376918, + "grad_norm": 371.7429504394531, + "learning_rate": 1.9926057512251168e-06, + "loss": 13.4531, + "step": 7139 + }, + { + "epoch": 0.06758739504548424, + "grad_norm": 380.7269287109375, + "learning_rate": 1.992602029356365e-06, + "loss": 21.6484, + "step": 7140 + }, + { + "epoch": 0.06759686106719928, + "grad_norm": 403.5377197265625, + "learning_rate": 1.9925983065546323e-06, + "loss": 40.3281, + "step": 7141 + }, + { + "epoch": 0.06760632708891434, + "grad_norm": 1022.5817260742188, + "learning_rate": 1.9925945828199214e-06, + "loss": 25.8359, + "step": 7142 + }, + { + "epoch": 0.0676157931106294, + "grad_norm": 239.7090301513672, + "learning_rate": 1.9925908581522362e-06, + "loss": 20.3203, + "step": 7143 + }, + { + "epoch": 0.06762525913234445, + "grad_norm": 526.4724731445312, + "learning_rate": 1.9925871325515803e-06, + "loss": 32.5234, + "step": 7144 + }, + { + "epoch": 0.06763472515405951, + "grad_norm": 578.18505859375, + "learning_rate": 1.992583406017957e-06, + "loss": 36.0781, + "step": 7145 + }, + { + "epoch": 0.06764419117577455, + "grad_norm": 435.55755615234375, + "learning_rate": 1.99257967855137e-06, + "loss": 27.6719, + "step": 7146 + }, + { + "epoch": 0.06765365719748961, + "grad_norm": 372.4281311035156, + "learning_rate": 1.9925759501518226e-06, + "loss": 42.8594, + "step": 7147 + }, + { + "epoch": 0.06766312321920466, + "grad_norm": 614.924072265625, + "learning_rate": 1.9925722208193183e-06, + "loss": 53.125, + "step": 7148 + }, + { + "epoch": 0.06767258924091972, + "grad_norm": 465.9676513671875, + "learning_rate": 1.9925684905538605e-06, + "loss": 40.6094, + "step": 7149 + }, + { + "epoch": 0.06768205526263477, + "grad_norm": 388.4211730957031, + "learning_rate": 1.9925647593554535e-06, + "loss": 19.7266, + "step": 7150 + }, + { + "epoch": 0.06769152128434983, + "grad_norm": 314.0950622558594, + "learning_rate": 1.9925610272240995e-06, + "loss": 21.0938, + "step": 7151 + }, + { + "epoch": 0.06770098730606489, + "grad_norm": 461.9523010253906, + "learning_rate": 1.9925572941598034e-06, + "loss": 16.5586, + "step": 7152 + }, + { + "epoch": 0.06771045332777993, + "grad_norm": 289.6257019042969, + "learning_rate": 1.9925535601625675e-06, + "loss": 23.4062, + "step": 7153 + }, + { + "epoch": 0.06771991934949499, + "grad_norm": 874.1072998046875, + "learning_rate": 1.992549825232396e-06, + "loss": 35.7734, + "step": 7154 + }, + { + "epoch": 0.06772938537121004, + "grad_norm": 371.47662353515625, + "learning_rate": 1.992546089369292e-06, + "loss": 24.0625, + "step": 7155 + }, + { + "epoch": 0.0677388513929251, + "grad_norm": 577.498779296875, + "learning_rate": 1.9925423525732595e-06, + "loss": 44.4531, + "step": 7156 + }, + { + "epoch": 0.06774831741464014, + "grad_norm": 392.4335021972656, + "learning_rate": 1.9925386148443018e-06, + "loss": 33.7891, + "step": 7157 + }, + { + "epoch": 0.0677577834363552, + "grad_norm": 275.5910949707031, + "learning_rate": 1.992534876182422e-06, + "loss": 19.3359, + "step": 7158 + }, + { + "epoch": 0.06776724945807025, + "grad_norm": 422.3395690917969, + "learning_rate": 1.9925311365876243e-06, + "loss": 32.5312, + "step": 7159 + }, + { + "epoch": 0.06777671547978531, + "grad_norm": 261.5928649902344, + "learning_rate": 1.9925273960599117e-06, + "loss": 23.0703, + "step": 7160 + }, + { + "epoch": 0.06778618150150037, + "grad_norm": 623.2176513671875, + "learning_rate": 1.992523654599288e-06, + "loss": 54.2812, + "step": 7161 + }, + { + "epoch": 0.06779564752321542, + "grad_norm": 333.0947570800781, + "learning_rate": 1.992519912205757e-06, + "loss": 21.0625, + "step": 7162 + }, + { + "epoch": 0.06780511354493048, + "grad_norm": 267.3924255371094, + "learning_rate": 1.9925161688793216e-06, + "loss": 19.9375, + "step": 7163 + }, + { + "epoch": 0.06781457956664552, + "grad_norm": 558.1334228515625, + "learning_rate": 1.9925124246199855e-06, + "loss": 39.3438, + "step": 7164 + }, + { + "epoch": 0.06782404558836058, + "grad_norm": 952.8624267578125, + "learning_rate": 1.9925086794277523e-06, + "loss": 38.1953, + "step": 7165 + }, + { + "epoch": 0.06783351161007563, + "grad_norm": 3.462919235229492, + "learning_rate": 1.992504933302626e-06, + "loss": 0.8179, + "step": 7166 + }, + { + "epoch": 0.06784297763179069, + "grad_norm": 3.3259918689727783, + "learning_rate": 1.9925011862446086e-06, + "loss": 0.9321, + "step": 7167 + }, + { + "epoch": 0.06785244365350575, + "grad_norm": 254.92205810546875, + "learning_rate": 1.9924974382537052e-06, + "loss": 29.75, + "step": 7168 + }, + { + "epoch": 0.0678619096752208, + "grad_norm": 368.19134521484375, + "learning_rate": 1.9924936893299188e-06, + "loss": 23.1484, + "step": 7169 + }, + { + "epoch": 0.06787137569693585, + "grad_norm": 571.3197631835938, + "learning_rate": 1.992489939473253e-06, + "loss": 43.0156, + "step": 7170 + }, + { + "epoch": 0.0678808417186509, + "grad_norm": 419.21783447265625, + "learning_rate": 1.992486188683711e-06, + "loss": 30.75, + "step": 7171 + }, + { + "epoch": 0.06789030774036596, + "grad_norm": 3.515363931655884, + "learning_rate": 1.9924824369612966e-06, + "loss": 0.8833, + "step": 7172 + }, + { + "epoch": 0.067899773762081, + "grad_norm": 275.60418701171875, + "learning_rate": 1.9924786843060133e-06, + "loss": 17.3672, + "step": 7173 + }, + { + "epoch": 0.06790923978379607, + "grad_norm": 670.97021484375, + "learning_rate": 1.9924749307178645e-06, + "loss": 50.6797, + "step": 7174 + }, + { + "epoch": 0.06791870580551111, + "grad_norm": 358.71697998046875, + "learning_rate": 1.992471176196854e-06, + "loss": 22.9062, + "step": 7175 + }, + { + "epoch": 0.06792817182722617, + "grad_norm": 1319.7027587890625, + "learning_rate": 1.992467420742985e-06, + "loss": 42.7031, + "step": 7176 + }, + { + "epoch": 0.06793763784894123, + "grad_norm": 201.94937133789062, + "learning_rate": 1.9924636643562615e-06, + "loss": 26.375, + "step": 7177 + }, + { + "epoch": 0.06794710387065628, + "grad_norm": 525.1111450195312, + "learning_rate": 1.992459907036686e-06, + "loss": 38.1875, + "step": 7178 + }, + { + "epoch": 0.06795656989237134, + "grad_norm": 249.53758239746094, + "learning_rate": 1.9924561487842637e-06, + "loss": 26.0625, + "step": 7179 + }, + { + "epoch": 0.06796603591408638, + "grad_norm": 318.7208557128906, + "learning_rate": 1.9924523895989967e-06, + "loss": 22.0625, + "step": 7180 + }, + { + "epoch": 0.06797550193580144, + "grad_norm": 3.792858839035034, + "learning_rate": 1.9924486294808893e-06, + "loss": 0.8296, + "step": 7181 + }, + { + "epoch": 0.06798496795751649, + "grad_norm": 497.7381896972656, + "learning_rate": 1.992444868429944e-06, + "loss": 47.6797, + "step": 7182 + }, + { + "epoch": 0.06799443397923155, + "grad_norm": 726.7936401367188, + "learning_rate": 1.9924411064461657e-06, + "loss": 57.7812, + "step": 7183 + }, + { + "epoch": 0.0680039000009466, + "grad_norm": 464.4272766113281, + "learning_rate": 1.992437343529557e-06, + "loss": 38.2969, + "step": 7184 + }, + { + "epoch": 0.06801336602266166, + "grad_norm": 920.5757446289062, + "learning_rate": 1.9924335796801223e-06, + "loss": 39.6875, + "step": 7185 + }, + { + "epoch": 0.06802283204437672, + "grad_norm": 572.5682983398438, + "learning_rate": 1.9924298148978644e-06, + "loss": 48.5, + "step": 7186 + }, + { + "epoch": 0.06803229806609176, + "grad_norm": 248.76382446289062, + "learning_rate": 1.992426049182787e-06, + "loss": 28.6562, + "step": 7187 + }, + { + "epoch": 0.06804176408780682, + "grad_norm": 796.7581176757812, + "learning_rate": 1.9924222825348936e-06, + "loss": 23.3672, + "step": 7188 + }, + { + "epoch": 0.06805123010952187, + "grad_norm": 244.17369079589844, + "learning_rate": 1.9924185149541875e-06, + "loss": 24.7969, + "step": 7189 + }, + { + "epoch": 0.06806069613123693, + "grad_norm": 288.3608703613281, + "learning_rate": 1.9924147464406732e-06, + "loss": 19.8828, + "step": 7190 + }, + { + "epoch": 0.06807016215295197, + "grad_norm": 454.4216003417969, + "learning_rate": 1.992410976994353e-06, + "loss": 32.1875, + "step": 7191 + }, + { + "epoch": 0.06807962817466703, + "grad_norm": 224.9293670654297, + "learning_rate": 1.9924072066152316e-06, + "loss": 21.375, + "step": 7192 + }, + { + "epoch": 0.06808909419638208, + "grad_norm": 346.10150146484375, + "learning_rate": 1.9924034353033115e-06, + "loss": 22.4531, + "step": 7193 + }, + { + "epoch": 0.06809856021809714, + "grad_norm": 205.3007049560547, + "learning_rate": 1.992399663058597e-06, + "loss": 26.75, + "step": 7194 + }, + { + "epoch": 0.0681080262398122, + "grad_norm": 1220.62451171875, + "learning_rate": 1.9923958898810913e-06, + "loss": 54.4219, + "step": 7195 + }, + { + "epoch": 0.06811749226152725, + "grad_norm": 644.1692504882812, + "learning_rate": 1.992392115770798e-06, + "loss": 28.8281, + "step": 7196 + }, + { + "epoch": 0.0681269582832423, + "grad_norm": 223.60032653808594, + "learning_rate": 1.9923883407277206e-06, + "loss": 16.6797, + "step": 7197 + }, + { + "epoch": 0.06813642430495735, + "grad_norm": 220.91775512695312, + "learning_rate": 1.992384564751863e-06, + "loss": 18.9766, + "step": 7198 + }, + { + "epoch": 0.06814589032667241, + "grad_norm": 177.74273681640625, + "learning_rate": 1.992380787843228e-06, + "loss": 20.6953, + "step": 7199 + }, + { + "epoch": 0.06815535634838746, + "grad_norm": 3.635741710662842, + "learning_rate": 1.99237701000182e-06, + "loss": 1.1011, + "step": 7200 + }, + { + "epoch": 0.06816482237010252, + "grad_norm": 207.12049865722656, + "learning_rate": 1.992373231227642e-06, + "loss": 19.0469, + "step": 7201 + }, + { + "epoch": 0.06817428839181756, + "grad_norm": 211.16168212890625, + "learning_rate": 1.992369451520698e-06, + "loss": 20.5156, + "step": 7202 + }, + { + "epoch": 0.06818375441353262, + "grad_norm": 446.1520690917969, + "learning_rate": 1.992365670880991e-06, + "loss": 16.1836, + "step": 7203 + }, + { + "epoch": 0.06819322043524768, + "grad_norm": 292.8475646972656, + "learning_rate": 1.992361889308525e-06, + "loss": 20.375, + "step": 7204 + }, + { + "epoch": 0.06820268645696273, + "grad_norm": 403.8297424316406, + "learning_rate": 1.9923581068033032e-06, + "loss": 46.1719, + "step": 7205 + }, + { + "epoch": 0.06821215247867779, + "grad_norm": 488.4284973144531, + "learning_rate": 1.9923543233653293e-06, + "loss": 45.2031, + "step": 7206 + }, + { + "epoch": 0.06822161850039284, + "grad_norm": 703.1409301757812, + "learning_rate": 1.992350538994607e-06, + "loss": 17.5039, + "step": 7207 + }, + { + "epoch": 0.0682310845221079, + "grad_norm": 315.4235534667969, + "learning_rate": 1.9923467536911395e-06, + "loss": 12.1426, + "step": 7208 + }, + { + "epoch": 0.06824055054382294, + "grad_norm": 241.82199096679688, + "learning_rate": 1.992342967454931e-06, + "loss": 23.5781, + "step": 7209 + }, + { + "epoch": 0.068250016565538, + "grad_norm": 490.59698486328125, + "learning_rate": 1.9923391802859844e-06, + "loss": 65.707, + "step": 7210 + }, + { + "epoch": 0.06825948258725306, + "grad_norm": 618.9129028320312, + "learning_rate": 1.992335392184304e-06, + "loss": 25.5625, + "step": 7211 + }, + { + "epoch": 0.06826894860896811, + "grad_norm": 549.33154296875, + "learning_rate": 1.992331603149892e-06, + "loss": 35.1797, + "step": 7212 + }, + { + "epoch": 0.06827841463068317, + "grad_norm": 2.9776875972747803, + "learning_rate": 1.992327813182754e-06, + "loss": 0.9795, + "step": 7213 + }, + { + "epoch": 0.06828788065239821, + "grad_norm": 465.29010009765625, + "learning_rate": 1.9923240222828913e-06, + "loss": 31.0859, + "step": 7214 + }, + { + "epoch": 0.06829734667411327, + "grad_norm": 164.41929626464844, + "learning_rate": 1.9923202304503093e-06, + "loss": 22.1406, + "step": 7215 + }, + { + "epoch": 0.06830681269582832, + "grad_norm": 421.6223449707031, + "learning_rate": 1.9923164376850106e-06, + "loss": 42.9609, + "step": 7216 + }, + { + "epoch": 0.06831627871754338, + "grad_norm": 405.4000244140625, + "learning_rate": 1.992312643986999e-06, + "loss": 63.7812, + "step": 7217 + }, + { + "epoch": 0.06832574473925843, + "grad_norm": 171.373046875, + "learning_rate": 1.992308849356278e-06, + "loss": 23.4375, + "step": 7218 + }, + { + "epoch": 0.06833521076097349, + "grad_norm": 632.029052734375, + "learning_rate": 1.9923050537928516e-06, + "loss": 42.9922, + "step": 7219 + }, + { + "epoch": 0.06834467678268855, + "grad_norm": 1888.36865234375, + "learning_rate": 1.9923012572967226e-06, + "loss": 40.9062, + "step": 7220 + }, + { + "epoch": 0.06835414280440359, + "grad_norm": 930.0281982421875, + "learning_rate": 1.992297459867895e-06, + "loss": 50.6406, + "step": 7221 + }, + { + "epoch": 0.06836360882611865, + "grad_norm": 1469.2366943359375, + "learning_rate": 1.9922936615063725e-06, + "loss": 53.0234, + "step": 7222 + }, + { + "epoch": 0.0683730748478337, + "grad_norm": 532.34033203125, + "learning_rate": 1.9922898622121583e-06, + "loss": 48.6875, + "step": 7223 + }, + { + "epoch": 0.06838254086954876, + "grad_norm": 376.48193359375, + "learning_rate": 1.9922860619852566e-06, + "loss": 42.9375, + "step": 7224 + }, + { + "epoch": 0.0683920068912638, + "grad_norm": 173.93328857421875, + "learning_rate": 1.9922822608256703e-06, + "loss": 14.8203, + "step": 7225 + }, + { + "epoch": 0.06840147291297886, + "grad_norm": 309.63671875, + "learning_rate": 1.992278458733403e-06, + "loss": 26.7031, + "step": 7226 + }, + { + "epoch": 0.06841093893469391, + "grad_norm": 258.81341552734375, + "learning_rate": 1.992274655708459e-06, + "loss": 20.0156, + "step": 7227 + }, + { + "epoch": 0.06842040495640897, + "grad_norm": 273.595947265625, + "learning_rate": 1.9922708517508407e-06, + "loss": 22.9766, + "step": 7228 + }, + { + "epoch": 0.06842987097812403, + "grad_norm": 263.84368896484375, + "learning_rate": 1.9922670468605525e-06, + "loss": 22.6484, + "step": 7229 + }, + { + "epoch": 0.06843933699983908, + "grad_norm": 256.5375671386719, + "learning_rate": 1.9922632410375984e-06, + "loss": 13.9766, + "step": 7230 + }, + { + "epoch": 0.06844880302155414, + "grad_norm": 1566.0938720703125, + "learning_rate": 1.992259434281981e-06, + "loss": 23.4375, + "step": 7231 + }, + { + "epoch": 0.06845826904326918, + "grad_norm": 571.6846313476562, + "learning_rate": 1.9922556265937046e-06, + "loss": 48.25, + "step": 7232 + }, + { + "epoch": 0.06846773506498424, + "grad_norm": 349.6444396972656, + "learning_rate": 1.992251817972772e-06, + "loss": 43.4219, + "step": 7233 + }, + { + "epoch": 0.06847720108669929, + "grad_norm": 376.4237060546875, + "learning_rate": 1.9922480084191875e-06, + "loss": 17.6602, + "step": 7234 + }, + { + "epoch": 0.06848666710841435, + "grad_norm": 323.5259704589844, + "learning_rate": 1.992244197932954e-06, + "loss": 34.1875, + "step": 7235 + }, + { + "epoch": 0.0684961331301294, + "grad_norm": 1007.5625610351562, + "learning_rate": 1.992240386514076e-06, + "loss": 61.1484, + "step": 7236 + }, + { + "epoch": 0.06850559915184445, + "grad_norm": 1125.7484130859375, + "learning_rate": 1.9922365741625567e-06, + "loss": 41.1406, + "step": 7237 + }, + { + "epoch": 0.06851506517355951, + "grad_norm": 478.22235107421875, + "learning_rate": 1.9922327608783994e-06, + "loss": 43.5, + "step": 7238 + }, + { + "epoch": 0.06852453119527456, + "grad_norm": 620.0689086914062, + "learning_rate": 1.9922289466616076e-06, + "loss": 58.3125, + "step": 7239 + }, + { + "epoch": 0.06853399721698962, + "grad_norm": 166.89840698242188, + "learning_rate": 1.9922251315121855e-06, + "loss": 19.5312, + "step": 7240 + }, + { + "epoch": 0.06854346323870467, + "grad_norm": 243.13241577148438, + "learning_rate": 1.992221315430136e-06, + "loss": 18.0391, + "step": 7241 + }, + { + "epoch": 0.06855292926041973, + "grad_norm": 329.12158203125, + "learning_rate": 1.9922174984154632e-06, + "loss": 27.2188, + "step": 7242 + }, + { + "epoch": 0.06856239528213477, + "grad_norm": 3.7341501712799072, + "learning_rate": 1.9922136804681706e-06, + "loss": 1.0103, + "step": 7243 + }, + { + "epoch": 0.06857186130384983, + "grad_norm": 142.6222381591797, + "learning_rate": 1.9922098615882613e-06, + "loss": 21.0391, + "step": 7244 + }, + { + "epoch": 0.06858132732556488, + "grad_norm": 323.0128479003906, + "learning_rate": 1.9922060417757394e-06, + "loss": 24.4297, + "step": 7245 + }, + { + "epoch": 0.06859079334727994, + "grad_norm": 1174.357666015625, + "learning_rate": 1.9922022210306085e-06, + "loss": 41.8906, + "step": 7246 + }, + { + "epoch": 0.068600259368995, + "grad_norm": 502.02655029296875, + "learning_rate": 1.992198399352872e-06, + "loss": 30.1641, + "step": 7247 + }, + { + "epoch": 0.06860972539071004, + "grad_norm": 713.5994873046875, + "learning_rate": 1.992194576742534e-06, + "loss": 47.9219, + "step": 7248 + }, + { + "epoch": 0.0686191914124251, + "grad_norm": 529.8552856445312, + "learning_rate": 1.9921907531995974e-06, + "loss": 56.0625, + "step": 7249 + }, + { + "epoch": 0.06862865743414015, + "grad_norm": 256.2414855957031, + "learning_rate": 1.992186928724066e-06, + "loss": 24.25, + "step": 7250 + }, + { + "epoch": 0.06863812345585521, + "grad_norm": 343.6942443847656, + "learning_rate": 1.9921831033159433e-06, + "loss": 24.0781, + "step": 7251 + }, + { + "epoch": 0.06864758947757026, + "grad_norm": 788.3477783203125, + "learning_rate": 1.9921792769752326e-06, + "loss": 40.3906, + "step": 7252 + }, + { + "epoch": 0.06865705549928532, + "grad_norm": 244.6634063720703, + "learning_rate": 1.9921754497019387e-06, + "loss": 23.0, + "step": 7253 + }, + { + "epoch": 0.06866652152100038, + "grad_norm": 590.38330078125, + "learning_rate": 1.9921716214960636e-06, + "loss": 36.2031, + "step": 7254 + }, + { + "epoch": 0.06867598754271542, + "grad_norm": 857.8365478515625, + "learning_rate": 1.9921677923576125e-06, + "loss": 69.6562, + "step": 7255 + }, + { + "epoch": 0.06868545356443048, + "grad_norm": 245.41925048828125, + "learning_rate": 1.9921639622865878e-06, + "loss": 22.9141, + "step": 7256 + }, + { + "epoch": 0.06869491958614553, + "grad_norm": 631.0693969726562, + "learning_rate": 1.9921601312829938e-06, + "loss": 59.5234, + "step": 7257 + }, + { + "epoch": 0.06870438560786059, + "grad_norm": 273.6990966796875, + "learning_rate": 1.992156299346834e-06, + "loss": 36.5781, + "step": 7258 + }, + { + "epoch": 0.06871385162957563, + "grad_norm": 469.0268859863281, + "learning_rate": 1.992152466478111e-06, + "loss": 21.3125, + "step": 7259 + }, + { + "epoch": 0.0687233176512907, + "grad_norm": 390.9794616699219, + "learning_rate": 1.99214863267683e-06, + "loss": 23.0312, + "step": 7260 + }, + { + "epoch": 0.06873278367300574, + "grad_norm": 209.4972381591797, + "learning_rate": 1.9921447979429934e-06, + "loss": 22.0312, + "step": 7261 + }, + { + "epoch": 0.0687422496947208, + "grad_norm": 234.5009307861328, + "learning_rate": 1.9921409622766055e-06, + "loss": 16.4219, + "step": 7262 + }, + { + "epoch": 0.06875171571643586, + "grad_norm": 200.12310791015625, + "learning_rate": 1.9921371256776694e-06, + "loss": 17.4688, + "step": 7263 + }, + { + "epoch": 0.0687611817381509, + "grad_norm": 255.74696350097656, + "learning_rate": 1.992133288146189e-06, + "loss": 18.0391, + "step": 7264 + }, + { + "epoch": 0.06877064775986597, + "grad_norm": 376.3258361816406, + "learning_rate": 1.992129449682168e-06, + "loss": 24.0156, + "step": 7265 + }, + { + "epoch": 0.06878011378158101, + "grad_norm": 342.81121826171875, + "learning_rate": 1.99212561028561e-06, + "loss": 36.2188, + "step": 7266 + }, + { + "epoch": 0.06878957980329607, + "grad_norm": 1063.738037109375, + "learning_rate": 1.992121769956518e-06, + "loss": 37.2891, + "step": 7267 + }, + { + "epoch": 0.06879904582501112, + "grad_norm": 777.8397827148438, + "learning_rate": 1.9921179286948963e-06, + "loss": 56.6406, + "step": 7268 + }, + { + "epoch": 0.06880851184672618, + "grad_norm": 503.3398742675781, + "learning_rate": 1.9921140865007485e-06, + "loss": 49.9219, + "step": 7269 + }, + { + "epoch": 0.06881797786844122, + "grad_norm": 3.282646417617798, + "learning_rate": 1.9921102433740776e-06, + "loss": 0.8208, + "step": 7270 + }, + { + "epoch": 0.06882744389015628, + "grad_norm": 334.0233459472656, + "learning_rate": 1.992106399314888e-06, + "loss": 18.5469, + "step": 7271 + }, + { + "epoch": 0.06883690991187134, + "grad_norm": 459.3161926269531, + "learning_rate": 1.992102554323183e-06, + "loss": 47.1875, + "step": 7272 + }, + { + "epoch": 0.06884637593358639, + "grad_norm": 2.9949421882629395, + "learning_rate": 1.9920987083989656e-06, + "loss": 0.8508, + "step": 7273 + }, + { + "epoch": 0.06885584195530145, + "grad_norm": 255.99403381347656, + "learning_rate": 1.9920948615422403e-06, + "loss": 25.9844, + "step": 7274 + }, + { + "epoch": 0.0688653079770165, + "grad_norm": 410.89166259765625, + "learning_rate": 1.9920910137530105e-06, + "loss": 38.5547, + "step": 7275 + }, + { + "epoch": 0.06887477399873156, + "grad_norm": 557.7354125976562, + "learning_rate": 1.9920871650312795e-06, + "loss": 44.2344, + "step": 7276 + }, + { + "epoch": 0.0688842400204466, + "grad_norm": 209.406494140625, + "learning_rate": 1.992083315377051e-06, + "loss": 16.6562, + "step": 7277 + }, + { + "epoch": 0.06889370604216166, + "grad_norm": 503.18731689453125, + "learning_rate": 1.9920794647903287e-06, + "loss": 14.5312, + "step": 7278 + }, + { + "epoch": 0.06890317206387671, + "grad_norm": 825.7523193359375, + "learning_rate": 1.992075613271116e-06, + "loss": 53.8125, + "step": 7279 + }, + { + "epoch": 0.06891263808559177, + "grad_norm": 535.4503784179688, + "learning_rate": 1.9920717608194176e-06, + "loss": 49.8281, + "step": 7280 + }, + { + "epoch": 0.06892210410730683, + "grad_norm": 379.4809265136719, + "learning_rate": 1.9920679074352357e-06, + "loss": 29.3867, + "step": 7281 + }, + { + "epoch": 0.06893157012902187, + "grad_norm": 181.70538330078125, + "learning_rate": 1.9920640531185748e-06, + "loss": 16.7344, + "step": 7282 + }, + { + "epoch": 0.06894103615073693, + "grad_norm": 1178.43408203125, + "learning_rate": 1.992060197869438e-06, + "loss": 60.3516, + "step": 7283 + }, + { + "epoch": 0.06895050217245198, + "grad_norm": 3.7183051109313965, + "learning_rate": 1.992056341687829e-06, + "loss": 1.0669, + "step": 7284 + }, + { + "epoch": 0.06895996819416704, + "grad_norm": 506.26953125, + "learning_rate": 1.9920524845737517e-06, + "loss": 38.2031, + "step": 7285 + }, + { + "epoch": 0.06896943421588209, + "grad_norm": 494.969482421875, + "learning_rate": 1.99204862652721e-06, + "loss": 19.6055, + "step": 7286 + }, + { + "epoch": 0.06897890023759715, + "grad_norm": 319.4105529785156, + "learning_rate": 1.9920447675482065e-06, + "loss": 28.9688, + "step": 7287 + }, + { + "epoch": 0.06898836625931219, + "grad_norm": 231.82077026367188, + "learning_rate": 1.992040907636746e-06, + "loss": 24.4219, + "step": 7288 + }, + { + "epoch": 0.06899783228102725, + "grad_norm": 538.8790893554688, + "learning_rate": 1.9920370467928314e-06, + "loss": 30.6641, + "step": 7289 + }, + { + "epoch": 0.06900729830274231, + "grad_norm": 1028.642822265625, + "learning_rate": 1.9920331850164665e-06, + "loss": 45.9062, + "step": 7290 + }, + { + "epoch": 0.06901676432445736, + "grad_norm": 361.4853820800781, + "learning_rate": 1.9920293223076547e-06, + "loss": 28.9844, + "step": 7291 + }, + { + "epoch": 0.06902623034617242, + "grad_norm": 3.672696590423584, + "learning_rate": 1.9920254586664e-06, + "loss": 1.1592, + "step": 7292 + }, + { + "epoch": 0.06903569636788746, + "grad_norm": 728.8442993164062, + "learning_rate": 1.9920215940927064e-06, + "loss": 62.4219, + "step": 7293 + }, + { + "epoch": 0.06904516238960252, + "grad_norm": 187.85328674316406, + "learning_rate": 1.9920177285865763e-06, + "loss": 23.9141, + "step": 7294 + }, + { + "epoch": 0.06905462841131757, + "grad_norm": 835.5132446289062, + "learning_rate": 1.9920138621480146e-06, + "loss": 51.3672, + "step": 7295 + }, + { + "epoch": 0.06906409443303263, + "grad_norm": 881.5437622070312, + "learning_rate": 1.992009994777024e-06, + "loss": 57.2969, + "step": 7296 + }, + { + "epoch": 0.06907356045474769, + "grad_norm": 1980.6649169921875, + "learning_rate": 1.992006126473609e-06, + "loss": 81.0898, + "step": 7297 + }, + { + "epoch": 0.06908302647646274, + "grad_norm": 777.2039794921875, + "learning_rate": 1.9920022572377725e-06, + "loss": 42.0703, + "step": 7298 + }, + { + "epoch": 0.0690924924981778, + "grad_norm": 341.6648254394531, + "learning_rate": 1.9919983870695185e-06, + "loss": 26.5234, + "step": 7299 + }, + { + "epoch": 0.06910195851989284, + "grad_norm": 410.0570373535156, + "learning_rate": 1.99199451596885e-06, + "loss": 31.5156, + "step": 7300 + }, + { + "epoch": 0.0691114245416079, + "grad_norm": 2.887174367904663, + "learning_rate": 1.9919906439357717e-06, + "loss": 0.8521, + "step": 7301 + }, + { + "epoch": 0.06912089056332295, + "grad_norm": 982.573974609375, + "learning_rate": 1.991986770970287e-06, + "loss": 49.3906, + "step": 7302 + }, + { + "epoch": 0.06913035658503801, + "grad_norm": 207.35841369628906, + "learning_rate": 1.9919828970723987e-06, + "loss": 18.0625, + "step": 7303 + }, + { + "epoch": 0.06913982260675305, + "grad_norm": 389.7633361816406, + "learning_rate": 1.9919790222421113e-06, + "loss": 21.1719, + "step": 7304 + }, + { + "epoch": 0.06914928862846811, + "grad_norm": 350.7929992675781, + "learning_rate": 1.9919751464794283e-06, + "loss": 24.3672, + "step": 7305 + }, + { + "epoch": 0.06915875465018317, + "grad_norm": 404.984375, + "learning_rate": 1.991971269784353e-06, + "loss": 37.5156, + "step": 7306 + }, + { + "epoch": 0.06916822067189822, + "grad_norm": 208.63970947265625, + "learning_rate": 1.991967392156889e-06, + "loss": 20.8906, + "step": 7307 + }, + { + "epoch": 0.06917768669361328, + "grad_norm": 430.841064453125, + "learning_rate": 1.9919635135970403e-06, + "loss": 23.6562, + "step": 7308 + }, + { + "epoch": 0.06918715271532833, + "grad_norm": 564.0575561523438, + "learning_rate": 1.9919596341048106e-06, + "loss": 16.5938, + "step": 7309 + }, + { + "epoch": 0.06919661873704339, + "grad_norm": 489.51654052734375, + "learning_rate": 1.9919557536802035e-06, + "loss": 25.4531, + "step": 7310 + }, + { + "epoch": 0.06920608475875843, + "grad_norm": 641.5396118164062, + "learning_rate": 1.9919518723232223e-06, + "loss": 50.9844, + "step": 7311 + }, + { + "epoch": 0.06921555078047349, + "grad_norm": 1757.1131591796875, + "learning_rate": 1.9919479900338713e-06, + "loss": 58.3359, + "step": 7312 + }, + { + "epoch": 0.06922501680218854, + "grad_norm": 540.346923828125, + "learning_rate": 1.991944106812153e-06, + "loss": 20.4141, + "step": 7313 + }, + { + "epoch": 0.0692344828239036, + "grad_norm": 300.7900695800781, + "learning_rate": 1.9919402226580725e-06, + "loss": 24.4141, + "step": 7314 + }, + { + "epoch": 0.06924394884561866, + "grad_norm": 478.725830078125, + "learning_rate": 1.9919363375716324e-06, + "loss": 54.8594, + "step": 7315 + }, + { + "epoch": 0.0692534148673337, + "grad_norm": 1455.298095703125, + "learning_rate": 1.9919324515528364e-06, + "loss": 58.7031, + "step": 7316 + }, + { + "epoch": 0.06926288088904876, + "grad_norm": 535.9581909179688, + "learning_rate": 1.9919285646016884e-06, + "loss": 33.8828, + "step": 7317 + }, + { + "epoch": 0.06927234691076381, + "grad_norm": 607.1513671875, + "learning_rate": 1.9919246767181925e-06, + "loss": 21.7969, + "step": 7318 + }, + { + "epoch": 0.06928181293247887, + "grad_norm": 200.95388793945312, + "learning_rate": 1.9919207879023515e-06, + "loss": 19.1953, + "step": 7319 + }, + { + "epoch": 0.06929127895419392, + "grad_norm": 189.70559692382812, + "learning_rate": 1.9919168981541698e-06, + "loss": 23.0234, + "step": 7320 + }, + { + "epoch": 0.06930074497590898, + "grad_norm": 809.4456176757812, + "learning_rate": 1.991913007473651e-06, + "loss": 36.4219, + "step": 7321 + }, + { + "epoch": 0.06931021099762402, + "grad_norm": 461.5372009277344, + "learning_rate": 1.991909115860798e-06, + "loss": 33.5312, + "step": 7322 + }, + { + "epoch": 0.06931967701933908, + "grad_norm": 928.681884765625, + "learning_rate": 1.9919052233156154e-06, + "loss": 72.5625, + "step": 7323 + }, + { + "epoch": 0.06932914304105414, + "grad_norm": 890.7039184570312, + "learning_rate": 1.991901329838106e-06, + "loss": 67.375, + "step": 7324 + }, + { + "epoch": 0.06933860906276919, + "grad_norm": 247.6001434326172, + "learning_rate": 1.991897435428274e-06, + "loss": 26.7812, + "step": 7325 + }, + { + "epoch": 0.06934807508448425, + "grad_norm": 563.7797241210938, + "learning_rate": 1.991893540086123e-06, + "loss": 50.1641, + "step": 7326 + }, + { + "epoch": 0.0693575411061993, + "grad_norm": 971.6875, + "learning_rate": 1.9918896438116568e-06, + "loss": 46.9688, + "step": 7327 + }, + { + "epoch": 0.06936700712791435, + "grad_norm": 386.1033020019531, + "learning_rate": 1.991885746604878e-06, + "loss": 22.8828, + "step": 7328 + }, + { + "epoch": 0.0693764731496294, + "grad_norm": 2.9643571376800537, + "learning_rate": 1.991881848465792e-06, + "loss": 0.8228, + "step": 7329 + }, + { + "epoch": 0.06938593917134446, + "grad_norm": 494.2928161621094, + "learning_rate": 1.9918779493944017e-06, + "loss": 40.8672, + "step": 7330 + }, + { + "epoch": 0.0693954051930595, + "grad_norm": 441.4879455566406, + "learning_rate": 1.99187404939071e-06, + "loss": 17.6094, + "step": 7331 + }, + { + "epoch": 0.06940487121477457, + "grad_norm": 387.37603759765625, + "learning_rate": 1.9918701484547217e-06, + "loss": 33.8281, + "step": 7332 + }, + { + "epoch": 0.06941433723648963, + "grad_norm": 203.69883728027344, + "learning_rate": 1.9918662465864396e-06, + "loss": 23.6094, + "step": 7333 + }, + { + "epoch": 0.06942380325820467, + "grad_norm": 344.739501953125, + "learning_rate": 1.991862343785868e-06, + "loss": 33.0938, + "step": 7334 + }, + { + "epoch": 0.06943326927991973, + "grad_norm": 714.5194091796875, + "learning_rate": 1.9918584400530107e-06, + "loss": 73.25, + "step": 7335 + }, + { + "epoch": 0.06944273530163478, + "grad_norm": 291.0477600097656, + "learning_rate": 1.9918545353878702e-06, + "loss": 22.6484, + "step": 7336 + }, + { + "epoch": 0.06945220132334984, + "grad_norm": 407.5873718261719, + "learning_rate": 1.9918506297904515e-06, + "loss": 21.3203, + "step": 7337 + }, + { + "epoch": 0.06946166734506488, + "grad_norm": 560.351806640625, + "learning_rate": 1.9918467232607573e-06, + "loss": 66.6562, + "step": 7338 + }, + { + "epoch": 0.06947113336677994, + "grad_norm": 202.71018981933594, + "learning_rate": 1.991842815798792e-06, + "loss": 26.4453, + "step": 7339 + }, + { + "epoch": 0.069480599388495, + "grad_norm": 900.7262573242188, + "learning_rate": 1.991838907404559e-06, + "loss": 37.3125, + "step": 7340 + }, + { + "epoch": 0.06949006541021005, + "grad_norm": 3.0938000679016113, + "learning_rate": 1.991834998078062e-06, + "loss": 0.998, + "step": 7341 + }, + { + "epoch": 0.06949953143192511, + "grad_norm": 2.958303689956665, + "learning_rate": 1.9918310878193044e-06, + "loss": 0.8511, + "step": 7342 + }, + { + "epoch": 0.06950899745364016, + "grad_norm": 210.72998046875, + "learning_rate": 1.9918271766282905e-06, + "loss": 11.5742, + "step": 7343 + }, + { + "epoch": 0.06951846347535522, + "grad_norm": 1705.0511474609375, + "learning_rate": 1.9918232645050233e-06, + "loss": 46.2422, + "step": 7344 + }, + { + "epoch": 0.06952792949707026, + "grad_norm": 1603.5118408203125, + "learning_rate": 1.9918193514495065e-06, + "loss": 24.4219, + "step": 7345 + }, + { + "epoch": 0.06953739551878532, + "grad_norm": 872.19384765625, + "learning_rate": 1.9918154374617445e-06, + "loss": 47.3125, + "step": 7346 + }, + { + "epoch": 0.06954686154050037, + "grad_norm": 1144.6636962890625, + "learning_rate": 1.99181152254174e-06, + "loss": 44.8672, + "step": 7347 + }, + { + "epoch": 0.06955632756221543, + "grad_norm": 503.4835205078125, + "learning_rate": 1.991807606689498e-06, + "loss": 53.625, + "step": 7348 + }, + { + "epoch": 0.06956579358393049, + "grad_norm": 194.76559448242188, + "learning_rate": 1.9918036899050207e-06, + "loss": 24.5469, + "step": 7349 + }, + { + "epoch": 0.06957525960564553, + "grad_norm": 774.5486450195312, + "learning_rate": 1.9917997721883125e-06, + "loss": 19.4922, + "step": 7350 + }, + { + "epoch": 0.0695847256273606, + "grad_norm": 689.3071899414062, + "learning_rate": 1.9917958535393773e-06, + "loss": 29.7344, + "step": 7351 + }, + { + "epoch": 0.06959419164907564, + "grad_norm": 696.1262817382812, + "learning_rate": 1.9917919339582183e-06, + "loss": 34.6406, + "step": 7352 + }, + { + "epoch": 0.0696036576707907, + "grad_norm": 706.4270629882812, + "learning_rate": 1.9917880134448394e-06, + "loss": 55.2188, + "step": 7353 + }, + { + "epoch": 0.06961312369250575, + "grad_norm": 731.8836059570312, + "learning_rate": 1.991784091999244e-06, + "loss": 59.8125, + "step": 7354 + }, + { + "epoch": 0.0696225897142208, + "grad_norm": 583.183837890625, + "learning_rate": 1.991780169621437e-06, + "loss": 29.9531, + "step": 7355 + }, + { + "epoch": 0.06963205573593585, + "grad_norm": 755.743408203125, + "learning_rate": 1.9917762463114203e-06, + "loss": 38.8828, + "step": 7356 + }, + { + "epoch": 0.06964152175765091, + "grad_norm": 262.7541198730469, + "learning_rate": 1.9917723220691987e-06, + "loss": 23.7812, + "step": 7357 + }, + { + "epoch": 0.06965098777936597, + "grad_norm": 708.4259643554688, + "learning_rate": 1.991768396894776e-06, + "loss": 67.4609, + "step": 7358 + }, + { + "epoch": 0.06966045380108102, + "grad_norm": 331.9125671386719, + "learning_rate": 1.991764470788155e-06, + "loss": 23.6562, + "step": 7359 + }, + { + "epoch": 0.06966991982279608, + "grad_norm": 404.60400390625, + "learning_rate": 1.99176054374934e-06, + "loss": 18.2734, + "step": 7360 + }, + { + "epoch": 0.06967938584451112, + "grad_norm": 173.37332153320312, + "learning_rate": 1.9917566157783348e-06, + "loss": 25.3672, + "step": 7361 + }, + { + "epoch": 0.06968885186622618, + "grad_norm": 3.563624382019043, + "learning_rate": 1.991752686875143e-06, + "loss": 1.0103, + "step": 7362 + }, + { + "epoch": 0.06969831788794123, + "grad_norm": 2.748018980026245, + "learning_rate": 1.991748757039768e-06, + "loss": 0.8833, + "step": 7363 + }, + { + "epoch": 0.06970778390965629, + "grad_norm": 326.2257385253906, + "learning_rate": 1.991744826272213e-06, + "loss": 21.0078, + "step": 7364 + }, + { + "epoch": 0.06971724993137134, + "grad_norm": 502.64202880859375, + "learning_rate": 1.9917408945724836e-06, + "loss": 34.5625, + "step": 7365 + }, + { + "epoch": 0.0697267159530864, + "grad_norm": 520.9301147460938, + "learning_rate": 1.991736961940582e-06, + "loss": 25.7969, + "step": 7366 + }, + { + "epoch": 0.06973618197480146, + "grad_norm": 331.8801574707031, + "learning_rate": 1.9917330283765115e-06, + "loss": 26.9688, + "step": 7367 + }, + { + "epoch": 0.0697456479965165, + "grad_norm": 783.3373413085938, + "learning_rate": 1.991729093880277e-06, + "loss": 49.3438, + "step": 7368 + }, + { + "epoch": 0.06975511401823156, + "grad_norm": 286.8240051269531, + "learning_rate": 1.9917251584518814e-06, + "loss": 32.2188, + "step": 7369 + }, + { + "epoch": 0.06976458003994661, + "grad_norm": 302.6920471191406, + "learning_rate": 1.991721222091329e-06, + "loss": 27.0859, + "step": 7370 + }, + { + "epoch": 0.06977404606166167, + "grad_norm": 420.8332824707031, + "learning_rate": 1.991717284798623e-06, + "loss": 63.0625, + "step": 7371 + }, + { + "epoch": 0.06978351208337671, + "grad_norm": 520.7198486328125, + "learning_rate": 1.9917133465737673e-06, + "loss": 31.1328, + "step": 7372 + }, + { + "epoch": 0.06979297810509177, + "grad_norm": 333.6162414550781, + "learning_rate": 1.9917094074167655e-06, + "loss": 29.1016, + "step": 7373 + }, + { + "epoch": 0.06980244412680682, + "grad_norm": 277.9934387207031, + "learning_rate": 1.9917054673276213e-06, + "loss": 20.0156, + "step": 7374 + }, + { + "epoch": 0.06981191014852188, + "grad_norm": 451.2554016113281, + "learning_rate": 1.991701526306339e-06, + "loss": 37.0078, + "step": 7375 + }, + { + "epoch": 0.06982137617023694, + "grad_norm": 608.8590087890625, + "learning_rate": 1.991697584352921e-06, + "loss": 45.4375, + "step": 7376 + }, + { + "epoch": 0.06983084219195199, + "grad_norm": 494.5029296875, + "learning_rate": 1.991693641467372e-06, + "loss": 50.5312, + "step": 7377 + }, + { + "epoch": 0.06984030821366705, + "grad_norm": 425.85552978515625, + "learning_rate": 1.9916896976496956e-06, + "loss": 31.4062, + "step": 7378 + }, + { + "epoch": 0.06984977423538209, + "grad_norm": 459.0323181152344, + "learning_rate": 1.9916857528998958e-06, + "loss": 49.7188, + "step": 7379 + }, + { + "epoch": 0.06985924025709715, + "grad_norm": 600.57421875, + "learning_rate": 1.991681807217975e-06, + "loss": 60.5625, + "step": 7380 + }, + { + "epoch": 0.0698687062788122, + "grad_norm": 604.4678955078125, + "learning_rate": 1.991677860603939e-06, + "loss": 50.1484, + "step": 7381 + }, + { + "epoch": 0.06987817230052726, + "grad_norm": 665.7972412109375, + "learning_rate": 1.9916739130577897e-06, + "loss": 36.4219, + "step": 7382 + }, + { + "epoch": 0.06988763832224232, + "grad_norm": 195.10986328125, + "learning_rate": 1.9916699645795313e-06, + "loss": 19.125, + "step": 7383 + }, + { + "epoch": 0.06989710434395736, + "grad_norm": 450.8621826171875, + "learning_rate": 1.9916660151691677e-06, + "loss": 39.1875, + "step": 7384 + }, + { + "epoch": 0.06990657036567242, + "grad_norm": 600.1536254882812, + "learning_rate": 1.991662064826703e-06, + "loss": 42.0938, + "step": 7385 + }, + { + "epoch": 0.06991603638738747, + "grad_norm": 3.201138734817505, + "learning_rate": 1.9916581135521396e-06, + "loss": 1.0249, + "step": 7386 + }, + { + "epoch": 0.06992550240910253, + "grad_norm": 189.11904907226562, + "learning_rate": 1.9916541613454827e-06, + "loss": 13.1387, + "step": 7387 + }, + { + "epoch": 0.06993496843081758, + "grad_norm": 262.8631591796875, + "learning_rate": 1.991650208206735e-06, + "loss": 20.7812, + "step": 7388 + }, + { + "epoch": 0.06994443445253264, + "grad_norm": 309.97186279296875, + "learning_rate": 1.9916462541359013e-06, + "loss": 26.9375, + "step": 7389 + }, + { + "epoch": 0.06995390047424768, + "grad_norm": 307.908447265625, + "learning_rate": 1.991642299132984e-06, + "loss": 20.1562, + "step": 7390 + }, + { + "epoch": 0.06996336649596274, + "grad_norm": 630.3785400390625, + "learning_rate": 1.9916383431979876e-06, + "loss": 48.7812, + "step": 7391 + }, + { + "epoch": 0.0699728325176778, + "grad_norm": 405.2330322265625, + "learning_rate": 1.991634386330916e-06, + "loss": 24.5234, + "step": 7392 + }, + { + "epoch": 0.06998229853939285, + "grad_norm": 478.7851867675781, + "learning_rate": 1.9916304285317723e-06, + "loss": 48.5938, + "step": 7393 + }, + { + "epoch": 0.06999176456110791, + "grad_norm": 425.759521484375, + "learning_rate": 1.9916264698005604e-06, + "loss": 44.8125, + "step": 7394 + }, + { + "epoch": 0.07000123058282295, + "grad_norm": 416.8600158691406, + "learning_rate": 1.9916225101372844e-06, + "loss": 39.0391, + "step": 7395 + }, + { + "epoch": 0.07001069660453801, + "grad_norm": 487.284912109375, + "learning_rate": 1.9916185495419473e-06, + "loss": 15.8359, + "step": 7396 + }, + { + "epoch": 0.07002016262625306, + "grad_norm": 3.871490478515625, + "learning_rate": 1.991614588014554e-06, + "loss": 1.0786, + "step": 7397 + }, + { + "epoch": 0.07002962864796812, + "grad_norm": 319.32958984375, + "learning_rate": 1.991610625555107e-06, + "loss": 26.0625, + "step": 7398 + }, + { + "epoch": 0.07003909466968317, + "grad_norm": 2.9754278659820557, + "learning_rate": 1.9916066621636107e-06, + "loss": 0.8823, + "step": 7399 + }, + { + "epoch": 0.07004856069139823, + "grad_norm": 459.6029052734375, + "learning_rate": 1.9916026978400683e-06, + "loss": 37.3594, + "step": 7400 + }, + { + "epoch": 0.07005802671311329, + "grad_norm": 178.2370147705078, + "learning_rate": 1.9915987325844843e-06, + "loss": 20.1172, + "step": 7401 + }, + { + "epoch": 0.07006749273482833, + "grad_norm": 351.64031982421875, + "learning_rate": 1.9915947663968616e-06, + "loss": 11.4766, + "step": 7402 + }, + { + "epoch": 0.07007695875654339, + "grad_norm": 493.5525817871094, + "learning_rate": 1.9915907992772046e-06, + "loss": 43.25, + "step": 7403 + }, + { + "epoch": 0.07008642477825844, + "grad_norm": 690.6556396484375, + "learning_rate": 1.9915868312255165e-06, + "loss": 29.8906, + "step": 7404 + }, + { + "epoch": 0.0700958907999735, + "grad_norm": 342.9231262207031, + "learning_rate": 1.9915828622418017e-06, + "loss": 28.3008, + "step": 7405 + }, + { + "epoch": 0.07010535682168854, + "grad_norm": 634.5818481445312, + "learning_rate": 1.9915788923260634e-06, + "loss": 34.2031, + "step": 7406 + }, + { + "epoch": 0.0701148228434036, + "grad_norm": 449.60992431640625, + "learning_rate": 1.991574921478305e-06, + "loss": 66.25, + "step": 7407 + }, + { + "epoch": 0.07012428886511865, + "grad_norm": 372.2669982910156, + "learning_rate": 1.9915709496985312e-06, + "loss": 40.8047, + "step": 7408 + }, + { + "epoch": 0.07013375488683371, + "grad_norm": 359.44012451171875, + "learning_rate": 1.9915669769867453e-06, + "loss": 26.0469, + "step": 7409 + }, + { + "epoch": 0.07014322090854877, + "grad_norm": 350.62225341796875, + "learning_rate": 1.9915630033429504e-06, + "loss": 34.4766, + "step": 7410 + }, + { + "epoch": 0.07015268693026382, + "grad_norm": 225.7040252685547, + "learning_rate": 1.991559028767151e-06, + "loss": 25.3906, + "step": 7411 + }, + { + "epoch": 0.07016215295197888, + "grad_norm": 235.70399475097656, + "learning_rate": 1.991555053259351e-06, + "loss": 21.4297, + "step": 7412 + }, + { + "epoch": 0.07017161897369392, + "grad_norm": 362.5705871582031, + "learning_rate": 1.991551076819553e-06, + "loss": 27.3828, + "step": 7413 + }, + { + "epoch": 0.07018108499540898, + "grad_norm": 349.64947509765625, + "learning_rate": 1.991547099447762e-06, + "loss": 22.2969, + "step": 7414 + }, + { + "epoch": 0.07019055101712403, + "grad_norm": 440.45037841796875, + "learning_rate": 1.9915431211439816e-06, + "loss": 39.7969, + "step": 7415 + }, + { + "epoch": 0.07020001703883909, + "grad_norm": 706.9877319335938, + "learning_rate": 1.9915391419082145e-06, + "loss": 52.125, + "step": 7416 + }, + { + "epoch": 0.07020948306055413, + "grad_norm": 227.94573974609375, + "learning_rate": 1.9915351617404655e-06, + "loss": 20.3047, + "step": 7417 + }, + { + "epoch": 0.0702189490822692, + "grad_norm": 331.25921630859375, + "learning_rate": 1.9915311806407376e-06, + "loss": 12.1562, + "step": 7418 + }, + { + "epoch": 0.07022841510398425, + "grad_norm": 488.3271789550781, + "learning_rate": 1.991527198609035e-06, + "loss": 44.5156, + "step": 7419 + }, + { + "epoch": 0.0702378811256993, + "grad_norm": 2.469583511352539, + "learning_rate": 1.9915232156453615e-06, + "loss": 0.8057, + "step": 7420 + }, + { + "epoch": 0.07024734714741436, + "grad_norm": 694.6493530273438, + "learning_rate": 1.991519231749721e-06, + "loss": 28.4531, + "step": 7421 + }, + { + "epoch": 0.0702568131691294, + "grad_norm": 2.810957431793213, + "learning_rate": 1.991515246922116e-06, + "loss": 0.8662, + "step": 7422 + }, + { + "epoch": 0.07026627919084447, + "grad_norm": 215.01780700683594, + "learning_rate": 1.991511261162552e-06, + "loss": 25.8047, + "step": 7423 + }, + { + "epoch": 0.07027574521255951, + "grad_norm": 353.0612487792969, + "learning_rate": 1.9915072744710315e-06, + "loss": 34.0, + "step": 7424 + }, + { + "epoch": 0.07028521123427457, + "grad_norm": 388.214111328125, + "learning_rate": 1.9915032868475587e-06, + "loss": 22.5781, + "step": 7425 + }, + { + "epoch": 0.07029467725598963, + "grad_norm": 280.5917053222656, + "learning_rate": 1.9914992982921375e-06, + "loss": 29.9297, + "step": 7426 + }, + { + "epoch": 0.07030414327770468, + "grad_norm": 796.8793334960938, + "learning_rate": 1.9914953088047713e-06, + "loss": 46.3281, + "step": 7427 + }, + { + "epoch": 0.07031360929941974, + "grad_norm": 512.1459350585938, + "learning_rate": 1.9914913183854643e-06, + "loss": 29.0938, + "step": 7428 + }, + { + "epoch": 0.07032307532113478, + "grad_norm": 533.087646484375, + "learning_rate": 1.9914873270342195e-06, + "loss": 50.4141, + "step": 7429 + }, + { + "epoch": 0.07033254134284984, + "grad_norm": 429.98944091796875, + "learning_rate": 1.9914833347510415e-06, + "loss": 42.4375, + "step": 7430 + }, + { + "epoch": 0.07034200736456489, + "grad_norm": 247.05709838867188, + "learning_rate": 1.9914793415359337e-06, + "loss": 20.3438, + "step": 7431 + }, + { + "epoch": 0.07035147338627995, + "grad_norm": 693.5121459960938, + "learning_rate": 1.9914753473888995e-06, + "loss": 68.5938, + "step": 7432 + }, + { + "epoch": 0.070360939407995, + "grad_norm": 217.20272827148438, + "learning_rate": 1.991471352309943e-06, + "loss": 27.4766, + "step": 7433 + }, + { + "epoch": 0.07037040542971006, + "grad_norm": 172.85911560058594, + "learning_rate": 1.991467356299068e-06, + "loss": 21.4375, + "step": 7434 + }, + { + "epoch": 0.07037987145142512, + "grad_norm": 509.9386291503906, + "learning_rate": 1.9914633593562782e-06, + "loss": 37.9375, + "step": 7435 + }, + { + "epoch": 0.07038933747314016, + "grad_norm": 559.1433715820312, + "learning_rate": 1.9914593614815774e-06, + "loss": 38.4609, + "step": 7436 + }, + { + "epoch": 0.07039880349485522, + "grad_norm": 516.0458374023438, + "learning_rate": 1.991455362674969e-06, + "loss": 66.375, + "step": 7437 + }, + { + "epoch": 0.07040826951657027, + "grad_norm": 262.7881164550781, + "learning_rate": 1.991451362936458e-06, + "loss": 26.1406, + "step": 7438 + }, + { + "epoch": 0.07041773553828533, + "grad_norm": 506.0832824707031, + "learning_rate": 1.991447362266046e-06, + "loss": 45.9062, + "step": 7439 + }, + { + "epoch": 0.07042720156000037, + "grad_norm": 403.35589599609375, + "learning_rate": 1.9914433606637387e-06, + "loss": 35.6328, + "step": 7440 + }, + { + "epoch": 0.07043666758171543, + "grad_norm": 1269.18212890625, + "learning_rate": 1.991439358129539e-06, + "loss": 51.8828, + "step": 7441 + }, + { + "epoch": 0.07044613360343048, + "grad_norm": 170.7887725830078, + "learning_rate": 1.9914353546634507e-06, + "loss": 27.5703, + "step": 7442 + }, + { + "epoch": 0.07045559962514554, + "grad_norm": 251.50022888183594, + "learning_rate": 1.9914313502654776e-06, + "loss": 20.8594, + "step": 7443 + }, + { + "epoch": 0.0704650656468606, + "grad_norm": 764.9610595703125, + "learning_rate": 1.991427344935624e-06, + "loss": 53.043, + "step": 7444 + }, + { + "epoch": 0.07047453166857565, + "grad_norm": 421.0448913574219, + "learning_rate": 1.9914233386738925e-06, + "loss": 45.2812, + "step": 7445 + }, + { + "epoch": 0.0704839976902907, + "grad_norm": 476.44927978515625, + "learning_rate": 1.991419331480288e-06, + "loss": 35.2734, + "step": 7446 + }, + { + "epoch": 0.07049346371200575, + "grad_norm": 340.5408935546875, + "learning_rate": 1.9914153233548135e-06, + "loss": 21.0781, + "step": 7447 + }, + { + "epoch": 0.07050292973372081, + "grad_norm": 679.7926025390625, + "learning_rate": 1.9914113142974736e-06, + "loss": 43.625, + "step": 7448 + }, + { + "epoch": 0.07051239575543586, + "grad_norm": 210.23504638671875, + "learning_rate": 1.9914073043082712e-06, + "loss": 22.1406, + "step": 7449 + }, + { + "epoch": 0.07052186177715092, + "grad_norm": 340.9158935546875, + "learning_rate": 1.9914032933872106e-06, + "loss": 23.6016, + "step": 7450 + }, + { + "epoch": 0.07053132779886596, + "grad_norm": 349.7128601074219, + "learning_rate": 1.991399281534295e-06, + "loss": 47.6562, + "step": 7451 + }, + { + "epoch": 0.07054079382058102, + "grad_norm": 648.066162109375, + "learning_rate": 1.991395268749529e-06, + "loss": 37.7656, + "step": 7452 + }, + { + "epoch": 0.07055025984229608, + "grad_norm": 458.8071594238281, + "learning_rate": 1.9913912550329155e-06, + "loss": 40.2188, + "step": 7453 + }, + { + "epoch": 0.07055972586401113, + "grad_norm": 336.4267883300781, + "learning_rate": 1.9913872403844593e-06, + "loss": 19.1484, + "step": 7454 + }, + { + "epoch": 0.07056919188572619, + "grad_norm": 347.7162170410156, + "learning_rate": 1.991383224804163e-06, + "loss": 24.0781, + "step": 7455 + }, + { + "epoch": 0.07057865790744124, + "grad_norm": 315.583740234375, + "learning_rate": 1.9913792082920316e-06, + "loss": 28.5312, + "step": 7456 + }, + { + "epoch": 0.0705881239291563, + "grad_norm": 219.91677856445312, + "learning_rate": 1.991375190848068e-06, + "loss": 27.75, + "step": 7457 + }, + { + "epoch": 0.07059758995087134, + "grad_norm": 202.8128204345703, + "learning_rate": 1.991371172472276e-06, + "loss": 20.7344, + "step": 7458 + }, + { + "epoch": 0.0706070559725864, + "grad_norm": 584.9964599609375, + "learning_rate": 1.9913671531646597e-06, + "loss": 45.3438, + "step": 7459 + }, + { + "epoch": 0.07061652199430145, + "grad_norm": 607.0519409179688, + "learning_rate": 1.9913631329252228e-06, + "loss": 22.293, + "step": 7460 + }, + { + "epoch": 0.07062598801601651, + "grad_norm": 236.73223876953125, + "learning_rate": 1.991359111753969e-06, + "loss": 22.5547, + "step": 7461 + }, + { + "epoch": 0.07063545403773157, + "grad_norm": 371.2930603027344, + "learning_rate": 1.991355089650902e-06, + "loss": 32.8984, + "step": 7462 + }, + { + "epoch": 0.07064492005944661, + "grad_norm": 262.52850341796875, + "learning_rate": 1.991351066616026e-06, + "loss": 22.1484, + "step": 7463 + }, + { + "epoch": 0.07065438608116167, + "grad_norm": 337.3019714355469, + "learning_rate": 1.9913470426493444e-06, + "loss": 33.1562, + "step": 7464 + }, + { + "epoch": 0.07066385210287672, + "grad_norm": 158.21029663085938, + "learning_rate": 1.9913430177508612e-06, + "loss": 15.4453, + "step": 7465 + }, + { + "epoch": 0.07067331812459178, + "grad_norm": 258.78289794921875, + "learning_rate": 1.9913389919205795e-06, + "loss": 24.4688, + "step": 7466 + }, + { + "epoch": 0.07068278414630683, + "grad_norm": 957.1556396484375, + "learning_rate": 1.9913349651585044e-06, + "loss": 18.4922, + "step": 7467 + }, + { + "epoch": 0.07069225016802189, + "grad_norm": 290.12139892578125, + "learning_rate": 1.9913309374646384e-06, + "loss": 21.0859, + "step": 7468 + }, + { + "epoch": 0.07070171618973695, + "grad_norm": 264.09613037109375, + "learning_rate": 1.9913269088389862e-06, + "loss": 19.6719, + "step": 7469 + }, + { + "epoch": 0.07071118221145199, + "grad_norm": 411.2812805175781, + "learning_rate": 1.9913228792815508e-06, + "loss": 31.0859, + "step": 7470 + }, + { + "epoch": 0.07072064823316705, + "grad_norm": 2.386233329772949, + "learning_rate": 1.9913188487923363e-06, + "loss": 0.8325, + "step": 7471 + }, + { + "epoch": 0.0707301142548821, + "grad_norm": 695.2225952148438, + "learning_rate": 1.991314817371347e-06, + "loss": 57.9219, + "step": 7472 + }, + { + "epoch": 0.07073958027659716, + "grad_norm": 336.0799865722656, + "learning_rate": 1.991310785018586e-06, + "loss": 47.4375, + "step": 7473 + }, + { + "epoch": 0.0707490462983122, + "grad_norm": 222.18496704101562, + "learning_rate": 1.9913067517340573e-06, + "loss": 30.1719, + "step": 7474 + }, + { + "epoch": 0.07075851232002726, + "grad_norm": 693.20068359375, + "learning_rate": 1.991302717517765e-06, + "loss": 32.3828, + "step": 7475 + }, + { + "epoch": 0.07076797834174231, + "grad_norm": 429.52557373046875, + "learning_rate": 1.9912986823697125e-06, + "loss": 48.8125, + "step": 7476 + }, + { + "epoch": 0.07077744436345737, + "grad_norm": 657.952392578125, + "learning_rate": 1.9912946462899035e-06, + "loss": 58.4062, + "step": 7477 + }, + { + "epoch": 0.07078691038517243, + "grad_norm": 3.088881015777588, + "learning_rate": 1.9912906092783427e-06, + "loss": 0.8936, + "step": 7478 + }, + { + "epoch": 0.07079637640688748, + "grad_norm": 386.4744567871094, + "learning_rate": 1.9912865713350324e-06, + "loss": 29.2344, + "step": 7479 + }, + { + "epoch": 0.07080584242860254, + "grad_norm": 212.549072265625, + "learning_rate": 1.991282532459978e-06, + "loss": 18.9688, + "step": 7480 + }, + { + "epoch": 0.07081530845031758, + "grad_norm": 421.8505554199219, + "learning_rate": 1.9912784926531816e-06, + "loss": 37.9219, + "step": 7481 + }, + { + "epoch": 0.07082477447203264, + "grad_norm": 427.1658020019531, + "learning_rate": 1.9912744519146487e-06, + "loss": 53.5781, + "step": 7482 + }, + { + "epoch": 0.07083424049374769, + "grad_norm": 287.09490966796875, + "learning_rate": 1.9912704102443816e-06, + "loss": 25.4609, + "step": 7483 + }, + { + "epoch": 0.07084370651546275, + "grad_norm": 281.4053649902344, + "learning_rate": 1.9912663676423855e-06, + "loss": 27.5859, + "step": 7484 + }, + { + "epoch": 0.0708531725371778, + "grad_norm": 2.4766476154327393, + "learning_rate": 1.991262324108663e-06, + "loss": 0.7642, + "step": 7485 + }, + { + "epoch": 0.07086263855889285, + "grad_norm": 228.0844268798828, + "learning_rate": 1.9912582796432185e-06, + "loss": 29.4609, + "step": 7486 + }, + { + "epoch": 0.07087210458060791, + "grad_norm": 328.5888977050781, + "learning_rate": 1.991254234246056e-06, + "loss": 33.7344, + "step": 7487 + }, + { + "epoch": 0.07088157060232296, + "grad_norm": 229.33839416503906, + "learning_rate": 1.9912501879171784e-06, + "loss": 18.4688, + "step": 7488 + }, + { + "epoch": 0.07089103662403802, + "grad_norm": 243.5563507080078, + "learning_rate": 1.991246140656591e-06, + "loss": 23.3906, + "step": 7489 + }, + { + "epoch": 0.07090050264575307, + "grad_norm": 676.9498291015625, + "learning_rate": 1.991242092464296e-06, + "loss": 45.2344, + "step": 7490 + }, + { + "epoch": 0.07090996866746813, + "grad_norm": 3.4449708461761475, + "learning_rate": 1.991238043340298e-06, + "loss": 1.0835, + "step": 7491 + }, + { + "epoch": 0.07091943468918317, + "grad_norm": 401.778076171875, + "learning_rate": 1.991233993284601e-06, + "loss": 57.875, + "step": 7492 + }, + { + "epoch": 0.07092890071089823, + "grad_norm": 179.29722595214844, + "learning_rate": 1.9912299422972083e-06, + "loss": 22.8359, + "step": 7493 + }, + { + "epoch": 0.07093836673261328, + "grad_norm": 235.78250122070312, + "learning_rate": 1.991225890378124e-06, + "loss": 22.8203, + "step": 7494 + }, + { + "epoch": 0.07094783275432834, + "grad_norm": 437.3791198730469, + "learning_rate": 1.991221837527352e-06, + "loss": 38.6719, + "step": 7495 + }, + { + "epoch": 0.0709572987760434, + "grad_norm": 590.5224609375, + "learning_rate": 1.991217783744896e-06, + "loss": 23.8203, + "step": 7496 + }, + { + "epoch": 0.07096676479775844, + "grad_norm": 354.2182922363281, + "learning_rate": 1.9912137290307594e-06, + "loss": 32.3281, + "step": 7497 + }, + { + "epoch": 0.0709762308194735, + "grad_norm": 315.2845764160156, + "learning_rate": 1.9912096733849463e-06, + "loss": 39.75, + "step": 7498 + }, + { + "epoch": 0.07098569684118855, + "grad_norm": 398.7052001953125, + "learning_rate": 1.991205616807461e-06, + "loss": 19.7656, + "step": 7499 + }, + { + "epoch": 0.07099516286290361, + "grad_norm": 254.3289031982422, + "learning_rate": 1.9912015592983067e-06, + "loss": 27.8594, + "step": 7500 + }, + { + "epoch": 0.07100462888461866, + "grad_norm": 243.35340881347656, + "learning_rate": 1.9911975008574874e-06, + "loss": 19.3672, + "step": 7501 + }, + { + "epoch": 0.07101409490633372, + "grad_norm": 323.45721435546875, + "learning_rate": 1.991193441485007e-06, + "loss": 22.4766, + "step": 7502 + }, + { + "epoch": 0.07102356092804876, + "grad_norm": 3.121488571166992, + "learning_rate": 1.9911893811808695e-06, + "loss": 0.9658, + "step": 7503 + }, + { + "epoch": 0.07103302694976382, + "grad_norm": 367.06036376953125, + "learning_rate": 1.991185319945078e-06, + "loss": 20.2812, + "step": 7504 + }, + { + "epoch": 0.07104249297147888, + "grad_norm": 450.65777587890625, + "learning_rate": 1.991181257777637e-06, + "loss": 33.8438, + "step": 7505 + }, + { + "epoch": 0.07105195899319393, + "grad_norm": 258.5393981933594, + "learning_rate": 1.99117719467855e-06, + "loss": 19.375, + "step": 7506 + }, + { + "epoch": 0.07106142501490899, + "grad_norm": 331.35601806640625, + "learning_rate": 1.991173130647821e-06, + "loss": 41.2578, + "step": 7507 + }, + { + "epoch": 0.07107089103662403, + "grad_norm": 643.834228515625, + "learning_rate": 1.9911690656854542e-06, + "loss": 57.7656, + "step": 7508 + }, + { + "epoch": 0.0710803570583391, + "grad_norm": 380.4493408203125, + "learning_rate": 1.991164999791452e-06, + "loss": 26.6016, + "step": 7509 + }, + { + "epoch": 0.07108982308005414, + "grad_norm": 149.14453125, + "learning_rate": 1.99116093296582e-06, + "loss": 8.3945, + "step": 7510 + }, + { + "epoch": 0.0710992891017692, + "grad_norm": 1559.95556640625, + "learning_rate": 1.991156865208561e-06, + "loss": 46.8047, + "step": 7511 + }, + { + "epoch": 0.07110875512348426, + "grad_norm": 558.5841674804688, + "learning_rate": 1.9911527965196784e-06, + "loss": 28.4609, + "step": 7512 + }, + { + "epoch": 0.0711182211451993, + "grad_norm": 289.2507019042969, + "learning_rate": 1.9911487268991774e-06, + "loss": 21.7422, + "step": 7513 + }, + { + "epoch": 0.07112768716691437, + "grad_norm": 364.2064208984375, + "learning_rate": 1.991144656347061e-06, + "loss": 30.75, + "step": 7514 + }, + { + "epoch": 0.07113715318862941, + "grad_norm": 204.95802307128906, + "learning_rate": 1.991140584863333e-06, + "loss": 20.9492, + "step": 7515 + }, + { + "epoch": 0.07114661921034447, + "grad_norm": 291.97039794921875, + "learning_rate": 1.991136512447997e-06, + "loss": 29.4844, + "step": 7516 + }, + { + "epoch": 0.07115608523205952, + "grad_norm": 811.9486694335938, + "learning_rate": 1.9911324391010577e-06, + "loss": 24.2539, + "step": 7517 + }, + { + "epoch": 0.07116555125377458, + "grad_norm": 500.98046875, + "learning_rate": 1.991128364822518e-06, + "loss": 67.1211, + "step": 7518 + }, + { + "epoch": 0.07117501727548962, + "grad_norm": 425.7456359863281, + "learning_rate": 1.991124289612382e-06, + "loss": 54.1875, + "step": 7519 + }, + { + "epoch": 0.07118448329720468, + "grad_norm": 528.4595336914062, + "learning_rate": 1.991120213470654e-06, + "loss": 33.6367, + "step": 7520 + }, + { + "epoch": 0.07119394931891974, + "grad_norm": 276.7221984863281, + "learning_rate": 1.991116136397337e-06, + "loss": 35.2344, + "step": 7521 + }, + { + "epoch": 0.07120341534063479, + "grad_norm": 339.92303466796875, + "learning_rate": 1.991112058392436e-06, + "loss": 13.7109, + "step": 7522 + }, + { + "epoch": 0.07121288136234985, + "grad_norm": 580.5543212890625, + "learning_rate": 1.9911079794559537e-06, + "loss": 39.3438, + "step": 7523 + }, + { + "epoch": 0.0712223473840649, + "grad_norm": 195.14402770996094, + "learning_rate": 1.991103899587895e-06, + "loss": 18.4844, + "step": 7524 + }, + { + "epoch": 0.07123181340577996, + "grad_norm": 516.8076171875, + "learning_rate": 1.9910998187882623e-06, + "loss": 20.6875, + "step": 7525 + }, + { + "epoch": 0.071241279427495, + "grad_norm": 529.9879760742188, + "learning_rate": 1.9910957370570604e-06, + "loss": 30.7031, + "step": 7526 + }, + { + "epoch": 0.07125074544921006, + "grad_norm": 3.1344783306121826, + "learning_rate": 1.991091654394293e-06, + "loss": 0.9546, + "step": 7527 + }, + { + "epoch": 0.07126021147092511, + "grad_norm": 661.7390747070312, + "learning_rate": 1.991087570799964e-06, + "loss": 43.9844, + "step": 7528 + }, + { + "epoch": 0.07126967749264017, + "grad_norm": 553.7164306640625, + "learning_rate": 1.9910834862740773e-06, + "loss": 42.5391, + "step": 7529 + }, + { + "epoch": 0.07127914351435523, + "grad_norm": 992.8172607421875, + "learning_rate": 1.9910794008166364e-06, + "loss": 53.625, + "step": 7530 + }, + { + "epoch": 0.07128860953607027, + "grad_norm": 613.691162109375, + "learning_rate": 1.9910753144276457e-06, + "loss": 55.9219, + "step": 7531 + }, + { + "epoch": 0.07129807555778533, + "grad_norm": 514.1185913085938, + "learning_rate": 1.9910712271071084e-06, + "loss": 46.8125, + "step": 7532 + }, + { + "epoch": 0.07130754157950038, + "grad_norm": 483.29058837890625, + "learning_rate": 1.9910671388550284e-06, + "loss": 52.5469, + "step": 7533 + }, + { + "epoch": 0.07131700760121544, + "grad_norm": 372.5434265136719, + "learning_rate": 1.99106304967141e-06, + "loss": 20.6562, + "step": 7534 + }, + { + "epoch": 0.07132647362293049, + "grad_norm": 849.6724243164062, + "learning_rate": 1.991058959556257e-06, + "loss": 56.4062, + "step": 7535 + }, + { + "epoch": 0.07133593964464555, + "grad_norm": 700.949462890625, + "learning_rate": 1.9910548685095725e-06, + "loss": 34.4727, + "step": 7536 + }, + { + "epoch": 0.07134540566636059, + "grad_norm": 3.351097345352173, + "learning_rate": 1.991050776531361e-06, + "loss": 0.7992, + "step": 7537 + }, + { + "epoch": 0.07135487168807565, + "grad_norm": 762.6841430664062, + "learning_rate": 1.9910466836216266e-06, + "loss": 38.9062, + "step": 7538 + }, + { + "epoch": 0.07136433770979071, + "grad_norm": 287.3771667480469, + "learning_rate": 1.991042589780373e-06, + "loss": 25.9297, + "step": 7539 + }, + { + "epoch": 0.07137380373150576, + "grad_norm": 258.9764099121094, + "learning_rate": 1.9910384950076035e-06, + "loss": 18.1875, + "step": 7540 + }, + { + "epoch": 0.07138326975322082, + "grad_norm": 177.09970092773438, + "learning_rate": 1.991034399303322e-06, + "loss": 21.8984, + "step": 7541 + }, + { + "epoch": 0.07139273577493586, + "grad_norm": 250.75469970703125, + "learning_rate": 1.991030302667533e-06, + "loss": 22.5078, + "step": 7542 + }, + { + "epoch": 0.07140220179665092, + "grad_norm": 3.5665347576141357, + "learning_rate": 1.99102620510024e-06, + "loss": 0.937, + "step": 7543 + }, + { + "epoch": 0.07141166781836597, + "grad_norm": 443.7077331542969, + "learning_rate": 1.9910221066014468e-06, + "loss": 24.6562, + "step": 7544 + }, + { + "epoch": 0.07142113384008103, + "grad_norm": 346.6645202636719, + "learning_rate": 1.9910180071711573e-06, + "loss": 41.625, + "step": 7545 + }, + { + "epoch": 0.07143059986179608, + "grad_norm": 205.47845458984375, + "learning_rate": 1.991013906809375e-06, + "loss": 15.9688, + "step": 7546 + }, + { + "epoch": 0.07144006588351114, + "grad_norm": 277.9532165527344, + "learning_rate": 1.9910098055161043e-06, + "loss": 21.0781, + "step": 7547 + }, + { + "epoch": 0.0714495319052262, + "grad_norm": 961.85009765625, + "learning_rate": 1.991005703291349e-06, + "loss": 85.9062, + "step": 7548 + }, + { + "epoch": 0.07145899792694124, + "grad_norm": 584.6773071289062, + "learning_rate": 1.9910016001351127e-06, + "loss": 21.0547, + "step": 7549 + }, + { + "epoch": 0.0714684639486563, + "grad_norm": 239.01707458496094, + "learning_rate": 1.9909974960473994e-06, + "loss": 27.3281, + "step": 7550 + }, + { + "epoch": 0.07147792997037135, + "grad_norm": 537.2697143554688, + "learning_rate": 1.990993391028213e-06, + "loss": 39.375, + "step": 7551 + }, + { + "epoch": 0.07148739599208641, + "grad_norm": 427.29949951171875, + "learning_rate": 1.9909892850775574e-06, + "loss": 29.8438, + "step": 7552 + }, + { + "epoch": 0.07149686201380145, + "grad_norm": 298.5392761230469, + "learning_rate": 1.990985178195436e-06, + "loss": 24.25, + "step": 7553 + }, + { + "epoch": 0.07150632803551651, + "grad_norm": 764.7437744140625, + "learning_rate": 1.9909810703818533e-06, + "loss": 46.5781, + "step": 7554 + }, + { + "epoch": 0.07151579405723156, + "grad_norm": 214.55145263671875, + "learning_rate": 1.9909769616368128e-06, + "loss": 26.8359, + "step": 7555 + }, + { + "epoch": 0.07152526007894662, + "grad_norm": 253.7930908203125, + "learning_rate": 1.990972851960318e-06, + "loss": 29.8672, + "step": 7556 + }, + { + "epoch": 0.07153472610066168, + "grad_norm": 462.6033630371094, + "learning_rate": 1.990968741352374e-06, + "loss": 48.3203, + "step": 7557 + }, + { + "epoch": 0.07154419212237673, + "grad_norm": 360.2895202636719, + "learning_rate": 1.990964629812983e-06, + "loss": 15.1367, + "step": 7558 + }, + { + "epoch": 0.07155365814409179, + "grad_norm": 533.6305541992188, + "learning_rate": 1.99096051734215e-06, + "loss": 34.75, + "step": 7559 + }, + { + "epoch": 0.07156312416580683, + "grad_norm": 559.9769287109375, + "learning_rate": 1.9909564039398788e-06, + "loss": 29.1172, + "step": 7560 + }, + { + "epoch": 0.07157259018752189, + "grad_norm": 676.2191772460938, + "learning_rate": 1.990952289606173e-06, + "loss": 55.6953, + "step": 7561 + }, + { + "epoch": 0.07158205620923694, + "grad_norm": 372.63348388671875, + "learning_rate": 1.990948174341036e-06, + "loss": 30.6406, + "step": 7562 + }, + { + "epoch": 0.071591522230952, + "grad_norm": 284.4420166015625, + "learning_rate": 1.990944058144473e-06, + "loss": 22.4766, + "step": 7563 + }, + { + "epoch": 0.07160098825266706, + "grad_norm": 571.2545776367188, + "learning_rate": 1.9909399410164864e-06, + "loss": 60.4062, + "step": 7564 + }, + { + "epoch": 0.0716104542743821, + "grad_norm": 461.0743713378906, + "learning_rate": 1.990935822957081e-06, + "loss": 33.8438, + "step": 7565 + }, + { + "epoch": 0.07161992029609716, + "grad_norm": 262.89276123046875, + "learning_rate": 1.9909317039662607e-06, + "loss": 18.0469, + "step": 7566 + }, + { + "epoch": 0.07162938631781221, + "grad_norm": 4.599905967712402, + "learning_rate": 1.9909275840440283e-06, + "loss": 0.938, + "step": 7567 + }, + { + "epoch": 0.07163885233952727, + "grad_norm": 494.8286437988281, + "learning_rate": 1.9909234631903892e-06, + "loss": 45.8477, + "step": 7568 + }, + { + "epoch": 0.07164831836124232, + "grad_norm": 568.5359497070312, + "learning_rate": 1.990919341405346e-06, + "loss": 22.875, + "step": 7569 + }, + { + "epoch": 0.07165778438295738, + "grad_norm": 242.5527801513672, + "learning_rate": 1.990915218688903e-06, + "loss": 24.1094, + "step": 7570 + }, + { + "epoch": 0.07166725040467242, + "grad_norm": 247.5305938720703, + "learning_rate": 1.9909110950410646e-06, + "loss": 22.8906, + "step": 7571 + }, + { + "epoch": 0.07167671642638748, + "grad_norm": 345.92462158203125, + "learning_rate": 1.990906970461834e-06, + "loss": 20.7578, + "step": 7572 + }, + { + "epoch": 0.07168618244810254, + "grad_norm": 710.6264038085938, + "learning_rate": 1.9909028449512155e-06, + "loss": 67.75, + "step": 7573 + }, + { + "epoch": 0.07169564846981759, + "grad_norm": 3.1058802604675293, + "learning_rate": 1.9908987185092126e-06, + "loss": 0.8813, + "step": 7574 + }, + { + "epoch": 0.07170511449153265, + "grad_norm": 766.076904296875, + "learning_rate": 1.9908945911358296e-06, + "loss": 69.1719, + "step": 7575 + }, + { + "epoch": 0.0717145805132477, + "grad_norm": 811.8724975585938, + "learning_rate": 1.9908904628310695e-06, + "loss": 69.0156, + "step": 7576 + }, + { + "epoch": 0.07172404653496275, + "grad_norm": 3.0067429542541504, + "learning_rate": 1.9908863335949375e-06, + "loss": 0.9683, + "step": 7577 + }, + { + "epoch": 0.0717335125566778, + "grad_norm": 1122.7232666015625, + "learning_rate": 1.9908822034274367e-06, + "loss": 73.3125, + "step": 7578 + }, + { + "epoch": 0.07174297857839286, + "grad_norm": 422.63458251953125, + "learning_rate": 1.990878072328571e-06, + "loss": 45.0938, + "step": 7579 + }, + { + "epoch": 0.0717524446001079, + "grad_norm": 1287.688232421875, + "learning_rate": 1.9908739402983443e-06, + "loss": 55.9375, + "step": 7580 + }, + { + "epoch": 0.07176191062182297, + "grad_norm": 361.4249572753906, + "learning_rate": 1.9908698073367605e-06, + "loss": 32.7812, + "step": 7581 + }, + { + "epoch": 0.07177137664353803, + "grad_norm": 334.2077331542969, + "learning_rate": 1.9908656734438234e-06, + "loss": 52.6016, + "step": 7582 + }, + { + "epoch": 0.07178084266525307, + "grad_norm": 571.105224609375, + "learning_rate": 1.9908615386195376e-06, + "loss": 25.0312, + "step": 7583 + }, + { + "epoch": 0.07179030868696813, + "grad_norm": 889.7496337890625, + "learning_rate": 1.990857402863906e-06, + "loss": 26.625, + "step": 7584 + }, + { + "epoch": 0.07179977470868318, + "grad_norm": 849.9676513671875, + "learning_rate": 1.990853266176933e-06, + "loss": 59.3906, + "step": 7585 + }, + { + "epoch": 0.07180924073039824, + "grad_norm": 702.5534057617188, + "learning_rate": 1.9908491285586224e-06, + "loss": 36.4922, + "step": 7586 + }, + { + "epoch": 0.07181870675211328, + "grad_norm": 485.8114929199219, + "learning_rate": 1.990844990008978e-06, + "loss": 26.7266, + "step": 7587 + }, + { + "epoch": 0.07182817277382834, + "grad_norm": 538.670654296875, + "learning_rate": 1.9908408505280037e-06, + "loss": 53.2812, + "step": 7588 + }, + { + "epoch": 0.07183763879554339, + "grad_norm": 316.6219787597656, + "learning_rate": 1.990836710115704e-06, + "loss": 36.4844, + "step": 7589 + }, + { + "epoch": 0.07184710481725845, + "grad_norm": 469.3204345703125, + "learning_rate": 1.9908325687720816e-06, + "loss": 48.9375, + "step": 7590 + }, + { + "epoch": 0.07185657083897351, + "grad_norm": 2.9238390922546387, + "learning_rate": 1.990828426497141e-06, + "loss": 0.8948, + "step": 7591 + }, + { + "epoch": 0.07186603686068856, + "grad_norm": 1306.8033447265625, + "learning_rate": 1.9908242832908865e-06, + "loss": 69.6953, + "step": 7592 + }, + { + "epoch": 0.07187550288240362, + "grad_norm": 530.9364013671875, + "learning_rate": 1.990820139153322e-06, + "loss": 11.0352, + "step": 7593 + }, + { + "epoch": 0.07188496890411866, + "grad_norm": 234.44483947753906, + "learning_rate": 1.9908159940844503e-06, + "loss": 19.3672, + "step": 7594 + }, + { + "epoch": 0.07189443492583372, + "grad_norm": 551.1989135742188, + "learning_rate": 1.9908118480842762e-06, + "loss": 56.75, + "step": 7595 + }, + { + "epoch": 0.07190390094754877, + "grad_norm": 276.1125793457031, + "learning_rate": 1.9908077011528034e-06, + "loss": 24.25, + "step": 7596 + }, + { + "epoch": 0.07191336696926383, + "grad_norm": 1341.75830078125, + "learning_rate": 1.990803553290036e-06, + "loss": 42.5, + "step": 7597 + }, + { + "epoch": 0.07192283299097887, + "grad_norm": 466.9098815917969, + "learning_rate": 1.990799404495978e-06, + "loss": 16.2422, + "step": 7598 + }, + { + "epoch": 0.07193229901269393, + "grad_norm": 239.277099609375, + "learning_rate": 1.9907952547706323e-06, + "loss": 19.9062, + "step": 7599 + }, + { + "epoch": 0.071941765034409, + "grad_norm": 450.53253173828125, + "learning_rate": 1.9907911041140043e-06, + "loss": 45.5156, + "step": 7600 + }, + { + "epoch": 0.07195123105612404, + "grad_norm": 431.4561767578125, + "learning_rate": 1.9907869525260966e-06, + "loss": 31.8125, + "step": 7601 + }, + { + "epoch": 0.0719606970778391, + "grad_norm": 245.55784606933594, + "learning_rate": 1.9907828000069134e-06, + "loss": 29.5938, + "step": 7602 + }, + { + "epoch": 0.07197016309955415, + "grad_norm": 425.8849182128906, + "learning_rate": 1.9907786465564595e-06, + "loss": 22.1797, + "step": 7603 + }, + { + "epoch": 0.0719796291212692, + "grad_norm": 227.21929931640625, + "learning_rate": 1.9907744921747377e-06, + "loss": 20.4453, + "step": 7604 + }, + { + "epoch": 0.07198909514298425, + "grad_norm": 529.9819946289062, + "learning_rate": 1.9907703368617523e-06, + "loss": 30.2578, + "step": 7605 + }, + { + "epoch": 0.07199856116469931, + "grad_norm": 515.3013916015625, + "learning_rate": 1.9907661806175076e-06, + "loss": 53.1406, + "step": 7606 + }, + { + "epoch": 0.07200802718641437, + "grad_norm": 364.48419189453125, + "learning_rate": 1.9907620234420064e-06, + "loss": 30.4062, + "step": 7607 + }, + { + "epoch": 0.07201749320812942, + "grad_norm": 498.19049072265625, + "learning_rate": 1.990757865335254e-06, + "loss": 39.6172, + "step": 7608 + }, + { + "epoch": 0.07202695922984448, + "grad_norm": 450.4951477050781, + "learning_rate": 1.9907537062972536e-06, + "loss": 31.8516, + "step": 7609 + }, + { + "epoch": 0.07203642525155952, + "grad_norm": 545.3037719726562, + "learning_rate": 1.990749546328009e-06, + "loss": 39.3438, + "step": 7610 + }, + { + "epoch": 0.07204589127327458, + "grad_norm": 3.0689587593078613, + "learning_rate": 1.9907453854275243e-06, + "loss": 0.7441, + "step": 7611 + }, + { + "epoch": 0.07205535729498963, + "grad_norm": 269.17596435546875, + "learning_rate": 1.9907412235958034e-06, + "loss": 21.0703, + "step": 7612 + }, + { + "epoch": 0.07206482331670469, + "grad_norm": 390.932373046875, + "learning_rate": 1.9907370608328503e-06, + "loss": 22.3516, + "step": 7613 + }, + { + "epoch": 0.07207428933841974, + "grad_norm": 637.3209838867188, + "learning_rate": 1.9907328971386687e-06, + "loss": 41.3906, + "step": 7614 + }, + { + "epoch": 0.0720837553601348, + "grad_norm": 514.2140502929688, + "learning_rate": 1.9907287325132625e-06, + "loss": 25.4219, + "step": 7615 + }, + { + "epoch": 0.07209322138184986, + "grad_norm": 867.0182495117188, + "learning_rate": 1.990724566956636e-06, + "loss": 31.9531, + "step": 7616 + }, + { + "epoch": 0.0721026874035649, + "grad_norm": 3.556351661682129, + "learning_rate": 1.9907204004687927e-06, + "loss": 0.9634, + "step": 7617 + }, + { + "epoch": 0.07211215342527996, + "grad_norm": 272.9183349609375, + "learning_rate": 1.990716233049737e-06, + "loss": 24.5156, + "step": 7618 + }, + { + "epoch": 0.07212161944699501, + "grad_norm": 511.60369873046875, + "learning_rate": 1.990712064699472e-06, + "loss": 14.0352, + "step": 7619 + }, + { + "epoch": 0.07213108546871007, + "grad_norm": 698.2618408203125, + "learning_rate": 1.9907078954180025e-06, + "loss": 35.9688, + "step": 7620 + }, + { + "epoch": 0.07214055149042511, + "grad_norm": 517.1539916992188, + "learning_rate": 1.990703725205332e-06, + "loss": 36.4531, + "step": 7621 + }, + { + "epoch": 0.07215001751214017, + "grad_norm": 231.24635314941406, + "learning_rate": 1.9906995540614642e-06, + "loss": 22.7812, + "step": 7622 + }, + { + "epoch": 0.07215948353385522, + "grad_norm": 750.360107421875, + "learning_rate": 1.9906953819864033e-06, + "loss": 17.9688, + "step": 7623 + }, + { + "epoch": 0.07216894955557028, + "grad_norm": 229.04315185546875, + "learning_rate": 1.9906912089801536e-06, + "loss": 28.3359, + "step": 7624 + }, + { + "epoch": 0.07217841557728534, + "grad_norm": 3.4716289043426514, + "learning_rate": 1.990687035042718e-06, + "loss": 0.9473, + "step": 7625 + }, + { + "epoch": 0.07218788159900039, + "grad_norm": 568.2807006835938, + "learning_rate": 1.990682860174101e-06, + "loss": 23.3203, + "step": 7626 + }, + { + "epoch": 0.07219734762071545, + "grad_norm": 719.0282592773438, + "learning_rate": 1.9906786843743073e-06, + "loss": 23.0703, + "step": 7627 + }, + { + "epoch": 0.07220681364243049, + "grad_norm": 222.8924560546875, + "learning_rate": 1.9906745076433395e-06, + "loss": 24.7656, + "step": 7628 + }, + { + "epoch": 0.07221627966414555, + "grad_norm": 476.46466064453125, + "learning_rate": 1.9906703299812022e-06, + "loss": 41.2969, + "step": 7629 + }, + { + "epoch": 0.0722257456858606, + "grad_norm": 251.43089294433594, + "learning_rate": 1.9906661513878996e-06, + "loss": 21.0625, + "step": 7630 + }, + { + "epoch": 0.07223521170757566, + "grad_norm": 295.2666015625, + "learning_rate": 1.990661971863435e-06, + "loss": 21.5312, + "step": 7631 + }, + { + "epoch": 0.0722446777292907, + "grad_norm": 283.6393737792969, + "learning_rate": 1.9906577914078126e-06, + "loss": 18.4219, + "step": 7632 + }, + { + "epoch": 0.07225414375100576, + "grad_norm": 200.98802185058594, + "learning_rate": 1.9906536100210364e-06, + "loss": 17.0391, + "step": 7633 + }, + { + "epoch": 0.07226360977272082, + "grad_norm": 911.3287963867188, + "learning_rate": 1.99064942770311e-06, + "loss": 69.5625, + "step": 7634 + }, + { + "epoch": 0.07227307579443587, + "grad_norm": 795.3517456054688, + "learning_rate": 1.9906452444540375e-06, + "loss": 55.5938, + "step": 7635 + }, + { + "epoch": 0.07228254181615093, + "grad_norm": 1243.19140625, + "learning_rate": 1.9906410602738232e-06, + "loss": 31.4531, + "step": 7636 + }, + { + "epoch": 0.07229200783786598, + "grad_norm": 865.4872436523438, + "learning_rate": 1.9906368751624707e-06, + "loss": 53.5703, + "step": 7637 + }, + { + "epoch": 0.07230147385958104, + "grad_norm": 594.3308715820312, + "learning_rate": 1.9906326891199843e-06, + "loss": 56.9219, + "step": 7638 + }, + { + "epoch": 0.07231093988129608, + "grad_norm": 2282.688232421875, + "learning_rate": 1.990628502146367e-06, + "loss": 51.1484, + "step": 7639 + }, + { + "epoch": 0.07232040590301114, + "grad_norm": 286.97314453125, + "learning_rate": 1.9906243142416237e-06, + "loss": 20.2891, + "step": 7640 + }, + { + "epoch": 0.07232987192472619, + "grad_norm": 487.7582702636719, + "learning_rate": 1.990620125405758e-06, + "loss": 39.3906, + "step": 7641 + }, + { + "epoch": 0.07233933794644125, + "grad_norm": 256.0733337402344, + "learning_rate": 1.9906159356387737e-06, + "loss": 26.6094, + "step": 7642 + }, + { + "epoch": 0.07234880396815631, + "grad_norm": 208.0385284423828, + "learning_rate": 1.9906117449406753e-06, + "loss": 22.3828, + "step": 7643 + }, + { + "epoch": 0.07235826998987135, + "grad_norm": 453.10260009765625, + "learning_rate": 1.9906075533114657e-06, + "loss": 24.4766, + "step": 7644 + }, + { + "epoch": 0.07236773601158641, + "grad_norm": 616.23974609375, + "learning_rate": 1.99060336075115e-06, + "loss": 61.0312, + "step": 7645 + }, + { + "epoch": 0.07237720203330146, + "grad_norm": 259.3553771972656, + "learning_rate": 1.990599167259731e-06, + "loss": 23.3906, + "step": 7646 + }, + { + "epoch": 0.07238666805501652, + "grad_norm": 295.90618896484375, + "learning_rate": 1.9905949728372137e-06, + "loss": 31.0156, + "step": 7647 + }, + { + "epoch": 0.07239613407673157, + "grad_norm": 449.4529724121094, + "learning_rate": 1.990590777483601e-06, + "loss": 39.9688, + "step": 7648 + }, + { + "epoch": 0.07240560009844663, + "grad_norm": 1312.597900390625, + "learning_rate": 1.9905865811988977e-06, + "loss": 43.9688, + "step": 7649 + }, + { + "epoch": 0.07241506612016169, + "grad_norm": 307.9249572753906, + "learning_rate": 1.9905823839831077e-06, + "loss": 10.4336, + "step": 7650 + }, + { + "epoch": 0.07242453214187673, + "grad_norm": 479.5008239746094, + "learning_rate": 1.9905781858362346e-06, + "loss": 37.2266, + "step": 7651 + }, + { + "epoch": 0.07243399816359179, + "grad_norm": 386.7599792480469, + "learning_rate": 1.9905739867582824e-06, + "loss": 23.0859, + "step": 7652 + }, + { + "epoch": 0.07244346418530684, + "grad_norm": 408.5174560546875, + "learning_rate": 1.990569786749255e-06, + "loss": 30.7109, + "step": 7653 + }, + { + "epoch": 0.0724529302070219, + "grad_norm": 3.23956298828125, + "learning_rate": 1.9905655858091562e-06, + "loss": 1.0239, + "step": 7654 + }, + { + "epoch": 0.07246239622873694, + "grad_norm": 2.435913324356079, + "learning_rate": 1.9905613839379906e-06, + "loss": 0.8569, + "step": 7655 + }, + { + "epoch": 0.072471862250452, + "grad_norm": 383.08489990234375, + "learning_rate": 1.9905571811357616e-06, + "loss": 62.0938, + "step": 7656 + }, + { + "epoch": 0.07248132827216705, + "grad_norm": 335.65716552734375, + "learning_rate": 1.9905529774024737e-06, + "loss": 27.9609, + "step": 7657 + }, + { + "epoch": 0.07249079429388211, + "grad_norm": 244.8297119140625, + "learning_rate": 1.9905487727381296e-06, + "loss": 17.1953, + "step": 7658 + }, + { + "epoch": 0.07250026031559717, + "grad_norm": 742.3485717773438, + "learning_rate": 1.9905445671427346e-06, + "loss": 53.2812, + "step": 7659 + }, + { + "epoch": 0.07250972633731222, + "grad_norm": 459.9317321777344, + "learning_rate": 1.9905403606162924e-06, + "loss": 25.2812, + "step": 7660 + }, + { + "epoch": 0.07251919235902728, + "grad_norm": 3.1523725986480713, + "learning_rate": 1.9905361531588064e-06, + "loss": 0.9033, + "step": 7661 + }, + { + "epoch": 0.07252865838074232, + "grad_norm": 336.8829040527344, + "learning_rate": 1.990531944770281e-06, + "loss": 45.8281, + "step": 7662 + }, + { + "epoch": 0.07253812440245738, + "grad_norm": 368.6029968261719, + "learning_rate": 1.9905277354507195e-06, + "loss": 33.7969, + "step": 7663 + }, + { + "epoch": 0.07254759042417243, + "grad_norm": 566.5282592773438, + "learning_rate": 1.9905235252001267e-06, + "loss": 40.1094, + "step": 7664 + }, + { + "epoch": 0.07255705644588749, + "grad_norm": 184.63026428222656, + "learning_rate": 1.9905193140185066e-06, + "loss": 29.6719, + "step": 7665 + }, + { + "epoch": 0.07256652246760253, + "grad_norm": 347.4270935058594, + "learning_rate": 1.9905151019058622e-06, + "loss": 43.1719, + "step": 7666 + }, + { + "epoch": 0.0725759884893176, + "grad_norm": 632.411865234375, + "learning_rate": 1.9905108888621986e-06, + "loss": 37.0156, + "step": 7667 + }, + { + "epoch": 0.07258545451103265, + "grad_norm": 1093.432373046875, + "learning_rate": 1.9905066748875187e-06, + "loss": 71.4375, + "step": 7668 + }, + { + "epoch": 0.0725949205327477, + "grad_norm": 485.34173583984375, + "learning_rate": 1.990502459981827e-06, + "loss": 23.2891, + "step": 7669 + }, + { + "epoch": 0.07260438655446276, + "grad_norm": 2.7469990253448486, + "learning_rate": 1.9904982441451275e-06, + "loss": 0.8574, + "step": 7670 + }, + { + "epoch": 0.0726138525761778, + "grad_norm": 447.9671936035156, + "learning_rate": 1.9904940273774246e-06, + "loss": 45.7344, + "step": 7671 + }, + { + "epoch": 0.07262331859789287, + "grad_norm": 237.0879364013672, + "learning_rate": 1.990489809678721e-06, + "loss": 25.0234, + "step": 7672 + }, + { + "epoch": 0.07263278461960791, + "grad_norm": 694.0286254882812, + "learning_rate": 1.990485591049022e-06, + "loss": 27.5234, + "step": 7673 + }, + { + "epoch": 0.07264225064132297, + "grad_norm": 175.84646606445312, + "learning_rate": 1.9904813714883307e-06, + "loss": 16.6641, + "step": 7674 + }, + { + "epoch": 0.07265171666303802, + "grad_norm": 293.7280578613281, + "learning_rate": 1.9904771509966513e-06, + "loss": 27.5859, + "step": 7675 + }, + { + "epoch": 0.07266118268475308, + "grad_norm": 334.0005798339844, + "learning_rate": 1.990472929573988e-06, + "loss": 26.2656, + "step": 7676 + }, + { + "epoch": 0.07267064870646814, + "grad_norm": 322.2906188964844, + "learning_rate": 1.9904687072203445e-06, + "loss": 21.4375, + "step": 7677 + }, + { + "epoch": 0.07268011472818318, + "grad_norm": 3.3601090908050537, + "learning_rate": 1.9904644839357243e-06, + "loss": 1.0142, + "step": 7678 + }, + { + "epoch": 0.07268958074989824, + "grad_norm": 389.1446838378906, + "learning_rate": 1.9904602597201324e-06, + "loss": 34.9453, + "step": 7679 + }, + { + "epoch": 0.07269904677161329, + "grad_norm": 658.0618896484375, + "learning_rate": 1.9904560345735724e-06, + "loss": 42.4531, + "step": 7680 + }, + { + "epoch": 0.07270851279332835, + "grad_norm": 277.7979431152344, + "learning_rate": 1.9904518084960483e-06, + "loss": 26.9922, + "step": 7681 + }, + { + "epoch": 0.0727179788150434, + "grad_norm": 246.89181518554688, + "learning_rate": 1.9904475814875638e-06, + "loss": 26.8438, + "step": 7682 + }, + { + "epoch": 0.07272744483675846, + "grad_norm": 245.27906799316406, + "learning_rate": 1.9904433535481227e-06, + "loss": 21.8594, + "step": 7683 + }, + { + "epoch": 0.0727369108584735, + "grad_norm": 338.4667053222656, + "learning_rate": 1.9904391246777298e-06, + "loss": 18.8281, + "step": 7684 + }, + { + "epoch": 0.07274637688018856, + "grad_norm": 512.8969116210938, + "learning_rate": 1.990434894876388e-06, + "loss": 28.457, + "step": 7685 + }, + { + "epoch": 0.07275584290190362, + "grad_norm": 234.09576416015625, + "learning_rate": 1.990430664144102e-06, + "loss": 23.8594, + "step": 7686 + }, + { + "epoch": 0.07276530892361867, + "grad_norm": 600.8047485351562, + "learning_rate": 1.9904264324808756e-06, + "loss": 34.7734, + "step": 7687 + }, + { + "epoch": 0.07277477494533373, + "grad_norm": 280.4901428222656, + "learning_rate": 1.990422199886713e-06, + "loss": 23.2031, + "step": 7688 + }, + { + "epoch": 0.07278424096704877, + "grad_norm": 292.658447265625, + "learning_rate": 1.990417966361618e-06, + "loss": 31.0703, + "step": 7689 + }, + { + "epoch": 0.07279370698876383, + "grad_norm": 233.83648681640625, + "learning_rate": 1.9904137319055942e-06, + "loss": 18.8359, + "step": 7690 + }, + { + "epoch": 0.07280317301047888, + "grad_norm": 249.70205688476562, + "learning_rate": 1.990409496518646e-06, + "loss": 39.2344, + "step": 7691 + }, + { + "epoch": 0.07281263903219394, + "grad_norm": 254.65052795410156, + "learning_rate": 1.9904052602007774e-06, + "loss": 22.0859, + "step": 7692 + }, + { + "epoch": 0.072822105053909, + "grad_norm": 156.40245056152344, + "learning_rate": 1.9904010229519923e-06, + "loss": 21.2812, + "step": 7693 + }, + { + "epoch": 0.07283157107562405, + "grad_norm": 204.80722045898438, + "learning_rate": 1.990396784772295e-06, + "loss": 25.9844, + "step": 7694 + }, + { + "epoch": 0.0728410370973391, + "grad_norm": 3.2590298652648926, + "learning_rate": 1.9903925456616886e-06, + "loss": 0.9326, + "step": 7695 + }, + { + "epoch": 0.07285050311905415, + "grad_norm": 191.26771545410156, + "learning_rate": 1.990388305620178e-06, + "loss": 19.5469, + "step": 7696 + }, + { + "epoch": 0.07285996914076921, + "grad_norm": 372.13824462890625, + "learning_rate": 1.990384064647766e-06, + "loss": 42.9453, + "step": 7697 + }, + { + "epoch": 0.07286943516248426, + "grad_norm": 324.00885009765625, + "learning_rate": 1.9903798227444584e-06, + "loss": 31.75, + "step": 7698 + }, + { + "epoch": 0.07287890118419932, + "grad_norm": 188.19778442382812, + "learning_rate": 1.990375579910258e-06, + "loss": 19.8828, + "step": 7699 + }, + { + "epoch": 0.07288836720591436, + "grad_norm": 378.6837158203125, + "learning_rate": 1.990371336145169e-06, + "loss": 15.8125, + "step": 7700 + }, + { + "epoch": 0.07289783322762942, + "grad_norm": 950.8076171875, + "learning_rate": 1.990367091449195e-06, + "loss": 64.5469, + "step": 7701 + }, + { + "epoch": 0.07290729924934448, + "grad_norm": 701.5776977539062, + "learning_rate": 1.990362845822341e-06, + "loss": 21.3203, + "step": 7702 + }, + { + "epoch": 0.07291676527105953, + "grad_norm": 526.21484375, + "learning_rate": 1.9903585992646096e-06, + "loss": 37.125, + "step": 7703 + }, + { + "epoch": 0.07292623129277459, + "grad_norm": 258.97271728515625, + "learning_rate": 1.990354351776006e-06, + "loss": 28.2969, + "step": 7704 + }, + { + "epoch": 0.07293569731448964, + "grad_norm": 485.4208068847656, + "learning_rate": 1.9903501033565335e-06, + "loss": 56.7031, + "step": 7705 + }, + { + "epoch": 0.0729451633362047, + "grad_norm": 3.766530752182007, + "learning_rate": 1.9903458540061964e-06, + "loss": 0.813, + "step": 7706 + }, + { + "epoch": 0.07295462935791974, + "grad_norm": 556.9954833984375, + "learning_rate": 1.990341603724999e-06, + "loss": 52.0234, + "step": 7707 + }, + { + "epoch": 0.0729640953796348, + "grad_norm": 259.781982421875, + "learning_rate": 1.9903373525129443e-06, + "loss": 26.4688, + "step": 7708 + }, + { + "epoch": 0.07297356140134985, + "grad_norm": 373.9740905761719, + "learning_rate": 1.9903331003700374e-06, + "loss": 26.2344, + "step": 7709 + }, + { + "epoch": 0.07298302742306491, + "grad_norm": 448.77252197265625, + "learning_rate": 1.9903288472962815e-06, + "loss": 20.5, + "step": 7710 + }, + { + "epoch": 0.07299249344477997, + "grad_norm": 317.4896545410156, + "learning_rate": 1.990324593291681e-06, + "loss": 26.125, + "step": 7711 + }, + { + "epoch": 0.07300195946649501, + "grad_norm": 495.4125671386719, + "learning_rate": 1.9903203383562397e-06, + "loss": 58.125, + "step": 7712 + }, + { + "epoch": 0.07301142548821007, + "grad_norm": 399.5958251953125, + "learning_rate": 1.990316082489962e-06, + "loss": 46.2812, + "step": 7713 + }, + { + "epoch": 0.07302089150992512, + "grad_norm": 173.22763061523438, + "learning_rate": 1.990311825692851e-06, + "loss": 21.7422, + "step": 7714 + }, + { + "epoch": 0.07303035753164018, + "grad_norm": 204.42916870117188, + "learning_rate": 1.990307567964912e-06, + "loss": 20.6875, + "step": 7715 + }, + { + "epoch": 0.07303982355335523, + "grad_norm": 3.385699987411499, + "learning_rate": 1.990303309306148e-06, + "loss": 1.0723, + "step": 7716 + }, + { + "epoch": 0.07304928957507029, + "grad_norm": 376.9508972167969, + "learning_rate": 1.9902990497165637e-06, + "loss": 34.6719, + "step": 7717 + }, + { + "epoch": 0.07305875559678533, + "grad_norm": 3.503671169281006, + "learning_rate": 1.990294789196162e-06, + "loss": 0.9058, + "step": 7718 + }, + { + "epoch": 0.07306822161850039, + "grad_norm": 602.90966796875, + "learning_rate": 1.990290527744948e-06, + "loss": 20.7734, + "step": 7719 + }, + { + "epoch": 0.07307768764021545, + "grad_norm": 159.93524169921875, + "learning_rate": 1.9902862653629255e-06, + "loss": 16.0, + "step": 7720 + }, + { + "epoch": 0.0730871536619305, + "grad_norm": 495.773193359375, + "learning_rate": 1.990282002050098e-06, + "loss": 20.2969, + "step": 7721 + }, + { + "epoch": 0.07309661968364556, + "grad_norm": 191.4704132080078, + "learning_rate": 1.99027773780647e-06, + "loss": 22.4297, + "step": 7722 + }, + { + "epoch": 0.0731060857053606, + "grad_norm": 385.458251953125, + "learning_rate": 1.9902734726320455e-06, + "loss": 44.7188, + "step": 7723 + }, + { + "epoch": 0.07311555172707566, + "grad_norm": 495.4996643066406, + "learning_rate": 1.990269206526828e-06, + "loss": 21.8984, + "step": 7724 + }, + { + "epoch": 0.07312501774879071, + "grad_norm": 999.8109130859375, + "learning_rate": 1.9902649394908224e-06, + "loss": 51.8047, + "step": 7725 + }, + { + "epoch": 0.07313448377050577, + "grad_norm": 363.8932800292969, + "learning_rate": 1.9902606715240317e-06, + "loss": 30.5, + "step": 7726 + }, + { + "epoch": 0.07314394979222082, + "grad_norm": 547.3767700195312, + "learning_rate": 1.9902564026264608e-06, + "loss": 22.5781, + "step": 7727 + }, + { + "epoch": 0.07315341581393588, + "grad_norm": 486.9081115722656, + "learning_rate": 1.990252132798113e-06, + "loss": 25.5469, + "step": 7728 + }, + { + "epoch": 0.07316288183565094, + "grad_norm": 169.06141662597656, + "learning_rate": 1.9902478620389925e-06, + "loss": 20.2422, + "step": 7729 + }, + { + "epoch": 0.07317234785736598, + "grad_norm": 476.5834655761719, + "learning_rate": 1.9902435903491037e-06, + "loss": 26.3672, + "step": 7730 + }, + { + "epoch": 0.07318181387908104, + "grad_norm": 326.8877868652344, + "learning_rate": 1.99023931772845e-06, + "loss": 33.2188, + "step": 7731 + }, + { + "epoch": 0.07319127990079609, + "grad_norm": 408.9595642089844, + "learning_rate": 1.9902350441770363e-06, + "loss": 25.0938, + "step": 7732 + }, + { + "epoch": 0.07320074592251115, + "grad_norm": 502.09423828125, + "learning_rate": 1.9902307696948653e-06, + "loss": 29.1172, + "step": 7733 + }, + { + "epoch": 0.0732102119442262, + "grad_norm": 333.50738525390625, + "learning_rate": 1.9902264942819427e-06, + "loss": 18.7344, + "step": 7734 + }, + { + "epoch": 0.07321967796594125, + "grad_norm": 286.2925109863281, + "learning_rate": 1.990222217938271e-06, + "loss": 22.6094, + "step": 7735 + }, + { + "epoch": 0.07322914398765631, + "grad_norm": 3.576902389526367, + "learning_rate": 1.990217940663855e-06, + "loss": 0.978, + "step": 7736 + }, + { + "epoch": 0.07323861000937136, + "grad_norm": 215.3629608154297, + "learning_rate": 1.9902136624586987e-06, + "loss": 20.8438, + "step": 7737 + }, + { + "epoch": 0.07324807603108642, + "grad_norm": 609.0733032226562, + "learning_rate": 1.990209383322806e-06, + "loss": 23.543, + "step": 7738 + }, + { + "epoch": 0.07325754205280147, + "grad_norm": 317.9944763183594, + "learning_rate": 1.9902051032561805e-06, + "loss": 25.6406, + "step": 7739 + }, + { + "epoch": 0.07326700807451653, + "grad_norm": 252.14210510253906, + "learning_rate": 1.990200822258827e-06, + "loss": 17.8047, + "step": 7740 + }, + { + "epoch": 0.07327647409623157, + "grad_norm": 1964.086181640625, + "learning_rate": 1.990196540330749e-06, + "loss": 83.9375, + "step": 7741 + }, + { + "epoch": 0.07328594011794663, + "grad_norm": 308.60162353515625, + "learning_rate": 1.990192257471951e-06, + "loss": 20.875, + "step": 7742 + }, + { + "epoch": 0.07329540613966168, + "grad_norm": 264.1639099121094, + "learning_rate": 1.990187973682436e-06, + "loss": 18.6484, + "step": 7743 + }, + { + "epoch": 0.07330487216137674, + "grad_norm": 565.743896484375, + "learning_rate": 1.990183688962209e-06, + "loss": 22.2812, + "step": 7744 + }, + { + "epoch": 0.0733143381830918, + "grad_norm": 327.44146728515625, + "learning_rate": 1.990179403311274e-06, + "loss": 45.7031, + "step": 7745 + }, + { + "epoch": 0.07332380420480684, + "grad_norm": 907.5953979492188, + "learning_rate": 1.9901751167296346e-06, + "loss": 57.8828, + "step": 7746 + }, + { + "epoch": 0.0733332702265219, + "grad_norm": 837.8818969726562, + "learning_rate": 1.990170829217295e-06, + "loss": 50.4688, + "step": 7747 + }, + { + "epoch": 0.07334273624823695, + "grad_norm": 517.406982421875, + "learning_rate": 1.990166540774259e-06, + "loss": 32.2578, + "step": 7748 + }, + { + "epoch": 0.07335220226995201, + "grad_norm": 212.1629638671875, + "learning_rate": 1.9901622514005314e-06, + "loss": 18.1562, + "step": 7749 + }, + { + "epoch": 0.07336166829166706, + "grad_norm": 426.07757568359375, + "learning_rate": 1.9901579610961153e-06, + "loss": 21.0586, + "step": 7750 + }, + { + "epoch": 0.07337113431338212, + "grad_norm": 316.0277404785156, + "learning_rate": 1.9901536698610153e-06, + "loss": 24.1797, + "step": 7751 + }, + { + "epoch": 0.07338060033509716, + "grad_norm": 605.6529541015625, + "learning_rate": 1.990149377695235e-06, + "loss": 50.6875, + "step": 7752 + }, + { + "epoch": 0.07339006635681222, + "grad_norm": 410.1715393066406, + "learning_rate": 1.990145084598779e-06, + "loss": 39.4922, + "step": 7753 + }, + { + "epoch": 0.07339953237852728, + "grad_norm": 587.861328125, + "learning_rate": 1.990140790571651e-06, + "loss": 62.6094, + "step": 7754 + }, + { + "epoch": 0.07340899840024233, + "grad_norm": 320.0467224121094, + "learning_rate": 1.990136495613855e-06, + "loss": 26.4531, + "step": 7755 + }, + { + "epoch": 0.07341846442195739, + "grad_norm": 242.2895965576172, + "learning_rate": 1.9901321997253954e-06, + "loss": 26.0938, + "step": 7756 + }, + { + "epoch": 0.07342793044367243, + "grad_norm": 518.316650390625, + "learning_rate": 1.9901279029062757e-06, + "loss": 30.0078, + "step": 7757 + }, + { + "epoch": 0.0734373964653875, + "grad_norm": 240.3124237060547, + "learning_rate": 1.9901236051565e-06, + "loss": 32.2422, + "step": 7758 + }, + { + "epoch": 0.07344686248710254, + "grad_norm": 385.7977294921875, + "learning_rate": 1.9901193064760725e-06, + "loss": 29.7969, + "step": 7759 + }, + { + "epoch": 0.0734563285088176, + "grad_norm": 242.18356323242188, + "learning_rate": 1.9901150068649975e-06, + "loss": 25.3906, + "step": 7760 + }, + { + "epoch": 0.07346579453053265, + "grad_norm": 506.98480224609375, + "learning_rate": 1.990110706323279e-06, + "loss": 48.8594, + "step": 7761 + }, + { + "epoch": 0.0734752605522477, + "grad_norm": 1234.6990966796875, + "learning_rate": 1.9901064048509206e-06, + "loss": 53.8203, + "step": 7762 + }, + { + "epoch": 0.07348472657396277, + "grad_norm": 283.6845703125, + "learning_rate": 1.9901021024479263e-06, + "loss": 28.7773, + "step": 7763 + }, + { + "epoch": 0.07349419259567781, + "grad_norm": 587.0357055664062, + "learning_rate": 1.9900977991143007e-06, + "loss": 48.5625, + "step": 7764 + }, + { + "epoch": 0.07350365861739287, + "grad_norm": 361.07623291015625, + "learning_rate": 1.9900934948500477e-06, + "loss": 19.1719, + "step": 7765 + }, + { + "epoch": 0.07351312463910792, + "grad_norm": 389.46929931640625, + "learning_rate": 1.9900891896551714e-06, + "loss": 39.9375, + "step": 7766 + }, + { + "epoch": 0.07352259066082298, + "grad_norm": 433.7288513183594, + "learning_rate": 1.9900848835296752e-06, + "loss": 41.7969, + "step": 7767 + }, + { + "epoch": 0.07353205668253802, + "grad_norm": 195.10140991210938, + "learning_rate": 1.990080576473564e-06, + "loss": 20.5781, + "step": 7768 + }, + { + "epoch": 0.07354152270425308, + "grad_norm": 374.8475646972656, + "learning_rate": 1.9900762684868408e-06, + "loss": 36.9688, + "step": 7769 + }, + { + "epoch": 0.07355098872596813, + "grad_norm": 610.6051635742188, + "learning_rate": 1.990071959569511e-06, + "loss": 43.4219, + "step": 7770 + }, + { + "epoch": 0.07356045474768319, + "grad_norm": 1073.405517578125, + "learning_rate": 1.9900676497215777e-06, + "loss": 52.7188, + "step": 7771 + }, + { + "epoch": 0.07356992076939825, + "grad_norm": 406.59539794921875, + "learning_rate": 1.9900633389430454e-06, + "loss": 27.1836, + "step": 7772 + }, + { + "epoch": 0.0735793867911133, + "grad_norm": 345.21392822265625, + "learning_rate": 1.990059027233918e-06, + "loss": 27.5781, + "step": 7773 + }, + { + "epoch": 0.07358885281282836, + "grad_norm": 446.5315856933594, + "learning_rate": 1.990054714594199e-06, + "loss": 57.2656, + "step": 7774 + }, + { + "epoch": 0.0735983188345434, + "grad_norm": 673.4780883789062, + "learning_rate": 1.9900504010238933e-06, + "loss": 52.4219, + "step": 7775 + }, + { + "epoch": 0.07360778485625846, + "grad_norm": 507.7639465332031, + "learning_rate": 1.990046086523005e-06, + "loss": 24.5, + "step": 7776 + }, + { + "epoch": 0.07361725087797351, + "grad_norm": 440.0870056152344, + "learning_rate": 1.990041771091537e-06, + "loss": 36.0, + "step": 7777 + }, + { + "epoch": 0.07362671689968857, + "grad_norm": 484.5918884277344, + "learning_rate": 1.990037454729495e-06, + "loss": 25.4219, + "step": 7778 + }, + { + "epoch": 0.07363618292140363, + "grad_norm": 460.08935546875, + "learning_rate": 1.9900331374368816e-06, + "loss": 63.4219, + "step": 7779 + }, + { + "epoch": 0.07364564894311867, + "grad_norm": 141.5282440185547, + "learning_rate": 1.990028819213702e-06, + "loss": 20.0234, + "step": 7780 + }, + { + "epoch": 0.07365511496483373, + "grad_norm": 671.6639404296875, + "learning_rate": 1.9900245000599593e-06, + "loss": 40.1953, + "step": 7781 + }, + { + "epoch": 0.07366458098654878, + "grad_norm": 328.7576599121094, + "learning_rate": 1.990020179975658e-06, + "loss": 25.3438, + "step": 7782 + }, + { + "epoch": 0.07367404700826384, + "grad_norm": 950.1121215820312, + "learning_rate": 1.990015858960802e-06, + "loss": 62.0156, + "step": 7783 + }, + { + "epoch": 0.07368351302997889, + "grad_norm": 292.1514892578125, + "learning_rate": 1.9900115370153955e-06, + "loss": 14.6797, + "step": 7784 + }, + { + "epoch": 0.07369297905169395, + "grad_norm": 263.5529479980469, + "learning_rate": 1.990007214139443e-06, + "loss": 19.6953, + "step": 7785 + }, + { + "epoch": 0.07370244507340899, + "grad_norm": 714.4374389648438, + "learning_rate": 1.9900028903329476e-06, + "loss": 19.9297, + "step": 7786 + }, + { + "epoch": 0.07371191109512405, + "grad_norm": 336.85546875, + "learning_rate": 1.9899985655959146e-06, + "loss": 26.0625, + "step": 7787 + }, + { + "epoch": 0.07372137711683911, + "grad_norm": 3.020850419998169, + "learning_rate": 1.989994239928347e-06, + "loss": 0.9463, + "step": 7788 + }, + { + "epoch": 0.07373084313855416, + "grad_norm": 201.15914916992188, + "learning_rate": 1.9899899133302486e-06, + "loss": 21.9688, + "step": 7789 + }, + { + "epoch": 0.07374030916026922, + "grad_norm": 818.4331665039062, + "learning_rate": 1.989985585801625e-06, + "loss": 30.6562, + "step": 7790 + }, + { + "epoch": 0.07374977518198426, + "grad_norm": 529.1079711914062, + "learning_rate": 1.989981257342479e-06, + "loss": 24.5, + "step": 7791 + }, + { + "epoch": 0.07375924120369932, + "grad_norm": 517.1475830078125, + "learning_rate": 1.989976927952815e-06, + "loss": 45.1094, + "step": 7792 + }, + { + "epoch": 0.07376870722541437, + "grad_norm": 2.9409310817718506, + "learning_rate": 1.989972597632637e-06, + "loss": 0.7515, + "step": 7793 + }, + { + "epoch": 0.07377817324712943, + "grad_norm": 553.5172729492188, + "learning_rate": 1.989968266381949e-06, + "loss": 47.5625, + "step": 7794 + }, + { + "epoch": 0.07378763926884448, + "grad_norm": 844.8604125976562, + "learning_rate": 1.9899639342007555e-06, + "loss": 62.9531, + "step": 7795 + }, + { + "epoch": 0.07379710529055954, + "grad_norm": 421.3685607910156, + "learning_rate": 1.9899596010890603e-06, + "loss": 33.0156, + "step": 7796 + }, + { + "epoch": 0.0738065713122746, + "grad_norm": 334.90863037109375, + "learning_rate": 1.9899552670468672e-06, + "loss": 15.5703, + "step": 7797 + }, + { + "epoch": 0.07381603733398964, + "grad_norm": 182.38058471679688, + "learning_rate": 1.989950932074181e-06, + "loss": 21.1016, + "step": 7798 + }, + { + "epoch": 0.0738255033557047, + "grad_norm": 569.1196899414062, + "learning_rate": 1.989946596171005e-06, + "loss": 16.7109, + "step": 7799 + }, + { + "epoch": 0.07383496937741975, + "grad_norm": 250.05787658691406, + "learning_rate": 1.989942259337344e-06, + "loss": 19.2344, + "step": 7800 + }, + { + "epoch": 0.07384443539913481, + "grad_norm": 443.2502746582031, + "learning_rate": 1.989937921573201e-06, + "loss": 17.5078, + "step": 7801 + }, + { + "epoch": 0.07385390142084985, + "grad_norm": 776.9044189453125, + "learning_rate": 1.9899335828785814e-06, + "loss": 56.7266, + "step": 7802 + }, + { + "epoch": 0.07386336744256491, + "grad_norm": 207.9147186279297, + "learning_rate": 1.9899292432534883e-06, + "loss": 15.3594, + "step": 7803 + }, + { + "epoch": 0.07387283346427996, + "grad_norm": 286.68994140625, + "learning_rate": 1.9899249026979265e-06, + "loss": 25.2578, + "step": 7804 + }, + { + "epoch": 0.07388229948599502, + "grad_norm": 3.0146186351776123, + "learning_rate": 1.9899205612118994e-06, + "loss": 0.8975, + "step": 7805 + }, + { + "epoch": 0.07389176550771008, + "grad_norm": 581.9160766601562, + "learning_rate": 1.989916218795411e-06, + "loss": 46.6875, + "step": 7806 + }, + { + "epoch": 0.07390123152942513, + "grad_norm": 3.771362543106079, + "learning_rate": 1.9899118754484664e-06, + "loss": 1.1382, + "step": 7807 + }, + { + "epoch": 0.07391069755114019, + "grad_norm": 296.3580017089844, + "learning_rate": 1.9899075311710686e-06, + "loss": 26.75, + "step": 7808 + }, + { + "epoch": 0.07392016357285523, + "grad_norm": 3.740834951400757, + "learning_rate": 1.989903185963222e-06, + "loss": 0.8984, + "step": 7809 + }, + { + "epoch": 0.07392962959457029, + "grad_norm": 303.3360900878906, + "learning_rate": 1.9898988398249312e-06, + "loss": 42.1094, + "step": 7810 + }, + { + "epoch": 0.07393909561628534, + "grad_norm": 547.008544921875, + "learning_rate": 1.9898944927562e-06, + "loss": 51.9688, + "step": 7811 + }, + { + "epoch": 0.0739485616380004, + "grad_norm": 3.061126708984375, + "learning_rate": 1.989890144757032e-06, + "loss": 0.8687, + "step": 7812 + }, + { + "epoch": 0.07395802765971544, + "grad_norm": 237.3316650390625, + "learning_rate": 1.9898857958274314e-06, + "loss": 23.2344, + "step": 7813 + }, + { + "epoch": 0.0739674936814305, + "grad_norm": 250.72964477539062, + "learning_rate": 1.9898814459674032e-06, + "loss": 21.5391, + "step": 7814 + }, + { + "epoch": 0.07397695970314556, + "grad_norm": 310.0326843261719, + "learning_rate": 1.9898770951769505e-06, + "loss": 29.1094, + "step": 7815 + }, + { + "epoch": 0.07398642572486061, + "grad_norm": 186.43556213378906, + "learning_rate": 1.989872743456078e-06, + "loss": 17.4297, + "step": 7816 + }, + { + "epoch": 0.07399589174657567, + "grad_norm": 517.5050659179688, + "learning_rate": 1.989868390804789e-06, + "loss": 60.25, + "step": 7817 + }, + { + "epoch": 0.07400535776829072, + "grad_norm": 3.672152280807495, + "learning_rate": 1.9898640372230882e-06, + "loss": 0.9785, + "step": 7818 + }, + { + "epoch": 0.07401482379000578, + "grad_norm": 396.2974548339844, + "learning_rate": 1.9898596827109803e-06, + "loss": 19.9609, + "step": 7819 + }, + { + "epoch": 0.07402428981172082, + "grad_norm": 589.0237426757812, + "learning_rate": 1.989855327268468e-06, + "loss": 31.25, + "step": 7820 + }, + { + "epoch": 0.07403375583343588, + "grad_norm": 3.281907320022583, + "learning_rate": 1.9898509708955565e-06, + "loss": 0.7905, + "step": 7821 + }, + { + "epoch": 0.07404322185515094, + "grad_norm": 3.352717638015747, + "learning_rate": 1.9898466135922492e-06, + "loss": 0.8979, + "step": 7822 + }, + { + "epoch": 0.07405268787686599, + "grad_norm": 260.89349365234375, + "learning_rate": 1.9898422553585504e-06, + "loss": 31.1875, + "step": 7823 + }, + { + "epoch": 0.07406215389858105, + "grad_norm": 623.048828125, + "learning_rate": 1.9898378961944646e-06, + "loss": 60.7812, + "step": 7824 + }, + { + "epoch": 0.0740716199202961, + "grad_norm": 429.704345703125, + "learning_rate": 1.9898335360999953e-06, + "loss": 23.6953, + "step": 7825 + }, + { + "epoch": 0.07408108594201115, + "grad_norm": 429.3663330078125, + "learning_rate": 1.989829175075147e-06, + "loss": 29.3828, + "step": 7826 + }, + { + "epoch": 0.0740905519637262, + "grad_norm": 603.03759765625, + "learning_rate": 1.9898248131199235e-06, + "loss": 43.8281, + "step": 7827 + }, + { + "epoch": 0.07410001798544126, + "grad_norm": 3.305358648300171, + "learning_rate": 1.9898204502343294e-06, + "loss": 0.8311, + "step": 7828 + }, + { + "epoch": 0.0741094840071563, + "grad_norm": 521.109619140625, + "learning_rate": 1.9898160864183683e-06, + "loss": 22.4922, + "step": 7829 + }, + { + "epoch": 0.07411895002887137, + "grad_norm": 564.866943359375, + "learning_rate": 1.9898117216720445e-06, + "loss": 24.707, + "step": 7830 + }, + { + "epoch": 0.07412841605058643, + "grad_norm": 265.8131103515625, + "learning_rate": 1.989807355995362e-06, + "loss": 20.6953, + "step": 7831 + }, + { + "epoch": 0.07413788207230147, + "grad_norm": 176.88258361816406, + "learning_rate": 1.989802989388325e-06, + "loss": 15.9297, + "step": 7832 + }, + { + "epoch": 0.07414734809401653, + "grad_norm": 2.2965095043182373, + "learning_rate": 1.9897986218509373e-06, + "loss": 0.8501, + "step": 7833 + }, + { + "epoch": 0.07415681411573158, + "grad_norm": 211.15650939941406, + "learning_rate": 1.9897942533832034e-06, + "loss": 21.4062, + "step": 7834 + }, + { + "epoch": 0.07416628013744664, + "grad_norm": 358.9505310058594, + "learning_rate": 1.9897898839851275e-06, + "loss": 48.4766, + "step": 7835 + }, + { + "epoch": 0.07417574615916168, + "grad_norm": 261.1632385253906, + "learning_rate": 1.989785513656714e-06, + "loss": 25.0469, + "step": 7836 + }, + { + "epoch": 0.07418521218087674, + "grad_norm": 427.7331237792969, + "learning_rate": 1.9897811423979657e-06, + "loss": 29.5703, + "step": 7837 + }, + { + "epoch": 0.07419467820259179, + "grad_norm": 405.4223937988281, + "learning_rate": 1.989776770208888e-06, + "loss": 30.6016, + "step": 7838 + }, + { + "epoch": 0.07420414422430685, + "grad_norm": 317.26702880859375, + "learning_rate": 1.989772397089484e-06, + "loss": 13.7031, + "step": 7839 + }, + { + "epoch": 0.07421361024602191, + "grad_norm": 518.75, + "learning_rate": 1.9897680230397586e-06, + "loss": 64.2812, + "step": 7840 + }, + { + "epoch": 0.07422307626773696, + "grad_norm": 238.99493408203125, + "learning_rate": 1.9897636480597157e-06, + "loss": 22.3047, + "step": 7841 + }, + { + "epoch": 0.07423254228945202, + "grad_norm": 214.47442626953125, + "learning_rate": 1.989759272149359e-06, + "loss": 28.2031, + "step": 7842 + }, + { + "epoch": 0.07424200831116706, + "grad_norm": 1156.9456787109375, + "learning_rate": 1.9897548953086935e-06, + "loss": 34.6953, + "step": 7843 + }, + { + "epoch": 0.07425147433288212, + "grad_norm": 3.51088285446167, + "learning_rate": 1.9897505175377227e-06, + "loss": 0.9058, + "step": 7844 + }, + { + "epoch": 0.07426094035459717, + "grad_norm": 301.859375, + "learning_rate": 1.9897461388364506e-06, + "loss": 26.3906, + "step": 7845 + }, + { + "epoch": 0.07427040637631223, + "grad_norm": 258.5216369628906, + "learning_rate": 1.9897417592048818e-06, + "loss": 27.6406, + "step": 7846 + }, + { + "epoch": 0.07427987239802727, + "grad_norm": 225.61228942871094, + "learning_rate": 1.98973737864302e-06, + "loss": 20.1406, + "step": 7847 + }, + { + "epoch": 0.07428933841974233, + "grad_norm": 458.94354248046875, + "learning_rate": 1.989732997150869e-06, + "loss": 39.5703, + "step": 7848 + }, + { + "epoch": 0.0742988044414574, + "grad_norm": 269.4731140136719, + "learning_rate": 1.989728614728434e-06, + "loss": 20.0469, + "step": 7849 + }, + { + "epoch": 0.07430827046317244, + "grad_norm": 316.86767578125, + "learning_rate": 1.9897242313757185e-06, + "loss": 38.0, + "step": 7850 + }, + { + "epoch": 0.0743177364848875, + "grad_norm": 208.6240997314453, + "learning_rate": 1.9897198470927263e-06, + "loss": 20.1719, + "step": 7851 + }, + { + "epoch": 0.07432720250660255, + "grad_norm": 573.3817138671875, + "learning_rate": 1.9897154618794614e-06, + "loss": 56.7891, + "step": 7852 + }, + { + "epoch": 0.0743366685283176, + "grad_norm": 331.05816650390625, + "learning_rate": 1.989711075735929e-06, + "loss": 17.4609, + "step": 7853 + }, + { + "epoch": 0.07434613455003265, + "grad_norm": 853.7544555664062, + "learning_rate": 1.9897066886621327e-06, + "loss": 34.1016, + "step": 7854 + }, + { + "epoch": 0.07435560057174771, + "grad_norm": 532.6224365234375, + "learning_rate": 1.989702300658076e-06, + "loss": 46.1758, + "step": 7855 + }, + { + "epoch": 0.07436506659346276, + "grad_norm": 278.55242919921875, + "learning_rate": 1.9896979117237637e-06, + "loss": 25.2578, + "step": 7856 + }, + { + "epoch": 0.07437453261517782, + "grad_norm": 621.77392578125, + "learning_rate": 1.9896935218592e-06, + "loss": 35.1172, + "step": 7857 + }, + { + "epoch": 0.07438399863689288, + "grad_norm": 813.1824340820312, + "learning_rate": 1.9896891310643883e-06, + "loss": 51.2891, + "step": 7858 + }, + { + "epoch": 0.07439346465860792, + "grad_norm": 896.2973022460938, + "learning_rate": 1.9896847393393337e-06, + "loss": 46.7578, + "step": 7859 + }, + { + "epoch": 0.07440293068032298, + "grad_norm": 273.3180847167969, + "learning_rate": 1.9896803466840395e-06, + "loss": 28.5625, + "step": 7860 + }, + { + "epoch": 0.07441239670203803, + "grad_norm": 321.5802917480469, + "learning_rate": 1.9896759530985105e-06, + "loss": 25.125, + "step": 7861 + }, + { + "epoch": 0.07442186272375309, + "grad_norm": 380.7770080566406, + "learning_rate": 1.9896715585827503e-06, + "loss": 21.6016, + "step": 7862 + }, + { + "epoch": 0.07443132874546814, + "grad_norm": 1042.14892578125, + "learning_rate": 1.989667163136763e-06, + "loss": 69.875, + "step": 7863 + }, + { + "epoch": 0.0744407947671832, + "grad_norm": 3.2175018787384033, + "learning_rate": 1.9896627667605526e-06, + "loss": 0.7119, + "step": 7864 + }, + { + "epoch": 0.07445026078889826, + "grad_norm": 252.19174194335938, + "learning_rate": 1.9896583694541243e-06, + "loss": 24.0781, + "step": 7865 + }, + { + "epoch": 0.0744597268106133, + "grad_norm": 213.28167724609375, + "learning_rate": 1.9896539712174814e-06, + "loss": 18.9766, + "step": 7866 + }, + { + "epoch": 0.07446919283232836, + "grad_norm": 238.86856079101562, + "learning_rate": 1.9896495720506282e-06, + "loss": 10.8086, + "step": 7867 + }, + { + "epoch": 0.07447865885404341, + "grad_norm": 582.0234375, + "learning_rate": 1.9896451719535686e-06, + "loss": 37.4219, + "step": 7868 + }, + { + "epoch": 0.07448812487575847, + "grad_norm": 269.0195617675781, + "learning_rate": 1.9896407709263066e-06, + "loss": 35.6562, + "step": 7869 + }, + { + "epoch": 0.07449759089747351, + "grad_norm": 209.46844482421875, + "learning_rate": 1.9896363689688475e-06, + "loss": 21.6953, + "step": 7870 + }, + { + "epoch": 0.07450705691918857, + "grad_norm": 313.2518615722656, + "learning_rate": 1.9896319660811937e-06, + "loss": 21.4922, + "step": 7871 + }, + { + "epoch": 0.07451652294090362, + "grad_norm": 1030.201171875, + "learning_rate": 1.989627562263351e-06, + "loss": 38.3047, + "step": 7872 + }, + { + "epoch": 0.07452598896261868, + "grad_norm": 225.66799926757812, + "learning_rate": 1.989623157515322e-06, + "loss": 23.875, + "step": 7873 + }, + { + "epoch": 0.07453545498433374, + "grad_norm": 166.4659881591797, + "learning_rate": 1.989618751837112e-06, + "loss": 9.6055, + "step": 7874 + }, + { + "epoch": 0.07454492100604879, + "grad_norm": 188.91075134277344, + "learning_rate": 1.989614345228725e-06, + "loss": 23.2969, + "step": 7875 + }, + { + "epoch": 0.07455438702776385, + "grad_norm": 367.7387390136719, + "learning_rate": 1.9896099376901645e-06, + "loss": 20.3672, + "step": 7876 + }, + { + "epoch": 0.07456385304947889, + "grad_norm": 361.4109802246094, + "learning_rate": 1.989605529221435e-06, + "loss": 35.8438, + "step": 7877 + }, + { + "epoch": 0.07457331907119395, + "grad_norm": 528.1442260742188, + "learning_rate": 1.989601119822541e-06, + "loss": 22.6016, + "step": 7878 + }, + { + "epoch": 0.074582785092909, + "grad_norm": 329.18511962890625, + "learning_rate": 1.9895967094934863e-06, + "loss": 39.5156, + "step": 7879 + }, + { + "epoch": 0.07459225111462406, + "grad_norm": 548.5663452148438, + "learning_rate": 1.989592298234275e-06, + "loss": 32.3516, + "step": 7880 + }, + { + "epoch": 0.0746017171363391, + "grad_norm": 267.7093505859375, + "learning_rate": 1.9895878860449112e-06, + "loss": 12.4648, + "step": 7881 + }, + { + "epoch": 0.07461118315805416, + "grad_norm": 501.2581481933594, + "learning_rate": 1.9895834729253993e-06, + "loss": 41.3281, + "step": 7882 + }, + { + "epoch": 0.07462064917976922, + "grad_norm": 294.9993896484375, + "learning_rate": 1.989579058875743e-06, + "loss": 41.7344, + "step": 7883 + }, + { + "epoch": 0.07463011520148427, + "grad_norm": 273.8593444824219, + "learning_rate": 1.9895746438959474e-06, + "loss": 11.4375, + "step": 7884 + }, + { + "epoch": 0.07463958122319933, + "grad_norm": 320.9474182128906, + "learning_rate": 1.9895702279860155e-06, + "loss": 26.0391, + "step": 7885 + }, + { + "epoch": 0.07464904724491438, + "grad_norm": 280.3133544921875, + "learning_rate": 1.989565811145952e-06, + "loss": 17.0469, + "step": 7886 + }, + { + "epoch": 0.07465851326662944, + "grad_norm": 170.58656311035156, + "learning_rate": 1.989561393375761e-06, + "loss": 19.8594, + "step": 7887 + }, + { + "epoch": 0.07466797928834448, + "grad_norm": 2.653918981552124, + "learning_rate": 1.989556974675447e-06, + "loss": 0.873, + "step": 7888 + }, + { + "epoch": 0.07467744531005954, + "grad_norm": 411.6569519042969, + "learning_rate": 1.989552555045014e-06, + "loss": 43.7656, + "step": 7889 + }, + { + "epoch": 0.07468691133177459, + "grad_norm": 302.5626220703125, + "learning_rate": 1.9895481344844652e-06, + "loss": 42.3906, + "step": 7890 + }, + { + "epoch": 0.07469637735348965, + "grad_norm": 196.2335968017578, + "learning_rate": 1.989543712993806e-06, + "loss": 21.375, + "step": 7891 + }, + { + "epoch": 0.07470584337520471, + "grad_norm": 630.7767333984375, + "learning_rate": 1.98953929057304e-06, + "loss": 61.9375, + "step": 7892 + }, + { + "epoch": 0.07471530939691975, + "grad_norm": 3.2717983722686768, + "learning_rate": 1.9895348672221714e-06, + "loss": 0.9365, + "step": 7893 + }, + { + "epoch": 0.07472477541863481, + "grad_norm": 518.2737426757812, + "learning_rate": 1.9895304429412042e-06, + "loss": 39.6641, + "step": 7894 + }, + { + "epoch": 0.07473424144034986, + "grad_norm": 762.7935180664062, + "learning_rate": 1.989526017730143e-06, + "loss": 65.4609, + "step": 7895 + }, + { + "epoch": 0.07474370746206492, + "grad_norm": 296.8765563964844, + "learning_rate": 1.989521591588992e-06, + "loss": 19.1953, + "step": 7896 + }, + { + "epoch": 0.07475317348377997, + "grad_norm": 317.1935119628906, + "learning_rate": 1.9895171645177546e-06, + "loss": 21.6797, + "step": 7897 + }, + { + "epoch": 0.07476263950549503, + "grad_norm": 397.08502197265625, + "learning_rate": 1.9895127365164354e-06, + "loss": 33.4688, + "step": 7898 + }, + { + "epoch": 0.07477210552721007, + "grad_norm": 600.0338745117188, + "learning_rate": 1.989508307585039e-06, + "loss": 54.1797, + "step": 7899 + }, + { + "epoch": 0.07478157154892513, + "grad_norm": 569.1010131835938, + "learning_rate": 1.9895038777235687e-06, + "loss": 21.7188, + "step": 7900 + }, + { + "epoch": 0.07479103757064019, + "grad_norm": 374.9268798828125, + "learning_rate": 1.9894994469320294e-06, + "loss": 47.125, + "step": 7901 + }, + { + "epoch": 0.07480050359235524, + "grad_norm": 346.7905578613281, + "learning_rate": 1.989495015210425e-06, + "loss": 27.9297, + "step": 7902 + }, + { + "epoch": 0.0748099696140703, + "grad_norm": 726.725341796875, + "learning_rate": 1.98949058255876e-06, + "loss": 27.8125, + "step": 7903 + }, + { + "epoch": 0.07481943563578534, + "grad_norm": 1731.6297607421875, + "learning_rate": 1.9894861489770376e-06, + "loss": 35.2852, + "step": 7904 + }, + { + "epoch": 0.0748289016575004, + "grad_norm": 902.7050170898438, + "learning_rate": 1.989481714465263e-06, + "loss": 41.6719, + "step": 7905 + }, + { + "epoch": 0.07483836767921545, + "grad_norm": 423.06060791015625, + "learning_rate": 1.98947727902344e-06, + "loss": 25.375, + "step": 7906 + }, + { + "epoch": 0.07484783370093051, + "grad_norm": 389.37371826171875, + "learning_rate": 1.989472842651572e-06, + "loss": 23.6875, + "step": 7907 + }, + { + "epoch": 0.07485729972264557, + "grad_norm": 516.84130859375, + "learning_rate": 1.9894684053496643e-06, + "loss": 51.5859, + "step": 7908 + }, + { + "epoch": 0.07486676574436062, + "grad_norm": 1127.72119140625, + "learning_rate": 1.989463967117721e-06, + "loss": 44.5703, + "step": 7909 + }, + { + "epoch": 0.07487623176607568, + "grad_norm": 283.32952880859375, + "learning_rate": 1.989459527955746e-06, + "loss": 22.4766, + "step": 7910 + }, + { + "epoch": 0.07488569778779072, + "grad_norm": 267.3606872558594, + "learning_rate": 1.9894550878637428e-06, + "loss": 23.1562, + "step": 7911 + }, + { + "epoch": 0.07489516380950578, + "grad_norm": 479.6025085449219, + "learning_rate": 1.9894506468417166e-06, + "loss": 28.3711, + "step": 7912 + }, + { + "epoch": 0.07490462983122083, + "grad_norm": 229.2274627685547, + "learning_rate": 1.989446204889671e-06, + "loss": 23.6328, + "step": 7913 + }, + { + "epoch": 0.07491409585293589, + "grad_norm": 313.0173034667969, + "learning_rate": 1.9894417620076106e-06, + "loss": 22.2578, + "step": 7914 + }, + { + "epoch": 0.07492356187465093, + "grad_norm": 205.5279083251953, + "learning_rate": 1.9894373181955393e-06, + "loss": 23.8984, + "step": 7915 + }, + { + "epoch": 0.074933027896366, + "grad_norm": 223.147705078125, + "learning_rate": 1.989432873453461e-06, + "loss": 22.5625, + "step": 7916 + }, + { + "epoch": 0.07494249391808105, + "grad_norm": 199.80368041992188, + "learning_rate": 1.98942842778138e-06, + "loss": 24.3828, + "step": 7917 + }, + { + "epoch": 0.0749519599397961, + "grad_norm": 1625.9127197265625, + "learning_rate": 1.9894239811793012e-06, + "loss": 48.5391, + "step": 7918 + }, + { + "epoch": 0.07496142596151116, + "grad_norm": 1237.7879638671875, + "learning_rate": 1.989419533647228e-06, + "loss": 52.0625, + "step": 7919 + }, + { + "epoch": 0.0749708919832262, + "grad_norm": 331.85662841796875, + "learning_rate": 1.9894150851851647e-06, + "loss": 28.3828, + "step": 7920 + }, + { + "epoch": 0.07498035800494127, + "grad_norm": 202.0017852783203, + "learning_rate": 1.9894106357931156e-06, + "loss": 20.2656, + "step": 7921 + }, + { + "epoch": 0.07498982402665631, + "grad_norm": 462.9127502441406, + "learning_rate": 1.9894061854710853e-06, + "loss": 41.0078, + "step": 7922 + }, + { + "epoch": 0.07499929004837137, + "grad_norm": 306.69476318359375, + "learning_rate": 1.989401734219077e-06, + "loss": 20.8594, + "step": 7923 + }, + { + "epoch": 0.07500875607008642, + "grad_norm": 475.1324157714844, + "learning_rate": 1.989397282037096e-06, + "loss": 29.6172, + "step": 7924 + }, + { + "epoch": 0.07501822209180148, + "grad_norm": 448.8993835449219, + "learning_rate": 1.9893928289251453e-06, + "loss": 53.3281, + "step": 7925 + }, + { + "epoch": 0.07502768811351654, + "grad_norm": 2.9885900020599365, + "learning_rate": 1.9893883748832296e-06, + "loss": 0.939, + "step": 7926 + }, + { + "epoch": 0.07503715413523158, + "grad_norm": 1192.4188232421875, + "learning_rate": 1.989383919911354e-06, + "loss": 57.9688, + "step": 7927 + }, + { + "epoch": 0.07504662015694664, + "grad_norm": 256.52923583984375, + "learning_rate": 1.989379464009521e-06, + "loss": 24.1406, + "step": 7928 + }, + { + "epoch": 0.07505608617866169, + "grad_norm": 729.3926391601562, + "learning_rate": 1.989375007177736e-06, + "loss": 50.1562, + "step": 7929 + }, + { + "epoch": 0.07506555220037675, + "grad_norm": 279.54327392578125, + "learning_rate": 1.989370549416003e-06, + "loss": 26.2422, + "step": 7930 + }, + { + "epoch": 0.0750750182220918, + "grad_norm": 418.2335510253906, + "learning_rate": 1.9893660907243262e-06, + "loss": 24.1758, + "step": 7931 + }, + { + "epoch": 0.07508448424380686, + "grad_norm": 372.8615417480469, + "learning_rate": 1.9893616311027093e-06, + "loss": 25.8906, + "step": 7932 + }, + { + "epoch": 0.0750939502655219, + "grad_norm": 334.2088623046875, + "learning_rate": 1.989357170551157e-06, + "loss": 23.3906, + "step": 7933 + }, + { + "epoch": 0.07510341628723696, + "grad_norm": 734.1502075195312, + "learning_rate": 1.9893527090696734e-06, + "loss": 62.4375, + "step": 7934 + }, + { + "epoch": 0.07511288230895202, + "grad_norm": 174.4520721435547, + "learning_rate": 1.9893482466582624e-06, + "loss": 18.0703, + "step": 7935 + }, + { + "epoch": 0.07512234833066707, + "grad_norm": 181.28561401367188, + "learning_rate": 1.9893437833169282e-06, + "loss": 18.9922, + "step": 7936 + }, + { + "epoch": 0.07513181435238213, + "grad_norm": 221.6005401611328, + "learning_rate": 1.9893393190456756e-06, + "loss": 22.7812, + "step": 7937 + }, + { + "epoch": 0.07514128037409717, + "grad_norm": 649.4873657226562, + "learning_rate": 1.9893348538445082e-06, + "loss": 54.5312, + "step": 7938 + }, + { + "epoch": 0.07515074639581223, + "grad_norm": 3.128175973892212, + "learning_rate": 1.989330387713431e-06, + "loss": 0.7725, + "step": 7939 + }, + { + "epoch": 0.07516021241752728, + "grad_norm": 620.1820678710938, + "learning_rate": 1.9893259206524468e-06, + "loss": 35.5312, + "step": 7940 + }, + { + "epoch": 0.07516967843924234, + "grad_norm": 250.74822998046875, + "learning_rate": 1.9893214526615607e-06, + "loss": 19.8359, + "step": 7941 + }, + { + "epoch": 0.07517914446095739, + "grad_norm": 413.31646728515625, + "learning_rate": 1.989316983740777e-06, + "loss": 43.7344, + "step": 7942 + }, + { + "epoch": 0.07518861048267245, + "grad_norm": 264.57452392578125, + "learning_rate": 1.9893125138900996e-06, + "loss": 10.2891, + "step": 7943 + }, + { + "epoch": 0.0751980765043875, + "grad_norm": 386.077392578125, + "learning_rate": 1.989308043109533e-06, + "loss": 40.4219, + "step": 7944 + }, + { + "epoch": 0.07520754252610255, + "grad_norm": 320.1135559082031, + "learning_rate": 1.989303571399081e-06, + "loss": 27.7031, + "step": 7945 + }, + { + "epoch": 0.07521700854781761, + "grad_norm": 298.0207214355469, + "learning_rate": 1.989299098758748e-06, + "loss": 25.5156, + "step": 7946 + }, + { + "epoch": 0.07522647456953266, + "grad_norm": 456.23162841796875, + "learning_rate": 1.989294625188538e-06, + "loss": 25.8047, + "step": 7947 + }, + { + "epoch": 0.07523594059124772, + "grad_norm": 243.17108154296875, + "learning_rate": 1.989290150688456e-06, + "loss": 21.4531, + "step": 7948 + }, + { + "epoch": 0.07524540661296276, + "grad_norm": 577.9053955078125, + "learning_rate": 1.9892856752585055e-06, + "loss": 67.2031, + "step": 7949 + }, + { + "epoch": 0.07525487263467782, + "grad_norm": 516.8131103515625, + "learning_rate": 1.9892811988986905e-06, + "loss": 43.75, + "step": 7950 + }, + { + "epoch": 0.07526433865639288, + "grad_norm": 282.9229736328125, + "learning_rate": 1.9892767216090157e-06, + "loss": 29.4609, + "step": 7951 + }, + { + "epoch": 0.07527380467810793, + "grad_norm": 208.1815643310547, + "learning_rate": 1.989272243389485e-06, + "loss": 20.6016, + "step": 7952 + }, + { + "epoch": 0.07528327069982299, + "grad_norm": 228.89849853515625, + "learning_rate": 1.989267764240103e-06, + "loss": 23.5, + "step": 7953 + }, + { + "epoch": 0.07529273672153804, + "grad_norm": 256.4129333496094, + "learning_rate": 1.9892632841608735e-06, + "loss": 25.625, + "step": 7954 + }, + { + "epoch": 0.0753022027432531, + "grad_norm": 276.1039733886719, + "learning_rate": 1.989258803151801e-06, + "loss": 25.7422, + "step": 7955 + }, + { + "epoch": 0.07531166876496814, + "grad_norm": 238.52146911621094, + "learning_rate": 1.989254321212889e-06, + "loss": 21.8984, + "step": 7956 + }, + { + "epoch": 0.0753211347866832, + "grad_norm": 286.8876037597656, + "learning_rate": 1.989249838344143e-06, + "loss": 37.5156, + "step": 7957 + }, + { + "epoch": 0.07533060080839825, + "grad_norm": 654.6364135742188, + "learning_rate": 1.9892453545455662e-06, + "loss": 15.4023, + "step": 7958 + }, + { + "epoch": 0.07534006683011331, + "grad_norm": 719.457275390625, + "learning_rate": 1.989240869817163e-06, + "loss": 39.125, + "step": 7959 + }, + { + "epoch": 0.07534953285182837, + "grad_norm": 390.3125305175781, + "learning_rate": 1.989236384158938e-06, + "loss": 29.5078, + "step": 7960 + }, + { + "epoch": 0.07535899887354341, + "grad_norm": 369.92578125, + "learning_rate": 1.989231897570895e-06, + "loss": 27.4766, + "step": 7961 + }, + { + "epoch": 0.07536846489525847, + "grad_norm": 1748.8466796875, + "learning_rate": 1.9892274100530387e-06, + "loss": 32.2031, + "step": 7962 + }, + { + "epoch": 0.07537793091697352, + "grad_norm": 269.979248046875, + "learning_rate": 1.9892229216053727e-06, + "loss": 25.7422, + "step": 7963 + }, + { + "epoch": 0.07538739693868858, + "grad_norm": 1052.05126953125, + "learning_rate": 1.9892184322279017e-06, + "loss": 79.6953, + "step": 7964 + }, + { + "epoch": 0.07539686296040363, + "grad_norm": 2.8921620845794678, + "learning_rate": 1.989213941920629e-06, + "loss": 0.939, + "step": 7965 + }, + { + "epoch": 0.07540632898211869, + "grad_norm": 525.0552368164062, + "learning_rate": 1.9892094506835604e-06, + "loss": 54.0156, + "step": 7966 + }, + { + "epoch": 0.07541579500383373, + "grad_norm": 228.26792907714844, + "learning_rate": 1.989204958516699e-06, + "loss": 20.2109, + "step": 7967 + }, + { + "epoch": 0.07542526102554879, + "grad_norm": 937.9195556640625, + "learning_rate": 1.989200465420049e-06, + "loss": 45.25, + "step": 7968 + }, + { + "epoch": 0.07543472704726385, + "grad_norm": 494.3483581542969, + "learning_rate": 1.989195971393615e-06, + "loss": 24.1484, + "step": 7969 + }, + { + "epoch": 0.0754441930689789, + "grad_norm": 409.9561767578125, + "learning_rate": 1.9891914764374014e-06, + "loss": 36.6406, + "step": 7970 + }, + { + "epoch": 0.07545365909069396, + "grad_norm": 1180.5028076171875, + "learning_rate": 1.9891869805514122e-06, + "loss": 30.2266, + "step": 7971 + }, + { + "epoch": 0.075463125112409, + "grad_norm": 605.7735595703125, + "learning_rate": 1.9891824837356515e-06, + "loss": 44.0312, + "step": 7972 + }, + { + "epoch": 0.07547259113412406, + "grad_norm": 438.3897705078125, + "learning_rate": 1.9891779859901235e-06, + "loss": 26.0781, + "step": 7973 + }, + { + "epoch": 0.07548205715583911, + "grad_norm": 334.910400390625, + "learning_rate": 1.989173487314833e-06, + "loss": 22.0469, + "step": 7974 + }, + { + "epoch": 0.07549152317755417, + "grad_norm": 328.6864013671875, + "learning_rate": 1.989168987709783e-06, + "loss": 20.8984, + "step": 7975 + }, + { + "epoch": 0.07550098919926922, + "grad_norm": 380.2566833496094, + "learning_rate": 1.989164487174979e-06, + "loss": 9.4277, + "step": 7976 + }, + { + "epoch": 0.07551045522098428, + "grad_norm": 285.6993713378906, + "learning_rate": 1.9891599857104247e-06, + "loss": 24.3359, + "step": 7977 + }, + { + "epoch": 0.07551992124269934, + "grad_norm": 373.5867614746094, + "learning_rate": 1.989155483316124e-06, + "loss": 29.8398, + "step": 7978 + }, + { + "epoch": 0.07552938726441438, + "grad_norm": 354.5078125, + "learning_rate": 1.989150979992082e-06, + "loss": 28.9375, + "step": 7979 + }, + { + "epoch": 0.07553885328612944, + "grad_norm": 192.98170471191406, + "learning_rate": 1.989146475738302e-06, + "loss": 23.4141, + "step": 7980 + }, + { + "epoch": 0.07554831930784449, + "grad_norm": 898.0112915039062, + "learning_rate": 1.989141970554789e-06, + "loss": 81.3438, + "step": 7981 + }, + { + "epoch": 0.07555778532955955, + "grad_norm": 966.91748046875, + "learning_rate": 1.989137464441547e-06, + "loss": 27.8672, + "step": 7982 + }, + { + "epoch": 0.0755672513512746, + "grad_norm": 354.9923400878906, + "learning_rate": 1.98913295739858e-06, + "loss": 30.5391, + "step": 7983 + }, + { + "epoch": 0.07557671737298965, + "grad_norm": 519.3421020507812, + "learning_rate": 1.989128449425892e-06, + "loss": 27.6953, + "step": 7984 + }, + { + "epoch": 0.0755861833947047, + "grad_norm": 221.0620880126953, + "learning_rate": 1.9891239405234877e-06, + "loss": 21.0859, + "step": 7985 + }, + { + "epoch": 0.07559564941641976, + "grad_norm": 242.35231018066406, + "learning_rate": 1.9891194306913713e-06, + "loss": 23.9531, + "step": 7986 + }, + { + "epoch": 0.07560511543813482, + "grad_norm": 479.2976379394531, + "learning_rate": 1.9891149199295472e-06, + "loss": 31.7812, + "step": 7987 + }, + { + "epoch": 0.07561458145984987, + "grad_norm": 209.09242248535156, + "learning_rate": 1.9891104082380194e-06, + "loss": 20.5938, + "step": 7988 + }, + { + "epoch": 0.07562404748156493, + "grad_norm": 376.4866943359375, + "learning_rate": 1.989105895616792e-06, + "loss": 43.0234, + "step": 7989 + }, + { + "epoch": 0.07563351350327997, + "grad_norm": 406.54638671875, + "learning_rate": 1.98910138206587e-06, + "loss": 52.0, + "step": 7990 + }, + { + "epoch": 0.07564297952499503, + "grad_norm": 407.5090637207031, + "learning_rate": 1.989096867585256e-06, + "loss": 29.4922, + "step": 7991 + }, + { + "epoch": 0.07565244554671008, + "grad_norm": 224.98126220703125, + "learning_rate": 1.9890923521749563e-06, + "loss": 10.6914, + "step": 7992 + }, + { + "epoch": 0.07566191156842514, + "grad_norm": 308.4093933105469, + "learning_rate": 1.989087835834973e-06, + "loss": 19.4609, + "step": 7993 + }, + { + "epoch": 0.0756713775901402, + "grad_norm": 171.3807373046875, + "learning_rate": 1.9890833185653123e-06, + "loss": 22.5586, + "step": 7994 + }, + { + "epoch": 0.07568084361185524, + "grad_norm": 255.1243438720703, + "learning_rate": 1.9890788003659774e-06, + "loss": 26.7109, + "step": 7995 + }, + { + "epoch": 0.0756903096335703, + "grad_norm": 507.8326110839844, + "learning_rate": 1.989074281236973e-06, + "loss": 35.4609, + "step": 7996 + }, + { + "epoch": 0.07569977565528535, + "grad_norm": 302.8341369628906, + "learning_rate": 1.989069761178303e-06, + "loss": 32.3516, + "step": 7997 + }, + { + "epoch": 0.07570924167700041, + "grad_norm": 338.7254943847656, + "learning_rate": 1.9890652401899715e-06, + "loss": 9.1328, + "step": 7998 + }, + { + "epoch": 0.07571870769871546, + "grad_norm": 663.545166015625, + "learning_rate": 1.9890607182719833e-06, + "loss": 33.1914, + "step": 7999 + }, + { + "epoch": 0.07572817372043052, + "grad_norm": 592.913818359375, + "learning_rate": 1.9890561954243425e-06, + "loss": 51.4844, + "step": 8000 + }, + { + "epoch": 0.07573763974214556, + "grad_norm": 441.1180419921875, + "learning_rate": 1.989051671647053e-06, + "loss": 49.8906, + "step": 8001 + }, + { + "epoch": 0.07574710576386062, + "grad_norm": 379.790283203125, + "learning_rate": 1.9890471469401194e-06, + "loss": 22.9648, + "step": 8002 + }, + { + "epoch": 0.07575657178557568, + "grad_norm": 728.5665893554688, + "learning_rate": 1.9890426213035455e-06, + "loss": 86.1875, + "step": 8003 + }, + { + "epoch": 0.07576603780729073, + "grad_norm": 3.426126718521118, + "learning_rate": 1.989038094737336e-06, + "loss": 0.9629, + "step": 8004 + }, + { + "epoch": 0.07577550382900579, + "grad_norm": 253.57945251464844, + "learning_rate": 1.9890335672414953e-06, + "loss": 24.5312, + "step": 8005 + }, + { + "epoch": 0.07578496985072083, + "grad_norm": 282.7489318847656, + "learning_rate": 1.9890290388160273e-06, + "loss": 27.1641, + "step": 8006 + }, + { + "epoch": 0.0757944358724359, + "grad_norm": 196.47738647460938, + "learning_rate": 1.989024509460936e-06, + "loss": 23.0703, + "step": 8007 + }, + { + "epoch": 0.07580390189415094, + "grad_norm": 2.696985960006714, + "learning_rate": 1.989019979176226e-06, + "loss": 0.8706, + "step": 8008 + }, + { + "epoch": 0.075813367915866, + "grad_norm": 398.02581787109375, + "learning_rate": 1.9890154479619022e-06, + "loss": 33.2266, + "step": 8009 + }, + { + "epoch": 0.07582283393758105, + "grad_norm": 537.0755615234375, + "learning_rate": 1.9890109158179677e-06, + "loss": 56.0625, + "step": 8010 + }, + { + "epoch": 0.0758322999592961, + "grad_norm": 282.6112365722656, + "learning_rate": 1.9890063827444276e-06, + "loss": 26.125, + "step": 8011 + }, + { + "epoch": 0.07584176598101117, + "grad_norm": 294.68377685546875, + "learning_rate": 1.989001848741286e-06, + "loss": 27.8047, + "step": 8012 + }, + { + "epoch": 0.07585123200272621, + "grad_norm": 370.8663635253906, + "learning_rate": 1.988997313808546e-06, + "loss": 27.9688, + "step": 8013 + }, + { + "epoch": 0.07586069802444127, + "grad_norm": 355.0487976074219, + "learning_rate": 1.9889927779462136e-06, + "loss": 29.9219, + "step": 8014 + }, + { + "epoch": 0.07587016404615632, + "grad_norm": 394.17413330078125, + "learning_rate": 1.9889882411542925e-06, + "loss": 25.1953, + "step": 8015 + }, + { + "epoch": 0.07587963006787138, + "grad_norm": 470.8172607421875, + "learning_rate": 1.988983703432786e-06, + "loss": 51.9531, + "step": 8016 + }, + { + "epoch": 0.07588909608958642, + "grad_norm": 581.8789672851562, + "learning_rate": 1.9889791647817e-06, + "loss": 71.0938, + "step": 8017 + }, + { + "epoch": 0.07589856211130148, + "grad_norm": 574.8593139648438, + "learning_rate": 1.988974625201038e-06, + "loss": 48.6562, + "step": 8018 + }, + { + "epoch": 0.07590802813301653, + "grad_norm": 792.6998901367188, + "learning_rate": 1.9889700846908035e-06, + "loss": 13.5391, + "step": 8019 + }, + { + "epoch": 0.07591749415473159, + "grad_norm": 232.10244750976562, + "learning_rate": 1.9889655432510017e-06, + "loss": 18.3281, + "step": 8020 + }, + { + "epoch": 0.07592696017644665, + "grad_norm": 208.07510375976562, + "learning_rate": 1.9889610008816367e-06, + "loss": 15.5898, + "step": 8021 + }, + { + "epoch": 0.0759364261981617, + "grad_norm": 188.01132202148438, + "learning_rate": 1.9889564575827128e-06, + "loss": 16.3008, + "step": 8022 + }, + { + "epoch": 0.07594589221987676, + "grad_norm": 148.05401611328125, + "learning_rate": 1.988951913354234e-06, + "loss": 17.9062, + "step": 8023 + }, + { + "epoch": 0.0759553582415918, + "grad_norm": 764.9488525390625, + "learning_rate": 1.9889473681962046e-06, + "loss": 61.6562, + "step": 8024 + }, + { + "epoch": 0.07596482426330686, + "grad_norm": 3.507826805114746, + "learning_rate": 1.9889428221086293e-06, + "loss": 0.8862, + "step": 8025 + }, + { + "epoch": 0.07597429028502191, + "grad_norm": 194.1053924560547, + "learning_rate": 1.988938275091512e-06, + "loss": 22.9961, + "step": 8026 + }, + { + "epoch": 0.07598375630673697, + "grad_norm": 415.47894287109375, + "learning_rate": 1.988933727144857e-06, + "loss": 27.8281, + "step": 8027 + }, + { + "epoch": 0.07599322232845201, + "grad_norm": 622.7335205078125, + "learning_rate": 1.988929178268669e-06, + "loss": 60.8008, + "step": 8028 + }, + { + "epoch": 0.07600268835016707, + "grad_norm": 823.0458374023438, + "learning_rate": 1.9889246284629515e-06, + "loss": 48.7422, + "step": 8029 + }, + { + "epoch": 0.07601215437188213, + "grad_norm": 3.231740951538086, + "learning_rate": 1.988920077727709e-06, + "loss": 1.0044, + "step": 8030 + }, + { + "epoch": 0.07602162039359718, + "grad_norm": 752.1704711914062, + "learning_rate": 1.9889155260629463e-06, + "loss": 46.4062, + "step": 8031 + }, + { + "epoch": 0.07603108641531224, + "grad_norm": 247.99343872070312, + "learning_rate": 1.988910973468667e-06, + "loss": 25.7344, + "step": 8032 + }, + { + "epoch": 0.07604055243702729, + "grad_norm": 632.8496704101562, + "learning_rate": 1.988906419944876e-06, + "loss": 36.0312, + "step": 8033 + }, + { + "epoch": 0.07605001845874235, + "grad_norm": 283.29510498046875, + "learning_rate": 1.9889018654915773e-06, + "loss": 27.4922, + "step": 8034 + }, + { + "epoch": 0.07605948448045739, + "grad_norm": 152.42681884765625, + "learning_rate": 1.988897310108775e-06, + "loss": 22.2344, + "step": 8035 + }, + { + "epoch": 0.07606895050217245, + "grad_norm": 523.8509521484375, + "learning_rate": 1.988892753796474e-06, + "loss": 31.5781, + "step": 8036 + }, + { + "epoch": 0.07607841652388751, + "grad_norm": 296.7117004394531, + "learning_rate": 1.988888196554678e-06, + "loss": 23.4297, + "step": 8037 + }, + { + "epoch": 0.07608788254560256, + "grad_norm": 3.181264877319336, + "learning_rate": 1.988883638383391e-06, + "loss": 0.9409, + "step": 8038 + }, + { + "epoch": 0.07609734856731762, + "grad_norm": 233.75035095214844, + "learning_rate": 1.988879079282618e-06, + "loss": 23.1797, + "step": 8039 + }, + { + "epoch": 0.07610681458903266, + "grad_norm": 350.29534912109375, + "learning_rate": 1.988874519252363e-06, + "loss": 25.8281, + "step": 8040 + }, + { + "epoch": 0.07611628061074772, + "grad_norm": 712.0346069335938, + "learning_rate": 1.98886995829263e-06, + "loss": 39.4219, + "step": 8041 + }, + { + "epoch": 0.07612574663246277, + "grad_norm": 244.23980712890625, + "learning_rate": 1.988865396403424e-06, + "loss": 32.9062, + "step": 8042 + }, + { + "epoch": 0.07613521265417783, + "grad_norm": 487.6708679199219, + "learning_rate": 1.9888608335847486e-06, + "loss": 27.4961, + "step": 8043 + }, + { + "epoch": 0.07614467867589288, + "grad_norm": 380.0262756347656, + "learning_rate": 1.9888562698366087e-06, + "loss": 29.3594, + "step": 8044 + }, + { + "epoch": 0.07615414469760794, + "grad_norm": 255.29080200195312, + "learning_rate": 1.9888517051590077e-06, + "loss": 20.2734, + "step": 8045 + }, + { + "epoch": 0.076163610719323, + "grad_norm": 285.7001037597656, + "learning_rate": 1.9888471395519507e-06, + "loss": 12.5352, + "step": 8046 + }, + { + "epoch": 0.07617307674103804, + "grad_norm": 243.9690399169922, + "learning_rate": 1.988842573015442e-06, + "loss": 19.2109, + "step": 8047 + }, + { + "epoch": 0.0761825427627531, + "grad_norm": 686.4725952148438, + "learning_rate": 1.988838005549485e-06, + "loss": 28.4609, + "step": 8048 + }, + { + "epoch": 0.07619200878446815, + "grad_norm": 355.2594909667969, + "learning_rate": 1.988833437154085e-06, + "loss": 32.6562, + "step": 8049 + }, + { + "epoch": 0.07620147480618321, + "grad_norm": 390.173583984375, + "learning_rate": 1.988828867829246e-06, + "loss": 19.2188, + "step": 8050 + }, + { + "epoch": 0.07621094082789825, + "grad_norm": 166.8349151611328, + "learning_rate": 1.9888242975749717e-06, + "loss": 21.3516, + "step": 8051 + }, + { + "epoch": 0.07622040684961331, + "grad_norm": 368.67181396484375, + "learning_rate": 1.9888197263912675e-06, + "loss": 37.8281, + "step": 8052 + }, + { + "epoch": 0.07622987287132836, + "grad_norm": 445.1274719238281, + "learning_rate": 1.9888151542781364e-06, + "loss": 14.6445, + "step": 8053 + }, + { + "epoch": 0.07623933889304342, + "grad_norm": 238.21665954589844, + "learning_rate": 1.988810581235584e-06, + "loss": 22.8438, + "step": 8054 + }, + { + "epoch": 0.07624880491475848, + "grad_norm": 644.42626953125, + "learning_rate": 1.988806007263614e-06, + "loss": 18.1367, + "step": 8055 + }, + { + "epoch": 0.07625827093647353, + "grad_norm": 455.9514465332031, + "learning_rate": 1.9888014323622303e-06, + "loss": 46.5156, + "step": 8056 + }, + { + "epoch": 0.07626773695818859, + "grad_norm": 309.8287048339844, + "learning_rate": 1.9887968565314377e-06, + "loss": 21.6719, + "step": 8057 + }, + { + "epoch": 0.07627720297990363, + "grad_norm": 278.5891418457031, + "learning_rate": 1.9887922797712404e-06, + "loss": 15.7109, + "step": 8058 + }, + { + "epoch": 0.07628666900161869, + "grad_norm": 687.9639892578125, + "learning_rate": 1.988787702081643e-06, + "loss": 60.8828, + "step": 8059 + }, + { + "epoch": 0.07629613502333374, + "grad_norm": 298.4606628417969, + "learning_rate": 1.9887831234626488e-06, + "loss": 29.3047, + "step": 8060 + }, + { + "epoch": 0.0763056010450488, + "grad_norm": 192.7290802001953, + "learning_rate": 1.9887785439142633e-06, + "loss": 23.6562, + "step": 8061 + }, + { + "epoch": 0.07631506706676384, + "grad_norm": 526.62255859375, + "learning_rate": 1.98877396343649e-06, + "loss": 38.8438, + "step": 8062 + }, + { + "epoch": 0.0763245330884789, + "grad_norm": 331.08990478515625, + "learning_rate": 1.9887693820293336e-06, + "loss": 17.25, + "step": 8063 + }, + { + "epoch": 0.07633399911019396, + "grad_norm": 706.9811401367188, + "learning_rate": 1.9887647996927982e-06, + "loss": 25.2656, + "step": 8064 + }, + { + "epoch": 0.07634346513190901, + "grad_norm": 164.03897094726562, + "learning_rate": 1.9887602164268885e-06, + "loss": 24.8594, + "step": 8065 + }, + { + "epoch": 0.07635293115362407, + "grad_norm": 351.5257263183594, + "learning_rate": 1.9887556322316084e-06, + "loss": 55.5625, + "step": 8066 + }, + { + "epoch": 0.07636239717533912, + "grad_norm": 4.074217796325684, + "learning_rate": 1.9887510471069624e-06, + "loss": 0.8545, + "step": 8067 + }, + { + "epoch": 0.07637186319705418, + "grad_norm": 363.5827941894531, + "learning_rate": 1.9887464610529544e-06, + "loss": 21.6406, + "step": 8068 + }, + { + "epoch": 0.07638132921876922, + "grad_norm": 592.734130859375, + "learning_rate": 1.9887418740695894e-06, + "loss": 34.9844, + "step": 8069 + }, + { + "epoch": 0.07639079524048428, + "grad_norm": 163.94606018066406, + "learning_rate": 1.9887372861568714e-06, + "loss": 22.375, + "step": 8070 + }, + { + "epoch": 0.07640026126219933, + "grad_norm": 3.527019500732422, + "learning_rate": 1.9887326973148045e-06, + "loss": 0.9639, + "step": 8071 + }, + { + "epoch": 0.07640972728391439, + "grad_norm": 759.0111083984375, + "learning_rate": 1.988728107543393e-06, + "loss": 29.0312, + "step": 8072 + }, + { + "epoch": 0.07641919330562945, + "grad_norm": 537.6572875976562, + "learning_rate": 1.988723516842642e-06, + "loss": 35.0781, + "step": 8073 + }, + { + "epoch": 0.0764286593273445, + "grad_norm": 191.9508819580078, + "learning_rate": 1.9887189252125545e-06, + "loss": 22.0547, + "step": 8074 + }, + { + "epoch": 0.07643812534905955, + "grad_norm": 3.1401946544647217, + "learning_rate": 1.988714332653136e-06, + "loss": 0.9133, + "step": 8075 + }, + { + "epoch": 0.0764475913707746, + "grad_norm": 3.565115213394165, + "learning_rate": 1.98870973916439e-06, + "loss": 0.9814, + "step": 8076 + }, + { + "epoch": 0.07645705739248966, + "grad_norm": 172.58181762695312, + "learning_rate": 1.9887051447463213e-06, + "loss": 18.75, + "step": 8077 + }, + { + "epoch": 0.0764665234142047, + "grad_norm": 494.6904602050781, + "learning_rate": 1.988700549398934e-06, + "loss": 25.9688, + "step": 8078 + }, + { + "epoch": 0.07647598943591977, + "grad_norm": 922.2372436523438, + "learning_rate": 1.988695953122233e-06, + "loss": 40.7656, + "step": 8079 + }, + { + "epoch": 0.07648545545763483, + "grad_norm": 304.0807800292969, + "learning_rate": 1.9886913559162218e-06, + "loss": 30.5, + "step": 8080 + }, + { + "epoch": 0.07649492147934987, + "grad_norm": 150.90155029296875, + "learning_rate": 1.988686757780905e-06, + "loss": 20.875, + "step": 8081 + }, + { + "epoch": 0.07650438750106493, + "grad_norm": 614.43701171875, + "learning_rate": 1.988682158716287e-06, + "loss": 25.5078, + "step": 8082 + }, + { + "epoch": 0.07651385352277998, + "grad_norm": 385.2694396972656, + "learning_rate": 1.988677558722372e-06, + "loss": 48.6562, + "step": 8083 + }, + { + "epoch": 0.07652331954449504, + "grad_norm": 594.9563598632812, + "learning_rate": 1.9886729577991647e-06, + "loss": 27.5938, + "step": 8084 + }, + { + "epoch": 0.07653278556621008, + "grad_norm": 524.158203125, + "learning_rate": 1.988668355946669e-06, + "loss": 18.5547, + "step": 8085 + }, + { + "epoch": 0.07654225158792514, + "grad_norm": 337.279052734375, + "learning_rate": 1.9886637531648888e-06, + "loss": 22.1406, + "step": 8086 + }, + { + "epoch": 0.07655171760964019, + "grad_norm": 183.38528442382812, + "learning_rate": 1.9886591494538297e-06, + "loss": 20.4375, + "step": 8087 + }, + { + "epoch": 0.07656118363135525, + "grad_norm": 257.4671936035156, + "learning_rate": 1.988654544813495e-06, + "loss": 21.7188, + "step": 8088 + }, + { + "epoch": 0.07657064965307031, + "grad_norm": 262.4556884765625, + "learning_rate": 1.9886499392438895e-06, + "loss": 15.1094, + "step": 8089 + }, + { + "epoch": 0.07658011567478536, + "grad_norm": 196.37632751464844, + "learning_rate": 1.988645332745017e-06, + "loss": 24.2812, + "step": 8090 + }, + { + "epoch": 0.07658958169650042, + "grad_norm": 478.4145812988281, + "learning_rate": 1.988640725316883e-06, + "loss": 34.8672, + "step": 8091 + }, + { + "epoch": 0.07659904771821546, + "grad_norm": 216.36412048339844, + "learning_rate": 1.98863611695949e-06, + "loss": 22.7031, + "step": 8092 + }, + { + "epoch": 0.07660851373993052, + "grad_norm": 520.9180908203125, + "learning_rate": 1.9886315076728444e-06, + "loss": 46.75, + "step": 8093 + }, + { + "epoch": 0.07661797976164557, + "grad_norm": 387.60687255859375, + "learning_rate": 1.988626897456949e-06, + "loss": 52.8125, + "step": 8094 + }, + { + "epoch": 0.07662744578336063, + "grad_norm": 727.5215454101562, + "learning_rate": 1.9886222863118086e-06, + "loss": 19.0312, + "step": 8095 + }, + { + "epoch": 0.07663691180507567, + "grad_norm": 149.83523559570312, + "learning_rate": 1.9886176742374274e-06, + "loss": 24.0, + "step": 8096 + }, + { + "epoch": 0.07664637782679073, + "grad_norm": 866.6432495117188, + "learning_rate": 1.98861306123381e-06, + "loss": 58.4062, + "step": 8097 + }, + { + "epoch": 0.0766558438485058, + "grad_norm": 340.18206787109375, + "learning_rate": 1.9886084473009607e-06, + "loss": 47.9219, + "step": 8098 + }, + { + "epoch": 0.07666530987022084, + "grad_norm": 279.0538330078125, + "learning_rate": 1.988603832438884e-06, + "loss": 17.7109, + "step": 8099 + }, + { + "epoch": 0.0766747758919359, + "grad_norm": 243.5879364013672, + "learning_rate": 1.988599216647584e-06, + "loss": 26.1562, + "step": 8100 + }, + { + "epoch": 0.07668424191365095, + "grad_norm": 200.87120056152344, + "learning_rate": 1.9885945999270648e-06, + "loss": 27.5156, + "step": 8101 + }, + { + "epoch": 0.076693707935366, + "grad_norm": 314.3636779785156, + "learning_rate": 1.988589982277331e-06, + "loss": 26.5781, + "step": 8102 + }, + { + "epoch": 0.07670317395708105, + "grad_norm": 861.4793701171875, + "learning_rate": 1.988585363698387e-06, + "loss": 40.9141, + "step": 8103 + }, + { + "epoch": 0.07671263997879611, + "grad_norm": 173.4169464111328, + "learning_rate": 1.988580744190237e-06, + "loss": 21.2109, + "step": 8104 + }, + { + "epoch": 0.07672210600051116, + "grad_norm": 593.4721069335938, + "learning_rate": 1.9885761237528854e-06, + "loss": 36.2031, + "step": 8105 + }, + { + "epoch": 0.07673157202222622, + "grad_norm": 416.0846252441406, + "learning_rate": 1.988571502386337e-06, + "loss": 51.7812, + "step": 8106 + }, + { + "epoch": 0.07674103804394128, + "grad_norm": 367.0791015625, + "learning_rate": 1.988566880090595e-06, + "loss": 19.7891, + "step": 8107 + }, + { + "epoch": 0.07675050406565632, + "grad_norm": 554.1012573242188, + "learning_rate": 1.988562256865665e-06, + "loss": 65.8906, + "step": 8108 + }, + { + "epoch": 0.07675997008737138, + "grad_norm": 813.490966796875, + "learning_rate": 1.9885576327115504e-06, + "loss": 23.1094, + "step": 8109 + }, + { + "epoch": 0.07676943610908643, + "grad_norm": 387.534423828125, + "learning_rate": 1.988553007628256e-06, + "loss": 23.4844, + "step": 8110 + }, + { + "epoch": 0.07677890213080149, + "grad_norm": 349.6477355957031, + "learning_rate": 1.9885483816157863e-06, + "loss": 21.9648, + "step": 8111 + }, + { + "epoch": 0.07678836815251654, + "grad_norm": 485.4428405761719, + "learning_rate": 1.9885437546741453e-06, + "loss": 27.4688, + "step": 8112 + }, + { + "epoch": 0.0767978341742316, + "grad_norm": 3.2769696712493896, + "learning_rate": 1.9885391268033374e-06, + "loss": 0.9946, + "step": 8113 + }, + { + "epoch": 0.07680730019594664, + "grad_norm": 3.4930591583251953, + "learning_rate": 1.988534498003367e-06, + "loss": 0.877, + "step": 8114 + }, + { + "epoch": 0.0768167662176617, + "grad_norm": 426.26171875, + "learning_rate": 1.988529868274239e-06, + "loss": 44.0938, + "step": 8115 + }, + { + "epoch": 0.07682623223937676, + "grad_norm": 334.76898193359375, + "learning_rate": 1.9885252376159565e-06, + "loss": 18.0, + "step": 8116 + }, + { + "epoch": 0.07683569826109181, + "grad_norm": 808.2674560546875, + "learning_rate": 1.9885206060285248e-06, + "loss": 32.9375, + "step": 8117 + }, + { + "epoch": 0.07684516428280687, + "grad_norm": 439.7249755859375, + "learning_rate": 1.9885159735119483e-06, + "loss": 29.1406, + "step": 8118 + }, + { + "epoch": 0.07685463030452191, + "grad_norm": 248.310791015625, + "learning_rate": 1.988511340066231e-06, + "loss": 22.2109, + "step": 8119 + }, + { + "epoch": 0.07686409632623697, + "grad_norm": 712.6346435546875, + "learning_rate": 1.9885067056913773e-06, + "loss": 21.3594, + "step": 8120 + }, + { + "epoch": 0.07687356234795202, + "grad_norm": 362.2654724121094, + "learning_rate": 1.9885020703873916e-06, + "loss": 30.2812, + "step": 8121 + }, + { + "epoch": 0.07688302836966708, + "grad_norm": 178.7076416015625, + "learning_rate": 1.988497434154278e-06, + "loss": 25.4844, + "step": 8122 + }, + { + "epoch": 0.07689249439138214, + "grad_norm": 1033.59375, + "learning_rate": 1.9884927969920414e-06, + "loss": 39.1562, + "step": 8123 + }, + { + "epoch": 0.07690196041309719, + "grad_norm": 263.85577392578125, + "learning_rate": 1.988488158900686e-06, + "loss": 17.8359, + "step": 8124 + }, + { + "epoch": 0.07691142643481225, + "grad_norm": 272.1051330566406, + "learning_rate": 1.9884835198802156e-06, + "loss": 27.4688, + "step": 8125 + }, + { + "epoch": 0.07692089245652729, + "grad_norm": 259.28668212890625, + "learning_rate": 1.9884788799306358e-06, + "loss": 17.5938, + "step": 8126 + }, + { + "epoch": 0.07693035847824235, + "grad_norm": 211.7530059814453, + "learning_rate": 1.9884742390519493e-06, + "loss": 18.9453, + "step": 8127 + }, + { + "epoch": 0.0769398244999574, + "grad_norm": 460.2309265136719, + "learning_rate": 1.9884695972441617e-06, + "loss": 34.0078, + "step": 8128 + }, + { + "epoch": 0.07694929052167246, + "grad_norm": 528.303955078125, + "learning_rate": 1.988464954507277e-06, + "loss": 47.8438, + "step": 8129 + }, + { + "epoch": 0.0769587565433875, + "grad_norm": 508.80364990234375, + "learning_rate": 1.9884603108412993e-06, + "loss": 42.0, + "step": 8130 + }, + { + "epoch": 0.07696822256510256, + "grad_norm": 616.1954956054688, + "learning_rate": 1.9884556662462333e-06, + "loss": 20.3672, + "step": 8131 + }, + { + "epoch": 0.07697768858681762, + "grad_norm": 486.8760681152344, + "learning_rate": 1.9884510207220836e-06, + "loss": 44.4844, + "step": 8132 + }, + { + "epoch": 0.07698715460853267, + "grad_norm": 326.483154296875, + "learning_rate": 1.988446374268854e-06, + "loss": 29.6172, + "step": 8133 + }, + { + "epoch": 0.07699662063024773, + "grad_norm": 3.2750985622406006, + "learning_rate": 1.988441726886549e-06, + "loss": 0.8804, + "step": 8134 + }, + { + "epoch": 0.07700608665196278, + "grad_norm": 631.4591064453125, + "learning_rate": 1.988437078575173e-06, + "loss": 48.2422, + "step": 8135 + }, + { + "epoch": 0.07701555267367784, + "grad_norm": 203.1705780029297, + "learning_rate": 1.988432429334731e-06, + "loss": 28.5938, + "step": 8136 + }, + { + "epoch": 0.07702501869539288, + "grad_norm": 316.113525390625, + "learning_rate": 1.9884277791652264e-06, + "loss": 30.0312, + "step": 8137 + }, + { + "epoch": 0.07703448471710794, + "grad_norm": 204.3355255126953, + "learning_rate": 1.988423128066664e-06, + "loss": 24.4141, + "step": 8138 + }, + { + "epoch": 0.07704395073882299, + "grad_norm": 305.6619873046875, + "learning_rate": 1.988418476039048e-06, + "loss": 21.9219, + "step": 8139 + }, + { + "epoch": 0.07705341676053805, + "grad_norm": 352.6118469238281, + "learning_rate": 1.9884138230823834e-06, + "loss": 27.1641, + "step": 8140 + }, + { + "epoch": 0.07706288278225311, + "grad_norm": 418.453125, + "learning_rate": 1.988409169196674e-06, + "loss": 59.6094, + "step": 8141 + }, + { + "epoch": 0.07707234880396815, + "grad_norm": 796.5441284179688, + "learning_rate": 1.988404514381924e-06, + "loss": 40.2812, + "step": 8142 + }, + { + "epoch": 0.07708181482568321, + "grad_norm": 349.6156005859375, + "learning_rate": 1.988399858638138e-06, + "loss": 23.3984, + "step": 8143 + }, + { + "epoch": 0.07709128084739826, + "grad_norm": 305.52545166015625, + "learning_rate": 1.988395201965321e-06, + "loss": 22.9062, + "step": 8144 + }, + { + "epoch": 0.07710074686911332, + "grad_norm": 187.94371032714844, + "learning_rate": 1.988390544363476e-06, + "loss": 14.7891, + "step": 8145 + }, + { + "epoch": 0.07711021289082837, + "grad_norm": 472.0030212402344, + "learning_rate": 1.988385885832609e-06, + "loss": 52.1094, + "step": 8146 + }, + { + "epoch": 0.07711967891254343, + "grad_norm": 353.2944641113281, + "learning_rate": 1.988381226372723e-06, + "loss": 18.0938, + "step": 8147 + }, + { + "epoch": 0.07712914493425847, + "grad_norm": 384.3602600097656, + "learning_rate": 1.9883765659838236e-06, + "loss": 36.0938, + "step": 8148 + }, + { + "epoch": 0.07713861095597353, + "grad_norm": 987.880126953125, + "learning_rate": 1.9883719046659136e-06, + "loss": 45.7812, + "step": 8149 + }, + { + "epoch": 0.07714807697768859, + "grad_norm": 239.6569061279297, + "learning_rate": 1.988367242418999e-06, + "loss": 28.75, + "step": 8150 + }, + { + "epoch": 0.07715754299940364, + "grad_norm": 764.9644165039062, + "learning_rate": 1.988362579243083e-06, + "loss": 39.9453, + "step": 8151 + }, + { + "epoch": 0.0771670090211187, + "grad_norm": 669.8430786132812, + "learning_rate": 1.988357915138171e-06, + "loss": 35.4453, + "step": 8152 + }, + { + "epoch": 0.07717647504283374, + "grad_norm": 277.8525695800781, + "learning_rate": 1.9883532501042667e-06, + "loss": 13.0625, + "step": 8153 + }, + { + "epoch": 0.0771859410645488, + "grad_norm": 3.6928205490112305, + "learning_rate": 1.9883485841413746e-06, + "loss": 1.0688, + "step": 8154 + }, + { + "epoch": 0.07719540708626385, + "grad_norm": 322.11395263671875, + "learning_rate": 1.988343917249499e-06, + "loss": 30.1797, + "step": 8155 + }, + { + "epoch": 0.07720487310797891, + "grad_norm": 716.9779052734375, + "learning_rate": 1.988339249428644e-06, + "loss": 35.9141, + "step": 8156 + }, + { + "epoch": 0.07721433912969396, + "grad_norm": 260.8818664550781, + "learning_rate": 1.9883345806788153e-06, + "loss": 29.2188, + "step": 8157 + }, + { + "epoch": 0.07722380515140902, + "grad_norm": 730.1019897460938, + "learning_rate": 1.988329911000016e-06, + "loss": 24.2344, + "step": 8158 + }, + { + "epoch": 0.07723327117312408, + "grad_norm": 361.70440673828125, + "learning_rate": 1.9883252403922505e-06, + "loss": 21.4844, + "step": 8159 + }, + { + "epoch": 0.07724273719483912, + "grad_norm": 264.6295471191406, + "learning_rate": 1.988320568855524e-06, + "loss": 21.5625, + "step": 8160 + }, + { + "epoch": 0.07725220321655418, + "grad_norm": 290.17889404296875, + "learning_rate": 1.9883158963898407e-06, + "loss": 21.5781, + "step": 8161 + }, + { + "epoch": 0.07726166923826923, + "grad_norm": 166.3005828857422, + "learning_rate": 1.9883112229952043e-06, + "loss": 18.8516, + "step": 8162 + }, + { + "epoch": 0.07727113525998429, + "grad_norm": 378.1297912597656, + "learning_rate": 1.9883065486716197e-06, + "loss": 45.3281, + "step": 8163 + }, + { + "epoch": 0.07728060128169933, + "grad_norm": 2.7304036617279053, + "learning_rate": 1.988301873419091e-06, + "loss": 0.8999, + "step": 8164 + }, + { + "epoch": 0.0772900673034144, + "grad_norm": 1109.22900390625, + "learning_rate": 1.9882971972376236e-06, + "loss": 35.0547, + "step": 8165 + }, + { + "epoch": 0.07729953332512945, + "grad_norm": 347.8614807128906, + "learning_rate": 1.9882925201272206e-06, + "loss": 34.9219, + "step": 8166 + }, + { + "epoch": 0.0773089993468445, + "grad_norm": 190.9130859375, + "learning_rate": 1.988287842087887e-06, + "loss": 22.4297, + "step": 8167 + }, + { + "epoch": 0.07731846536855956, + "grad_norm": 694.0296020507812, + "learning_rate": 1.988283163119627e-06, + "loss": 55.4609, + "step": 8168 + }, + { + "epoch": 0.0773279313902746, + "grad_norm": 190.91978454589844, + "learning_rate": 1.9882784832224453e-06, + "loss": 19.0234, + "step": 8169 + }, + { + "epoch": 0.07733739741198967, + "grad_norm": 427.32794189453125, + "learning_rate": 1.9882738023963463e-06, + "loss": 22.4297, + "step": 8170 + }, + { + "epoch": 0.07734686343370471, + "grad_norm": 500.5888977050781, + "learning_rate": 1.9882691206413334e-06, + "loss": 24.7109, + "step": 8171 + }, + { + "epoch": 0.07735632945541977, + "grad_norm": 1659.0382080078125, + "learning_rate": 1.9882644379574125e-06, + "loss": 59.875, + "step": 8172 + }, + { + "epoch": 0.07736579547713482, + "grad_norm": 464.40777587890625, + "learning_rate": 1.9882597543445874e-06, + "loss": 28.7969, + "step": 8173 + }, + { + "epoch": 0.07737526149884988, + "grad_norm": 887.5704956054688, + "learning_rate": 1.9882550698028623e-06, + "loss": 35.9688, + "step": 8174 + }, + { + "epoch": 0.07738472752056494, + "grad_norm": 321.1134338378906, + "learning_rate": 1.988250384332241e-06, + "loss": 16.8398, + "step": 8175 + }, + { + "epoch": 0.07739419354227998, + "grad_norm": 372.7459411621094, + "learning_rate": 1.9882456979327297e-06, + "loss": 46.1719, + "step": 8176 + }, + { + "epoch": 0.07740365956399504, + "grad_norm": 331.54815673828125, + "learning_rate": 1.9882410106043315e-06, + "loss": 40.1094, + "step": 8177 + }, + { + "epoch": 0.07741312558571009, + "grad_norm": 195.5301055908203, + "learning_rate": 1.9882363223470507e-06, + "loss": 20.2344, + "step": 8178 + }, + { + "epoch": 0.07742259160742515, + "grad_norm": 299.5621032714844, + "learning_rate": 1.9882316331608924e-06, + "loss": 21.2617, + "step": 8179 + }, + { + "epoch": 0.0774320576291402, + "grad_norm": 450.919189453125, + "learning_rate": 1.9882269430458604e-06, + "loss": 23.4844, + "step": 8180 + }, + { + "epoch": 0.07744152365085526, + "grad_norm": 3.2459018230438232, + "learning_rate": 1.9882222520019593e-06, + "loss": 0.9629, + "step": 8181 + }, + { + "epoch": 0.0774509896725703, + "grad_norm": 252.8109130859375, + "learning_rate": 1.988217560029194e-06, + "loss": 21.5938, + "step": 8182 + }, + { + "epoch": 0.07746045569428536, + "grad_norm": 498.695068359375, + "learning_rate": 1.988212867127568e-06, + "loss": 36.2188, + "step": 8183 + }, + { + "epoch": 0.07746992171600042, + "grad_norm": 517.5072021484375, + "learning_rate": 1.988208173297086e-06, + "loss": 40.5625, + "step": 8184 + }, + { + "epoch": 0.07747938773771547, + "grad_norm": 1702.565673828125, + "learning_rate": 1.9882034785377533e-06, + "loss": 16.6172, + "step": 8185 + }, + { + "epoch": 0.07748885375943053, + "grad_norm": 195.5110626220703, + "learning_rate": 1.988198782849573e-06, + "loss": 22.7578, + "step": 8186 + }, + { + "epoch": 0.07749831978114557, + "grad_norm": 525.4268798828125, + "learning_rate": 1.9881940862325506e-06, + "loss": 44.5625, + "step": 8187 + }, + { + "epoch": 0.07750778580286063, + "grad_norm": 474.8701171875, + "learning_rate": 1.98818938868669e-06, + "loss": 42.375, + "step": 8188 + }, + { + "epoch": 0.07751725182457568, + "grad_norm": 551.1829223632812, + "learning_rate": 1.9881846902119954e-06, + "loss": 48.375, + "step": 8189 + }, + { + "epoch": 0.07752671784629074, + "grad_norm": 447.24420166015625, + "learning_rate": 1.9881799908084717e-06, + "loss": 35.4375, + "step": 8190 + }, + { + "epoch": 0.07753618386800579, + "grad_norm": 2.679389238357544, + "learning_rate": 1.9881752904761225e-06, + "loss": 0.9023, + "step": 8191 + }, + { + "epoch": 0.07754564988972085, + "grad_norm": 688.9622802734375, + "learning_rate": 1.9881705892149535e-06, + "loss": 53.8672, + "step": 8192 + }, + { + "epoch": 0.0775551159114359, + "grad_norm": 518.5081176757812, + "learning_rate": 1.9881658870249683e-06, + "loss": 38.5703, + "step": 8193 + }, + { + "epoch": 0.07756458193315095, + "grad_norm": 896.4820556640625, + "learning_rate": 1.9881611839061713e-06, + "loss": 76.0781, + "step": 8194 + }, + { + "epoch": 0.07757404795486601, + "grad_norm": 218.35986328125, + "learning_rate": 1.988156479858567e-06, + "loss": 20.0547, + "step": 8195 + }, + { + "epoch": 0.07758351397658106, + "grad_norm": 541.495849609375, + "learning_rate": 1.9881517748821603e-06, + "loss": 23.7422, + "step": 8196 + }, + { + "epoch": 0.07759297999829612, + "grad_norm": 687.8442993164062, + "learning_rate": 1.988147068976955e-06, + "loss": 44.125, + "step": 8197 + }, + { + "epoch": 0.07760244602001116, + "grad_norm": 225.2308349609375, + "learning_rate": 1.9881423621429557e-06, + "loss": 23.9297, + "step": 8198 + }, + { + "epoch": 0.07761191204172622, + "grad_norm": 275.5953369140625, + "learning_rate": 1.988137654380167e-06, + "loss": 31.625, + "step": 8199 + }, + { + "epoch": 0.07762137806344127, + "grad_norm": 820.475341796875, + "learning_rate": 1.988132945688593e-06, + "loss": 59.7109, + "step": 8200 + }, + { + "epoch": 0.07763084408515633, + "grad_norm": 675.160400390625, + "learning_rate": 1.9881282360682383e-06, + "loss": 23.2578, + "step": 8201 + }, + { + "epoch": 0.07764031010687139, + "grad_norm": 563.4215698242188, + "learning_rate": 1.9881235255191073e-06, + "loss": 46.6641, + "step": 8202 + }, + { + "epoch": 0.07764977612858644, + "grad_norm": 1079.9742431640625, + "learning_rate": 1.9881188140412047e-06, + "loss": 57.2734, + "step": 8203 + }, + { + "epoch": 0.0776592421503015, + "grad_norm": 235.62661743164062, + "learning_rate": 1.9881141016345343e-06, + "loss": 19.6484, + "step": 8204 + }, + { + "epoch": 0.07766870817201654, + "grad_norm": 445.4721984863281, + "learning_rate": 1.9881093882991016e-06, + "loss": 53.1562, + "step": 8205 + }, + { + "epoch": 0.0776781741937316, + "grad_norm": 382.2731628417969, + "learning_rate": 1.9881046740349095e-06, + "loss": 54.4844, + "step": 8206 + }, + { + "epoch": 0.07768764021544665, + "grad_norm": 295.4429931640625, + "learning_rate": 1.988099958841964e-06, + "loss": 19.8359, + "step": 8207 + }, + { + "epoch": 0.07769710623716171, + "grad_norm": 423.2117614746094, + "learning_rate": 1.9880952427202685e-06, + "loss": 44.6523, + "step": 8208 + }, + { + "epoch": 0.07770657225887677, + "grad_norm": 661.9795532226562, + "learning_rate": 1.9880905256698276e-06, + "loss": 24.7812, + "step": 8209 + }, + { + "epoch": 0.07771603828059181, + "grad_norm": 222.7840118408203, + "learning_rate": 1.988085807690646e-06, + "loss": 22.875, + "step": 8210 + }, + { + "epoch": 0.07772550430230687, + "grad_norm": 210.26768493652344, + "learning_rate": 1.9880810887827284e-06, + "loss": 19.1406, + "step": 8211 + }, + { + "epoch": 0.07773497032402192, + "grad_norm": 189.1550750732422, + "learning_rate": 1.9880763689460785e-06, + "loss": 24.9844, + "step": 8212 + }, + { + "epoch": 0.07774443634573698, + "grad_norm": 464.6105041503906, + "learning_rate": 1.988071648180701e-06, + "loss": 34.2109, + "step": 8213 + }, + { + "epoch": 0.07775390236745203, + "grad_norm": 460.5853576660156, + "learning_rate": 1.9880669264866007e-06, + "loss": 31.9688, + "step": 8214 + }, + { + "epoch": 0.07776336838916709, + "grad_norm": 397.88507080078125, + "learning_rate": 1.9880622038637816e-06, + "loss": 25.9844, + "step": 8215 + }, + { + "epoch": 0.07777283441088213, + "grad_norm": 397.10394287109375, + "learning_rate": 1.9880574803122485e-06, + "loss": 23.5078, + "step": 8216 + }, + { + "epoch": 0.07778230043259719, + "grad_norm": 3.3290598392486572, + "learning_rate": 1.9880527558320056e-06, + "loss": 0.8882, + "step": 8217 + }, + { + "epoch": 0.07779176645431225, + "grad_norm": 457.680908203125, + "learning_rate": 1.988048030423057e-06, + "loss": 34.0781, + "step": 8218 + }, + { + "epoch": 0.0778012324760273, + "grad_norm": 438.16156005859375, + "learning_rate": 1.9880433040854083e-06, + "loss": 16.332, + "step": 8219 + }, + { + "epoch": 0.07781069849774236, + "grad_norm": 650.6170654296875, + "learning_rate": 1.9880385768190623e-06, + "loss": 62.2188, + "step": 8220 + }, + { + "epoch": 0.0778201645194574, + "grad_norm": 339.4599304199219, + "learning_rate": 1.988033848624025e-06, + "loss": 23.6562, + "step": 8221 + }, + { + "epoch": 0.07782963054117246, + "grad_norm": 870.8787231445312, + "learning_rate": 1.9880291195002997e-06, + "loss": 47.5742, + "step": 8222 + }, + { + "epoch": 0.07783909656288751, + "grad_norm": 596.0487670898438, + "learning_rate": 1.9880243894478912e-06, + "loss": 21.4844, + "step": 8223 + }, + { + "epoch": 0.07784856258460257, + "grad_norm": 283.40576171875, + "learning_rate": 1.9880196584668043e-06, + "loss": 21.2812, + "step": 8224 + }, + { + "epoch": 0.07785802860631762, + "grad_norm": 547.9681396484375, + "learning_rate": 1.9880149265570436e-06, + "loss": 26.9141, + "step": 8225 + }, + { + "epoch": 0.07786749462803268, + "grad_norm": 518.9537963867188, + "learning_rate": 1.9880101937186125e-06, + "loss": 12.9297, + "step": 8226 + }, + { + "epoch": 0.07787696064974774, + "grad_norm": 911.3694458007812, + "learning_rate": 1.9880054599515164e-06, + "loss": 74.6562, + "step": 8227 + }, + { + "epoch": 0.07788642667146278, + "grad_norm": 961.8978881835938, + "learning_rate": 1.9880007252557593e-06, + "loss": 68.1172, + "step": 8228 + }, + { + "epoch": 0.07789589269317784, + "grad_norm": 437.2579650878906, + "learning_rate": 1.987995989631346e-06, + "loss": 35.1875, + "step": 8229 + }, + { + "epoch": 0.07790535871489289, + "grad_norm": 318.5867004394531, + "learning_rate": 1.9879912530782805e-06, + "loss": 26.1406, + "step": 8230 + }, + { + "epoch": 0.07791482473660795, + "grad_norm": 313.99542236328125, + "learning_rate": 1.987986515596568e-06, + "loss": 26.1797, + "step": 8231 + }, + { + "epoch": 0.077924290758323, + "grad_norm": 3.7438199520111084, + "learning_rate": 1.987981777186212e-06, + "loss": 0.8804, + "step": 8232 + }, + { + "epoch": 0.07793375678003805, + "grad_norm": 2.867356061935425, + "learning_rate": 1.987977037847217e-06, + "loss": 0.8647, + "step": 8233 + }, + { + "epoch": 0.0779432228017531, + "grad_norm": 358.2580261230469, + "learning_rate": 1.9879722975795887e-06, + "loss": 19.6641, + "step": 8234 + }, + { + "epoch": 0.07795268882346816, + "grad_norm": 549.95361328125, + "learning_rate": 1.98796755638333e-06, + "loss": 36.4141, + "step": 8235 + }, + { + "epoch": 0.07796215484518322, + "grad_norm": 509.61083984375, + "learning_rate": 1.987962814258447e-06, + "loss": 54.6406, + "step": 8236 + }, + { + "epoch": 0.07797162086689827, + "grad_norm": 453.89654541015625, + "learning_rate": 1.987958071204942e-06, + "loss": 27.4375, + "step": 8237 + }, + { + "epoch": 0.07798108688861333, + "grad_norm": 2.6008594036102295, + "learning_rate": 1.987953327222822e-06, + "loss": 0.8257, + "step": 8238 + }, + { + "epoch": 0.07799055291032837, + "grad_norm": 166.3528289794922, + "learning_rate": 1.9879485823120888e-06, + "loss": 20.0391, + "step": 8239 + }, + { + "epoch": 0.07800001893204343, + "grad_norm": 258.9320373535156, + "learning_rate": 1.9879438364727492e-06, + "loss": 17.9492, + "step": 8240 + }, + { + "epoch": 0.07800948495375848, + "grad_norm": 274.36846923828125, + "learning_rate": 1.9879390897048062e-06, + "loss": 22.0547, + "step": 8241 + }, + { + "epoch": 0.07801895097547354, + "grad_norm": 397.2615966796875, + "learning_rate": 1.987934342008265e-06, + "loss": 46.6406, + "step": 8242 + }, + { + "epoch": 0.07802841699718858, + "grad_norm": 263.95281982421875, + "learning_rate": 1.9879295933831297e-06, + "loss": 20.1641, + "step": 8243 + }, + { + "epoch": 0.07803788301890364, + "grad_norm": 458.8432922363281, + "learning_rate": 1.9879248438294046e-06, + "loss": 24.7031, + "step": 8244 + }, + { + "epoch": 0.0780473490406187, + "grad_norm": 645.1764526367188, + "learning_rate": 1.9879200933470946e-06, + "loss": 28.875, + "step": 8245 + }, + { + "epoch": 0.07805681506233375, + "grad_norm": 601.5708618164062, + "learning_rate": 1.987915341936204e-06, + "loss": 30.75, + "step": 8246 + }, + { + "epoch": 0.07806628108404881, + "grad_norm": 335.3727722167969, + "learning_rate": 1.9879105895967375e-06, + "loss": 14.625, + "step": 8247 + }, + { + "epoch": 0.07807574710576386, + "grad_norm": 452.1387634277344, + "learning_rate": 1.987905836328699e-06, + "loss": 20.6523, + "step": 8248 + }, + { + "epoch": 0.07808521312747892, + "grad_norm": 507.8369445800781, + "learning_rate": 1.987901082132093e-06, + "loss": 14.3438, + "step": 8249 + }, + { + "epoch": 0.07809467914919396, + "grad_norm": 361.8849182128906, + "learning_rate": 1.9878963270069245e-06, + "loss": 21.0469, + "step": 8250 + }, + { + "epoch": 0.07810414517090902, + "grad_norm": 209.7782745361328, + "learning_rate": 1.987891570953198e-06, + "loss": 15.1133, + "step": 8251 + }, + { + "epoch": 0.07811361119262408, + "grad_norm": 506.6585693359375, + "learning_rate": 1.9878868139709174e-06, + "loss": 52.75, + "step": 8252 + }, + { + "epoch": 0.07812307721433913, + "grad_norm": 737.6630249023438, + "learning_rate": 1.9878820560600874e-06, + "loss": 32.4453, + "step": 8253 + }, + { + "epoch": 0.07813254323605419, + "grad_norm": 216.17642211914062, + "learning_rate": 1.987877297220713e-06, + "loss": 27.2422, + "step": 8254 + }, + { + "epoch": 0.07814200925776923, + "grad_norm": 465.1225891113281, + "learning_rate": 1.987872537452798e-06, + "loss": 38.3125, + "step": 8255 + }, + { + "epoch": 0.0781514752794843, + "grad_norm": 245.48377990722656, + "learning_rate": 1.9878677767563467e-06, + "loss": 15.9375, + "step": 8256 + }, + { + "epoch": 0.07816094130119934, + "grad_norm": 241.1619110107422, + "learning_rate": 1.9878630151313645e-06, + "loss": 19.4219, + "step": 8257 + }, + { + "epoch": 0.0781704073229144, + "grad_norm": 3.9483375549316406, + "learning_rate": 1.987858252577855e-06, + "loss": 1.0554, + "step": 8258 + }, + { + "epoch": 0.07817987334462945, + "grad_norm": 327.32440185546875, + "learning_rate": 1.987853489095823e-06, + "loss": 22.0, + "step": 8259 + }, + { + "epoch": 0.0781893393663445, + "grad_norm": 303.9170837402344, + "learning_rate": 1.987848724685273e-06, + "loss": 21.9141, + "step": 8260 + }, + { + "epoch": 0.07819880538805957, + "grad_norm": 309.4626770019531, + "learning_rate": 1.9878439593462096e-06, + "loss": 24.0312, + "step": 8261 + }, + { + "epoch": 0.07820827140977461, + "grad_norm": 3.0832345485687256, + "learning_rate": 1.987839193078637e-06, + "loss": 1.0459, + "step": 8262 + }, + { + "epoch": 0.07821773743148967, + "grad_norm": 167.67681884765625, + "learning_rate": 1.9878344258825597e-06, + "loss": 23.8438, + "step": 8263 + }, + { + "epoch": 0.07822720345320472, + "grad_norm": 199.7349853515625, + "learning_rate": 1.987829657757983e-06, + "loss": 19.6484, + "step": 8264 + }, + { + "epoch": 0.07823666947491978, + "grad_norm": 2.7938594818115234, + "learning_rate": 1.9878248887049096e-06, + "loss": 0.8325, + "step": 8265 + }, + { + "epoch": 0.07824613549663482, + "grad_norm": 336.162353515625, + "learning_rate": 1.987820118723346e-06, + "loss": 21.1562, + "step": 8266 + }, + { + "epoch": 0.07825560151834988, + "grad_norm": 287.31170654296875, + "learning_rate": 1.9878153478132953e-06, + "loss": 17.1484, + "step": 8267 + }, + { + "epoch": 0.07826506754006493, + "grad_norm": 206.2054443359375, + "learning_rate": 1.9878105759747623e-06, + "loss": 16.707, + "step": 8268 + }, + { + "epoch": 0.07827453356177999, + "grad_norm": 3.7213282585144043, + "learning_rate": 1.987805803207752e-06, + "loss": 0.9614, + "step": 8269 + }, + { + "epoch": 0.07828399958349505, + "grad_norm": 366.2005310058594, + "learning_rate": 1.9878010295122682e-06, + "loss": 30.9453, + "step": 8270 + }, + { + "epoch": 0.0782934656052101, + "grad_norm": 476.2554016113281, + "learning_rate": 1.9877962548883157e-06, + "loss": 21.3047, + "step": 8271 + }, + { + "epoch": 0.07830293162692516, + "grad_norm": 421.9985656738281, + "learning_rate": 1.9877914793358993e-06, + "loss": 53.5469, + "step": 8272 + }, + { + "epoch": 0.0783123976486402, + "grad_norm": 681.7072143554688, + "learning_rate": 1.9877867028550227e-06, + "loss": 57.4844, + "step": 8273 + }, + { + "epoch": 0.07832186367035526, + "grad_norm": 664.4384155273438, + "learning_rate": 1.987781925445691e-06, + "loss": 43.5625, + "step": 8274 + }, + { + "epoch": 0.07833132969207031, + "grad_norm": 2.41931414604187, + "learning_rate": 1.987777147107909e-06, + "loss": 0.8208, + "step": 8275 + }, + { + "epoch": 0.07834079571378537, + "grad_norm": 471.935302734375, + "learning_rate": 1.9877723678416806e-06, + "loss": 34.6992, + "step": 8276 + }, + { + "epoch": 0.07835026173550041, + "grad_norm": 213.3160858154297, + "learning_rate": 1.98776758764701e-06, + "loss": 17.9844, + "step": 8277 + }, + { + "epoch": 0.07835972775721547, + "grad_norm": 271.3631896972656, + "learning_rate": 1.9877628065239024e-06, + "loss": 23.8672, + "step": 8278 + }, + { + "epoch": 0.07836919377893053, + "grad_norm": 883.251220703125, + "learning_rate": 1.9877580244723624e-06, + "loss": 37.0547, + "step": 8279 + }, + { + "epoch": 0.07837865980064558, + "grad_norm": 700.646484375, + "learning_rate": 1.9877532414923933e-06, + "loss": 32.0, + "step": 8280 + }, + { + "epoch": 0.07838812582236064, + "grad_norm": 471.791015625, + "learning_rate": 1.9877484575840012e-06, + "loss": 43.1953, + "step": 8281 + }, + { + "epoch": 0.07839759184407569, + "grad_norm": 247.42515563964844, + "learning_rate": 1.9877436727471894e-06, + "loss": 22.1953, + "step": 8282 + }, + { + "epoch": 0.07840705786579075, + "grad_norm": 342.30517578125, + "learning_rate": 1.9877388869819626e-06, + "loss": 21.6641, + "step": 8283 + }, + { + "epoch": 0.07841652388750579, + "grad_norm": 596.1990356445312, + "learning_rate": 1.987734100288326e-06, + "loss": 37.9805, + "step": 8284 + }, + { + "epoch": 0.07842598990922085, + "grad_norm": 3.197108507156372, + "learning_rate": 1.9877293126662834e-06, + "loss": 1.0454, + "step": 8285 + }, + { + "epoch": 0.0784354559309359, + "grad_norm": 332.29400634765625, + "learning_rate": 1.987724524115839e-06, + "loss": 39.7031, + "step": 8286 + }, + { + "epoch": 0.07844492195265096, + "grad_norm": 1673.3929443359375, + "learning_rate": 1.9877197346369987e-06, + "loss": 54.25, + "step": 8287 + }, + { + "epoch": 0.07845438797436602, + "grad_norm": 529.2880249023438, + "learning_rate": 1.9877149442297654e-06, + "loss": 33.125, + "step": 8288 + }, + { + "epoch": 0.07846385399608106, + "grad_norm": 1018.2173461914062, + "learning_rate": 1.9877101528941445e-06, + "loss": 61.1562, + "step": 8289 + }, + { + "epoch": 0.07847332001779612, + "grad_norm": 869.5347290039062, + "learning_rate": 1.9877053606301404e-06, + "loss": 35.0703, + "step": 8290 + }, + { + "epoch": 0.07848278603951117, + "grad_norm": 386.2419738769531, + "learning_rate": 1.9877005674377577e-06, + "loss": 23.4844, + "step": 8291 + }, + { + "epoch": 0.07849225206122623, + "grad_norm": 203.1743927001953, + "learning_rate": 1.9876957733170007e-06, + "loss": 17.6797, + "step": 8292 + }, + { + "epoch": 0.07850171808294128, + "grad_norm": 398.6024169921875, + "learning_rate": 1.9876909782678735e-06, + "loss": 41.3594, + "step": 8293 + }, + { + "epoch": 0.07851118410465634, + "grad_norm": 498.1827697753906, + "learning_rate": 1.987686182290381e-06, + "loss": 42.0, + "step": 8294 + }, + { + "epoch": 0.07852065012637138, + "grad_norm": 370.4280090332031, + "learning_rate": 1.987681385384528e-06, + "loss": 20.6172, + "step": 8295 + }, + { + "epoch": 0.07853011614808644, + "grad_norm": 212.22488403320312, + "learning_rate": 1.987676587550319e-06, + "loss": 21.0391, + "step": 8296 + }, + { + "epoch": 0.0785395821698015, + "grad_norm": 427.5256652832031, + "learning_rate": 1.9876717887877578e-06, + "loss": 24.2656, + "step": 8297 + }, + { + "epoch": 0.07854904819151655, + "grad_norm": 652.5330810546875, + "learning_rate": 1.98766698909685e-06, + "loss": 26.1953, + "step": 8298 + }, + { + "epoch": 0.07855851421323161, + "grad_norm": 2.4632163047790527, + "learning_rate": 1.987662188477599e-06, + "loss": 0.8325, + "step": 8299 + }, + { + "epoch": 0.07856798023494665, + "grad_norm": 276.2518310546875, + "learning_rate": 1.98765738693001e-06, + "loss": 26.6875, + "step": 8300 + }, + { + "epoch": 0.07857744625666171, + "grad_norm": 503.56390380859375, + "learning_rate": 1.987652584454087e-06, + "loss": 27.0801, + "step": 8301 + }, + { + "epoch": 0.07858691227837676, + "grad_norm": 757.4452514648438, + "learning_rate": 1.987647781049835e-06, + "loss": 38.9141, + "step": 8302 + }, + { + "epoch": 0.07859637830009182, + "grad_norm": 402.4341125488281, + "learning_rate": 1.987642976717258e-06, + "loss": 20.2578, + "step": 8303 + }, + { + "epoch": 0.07860584432180688, + "grad_norm": 297.1852722167969, + "learning_rate": 1.9876381714563614e-06, + "loss": 19.3203, + "step": 8304 + }, + { + "epoch": 0.07861531034352193, + "grad_norm": 489.5880432128906, + "learning_rate": 1.987633365267149e-06, + "loss": 58.625, + "step": 8305 + }, + { + "epoch": 0.07862477636523699, + "grad_norm": 530.9896240234375, + "learning_rate": 1.9876285581496257e-06, + "loss": 54.3438, + "step": 8306 + }, + { + "epoch": 0.07863424238695203, + "grad_norm": 312.6273498535156, + "learning_rate": 1.987623750103795e-06, + "loss": 33.5234, + "step": 8307 + }, + { + "epoch": 0.07864370840866709, + "grad_norm": 288.46527099609375, + "learning_rate": 1.987618941129663e-06, + "loss": 40.625, + "step": 8308 + }, + { + "epoch": 0.07865317443038214, + "grad_norm": 542.2213745117188, + "learning_rate": 1.9876141312272335e-06, + "loss": 24.0156, + "step": 8309 + }, + { + "epoch": 0.0786626404520972, + "grad_norm": 428.4734802246094, + "learning_rate": 1.9876093203965107e-06, + "loss": 13.9453, + "step": 8310 + }, + { + "epoch": 0.07867210647381224, + "grad_norm": 213.30455017089844, + "learning_rate": 1.987604508637499e-06, + "loss": 19.0781, + "step": 8311 + }, + { + "epoch": 0.0786815724955273, + "grad_norm": 238.19482421875, + "learning_rate": 1.9875996959502038e-06, + "loss": 21.9453, + "step": 8312 + }, + { + "epoch": 0.07869103851724236, + "grad_norm": 281.98016357421875, + "learning_rate": 1.987594882334629e-06, + "loss": 23.6875, + "step": 8313 + }, + { + "epoch": 0.07870050453895741, + "grad_norm": 2208.97021484375, + "learning_rate": 1.9875900677907797e-06, + "loss": 56.6875, + "step": 8314 + }, + { + "epoch": 0.07870997056067247, + "grad_norm": 1503.8074951171875, + "learning_rate": 1.9875852523186594e-06, + "loss": 57.4844, + "step": 8315 + }, + { + "epoch": 0.07871943658238752, + "grad_norm": 3.2314400672912598, + "learning_rate": 1.987580435918273e-06, + "loss": 0.9517, + "step": 8316 + }, + { + "epoch": 0.07872890260410258, + "grad_norm": 629.7354736328125, + "learning_rate": 1.9875756185896257e-06, + "loss": 34.5, + "step": 8317 + }, + { + "epoch": 0.07873836862581762, + "grad_norm": 726.44189453125, + "learning_rate": 1.9875708003327215e-06, + "loss": 50.2188, + "step": 8318 + }, + { + "epoch": 0.07874783464753268, + "grad_norm": 857.875732421875, + "learning_rate": 1.987565981147565e-06, + "loss": 48.5547, + "step": 8319 + }, + { + "epoch": 0.07875730066924773, + "grad_norm": 442.4071350097656, + "learning_rate": 1.987561161034161e-06, + "loss": 25.3125, + "step": 8320 + }, + { + "epoch": 0.07876676669096279, + "grad_norm": 401.05023193359375, + "learning_rate": 1.9875563399925133e-06, + "loss": 22.7734, + "step": 8321 + }, + { + "epoch": 0.07877623271267785, + "grad_norm": 128.1033935546875, + "learning_rate": 1.987551518022627e-06, + "loss": 16.9062, + "step": 8322 + }, + { + "epoch": 0.0787856987343929, + "grad_norm": 695.3598022460938, + "learning_rate": 1.9875466951245068e-06, + "loss": 28.6016, + "step": 8323 + }, + { + "epoch": 0.07879516475610795, + "grad_norm": 334.0130615234375, + "learning_rate": 1.9875418712981564e-06, + "loss": 34.1406, + "step": 8324 + }, + { + "epoch": 0.078804630777823, + "grad_norm": 338.6497802734375, + "learning_rate": 1.9875370465435812e-06, + "loss": 32.4375, + "step": 8325 + }, + { + "epoch": 0.07881409679953806, + "grad_norm": 3.753122568130493, + "learning_rate": 1.9875322208607854e-06, + "loss": 0.9658, + "step": 8326 + }, + { + "epoch": 0.07882356282125311, + "grad_norm": 284.7171630859375, + "learning_rate": 1.9875273942497736e-06, + "loss": 31.375, + "step": 8327 + }, + { + "epoch": 0.07883302884296817, + "grad_norm": 179.79754638671875, + "learning_rate": 1.98752256671055e-06, + "loss": 17.8516, + "step": 8328 + }, + { + "epoch": 0.07884249486468321, + "grad_norm": 1200.295654296875, + "learning_rate": 1.98751773824312e-06, + "loss": 48.5, + "step": 8329 + }, + { + "epoch": 0.07885196088639827, + "grad_norm": 700.0030517578125, + "learning_rate": 1.9875129088474872e-06, + "loss": 50.0469, + "step": 8330 + }, + { + "epoch": 0.07886142690811333, + "grad_norm": 299.6881408691406, + "learning_rate": 1.9875080785236564e-06, + "loss": 21.8828, + "step": 8331 + }, + { + "epoch": 0.07887089292982838, + "grad_norm": 175.5735321044922, + "learning_rate": 1.9875032472716325e-06, + "loss": 11.5312, + "step": 8332 + }, + { + "epoch": 0.07888035895154344, + "grad_norm": 356.0053405761719, + "learning_rate": 1.9874984150914197e-06, + "loss": 23.4297, + "step": 8333 + }, + { + "epoch": 0.07888982497325848, + "grad_norm": 374.185546875, + "learning_rate": 1.9874935819830227e-06, + "loss": 25.6875, + "step": 8334 + }, + { + "epoch": 0.07889929099497354, + "grad_norm": 614.9449462890625, + "learning_rate": 1.987488747946446e-06, + "loss": 62.4219, + "step": 8335 + }, + { + "epoch": 0.07890875701668859, + "grad_norm": 335.2027587890625, + "learning_rate": 1.987483912981694e-06, + "loss": 24.1172, + "step": 8336 + }, + { + "epoch": 0.07891822303840365, + "grad_norm": 472.56689453125, + "learning_rate": 1.9874790770887714e-06, + "loss": 28.8984, + "step": 8337 + }, + { + "epoch": 0.0789276890601187, + "grad_norm": 277.458251953125, + "learning_rate": 1.9874742402676825e-06, + "loss": 31.1719, + "step": 8338 + }, + { + "epoch": 0.07893715508183376, + "grad_norm": 178.8313751220703, + "learning_rate": 1.9874694025184322e-06, + "loss": 22.9531, + "step": 8339 + }, + { + "epoch": 0.07894662110354882, + "grad_norm": 512.3998413085938, + "learning_rate": 1.987464563841025e-06, + "loss": 56.4844, + "step": 8340 + }, + { + "epoch": 0.07895608712526386, + "grad_norm": 308.81036376953125, + "learning_rate": 1.987459724235465e-06, + "loss": 33.9062, + "step": 8341 + }, + { + "epoch": 0.07896555314697892, + "grad_norm": 208.32667541503906, + "learning_rate": 1.9874548837017574e-06, + "loss": 17.4688, + "step": 8342 + }, + { + "epoch": 0.07897501916869397, + "grad_norm": 261.7816162109375, + "learning_rate": 1.9874500422399067e-06, + "loss": 29.125, + "step": 8343 + }, + { + "epoch": 0.07898448519040903, + "grad_norm": 751.1848754882812, + "learning_rate": 1.987445199849917e-06, + "loss": 56.3828, + "step": 8344 + }, + { + "epoch": 0.07899395121212408, + "grad_norm": 527.1466064453125, + "learning_rate": 1.9874403565317928e-06, + "loss": 33.375, + "step": 8345 + }, + { + "epoch": 0.07900341723383913, + "grad_norm": 260.6401062011719, + "learning_rate": 1.987435512285539e-06, + "loss": 20.8438, + "step": 8346 + }, + { + "epoch": 0.0790128832555542, + "grad_norm": 258.20599365234375, + "learning_rate": 1.98743066711116e-06, + "loss": 19.5547, + "step": 8347 + }, + { + "epoch": 0.07902234927726924, + "grad_norm": 293.3506774902344, + "learning_rate": 1.9874258210086607e-06, + "loss": 17.1953, + "step": 8348 + }, + { + "epoch": 0.0790318152989843, + "grad_norm": 683.8775634765625, + "learning_rate": 1.9874209739780456e-06, + "loss": 47.7656, + "step": 8349 + }, + { + "epoch": 0.07904128132069935, + "grad_norm": 269.09063720703125, + "learning_rate": 1.9874161260193184e-06, + "loss": 20.8203, + "step": 8350 + }, + { + "epoch": 0.0790507473424144, + "grad_norm": 219.78627014160156, + "learning_rate": 1.9874112771324844e-06, + "loss": 19.9297, + "step": 8351 + }, + { + "epoch": 0.07906021336412945, + "grad_norm": 228.31610107421875, + "learning_rate": 1.9874064273175484e-06, + "loss": 17.375, + "step": 8352 + }, + { + "epoch": 0.07906967938584451, + "grad_norm": 353.06597900390625, + "learning_rate": 1.987401576574514e-06, + "loss": 19.5703, + "step": 8353 + }, + { + "epoch": 0.07907914540755956, + "grad_norm": 175.94122314453125, + "learning_rate": 1.987396724903387e-06, + "loss": 25.2305, + "step": 8354 + }, + { + "epoch": 0.07908861142927462, + "grad_norm": 416.22613525390625, + "learning_rate": 1.9873918723041708e-06, + "loss": 59.4297, + "step": 8355 + }, + { + "epoch": 0.07909807745098968, + "grad_norm": 693.0244750976562, + "learning_rate": 1.9873870187768706e-06, + "loss": 46.0625, + "step": 8356 + }, + { + "epoch": 0.07910754347270472, + "grad_norm": 408.0664367675781, + "learning_rate": 1.987382164321491e-06, + "loss": 35.1562, + "step": 8357 + }, + { + "epoch": 0.07911700949441978, + "grad_norm": 716.0042724609375, + "learning_rate": 1.9873773089380366e-06, + "loss": 49.1719, + "step": 8358 + }, + { + "epoch": 0.07912647551613483, + "grad_norm": 189.87136840820312, + "learning_rate": 1.987372452626511e-06, + "loss": 16.9453, + "step": 8359 + }, + { + "epoch": 0.07913594153784989, + "grad_norm": 299.8789978027344, + "learning_rate": 1.98736759538692e-06, + "loss": 22.1641, + "step": 8360 + }, + { + "epoch": 0.07914540755956494, + "grad_norm": 221.10508728027344, + "learning_rate": 1.9873627372192677e-06, + "loss": 27.8438, + "step": 8361 + }, + { + "epoch": 0.07915487358128, + "grad_norm": 2.7189688682556152, + "learning_rate": 1.987357878123559e-06, + "loss": 0.7998, + "step": 8362 + }, + { + "epoch": 0.07916433960299504, + "grad_norm": 274.54913330078125, + "learning_rate": 1.9873530180997977e-06, + "loss": 13.7734, + "step": 8363 + }, + { + "epoch": 0.0791738056247101, + "grad_norm": 451.6448059082031, + "learning_rate": 1.9873481571479886e-06, + "loss": 50.2031, + "step": 8364 + }, + { + "epoch": 0.07918327164642516, + "grad_norm": 2.893922805786133, + "learning_rate": 1.9873432952681372e-06, + "loss": 0.8262, + "step": 8365 + }, + { + "epoch": 0.07919273766814021, + "grad_norm": 340.35577392578125, + "learning_rate": 1.9873384324602465e-06, + "loss": 35.875, + "step": 8366 + }, + { + "epoch": 0.07920220368985527, + "grad_norm": 172.25277709960938, + "learning_rate": 1.9873335687243222e-06, + "loss": 18.9297, + "step": 8367 + }, + { + "epoch": 0.07921166971157032, + "grad_norm": 3.4133033752441406, + "learning_rate": 1.987328704060369e-06, + "loss": 0.9478, + "step": 8368 + }, + { + "epoch": 0.07922113573328537, + "grad_norm": 627.8491821289062, + "learning_rate": 1.9873238384683905e-06, + "loss": 26.7734, + "step": 8369 + }, + { + "epoch": 0.07923060175500042, + "grad_norm": 220.4805908203125, + "learning_rate": 1.987318971948392e-06, + "loss": 17.5703, + "step": 8370 + }, + { + "epoch": 0.07924006777671548, + "grad_norm": 236.9629669189453, + "learning_rate": 1.987314104500378e-06, + "loss": 26.3906, + "step": 8371 + }, + { + "epoch": 0.07924953379843053, + "grad_norm": 953.8373413085938, + "learning_rate": 1.9873092361243525e-06, + "loss": 23.2969, + "step": 8372 + }, + { + "epoch": 0.07925899982014559, + "grad_norm": 583.4243774414062, + "learning_rate": 1.9873043668203213e-06, + "loss": 49.5391, + "step": 8373 + }, + { + "epoch": 0.07926846584186065, + "grad_norm": 299.8168640136719, + "learning_rate": 1.987299496588288e-06, + "loss": 43.4844, + "step": 8374 + }, + { + "epoch": 0.07927793186357569, + "grad_norm": 444.7717590332031, + "learning_rate": 1.9872946254282568e-06, + "loss": 49.5039, + "step": 8375 + }, + { + "epoch": 0.07928739788529075, + "grad_norm": 508.0465087890625, + "learning_rate": 1.9872897533402337e-06, + "loss": 30.5703, + "step": 8376 + }, + { + "epoch": 0.0792968639070058, + "grad_norm": 230.66229248046875, + "learning_rate": 1.987284880324222e-06, + "loss": 30.8906, + "step": 8377 + }, + { + "epoch": 0.07930632992872086, + "grad_norm": 563.3212280273438, + "learning_rate": 1.987280006380227e-06, + "loss": 52.5469, + "step": 8378 + }, + { + "epoch": 0.0793157959504359, + "grad_norm": 411.8109130859375, + "learning_rate": 1.9872751315082525e-06, + "loss": 24.3281, + "step": 8379 + }, + { + "epoch": 0.07932526197215096, + "grad_norm": 647.2501220703125, + "learning_rate": 1.987270255708304e-06, + "loss": 49.5625, + "step": 8380 + }, + { + "epoch": 0.07933472799386601, + "grad_norm": 534.6000366210938, + "learning_rate": 1.987265378980386e-06, + "loss": 45.5469, + "step": 8381 + }, + { + "epoch": 0.07934419401558107, + "grad_norm": 234.41265869140625, + "learning_rate": 1.9872605013245023e-06, + "loss": 33.5781, + "step": 8382 + }, + { + "epoch": 0.07935366003729613, + "grad_norm": 470.29754638671875, + "learning_rate": 1.9872556227406576e-06, + "loss": 57.3438, + "step": 8383 + }, + { + "epoch": 0.07936312605901118, + "grad_norm": 207.7190399169922, + "learning_rate": 1.9872507432288575e-06, + "loss": 22.3203, + "step": 8384 + }, + { + "epoch": 0.07937259208072624, + "grad_norm": 271.7596130371094, + "learning_rate": 1.987245862789106e-06, + "loss": 25.2344, + "step": 8385 + }, + { + "epoch": 0.07938205810244128, + "grad_norm": 2.5706088542938232, + "learning_rate": 1.9872409814214075e-06, + "loss": 0.8109, + "step": 8386 + }, + { + "epoch": 0.07939152412415634, + "grad_norm": 191.98509216308594, + "learning_rate": 1.9872360991257666e-06, + "loss": 24.1797, + "step": 8387 + }, + { + "epoch": 0.07940099014587139, + "grad_norm": 503.81475830078125, + "learning_rate": 1.9872312159021876e-06, + "loss": 26.5859, + "step": 8388 + }, + { + "epoch": 0.07941045616758645, + "grad_norm": 494.9764709472656, + "learning_rate": 1.987226331750676e-06, + "loss": 68.3594, + "step": 8389 + }, + { + "epoch": 0.07941992218930151, + "grad_norm": 373.74884033203125, + "learning_rate": 1.987221446671236e-06, + "loss": 12.3125, + "step": 8390 + }, + { + "epoch": 0.07942938821101655, + "grad_norm": 287.5040588378906, + "learning_rate": 1.9872165606638715e-06, + "loss": 28.1172, + "step": 8391 + }, + { + "epoch": 0.07943885423273161, + "grad_norm": 578.14990234375, + "learning_rate": 1.9872116737285878e-06, + "loss": 28.3047, + "step": 8392 + }, + { + "epoch": 0.07944832025444666, + "grad_norm": 175.36109924316406, + "learning_rate": 1.987206785865389e-06, + "loss": 22.6875, + "step": 8393 + }, + { + "epoch": 0.07945778627616172, + "grad_norm": 512.2462768554688, + "learning_rate": 1.9872018970742808e-06, + "loss": 54.5, + "step": 8394 + }, + { + "epoch": 0.07946725229787677, + "grad_norm": 476.07867431640625, + "learning_rate": 1.987197007355267e-06, + "loss": 42.0156, + "step": 8395 + }, + { + "epoch": 0.07947671831959183, + "grad_norm": 645.4096069335938, + "learning_rate": 1.987192116708352e-06, + "loss": 50.2539, + "step": 8396 + }, + { + "epoch": 0.07948618434130687, + "grad_norm": 944.76123046875, + "learning_rate": 1.9871872251335406e-06, + "loss": 43.5078, + "step": 8397 + }, + { + "epoch": 0.07949565036302193, + "grad_norm": 3.3956398963928223, + "learning_rate": 1.987182332630837e-06, + "loss": 0.9771, + "step": 8398 + }, + { + "epoch": 0.07950511638473699, + "grad_norm": 348.6272888183594, + "learning_rate": 1.987177439200247e-06, + "loss": 35.8594, + "step": 8399 + }, + { + "epoch": 0.07951458240645204, + "grad_norm": 918.3283081054688, + "learning_rate": 1.987172544841774e-06, + "loss": 26.5156, + "step": 8400 + }, + { + "epoch": 0.0795240484281671, + "grad_norm": 509.1302490234375, + "learning_rate": 1.987167649555423e-06, + "loss": 44.6094, + "step": 8401 + }, + { + "epoch": 0.07953351444988215, + "grad_norm": 376.84381103515625, + "learning_rate": 1.987162753341199e-06, + "loss": 17.0273, + "step": 8402 + }, + { + "epoch": 0.0795429804715972, + "grad_norm": 274.1568603515625, + "learning_rate": 1.987157856199106e-06, + "loss": 22.5234, + "step": 8403 + }, + { + "epoch": 0.07955244649331225, + "grad_norm": 456.652587890625, + "learning_rate": 1.987152958129149e-06, + "loss": 31.2969, + "step": 8404 + }, + { + "epoch": 0.07956191251502731, + "grad_norm": 201.11021423339844, + "learning_rate": 1.987148059131332e-06, + "loss": 18.875, + "step": 8405 + }, + { + "epoch": 0.07957137853674236, + "grad_norm": 240.8824920654297, + "learning_rate": 1.9871431592056604e-06, + "loss": 22.5547, + "step": 8406 + }, + { + "epoch": 0.07958084455845742, + "grad_norm": 1208.202392578125, + "learning_rate": 1.987138258352138e-06, + "loss": 48.9531, + "step": 8407 + }, + { + "epoch": 0.07959031058017248, + "grad_norm": 666.98974609375, + "learning_rate": 1.9871333565707705e-06, + "loss": 23.9531, + "step": 8408 + }, + { + "epoch": 0.07959977660188752, + "grad_norm": 240.38890075683594, + "learning_rate": 1.9871284538615614e-06, + "loss": 17.7578, + "step": 8409 + }, + { + "epoch": 0.07960924262360258, + "grad_norm": 318.36688232421875, + "learning_rate": 1.987123550224516e-06, + "loss": 25.7969, + "step": 8410 + }, + { + "epoch": 0.07961870864531763, + "grad_norm": 239.87820434570312, + "learning_rate": 1.9871186456596385e-06, + "loss": 32.2812, + "step": 8411 + }, + { + "epoch": 0.07962817466703269, + "grad_norm": 540.6273803710938, + "learning_rate": 1.987113740166934e-06, + "loss": 55.0938, + "step": 8412 + }, + { + "epoch": 0.07963764068874774, + "grad_norm": 379.4848937988281, + "learning_rate": 1.9871088337464065e-06, + "loss": 22.6445, + "step": 8413 + }, + { + "epoch": 0.0796471067104628, + "grad_norm": 213.6057586669922, + "learning_rate": 1.987103926398061e-06, + "loss": 24.4844, + "step": 8414 + }, + { + "epoch": 0.07965657273217784, + "grad_norm": 573.684326171875, + "learning_rate": 1.987099018121902e-06, + "loss": 17.9648, + "step": 8415 + }, + { + "epoch": 0.0796660387538929, + "grad_norm": 1031.9822998046875, + "learning_rate": 1.987094108917934e-06, + "loss": 60.6562, + "step": 8416 + }, + { + "epoch": 0.07967550477560796, + "grad_norm": 642.7047119140625, + "learning_rate": 1.987089198786162e-06, + "loss": 48.2344, + "step": 8417 + }, + { + "epoch": 0.07968497079732301, + "grad_norm": 430.5849304199219, + "learning_rate": 1.9870842877265906e-06, + "loss": 16.8242, + "step": 8418 + }, + { + "epoch": 0.07969443681903807, + "grad_norm": 211.2175750732422, + "learning_rate": 1.9870793757392237e-06, + "loss": 22.9844, + "step": 8419 + }, + { + "epoch": 0.07970390284075311, + "grad_norm": 326.170654296875, + "learning_rate": 1.987074462824067e-06, + "loss": 23.8516, + "step": 8420 + }, + { + "epoch": 0.07971336886246817, + "grad_norm": 340.36199951171875, + "learning_rate": 1.987069548981124e-06, + "loss": 23.9922, + "step": 8421 + }, + { + "epoch": 0.07972283488418322, + "grad_norm": 309.5887145996094, + "learning_rate": 1.9870646342104e-06, + "loss": 18.2422, + "step": 8422 + }, + { + "epoch": 0.07973230090589828, + "grad_norm": 304.1363830566406, + "learning_rate": 1.987059718511899e-06, + "loss": 23.4492, + "step": 8423 + }, + { + "epoch": 0.07974176692761333, + "grad_norm": 439.6177978515625, + "learning_rate": 1.9870548018856266e-06, + "loss": 58.7031, + "step": 8424 + }, + { + "epoch": 0.07975123294932839, + "grad_norm": 1390.617919921875, + "learning_rate": 1.987049884331587e-06, + "loss": 53.5312, + "step": 8425 + }, + { + "epoch": 0.07976069897104344, + "grad_norm": 514.1192016601562, + "learning_rate": 1.9870449658497843e-06, + "loss": 35.6406, + "step": 8426 + }, + { + "epoch": 0.07977016499275849, + "grad_norm": 418.88751220703125, + "learning_rate": 1.9870400464402233e-06, + "loss": 34.0078, + "step": 8427 + }, + { + "epoch": 0.07977963101447355, + "grad_norm": 647.6624755859375, + "learning_rate": 1.9870351261029098e-06, + "loss": 33.0078, + "step": 8428 + }, + { + "epoch": 0.0797890970361886, + "grad_norm": 3.0237152576446533, + "learning_rate": 1.987030204837847e-06, + "loss": 0.8574, + "step": 8429 + }, + { + "epoch": 0.07979856305790366, + "grad_norm": 421.3977966308594, + "learning_rate": 1.98702528264504e-06, + "loss": 48.7969, + "step": 8430 + }, + { + "epoch": 0.0798080290796187, + "grad_norm": 234.77847290039062, + "learning_rate": 1.9870203595244934e-06, + "loss": 12.7383, + "step": 8431 + }, + { + "epoch": 0.07981749510133376, + "grad_norm": 233.3870391845703, + "learning_rate": 1.987015435476212e-06, + "loss": 20.4609, + "step": 8432 + }, + { + "epoch": 0.07982696112304882, + "grad_norm": 1149.430419921875, + "learning_rate": 1.9870105105002e-06, + "loss": 37.3164, + "step": 8433 + }, + { + "epoch": 0.07983642714476387, + "grad_norm": 188.66136169433594, + "learning_rate": 1.9870055845964625e-06, + "loss": 25.2656, + "step": 8434 + }, + { + "epoch": 0.07984589316647893, + "grad_norm": 444.0614318847656, + "learning_rate": 1.987000657765004e-06, + "loss": 22.1211, + "step": 8435 + }, + { + "epoch": 0.07985535918819398, + "grad_norm": 264.362548828125, + "learning_rate": 1.9869957300058295e-06, + "loss": 21.4766, + "step": 8436 + }, + { + "epoch": 0.07986482520990903, + "grad_norm": 390.21307373046875, + "learning_rate": 1.986990801318943e-06, + "loss": 42.6875, + "step": 8437 + }, + { + "epoch": 0.07987429123162408, + "grad_norm": 292.9210205078125, + "learning_rate": 1.986985871704349e-06, + "loss": 25.3125, + "step": 8438 + }, + { + "epoch": 0.07988375725333914, + "grad_norm": 280.2536315917969, + "learning_rate": 1.9869809411620522e-06, + "loss": 31.5, + "step": 8439 + }, + { + "epoch": 0.07989322327505419, + "grad_norm": 573.0096435546875, + "learning_rate": 1.9869760096920584e-06, + "loss": 61.1875, + "step": 8440 + }, + { + "epoch": 0.07990268929676925, + "grad_norm": 222.3168182373047, + "learning_rate": 1.986971077294371e-06, + "loss": 18.1562, + "step": 8441 + }, + { + "epoch": 0.07991215531848431, + "grad_norm": 557.6691284179688, + "learning_rate": 1.9869661439689946e-06, + "loss": 51.8281, + "step": 8442 + }, + { + "epoch": 0.07992162134019935, + "grad_norm": 335.64801025390625, + "learning_rate": 1.9869612097159347e-06, + "loss": 24.8828, + "step": 8443 + }, + { + "epoch": 0.07993108736191441, + "grad_norm": 212.6304931640625, + "learning_rate": 1.9869562745351954e-06, + "loss": 15.7695, + "step": 8444 + }, + { + "epoch": 0.07994055338362946, + "grad_norm": 208.56507873535156, + "learning_rate": 1.9869513384267814e-06, + "loss": 25.1953, + "step": 8445 + }, + { + "epoch": 0.07995001940534452, + "grad_norm": 489.3790588378906, + "learning_rate": 1.986946401390697e-06, + "loss": 29.7578, + "step": 8446 + }, + { + "epoch": 0.07995948542705957, + "grad_norm": 818.2174682617188, + "learning_rate": 1.9869414634269475e-06, + "loss": 9.4492, + "step": 8447 + }, + { + "epoch": 0.07996895144877463, + "grad_norm": 315.31536865234375, + "learning_rate": 1.9869365245355373e-06, + "loss": 22.9141, + "step": 8448 + }, + { + "epoch": 0.07997841747048967, + "grad_norm": 3.259554862976074, + "learning_rate": 1.986931584716471e-06, + "loss": 0.9932, + "step": 8449 + }, + { + "epoch": 0.07998788349220473, + "grad_norm": 711.010498046875, + "learning_rate": 1.986926643969753e-06, + "loss": 43.7188, + "step": 8450 + }, + { + "epoch": 0.07999734951391979, + "grad_norm": 2.8715198040008545, + "learning_rate": 1.9869217022953885e-06, + "loss": 0.8027, + "step": 8451 + }, + { + "epoch": 0.08000681553563484, + "grad_norm": 232.66920471191406, + "learning_rate": 1.9869167596933815e-06, + "loss": 22.1406, + "step": 8452 + }, + { + "epoch": 0.0800162815573499, + "grad_norm": 455.4469909667969, + "learning_rate": 1.986911816163737e-06, + "loss": 35.625, + "step": 8453 + }, + { + "epoch": 0.08002574757906494, + "grad_norm": 336.6967468261719, + "learning_rate": 1.98690687170646e-06, + "loss": 23.4297, + "step": 8454 + }, + { + "epoch": 0.08003521360078, + "grad_norm": 278.6434326171875, + "learning_rate": 1.986901926321554e-06, + "loss": 20.2188, + "step": 8455 + }, + { + "epoch": 0.08004467962249505, + "grad_norm": 247.19412231445312, + "learning_rate": 1.9868969800090248e-06, + "loss": 20.1328, + "step": 8456 + }, + { + "epoch": 0.08005414564421011, + "grad_norm": 434.378173828125, + "learning_rate": 1.9868920327688767e-06, + "loss": 38.3594, + "step": 8457 + }, + { + "epoch": 0.08006361166592516, + "grad_norm": 185.87579345703125, + "learning_rate": 1.986887084601114e-06, + "loss": 21.3594, + "step": 8458 + }, + { + "epoch": 0.08007307768764022, + "grad_norm": 415.3404846191406, + "learning_rate": 1.986882135505742e-06, + "loss": 36.8906, + "step": 8459 + }, + { + "epoch": 0.08008254370935527, + "grad_norm": 529.0100708007812, + "learning_rate": 1.9868771854827644e-06, + "loss": 43.5938, + "step": 8460 + }, + { + "epoch": 0.08009200973107032, + "grad_norm": 737.5526123046875, + "learning_rate": 1.986872234532187e-06, + "loss": 34.0781, + "step": 8461 + }, + { + "epoch": 0.08010147575278538, + "grad_norm": 562.6326293945312, + "learning_rate": 1.9868672826540137e-06, + "loss": 54.2031, + "step": 8462 + }, + { + "epoch": 0.08011094177450043, + "grad_norm": 293.9028625488281, + "learning_rate": 1.9868623298482493e-06, + "loss": 27.3672, + "step": 8463 + }, + { + "epoch": 0.08012040779621549, + "grad_norm": 489.36895751953125, + "learning_rate": 1.9868573761148987e-06, + "loss": 50.0938, + "step": 8464 + }, + { + "epoch": 0.08012987381793053, + "grad_norm": 417.83502197265625, + "learning_rate": 1.9868524214539665e-06, + "loss": 31.0469, + "step": 8465 + }, + { + "epoch": 0.08013933983964559, + "grad_norm": 339.16229248046875, + "learning_rate": 1.9868474658654567e-06, + "loss": 21.3477, + "step": 8466 + }, + { + "epoch": 0.08014880586136064, + "grad_norm": 360.8052978515625, + "learning_rate": 1.986842509349375e-06, + "loss": 13.4414, + "step": 8467 + }, + { + "epoch": 0.0801582718830757, + "grad_norm": 430.62493896484375, + "learning_rate": 1.9868375519057253e-06, + "loss": 29.2031, + "step": 8468 + }, + { + "epoch": 0.08016773790479076, + "grad_norm": 455.3436279296875, + "learning_rate": 1.9868325935345127e-06, + "loss": 46.4531, + "step": 8469 + }, + { + "epoch": 0.0801772039265058, + "grad_norm": 390.69305419921875, + "learning_rate": 1.9868276342357414e-06, + "loss": 33.7969, + "step": 8470 + }, + { + "epoch": 0.08018666994822087, + "grad_norm": 199.37109375, + "learning_rate": 1.986822674009416e-06, + "loss": 19.9297, + "step": 8471 + }, + { + "epoch": 0.08019613596993591, + "grad_norm": 311.1738586425781, + "learning_rate": 1.9868177128555423e-06, + "loss": 11.4922, + "step": 8472 + }, + { + "epoch": 0.08020560199165097, + "grad_norm": 263.65753173828125, + "learning_rate": 1.986812750774124e-06, + "loss": 32.0547, + "step": 8473 + }, + { + "epoch": 0.08021506801336602, + "grad_norm": 798.34716796875, + "learning_rate": 1.9868077877651653e-06, + "loss": 29.6641, + "step": 8474 + }, + { + "epoch": 0.08022453403508108, + "grad_norm": 298.0019836425781, + "learning_rate": 1.9868028238286718e-06, + "loss": 30.5938, + "step": 8475 + }, + { + "epoch": 0.08023400005679614, + "grad_norm": 213.15919494628906, + "learning_rate": 1.986797858964648e-06, + "loss": 22.4531, + "step": 8476 + }, + { + "epoch": 0.08024346607851118, + "grad_norm": 320.39013671875, + "learning_rate": 1.986792893173098e-06, + "loss": 27.625, + "step": 8477 + }, + { + "epoch": 0.08025293210022624, + "grad_norm": 307.4263610839844, + "learning_rate": 1.9867879264540272e-06, + "loss": 19.1016, + "step": 8478 + }, + { + "epoch": 0.08026239812194129, + "grad_norm": 289.7598571777344, + "learning_rate": 1.98678295880744e-06, + "loss": 20.9531, + "step": 8479 + }, + { + "epoch": 0.08027186414365635, + "grad_norm": 501.16448974609375, + "learning_rate": 1.986777990233341e-06, + "loss": 29.6875, + "step": 8480 + }, + { + "epoch": 0.0802813301653714, + "grad_norm": 232.2662353515625, + "learning_rate": 1.986773020731735e-06, + "loss": 19.3594, + "step": 8481 + }, + { + "epoch": 0.08029079618708646, + "grad_norm": 388.5505065917969, + "learning_rate": 1.986768050302626e-06, + "loss": 33.8438, + "step": 8482 + }, + { + "epoch": 0.0803002622088015, + "grad_norm": 227.3174285888672, + "learning_rate": 1.9867630789460196e-06, + "loss": 27.25, + "step": 8483 + }, + { + "epoch": 0.08030972823051656, + "grad_norm": 268.02130126953125, + "learning_rate": 1.9867581066619203e-06, + "loss": 20.3906, + "step": 8484 + }, + { + "epoch": 0.08031919425223162, + "grad_norm": 516.3768310546875, + "learning_rate": 1.986753133450332e-06, + "loss": 43.5156, + "step": 8485 + }, + { + "epoch": 0.08032866027394667, + "grad_norm": 632.3856811523438, + "learning_rate": 1.9867481593112603e-06, + "loss": 48.2578, + "step": 8486 + }, + { + "epoch": 0.08033812629566173, + "grad_norm": 397.4874572753906, + "learning_rate": 1.98674318424471e-06, + "loss": 26.7656, + "step": 8487 + }, + { + "epoch": 0.08034759231737677, + "grad_norm": 360.2332763671875, + "learning_rate": 1.9867382082506846e-06, + "loss": 21.8516, + "step": 8488 + }, + { + "epoch": 0.08035705833909183, + "grad_norm": 456.57318115234375, + "learning_rate": 1.9867332313291895e-06, + "loss": 54.0469, + "step": 8489 + }, + { + "epoch": 0.08036652436080688, + "grad_norm": 397.6147155761719, + "learning_rate": 1.98672825348023e-06, + "loss": 23.9023, + "step": 8490 + }, + { + "epoch": 0.08037599038252194, + "grad_norm": 414.3427734375, + "learning_rate": 1.9867232747038093e-06, + "loss": 31.6172, + "step": 8491 + }, + { + "epoch": 0.08038545640423699, + "grad_norm": 267.4054870605469, + "learning_rate": 1.9867182949999335e-06, + "loss": 27.9062, + "step": 8492 + }, + { + "epoch": 0.08039492242595205, + "grad_norm": 204.1593475341797, + "learning_rate": 1.986713314368606e-06, + "loss": 13.5547, + "step": 8493 + }, + { + "epoch": 0.0804043884476671, + "grad_norm": 215.73699951171875, + "learning_rate": 1.986708332809833e-06, + "loss": 20.0156, + "step": 8494 + }, + { + "epoch": 0.08041385446938215, + "grad_norm": 262.4432678222656, + "learning_rate": 1.986703350323618e-06, + "loss": 20.4844, + "step": 8495 + }, + { + "epoch": 0.08042332049109721, + "grad_norm": 497.19744873046875, + "learning_rate": 1.986698366909966e-06, + "loss": 45.6484, + "step": 8496 + }, + { + "epoch": 0.08043278651281226, + "grad_norm": 437.8172607421875, + "learning_rate": 1.9866933825688816e-06, + "loss": 29.3828, + "step": 8497 + }, + { + "epoch": 0.08044225253452732, + "grad_norm": 3.3869142532348633, + "learning_rate": 1.98668839730037e-06, + "loss": 0.9375, + "step": 8498 + }, + { + "epoch": 0.08045171855624236, + "grad_norm": 710.2008056640625, + "learning_rate": 1.9866834111044354e-06, + "loss": 27.1641, + "step": 8499 + }, + { + "epoch": 0.08046118457795742, + "grad_norm": 198.98416137695312, + "learning_rate": 1.9866784239810824e-06, + "loss": 19.8828, + "step": 8500 + }, + { + "epoch": 0.08047065059967247, + "grad_norm": 398.0941162109375, + "learning_rate": 1.986673435930316e-06, + "loss": 23.3281, + "step": 8501 + }, + { + "epoch": 0.08048011662138753, + "grad_norm": 803.37451171875, + "learning_rate": 1.9866684469521405e-06, + "loss": 64.5469, + "step": 8502 + }, + { + "epoch": 0.08048958264310259, + "grad_norm": 210.36546325683594, + "learning_rate": 1.986663457046561e-06, + "loss": 10.7734, + "step": 8503 + }, + { + "epoch": 0.08049904866481764, + "grad_norm": 344.4302673339844, + "learning_rate": 1.986658466213582e-06, + "loss": 22.6094, + "step": 8504 + }, + { + "epoch": 0.0805085146865327, + "grad_norm": 255.50486755371094, + "learning_rate": 1.9866534744532083e-06, + "loss": 23.9766, + "step": 8505 + }, + { + "epoch": 0.08051798070824774, + "grad_norm": 309.3825378417969, + "learning_rate": 1.9866484817654444e-06, + "loss": 27.4062, + "step": 8506 + }, + { + "epoch": 0.0805274467299628, + "grad_norm": 474.3784484863281, + "learning_rate": 1.9866434881502958e-06, + "loss": 45.4844, + "step": 8507 + }, + { + "epoch": 0.08053691275167785, + "grad_norm": 582.953125, + "learning_rate": 1.9866384936077656e-06, + "loss": 40.0234, + "step": 8508 + }, + { + "epoch": 0.08054637877339291, + "grad_norm": 562.1674194335938, + "learning_rate": 1.9866334981378596e-06, + "loss": 36.5234, + "step": 8509 + }, + { + "epoch": 0.08055584479510795, + "grad_norm": 635.6570434570312, + "learning_rate": 1.9866285017405826e-06, + "loss": 53.3281, + "step": 8510 + }, + { + "epoch": 0.08056531081682301, + "grad_norm": 347.3316345214844, + "learning_rate": 1.986623504415939e-06, + "loss": 25.1797, + "step": 8511 + }, + { + "epoch": 0.08057477683853807, + "grad_norm": 3.4086503982543945, + "learning_rate": 1.9866185061639332e-06, + "loss": 1.0352, + "step": 8512 + }, + { + "epoch": 0.08058424286025312, + "grad_norm": 552.4366455078125, + "learning_rate": 1.9866135069845706e-06, + "loss": 37.7344, + "step": 8513 + }, + { + "epoch": 0.08059370888196818, + "grad_norm": 924.2072143554688, + "learning_rate": 1.9866085068778547e-06, + "loss": 24.5625, + "step": 8514 + }, + { + "epoch": 0.08060317490368323, + "grad_norm": 212.4166717529297, + "learning_rate": 1.986603505843792e-06, + "loss": 13.5078, + "step": 8515 + }, + { + "epoch": 0.08061264092539829, + "grad_norm": 331.066650390625, + "learning_rate": 1.9865985038823855e-06, + "loss": 38.0156, + "step": 8516 + }, + { + "epoch": 0.08062210694711333, + "grad_norm": 659.16943359375, + "learning_rate": 1.9865935009936407e-06, + "loss": 82.6641, + "step": 8517 + }, + { + "epoch": 0.08063157296882839, + "grad_norm": 472.3448181152344, + "learning_rate": 1.986588497177562e-06, + "loss": 50.9844, + "step": 8518 + }, + { + "epoch": 0.08064103899054345, + "grad_norm": 528.5516967773438, + "learning_rate": 1.986583492434155e-06, + "loss": 42.5312, + "step": 8519 + }, + { + "epoch": 0.0806505050122585, + "grad_norm": 496.7639465332031, + "learning_rate": 1.986578486763423e-06, + "loss": 52.6875, + "step": 8520 + }, + { + "epoch": 0.08065997103397356, + "grad_norm": 300.6073913574219, + "learning_rate": 1.9865734801653714e-06, + "loss": 22.6602, + "step": 8521 + }, + { + "epoch": 0.0806694370556886, + "grad_norm": 583.0403442382812, + "learning_rate": 1.9865684726400055e-06, + "loss": 25.082, + "step": 8522 + }, + { + "epoch": 0.08067890307740366, + "grad_norm": 300.95819091796875, + "learning_rate": 1.9865634641873287e-06, + "loss": 26.0547, + "step": 8523 + }, + { + "epoch": 0.08068836909911871, + "grad_norm": 574.1019897460938, + "learning_rate": 1.986558454807347e-06, + "loss": 35.4062, + "step": 8524 + }, + { + "epoch": 0.08069783512083377, + "grad_norm": 249.85816955566406, + "learning_rate": 1.9865534445000644e-06, + "loss": 19.1523, + "step": 8525 + }, + { + "epoch": 0.08070730114254882, + "grad_norm": 200.61672973632812, + "learning_rate": 1.9865484332654857e-06, + "loss": 18.1875, + "step": 8526 + }, + { + "epoch": 0.08071676716426388, + "grad_norm": 245.45443725585938, + "learning_rate": 1.9865434211036157e-06, + "loss": 22.6641, + "step": 8527 + }, + { + "epoch": 0.08072623318597894, + "grad_norm": 277.1518859863281, + "learning_rate": 1.986538408014459e-06, + "loss": 19.3828, + "step": 8528 + }, + { + "epoch": 0.08073569920769398, + "grad_norm": 334.88250732421875, + "learning_rate": 1.98653339399802e-06, + "loss": 23.2188, + "step": 8529 + }, + { + "epoch": 0.08074516522940904, + "grad_norm": 929.8727416992188, + "learning_rate": 1.9865283790543042e-06, + "loss": 69.25, + "step": 8530 + }, + { + "epoch": 0.08075463125112409, + "grad_norm": 353.0323181152344, + "learning_rate": 1.986523363183316e-06, + "loss": 19.9688, + "step": 8531 + }, + { + "epoch": 0.08076409727283915, + "grad_norm": 282.87738037109375, + "learning_rate": 1.9865183463850597e-06, + "loss": 22.1953, + "step": 8532 + }, + { + "epoch": 0.0807735632945542, + "grad_norm": 440.9579772949219, + "learning_rate": 1.9865133286595405e-06, + "loss": 44.4688, + "step": 8533 + }, + { + "epoch": 0.08078302931626925, + "grad_norm": 700.0255126953125, + "learning_rate": 1.986508310006763e-06, + "loss": 50.1875, + "step": 8534 + }, + { + "epoch": 0.0807924953379843, + "grad_norm": 268.7870788574219, + "learning_rate": 1.9865032904267316e-06, + "loss": 11.332, + "step": 8535 + }, + { + "epoch": 0.08080196135969936, + "grad_norm": 435.78216552734375, + "learning_rate": 1.9864982699194516e-06, + "loss": 28.25, + "step": 8536 + }, + { + "epoch": 0.08081142738141442, + "grad_norm": 353.9810485839844, + "learning_rate": 1.9864932484849277e-06, + "loss": 42.6875, + "step": 8537 + }, + { + "epoch": 0.08082089340312947, + "grad_norm": 197.31436157226562, + "learning_rate": 1.9864882261231635e-06, + "loss": 20.1484, + "step": 8538 + }, + { + "epoch": 0.08083035942484453, + "grad_norm": 771.5092163085938, + "learning_rate": 1.986483202834165e-06, + "loss": 35.2031, + "step": 8539 + }, + { + "epoch": 0.08083982544655957, + "grad_norm": 543.054443359375, + "learning_rate": 1.9864781786179366e-06, + "loss": 30.3594, + "step": 8540 + }, + { + "epoch": 0.08084929146827463, + "grad_norm": 402.3676452636719, + "learning_rate": 1.9864731534744827e-06, + "loss": 17.6523, + "step": 8541 + }, + { + "epoch": 0.08085875748998968, + "grad_norm": 301.1455078125, + "learning_rate": 1.986468127403808e-06, + "loss": 20.5195, + "step": 8542 + }, + { + "epoch": 0.08086822351170474, + "grad_norm": 955.1190795898438, + "learning_rate": 1.9864631004059176e-06, + "loss": 47.9531, + "step": 8543 + }, + { + "epoch": 0.08087768953341978, + "grad_norm": 421.8614501953125, + "learning_rate": 1.9864580724808165e-06, + "loss": 16.9766, + "step": 8544 + }, + { + "epoch": 0.08088715555513484, + "grad_norm": 707.968017578125, + "learning_rate": 1.9864530436285086e-06, + "loss": 37.4531, + "step": 8545 + }, + { + "epoch": 0.0808966215768499, + "grad_norm": 2.808866262435913, + "learning_rate": 1.986448013848999e-06, + "loss": 0.8706, + "step": 8546 + }, + { + "epoch": 0.08090608759856495, + "grad_norm": 187.32730102539062, + "learning_rate": 1.986442983142292e-06, + "loss": 22.2188, + "step": 8547 + }, + { + "epoch": 0.08091555362028001, + "grad_norm": 541.3775024414062, + "learning_rate": 1.9864379515083937e-06, + "loss": 30.3359, + "step": 8548 + }, + { + "epoch": 0.08092501964199506, + "grad_norm": 347.1914978027344, + "learning_rate": 1.986432918947307e-06, + "loss": 28.8281, + "step": 8549 + }, + { + "epoch": 0.08093448566371012, + "grad_norm": 306.2086181640625, + "learning_rate": 1.9864278854590385e-06, + "loss": 24.7422, + "step": 8550 + }, + { + "epoch": 0.08094395168542516, + "grad_norm": 379.7132568359375, + "learning_rate": 1.986422851043591e-06, + "loss": 34.5, + "step": 8551 + }, + { + "epoch": 0.08095341770714022, + "grad_norm": 534.699462890625, + "learning_rate": 1.9864178157009707e-06, + "loss": 38.4219, + "step": 8552 + }, + { + "epoch": 0.08096288372885527, + "grad_norm": 257.14288330078125, + "learning_rate": 1.9864127794311817e-06, + "loss": 19.6641, + "step": 8553 + }, + { + "epoch": 0.08097234975057033, + "grad_norm": 503.1828918457031, + "learning_rate": 1.986407742234229e-06, + "loss": 26.3281, + "step": 8554 + }, + { + "epoch": 0.08098181577228539, + "grad_norm": 551.9478149414062, + "learning_rate": 1.986402704110117e-06, + "loss": 56.6562, + "step": 8555 + }, + { + "epoch": 0.08099128179400043, + "grad_norm": 372.1324462890625, + "learning_rate": 1.9863976650588506e-06, + "loss": 22.9766, + "step": 8556 + }, + { + "epoch": 0.0810007478157155, + "grad_norm": 782.4595336914062, + "learning_rate": 1.9863926250804347e-06, + "loss": 29.1641, + "step": 8557 + }, + { + "epoch": 0.08101021383743054, + "grad_norm": 438.630615234375, + "learning_rate": 1.9863875841748738e-06, + "loss": 25.2578, + "step": 8558 + }, + { + "epoch": 0.0810196798591456, + "grad_norm": 421.8316345214844, + "learning_rate": 1.986382542342173e-06, + "loss": 44.25, + "step": 8559 + }, + { + "epoch": 0.08102914588086065, + "grad_norm": 730.1160278320312, + "learning_rate": 1.9863774995823364e-06, + "loss": 67.4375, + "step": 8560 + }, + { + "epoch": 0.0810386119025757, + "grad_norm": 629.3380737304688, + "learning_rate": 1.9863724558953696e-06, + "loss": 33.9844, + "step": 8561 + }, + { + "epoch": 0.08104807792429077, + "grad_norm": 374.15399169921875, + "learning_rate": 1.9863674112812766e-06, + "loss": 17.3594, + "step": 8562 + }, + { + "epoch": 0.08105754394600581, + "grad_norm": 239.8258056640625, + "learning_rate": 1.9863623657400623e-06, + "loss": 20.4922, + "step": 8563 + }, + { + "epoch": 0.08106700996772087, + "grad_norm": 427.66839599609375, + "learning_rate": 1.9863573192717316e-06, + "loss": 20.6953, + "step": 8564 + }, + { + "epoch": 0.08107647598943592, + "grad_norm": 769.4589233398438, + "learning_rate": 1.986352271876289e-06, + "loss": 47.4219, + "step": 8565 + }, + { + "epoch": 0.08108594201115098, + "grad_norm": 469.4601745605469, + "learning_rate": 1.9863472235537396e-06, + "loss": 38.5781, + "step": 8566 + }, + { + "epoch": 0.08109540803286602, + "grad_norm": 257.622802734375, + "learning_rate": 1.986342174304088e-06, + "loss": 19.6328, + "step": 8567 + }, + { + "epoch": 0.08110487405458108, + "grad_norm": 480.2428894042969, + "learning_rate": 1.986337124127339e-06, + "loss": 34.625, + "step": 8568 + }, + { + "epoch": 0.08111434007629613, + "grad_norm": 1131.1986083984375, + "learning_rate": 1.9863320730234973e-06, + "loss": 50.9375, + "step": 8569 + }, + { + "epoch": 0.08112380609801119, + "grad_norm": 402.59967041015625, + "learning_rate": 1.9863270209925673e-06, + "loss": 35.2109, + "step": 8570 + }, + { + "epoch": 0.08113327211972625, + "grad_norm": 308.095703125, + "learning_rate": 1.9863219680345543e-06, + "loss": 21.9609, + "step": 8571 + }, + { + "epoch": 0.0811427381414413, + "grad_norm": 385.0146179199219, + "learning_rate": 1.986316914149463e-06, + "loss": 32.4688, + "step": 8572 + }, + { + "epoch": 0.08115220416315636, + "grad_norm": 845.9793701171875, + "learning_rate": 1.9863118593372977e-06, + "loss": 44.9688, + "step": 8573 + }, + { + "epoch": 0.0811616701848714, + "grad_norm": 289.05633544921875, + "learning_rate": 1.9863068035980634e-06, + "loss": 23.7422, + "step": 8574 + }, + { + "epoch": 0.08117113620658646, + "grad_norm": 823.8607788085938, + "learning_rate": 1.9863017469317647e-06, + "loss": 16.5703, + "step": 8575 + }, + { + "epoch": 0.08118060222830151, + "grad_norm": 338.7046813964844, + "learning_rate": 1.986296689338407e-06, + "loss": 21.8281, + "step": 8576 + }, + { + "epoch": 0.08119006825001657, + "grad_norm": 409.7878112792969, + "learning_rate": 1.9862916308179944e-06, + "loss": 39.7656, + "step": 8577 + }, + { + "epoch": 0.08119953427173161, + "grad_norm": 719.274169921875, + "learning_rate": 1.9862865713705316e-06, + "loss": 45.125, + "step": 8578 + }, + { + "epoch": 0.08120900029344667, + "grad_norm": 945.4529418945312, + "learning_rate": 1.986281510996024e-06, + "loss": 34.5, + "step": 8579 + }, + { + "epoch": 0.08121846631516173, + "grad_norm": 248.1065673828125, + "learning_rate": 1.9862764496944755e-06, + "loss": 18.2188, + "step": 8580 + }, + { + "epoch": 0.08122793233687678, + "grad_norm": 892.1857299804688, + "learning_rate": 1.9862713874658914e-06, + "loss": 41.0781, + "step": 8581 + }, + { + "epoch": 0.08123739835859184, + "grad_norm": 700.2037963867188, + "learning_rate": 1.9862663243102764e-06, + "loss": 24.7031, + "step": 8582 + }, + { + "epoch": 0.08124686438030689, + "grad_norm": 197.10028076171875, + "learning_rate": 1.9862612602276355e-06, + "loss": 18.3516, + "step": 8583 + }, + { + "epoch": 0.08125633040202195, + "grad_norm": 565.071044921875, + "learning_rate": 1.986256195217973e-06, + "loss": 50.4062, + "step": 8584 + }, + { + "epoch": 0.08126579642373699, + "grad_norm": 482.6943054199219, + "learning_rate": 1.9862511292812937e-06, + "loss": 48.6562, + "step": 8585 + }, + { + "epoch": 0.08127526244545205, + "grad_norm": 374.1911315917969, + "learning_rate": 1.9862460624176026e-06, + "loss": 29.2188, + "step": 8586 + }, + { + "epoch": 0.0812847284671671, + "grad_norm": 323.5276184082031, + "learning_rate": 1.9862409946269043e-06, + "loss": 20.7812, + "step": 8587 + }, + { + "epoch": 0.08129419448888216, + "grad_norm": 363.32513427734375, + "learning_rate": 1.986235925909204e-06, + "loss": 38.8125, + "step": 8588 + }, + { + "epoch": 0.08130366051059722, + "grad_norm": 264.4052429199219, + "learning_rate": 1.9862308562645054e-06, + "loss": 25.8125, + "step": 8589 + }, + { + "epoch": 0.08131312653231226, + "grad_norm": 400.31927490234375, + "learning_rate": 1.986225785692814e-06, + "loss": 36.0, + "step": 8590 + }, + { + "epoch": 0.08132259255402732, + "grad_norm": 271.249755859375, + "learning_rate": 1.986220714194135e-06, + "loss": 24.3125, + "step": 8591 + }, + { + "epoch": 0.08133205857574237, + "grad_norm": 432.8563232421875, + "learning_rate": 1.9862156417684724e-06, + "loss": 31.1953, + "step": 8592 + }, + { + "epoch": 0.08134152459745743, + "grad_norm": 529.2146606445312, + "learning_rate": 1.9862105684158313e-06, + "loss": 40.9844, + "step": 8593 + }, + { + "epoch": 0.08135099061917248, + "grad_norm": 450.27862548828125, + "learning_rate": 1.986205494136217e-06, + "loss": 31.3672, + "step": 8594 + }, + { + "epoch": 0.08136045664088754, + "grad_norm": 208.42037963867188, + "learning_rate": 1.986200418929633e-06, + "loss": 16.8516, + "step": 8595 + }, + { + "epoch": 0.08136992266260258, + "grad_norm": 549.4236450195312, + "learning_rate": 1.9861953427960847e-06, + "loss": 56.8125, + "step": 8596 + }, + { + "epoch": 0.08137938868431764, + "grad_norm": 697.4417114257812, + "learning_rate": 1.9861902657355773e-06, + "loss": 66.9141, + "step": 8597 + }, + { + "epoch": 0.0813888547060327, + "grad_norm": 2.8923346996307373, + "learning_rate": 1.986185187748115e-06, + "loss": 0.8467, + "step": 8598 + }, + { + "epoch": 0.08139832072774775, + "grad_norm": 525.2405395507812, + "learning_rate": 1.9861801088337027e-06, + "loss": 19.8984, + "step": 8599 + }, + { + "epoch": 0.08140778674946281, + "grad_norm": 593.7041625976562, + "learning_rate": 1.9861750289923455e-06, + "loss": 51.9844, + "step": 8600 + }, + { + "epoch": 0.08141725277117785, + "grad_norm": 176.997314453125, + "learning_rate": 1.986169948224048e-06, + "loss": 19.3359, + "step": 8601 + }, + { + "epoch": 0.08142671879289291, + "grad_norm": 541.769775390625, + "learning_rate": 1.9861648665288145e-06, + "loss": 30.75, + "step": 8602 + }, + { + "epoch": 0.08143618481460796, + "grad_norm": 555.3895874023438, + "learning_rate": 1.9861597839066506e-06, + "loss": 60.5781, + "step": 8603 + }, + { + "epoch": 0.08144565083632302, + "grad_norm": 431.91522216796875, + "learning_rate": 1.9861547003575603e-06, + "loss": 37.4531, + "step": 8604 + }, + { + "epoch": 0.08145511685803808, + "grad_norm": 564.4188842773438, + "learning_rate": 1.986149615881549e-06, + "loss": 35.3359, + "step": 8605 + }, + { + "epoch": 0.08146458287975313, + "grad_norm": 436.56304931640625, + "learning_rate": 1.9861445304786214e-06, + "loss": 47.0781, + "step": 8606 + }, + { + "epoch": 0.08147404890146819, + "grad_norm": 587.87158203125, + "learning_rate": 1.9861394441487816e-06, + "loss": 54.2891, + "step": 8607 + }, + { + "epoch": 0.08148351492318323, + "grad_norm": 392.10302734375, + "learning_rate": 1.9861343568920354e-06, + "loss": 13.1953, + "step": 8608 + }, + { + "epoch": 0.08149298094489829, + "grad_norm": 367.22003173828125, + "learning_rate": 1.9861292687083866e-06, + "loss": 11.7383, + "step": 8609 + }, + { + "epoch": 0.08150244696661334, + "grad_norm": 482.1783447265625, + "learning_rate": 1.98612417959784e-06, + "loss": 45.1797, + "step": 8610 + }, + { + "epoch": 0.0815119129883284, + "grad_norm": 294.2837829589844, + "learning_rate": 1.9861190895604017e-06, + "loss": 34.6719, + "step": 8611 + }, + { + "epoch": 0.08152137901004344, + "grad_norm": 382.6028137207031, + "learning_rate": 1.9861139985960754e-06, + "loss": 29.7969, + "step": 8612 + }, + { + "epoch": 0.0815308450317585, + "grad_norm": 528.97265625, + "learning_rate": 1.986108906704866e-06, + "loss": 42.1875, + "step": 8613 + }, + { + "epoch": 0.08154031105347356, + "grad_norm": 596.6533813476562, + "learning_rate": 1.9861038138867784e-06, + "loss": 65.5938, + "step": 8614 + }, + { + "epoch": 0.08154977707518861, + "grad_norm": 397.8375244140625, + "learning_rate": 1.9860987201418174e-06, + "loss": 63.0781, + "step": 8615 + }, + { + "epoch": 0.08155924309690367, + "grad_norm": 283.2540588378906, + "learning_rate": 1.9860936254699877e-06, + "loss": 19.4219, + "step": 8616 + }, + { + "epoch": 0.08156870911861872, + "grad_norm": 421.1691589355469, + "learning_rate": 1.9860885298712942e-06, + "loss": 51.2812, + "step": 8617 + }, + { + "epoch": 0.08157817514033378, + "grad_norm": 684.3056640625, + "learning_rate": 1.9860834333457418e-06, + "loss": 15.6992, + "step": 8618 + }, + { + "epoch": 0.08158764116204882, + "grad_norm": 564.8286743164062, + "learning_rate": 1.9860783358933345e-06, + "loss": 58.5625, + "step": 8619 + }, + { + "epoch": 0.08159710718376388, + "grad_norm": 467.4150085449219, + "learning_rate": 1.9860732375140784e-06, + "loss": 38.0625, + "step": 8620 + }, + { + "epoch": 0.08160657320547893, + "grad_norm": 553.298828125, + "learning_rate": 1.9860681382079773e-06, + "loss": 43.4844, + "step": 8621 + }, + { + "epoch": 0.08161603922719399, + "grad_norm": 673.1853637695312, + "learning_rate": 1.986063037975036e-06, + "loss": 57.5625, + "step": 8622 + }, + { + "epoch": 0.08162550524890905, + "grad_norm": 520.4962768554688, + "learning_rate": 1.98605793681526e-06, + "loss": 49.25, + "step": 8623 + }, + { + "epoch": 0.0816349712706241, + "grad_norm": 317.63165283203125, + "learning_rate": 1.9860528347286535e-06, + "loss": 19.3594, + "step": 8624 + }, + { + "epoch": 0.08164443729233915, + "grad_norm": 811.7960205078125, + "learning_rate": 1.9860477317152217e-06, + "loss": 28.7109, + "step": 8625 + }, + { + "epoch": 0.0816539033140542, + "grad_norm": 592.7666625976562, + "learning_rate": 1.986042627774969e-06, + "loss": 36.5469, + "step": 8626 + }, + { + "epoch": 0.08166336933576926, + "grad_norm": 589.6677856445312, + "learning_rate": 1.9860375229079004e-06, + "loss": 13.7734, + "step": 8627 + }, + { + "epoch": 0.0816728353574843, + "grad_norm": 303.36669921875, + "learning_rate": 1.9860324171140207e-06, + "loss": 35.1562, + "step": 8628 + }, + { + "epoch": 0.08168230137919937, + "grad_norm": 219.7354736328125, + "learning_rate": 1.986027310393335e-06, + "loss": 16.7734, + "step": 8629 + }, + { + "epoch": 0.08169176740091441, + "grad_norm": 966.5437622070312, + "learning_rate": 1.9860222027458472e-06, + "loss": 44.1094, + "step": 8630 + }, + { + "epoch": 0.08170123342262947, + "grad_norm": 215.709228515625, + "learning_rate": 1.9860170941715632e-06, + "loss": 18.0, + "step": 8631 + }, + { + "epoch": 0.08171069944434453, + "grad_norm": 399.2390441894531, + "learning_rate": 1.9860119846704867e-06, + "loss": 23.5078, + "step": 8632 + }, + { + "epoch": 0.08172016546605958, + "grad_norm": 168.9862518310547, + "learning_rate": 1.9860068742426236e-06, + "loss": 17.5859, + "step": 8633 + }, + { + "epoch": 0.08172963148777464, + "grad_norm": 366.90325927734375, + "learning_rate": 1.986001762887978e-06, + "loss": 44.0156, + "step": 8634 + }, + { + "epoch": 0.08173909750948968, + "grad_norm": 853.6576538085938, + "learning_rate": 1.985996650606555e-06, + "loss": 52.4375, + "step": 8635 + }, + { + "epoch": 0.08174856353120474, + "grad_norm": 310.4335021972656, + "learning_rate": 1.985991537398359e-06, + "loss": 23.9375, + "step": 8636 + }, + { + "epoch": 0.08175802955291979, + "grad_norm": 383.2647399902344, + "learning_rate": 1.9859864232633956e-06, + "loss": 27.4844, + "step": 8637 + }, + { + "epoch": 0.08176749557463485, + "grad_norm": 246.62628173828125, + "learning_rate": 1.9859813082016685e-06, + "loss": 25.5312, + "step": 8638 + }, + { + "epoch": 0.0817769615963499, + "grad_norm": 155.03610229492188, + "learning_rate": 1.985976192213183e-06, + "loss": 17.6406, + "step": 8639 + }, + { + "epoch": 0.08178642761806496, + "grad_norm": 252.03302001953125, + "learning_rate": 1.9859710752979446e-06, + "loss": 26.3125, + "step": 8640 + }, + { + "epoch": 0.08179589363978002, + "grad_norm": 520.2264404296875, + "learning_rate": 1.985965957455957e-06, + "loss": 63.4219, + "step": 8641 + }, + { + "epoch": 0.08180535966149506, + "grad_norm": 226.48153686523438, + "learning_rate": 1.9859608386872264e-06, + "loss": 24.1406, + "step": 8642 + }, + { + "epoch": 0.08181482568321012, + "grad_norm": 286.6160888671875, + "learning_rate": 1.985955718991756e-06, + "loss": 20.6484, + "step": 8643 + }, + { + "epoch": 0.08182429170492517, + "grad_norm": 271.9776916503906, + "learning_rate": 1.9859505983695514e-06, + "loss": 21.6562, + "step": 8644 + }, + { + "epoch": 0.08183375772664023, + "grad_norm": 243.08273315429688, + "learning_rate": 1.9859454768206175e-06, + "loss": 10.6992, + "step": 8645 + }, + { + "epoch": 0.08184322374835527, + "grad_norm": 322.5129699707031, + "learning_rate": 1.9859403543449592e-06, + "loss": 33.2031, + "step": 8646 + }, + { + "epoch": 0.08185268977007033, + "grad_norm": 450.21771240234375, + "learning_rate": 1.985935230942581e-06, + "loss": 26.4219, + "step": 8647 + }, + { + "epoch": 0.0818621557917854, + "grad_norm": 653.526611328125, + "learning_rate": 1.985930106613488e-06, + "loss": 26.7969, + "step": 8648 + }, + { + "epoch": 0.08187162181350044, + "grad_norm": 2.959918737411499, + "learning_rate": 1.9859249813576844e-06, + "loss": 0.9072, + "step": 8649 + }, + { + "epoch": 0.0818810878352155, + "grad_norm": 173.8944549560547, + "learning_rate": 1.9859198551751758e-06, + "loss": 18.0938, + "step": 8650 + }, + { + "epoch": 0.08189055385693055, + "grad_norm": 206.4402313232422, + "learning_rate": 1.9859147280659665e-06, + "loss": 23.5, + "step": 8651 + }, + { + "epoch": 0.0819000198786456, + "grad_norm": 289.3849792480469, + "learning_rate": 1.9859096000300616e-06, + "loss": 21.375, + "step": 8652 + }, + { + "epoch": 0.08190948590036065, + "grad_norm": 869.9110717773438, + "learning_rate": 1.9859044710674655e-06, + "loss": 71.2188, + "step": 8653 + }, + { + "epoch": 0.08191895192207571, + "grad_norm": 365.70062255859375, + "learning_rate": 1.9858993411781835e-06, + "loss": 26.4688, + "step": 8654 + }, + { + "epoch": 0.08192841794379076, + "grad_norm": 186.9750213623047, + "learning_rate": 1.9858942103622204e-06, + "loss": 22.2188, + "step": 8655 + }, + { + "epoch": 0.08193788396550582, + "grad_norm": 755.2274780273438, + "learning_rate": 1.985889078619581e-06, + "loss": 21.9219, + "step": 8656 + }, + { + "epoch": 0.08194734998722088, + "grad_norm": 277.4668273925781, + "learning_rate": 1.9858839459502698e-06, + "loss": 28.6562, + "step": 8657 + }, + { + "epoch": 0.08195681600893592, + "grad_norm": 477.6401672363281, + "learning_rate": 1.985878812354292e-06, + "loss": 13.2344, + "step": 8658 + }, + { + "epoch": 0.08196628203065098, + "grad_norm": 465.13360595703125, + "learning_rate": 1.9858736778316517e-06, + "loss": 20.1172, + "step": 8659 + }, + { + "epoch": 0.08197574805236603, + "grad_norm": 260.86407470703125, + "learning_rate": 1.985868542382355e-06, + "loss": 9.3984, + "step": 8660 + }, + { + "epoch": 0.08198521407408109, + "grad_norm": 822.0299072265625, + "learning_rate": 1.9858634060064056e-06, + "loss": 52.9688, + "step": 8661 + }, + { + "epoch": 0.08199468009579614, + "grad_norm": 1221.094482421875, + "learning_rate": 1.9858582687038087e-06, + "loss": 30.7422, + "step": 8662 + }, + { + "epoch": 0.0820041461175112, + "grad_norm": 278.4847717285156, + "learning_rate": 1.985853130474569e-06, + "loss": 24.3281, + "step": 8663 + }, + { + "epoch": 0.08201361213922624, + "grad_norm": 351.7635192871094, + "learning_rate": 1.985847991318692e-06, + "loss": 14.4062, + "step": 8664 + }, + { + "epoch": 0.0820230781609413, + "grad_norm": 453.0286560058594, + "learning_rate": 1.985842851236182e-06, + "loss": 50.375, + "step": 8665 + }, + { + "epoch": 0.08203254418265636, + "grad_norm": 3.2739827632904053, + "learning_rate": 1.9858377102270437e-06, + "loss": 0.9756, + "step": 8666 + }, + { + "epoch": 0.08204201020437141, + "grad_norm": 2.6491310596466064, + "learning_rate": 1.985832568291282e-06, + "loss": 0.7944, + "step": 8667 + }, + { + "epoch": 0.08205147622608647, + "grad_norm": 399.88348388671875, + "learning_rate": 1.985827425428902e-06, + "loss": 35.3594, + "step": 8668 + }, + { + "epoch": 0.08206094224780151, + "grad_norm": 414.0464172363281, + "learning_rate": 1.985822281639908e-06, + "loss": 37.7344, + "step": 8669 + }, + { + "epoch": 0.08207040826951657, + "grad_norm": 390.7677001953125, + "learning_rate": 1.9858171369243057e-06, + "loss": 49.5312, + "step": 8670 + }, + { + "epoch": 0.08207987429123162, + "grad_norm": 521.9815673828125, + "learning_rate": 1.985811991282099e-06, + "loss": 29.4766, + "step": 8671 + }, + { + "epoch": 0.08208934031294668, + "grad_norm": 277.93011474609375, + "learning_rate": 1.985806844713293e-06, + "loss": 23.3438, + "step": 8672 + }, + { + "epoch": 0.08209880633466173, + "grad_norm": 284.7210693359375, + "learning_rate": 1.985801697217893e-06, + "loss": 31.5859, + "step": 8673 + }, + { + "epoch": 0.08210827235637679, + "grad_norm": 511.8755798339844, + "learning_rate": 1.9857965487959034e-06, + "loss": 23.9609, + "step": 8674 + }, + { + "epoch": 0.08211773837809185, + "grad_norm": 325.8238220214844, + "learning_rate": 1.9857913994473295e-06, + "loss": 39.1406, + "step": 8675 + }, + { + "epoch": 0.08212720439980689, + "grad_norm": 266.0287170410156, + "learning_rate": 1.9857862491721756e-06, + "loss": 19.3906, + "step": 8676 + }, + { + "epoch": 0.08213667042152195, + "grad_norm": 450.7208251953125, + "learning_rate": 1.9857810979704465e-06, + "loss": 12.3164, + "step": 8677 + }, + { + "epoch": 0.082146136443237, + "grad_norm": 194.34597778320312, + "learning_rate": 1.9857759458421477e-06, + "loss": 23.0625, + "step": 8678 + }, + { + "epoch": 0.08215560246495206, + "grad_norm": 349.4842529296875, + "learning_rate": 1.9857707927872833e-06, + "loss": 36.1406, + "step": 8679 + }, + { + "epoch": 0.0821650684866671, + "grad_norm": 469.8100280761719, + "learning_rate": 1.985765638805859e-06, + "loss": 12.0234, + "step": 8680 + }, + { + "epoch": 0.08217453450838216, + "grad_norm": 287.5538330078125, + "learning_rate": 1.9857604838978787e-06, + "loss": 23.4141, + "step": 8681 + }, + { + "epoch": 0.08218400053009721, + "grad_norm": 199.02735900878906, + "learning_rate": 1.9857553280633477e-06, + "loss": 20.8203, + "step": 8682 + }, + { + "epoch": 0.08219346655181227, + "grad_norm": 472.733154296875, + "learning_rate": 1.985750171302271e-06, + "loss": 52.7578, + "step": 8683 + }, + { + "epoch": 0.08220293257352733, + "grad_norm": 368.31414794921875, + "learning_rate": 1.985745013614653e-06, + "loss": 24.0312, + "step": 8684 + }, + { + "epoch": 0.08221239859524238, + "grad_norm": 389.3880615234375, + "learning_rate": 1.985739855000499e-06, + "loss": 41.2969, + "step": 8685 + }, + { + "epoch": 0.08222186461695744, + "grad_norm": 562.7379760742188, + "learning_rate": 1.9857346954598136e-06, + "loss": 54.8828, + "step": 8686 + }, + { + "epoch": 0.08223133063867248, + "grad_norm": 244.9026641845703, + "learning_rate": 1.9857295349926016e-06, + "loss": 12.1133, + "step": 8687 + }, + { + "epoch": 0.08224079666038754, + "grad_norm": 155.0160369873047, + "learning_rate": 1.9857243735988685e-06, + "loss": 18.3516, + "step": 8688 + }, + { + "epoch": 0.08225026268210259, + "grad_norm": 531.6895141601562, + "learning_rate": 1.985719211278618e-06, + "loss": 45.8594, + "step": 8689 + }, + { + "epoch": 0.08225972870381765, + "grad_norm": 386.4433288574219, + "learning_rate": 1.985714048031856e-06, + "loss": 23.5547, + "step": 8690 + }, + { + "epoch": 0.08226919472553271, + "grad_norm": 166.97007751464844, + "learning_rate": 1.9857088838585865e-06, + "loss": 15.4531, + "step": 8691 + }, + { + "epoch": 0.08227866074724775, + "grad_norm": 541.2481689453125, + "learning_rate": 1.985703718758815e-06, + "loss": 53.3438, + "step": 8692 + }, + { + "epoch": 0.08228812676896281, + "grad_norm": 269.0350646972656, + "learning_rate": 1.985698552732546e-06, + "loss": 19.0547, + "step": 8693 + }, + { + "epoch": 0.08229759279067786, + "grad_norm": 317.9525146484375, + "learning_rate": 1.985693385779785e-06, + "loss": 24.5078, + "step": 8694 + }, + { + "epoch": 0.08230705881239292, + "grad_norm": 336.6326599121094, + "learning_rate": 1.9856882179005356e-06, + "loss": 26.0938, + "step": 8695 + }, + { + "epoch": 0.08231652483410797, + "grad_norm": 298.5812683105469, + "learning_rate": 1.985683049094804e-06, + "loss": 23.9531, + "step": 8696 + }, + { + "epoch": 0.08232599085582303, + "grad_norm": 1261.5892333984375, + "learning_rate": 1.9856778793625943e-06, + "loss": 34.9062, + "step": 8697 + }, + { + "epoch": 0.08233545687753807, + "grad_norm": 941.9432983398438, + "learning_rate": 1.985672708703911e-06, + "loss": 42.8906, + "step": 8698 + }, + { + "epoch": 0.08234492289925313, + "grad_norm": 650.5338134765625, + "learning_rate": 1.9856675371187597e-06, + "loss": 18.1094, + "step": 8699 + }, + { + "epoch": 0.08235438892096819, + "grad_norm": 216.5482177734375, + "learning_rate": 1.9856623646071453e-06, + "loss": 23.3047, + "step": 8700 + }, + { + "epoch": 0.08236385494268324, + "grad_norm": 482.5940246582031, + "learning_rate": 1.9856571911690726e-06, + "loss": 23.4219, + "step": 8701 + }, + { + "epoch": 0.0823733209643983, + "grad_norm": 257.6396789550781, + "learning_rate": 1.9856520168045457e-06, + "loss": 25.8672, + "step": 8702 + }, + { + "epoch": 0.08238278698611334, + "grad_norm": 335.8690185546875, + "learning_rate": 1.9856468415135706e-06, + "loss": 30.1641, + "step": 8703 + }, + { + "epoch": 0.0823922530078284, + "grad_norm": 175.90028381347656, + "learning_rate": 1.985641665296151e-06, + "loss": 23.2461, + "step": 8704 + }, + { + "epoch": 0.08240171902954345, + "grad_norm": 219.3831329345703, + "learning_rate": 1.985636488152293e-06, + "loss": 16.7891, + "step": 8705 + }, + { + "epoch": 0.08241118505125851, + "grad_norm": 3.5476040840148926, + "learning_rate": 1.9856313100820004e-06, + "loss": 0.927, + "step": 8706 + }, + { + "epoch": 0.08242065107297356, + "grad_norm": 279.5444030761719, + "learning_rate": 1.9856261310852786e-06, + "loss": 37.4844, + "step": 8707 + }, + { + "epoch": 0.08243011709468862, + "grad_norm": 315.3930969238281, + "learning_rate": 1.9856209511621322e-06, + "loss": 16.2812, + "step": 8708 + }, + { + "epoch": 0.08243958311640368, + "grad_norm": 453.61431884765625, + "learning_rate": 1.985615770312566e-06, + "loss": 52.8594, + "step": 8709 + }, + { + "epoch": 0.08244904913811872, + "grad_norm": 598.2427978515625, + "learning_rate": 1.985610588536586e-06, + "loss": 42.4297, + "step": 8710 + }, + { + "epoch": 0.08245851515983378, + "grad_norm": 730.796142578125, + "learning_rate": 1.9856054058341954e-06, + "loss": 38.0586, + "step": 8711 + }, + { + "epoch": 0.08246798118154883, + "grad_norm": 565.1240234375, + "learning_rate": 1.9856002222054e-06, + "loss": 33.1172, + "step": 8712 + }, + { + "epoch": 0.08247744720326389, + "grad_norm": 304.0792541503906, + "learning_rate": 1.9855950376502047e-06, + "loss": 29.9062, + "step": 8713 + }, + { + "epoch": 0.08248691322497893, + "grad_norm": 167.6880340576172, + "learning_rate": 1.985589852168614e-06, + "loss": 16.5781, + "step": 8714 + }, + { + "epoch": 0.082496379246694, + "grad_norm": 563.47216796875, + "learning_rate": 1.985584665760633e-06, + "loss": 31.0781, + "step": 8715 + }, + { + "epoch": 0.08250584526840904, + "grad_norm": 3.488931655883789, + "learning_rate": 1.9855794784262663e-06, + "loss": 1.0991, + "step": 8716 + }, + { + "epoch": 0.0825153112901241, + "grad_norm": 410.6516418457031, + "learning_rate": 1.9855742901655194e-06, + "loss": 49.7266, + "step": 8717 + }, + { + "epoch": 0.08252477731183916, + "grad_norm": 304.539794921875, + "learning_rate": 1.9855691009783966e-06, + "loss": 18.7383, + "step": 8718 + }, + { + "epoch": 0.0825342433335542, + "grad_norm": 240.3153839111328, + "learning_rate": 1.985563910864903e-06, + "loss": 22.375, + "step": 8719 + }, + { + "epoch": 0.08254370935526927, + "grad_norm": 364.3797302246094, + "learning_rate": 1.985558719825043e-06, + "loss": 19.8125, + "step": 8720 + }, + { + "epoch": 0.08255317537698431, + "grad_norm": 422.76177978515625, + "learning_rate": 1.9855535278588226e-06, + "loss": 40.3594, + "step": 8721 + }, + { + "epoch": 0.08256264139869937, + "grad_norm": 254.8544921875, + "learning_rate": 1.9855483349662457e-06, + "loss": 18.9375, + "step": 8722 + }, + { + "epoch": 0.08257210742041442, + "grad_norm": 514.790283203125, + "learning_rate": 1.9855431411473177e-06, + "loss": 32.6562, + "step": 8723 + }, + { + "epoch": 0.08258157344212948, + "grad_norm": 259.16943359375, + "learning_rate": 1.9855379464020426e-06, + "loss": 27.9922, + "step": 8724 + }, + { + "epoch": 0.08259103946384452, + "grad_norm": 739.2986450195312, + "learning_rate": 1.985532750730427e-06, + "loss": 31.6016, + "step": 8725 + }, + { + "epoch": 0.08260050548555958, + "grad_norm": 316.5402526855469, + "learning_rate": 1.985527554132474e-06, + "loss": 30.0781, + "step": 8726 + }, + { + "epoch": 0.08260997150727464, + "grad_norm": 1177.6724853515625, + "learning_rate": 1.9855223566081892e-06, + "loss": 55.9062, + "step": 8727 + }, + { + "epoch": 0.08261943752898969, + "grad_norm": 723.8480834960938, + "learning_rate": 1.9855171581575774e-06, + "loss": 51.8281, + "step": 8728 + }, + { + "epoch": 0.08262890355070475, + "grad_norm": 438.4127197265625, + "learning_rate": 1.985511958780644e-06, + "loss": 37.3906, + "step": 8729 + }, + { + "epoch": 0.0826383695724198, + "grad_norm": 460.58709716796875, + "learning_rate": 1.9855067584773933e-06, + "loss": 14.0859, + "step": 8730 + }, + { + "epoch": 0.08264783559413486, + "grad_norm": 333.8709716796875, + "learning_rate": 1.9855015572478303e-06, + "loss": 16.8438, + "step": 8731 + }, + { + "epoch": 0.0826573016158499, + "grad_norm": 293.8708801269531, + "learning_rate": 1.98549635509196e-06, + "loss": 31.4492, + "step": 8732 + }, + { + "epoch": 0.08266676763756496, + "grad_norm": 454.6141662597656, + "learning_rate": 1.9854911520097874e-06, + "loss": 26.2031, + "step": 8733 + }, + { + "epoch": 0.08267623365928002, + "grad_norm": 378.40753173828125, + "learning_rate": 1.9854859480013173e-06, + "loss": 28.9219, + "step": 8734 + }, + { + "epoch": 0.08268569968099507, + "grad_norm": 620.6176147460938, + "learning_rate": 1.985480743066554e-06, + "loss": 42.7852, + "step": 8735 + }, + { + "epoch": 0.08269516570271013, + "grad_norm": 226.80517578125, + "learning_rate": 1.9854755372055033e-06, + "loss": 21.2109, + "step": 8736 + }, + { + "epoch": 0.08270463172442517, + "grad_norm": 231.06625366210938, + "learning_rate": 1.9854703304181696e-06, + "loss": 9.6914, + "step": 8737 + }, + { + "epoch": 0.08271409774614023, + "grad_norm": 242.5263671875, + "learning_rate": 1.985465122704558e-06, + "loss": 23.1562, + "step": 8738 + }, + { + "epoch": 0.08272356376785528, + "grad_norm": 228.9398956298828, + "learning_rate": 1.985459914064673e-06, + "loss": 33.3125, + "step": 8739 + }, + { + "epoch": 0.08273302978957034, + "grad_norm": 3.227261543273926, + "learning_rate": 1.98545470449852e-06, + "loss": 0.9126, + "step": 8740 + }, + { + "epoch": 0.08274249581128539, + "grad_norm": 499.432861328125, + "learning_rate": 1.9854494940061036e-06, + "loss": 46.9531, + "step": 8741 + }, + { + "epoch": 0.08275196183300045, + "grad_norm": 389.5716552734375, + "learning_rate": 1.9854442825874288e-06, + "loss": 41.25, + "step": 8742 + }, + { + "epoch": 0.0827614278547155, + "grad_norm": 409.2669372558594, + "learning_rate": 1.9854390702425006e-06, + "loss": 33.3984, + "step": 8743 + }, + { + "epoch": 0.08277089387643055, + "grad_norm": 229.82664489746094, + "learning_rate": 1.9854338569713237e-06, + "loss": 16.5469, + "step": 8744 + }, + { + "epoch": 0.08278035989814561, + "grad_norm": 403.3621520996094, + "learning_rate": 1.985428642773903e-06, + "loss": 22.7344, + "step": 8745 + }, + { + "epoch": 0.08278982591986066, + "grad_norm": 266.61468505859375, + "learning_rate": 1.9854234276502435e-06, + "loss": 25.4922, + "step": 8746 + }, + { + "epoch": 0.08279929194157572, + "grad_norm": 233.8787841796875, + "learning_rate": 1.98541821160035e-06, + "loss": 9.7852, + "step": 8747 + }, + { + "epoch": 0.08280875796329076, + "grad_norm": 260.4866027832031, + "learning_rate": 1.985412994624228e-06, + "loss": 20.6406, + "step": 8748 + }, + { + "epoch": 0.08281822398500582, + "grad_norm": 237.79815673828125, + "learning_rate": 1.9854077767218816e-06, + "loss": 18.9297, + "step": 8749 + }, + { + "epoch": 0.08282769000672087, + "grad_norm": 578.6331176757812, + "learning_rate": 1.9854025578933156e-06, + "loss": 24.0312, + "step": 8750 + }, + { + "epoch": 0.08283715602843593, + "grad_norm": 426.75726318359375, + "learning_rate": 1.985397338138536e-06, + "loss": 27.7734, + "step": 8751 + }, + { + "epoch": 0.08284662205015099, + "grad_norm": 245.75962829589844, + "learning_rate": 1.9853921174575466e-06, + "loss": 27.3125, + "step": 8752 + }, + { + "epoch": 0.08285608807186604, + "grad_norm": 271.35333251953125, + "learning_rate": 1.9853868958503526e-06, + "loss": 21.1172, + "step": 8753 + }, + { + "epoch": 0.0828655540935811, + "grad_norm": 521.1118774414062, + "learning_rate": 1.9853816733169593e-06, + "loss": 13.1523, + "step": 8754 + }, + { + "epoch": 0.08287502011529614, + "grad_norm": 227.997802734375, + "learning_rate": 1.9853764498573715e-06, + "loss": 21.8516, + "step": 8755 + }, + { + "epoch": 0.0828844861370112, + "grad_norm": 316.5507507324219, + "learning_rate": 1.9853712254715934e-06, + "loss": 17.8516, + "step": 8756 + }, + { + "epoch": 0.08289395215872625, + "grad_norm": 448.6460876464844, + "learning_rate": 1.9853660001596306e-06, + "loss": 27.1406, + "step": 8757 + }, + { + "epoch": 0.08290341818044131, + "grad_norm": 410.67022705078125, + "learning_rate": 1.985360773921488e-06, + "loss": 25.0938, + "step": 8758 + }, + { + "epoch": 0.08291288420215635, + "grad_norm": 334.8908996582031, + "learning_rate": 1.9853555467571703e-06, + "loss": 23.0156, + "step": 8759 + }, + { + "epoch": 0.08292235022387141, + "grad_norm": 370.65576171875, + "learning_rate": 1.9853503186666823e-06, + "loss": 20.1562, + "step": 8760 + }, + { + "epoch": 0.08293181624558647, + "grad_norm": 476.39459228515625, + "learning_rate": 1.98534508965003e-06, + "loss": 26.0273, + "step": 8761 + }, + { + "epoch": 0.08294128226730152, + "grad_norm": 237.46949768066406, + "learning_rate": 1.9853398597072163e-06, + "loss": 21.1641, + "step": 8762 + }, + { + "epoch": 0.08295074828901658, + "grad_norm": 564.3192138671875, + "learning_rate": 1.9853346288382476e-06, + "loss": 47.3125, + "step": 8763 + }, + { + "epoch": 0.08296021431073163, + "grad_norm": 363.9349670410156, + "learning_rate": 1.9853293970431285e-06, + "loss": 39.9844, + "step": 8764 + }, + { + "epoch": 0.08296968033244669, + "grad_norm": 230.90232849121094, + "learning_rate": 1.985324164321864e-06, + "loss": 21.0156, + "step": 8765 + }, + { + "epoch": 0.08297914635416173, + "grad_norm": 182.3005828857422, + "learning_rate": 1.9853189306744586e-06, + "loss": 23.6484, + "step": 8766 + }, + { + "epoch": 0.08298861237587679, + "grad_norm": 279.4573669433594, + "learning_rate": 1.9853136961009176e-06, + "loss": 19.8359, + "step": 8767 + }, + { + "epoch": 0.08299807839759184, + "grad_norm": 465.7752990722656, + "learning_rate": 1.985308460601246e-06, + "loss": 38.2188, + "step": 8768 + }, + { + "epoch": 0.0830075444193069, + "grad_norm": 309.4772033691406, + "learning_rate": 1.985303224175448e-06, + "loss": 39.3125, + "step": 8769 + }, + { + "epoch": 0.08301701044102196, + "grad_norm": 524.9190673828125, + "learning_rate": 1.98529798682353e-06, + "loss": 40.9961, + "step": 8770 + }, + { + "epoch": 0.083026476462737, + "grad_norm": 1024.4036865234375, + "learning_rate": 1.9852927485454954e-06, + "loss": 55.2969, + "step": 8771 + }, + { + "epoch": 0.08303594248445206, + "grad_norm": 451.4525451660156, + "learning_rate": 1.98528750934135e-06, + "loss": 21.7422, + "step": 8772 + }, + { + "epoch": 0.08304540850616711, + "grad_norm": 361.4324951171875, + "learning_rate": 1.985282269211098e-06, + "loss": 41.9531, + "step": 8773 + }, + { + "epoch": 0.08305487452788217, + "grad_norm": 773.0471801757812, + "learning_rate": 1.9852770281547453e-06, + "loss": 43.1172, + "step": 8774 + }, + { + "epoch": 0.08306434054959722, + "grad_norm": 297.3982238769531, + "learning_rate": 1.985271786172296e-06, + "loss": 22.0, + "step": 8775 + }, + { + "epoch": 0.08307380657131228, + "grad_norm": 416.8920593261719, + "learning_rate": 1.9852665432637556e-06, + "loss": 31.6406, + "step": 8776 + }, + { + "epoch": 0.08308327259302734, + "grad_norm": 447.1200866699219, + "learning_rate": 1.985261299429128e-06, + "loss": 48.1094, + "step": 8777 + }, + { + "epoch": 0.08309273861474238, + "grad_norm": 3.161116123199463, + "learning_rate": 1.9852560546684194e-06, + "loss": 0.8359, + "step": 8778 + }, + { + "epoch": 0.08310220463645744, + "grad_norm": 232.520263671875, + "learning_rate": 1.9852508089816343e-06, + "loss": 22.75, + "step": 8779 + }, + { + "epoch": 0.08311167065817249, + "grad_norm": 3.281550168991089, + "learning_rate": 1.9852455623687774e-06, + "loss": 0.9539, + "step": 8780 + }, + { + "epoch": 0.08312113667988755, + "grad_norm": 203.04812622070312, + "learning_rate": 1.985240314829854e-06, + "loss": 21.9531, + "step": 8781 + }, + { + "epoch": 0.0831306027016026, + "grad_norm": 915.4973754882812, + "learning_rate": 1.9852350663648683e-06, + "loss": 76.125, + "step": 8782 + }, + { + "epoch": 0.08314006872331765, + "grad_norm": 305.6681823730469, + "learning_rate": 1.985229816973826e-06, + "loss": 21.0352, + "step": 8783 + }, + { + "epoch": 0.0831495347450327, + "grad_norm": 316.6472473144531, + "learning_rate": 1.9852245666567318e-06, + "loss": 52.8281, + "step": 8784 + }, + { + "epoch": 0.08315900076674776, + "grad_norm": 795.4052734375, + "learning_rate": 1.9852193154135905e-06, + "loss": 49.6719, + "step": 8785 + }, + { + "epoch": 0.08316846678846282, + "grad_norm": 384.4526672363281, + "learning_rate": 1.985214063244407e-06, + "loss": 37.9219, + "step": 8786 + }, + { + "epoch": 0.08317793281017787, + "grad_norm": 414.32354736328125, + "learning_rate": 1.9852088101491867e-06, + "loss": 33.4844, + "step": 8787 + }, + { + "epoch": 0.08318739883189293, + "grad_norm": 259.5562438964844, + "learning_rate": 1.985203556127934e-06, + "loss": 23.625, + "step": 8788 + }, + { + "epoch": 0.08319686485360797, + "grad_norm": 299.328369140625, + "learning_rate": 1.985198301180654e-06, + "loss": 30.8594, + "step": 8789 + }, + { + "epoch": 0.08320633087532303, + "grad_norm": 531.0048828125, + "learning_rate": 1.9851930453073516e-06, + "loss": 14.3906, + "step": 8790 + }, + { + "epoch": 0.08321579689703808, + "grad_norm": 419.4172058105469, + "learning_rate": 1.9851877885080324e-06, + "loss": 14.5703, + "step": 8791 + }, + { + "epoch": 0.08322526291875314, + "grad_norm": 520.8797607421875, + "learning_rate": 1.9851825307827004e-06, + "loss": 29.4844, + "step": 8792 + }, + { + "epoch": 0.08323472894046818, + "grad_norm": 324.93402099609375, + "learning_rate": 1.985177272131361e-06, + "loss": 12.7227, + "step": 8793 + }, + { + "epoch": 0.08324419496218324, + "grad_norm": 689.3154296875, + "learning_rate": 1.985172012554019e-06, + "loss": 75.6016, + "step": 8794 + }, + { + "epoch": 0.0832536609838983, + "grad_norm": 3.1198084354400635, + "learning_rate": 1.985166752050679e-06, + "loss": 0.9307, + "step": 8795 + }, + { + "epoch": 0.08326312700561335, + "grad_norm": 355.2467346191406, + "learning_rate": 1.985161490621347e-06, + "loss": 21.6719, + "step": 8796 + }, + { + "epoch": 0.08327259302732841, + "grad_norm": 242.8581085205078, + "learning_rate": 1.985156228266027e-06, + "loss": 28.1875, + "step": 8797 + }, + { + "epoch": 0.08328205904904346, + "grad_norm": 197.9665985107422, + "learning_rate": 1.985150964984724e-06, + "loss": 19.6797, + "step": 8798 + }, + { + "epoch": 0.08329152507075852, + "grad_norm": 229.36068725585938, + "learning_rate": 1.9851457007774437e-06, + "loss": 18.9609, + "step": 8799 + }, + { + "epoch": 0.08330099109247356, + "grad_norm": 329.86358642578125, + "learning_rate": 1.9851404356441903e-06, + "loss": 51.7656, + "step": 8800 + }, + { + "epoch": 0.08331045711418862, + "grad_norm": 923.4099731445312, + "learning_rate": 1.985135169584969e-06, + "loss": 32.7656, + "step": 8801 + }, + { + "epoch": 0.08331992313590367, + "grad_norm": 244.1951904296875, + "learning_rate": 1.985129902599784e-06, + "loss": 22.5625, + "step": 8802 + }, + { + "epoch": 0.08332938915761873, + "grad_norm": 299.083251953125, + "learning_rate": 1.985124634688642e-06, + "loss": 21.0312, + "step": 8803 + }, + { + "epoch": 0.08333885517933379, + "grad_norm": 723.6370849609375, + "learning_rate": 1.9851193658515465e-06, + "loss": 39.5312, + "step": 8804 + }, + { + "epoch": 0.08334832120104883, + "grad_norm": 515.4642333984375, + "learning_rate": 1.9851140960885032e-06, + "loss": 19.6641, + "step": 8805 + }, + { + "epoch": 0.0833577872227639, + "grad_norm": 548.3618774414062, + "learning_rate": 1.985108825399516e-06, + "loss": 57.4531, + "step": 8806 + }, + { + "epoch": 0.08336725324447894, + "grad_norm": 454.2354431152344, + "learning_rate": 1.9851035537845913e-06, + "loss": 29.1719, + "step": 8807 + }, + { + "epoch": 0.083376719266194, + "grad_norm": 161.4385223388672, + "learning_rate": 1.985098281243733e-06, + "loss": 21.2344, + "step": 8808 + }, + { + "epoch": 0.08338618528790905, + "grad_norm": 741.9959716796875, + "learning_rate": 1.9850930077769465e-06, + "loss": 36.4922, + "step": 8809 + }, + { + "epoch": 0.0833956513096241, + "grad_norm": 277.9853210449219, + "learning_rate": 1.9850877333842367e-06, + "loss": 23.0781, + "step": 8810 + }, + { + "epoch": 0.08340511733133915, + "grad_norm": 212.28634643554688, + "learning_rate": 1.9850824580656086e-06, + "loss": 26.6797, + "step": 8811 + }, + { + "epoch": 0.08341458335305421, + "grad_norm": 493.63433837890625, + "learning_rate": 1.985077181821067e-06, + "loss": 47.8125, + "step": 8812 + }, + { + "epoch": 0.08342404937476927, + "grad_norm": 320.9934997558594, + "learning_rate": 1.9850719046506166e-06, + "loss": 23.5234, + "step": 8813 + }, + { + "epoch": 0.08343351539648432, + "grad_norm": 483.96630859375, + "learning_rate": 1.9850666265542633e-06, + "loss": 23.9453, + "step": 8814 + }, + { + "epoch": 0.08344298141819938, + "grad_norm": 244.08682250976562, + "learning_rate": 1.985061347532011e-06, + "loss": 24.3047, + "step": 8815 + }, + { + "epoch": 0.08345244743991442, + "grad_norm": 534.9404907226562, + "learning_rate": 1.985056067583865e-06, + "loss": 26.5078, + "step": 8816 + }, + { + "epoch": 0.08346191346162948, + "grad_norm": 266.70867919921875, + "learning_rate": 1.985050786709831e-06, + "loss": 27.9844, + "step": 8817 + }, + { + "epoch": 0.08347137948334453, + "grad_norm": 381.6511535644531, + "learning_rate": 1.985045504909913e-06, + "loss": 51.0469, + "step": 8818 + }, + { + "epoch": 0.08348084550505959, + "grad_norm": 635.8932495117188, + "learning_rate": 1.9850402221841162e-06, + "loss": 48.3906, + "step": 8819 + }, + { + "epoch": 0.08349031152677465, + "grad_norm": 3.3161020278930664, + "learning_rate": 1.9850349385324457e-06, + "loss": 0.876, + "step": 8820 + }, + { + "epoch": 0.0834997775484897, + "grad_norm": 856.2240600585938, + "learning_rate": 1.9850296539549063e-06, + "loss": 51.9531, + "step": 8821 + }, + { + "epoch": 0.08350924357020476, + "grad_norm": 444.64862060546875, + "learning_rate": 1.9850243684515036e-06, + "loss": 64.6641, + "step": 8822 + }, + { + "epoch": 0.0835187095919198, + "grad_norm": 289.142578125, + "learning_rate": 1.985019082022242e-06, + "loss": 24.7422, + "step": 8823 + }, + { + "epoch": 0.08352817561363486, + "grad_norm": 531.5360717773438, + "learning_rate": 1.985013794667126e-06, + "loss": 24.3438, + "step": 8824 + }, + { + "epoch": 0.08353764163534991, + "grad_norm": 422.6285400390625, + "learning_rate": 1.9850085063861613e-06, + "loss": 35.6875, + "step": 8825 + }, + { + "epoch": 0.08354710765706497, + "grad_norm": 612.0631103515625, + "learning_rate": 1.985003217179353e-06, + "loss": 26.1719, + "step": 8826 + }, + { + "epoch": 0.08355657367878001, + "grad_norm": 473.46197509765625, + "learning_rate": 1.9849979270467052e-06, + "loss": 37.8047, + "step": 8827 + }, + { + "epoch": 0.08356603970049507, + "grad_norm": 471.254638671875, + "learning_rate": 1.9849926359882237e-06, + "loss": 30.7109, + "step": 8828 + }, + { + "epoch": 0.08357550572221013, + "grad_norm": 417.5416564941406, + "learning_rate": 1.9849873440039136e-06, + "loss": 57.0, + "step": 8829 + }, + { + "epoch": 0.08358497174392518, + "grad_norm": 164.27415466308594, + "learning_rate": 1.984982051093779e-06, + "loss": 22.0156, + "step": 8830 + }, + { + "epoch": 0.08359443776564024, + "grad_norm": 295.36785888671875, + "learning_rate": 1.9849767572578257e-06, + "loss": 20.1172, + "step": 8831 + }, + { + "epoch": 0.08360390378735529, + "grad_norm": 2.831653118133545, + "learning_rate": 1.984971462496058e-06, + "loss": 0.9204, + "step": 8832 + }, + { + "epoch": 0.08361336980907035, + "grad_norm": 1456.96044921875, + "learning_rate": 1.9849661668084814e-06, + "loss": 67.1562, + "step": 8833 + }, + { + "epoch": 0.08362283583078539, + "grad_norm": 248.31365966796875, + "learning_rate": 1.9849608701951007e-06, + "loss": 19.8125, + "step": 8834 + }, + { + "epoch": 0.08363230185250045, + "grad_norm": 745.7921752929688, + "learning_rate": 1.9849555726559205e-06, + "loss": 47.2812, + "step": 8835 + }, + { + "epoch": 0.0836417678742155, + "grad_norm": 397.4355773925781, + "learning_rate": 1.984950274190947e-06, + "loss": 33.1719, + "step": 8836 + }, + { + "epoch": 0.08365123389593056, + "grad_norm": 3.3277812004089355, + "learning_rate": 1.9849449748001833e-06, + "loss": 0.9858, + "step": 8837 + }, + { + "epoch": 0.08366069991764562, + "grad_norm": 758.599853515625, + "learning_rate": 1.984939674483636e-06, + "loss": 26.2031, + "step": 8838 + }, + { + "epoch": 0.08367016593936066, + "grad_norm": 546.0413208007812, + "learning_rate": 1.9849343732413094e-06, + "loss": 46.6875, + "step": 8839 + }, + { + "epoch": 0.08367963196107572, + "grad_norm": 813.3239135742188, + "learning_rate": 1.984929071073208e-06, + "loss": 32.6562, + "step": 8840 + }, + { + "epoch": 0.08368909798279077, + "grad_norm": 176.57369995117188, + "learning_rate": 1.984923767979338e-06, + "loss": 18.8555, + "step": 8841 + }, + { + "epoch": 0.08369856400450583, + "grad_norm": 245.02989196777344, + "learning_rate": 1.9849184639597036e-06, + "loss": 23.1953, + "step": 8842 + }, + { + "epoch": 0.08370803002622088, + "grad_norm": 585.6976928710938, + "learning_rate": 1.9849131590143096e-06, + "loss": 52.5781, + "step": 8843 + }, + { + "epoch": 0.08371749604793594, + "grad_norm": 676.7100219726562, + "learning_rate": 1.984907853143162e-06, + "loss": 33.3047, + "step": 8844 + }, + { + "epoch": 0.08372696206965098, + "grad_norm": 159.07142639160156, + "learning_rate": 1.9849025463462645e-06, + "loss": 22.1406, + "step": 8845 + }, + { + "epoch": 0.08373642809136604, + "grad_norm": 282.001220703125, + "learning_rate": 1.984897238623623e-06, + "loss": 23.6562, + "step": 8846 + }, + { + "epoch": 0.0837458941130811, + "grad_norm": 388.9539794921875, + "learning_rate": 1.9848919299752416e-06, + "loss": 48.1719, + "step": 8847 + }, + { + "epoch": 0.08375536013479615, + "grad_norm": 393.9342346191406, + "learning_rate": 1.9848866204011264e-06, + "loss": 48.75, + "step": 8848 + }, + { + "epoch": 0.08376482615651121, + "grad_norm": 210.8812255859375, + "learning_rate": 1.984881309901282e-06, + "loss": 18.5859, + "step": 8849 + }, + { + "epoch": 0.08377429217822625, + "grad_norm": 3.436210870742798, + "learning_rate": 1.984875998475713e-06, + "loss": 1.0728, + "step": 8850 + }, + { + "epoch": 0.08378375819994131, + "grad_norm": 3.0027875900268555, + "learning_rate": 1.9848706861244245e-06, + "loss": 0.9043, + "step": 8851 + }, + { + "epoch": 0.08379322422165636, + "grad_norm": 230.69186401367188, + "learning_rate": 1.9848653728474217e-06, + "loss": 25.0234, + "step": 8852 + }, + { + "epoch": 0.08380269024337142, + "grad_norm": 654.931396484375, + "learning_rate": 1.9848600586447094e-06, + "loss": 55.5078, + "step": 8853 + }, + { + "epoch": 0.08381215626508647, + "grad_norm": 330.3341369628906, + "learning_rate": 1.984854743516293e-06, + "loss": 25.0312, + "step": 8854 + }, + { + "epoch": 0.08382162228680153, + "grad_norm": 250.2721405029297, + "learning_rate": 1.9848494274621766e-06, + "loss": 26.1719, + "step": 8855 + }, + { + "epoch": 0.08383108830851659, + "grad_norm": 173.36387634277344, + "learning_rate": 1.9848441104823664e-06, + "loss": 15.7969, + "step": 8856 + }, + { + "epoch": 0.08384055433023163, + "grad_norm": 556.9160766601562, + "learning_rate": 1.984838792576867e-06, + "loss": 59.4219, + "step": 8857 + }, + { + "epoch": 0.08385002035194669, + "grad_norm": 522.1920166015625, + "learning_rate": 1.9848334737456827e-06, + "loss": 30.1406, + "step": 8858 + }, + { + "epoch": 0.08385948637366174, + "grad_norm": 484.8470764160156, + "learning_rate": 1.984828153988819e-06, + "loss": 19.9844, + "step": 8859 + }, + { + "epoch": 0.0838689523953768, + "grad_norm": 1095.81982421875, + "learning_rate": 1.984822833306281e-06, + "loss": 44.1406, + "step": 8860 + }, + { + "epoch": 0.08387841841709184, + "grad_norm": 468.5309753417969, + "learning_rate": 1.984817511698074e-06, + "loss": 24.7734, + "step": 8861 + }, + { + "epoch": 0.0838878844388069, + "grad_norm": 218.4012908935547, + "learning_rate": 1.984812189164202e-06, + "loss": 19.0547, + "step": 8862 + }, + { + "epoch": 0.08389735046052196, + "grad_norm": 488.3345947265625, + "learning_rate": 1.984806865704671e-06, + "loss": 36.6172, + "step": 8863 + }, + { + "epoch": 0.08390681648223701, + "grad_norm": 300.9268493652344, + "learning_rate": 1.9848015413194856e-06, + "loss": 23.6016, + "step": 8864 + }, + { + "epoch": 0.08391628250395207, + "grad_norm": 202.99546813964844, + "learning_rate": 1.9847962160086506e-06, + "loss": 17.1094, + "step": 8865 + }, + { + "epoch": 0.08392574852566712, + "grad_norm": 295.3038024902344, + "learning_rate": 1.9847908897721717e-06, + "loss": 34.5, + "step": 8866 + }, + { + "epoch": 0.08393521454738218, + "grad_norm": 222.79852294921875, + "learning_rate": 1.9847855626100526e-06, + "loss": 15.0781, + "step": 8867 + }, + { + "epoch": 0.08394468056909722, + "grad_norm": 388.4269104003906, + "learning_rate": 1.9847802345223e-06, + "loss": 19.6094, + "step": 8868 + }, + { + "epoch": 0.08395414659081228, + "grad_norm": 180.2424774169922, + "learning_rate": 1.9847749055089173e-06, + "loss": 17.1172, + "step": 8869 + }, + { + "epoch": 0.08396361261252733, + "grad_norm": 330.44854736328125, + "learning_rate": 1.9847695755699108e-06, + "loss": 33.0391, + "step": 8870 + }, + { + "epoch": 0.08397307863424239, + "grad_norm": 340.24462890625, + "learning_rate": 1.9847642447052845e-06, + "loss": 36.9531, + "step": 8871 + }, + { + "epoch": 0.08398254465595745, + "grad_norm": 2.9250519275665283, + "learning_rate": 1.9847589129150443e-06, + "loss": 0.8091, + "step": 8872 + }, + { + "epoch": 0.0839920106776725, + "grad_norm": 284.3958740234375, + "learning_rate": 1.984753580199195e-06, + "loss": 16.7734, + "step": 8873 + }, + { + "epoch": 0.08400147669938755, + "grad_norm": 238.1319122314453, + "learning_rate": 1.984748246557741e-06, + "loss": 23.3047, + "step": 8874 + }, + { + "epoch": 0.0840109427211026, + "grad_norm": 192.00372314453125, + "learning_rate": 1.9847429119906873e-06, + "loss": 21.9219, + "step": 8875 + }, + { + "epoch": 0.08402040874281766, + "grad_norm": 444.34039306640625, + "learning_rate": 1.98473757649804e-06, + "loss": 27.4688, + "step": 8876 + }, + { + "epoch": 0.0840298747645327, + "grad_norm": 363.1620788574219, + "learning_rate": 1.984732240079803e-06, + "loss": 23.7031, + "step": 8877 + }, + { + "epoch": 0.08403934078624777, + "grad_norm": 238.79112243652344, + "learning_rate": 1.984726902735982e-06, + "loss": 31.8125, + "step": 8878 + }, + { + "epoch": 0.08404880680796281, + "grad_norm": 318.61712646484375, + "learning_rate": 1.984721564466582e-06, + "loss": 18.3281, + "step": 8879 + }, + { + "epoch": 0.08405827282967787, + "grad_norm": 699.9796142578125, + "learning_rate": 1.984716225271607e-06, + "loss": 19.3203, + "step": 8880 + }, + { + "epoch": 0.08406773885139293, + "grad_norm": 351.974365234375, + "learning_rate": 1.9847108851510635e-06, + "loss": 29.7656, + "step": 8881 + }, + { + "epoch": 0.08407720487310798, + "grad_norm": 230.63966369628906, + "learning_rate": 1.9847055441049555e-06, + "loss": 21.9766, + "step": 8882 + }, + { + "epoch": 0.08408667089482304, + "grad_norm": 449.7486877441406, + "learning_rate": 1.984700202133289e-06, + "loss": 26.1719, + "step": 8883 + }, + { + "epoch": 0.08409613691653808, + "grad_norm": 382.6612243652344, + "learning_rate": 1.9846948592360676e-06, + "loss": 21.293, + "step": 8884 + }, + { + "epoch": 0.08410560293825314, + "grad_norm": 458.1452941894531, + "learning_rate": 1.984689515413297e-06, + "loss": 22.5469, + "step": 8885 + }, + { + "epoch": 0.08411506895996819, + "grad_norm": 3.5151407718658447, + "learning_rate": 1.984684170664983e-06, + "loss": 1.064, + "step": 8886 + }, + { + "epoch": 0.08412453498168325, + "grad_norm": 470.7999572753906, + "learning_rate": 1.98467882499113e-06, + "loss": 27.1172, + "step": 8887 + }, + { + "epoch": 0.0841340010033983, + "grad_norm": 612.2626953125, + "learning_rate": 1.984673478391742e-06, + "loss": 47.5156, + "step": 8888 + }, + { + "epoch": 0.08414346702511336, + "grad_norm": 650.4542846679688, + "learning_rate": 1.984668130866826e-06, + "loss": 19.7656, + "step": 8889 + }, + { + "epoch": 0.08415293304682842, + "grad_norm": 3.203543186187744, + "learning_rate": 1.9846627824163854e-06, + "loss": 1.0146, + "step": 8890 + }, + { + "epoch": 0.08416239906854346, + "grad_norm": 301.44696044921875, + "learning_rate": 1.984657433040426e-06, + "loss": 24.6016, + "step": 8891 + }, + { + "epoch": 0.08417186509025852, + "grad_norm": 3.6490440368652344, + "learning_rate": 1.9846520827389527e-06, + "loss": 0.8613, + "step": 8892 + }, + { + "epoch": 0.08418133111197357, + "grad_norm": 981.7557373046875, + "learning_rate": 1.9846467315119702e-06, + "loss": 58.0, + "step": 8893 + }, + { + "epoch": 0.08419079713368863, + "grad_norm": 2.8892717361450195, + "learning_rate": 1.9846413793594845e-06, + "loss": 0.7439, + "step": 8894 + }, + { + "epoch": 0.08420026315540367, + "grad_norm": 372.880615234375, + "learning_rate": 1.9846360262814996e-06, + "loss": 33.4766, + "step": 8895 + }, + { + "epoch": 0.08420972917711873, + "grad_norm": 215.8834686279297, + "learning_rate": 1.9846306722780207e-06, + "loss": 26.125, + "step": 8896 + }, + { + "epoch": 0.08421919519883378, + "grad_norm": 3.05358624458313, + "learning_rate": 1.9846253173490532e-06, + "loss": 0.873, + "step": 8897 + }, + { + "epoch": 0.08422866122054884, + "grad_norm": 685.8557739257812, + "learning_rate": 1.984619961494602e-06, + "loss": 29.5234, + "step": 8898 + }, + { + "epoch": 0.0842381272422639, + "grad_norm": 270.11041259765625, + "learning_rate": 1.984614604714672e-06, + "loss": 20.7031, + "step": 8899 + }, + { + "epoch": 0.08424759326397895, + "grad_norm": 212.61422729492188, + "learning_rate": 1.9846092470092684e-06, + "loss": 14.5234, + "step": 8900 + }, + { + "epoch": 0.084257059285694, + "grad_norm": 424.1892395019531, + "learning_rate": 1.984603888378396e-06, + "loss": 35.4141, + "step": 8901 + }, + { + "epoch": 0.08426652530740905, + "grad_norm": 490.9993591308594, + "learning_rate": 1.98459852882206e-06, + "loss": 12.1758, + "step": 8902 + }, + { + "epoch": 0.08427599132912411, + "grad_norm": 445.6024475097656, + "learning_rate": 1.9845931683402656e-06, + "loss": 26.4297, + "step": 8903 + }, + { + "epoch": 0.08428545735083916, + "grad_norm": 289.1441955566406, + "learning_rate": 1.9845878069330174e-06, + "loss": 24.7656, + "step": 8904 + }, + { + "epoch": 0.08429492337255422, + "grad_norm": 462.58441162109375, + "learning_rate": 1.984582444600321e-06, + "loss": 31.0625, + "step": 8905 + }, + { + "epoch": 0.08430438939426928, + "grad_norm": 458.9753112792969, + "learning_rate": 1.984577081342181e-06, + "loss": 13.668, + "step": 8906 + }, + { + "epoch": 0.08431385541598432, + "grad_norm": 264.06951904296875, + "learning_rate": 1.984571717158603e-06, + "loss": 22.8516, + "step": 8907 + }, + { + "epoch": 0.08432332143769938, + "grad_norm": 220.81752014160156, + "learning_rate": 1.984566352049591e-06, + "loss": 23.2969, + "step": 8908 + }, + { + "epoch": 0.08433278745941443, + "grad_norm": 710.1553344726562, + "learning_rate": 1.9845609860151515e-06, + "loss": 29.7266, + "step": 8909 + }, + { + "epoch": 0.08434225348112949, + "grad_norm": 613.0198364257812, + "learning_rate": 1.984555619055288e-06, + "loss": 29.375, + "step": 8910 + }, + { + "epoch": 0.08435171950284454, + "grad_norm": 448.0309143066406, + "learning_rate": 1.9845502511700064e-06, + "loss": 26.6602, + "step": 8911 + }, + { + "epoch": 0.0843611855245596, + "grad_norm": 950.5946044921875, + "learning_rate": 1.9845448823593116e-06, + "loss": 44.3281, + "step": 8912 + }, + { + "epoch": 0.08437065154627464, + "grad_norm": 707.7650756835938, + "learning_rate": 1.984539512623209e-06, + "loss": 32.5312, + "step": 8913 + }, + { + "epoch": 0.0843801175679897, + "grad_norm": 277.1792907714844, + "learning_rate": 1.984534141961703e-06, + "loss": 22.6719, + "step": 8914 + }, + { + "epoch": 0.08438958358970476, + "grad_norm": 401.34832763671875, + "learning_rate": 1.9845287703747987e-06, + "loss": 51.7188, + "step": 8915 + }, + { + "epoch": 0.08439904961141981, + "grad_norm": 358.3507995605469, + "learning_rate": 1.9845233978625018e-06, + "loss": 49.875, + "step": 8916 + }, + { + "epoch": 0.08440851563313487, + "grad_norm": 374.16357421875, + "learning_rate": 1.984518024424817e-06, + "loss": 24.0547, + "step": 8917 + }, + { + "epoch": 0.08441798165484991, + "grad_norm": 740.1757202148438, + "learning_rate": 1.9845126500617493e-06, + "loss": 20.7891, + "step": 8918 + }, + { + "epoch": 0.08442744767656497, + "grad_norm": 319.4224548339844, + "learning_rate": 1.9845072747733035e-06, + "loss": 21.3477, + "step": 8919 + }, + { + "epoch": 0.08443691369828002, + "grad_norm": 641.6824340820312, + "learning_rate": 1.984501898559485e-06, + "loss": 38.3594, + "step": 8920 + }, + { + "epoch": 0.08444637971999508, + "grad_norm": 327.9729919433594, + "learning_rate": 1.984496521420299e-06, + "loss": 17.1016, + "step": 8921 + }, + { + "epoch": 0.08445584574171013, + "grad_norm": 3.042351722717285, + "learning_rate": 1.98449114335575e-06, + "loss": 0.7656, + "step": 8922 + }, + { + "epoch": 0.08446531176342519, + "grad_norm": 259.6384582519531, + "learning_rate": 1.9844857643658435e-06, + "loss": 28.3125, + "step": 8923 + }, + { + "epoch": 0.08447477778514025, + "grad_norm": 439.8496398925781, + "learning_rate": 1.9844803844505845e-06, + "loss": 21.5625, + "step": 8924 + }, + { + "epoch": 0.08448424380685529, + "grad_norm": 784.4655151367188, + "learning_rate": 1.984475003609978e-06, + "loss": 39.6016, + "step": 8925 + }, + { + "epoch": 0.08449370982857035, + "grad_norm": 356.92474365234375, + "learning_rate": 1.9844696218440286e-06, + "loss": 22.3906, + "step": 8926 + }, + { + "epoch": 0.0845031758502854, + "grad_norm": 427.600341796875, + "learning_rate": 1.9844642391527423e-06, + "loss": 51.7031, + "step": 8927 + }, + { + "epoch": 0.08451264187200046, + "grad_norm": 202.8555145263672, + "learning_rate": 1.9844588555361233e-06, + "loss": 21.0781, + "step": 8928 + }, + { + "epoch": 0.0845221078937155, + "grad_norm": 325.9989929199219, + "learning_rate": 1.984453470994177e-06, + "loss": 28.1172, + "step": 8929 + }, + { + "epoch": 0.08453157391543056, + "grad_norm": 444.90142822265625, + "learning_rate": 1.984448085526909e-06, + "loss": 34.9531, + "step": 8930 + }, + { + "epoch": 0.08454103993714561, + "grad_norm": 470.87457275390625, + "learning_rate": 1.9844426991343238e-06, + "loss": 31.6953, + "step": 8931 + }, + { + "epoch": 0.08455050595886067, + "grad_norm": 385.0086975097656, + "learning_rate": 1.984437311816426e-06, + "loss": 25.5664, + "step": 8932 + }, + { + "epoch": 0.08455997198057573, + "grad_norm": 266.005615234375, + "learning_rate": 1.9844319235732214e-06, + "loss": 20.8906, + "step": 8933 + }, + { + "epoch": 0.08456943800229078, + "grad_norm": 643.6508178710938, + "learning_rate": 1.984426534404715e-06, + "loss": 54.3828, + "step": 8934 + }, + { + "epoch": 0.08457890402400584, + "grad_norm": 562.8931884765625, + "learning_rate": 1.9844211443109115e-06, + "loss": 29.5, + "step": 8935 + }, + { + "epoch": 0.08458837004572088, + "grad_norm": 2.835160970687866, + "learning_rate": 1.9844157532918163e-06, + "loss": 0.8364, + "step": 8936 + }, + { + "epoch": 0.08459783606743594, + "grad_norm": 242.38140869140625, + "learning_rate": 1.984410361347434e-06, + "loss": 28.2344, + "step": 8937 + }, + { + "epoch": 0.08460730208915099, + "grad_norm": 497.8084411621094, + "learning_rate": 1.9844049684777704e-06, + "loss": 60.375, + "step": 8938 + }, + { + "epoch": 0.08461676811086605, + "grad_norm": 341.8622131347656, + "learning_rate": 1.98439957468283e-06, + "loss": 16.8828, + "step": 8939 + }, + { + "epoch": 0.0846262341325811, + "grad_norm": 612.1652221679688, + "learning_rate": 1.984394179962618e-06, + "loss": 58.4375, + "step": 8940 + }, + { + "epoch": 0.08463570015429615, + "grad_norm": 384.6694641113281, + "learning_rate": 1.9843887843171397e-06, + "loss": 26.8125, + "step": 8941 + }, + { + "epoch": 0.08464516617601121, + "grad_norm": 464.9880065917969, + "learning_rate": 1.9843833877463995e-06, + "loss": 19.3125, + "step": 8942 + }, + { + "epoch": 0.08465463219772626, + "grad_norm": 3.190570116043091, + "learning_rate": 1.9843779902504037e-06, + "loss": 0.8154, + "step": 8943 + }, + { + "epoch": 0.08466409821944132, + "grad_norm": 397.5469970703125, + "learning_rate": 1.984372591829156e-06, + "loss": 19.6875, + "step": 8944 + }, + { + "epoch": 0.08467356424115637, + "grad_norm": 252.61300659179688, + "learning_rate": 1.9843671924826622e-06, + "loss": 22.8672, + "step": 8945 + }, + { + "epoch": 0.08468303026287143, + "grad_norm": 347.6247863769531, + "learning_rate": 1.9843617922109276e-06, + "loss": 25.8516, + "step": 8946 + }, + { + "epoch": 0.08469249628458647, + "grad_norm": 196.84426879882812, + "learning_rate": 1.984356391013957e-06, + "loss": 22.3594, + "step": 8947 + }, + { + "epoch": 0.08470196230630153, + "grad_norm": 169.17178344726562, + "learning_rate": 1.9843509888917547e-06, + "loss": 20.7266, + "step": 8948 + }, + { + "epoch": 0.08471142832801659, + "grad_norm": 446.05029296875, + "learning_rate": 1.9843455858443274e-06, + "loss": 49.7344, + "step": 8949 + }, + { + "epoch": 0.08472089434973164, + "grad_norm": 483.9273986816406, + "learning_rate": 1.984340181871679e-06, + "loss": 24.125, + "step": 8950 + }, + { + "epoch": 0.0847303603714467, + "grad_norm": 426.08087158203125, + "learning_rate": 1.9843347769738145e-06, + "loss": 30.9844, + "step": 8951 + }, + { + "epoch": 0.08473982639316174, + "grad_norm": 764.5841674804688, + "learning_rate": 1.98432937115074e-06, + "loss": 23.0078, + "step": 8952 + }, + { + "epoch": 0.0847492924148768, + "grad_norm": 250.4618682861328, + "learning_rate": 1.984323964402459e-06, + "loss": 22.7891, + "step": 8953 + }, + { + "epoch": 0.08475875843659185, + "grad_norm": 347.85308837890625, + "learning_rate": 1.984318556728978e-06, + "loss": 19.0625, + "step": 8954 + }, + { + "epoch": 0.08476822445830691, + "grad_norm": 434.8587341308594, + "learning_rate": 1.9843131481303017e-06, + "loss": 60.8594, + "step": 8955 + }, + { + "epoch": 0.08477769048002196, + "grad_norm": 602.5391845703125, + "learning_rate": 1.984307738606435e-06, + "loss": 51.2109, + "step": 8956 + }, + { + "epoch": 0.08478715650173702, + "grad_norm": 634.9931030273438, + "learning_rate": 1.984302328157383e-06, + "loss": 20.5547, + "step": 8957 + }, + { + "epoch": 0.08479662252345208, + "grad_norm": 868.7211303710938, + "learning_rate": 1.984296916783151e-06, + "loss": 46.7109, + "step": 8958 + }, + { + "epoch": 0.08480608854516712, + "grad_norm": 148.088623046875, + "learning_rate": 1.984291504483744e-06, + "loss": 22.4766, + "step": 8959 + }, + { + "epoch": 0.08481555456688218, + "grad_norm": 321.1512756347656, + "learning_rate": 1.9842860912591665e-06, + "loss": 12.6953, + "step": 8960 + }, + { + "epoch": 0.08482502058859723, + "grad_norm": 430.67913818359375, + "learning_rate": 1.9842806771094247e-06, + "loss": 27.3125, + "step": 8961 + }, + { + "epoch": 0.08483448661031229, + "grad_norm": 173.428466796875, + "learning_rate": 1.9842752620345226e-06, + "loss": 18.8516, + "step": 8962 + }, + { + "epoch": 0.08484395263202733, + "grad_norm": 470.8623352050781, + "learning_rate": 1.984269846034466e-06, + "loss": 66.125, + "step": 8963 + }, + { + "epoch": 0.0848534186537424, + "grad_norm": 336.7995300292969, + "learning_rate": 1.98426442910926e-06, + "loss": 31.8125, + "step": 8964 + }, + { + "epoch": 0.08486288467545744, + "grad_norm": 439.6339111328125, + "learning_rate": 1.984259011258909e-06, + "loss": 43.1328, + "step": 8965 + }, + { + "epoch": 0.0848723506971725, + "grad_norm": 3.148829221725464, + "learning_rate": 1.984253592483419e-06, + "loss": 0.9243, + "step": 8966 + }, + { + "epoch": 0.08488181671888756, + "grad_norm": 260.5761413574219, + "learning_rate": 1.9842481727827946e-06, + "loss": 18.5703, + "step": 8967 + }, + { + "epoch": 0.0848912827406026, + "grad_norm": 895.5426025390625, + "learning_rate": 1.984242752157041e-06, + "loss": 54.0, + "step": 8968 + }, + { + "epoch": 0.08490074876231767, + "grad_norm": 541.4494018554688, + "learning_rate": 1.9842373306061627e-06, + "loss": 25.1562, + "step": 8969 + }, + { + "epoch": 0.08491021478403271, + "grad_norm": 3.313176393508911, + "learning_rate": 1.984231908130166e-06, + "loss": 0.9561, + "step": 8970 + }, + { + "epoch": 0.08491968080574777, + "grad_norm": 715.0614624023438, + "learning_rate": 1.9842264847290547e-06, + "loss": 28.3359, + "step": 8971 + }, + { + "epoch": 0.08492914682746282, + "grad_norm": 316.01031494140625, + "learning_rate": 1.984221060402835e-06, + "loss": 21.0625, + "step": 8972 + }, + { + "epoch": 0.08493861284917788, + "grad_norm": 303.5615539550781, + "learning_rate": 1.9842156351515114e-06, + "loss": 30.8516, + "step": 8973 + }, + { + "epoch": 0.08494807887089292, + "grad_norm": 515.254638671875, + "learning_rate": 1.9842102089750893e-06, + "loss": 46.6562, + "step": 8974 + }, + { + "epoch": 0.08495754489260798, + "grad_norm": 336.7696533203125, + "learning_rate": 1.9842047818735733e-06, + "loss": 24.7891, + "step": 8975 + }, + { + "epoch": 0.08496701091432304, + "grad_norm": 225.42103576660156, + "learning_rate": 1.984199353846969e-06, + "loss": 23.5234, + "step": 8976 + }, + { + "epoch": 0.08497647693603809, + "grad_norm": 438.22174072265625, + "learning_rate": 1.984193924895281e-06, + "loss": 20.3359, + "step": 8977 + }, + { + "epoch": 0.08498594295775315, + "grad_norm": 314.52557373046875, + "learning_rate": 1.9841884950185156e-06, + "loss": 19.6719, + "step": 8978 + }, + { + "epoch": 0.0849954089794682, + "grad_norm": 451.1203308105469, + "learning_rate": 1.984183064216676e-06, + "loss": 24.5, + "step": 8979 + }, + { + "epoch": 0.08500487500118326, + "grad_norm": 284.1127624511719, + "learning_rate": 1.984177632489769e-06, + "loss": 15.6562, + "step": 8980 + }, + { + "epoch": 0.0850143410228983, + "grad_norm": 152.5570526123047, + "learning_rate": 1.984172199837799e-06, + "loss": 21.2812, + "step": 8981 + }, + { + "epoch": 0.08502380704461336, + "grad_norm": 233.97743225097656, + "learning_rate": 1.984166766260771e-06, + "loss": 12.2266, + "step": 8982 + }, + { + "epoch": 0.08503327306632841, + "grad_norm": 450.0826416015625, + "learning_rate": 1.9841613317586904e-06, + "loss": 41.2344, + "step": 8983 + }, + { + "epoch": 0.08504273908804347, + "grad_norm": 507.8577575683594, + "learning_rate": 1.9841558963315622e-06, + "loss": 34.5469, + "step": 8984 + }, + { + "epoch": 0.08505220510975853, + "grad_norm": 677.7987670898438, + "learning_rate": 1.9841504599793915e-06, + "loss": 55.875, + "step": 8985 + }, + { + "epoch": 0.08506167113147357, + "grad_norm": 211.3102264404297, + "learning_rate": 1.984145022702183e-06, + "loss": 26.5469, + "step": 8986 + }, + { + "epoch": 0.08507113715318863, + "grad_norm": 147.4817657470703, + "learning_rate": 1.9841395844999424e-06, + "loss": 15.3281, + "step": 8987 + }, + { + "epoch": 0.08508060317490368, + "grad_norm": 254.62643432617188, + "learning_rate": 1.984134145372675e-06, + "loss": 24.4922, + "step": 8988 + }, + { + "epoch": 0.08509006919661874, + "grad_norm": 466.29986572265625, + "learning_rate": 1.984128705320385e-06, + "loss": 9.2852, + "step": 8989 + }, + { + "epoch": 0.08509953521833379, + "grad_norm": 1727.5743408203125, + "learning_rate": 1.9841232643430783e-06, + "loss": 49.0859, + "step": 8990 + }, + { + "epoch": 0.08510900124004885, + "grad_norm": 236.56930541992188, + "learning_rate": 1.9841178224407598e-06, + "loss": 27.8906, + "step": 8991 + }, + { + "epoch": 0.08511846726176389, + "grad_norm": 738.1075439453125, + "learning_rate": 1.9841123796134343e-06, + "loss": 41.6875, + "step": 8992 + }, + { + "epoch": 0.08512793328347895, + "grad_norm": 252.18765258789062, + "learning_rate": 1.9841069358611073e-06, + "loss": 27.2656, + "step": 8993 + }, + { + "epoch": 0.08513739930519401, + "grad_norm": 240.696533203125, + "learning_rate": 1.984101491183784e-06, + "loss": 15.7266, + "step": 8994 + }, + { + "epoch": 0.08514686532690906, + "grad_norm": 444.804443359375, + "learning_rate": 1.984096045581469e-06, + "loss": 41.5156, + "step": 8995 + }, + { + "epoch": 0.08515633134862412, + "grad_norm": 193.24325561523438, + "learning_rate": 1.9840905990541683e-06, + "loss": 21.7812, + "step": 8996 + }, + { + "epoch": 0.08516579737033916, + "grad_norm": 462.8013610839844, + "learning_rate": 1.9840851516018857e-06, + "loss": 40.3594, + "step": 8997 + }, + { + "epoch": 0.08517526339205422, + "grad_norm": 752.9466552734375, + "learning_rate": 1.9840797032246276e-06, + "loss": 23.5547, + "step": 8998 + }, + { + "epoch": 0.08518472941376927, + "grad_norm": 488.2502136230469, + "learning_rate": 1.9840742539223984e-06, + "loss": 23.2188, + "step": 8999 + }, + { + "epoch": 0.08519419543548433, + "grad_norm": 900.9025268554688, + "learning_rate": 1.9840688036952034e-06, + "loss": 39.1953, + "step": 9000 + }, + { + "epoch": 0.08520366145719939, + "grad_norm": 568.57177734375, + "learning_rate": 1.984063352543048e-06, + "loss": 25.25, + "step": 9001 + }, + { + "epoch": 0.08521312747891444, + "grad_norm": 626.4536743164062, + "learning_rate": 1.9840579004659367e-06, + "loss": 47.0469, + "step": 9002 + }, + { + "epoch": 0.0852225935006295, + "grad_norm": 932.9011840820312, + "learning_rate": 1.9840524474638752e-06, + "loss": 80.6094, + "step": 9003 + }, + { + "epoch": 0.08523205952234454, + "grad_norm": 2.3400521278381348, + "learning_rate": 1.9840469935368686e-06, + "loss": 0.7202, + "step": 9004 + }, + { + "epoch": 0.0852415255440596, + "grad_norm": 248.3699493408203, + "learning_rate": 1.9840415386849215e-06, + "loss": 19.2578, + "step": 9005 + }, + { + "epoch": 0.08525099156577465, + "grad_norm": 700.3169555664062, + "learning_rate": 1.9840360829080394e-06, + "loss": 22.2812, + "step": 9006 + }, + { + "epoch": 0.08526045758748971, + "grad_norm": 356.7626037597656, + "learning_rate": 1.9840306262062274e-06, + "loss": 35.625, + "step": 9007 + }, + { + "epoch": 0.08526992360920475, + "grad_norm": 469.1622619628906, + "learning_rate": 1.9840251685794906e-06, + "loss": 22.9844, + "step": 9008 + }, + { + "epoch": 0.08527938963091981, + "grad_norm": 301.9053955078125, + "learning_rate": 1.9840197100278345e-06, + "loss": 41.7812, + "step": 9009 + }, + { + "epoch": 0.08528885565263487, + "grad_norm": 360.13427734375, + "learning_rate": 1.9840142505512637e-06, + "loss": 23.7031, + "step": 9010 + }, + { + "epoch": 0.08529832167434992, + "grad_norm": 363.123291015625, + "learning_rate": 1.9840087901497837e-06, + "loss": 24.4141, + "step": 9011 + }, + { + "epoch": 0.08530778769606498, + "grad_norm": 426.5337829589844, + "learning_rate": 1.9840033288233992e-06, + "loss": 41.7031, + "step": 9012 + }, + { + "epoch": 0.08531725371778003, + "grad_norm": 726.8045043945312, + "learning_rate": 1.9839978665721153e-06, + "loss": 22.625, + "step": 9013 + }, + { + "epoch": 0.08532671973949509, + "grad_norm": 485.836669921875, + "learning_rate": 1.9839924033959376e-06, + "loss": 50.7266, + "step": 9014 + }, + { + "epoch": 0.08533618576121013, + "grad_norm": 222.4981231689453, + "learning_rate": 1.9839869392948714e-06, + "loss": 30.0312, + "step": 9015 + }, + { + "epoch": 0.08534565178292519, + "grad_norm": 255.98147583007812, + "learning_rate": 1.9839814742689214e-06, + "loss": 16.3828, + "step": 9016 + }, + { + "epoch": 0.08535511780464024, + "grad_norm": 2.5820746421813965, + "learning_rate": 1.9839760083180924e-06, + "loss": 0.8901, + "step": 9017 + }, + { + "epoch": 0.0853645838263553, + "grad_norm": 638.98095703125, + "learning_rate": 1.9839705414423902e-06, + "loss": 54.6719, + "step": 9018 + }, + { + "epoch": 0.08537404984807036, + "grad_norm": 221.1435546875, + "learning_rate": 1.98396507364182e-06, + "loss": 24.9922, + "step": 9019 + }, + { + "epoch": 0.0853835158697854, + "grad_norm": 428.1808776855469, + "learning_rate": 1.9839596049163863e-06, + "loss": 15.9453, + "step": 9020 + }, + { + "epoch": 0.08539298189150046, + "grad_norm": 663.3126831054688, + "learning_rate": 1.9839541352660947e-06, + "loss": 42.4844, + "step": 9021 + }, + { + "epoch": 0.08540244791321551, + "grad_norm": 460.8239440917969, + "learning_rate": 1.9839486646909503e-06, + "loss": 24.9609, + "step": 9022 + }, + { + "epoch": 0.08541191393493057, + "grad_norm": 612.2810668945312, + "learning_rate": 1.983943193190958e-06, + "loss": 51.2734, + "step": 9023 + }, + { + "epoch": 0.08542137995664562, + "grad_norm": 418.028076171875, + "learning_rate": 1.9839377207661233e-06, + "loss": 30.5859, + "step": 9024 + }, + { + "epoch": 0.08543084597836068, + "grad_norm": 227.06149291992188, + "learning_rate": 1.983932247416451e-06, + "loss": 22.8125, + "step": 9025 + }, + { + "epoch": 0.08544031200007572, + "grad_norm": 300.8431396484375, + "learning_rate": 1.9839267731419464e-06, + "loss": 22.3047, + "step": 9026 + }, + { + "epoch": 0.08544977802179078, + "grad_norm": 891.8139038085938, + "learning_rate": 1.983921297942615e-06, + "loss": 33.6562, + "step": 9027 + }, + { + "epoch": 0.08545924404350584, + "grad_norm": 415.286865234375, + "learning_rate": 1.9839158218184607e-06, + "loss": 52.1562, + "step": 9028 + }, + { + "epoch": 0.08546871006522089, + "grad_norm": 463.89208984375, + "learning_rate": 1.9839103447694905e-06, + "loss": 44.2812, + "step": 9029 + }, + { + "epoch": 0.08547817608693595, + "grad_norm": 404.8325500488281, + "learning_rate": 1.983904866795708e-06, + "loss": 33.8438, + "step": 9030 + }, + { + "epoch": 0.085487642108651, + "grad_norm": 615.2754516601562, + "learning_rate": 1.9838993878971195e-06, + "loss": 38.2734, + "step": 9031 + }, + { + "epoch": 0.08549710813036605, + "grad_norm": 203.61416625976562, + "learning_rate": 1.983893908073729e-06, + "loss": 26.5469, + "step": 9032 + }, + { + "epoch": 0.0855065741520811, + "grad_norm": 3.249326229095459, + "learning_rate": 1.983888427325542e-06, + "loss": 0.9663, + "step": 9033 + }, + { + "epoch": 0.08551604017379616, + "grad_norm": 402.7342529296875, + "learning_rate": 1.9838829456525644e-06, + "loss": 27.7344, + "step": 9034 + }, + { + "epoch": 0.0855255061955112, + "grad_norm": 417.86590576171875, + "learning_rate": 1.9838774630548008e-06, + "loss": 51.0, + "step": 9035 + }, + { + "epoch": 0.08553497221722627, + "grad_norm": 186.13804626464844, + "learning_rate": 1.9838719795322567e-06, + "loss": 22.9531, + "step": 9036 + }, + { + "epoch": 0.08554443823894133, + "grad_norm": 563.5912475585938, + "learning_rate": 1.9838664950849365e-06, + "loss": 41.4062, + "step": 9037 + }, + { + "epoch": 0.08555390426065637, + "grad_norm": 317.5250244140625, + "learning_rate": 1.983861009712846e-06, + "loss": 22.6406, + "step": 9038 + }, + { + "epoch": 0.08556337028237143, + "grad_norm": 262.7423400878906, + "learning_rate": 1.9838555234159896e-06, + "loss": 35.5781, + "step": 9039 + }, + { + "epoch": 0.08557283630408648, + "grad_norm": 178.1310272216797, + "learning_rate": 1.983850036194374e-06, + "loss": 18.5312, + "step": 9040 + }, + { + "epoch": 0.08558230232580154, + "grad_norm": 2.8643033504486084, + "learning_rate": 1.9838445480480024e-06, + "loss": 0.9084, + "step": 9041 + }, + { + "epoch": 0.08559176834751658, + "grad_norm": 227.80746459960938, + "learning_rate": 1.983839058976882e-06, + "loss": 23.9688, + "step": 9042 + }, + { + "epoch": 0.08560123436923164, + "grad_norm": 271.30908203125, + "learning_rate": 1.9838335689810155e-06, + "loss": 22.5, + "step": 9043 + }, + { + "epoch": 0.0856107003909467, + "grad_norm": 279.7651672363281, + "learning_rate": 1.9838280780604103e-06, + "loss": 29.7656, + "step": 9044 + }, + { + "epoch": 0.08562016641266175, + "grad_norm": 228.44247436523438, + "learning_rate": 1.9838225862150705e-06, + "loss": 22.6719, + "step": 9045 + }, + { + "epoch": 0.08562963243437681, + "grad_norm": 226.6572265625, + "learning_rate": 1.983817093445002e-06, + "loss": 22.8047, + "step": 9046 + }, + { + "epoch": 0.08563909845609186, + "grad_norm": 659.06298828125, + "learning_rate": 1.983811599750209e-06, + "loss": 53.6641, + "step": 9047 + }, + { + "epoch": 0.08564856447780692, + "grad_norm": 410.4881591796875, + "learning_rate": 1.9838061051306967e-06, + "loss": 18.8398, + "step": 9048 + }, + { + "epoch": 0.08565803049952196, + "grad_norm": 211.89747619628906, + "learning_rate": 1.983800609586471e-06, + "loss": 21.0, + "step": 9049 + }, + { + "epoch": 0.08566749652123702, + "grad_norm": 390.17803955078125, + "learning_rate": 1.983795113117537e-06, + "loss": 27.2188, + "step": 9050 + }, + { + "epoch": 0.08567696254295207, + "grad_norm": 719.3101196289062, + "learning_rate": 1.9837896157238994e-06, + "loss": 45.7734, + "step": 9051 + }, + { + "epoch": 0.08568642856466713, + "grad_norm": 193.02049255371094, + "learning_rate": 1.9837841174055635e-06, + "loss": 21.8359, + "step": 9052 + }, + { + "epoch": 0.08569589458638219, + "grad_norm": 231.52651977539062, + "learning_rate": 1.9837786181625347e-06, + "loss": 27.1797, + "step": 9053 + }, + { + "epoch": 0.08570536060809723, + "grad_norm": 767.3444213867188, + "learning_rate": 1.983773117994818e-06, + "loss": 40.3906, + "step": 9054 + }, + { + "epoch": 0.0857148266298123, + "grad_norm": 645.8029174804688, + "learning_rate": 1.983767616902418e-06, + "loss": 21.1641, + "step": 9055 + }, + { + "epoch": 0.08572429265152734, + "grad_norm": 267.6380310058594, + "learning_rate": 1.9837621148853412e-06, + "loss": 21.1641, + "step": 9056 + }, + { + "epoch": 0.0857337586732424, + "grad_norm": 872.2036743164062, + "learning_rate": 1.983756611943592e-06, + "loss": 25.7109, + "step": 9057 + }, + { + "epoch": 0.08574322469495745, + "grad_norm": 363.88604736328125, + "learning_rate": 1.983751108077175e-06, + "loss": 16.8242, + "step": 9058 + }, + { + "epoch": 0.0857526907166725, + "grad_norm": 212.43003845214844, + "learning_rate": 1.983745603286096e-06, + "loss": 20.9609, + "step": 9059 + }, + { + "epoch": 0.08576215673838755, + "grad_norm": 502.1639709472656, + "learning_rate": 1.9837400975703605e-06, + "loss": 50.0156, + "step": 9060 + }, + { + "epoch": 0.08577162276010261, + "grad_norm": 182.04483032226562, + "learning_rate": 1.9837345909299732e-06, + "loss": 17.4062, + "step": 9061 + }, + { + "epoch": 0.08578108878181767, + "grad_norm": 317.3255310058594, + "learning_rate": 1.983729083364939e-06, + "loss": 29.3594, + "step": 9062 + }, + { + "epoch": 0.08579055480353272, + "grad_norm": 450.9927673339844, + "learning_rate": 1.983723574875264e-06, + "loss": 22.5977, + "step": 9063 + }, + { + "epoch": 0.08580002082524778, + "grad_norm": 772.4249877929688, + "learning_rate": 1.9837180654609527e-06, + "loss": 63.1562, + "step": 9064 + }, + { + "epoch": 0.08580948684696282, + "grad_norm": 389.6232604980469, + "learning_rate": 1.9837125551220105e-06, + "loss": 15.7969, + "step": 9065 + }, + { + "epoch": 0.08581895286867788, + "grad_norm": 756.192626953125, + "learning_rate": 1.983707043858442e-06, + "loss": 58.4375, + "step": 9066 + }, + { + "epoch": 0.08582841889039293, + "grad_norm": 281.69207763671875, + "learning_rate": 1.9837015316702533e-06, + "loss": 40.9375, + "step": 9067 + }, + { + "epoch": 0.08583788491210799, + "grad_norm": 242.1265106201172, + "learning_rate": 1.9836960185574492e-06, + "loss": 21.1719, + "step": 9068 + }, + { + "epoch": 0.08584735093382304, + "grad_norm": 274.39007568359375, + "learning_rate": 1.9836905045200346e-06, + "loss": 20.1016, + "step": 9069 + }, + { + "epoch": 0.0858568169555381, + "grad_norm": 313.30047607421875, + "learning_rate": 1.9836849895580154e-06, + "loss": 25.4922, + "step": 9070 + }, + { + "epoch": 0.08586628297725316, + "grad_norm": 422.71759033203125, + "learning_rate": 1.9836794736713957e-06, + "loss": 27.7812, + "step": 9071 + }, + { + "epoch": 0.0858757489989682, + "grad_norm": 723.7015991210938, + "learning_rate": 1.9836739568601816e-06, + "loss": 47.2188, + "step": 9072 + }, + { + "epoch": 0.08588521502068326, + "grad_norm": 409.81719970703125, + "learning_rate": 1.983668439124378e-06, + "loss": 37.7188, + "step": 9073 + }, + { + "epoch": 0.08589468104239831, + "grad_norm": 676.652099609375, + "learning_rate": 1.9836629204639897e-06, + "loss": 50.2656, + "step": 9074 + }, + { + "epoch": 0.08590414706411337, + "grad_norm": 339.5746154785156, + "learning_rate": 1.9836574008790222e-06, + "loss": 21.6328, + "step": 9075 + }, + { + "epoch": 0.08591361308582841, + "grad_norm": 483.7355041503906, + "learning_rate": 1.9836518803694814e-06, + "loss": 46.3438, + "step": 9076 + }, + { + "epoch": 0.08592307910754347, + "grad_norm": 195.63955688476562, + "learning_rate": 1.9836463589353715e-06, + "loss": 16.1484, + "step": 9077 + }, + { + "epoch": 0.08593254512925852, + "grad_norm": 3.366938829421997, + "learning_rate": 1.983640836576698e-06, + "loss": 0.8806, + "step": 9078 + }, + { + "epoch": 0.08594201115097358, + "grad_norm": 536.8746337890625, + "learning_rate": 1.9836353132934656e-06, + "loss": 23.5, + "step": 9079 + }, + { + "epoch": 0.08595147717268864, + "grad_norm": 666.0939331054688, + "learning_rate": 1.9836297890856807e-06, + "loss": 51.7031, + "step": 9080 + }, + { + "epoch": 0.08596094319440369, + "grad_norm": 214.5516357421875, + "learning_rate": 1.9836242639533474e-06, + "loss": 19.4062, + "step": 9081 + }, + { + "epoch": 0.08597040921611875, + "grad_norm": 247.93124389648438, + "learning_rate": 1.9836187378964718e-06, + "loss": 29.7422, + "step": 9082 + }, + { + "epoch": 0.08597987523783379, + "grad_norm": 860.36767578125, + "learning_rate": 1.983613210915058e-06, + "loss": 57.0078, + "step": 9083 + }, + { + "epoch": 0.08598934125954885, + "grad_norm": 372.3347473144531, + "learning_rate": 1.983607683009112e-06, + "loss": 32.8125, + "step": 9084 + }, + { + "epoch": 0.0859988072812639, + "grad_norm": 1307.647705078125, + "learning_rate": 1.9836021541786388e-06, + "loss": 20.5312, + "step": 9085 + }, + { + "epoch": 0.08600827330297896, + "grad_norm": 388.0370788574219, + "learning_rate": 1.983596624423643e-06, + "loss": 13.6328, + "step": 9086 + }, + { + "epoch": 0.08601773932469402, + "grad_norm": 198.6277313232422, + "learning_rate": 1.9835910937441313e-06, + "loss": 21.8516, + "step": 9087 + }, + { + "epoch": 0.08602720534640906, + "grad_norm": 443.94525146484375, + "learning_rate": 1.9835855621401076e-06, + "loss": 49.5391, + "step": 9088 + }, + { + "epoch": 0.08603667136812412, + "grad_norm": 2.86672043800354, + "learning_rate": 1.9835800296115775e-06, + "loss": 0.8228, + "step": 9089 + }, + { + "epoch": 0.08604613738983917, + "grad_norm": 961.7407836914062, + "learning_rate": 1.9835744961585457e-06, + "loss": 77.1562, + "step": 9090 + }, + { + "epoch": 0.08605560341155423, + "grad_norm": 2.6305527687072754, + "learning_rate": 1.9835689617810185e-06, + "loss": 0.7747, + "step": 9091 + }, + { + "epoch": 0.08606506943326928, + "grad_norm": 302.6685791015625, + "learning_rate": 1.983563426479e-06, + "loss": 18.4844, + "step": 9092 + }, + { + "epoch": 0.08607453545498434, + "grad_norm": 253.1156005859375, + "learning_rate": 1.9835578902524957e-06, + "loss": 22.5625, + "step": 9093 + }, + { + "epoch": 0.08608400147669938, + "grad_norm": 384.2945556640625, + "learning_rate": 1.9835523531015116e-06, + "loss": 31.6094, + "step": 9094 + }, + { + "epoch": 0.08609346749841444, + "grad_norm": 244.89149475097656, + "learning_rate": 1.983546815026052e-06, + "loss": 19.5703, + "step": 9095 + }, + { + "epoch": 0.0861029335201295, + "grad_norm": 571.0215454101562, + "learning_rate": 1.9835412760261225e-06, + "loss": 30.7969, + "step": 9096 + }, + { + "epoch": 0.08611239954184455, + "grad_norm": 400.8146667480469, + "learning_rate": 1.9835357361017276e-06, + "loss": 34.1406, + "step": 9097 + }, + { + "epoch": 0.08612186556355961, + "grad_norm": 739.0099487304688, + "learning_rate": 1.9835301952528737e-06, + "loss": 52.1328, + "step": 9098 + }, + { + "epoch": 0.08613133158527465, + "grad_norm": 2.4297990798950195, + "learning_rate": 1.983524653479565e-06, + "loss": 0.7888, + "step": 9099 + }, + { + "epoch": 0.08614079760698971, + "grad_norm": 391.1317443847656, + "learning_rate": 1.9835191107818074e-06, + "loss": 25.4375, + "step": 9100 + }, + { + "epoch": 0.08615026362870476, + "grad_norm": 194.17178344726562, + "learning_rate": 1.983513567159606e-06, + "loss": 25.5781, + "step": 9101 + }, + { + "epoch": 0.08615972965041982, + "grad_norm": 423.1429748535156, + "learning_rate": 1.983508022612966e-06, + "loss": 41.4688, + "step": 9102 + }, + { + "epoch": 0.08616919567213487, + "grad_norm": 317.99163818359375, + "learning_rate": 1.9835024771418917e-06, + "loss": 21.9609, + "step": 9103 + }, + { + "epoch": 0.08617866169384993, + "grad_norm": 813.2364501953125, + "learning_rate": 1.983496930746389e-06, + "loss": 21.2969, + "step": 9104 + }, + { + "epoch": 0.08618812771556499, + "grad_norm": 318.7381286621094, + "learning_rate": 1.9834913834264636e-06, + "loss": 22.25, + "step": 9105 + }, + { + "epoch": 0.08619759373728003, + "grad_norm": 305.7060852050781, + "learning_rate": 1.9834858351821203e-06, + "loss": 22.7578, + "step": 9106 + }, + { + "epoch": 0.08620705975899509, + "grad_norm": 200.40927124023438, + "learning_rate": 1.983480286013364e-06, + "loss": 20.6172, + "step": 9107 + }, + { + "epoch": 0.08621652578071014, + "grad_norm": 490.8055114746094, + "learning_rate": 1.9834747359202006e-06, + "loss": 43.0, + "step": 9108 + }, + { + "epoch": 0.0862259918024252, + "grad_norm": 325.3564147949219, + "learning_rate": 1.983469184902635e-06, + "loss": 31.1719, + "step": 9109 + }, + { + "epoch": 0.08623545782414024, + "grad_norm": 183.74655151367188, + "learning_rate": 1.9834636329606717e-06, + "loss": 21.8516, + "step": 9110 + }, + { + "epoch": 0.0862449238458553, + "grad_norm": 885.63134765625, + "learning_rate": 1.983458080094317e-06, + "loss": 78.5703, + "step": 9111 + }, + { + "epoch": 0.08625438986757035, + "grad_norm": 639.6661376953125, + "learning_rate": 1.983452526303576e-06, + "loss": 28.9062, + "step": 9112 + }, + { + "epoch": 0.08626385588928541, + "grad_norm": 701.4891357421875, + "learning_rate": 1.983446971588453e-06, + "loss": 46.9375, + "step": 9113 + }, + { + "epoch": 0.08627332191100047, + "grad_norm": 474.6315002441406, + "learning_rate": 1.9834414159489542e-06, + "loss": 15.1719, + "step": 9114 + }, + { + "epoch": 0.08628278793271552, + "grad_norm": 538.543212890625, + "learning_rate": 1.983435859385084e-06, + "loss": 61.5469, + "step": 9115 + }, + { + "epoch": 0.08629225395443058, + "grad_norm": 880.2410278320312, + "learning_rate": 1.9834303018968487e-06, + "loss": 55.5234, + "step": 9116 + }, + { + "epoch": 0.08630171997614562, + "grad_norm": 3.1427080631256104, + "learning_rate": 1.9834247434842526e-06, + "loss": 0.9038, + "step": 9117 + }, + { + "epoch": 0.08631118599786068, + "grad_norm": 297.1977233886719, + "learning_rate": 1.983419184147301e-06, + "loss": 25.0, + "step": 9118 + }, + { + "epoch": 0.08632065201957573, + "grad_norm": 519.7793579101562, + "learning_rate": 1.9834136238859993e-06, + "loss": 37.7031, + "step": 9119 + }, + { + "epoch": 0.08633011804129079, + "grad_norm": 207.9677734375, + "learning_rate": 1.9834080627003533e-06, + "loss": 21.4219, + "step": 9120 + }, + { + "epoch": 0.08633958406300583, + "grad_norm": 377.840576171875, + "learning_rate": 1.9834025005903672e-06, + "loss": 34.8438, + "step": 9121 + }, + { + "epoch": 0.0863490500847209, + "grad_norm": 233.80841064453125, + "learning_rate": 1.983396937556047e-06, + "loss": 22.5859, + "step": 9122 + }, + { + "epoch": 0.08635851610643595, + "grad_norm": 308.7026672363281, + "learning_rate": 1.9833913735973974e-06, + "loss": 24.7266, + "step": 9123 + }, + { + "epoch": 0.086367982128151, + "grad_norm": 297.0133972167969, + "learning_rate": 1.9833858087144246e-06, + "loss": 23.2812, + "step": 9124 + }, + { + "epoch": 0.08637744814986606, + "grad_norm": 179.03147888183594, + "learning_rate": 1.983380242907132e-06, + "loss": 21.1562, + "step": 9125 + }, + { + "epoch": 0.0863869141715811, + "grad_norm": 307.73358154296875, + "learning_rate": 1.983374676175527e-06, + "loss": 25.375, + "step": 9126 + }, + { + "epoch": 0.08639638019329617, + "grad_norm": 555.0011596679688, + "learning_rate": 1.983369108519613e-06, + "loss": 21.2344, + "step": 9127 + }, + { + "epoch": 0.08640584621501121, + "grad_norm": 200.1666717529297, + "learning_rate": 1.9833635399393963e-06, + "loss": 17.5547, + "step": 9128 + }, + { + "epoch": 0.08641531223672627, + "grad_norm": 236.20730590820312, + "learning_rate": 1.983357970434882e-06, + "loss": 16.1133, + "step": 9129 + }, + { + "epoch": 0.08642477825844133, + "grad_norm": 184.51126098632812, + "learning_rate": 1.983352400006075e-06, + "loss": 19.25, + "step": 9130 + }, + { + "epoch": 0.08643424428015638, + "grad_norm": 466.5748291015625, + "learning_rate": 1.983346828652981e-06, + "loss": 22.25, + "step": 9131 + }, + { + "epoch": 0.08644371030187144, + "grad_norm": 534.889404296875, + "learning_rate": 1.9833412563756043e-06, + "loss": 38.0469, + "step": 9132 + }, + { + "epoch": 0.08645317632358648, + "grad_norm": 280.76654052734375, + "learning_rate": 1.983335683173951e-06, + "loss": 23.2734, + "step": 9133 + }, + { + "epoch": 0.08646264234530154, + "grad_norm": 639.2067260742188, + "learning_rate": 1.9833301090480268e-06, + "loss": 50.875, + "step": 9134 + }, + { + "epoch": 0.08647210836701659, + "grad_norm": 435.1859436035156, + "learning_rate": 1.9833245339978354e-06, + "loss": 36.4531, + "step": 9135 + }, + { + "epoch": 0.08648157438873165, + "grad_norm": 286.775634765625, + "learning_rate": 1.9833189580233833e-06, + "loss": 38.625, + "step": 9136 + }, + { + "epoch": 0.0864910404104467, + "grad_norm": 495.6844177246094, + "learning_rate": 1.9833133811246755e-06, + "loss": 29.125, + "step": 9137 + }, + { + "epoch": 0.08650050643216176, + "grad_norm": 806.4127197265625, + "learning_rate": 1.983307803301717e-06, + "loss": 43.6719, + "step": 9138 + }, + { + "epoch": 0.08650997245387682, + "grad_norm": 292.70965576171875, + "learning_rate": 1.983302224554513e-06, + "loss": 12.4219, + "step": 9139 + }, + { + "epoch": 0.08651943847559186, + "grad_norm": 971.04150390625, + "learning_rate": 1.983296644883069e-06, + "loss": 31.5391, + "step": 9140 + }, + { + "epoch": 0.08652890449730692, + "grad_norm": 337.11627197265625, + "learning_rate": 1.9832910642873897e-06, + "loss": 37.1875, + "step": 9141 + }, + { + "epoch": 0.08653837051902197, + "grad_norm": 204.2744140625, + "learning_rate": 1.9832854827674815e-06, + "loss": 24.875, + "step": 9142 + }, + { + "epoch": 0.08654783654073703, + "grad_norm": 668.0250854492188, + "learning_rate": 1.983279900323348e-06, + "loss": 44.8906, + "step": 9143 + }, + { + "epoch": 0.08655730256245207, + "grad_norm": 300.5992431640625, + "learning_rate": 1.9832743169549963e-06, + "loss": 30.2656, + "step": 9144 + }, + { + "epoch": 0.08656676858416713, + "grad_norm": 272.24212646484375, + "learning_rate": 1.9832687326624304e-06, + "loss": 22.8125, + "step": 9145 + }, + { + "epoch": 0.08657623460588218, + "grad_norm": 532.3703002929688, + "learning_rate": 1.9832631474456558e-06, + "loss": 45.9297, + "step": 9146 + }, + { + "epoch": 0.08658570062759724, + "grad_norm": 1517.7958984375, + "learning_rate": 1.9832575613046776e-06, + "loss": 30.1406, + "step": 9147 + }, + { + "epoch": 0.0865951666493123, + "grad_norm": 142.1226348876953, + "learning_rate": 1.9832519742395017e-06, + "loss": 22.1016, + "step": 9148 + }, + { + "epoch": 0.08660463267102735, + "grad_norm": 440.59674072265625, + "learning_rate": 1.9832463862501325e-06, + "loss": 48.7656, + "step": 9149 + }, + { + "epoch": 0.0866140986927424, + "grad_norm": 182.52345275878906, + "learning_rate": 1.9832407973365757e-06, + "loss": 22.2031, + "step": 9150 + }, + { + "epoch": 0.08662356471445745, + "grad_norm": 409.4872131347656, + "learning_rate": 1.9832352074988366e-06, + "loss": 44.0781, + "step": 9151 + }, + { + "epoch": 0.08663303073617251, + "grad_norm": 440.7464294433594, + "learning_rate": 1.98322961673692e-06, + "loss": 38.625, + "step": 9152 + }, + { + "epoch": 0.08664249675788756, + "grad_norm": 378.56170654296875, + "learning_rate": 1.9832240250508323e-06, + "loss": 22.6172, + "step": 9153 + }, + { + "epoch": 0.08665196277960262, + "grad_norm": 233.40965270996094, + "learning_rate": 1.9832184324405773e-06, + "loss": 21.1172, + "step": 9154 + }, + { + "epoch": 0.08666142880131766, + "grad_norm": 1039.8565673828125, + "learning_rate": 1.9832128389061615e-06, + "loss": 36.8594, + "step": 9155 + }, + { + "epoch": 0.08667089482303272, + "grad_norm": 333.0272521972656, + "learning_rate": 1.9832072444475895e-06, + "loss": 33.4453, + "step": 9156 + }, + { + "epoch": 0.08668036084474778, + "grad_norm": 752.9513549804688, + "learning_rate": 1.983201649064866e-06, + "loss": 49.8828, + "step": 9157 + }, + { + "epoch": 0.08668982686646283, + "grad_norm": 400.7919616699219, + "learning_rate": 1.9831960527579975e-06, + "loss": 18.4531, + "step": 9158 + }, + { + "epoch": 0.08669929288817789, + "grad_norm": 562.2817993164062, + "learning_rate": 1.9831904555269885e-06, + "loss": 12.5117, + "step": 9159 + }, + { + "epoch": 0.08670875890989294, + "grad_norm": 271.43817138671875, + "learning_rate": 1.9831848573718444e-06, + "loss": 25.7422, + "step": 9160 + }, + { + "epoch": 0.086718224931608, + "grad_norm": 485.77447509765625, + "learning_rate": 1.9831792582925705e-06, + "loss": 57.1562, + "step": 9161 + }, + { + "epoch": 0.08672769095332304, + "grad_norm": 3.3752806186676025, + "learning_rate": 1.9831736582891723e-06, + "loss": 0.9204, + "step": 9162 + }, + { + "epoch": 0.0867371569750381, + "grad_norm": 408.979736328125, + "learning_rate": 1.9831680573616547e-06, + "loss": 33.4688, + "step": 9163 + }, + { + "epoch": 0.08674662299675315, + "grad_norm": 436.4665222167969, + "learning_rate": 1.983162455510023e-06, + "loss": 34.4844, + "step": 9164 + }, + { + "epoch": 0.08675608901846821, + "grad_norm": 3.4913454055786133, + "learning_rate": 1.983156852734283e-06, + "loss": 0.9785, + "step": 9165 + }, + { + "epoch": 0.08676555504018327, + "grad_norm": 298.89923095703125, + "learning_rate": 1.9831512490344387e-06, + "loss": 19.7969, + "step": 9166 + }, + { + "epoch": 0.08677502106189831, + "grad_norm": 211.33091735839844, + "learning_rate": 1.9831456444104964e-06, + "loss": 26.125, + "step": 9167 + }, + { + "epoch": 0.08678448708361337, + "grad_norm": 212.5766143798828, + "learning_rate": 1.9831400388624615e-06, + "loss": 28.4531, + "step": 9168 + }, + { + "epoch": 0.08679395310532842, + "grad_norm": 251.88780212402344, + "learning_rate": 1.9831344323903387e-06, + "loss": 23.4297, + "step": 9169 + }, + { + "epoch": 0.08680341912704348, + "grad_norm": 288.4961242675781, + "learning_rate": 1.9831288249941334e-06, + "loss": 14.6289, + "step": 9170 + }, + { + "epoch": 0.08681288514875853, + "grad_norm": 284.4944152832031, + "learning_rate": 1.9831232166738516e-06, + "loss": 23.8203, + "step": 9171 + }, + { + "epoch": 0.08682235117047359, + "grad_norm": 312.96270751953125, + "learning_rate": 1.983117607429497e-06, + "loss": 23.4453, + "step": 9172 + }, + { + "epoch": 0.08683181719218865, + "grad_norm": 454.9992980957031, + "learning_rate": 1.9831119972610762e-06, + "loss": 26.2031, + "step": 9173 + }, + { + "epoch": 0.08684128321390369, + "grad_norm": 581.040771484375, + "learning_rate": 1.9831063861685945e-06, + "loss": 15.8438, + "step": 9174 + }, + { + "epoch": 0.08685074923561875, + "grad_norm": 253.28958129882812, + "learning_rate": 1.9831007741520566e-06, + "loss": 31.5625, + "step": 9175 + }, + { + "epoch": 0.0868602152573338, + "grad_norm": 321.7626037597656, + "learning_rate": 1.983095161211467e-06, + "loss": 17.375, + "step": 9176 + }, + { + "epoch": 0.08686968127904886, + "grad_norm": 302.66595458984375, + "learning_rate": 1.983089547346833e-06, + "loss": 22.5703, + "step": 9177 + }, + { + "epoch": 0.0868791473007639, + "grad_norm": 313.091064453125, + "learning_rate": 1.983083932558158e-06, + "loss": 39.2969, + "step": 9178 + }, + { + "epoch": 0.08688861332247896, + "grad_norm": 186.38754272460938, + "learning_rate": 1.983078316845449e-06, + "loss": 19.4844, + "step": 9179 + }, + { + "epoch": 0.08689807934419401, + "grad_norm": 331.19866943359375, + "learning_rate": 1.9830727002087095e-06, + "loss": 41.1094, + "step": 9180 + }, + { + "epoch": 0.08690754536590907, + "grad_norm": 273.9647521972656, + "learning_rate": 1.9830670826479463e-06, + "loss": 19.6172, + "step": 9181 + }, + { + "epoch": 0.08691701138762413, + "grad_norm": 253.29685974121094, + "learning_rate": 1.983061464163163e-06, + "loss": 24.6797, + "step": 9182 + }, + { + "epoch": 0.08692647740933918, + "grad_norm": 166.2800750732422, + "learning_rate": 1.9830558447543667e-06, + "loss": 8.0469, + "step": 9183 + }, + { + "epoch": 0.08693594343105424, + "grad_norm": 488.0263977050781, + "learning_rate": 1.9830502244215615e-06, + "loss": 58.625, + "step": 9184 + }, + { + "epoch": 0.08694540945276928, + "grad_norm": 422.2090148925781, + "learning_rate": 1.9830446031647534e-06, + "loss": 15.2578, + "step": 9185 + }, + { + "epoch": 0.08695487547448434, + "grad_norm": 586.7027587890625, + "learning_rate": 1.9830389809839468e-06, + "loss": 52.6719, + "step": 9186 + }, + { + "epoch": 0.08696434149619939, + "grad_norm": 226.85586547851562, + "learning_rate": 1.983033357879148e-06, + "loss": 19.0234, + "step": 9187 + }, + { + "epoch": 0.08697380751791445, + "grad_norm": 370.0135192871094, + "learning_rate": 1.9830277338503614e-06, + "loss": 26.8984, + "step": 9188 + }, + { + "epoch": 0.0869832735396295, + "grad_norm": 326.68426513671875, + "learning_rate": 1.9830221088975932e-06, + "loss": 23.0977, + "step": 9189 + }, + { + "epoch": 0.08699273956134455, + "grad_norm": 721.1836547851562, + "learning_rate": 1.9830164830208477e-06, + "loss": 23.8281, + "step": 9190 + }, + { + "epoch": 0.08700220558305961, + "grad_norm": 259.8715515136719, + "learning_rate": 1.983010856220131e-06, + "loss": 19.8672, + "step": 9191 + }, + { + "epoch": 0.08701167160477466, + "grad_norm": 525.2345581054688, + "learning_rate": 1.9830052284954476e-06, + "loss": 40.4062, + "step": 9192 + }, + { + "epoch": 0.08702113762648972, + "grad_norm": 325.6142883300781, + "learning_rate": 1.9829995998468036e-06, + "loss": 27.1562, + "step": 9193 + }, + { + "epoch": 0.08703060364820477, + "grad_norm": 398.7509460449219, + "learning_rate": 1.982993970274204e-06, + "loss": 31.0938, + "step": 9194 + }, + { + "epoch": 0.08704006966991983, + "grad_norm": 477.3588562011719, + "learning_rate": 1.982988339777654e-06, + "loss": 49.9062, + "step": 9195 + }, + { + "epoch": 0.08704953569163487, + "grad_norm": 395.7779846191406, + "learning_rate": 1.9829827083571585e-06, + "loss": 49.0156, + "step": 9196 + }, + { + "epoch": 0.08705900171334993, + "grad_norm": 284.2933044433594, + "learning_rate": 1.9829770760127235e-06, + "loss": 32.5781, + "step": 9197 + }, + { + "epoch": 0.08706846773506498, + "grad_norm": 359.8265686035156, + "learning_rate": 1.9829714427443538e-06, + "loss": 24.3125, + "step": 9198 + }, + { + "epoch": 0.08707793375678004, + "grad_norm": 398.5655822753906, + "learning_rate": 1.982965808552055e-06, + "loss": 38.9375, + "step": 9199 + }, + { + "epoch": 0.0870873997784951, + "grad_norm": 290.1617736816406, + "learning_rate": 1.982960173435832e-06, + "loss": 24.5, + "step": 9200 + }, + { + "epoch": 0.08709686580021014, + "grad_norm": 422.3478698730469, + "learning_rate": 1.9829545373956906e-06, + "loss": 21.7695, + "step": 9201 + }, + { + "epoch": 0.0871063318219252, + "grad_norm": 231.11801147460938, + "learning_rate": 1.982948900431636e-06, + "loss": 23.3984, + "step": 9202 + }, + { + "epoch": 0.08711579784364025, + "grad_norm": 397.3639221191406, + "learning_rate": 1.9829432625436733e-06, + "loss": 19.1367, + "step": 9203 + }, + { + "epoch": 0.08712526386535531, + "grad_norm": 194.36087036132812, + "learning_rate": 1.9829376237318084e-06, + "loss": 25.9688, + "step": 9204 + }, + { + "epoch": 0.08713472988707036, + "grad_norm": 286.4955139160156, + "learning_rate": 1.9829319839960453e-06, + "loss": 34.8828, + "step": 9205 + }, + { + "epoch": 0.08714419590878542, + "grad_norm": 276.7548828125, + "learning_rate": 1.9829263433363907e-06, + "loss": 24.1328, + "step": 9206 + }, + { + "epoch": 0.08715366193050046, + "grad_norm": 451.14215087890625, + "learning_rate": 1.9829207017528485e-06, + "loss": 21.875, + "step": 9207 + }, + { + "epoch": 0.08716312795221552, + "grad_norm": 319.40692138671875, + "learning_rate": 1.9829150592454255e-06, + "loss": 40.2969, + "step": 9208 + }, + { + "epoch": 0.08717259397393058, + "grad_norm": 557.6298828125, + "learning_rate": 1.9829094158141258e-06, + "loss": 48.7891, + "step": 9209 + }, + { + "epoch": 0.08718205999564563, + "grad_norm": 363.0107116699219, + "learning_rate": 1.9829037714589554e-06, + "loss": 20.6641, + "step": 9210 + }, + { + "epoch": 0.08719152601736069, + "grad_norm": 669.59716796875, + "learning_rate": 1.98289812617992e-06, + "loss": 55.2969, + "step": 9211 + }, + { + "epoch": 0.08720099203907573, + "grad_norm": 231.87112426757812, + "learning_rate": 1.9828924799770233e-06, + "loss": 20.2188, + "step": 9212 + }, + { + "epoch": 0.0872104580607908, + "grad_norm": 422.0999450683594, + "learning_rate": 1.982886832850272e-06, + "loss": 53.7031, + "step": 9213 + }, + { + "epoch": 0.08721992408250584, + "grad_norm": 371.87664794921875, + "learning_rate": 1.982881184799671e-06, + "loss": 14.7383, + "step": 9214 + }, + { + "epoch": 0.0872293901042209, + "grad_norm": 456.50335693359375, + "learning_rate": 1.9828755358252256e-06, + "loss": 37.8438, + "step": 9215 + }, + { + "epoch": 0.08723885612593596, + "grad_norm": 504.8052978515625, + "learning_rate": 1.9828698859269416e-06, + "loss": 10.3633, + "step": 9216 + }, + { + "epoch": 0.087248322147651, + "grad_norm": 650.5972290039062, + "learning_rate": 1.9828642351048234e-06, + "loss": 37.8594, + "step": 9217 + }, + { + "epoch": 0.08725778816936607, + "grad_norm": 1000.7470703125, + "learning_rate": 1.9828585833588767e-06, + "loss": 37.1172, + "step": 9218 + }, + { + "epoch": 0.08726725419108111, + "grad_norm": 166.939208984375, + "learning_rate": 1.9828529306891073e-06, + "loss": 23.3594, + "step": 9219 + }, + { + "epoch": 0.08727672021279617, + "grad_norm": 245.42166137695312, + "learning_rate": 1.9828472770955192e-06, + "loss": 14.4297, + "step": 9220 + }, + { + "epoch": 0.08728618623451122, + "grad_norm": 290.3560791015625, + "learning_rate": 1.9828416225781194e-06, + "loss": 14.8359, + "step": 9221 + }, + { + "epoch": 0.08729565225622628, + "grad_norm": 295.231201171875, + "learning_rate": 1.9828359671369123e-06, + "loss": 9.7695, + "step": 9222 + }, + { + "epoch": 0.08730511827794132, + "grad_norm": 679.3731689453125, + "learning_rate": 1.982830310771903e-06, + "loss": 52.8438, + "step": 9223 + }, + { + "epoch": 0.08731458429965638, + "grad_norm": 581.2213134765625, + "learning_rate": 1.982824653483098e-06, + "loss": 24.4766, + "step": 9224 + }, + { + "epoch": 0.08732405032137144, + "grad_norm": 462.62103271484375, + "learning_rate": 1.9828189952705007e-06, + "loss": 24.9766, + "step": 9225 + }, + { + "epoch": 0.08733351634308649, + "grad_norm": 726.2408447265625, + "learning_rate": 1.982813336134118e-06, + "loss": 58.0156, + "step": 9226 + }, + { + "epoch": 0.08734298236480155, + "grad_norm": 824.8374633789062, + "learning_rate": 1.9828076760739547e-06, + "loss": 53.6641, + "step": 9227 + }, + { + "epoch": 0.0873524483865166, + "grad_norm": 272.56878662109375, + "learning_rate": 1.9828020150900156e-06, + "loss": 26.4062, + "step": 9228 + }, + { + "epoch": 0.08736191440823166, + "grad_norm": 633.4901733398438, + "learning_rate": 1.9827963531823074e-06, + "loss": 22.0625, + "step": 9229 + }, + { + "epoch": 0.0873713804299467, + "grad_norm": 466.4870300292969, + "learning_rate": 1.9827906903508336e-06, + "loss": 28.375, + "step": 9230 + }, + { + "epoch": 0.08738084645166176, + "grad_norm": 354.4967956542969, + "learning_rate": 1.9827850265956014e-06, + "loss": 30.207, + "step": 9231 + }, + { + "epoch": 0.08739031247337681, + "grad_norm": 222.26382446289062, + "learning_rate": 1.9827793619166145e-06, + "loss": 24.6016, + "step": 9232 + }, + { + "epoch": 0.08739977849509187, + "grad_norm": 638.90185546875, + "learning_rate": 1.9827736963138797e-06, + "loss": 40.1953, + "step": 9233 + }, + { + "epoch": 0.08740924451680693, + "grad_norm": 208.5233612060547, + "learning_rate": 1.982768029787401e-06, + "loss": 21.1172, + "step": 9234 + }, + { + "epoch": 0.08741871053852197, + "grad_norm": 198.8594207763672, + "learning_rate": 1.9827623623371843e-06, + "loss": 14.4102, + "step": 9235 + }, + { + "epoch": 0.08742817656023703, + "grad_norm": 872.0149536132812, + "learning_rate": 1.982756693963235e-06, + "loss": 55.6406, + "step": 9236 + }, + { + "epoch": 0.08743764258195208, + "grad_norm": 204.7392578125, + "learning_rate": 1.9827510246655584e-06, + "loss": 27.1406, + "step": 9237 + }, + { + "epoch": 0.08744710860366714, + "grad_norm": 411.25634765625, + "learning_rate": 1.9827453544441596e-06, + "loss": 26.4219, + "step": 9238 + }, + { + "epoch": 0.08745657462538219, + "grad_norm": 199.57626342773438, + "learning_rate": 1.9827396832990443e-06, + "loss": 18.2891, + "step": 9239 + }, + { + "epoch": 0.08746604064709725, + "grad_norm": 301.3692626953125, + "learning_rate": 1.9827340112302175e-06, + "loss": 21.6328, + "step": 9240 + }, + { + "epoch": 0.08747550666881229, + "grad_norm": 225.6087188720703, + "learning_rate": 1.982728338237685e-06, + "loss": 22.0547, + "step": 9241 + }, + { + "epoch": 0.08748497269052735, + "grad_norm": 167.18060302734375, + "learning_rate": 1.9827226643214513e-06, + "loss": 16.625, + "step": 9242 + }, + { + "epoch": 0.08749443871224241, + "grad_norm": 176.27557373046875, + "learning_rate": 1.9827169894815223e-06, + "loss": 15.3477, + "step": 9243 + }, + { + "epoch": 0.08750390473395746, + "grad_norm": 435.64154052734375, + "learning_rate": 1.9827113137179036e-06, + "loss": 38.5312, + "step": 9244 + }, + { + "epoch": 0.08751337075567252, + "grad_norm": 259.5101318359375, + "learning_rate": 1.9827056370306e-06, + "loss": 25.2852, + "step": 9245 + }, + { + "epoch": 0.08752283677738756, + "grad_norm": 462.1337585449219, + "learning_rate": 1.9826999594196168e-06, + "loss": 50.7656, + "step": 9246 + }, + { + "epoch": 0.08753230279910262, + "grad_norm": 365.2279968261719, + "learning_rate": 1.98269428088496e-06, + "loss": 20.9609, + "step": 9247 + }, + { + "epoch": 0.08754176882081767, + "grad_norm": 680.7598266601562, + "learning_rate": 1.982688601426634e-06, + "loss": 33.3047, + "step": 9248 + }, + { + "epoch": 0.08755123484253273, + "grad_norm": 465.4015808105469, + "learning_rate": 1.982682921044645e-06, + "loss": 31.5625, + "step": 9249 + }, + { + "epoch": 0.08756070086424778, + "grad_norm": 332.328369140625, + "learning_rate": 1.982677239738998e-06, + "loss": 23.6328, + "step": 9250 + }, + { + "epoch": 0.08757016688596284, + "grad_norm": 345.8831787109375, + "learning_rate": 1.982671557509698e-06, + "loss": 27.0703, + "step": 9251 + }, + { + "epoch": 0.0875796329076779, + "grad_norm": 263.89886474609375, + "learning_rate": 1.982665874356751e-06, + "loss": 29.7656, + "step": 9252 + }, + { + "epoch": 0.08758909892939294, + "grad_norm": 209.02186584472656, + "learning_rate": 1.982660190280162e-06, + "loss": 25.8203, + "step": 9253 + }, + { + "epoch": 0.087598564951108, + "grad_norm": 264.72015380859375, + "learning_rate": 1.982654505279936e-06, + "loss": 18.375, + "step": 9254 + }, + { + "epoch": 0.08760803097282305, + "grad_norm": 355.1997375488281, + "learning_rate": 1.982648819356079e-06, + "loss": 19.5625, + "step": 9255 + }, + { + "epoch": 0.08761749699453811, + "grad_norm": 261.6010437011719, + "learning_rate": 1.9826431325085963e-06, + "loss": 21.5859, + "step": 9256 + }, + { + "epoch": 0.08762696301625315, + "grad_norm": 604.2498779296875, + "learning_rate": 1.9826374447374927e-06, + "loss": 49.5469, + "step": 9257 + }, + { + "epoch": 0.08763642903796821, + "grad_norm": 417.19268798828125, + "learning_rate": 1.9826317560427738e-06, + "loss": 27.5156, + "step": 9258 + }, + { + "epoch": 0.08764589505968327, + "grad_norm": 615.6971435546875, + "learning_rate": 1.9826260664244446e-06, + "loss": 27.6797, + "step": 9259 + }, + { + "epoch": 0.08765536108139832, + "grad_norm": 776.2155151367188, + "learning_rate": 1.982620375882511e-06, + "loss": 19.2188, + "step": 9260 + }, + { + "epoch": 0.08766482710311338, + "grad_norm": 626.379150390625, + "learning_rate": 1.982614684416979e-06, + "loss": 32.1523, + "step": 9261 + }, + { + "epoch": 0.08767429312482843, + "grad_norm": 306.3669738769531, + "learning_rate": 1.982608992027852e-06, + "loss": 22.8203, + "step": 9262 + }, + { + "epoch": 0.08768375914654349, + "grad_norm": 430.14178466796875, + "learning_rate": 1.982603298715137e-06, + "loss": 41.3906, + "step": 9263 + }, + { + "epoch": 0.08769322516825853, + "grad_norm": 358.7335510253906, + "learning_rate": 1.982597604478839e-06, + "loss": 30.6641, + "step": 9264 + }, + { + "epoch": 0.08770269118997359, + "grad_norm": 679.46630859375, + "learning_rate": 1.9825919093189626e-06, + "loss": 66.2969, + "step": 9265 + }, + { + "epoch": 0.08771215721168864, + "grad_norm": 789.9998168945312, + "learning_rate": 1.9825862132355143e-06, + "loss": 82.0, + "step": 9266 + }, + { + "epoch": 0.0877216232334037, + "grad_norm": 177.16490173339844, + "learning_rate": 1.9825805162284985e-06, + "loss": 19.0078, + "step": 9267 + }, + { + "epoch": 0.08773108925511876, + "grad_norm": 228.80386352539062, + "learning_rate": 1.982574818297921e-06, + "loss": 22.75, + "step": 9268 + }, + { + "epoch": 0.0877405552768338, + "grad_norm": 622.7556762695312, + "learning_rate": 1.9825691194437874e-06, + "loss": 37.4531, + "step": 9269 + }, + { + "epoch": 0.08775002129854886, + "grad_norm": 463.32025146484375, + "learning_rate": 1.9825634196661023e-06, + "loss": 19.0898, + "step": 9270 + }, + { + "epoch": 0.08775948732026391, + "grad_norm": 2.8457183837890625, + "learning_rate": 1.9825577189648718e-06, + "loss": 0.958, + "step": 9271 + }, + { + "epoch": 0.08776895334197897, + "grad_norm": 399.63580322265625, + "learning_rate": 1.982552017340101e-06, + "loss": 25.3047, + "step": 9272 + }, + { + "epoch": 0.08777841936369402, + "grad_norm": 2.5856130123138428, + "learning_rate": 1.9825463147917947e-06, + "loss": 0.8413, + "step": 9273 + }, + { + "epoch": 0.08778788538540908, + "grad_norm": 280.9276123046875, + "learning_rate": 1.9825406113199596e-06, + "loss": 28.3125, + "step": 9274 + }, + { + "epoch": 0.08779735140712412, + "grad_norm": 209.05191040039062, + "learning_rate": 1.9825349069246e-06, + "loss": 19.1953, + "step": 9275 + }, + { + "epoch": 0.08780681742883918, + "grad_norm": 604.0938110351562, + "learning_rate": 1.9825292016057213e-06, + "loss": 48.5781, + "step": 9276 + }, + { + "epoch": 0.08781628345055424, + "grad_norm": 308.3072509765625, + "learning_rate": 1.982523495363329e-06, + "loss": 26.4258, + "step": 9277 + }, + { + "epoch": 0.08782574947226929, + "grad_norm": 573.1517333984375, + "learning_rate": 1.9825177881974288e-06, + "loss": 41.8672, + "step": 9278 + }, + { + "epoch": 0.08783521549398435, + "grad_norm": 479.74761962890625, + "learning_rate": 1.9825120801080254e-06, + "loss": 25.2383, + "step": 9279 + }, + { + "epoch": 0.0878446815156994, + "grad_norm": 854.904541015625, + "learning_rate": 1.9825063710951248e-06, + "loss": 47.5312, + "step": 9280 + }, + { + "epoch": 0.08785414753741445, + "grad_norm": 388.1696472167969, + "learning_rate": 1.982500661158732e-06, + "loss": 44.7031, + "step": 9281 + }, + { + "epoch": 0.0878636135591295, + "grad_norm": 473.7981262207031, + "learning_rate": 1.982494950298853e-06, + "loss": 59.2109, + "step": 9282 + }, + { + "epoch": 0.08787307958084456, + "grad_norm": 2.8932385444641113, + "learning_rate": 1.9824892385154918e-06, + "loss": 1.0166, + "step": 9283 + }, + { + "epoch": 0.0878825456025596, + "grad_norm": 216.54718017578125, + "learning_rate": 1.9824835258086555e-06, + "loss": 23.5859, + "step": 9284 + }, + { + "epoch": 0.08789201162427467, + "grad_norm": 263.5341491699219, + "learning_rate": 1.982477812178348e-06, + "loss": 10.0898, + "step": 9285 + }, + { + "epoch": 0.08790147764598973, + "grad_norm": 517.8690185546875, + "learning_rate": 1.9824720976245754e-06, + "loss": 59.5781, + "step": 9286 + }, + { + "epoch": 0.08791094366770477, + "grad_norm": 496.2189636230469, + "learning_rate": 1.982466382147343e-06, + "loss": 33.7656, + "step": 9287 + }, + { + "epoch": 0.08792040968941983, + "grad_norm": 283.5580749511719, + "learning_rate": 1.982460665746656e-06, + "loss": 20.3516, + "step": 9288 + }, + { + "epoch": 0.08792987571113488, + "grad_norm": 935.7689208984375, + "learning_rate": 1.98245494842252e-06, + "loss": 50.5781, + "step": 9289 + }, + { + "epoch": 0.08793934173284994, + "grad_norm": 875.5374145507812, + "learning_rate": 1.9824492301749406e-06, + "loss": 57.6406, + "step": 9290 + }, + { + "epoch": 0.08794880775456498, + "grad_norm": 498.2384338378906, + "learning_rate": 1.9824435110039224e-06, + "loss": 36.8438, + "step": 9291 + }, + { + "epoch": 0.08795827377628004, + "grad_norm": 200.67340087890625, + "learning_rate": 1.9824377909094715e-06, + "loss": 14.8164, + "step": 9292 + }, + { + "epoch": 0.08796773979799509, + "grad_norm": 393.6011657714844, + "learning_rate": 1.982432069891593e-06, + "loss": 39.5781, + "step": 9293 + }, + { + "epoch": 0.08797720581971015, + "grad_norm": 3.336850643157959, + "learning_rate": 1.982426347950292e-06, + "loss": 0.6831, + "step": 9294 + }, + { + "epoch": 0.08798667184142521, + "grad_norm": 638.999267578125, + "learning_rate": 1.982420625085574e-06, + "loss": 36.5859, + "step": 9295 + }, + { + "epoch": 0.08799613786314026, + "grad_norm": 288.67596435546875, + "learning_rate": 1.982414901297445e-06, + "loss": 23.4688, + "step": 9296 + }, + { + "epoch": 0.08800560388485532, + "grad_norm": 305.5658874511719, + "learning_rate": 1.9824091765859093e-06, + "loss": 27.3125, + "step": 9297 + }, + { + "epoch": 0.08801506990657036, + "grad_norm": 190.75408935546875, + "learning_rate": 1.9824034509509735e-06, + "loss": 23.7891, + "step": 9298 + }, + { + "epoch": 0.08802453592828542, + "grad_norm": 1700.4879150390625, + "learning_rate": 1.9823977243926418e-06, + "loss": 38.3828, + "step": 9299 + }, + { + "epoch": 0.08803400195000047, + "grad_norm": 468.86761474609375, + "learning_rate": 1.9823919969109205e-06, + "loss": 46.5391, + "step": 9300 + }, + { + "epoch": 0.08804346797171553, + "grad_norm": 173.03955078125, + "learning_rate": 1.9823862685058146e-06, + "loss": 25.625, + "step": 9301 + }, + { + "epoch": 0.08805293399343059, + "grad_norm": 245.9062042236328, + "learning_rate": 1.98238053917733e-06, + "loss": 16.9609, + "step": 9302 + }, + { + "epoch": 0.08806240001514563, + "grad_norm": 572.93310546875, + "learning_rate": 1.9823748089254707e-06, + "loss": 40.1172, + "step": 9303 + }, + { + "epoch": 0.0880718660368607, + "grad_norm": 533.8823852539062, + "learning_rate": 1.9823690777502436e-06, + "loss": 10.7656, + "step": 9304 + }, + { + "epoch": 0.08808133205857574, + "grad_norm": 2.535731315612793, + "learning_rate": 1.982363345651653e-06, + "loss": 0.8491, + "step": 9305 + }, + { + "epoch": 0.0880907980802908, + "grad_norm": 356.3128356933594, + "learning_rate": 1.9823576126297053e-06, + "loss": 41.1875, + "step": 9306 + }, + { + "epoch": 0.08810026410200585, + "grad_norm": 602.2106323242188, + "learning_rate": 1.982351878684405e-06, + "loss": 39.2656, + "step": 9307 + }, + { + "epoch": 0.0881097301237209, + "grad_norm": 152.7186737060547, + "learning_rate": 1.9823461438157582e-06, + "loss": 24.0938, + "step": 9308 + }, + { + "epoch": 0.08811919614543595, + "grad_norm": 170.76805114746094, + "learning_rate": 1.9823404080237695e-06, + "loss": 18.8672, + "step": 9309 + }, + { + "epoch": 0.08812866216715101, + "grad_norm": 278.7886047363281, + "learning_rate": 1.9823346713084447e-06, + "loss": 9.5273, + "step": 9310 + }, + { + "epoch": 0.08813812818886607, + "grad_norm": 183.64280700683594, + "learning_rate": 1.9823289336697895e-06, + "loss": 18.6406, + "step": 9311 + }, + { + "epoch": 0.08814759421058112, + "grad_norm": 274.5676574707031, + "learning_rate": 1.982323195107809e-06, + "loss": 12.9883, + "step": 9312 + }, + { + "epoch": 0.08815706023229618, + "grad_norm": 2.7212166786193848, + "learning_rate": 1.9823174556225086e-06, + "loss": 0.7988, + "step": 9313 + }, + { + "epoch": 0.08816652625401122, + "grad_norm": 232.03269958496094, + "learning_rate": 1.9823117152138935e-06, + "loss": 10.9453, + "step": 9314 + }, + { + "epoch": 0.08817599227572628, + "grad_norm": 427.5469055175781, + "learning_rate": 1.9823059738819693e-06, + "loss": 41.5938, + "step": 9315 + }, + { + "epoch": 0.08818545829744133, + "grad_norm": 328.6736145019531, + "learning_rate": 1.9823002316267414e-06, + "loss": 29.1562, + "step": 9316 + }, + { + "epoch": 0.08819492431915639, + "grad_norm": 353.6867980957031, + "learning_rate": 1.9822944884482155e-06, + "loss": 30.3203, + "step": 9317 + }, + { + "epoch": 0.08820439034087144, + "grad_norm": 1618.1947021484375, + "learning_rate": 1.982288744346396e-06, + "loss": 48.2188, + "step": 9318 + }, + { + "epoch": 0.0882138563625865, + "grad_norm": 641.8336181640625, + "learning_rate": 1.9822829993212897e-06, + "loss": 39.8672, + "step": 9319 + }, + { + "epoch": 0.08822332238430156, + "grad_norm": 442.0866394042969, + "learning_rate": 1.982277253372901e-06, + "loss": 41.75, + "step": 9320 + }, + { + "epoch": 0.0882327884060166, + "grad_norm": 220.56227111816406, + "learning_rate": 1.9822715065012354e-06, + "loss": 19.9844, + "step": 9321 + }, + { + "epoch": 0.08824225442773166, + "grad_norm": 324.9588623046875, + "learning_rate": 1.982265758706299e-06, + "loss": 34.1406, + "step": 9322 + }, + { + "epoch": 0.08825172044944671, + "grad_norm": 278.526611328125, + "learning_rate": 1.9822600099880963e-06, + "loss": 20.9844, + "step": 9323 + }, + { + "epoch": 0.08826118647116177, + "grad_norm": 696.2850341796875, + "learning_rate": 1.982254260346633e-06, + "loss": 24.4219, + "step": 9324 + }, + { + "epoch": 0.08827065249287681, + "grad_norm": 254.92588806152344, + "learning_rate": 1.9822485097819147e-06, + "loss": 17.6602, + "step": 9325 + }, + { + "epoch": 0.08828011851459187, + "grad_norm": 400.280029296875, + "learning_rate": 1.982242758293947e-06, + "loss": 18.1328, + "step": 9326 + }, + { + "epoch": 0.08828958453630692, + "grad_norm": 544.7694702148438, + "learning_rate": 1.9822370058827347e-06, + "loss": 59.3438, + "step": 9327 + }, + { + "epoch": 0.08829905055802198, + "grad_norm": 326.2803039550781, + "learning_rate": 1.9822312525482837e-06, + "loss": 26.875, + "step": 9328 + }, + { + "epoch": 0.08830851657973704, + "grad_norm": 368.6722717285156, + "learning_rate": 1.982225498290599e-06, + "loss": 27.2109, + "step": 9329 + }, + { + "epoch": 0.08831798260145209, + "grad_norm": 413.07440185546875, + "learning_rate": 1.9822197431096865e-06, + "loss": 41.9219, + "step": 9330 + }, + { + "epoch": 0.08832744862316715, + "grad_norm": 305.7964172363281, + "learning_rate": 1.982213987005551e-06, + "loss": 23.1562, + "step": 9331 + }, + { + "epoch": 0.08833691464488219, + "grad_norm": 327.628662109375, + "learning_rate": 1.982208229978198e-06, + "loss": 41.2188, + "step": 9332 + }, + { + "epoch": 0.08834638066659725, + "grad_norm": 2.959294080734253, + "learning_rate": 1.982202472027634e-06, + "loss": 1.0142, + "step": 9333 + }, + { + "epoch": 0.0883558466883123, + "grad_norm": 505.7881774902344, + "learning_rate": 1.982196713153863e-06, + "loss": 35.375, + "step": 9334 + }, + { + "epoch": 0.08836531271002736, + "grad_norm": 355.1046142578125, + "learning_rate": 1.9821909533568914e-06, + "loss": 23.1719, + "step": 9335 + }, + { + "epoch": 0.0883747787317424, + "grad_norm": 311.53863525390625, + "learning_rate": 1.9821851926367235e-06, + "loss": 45.4531, + "step": 9336 + }, + { + "epoch": 0.08838424475345746, + "grad_norm": 343.7705078125, + "learning_rate": 1.982179430993366e-06, + "loss": 17.0586, + "step": 9337 + }, + { + "epoch": 0.08839371077517252, + "grad_norm": 495.4217529296875, + "learning_rate": 1.9821736684268238e-06, + "loss": 33.7812, + "step": 9338 + }, + { + "epoch": 0.08840317679688757, + "grad_norm": 202.22962951660156, + "learning_rate": 1.9821679049371017e-06, + "loss": 10.1719, + "step": 9339 + }, + { + "epoch": 0.08841264281860263, + "grad_norm": 300.59844970703125, + "learning_rate": 1.982162140524206e-06, + "loss": 28.5391, + "step": 9340 + }, + { + "epoch": 0.08842210884031768, + "grad_norm": 206.52919006347656, + "learning_rate": 1.9821563751881418e-06, + "loss": 23.5703, + "step": 9341 + }, + { + "epoch": 0.08843157486203274, + "grad_norm": 657.4034423828125, + "learning_rate": 1.9821506089289146e-06, + "loss": 34.0625, + "step": 9342 + }, + { + "epoch": 0.08844104088374778, + "grad_norm": 3.3939132690429688, + "learning_rate": 1.9821448417465295e-06, + "loss": 1.0078, + "step": 9343 + }, + { + "epoch": 0.08845050690546284, + "grad_norm": 566.9185180664062, + "learning_rate": 1.9821390736409924e-06, + "loss": 36.9375, + "step": 9344 + }, + { + "epoch": 0.0884599729271779, + "grad_norm": 288.721923828125, + "learning_rate": 1.982133304612308e-06, + "loss": 16.8594, + "step": 9345 + }, + { + "epoch": 0.08846943894889295, + "grad_norm": 544.5038452148438, + "learning_rate": 1.9821275346604826e-06, + "loss": 38.8594, + "step": 9346 + }, + { + "epoch": 0.08847890497060801, + "grad_norm": 3.6464970111846924, + "learning_rate": 1.982121763785521e-06, + "loss": 0.8872, + "step": 9347 + }, + { + "epoch": 0.08848837099232305, + "grad_norm": 465.5851135253906, + "learning_rate": 1.982115991987429e-06, + "loss": 29.0, + "step": 9348 + }, + { + "epoch": 0.08849783701403811, + "grad_norm": 572.0011596679688, + "learning_rate": 1.982110219266212e-06, + "loss": 35.5391, + "step": 9349 + }, + { + "epoch": 0.08850730303575316, + "grad_norm": 692.63671875, + "learning_rate": 1.982104445621875e-06, + "loss": 68.6133, + "step": 9350 + }, + { + "epoch": 0.08851676905746822, + "grad_norm": 372.8194580078125, + "learning_rate": 1.9820986710544236e-06, + "loss": 31.9688, + "step": 9351 + }, + { + "epoch": 0.08852623507918327, + "grad_norm": 260.6785888671875, + "learning_rate": 1.9820928955638637e-06, + "loss": 22.0156, + "step": 9352 + }, + { + "epoch": 0.08853570110089833, + "grad_norm": 192.28460693359375, + "learning_rate": 1.9820871191502e-06, + "loss": 19.9688, + "step": 9353 + }, + { + "epoch": 0.08854516712261339, + "grad_norm": 698.9056396484375, + "learning_rate": 1.9820813418134384e-06, + "loss": 40.4375, + "step": 9354 + }, + { + "epoch": 0.08855463314432843, + "grad_norm": 269.8890075683594, + "learning_rate": 1.9820755635535843e-06, + "loss": 13.7695, + "step": 9355 + }, + { + "epoch": 0.08856409916604349, + "grad_norm": 240.4335479736328, + "learning_rate": 1.982069784370643e-06, + "loss": 16.8984, + "step": 9356 + }, + { + "epoch": 0.08857356518775854, + "grad_norm": 505.01861572265625, + "learning_rate": 1.98206400426462e-06, + "loss": 44.0938, + "step": 9357 + }, + { + "epoch": 0.0885830312094736, + "grad_norm": 214.09149169921875, + "learning_rate": 1.982058223235521e-06, + "loss": 20.2734, + "step": 9358 + }, + { + "epoch": 0.08859249723118864, + "grad_norm": 3.2525599002838135, + "learning_rate": 1.982052441283351e-06, + "loss": 0.9893, + "step": 9359 + }, + { + "epoch": 0.0886019632529037, + "grad_norm": 303.97930908203125, + "learning_rate": 1.9820466584081156e-06, + "loss": 24.2031, + "step": 9360 + }, + { + "epoch": 0.08861142927461875, + "grad_norm": 694.4467163085938, + "learning_rate": 1.98204087460982e-06, + "loss": 65.75, + "step": 9361 + }, + { + "epoch": 0.08862089529633381, + "grad_norm": 488.81329345703125, + "learning_rate": 1.9820350898884698e-06, + "loss": 26.6484, + "step": 9362 + }, + { + "epoch": 0.08863036131804887, + "grad_norm": 175.79751586914062, + "learning_rate": 1.982029304244071e-06, + "loss": 17.8945, + "step": 9363 + }, + { + "epoch": 0.08863982733976392, + "grad_norm": 288.3592834472656, + "learning_rate": 1.982023517676628e-06, + "loss": 32.2578, + "step": 9364 + }, + { + "epoch": 0.08864929336147898, + "grad_norm": 542.0755004882812, + "learning_rate": 1.9820177301861473e-06, + "loss": 18.1719, + "step": 9365 + }, + { + "epoch": 0.08865875938319402, + "grad_norm": 230.34359741210938, + "learning_rate": 1.9820119417726336e-06, + "loss": 18.5469, + "step": 9366 + }, + { + "epoch": 0.08866822540490908, + "grad_norm": 477.7102355957031, + "learning_rate": 1.982006152436092e-06, + "loss": 50.4688, + "step": 9367 + }, + { + "epoch": 0.08867769142662413, + "grad_norm": 855.0917358398438, + "learning_rate": 1.9820003621765294e-06, + "loss": 65.0469, + "step": 9368 + }, + { + "epoch": 0.08868715744833919, + "grad_norm": 362.2286376953125, + "learning_rate": 1.98199457099395e-06, + "loss": 23.7812, + "step": 9369 + }, + { + "epoch": 0.08869662347005423, + "grad_norm": 523.5137939453125, + "learning_rate": 1.9819887788883593e-06, + "loss": 23.0156, + "step": 9370 + }, + { + "epoch": 0.0887060894917693, + "grad_norm": 256.0611877441406, + "learning_rate": 1.9819829858597633e-06, + "loss": 19.9141, + "step": 9371 + }, + { + "epoch": 0.08871555551348435, + "grad_norm": 424.1212158203125, + "learning_rate": 1.9819771919081673e-06, + "loss": 23.1797, + "step": 9372 + }, + { + "epoch": 0.0887250215351994, + "grad_norm": 500.6457824707031, + "learning_rate": 1.981971397033576e-06, + "loss": 47.5469, + "step": 9373 + }, + { + "epoch": 0.08873448755691446, + "grad_norm": 261.1625061035156, + "learning_rate": 1.9819656012359964e-06, + "loss": 10.4844, + "step": 9374 + }, + { + "epoch": 0.0887439535786295, + "grad_norm": 194.53831481933594, + "learning_rate": 1.9819598045154324e-06, + "loss": 17.8359, + "step": 9375 + }, + { + "epoch": 0.08875341960034457, + "grad_norm": 485.48126220703125, + "learning_rate": 1.9819540068718905e-06, + "loss": 26.3906, + "step": 9376 + }, + { + "epoch": 0.08876288562205961, + "grad_norm": 526.7757568359375, + "learning_rate": 1.9819482083053753e-06, + "loss": 36.8594, + "step": 9377 + }, + { + "epoch": 0.08877235164377467, + "grad_norm": 271.79937744140625, + "learning_rate": 1.9819424088158928e-06, + "loss": 34.1094, + "step": 9378 + }, + { + "epoch": 0.08878181766548972, + "grad_norm": 3.0203287601470947, + "learning_rate": 1.981936608403448e-06, + "loss": 0.8042, + "step": 9379 + }, + { + "epoch": 0.08879128368720478, + "grad_norm": 1130.525146484375, + "learning_rate": 1.9819308070680473e-06, + "loss": 57.5391, + "step": 9380 + }, + { + "epoch": 0.08880074970891984, + "grad_norm": 358.7857360839844, + "learning_rate": 1.981925004809695e-06, + "loss": 30.6641, + "step": 9381 + }, + { + "epoch": 0.08881021573063488, + "grad_norm": 337.1576843261719, + "learning_rate": 1.9819192016283973e-06, + "loss": 24.7188, + "step": 9382 + }, + { + "epoch": 0.08881968175234994, + "grad_norm": 192.81781005859375, + "learning_rate": 1.9819133975241595e-06, + "loss": 19.7344, + "step": 9383 + }, + { + "epoch": 0.08882914777406499, + "grad_norm": 2.704434633255005, + "learning_rate": 1.981907592496987e-06, + "loss": 0.894, + "step": 9384 + }, + { + "epoch": 0.08883861379578005, + "grad_norm": 440.28973388671875, + "learning_rate": 1.981901786546885e-06, + "loss": 31.4219, + "step": 9385 + }, + { + "epoch": 0.0888480798174951, + "grad_norm": 316.3153076171875, + "learning_rate": 1.9818959796738596e-06, + "loss": 20.9844, + "step": 9386 + }, + { + "epoch": 0.08885754583921016, + "grad_norm": 207.21246337890625, + "learning_rate": 1.9818901718779154e-06, + "loss": 23.8125, + "step": 9387 + }, + { + "epoch": 0.08886701186092522, + "grad_norm": 303.5054626464844, + "learning_rate": 1.981884363159058e-06, + "loss": 27.3242, + "step": 9388 + }, + { + "epoch": 0.08887647788264026, + "grad_norm": 318.9969787597656, + "learning_rate": 1.981878553517294e-06, + "loss": 19.7656, + "step": 9389 + }, + { + "epoch": 0.08888594390435532, + "grad_norm": 388.9466552734375, + "learning_rate": 1.981872742952628e-06, + "loss": 39.4453, + "step": 9390 + }, + { + "epoch": 0.08889540992607037, + "grad_norm": 351.9349365234375, + "learning_rate": 1.9818669314650647e-06, + "loss": 30.625, + "step": 9391 + }, + { + "epoch": 0.08890487594778543, + "grad_norm": 561.6893920898438, + "learning_rate": 1.981861119054611e-06, + "loss": 33.8516, + "step": 9392 + }, + { + "epoch": 0.08891434196950047, + "grad_norm": 668.3001708984375, + "learning_rate": 1.981855305721272e-06, + "loss": 50.25, + "step": 9393 + }, + { + "epoch": 0.08892380799121553, + "grad_norm": 312.4421081542969, + "learning_rate": 1.9818494914650523e-06, + "loss": 22.4453, + "step": 9394 + }, + { + "epoch": 0.08893327401293058, + "grad_norm": 3.088897943496704, + "learning_rate": 1.9818436762859578e-06, + "loss": 0.9824, + "step": 9395 + }, + { + "epoch": 0.08894274003464564, + "grad_norm": 494.02606201171875, + "learning_rate": 1.9818378601839945e-06, + "loss": 40.7969, + "step": 9396 + }, + { + "epoch": 0.0889522060563607, + "grad_norm": 491.4949645996094, + "learning_rate": 1.9818320431591673e-06, + "loss": 36.5391, + "step": 9397 + }, + { + "epoch": 0.08896167207807575, + "grad_norm": 3.1077561378479004, + "learning_rate": 1.981826225211482e-06, + "loss": 0.9062, + "step": 9398 + }, + { + "epoch": 0.0889711380997908, + "grad_norm": 920.8452758789062, + "learning_rate": 1.9818204063409442e-06, + "loss": 78.9453, + "step": 9399 + }, + { + "epoch": 0.08898060412150585, + "grad_norm": 361.4859924316406, + "learning_rate": 1.9818145865475585e-06, + "loss": 9.3516, + "step": 9400 + }, + { + "epoch": 0.08899007014322091, + "grad_norm": 2.9828546047210693, + "learning_rate": 1.9818087658313312e-06, + "loss": 0.8882, + "step": 9401 + }, + { + "epoch": 0.08899953616493596, + "grad_norm": 523.2352294921875, + "learning_rate": 1.981802944192268e-06, + "loss": 50.0469, + "step": 9402 + }, + { + "epoch": 0.08900900218665102, + "grad_norm": 223.9906768798828, + "learning_rate": 1.981797121630373e-06, + "loss": 19.0156, + "step": 9403 + }, + { + "epoch": 0.08901846820836606, + "grad_norm": 1066.9813232421875, + "learning_rate": 1.9817912981456535e-06, + "loss": 59.1133, + "step": 9404 + }, + { + "epoch": 0.08902793423008112, + "grad_norm": 389.8690185546875, + "learning_rate": 1.9817854737381134e-06, + "loss": 21.6484, + "step": 9405 + }, + { + "epoch": 0.08903740025179618, + "grad_norm": 387.2532958984375, + "learning_rate": 1.981779648407759e-06, + "loss": 51.5625, + "step": 9406 + }, + { + "epoch": 0.08904686627351123, + "grad_norm": 480.5852355957031, + "learning_rate": 1.9817738221545957e-06, + "loss": 41.0625, + "step": 9407 + }, + { + "epoch": 0.08905633229522629, + "grad_norm": 645.7107543945312, + "learning_rate": 1.981767994978629e-06, + "loss": 46.6094, + "step": 9408 + }, + { + "epoch": 0.08906579831694134, + "grad_norm": 490.3713684082031, + "learning_rate": 1.981762166879864e-06, + "loss": 33.7109, + "step": 9409 + }, + { + "epoch": 0.0890752643386564, + "grad_norm": 294.6695861816406, + "learning_rate": 1.9817563378583062e-06, + "loss": 20.2344, + "step": 9410 + }, + { + "epoch": 0.08908473036037144, + "grad_norm": 617.3006591796875, + "learning_rate": 1.9817505079139616e-06, + "loss": 47.875, + "step": 9411 + }, + { + "epoch": 0.0890941963820865, + "grad_norm": 316.7355651855469, + "learning_rate": 1.981744677046835e-06, + "loss": 30.9062, + "step": 9412 + }, + { + "epoch": 0.08910366240380155, + "grad_norm": 643.7457275390625, + "learning_rate": 1.9817388452569333e-06, + "loss": 10.2539, + "step": 9413 + }, + { + "epoch": 0.08911312842551661, + "grad_norm": 549.659423828125, + "learning_rate": 1.98173301254426e-06, + "loss": 60.25, + "step": 9414 + }, + { + "epoch": 0.08912259444723167, + "grad_norm": 517.6708374023438, + "learning_rate": 1.981727178908822e-06, + "loss": 42.4062, + "step": 9415 + }, + { + "epoch": 0.08913206046894671, + "grad_norm": 501.07904052734375, + "learning_rate": 1.981721344350624e-06, + "loss": 42.8594, + "step": 9416 + }, + { + "epoch": 0.08914152649066177, + "grad_norm": 291.6895751953125, + "learning_rate": 1.981715508869672e-06, + "loss": 31.3047, + "step": 9417 + }, + { + "epoch": 0.08915099251237682, + "grad_norm": 414.514404296875, + "learning_rate": 1.9817096724659714e-06, + "loss": 29.8203, + "step": 9418 + }, + { + "epoch": 0.08916045853409188, + "grad_norm": 333.0033264160156, + "learning_rate": 1.981703835139527e-06, + "loss": 61.9531, + "step": 9419 + }, + { + "epoch": 0.08916992455580693, + "grad_norm": 499.8777160644531, + "learning_rate": 1.981697996890346e-06, + "loss": 36.2891, + "step": 9420 + }, + { + "epoch": 0.08917939057752199, + "grad_norm": 839.9603271484375, + "learning_rate": 1.9816921577184316e-06, + "loss": 9.4805, + "step": 9421 + }, + { + "epoch": 0.08918885659923703, + "grad_norm": 189.38743591308594, + "learning_rate": 1.981686317623791e-06, + "loss": 19.6953, + "step": 9422 + }, + { + "epoch": 0.08919832262095209, + "grad_norm": 348.5921325683594, + "learning_rate": 1.981680476606429e-06, + "loss": 22.3203, + "step": 9423 + }, + { + "epoch": 0.08920778864266715, + "grad_norm": 461.4508972167969, + "learning_rate": 1.981674634666351e-06, + "loss": 21.8945, + "step": 9424 + }, + { + "epoch": 0.0892172546643822, + "grad_norm": 152.05003356933594, + "learning_rate": 1.981668791803563e-06, + "loss": 19.9922, + "step": 9425 + }, + { + "epoch": 0.08922672068609726, + "grad_norm": 542.9703979492188, + "learning_rate": 1.98166294801807e-06, + "loss": 54.9844, + "step": 9426 + }, + { + "epoch": 0.0892361867078123, + "grad_norm": 416.2648620605469, + "learning_rate": 1.981657103309878e-06, + "loss": 23.2773, + "step": 9427 + }, + { + "epoch": 0.08924565272952736, + "grad_norm": 2.7456905841827393, + "learning_rate": 1.981651257678992e-06, + "loss": 0.7986, + "step": 9428 + }, + { + "epoch": 0.08925511875124241, + "grad_norm": 3.066311836242676, + "learning_rate": 1.9816454111254175e-06, + "loss": 0.8369, + "step": 9429 + }, + { + "epoch": 0.08926458477295747, + "grad_norm": 288.93792724609375, + "learning_rate": 1.9816395636491604e-06, + "loss": 28.5938, + "step": 9430 + }, + { + "epoch": 0.08927405079467253, + "grad_norm": 400.378173828125, + "learning_rate": 1.9816337152502256e-06, + "loss": 21.2109, + "step": 9431 + }, + { + "epoch": 0.08928351681638758, + "grad_norm": 3.756993055343628, + "learning_rate": 1.9816278659286196e-06, + "loss": 1.0552, + "step": 9432 + }, + { + "epoch": 0.08929298283810264, + "grad_norm": 382.3153076171875, + "learning_rate": 1.981622015684347e-06, + "loss": 25.6484, + "step": 9433 + }, + { + "epoch": 0.08930244885981768, + "grad_norm": 320.0689392089844, + "learning_rate": 1.981616164517413e-06, + "loss": 11.3242, + "step": 9434 + }, + { + "epoch": 0.08931191488153274, + "grad_norm": 216.01150512695312, + "learning_rate": 1.9816103124278244e-06, + "loss": 23.4297, + "step": 9435 + }, + { + "epoch": 0.08932138090324779, + "grad_norm": 148.4545440673828, + "learning_rate": 1.9816044594155857e-06, + "loss": 20.6094, + "step": 9436 + }, + { + "epoch": 0.08933084692496285, + "grad_norm": 526.0708618164062, + "learning_rate": 1.9815986054807028e-06, + "loss": 78.2656, + "step": 9437 + }, + { + "epoch": 0.0893403129466779, + "grad_norm": 584.0662841796875, + "learning_rate": 1.9815927506231807e-06, + "loss": 32.0391, + "step": 9438 + }, + { + "epoch": 0.08934977896839295, + "grad_norm": 284.74981689453125, + "learning_rate": 1.981586894843026e-06, + "loss": 40.5781, + "step": 9439 + }, + { + "epoch": 0.08935924499010801, + "grad_norm": 283.4690246582031, + "learning_rate": 1.9815810381402427e-06, + "loss": 22.5234, + "step": 9440 + }, + { + "epoch": 0.08936871101182306, + "grad_norm": 3.7398362159729004, + "learning_rate": 1.9815751805148376e-06, + "loss": 0.8989, + "step": 9441 + }, + { + "epoch": 0.08937817703353812, + "grad_norm": 309.2441711425781, + "learning_rate": 1.981569321966815e-06, + "loss": 22.0391, + "step": 9442 + }, + { + "epoch": 0.08938764305525317, + "grad_norm": 333.94097900390625, + "learning_rate": 1.9815634624961818e-06, + "loss": 22.125, + "step": 9443 + }, + { + "epoch": 0.08939710907696823, + "grad_norm": 521.0274658203125, + "learning_rate": 1.9815576021029424e-06, + "loss": 27.0, + "step": 9444 + }, + { + "epoch": 0.08940657509868327, + "grad_norm": 205.16038513183594, + "learning_rate": 1.9815517407871033e-06, + "loss": 19.1406, + "step": 9445 + }, + { + "epoch": 0.08941604112039833, + "grad_norm": 724.651123046875, + "learning_rate": 1.9815458785486688e-06, + "loss": 64.9375, + "step": 9446 + }, + { + "epoch": 0.08942550714211338, + "grad_norm": 418.0701904296875, + "learning_rate": 1.981540015387645e-06, + "loss": 50.5312, + "step": 9447 + }, + { + "epoch": 0.08943497316382844, + "grad_norm": 3.225876569747925, + "learning_rate": 1.981534151304038e-06, + "loss": 0.8169, + "step": 9448 + }, + { + "epoch": 0.0894444391855435, + "grad_norm": 167.68124389648438, + "learning_rate": 1.9815282862978523e-06, + "loss": 19.6328, + "step": 9449 + }, + { + "epoch": 0.08945390520725854, + "grad_norm": 336.68408203125, + "learning_rate": 1.9815224203690938e-06, + "loss": 12.5117, + "step": 9450 + }, + { + "epoch": 0.0894633712289736, + "grad_norm": 388.9901123046875, + "learning_rate": 1.9815165535177686e-06, + "loss": 16.3828, + "step": 9451 + }, + { + "epoch": 0.08947283725068865, + "grad_norm": 363.2774353027344, + "learning_rate": 1.9815106857438814e-06, + "loss": 53.3828, + "step": 9452 + }, + { + "epoch": 0.08948230327240371, + "grad_norm": 285.150390625, + "learning_rate": 1.9815048170474377e-06, + "loss": 28.6562, + "step": 9453 + }, + { + "epoch": 0.08949176929411876, + "grad_norm": 398.74359130859375, + "learning_rate": 1.9814989474284438e-06, + "loss": 18.6953, + "step": 9454 + }, + { + "epoch": 0.08950123531583382, + "grad_norm": 246.92210388183594, + "learning_rate": 1.9814930768869044e-06, + "loss": 17.8672, + "step": 9455 + }, + { + "epoch": 0.08951070133754886, + "grad_norm": 229.83639526367188, + "learning_rate": 1.9814872054228254e-06, + "loss": 18.5, + "step": 9456 + }, + { + "epoch": 0.08952016735926392, + "grad_norm": 373.35125732421875, + "learning_rate": 1.9814813330362123e-06, + "loss": 19.2656, + "step": 9457 + }, + { + "epoch": 0.08952963338097898, + "grad_norm": 420.3739318847656, + "learning_rate": 1.9814754597270707e-06, + "loss": 36.3672, + "step": 9458 + }, + { + "epoch": 0.08953909940269403, + "grad_norm": 228.38804626464844, + "learning_rate": 1.981469585495406e-06, + "loss": 13.7422, + "step": 9459 + }, + { + "epoch": 0.08954856542440909, + "grad_norm": 170.71864318847656, + "learning_rate": 1.9814637103412237e-06, + "loss": 12.25, + "step": 9460 + }, + { + "epoch": 0.08955803144612413, + "grad_norm": 436.7677001953125, + "learning_rate": 1.9814578342645294e-06, + "loss": 29.8594, + "step": 9461 + }, + { + "epoch": 0.0895674974678392, + "grad_norm": 197.26287841796875, + "learning_rate": 1.9814519572653285e-06, + "loss": 23.375, + "step": 9462 + }, + { + "epoch": 0.08957696348955424, + "grad_norm": 816.8046875, + "learning_rate": 1.9814460793436266e-06, + "loss": 23.3594, + "step": 9463 + }, + { + "epoch": 0.0895864295112693, + "grad_norm": 398.2343444824219, + "learning_rate": 1.981440200499429e-06, + "loss": 30.1562, + "step": 9464 + }, + { + "epoch": 0.08959589553298435, + "grad_norm": 1319.264404296875, + "learning_rate": 1.9814343207327417e-06, + "loss": 44.2578, + "step": 9465 + }, + { + "epoch": 0.0896053615546994, + "grad_norm": 654.756103515625, + "learning_rate": 1.98142844004357e-06, + "loss": 42.4531, + "step": 9466 + }, + { + "epoch": 0.08961482757641447, + "grad_norm": 218.04486083984375, + "learning_rate": 1.9814225584319195e-06, + "loss": 15.7109, + "step": 9467 + }, + { + "epoch": 0.08962429359812951, + "grad_norm": 579.1248779296875, + "learning_rate": 1.981416675897796e-06, + "loss": 62.625, + "step": 9468 + }, + { + "epoch": 0.08963375961984457, + "grad_norm": 525.848876953125, + "learning_rate": 1.981410792441204e-06, + "loss": 49.8906, + "step": 9469 + }, + { + "epoch": 0.08964322564155962, + "grad_norm": 420.0704040527344, + "learning_rate": 1.98140490806215e-06, + "loss": 31.4297, + "step": 9470 + }, + { + "epoch": 0.08965269166327468, + "grad_norm": 292.6224365234375, + "learning_rate": 1.9813990227606392e-06, + "loss": 17.3516, + "step": 9471 + }, + { + "epoch": 0.08966215768498972, + "grad_norm": 3.1609318256378174, + "learning_rate": 1.981393136536677e-06, + "loss": 0.9102, + "step": 9472 + }, + { + "epoch": 0.08967162370670478, + "grad_norm": 516.6405639648438, + "learning_rate": 1.981387249390269e-06, + "loss": 55.8438, + "step": 9473 + }, + { + "epoch": 0.08968108972841984, + "grad_norm": 3.2265400886535645, + "learning_rate": 1.9813813613214213e-06, + "loss": 0.947, + "step": 9474 + }, + { + "epoch": 0.08969055575013489, + "grad_norm": 206.45135498046875, + "learning_rate": 1.981375472330139e-06, + "loss": 18.3125, + "step": 9475 + }, + { + "epoch": 0.08970002177184995, + "grad_norm": 175.5547637939453, + "learning_rate": 1.981369582416427e-06, + "loss": 24.4375, + "step": 9476 + }, + { + "epoch": 0.089709487793565, + "grad_norm": 238.46664428710938, + "learning_rate": 1.981363691580292e-06, + "loss": 24.6797, + "step": 9477 + }, + { + "epoch": 0.08971895381528006, + "grad_norm": 2.987643003463745, + "learning_rate": 1.9813577998217385e-06, + "loss": 0.8547, + "step": 9478 + }, + { + "epoch": 0.0897284198369951, + "grad_norm": 306.7988586425781, + "learning_rate": 1.9813519071407726e-06, + "loss": 24.1016, + "step": 9479 + }, + { + "epoch": 0.08973788585871016, + "grad_norm": 612.2770385742188, + "learning_rate": 1.9813460135374004e-06, + "loss": 44.9609, + "step": 9480 + }, + { + "epoch": 0.08974735188042521, + "grad_norm": 394.6059265136719, + "learning_rate": 1.9813401190116263e-06, + "loss": 25.1875, + "step": 9481 + }, + { + "epoch": 0.08975681790214027, + "grad_norm": 390.64093017578125, + "learning_rate": 1.981334223563456e-06, + "loss": 23.4062, + "step": 9482 + }, + { + "epoch": 0.08976628392385533, + "grad_norm": 492.9010314941406, + "learning_rate": 1.9813283271928956e-06, + "loss": 57.2969, + "step": 9483 + }, + { + "epoch": 0.08977574994557037, + "grad_norm": 456.67120361328125, + "learning_rate": 1.981322429899951e-06, + "loss": 29.9844, + "step": 9484 + }, + { + "epoch": 0.08978521596728543, + "grad_norm": 719.1285400390625, + "learning_rate": 1.981316531684626e-06, + "loss": 43.6875, + "step": 9485 + }, + { + "epoch": 0.08979468198900048, + "grad_norm": 206.8585205078125, + "learning_rate": 1.9813106325469283e-06, + "loss": 17.75, + "step": 9486 + }, + { + "epoch": 0.08980414801071554, + "grad_norm": 1360.60009765625, + "learning_rate": 1.981304732486862e-06, + "loss": 56.0469, + "step": 9487 + }, + { + "epoch": 0.08981361403243059, + "grad_norm": 376.3085632324219, + "learning_rate": 1.981298831504433e-06, + "loss": 25.0312, + "step": 9488 + }, + { + "epoch": 0.08982308005414565, + "grad_norm": 1118.7540283203125, + "learning_rate": 1.9812929295996473e-06, + "loss": 51.1328, + "step": 9489 + }, + { + "epoch": 0.08983254607586069, + "grad_norm": 481.20477294921875, + "learning_rate": 1.98128702677251e-06, + "loss": 18.5547, + "step": 9490 + }, + { + "epoch": 0.08984201209757575, + "grad_norm": 659.1195678710938, + "learning_rate": 1.9812811230230264e-06, + "loss": 31.1367, + "step": 9491 + }, + { + "epoch": 0.08985147811929081, + "grad_norm": 599.1149291992188, + "learning_rate": 1.981275218351203e-06, + "loss": 31.2344, + "step": 9492 + }, + { + "epoch": 0.08986094414100586, + "grad_norm": 656.2215576171875, + "learning_rate": 1.981269312757044e-06, + "loss": 40.5703, + "step": 9493 + }, + { + "epoch": 0.08987041016272092, + "grad_norm": 372.67327880859375, + "learning_rate": 1.981263406240556e-06, + "loss": 34.75, + "step": 9494 + }, + { + "epoch": 0.08987987618443596, + "grad_norm": 400.66156005859375, + "learning_rate": 1.9812574988017444e-06, + "loss": 33.9062, + "step": 9495 + }, + { + "epoch": 0.08988934220615102, + "grad_norm": 190.00209045410156, + "learning_rate": 1.9812515904406146e-06, + "loss": 23.3672, + "step": 9496 + }, + { + "epoch": 0.08989880822786607, + "grad_norm": 920.4474487304688, + "learning_rate": 1.981245681157172e-06, + "loss": 58.25, + "step": 9497 + }, + { + "epoch": 0.08990827424958113, + "grad_norm": 401.5257873535156, + "learning_rate": 1.981239770951422e-06, + "loss": 37.0156, + "step": 9498 + }, + { + "epoch": 0.08991774027129618, + "grad_norm": 1113.6953125, + "learning_rate": 1.9812338598233705e-06, + "loss": 59.2031, + "step": 9499 + }, + { + "epoch": 0.08992720629301124, + "grad_norm": 301.6733093261719, + "learning_rate": 1.9812279477730235e-06, + "loss": 35.0781, + "step": 9500 + }, + { + "epoch": 0.0899366723147263, + "grad_norm": 141.7421417236328, + "learning_rate": 1.981222034800386e-06, + "loss": 13.6562, + "step": 9501 + }, + { + "epoch": 0.08994613833644134, + "grad_norm": 517.6517333984375, + "learning_rate": 1.981216120905463e-06, + "loss": 35.7969, + "step": 9502 + }, + { + "epoch": 0.0899556043581564, + "grad_norm": 523.3798217773438, + "learning_rate": 1.9812102060882614e-06, + "loss": 24.8125, + "step": 9503 + }, + { + "epoch": 0.08996507037987145, + "grad_norm": 373.04168701171875, + "learning_rate": 1.9812042903487856e-06, + "loss": 35.2969, + "step": 9504 + }, + { + "epoch": 0.08997453640158651, + "grad_norm": 226.73255920410156, + "learning_rate": 1.9811983736870417e-06, + "loss": 26.9766, + "step": 9505 + }, + { + "epoch": 0.08998400242330155, + "grad_norm": 169.91038513183594, + "learning_rate": 1.981192456103035e-06, + "loss": 14.6797, + "step": 9506 + }, + { + "epoch": 0.08999346844501661, + "grad_norm": 281.8894958496094, + "learning_rate": 1.9811865375967715e-06, + "loss": 24.8281, + "step": 9507 + }, + { + "epoch": 0.09000293446673166, + "grad_norm": 166.61549377441406, + "learning_rate": 1.9811806181682567e-06, + "loss": 18.25, + "step": 9508 + }, + { + "epoch": 0.09001240048844672, + "grad_norm": 343.2150573730469, + "learning_rate": 1.9811746978174954e-06, + "loss": 22.1797, + "step": 9509 + }, + { + "epoch": 0.09002186651016178, + "grad_norm": 411.0964660644531, + "learning_rate": 1.9811687765444944e-06, + "loss": 21.5625, + "step": 9510 + }, + { + "epoch": 0.09003133253187683, + "grad_norm": 287.27874755859375, + "learning_rate": 1.981162854349258e-06, + "loss": 8.0664, + "step": 9511 + }, + { + "epoch": 0.09004079855359189, + "grad_norm": 438.58990478515625, + "learning_rate": 1.9811569312317926e-06, + "loss": 51.1406, + "step": 9512 + }, + { + "epoch": 0.09005026457530693, + "grad_norm": 3.2245101928710938, + "learning_rate": 1.981151007192103e-06, + "loss": 0.9058, + "step": 9513 + }, + { + "epoch": 0.09005973059702199, + "grad_norm": 517.1991577148438, + "learning_rate": 1.9811450822301954e-06, + "loss": 39.3438, + "step": 9514 + }, + { + "epoch": 0.09006919661873704, + "grad_norm": 662.112060546875, + "learning_rate": 1.9811391563460758e-06, + "loss": 36.9805, + "step": 9515 + }, + { + "epoch": 0.0900786626404521, + "grad_norm": 477.8849182128906, + "learning_rate": 1.981133229539749e-06, + "loss": 37.2812, + "step": 9516 + }, + { + "epoch": 0.09008812866216716, + "grad_norm": 284.6169128417969, + "learning_rate": 1.9811273018112206e-06, + "loss": 20.8906, + "step": 9517 + }, + { + "epoch": 0.0900975946838822, + "grad_norm": 611.748291015625, + "learning_rate": 1.9811213731604965e-06, + "loss": 8.3281, + "step": 9518 + }, + { + "epoch": 0.09010706070559726, + "grad_norm": 381.3407287597656, + "learning_rate": 1.9811154435875823e-06, + "loss": 28.6797, + "step": 9519 + }, + { + "epoch": 0.09011652672731231, + "grad_norm": 511.77374267578125, + "learning_rate": 1.9811095130924834e-06, + "loss": 34.1094, + "step": 9520 + }, + { + "epoch": 0.09012599274902737, + "grad_norm": 264.94122314453125, + "learning_rate": 1.9811035816752053e-06, + "loss": 32.5547, + "step": 9521 + }, + { + "epoch": 0.09013545877074242, + "grad_norm": 222.18309020996094, + "learning_rate": 1.9810976493357535e-06, + "loss": 23.75, + "step": 9522 + }, + { + "epoch": 0.09014492479245748, + "grad_norm": 444.715087890625, + "learning_rate": 1.981091716074134e-06, + "loss": 22.4766, + "step": 9523 + }, + { + "epoch": 0.09015439081417252, + "grad_norm": 3.580080032348633, + "learning_rate": 1.981085781890352e-06, + "loss": 0.9854, + "step": 9524 + }, + { + "epoch": 0.09016385683588758, + "grad_norm": 168.17236328125, + "learning_rate": 1.9810798467844126e-06, + "loss": 20.5703, + "step": 9525 + }, + { + "epoch": 0.09017332285760264, + "grad_norm": 160.619140625, + "learning_rate": 1.9810739107563225e-06, + "loss": 23.3203, + "step": 9526 + }, + { + "epoch": 0.09018278887931769, + "grad_norm": 3.090773105621338, + "learning_rate": 1.981067973806087e-06, + "loss": 0.9648, + "step": 9527 + }, + { + "epoch": 0.09019225490103275, + "grad_norm": 567.5475463867188, + "learning_rate": 1.981062035933711e-06, + "loss": 36.2734, + "step": 9528 + }, + { + "epoch": 0.0902017209227478, + "grad_norm": 188.54696655273438, + "learning_rate": 1.981056097139201e-06, + "loss": 20.5898, + "step": 9529 + }, + { + "epoch": 0.09021118694446285, + "grad_norm": 259.3170166015625, + "learning_rate": 1.9810501574225613e-06, + "loss": 34.2188, + "step": 9530 + }, + { + "epoch": 0.0902206529661779, + "grad_norm": 515.0632934570312, + "learning_rate": 1.981044216783799e-06, + "loss": 31.9609, + "step": 9531 + }, + { + "epoch": 0.09023011898789296, + "grad_norm": 605.9942626953125, + "learning_rate": 1.9810382752229185e-06, + "loss": 24.7344, + "step": 9532 + }, + { + "epoch": 0.090239585009608, + "grad_norm": 430.25482177734375, + "learning_rate": 1.981032332739926e-06, + "loss": 18.6328, + "step": 9533 + }, + { + "epoch": 0.09024905103132307, + "grad_norm": 399.2234191894531, + "learning_rate": 1.981026389334827e-06, + "loss": 24.2188, + "step": 9534 + }, + { + "epoch": 0.09025851705303813, + "grad_norm": 182.85650634765625, + "learning_rate": 1.9810204450076272e-06, + "loss": 19.8906, + "step": 9535 + }, + { + "epoch": 0.09026798307475317, + "grad_norm": 514.7698364257812, + "learning_rate": 1.9810144997583316e-06, + "loss": 47.7344, + "step": 9536 + }, + { + "epoch": 0.09027744909646823, + "grad_norm": 594.4711303710938, + "learning_rate": 1.9810085535869466e-06, + "loss": 38.0859, + "step": 9537 + }, + { + "epoch": 0.09028691511818328, + "grad_norm": 358.90704345703125, + "learning_rate": 1.9810026064934773e-06, + "loss": 29.2812, + "step": 9538 + }, + { + "epoch": 0.09029638113989834, + "grad_norm": 263.97552490234375, + "learning_rate": 1.980996658477929e-06, + "loss": 21.2891, + "step": 9539 + }, + { + "epoch": 0.09030584716161338, + "grad_norm": 266.4091491699219, + "learning_rate": 1.9809907095403077e-06, + "loss": 14.8633, + "step": 9540 + }, + { + "epoch": 0.09031531318332844, + "grad_norm": 176.7696990966797, + "learning_rate": 1.980984759680619e-06, + "loss": 20.4609, + "step": 9541 + }, + { + "epoch": 0.09032477920504349, + "grad_norm": 285.677490234375, + "learning_rate": 1.9809788088988688e-06, + "loss": 24.6406, + "step": 9542 + }, + { + "epoch": 0.09033424522675855, + "grad_norm": 971.498779296875, + "learning_rate": 1.9809728571950622e-06, + "loss": 55.4219, + "step": 9543 + }, + { + "epoch": 0.09034371124847361, + "grad_norm": 592.8601684570312, + "learning_rate": 1.980966904569205e-06, + "loss": 42.2344, + "step": 9544 + }, + { + "epoch": 0.09035317727018866, + "grad_norm": 3.393580913543701, + "learning_rate": 1.9809609510213025e-06, + "loss": 0.9897, + "step": 9545 + }, + { + "epoch": 0.09036264329190372, + "grad_norm": 298.6016540527344, + "learning_rate": 1.9809549965513604e-06, + "loss": 8.7578, + "step": 9546 + }, + { + "epoch": 0.09037210931361876, + "grad_norm": 610.9742431640625, + "learning_rate": 1.9809490411593844e-06, + "loss": 42.5234, + "step": 9547 + }, + { + "epoch": 0.09038157533533382, + "grad_norm": 3.244966506958008, + "learning_rate": 1.9809430848453804e-06, + "loss": 0.9741, + "step": 9548 + }, + { + "epoch": 0.09039104135704887, + "grad_norm": 517.9107055664062, + "learning_rate": 1.9809371276093534e-06, + "loss": 37.3281, + "step": 9549 + }, + { + "epoch": 0.09040050737876393, + "grad_norm": 331.4424133300781, + "learning_rate": 1.9809311694513096e-06, + "loss": 34.6875, + "step": 9550 + }, + { + "epoch": 0.09040997340047897, + "grad_norm": 941.544189453125, + "learning_rate": 1.9809252103712547e-06, + "loss": 29.3047, + "step": 9551 + }, + { + "epoch": 0.09041943942219403, + "grad_norm": 377.02398681640625, + "learning_rate": 1.9809192503691934e-06, + "loss": 40.6016, + "step": 9552 + }, + { + "epoch": 0.0904289054439091, + "grad_norm": 207.14637756347656, + "learning_rate": 1.980913289445132e-06, + "loss": 19.8828, + "step": 9553 + }, + { + "epoch": 0.09043837146562414, + "grad_norm": 408.3224182128906, + "learning_rate": 1.9809073275990754e-06, + "loss": 24.5312, + "step": 9554 + }, + { + "epoch": 0.0904478374873392, + "grad_norm": 648.103759765625, + "learning_rate": 1.9809013648310306e-06, + "loss": 50.2969, + "step": 9555 + }, + { + "epoch": 0.09045730350905425, + "grad_norm": 464.6663818359375, + "learning_rate": 1.9808954011410014e-06, + "loss": 53.5469, + "step": 9556 + }, + { + "epoch": 0.0904667695307693, + "grad_norm": 442.8744812011719, + "learning_rate": 1.980889436528995e-06, + "loss": 42.5781, + "step": 9557 + }, + { + "epoch": 0.09047623555248435, + "grad_norm": 236.3868865966797, + "learning_rate": 1.9808834709950163e-06, + "loss": 21.0391, + "step": 9558 + }, + { + "epoch": 0.09048570157419941, + "grad_norm": 555.3199462890625, + "learning_rate": 1.9808775045390705e-06, + "loss": 45.7812, + "step": 9559 + }, + { + "epoch": 0.09049516759591447, + "grad_norm": 228.523193359375, + "learning_rate": 1.980871537161164e-06, + "loss": 19.4688, + "step": 9560 + }, + { + "epoch": 0.09050463361762952, + "grad_norm": 453.7644348144531, + "learning_rate": 1.980865568861302e-06, + "loss": 17.2422, + "step": 9561 + }, + { + "epoch": 0.09051409963934458, + "grad_norm": 719.947021484375, + "learning_rate": 1.9808595996394904e-06, + "loss": 60.5312, + "step": 9562 + }, + { + "epoch": 0.09052356566105962, + "grad_norm": 497.8179931640625, + "learning_rate": 1.9808536294957343e-06, + "loss": 20.3984, + "step": 9563 + }, + { + "epoch": 0.09053303168277468, + "grad_norm": 4.035803318023682, + "learning_rate": 1.98084765843004e-06, + "loss": 0.9849, + "step": 9564 + }, + { + "epoch": 0.09054249770448973, + "grad_norm": 328.01708984375, + "learning_rate": 1.980841686442412e-06, + "loss": 20.8203, + "step": 9565 + }, + { + "epoch": 0.09055196372620479, + "grad_norm": 389.71795654296875, + "learning_rate": 1.980835713532857e-06, + "loss": 29.3594, + "step": 9566 + }, + { + "epoch": 0.09056142974791984, + "grad_norm": 288.1833801269531, + "learning_rate": 1.9808297397013803e-06, + "loss": 25.7578, + "step": 9567 + }, + { + "epoch": 0.0905708957696349, + "grad_norm": 320.91424560546875, + "learning_rate": 1.9808237649479874e-06, + "loss": 26.2422, + "step": 9568 + }, + { + "epoch": 0.09058036179134996, + "grad_norm": 298.38177490234375, + "learning_rate": 1.9808177892726843e-06, + "loss": 29.4297, + "step": 9569 + }, + { + "epoch": 0.090589827813065, + "grad_norm": 711.2406616210938, + "learning_rate": 1.9808118126754756e-06, + "loss": 40.75, + "step": 9570 + }, + { + "epoch": 0.09059929383478006, + "grad_norm": 386.9328918457031, + "learning_rate": 1.980805835156368e-06, + "loss": 22.5469, + "step": 9571 + }, + { + "epoch": 0.09060875985649511, + "grad_norm": 365.0610656738281, + "learning_rate": 1.980799856715367e-06, + "loss": 22.2188, + "step": 9572 + }, + { + "epoch": 0.09061822587821017, + "grad_norm": 269.36285400390625, + "learning_rate": 1.9807938773524774e-06, + "loss": 32.5781, + "step": 9573 + }, + { + "epoch": 0.09062769189992521, + "grad_norm": 408.8167419433594, + "learning_rate": 1.9807878970677052e-06, + "loss": 31.8438, + "step": 9574 + }, + { + "epoch": 0.09063715792164027, + "grad_norm": 366.47808837890625, + "learning_rate": 1.9807819158610567e-06, + "loss": 11.6484, + "step": 9575 + }, + { + "epoch": 0.09064662394335532, + "grad_norm": 307.1010437011719, + "learning_rate": 1.980775933732537e-06, + "loss": 21.3984, + "step": 9576 + }, + { + "epoch": 0.09065608996507038, + "grad_norm": 396.2312927246094, + "learning_rate": 1.9807699506821514e-06, + "loss": 41.125, + "step": 9577 + }, + { + "epoch": 0.09066555598678544, + "grad_norm": 303.9492492675781, + "learning_rate": 1.980763966709906e-06, + "loss": 22.4961, + "step": 9578 + }, + { + "epoch": 0.09067502200850049, + "grad_norm": 750.0142822265625, + "learning_rate": 1.9807579818158063e-06, + "loss": 28.4766, + "step": 9579 + }, + { + "epoch": 0.09068448803021555, + "grad_norm": 650.0199584960938, + "learning_rate": 1.980751995999858e-06, + "loss": 56.0312, + "step": 9580 + }, + { + "epoch": 0.09069395405193059, + "grad_norm": 190.72264099121094, + "learning_rate": 1.9807460092620664e-06, + "loss": 24.2188, + "step": 9581 + }, + { + "epoch": 0.09070342007364565, + "grad_norm": 462.9262390136719, + "learning_rate": 1.9807400216024375e-06, + "loss": 26.5312, + "step": 9582 + }, + { + "epoch": 0.0907128860953607, + "grad_norm": 534.6539306640625, + "learning_rate": 1.980734033020977e-06, + "loss": 28.3125, + "step": 9583 + }, + { + "epoch": 0.09072235211707576, + "grad_norm": 507.11138916015625, + "learning_rate": 1.9807280435176897e-06, + "loss": 33.0312, + "step": 9584 + }, + { + "epoch": 0.0907318181387908, + "grad_norm": 3.51324462890625, + "learning_rate": 1.980722053092582e-06, + "loss": 0.9424, + "step": 9585 + }, + { + "epoch": 0.09074128416050586, + "grad_norm": 3.152210235595703, + "learning_rate": 1.98071606174566e-06, + "loss": 0.8901, + "step": 9586 + }, + { + "epoch": 0.09075075018222092, + "grad_norm": 615.6299438476562, + "learning_rate": 1.980710069476928e-06, + "loss": 44.7891, + "step": 9587 + }, + { + "epoch": 0.09076021620393597, + "grad_norm": 330.4158935546875, + "learning_rate": 1.9807040762863928e-06, + "loss": 19.7031, + "step": 9588 + }, + { + "epoch": 0.09076968222565103, + "grad_norm": 2.685065507888794, + "learning_rate": 1.9806980821740597e-06, + "loss": 0.9844, + "step": 9589 + }, + { + "epoch": 0.09077914824736608, + "grad_norm": 295.3648376464844, + "learning_rate": 1.980692087139934e-06, + "loss": 33.3125, + "step": 9590 + }, + { + "epoch": 0.09078861426908114, + "grad_norm": 253.79444885253906, + "learning_rate": 1.9806860911840213e-06, + "loss": 19.6328, + "step": 9591 + }, + { + "epoch": 0.09079808029079618, + "grad_norm": 268.2671203613281, + "learning_rate": 1.9806800943063277e-06, + "loss": 22.4844, + "step": 9592 + }, + { + "epoch": 0.09080754631251124, + "grad_norm": 258.3752746582031, + "learning_rate": 1.980674096506858e-06, + "loss": 21.2734, + "step": 9593 + }, + { + "epoch": 0.09081701233422629, + "grad_norm": 408.5722961425781, + "learning_rate": 1.980668097785619e-06, + "loss": 23.3906, + "step": 9594 + }, + { + "epoch": 0.09082647835594135, + "grad_norm": 575.11572265625, + "learning_rate": 1.9806620981426157e-06, + "loss": 49.8594, + "step": 9595 + }, + { + "epoch": 0.09083594437765641, + "grad_norm": 554.806640625, + "learning_rate": 1.980656097577854e-06, + "loss": 44.4844, + "step": 9596 + }, + { + "epoch": 0.09084541039937145, + "grad_norm": 227.0635528564453, + "learning_rate": 1.9806500960913396e-06, + "loss": 18.4297, + "step": 9597 + }, + { + "epoch": 0.09085487642108651, + "grad_norm": 723.376708984375, + "learning_rate": 1.9806440936830773e-06, + "loss": 32.3281, + "step": 9598 + }, + { + "epoch": 0.09086434244280156, + "grad_norm": 193.87933349609375, + "learning_rate": 1.9806380903530734e-06, + "loss": 21.0391, + "step": 9599 + }, + { + "epoch": 0.09087380846451662, + "grad_norm": 831.2883911132812, + "learning_rate": 1.980632086101334e-06, + "loss": 28.625, + "step": 9600 + }, + { + "epoch": 0.09088327448623167, + "grad_norm": 570.9581909179688, + "learning_rate": 1.9806260809278635e-06, + "loss": 26.5391, + "step": 9601 + }, + { + "epoch": 0.09089274050794673, + "grad_norm": 3.1142196655273438, + "learning_rate": 1.980620074832669e-06, + "loss": 1.0186, + "step": 9602 + }, + { + "epoch": 0.09090220652966179, + "grad_norm": 425.13909912109375, + "learning_rate": 1.9806140678157546e-06, + "loss": 37.1094, + "step": 9603 + }, + { + "epoch": 0.09091167255137683, + "grad_norm": 525.424072265625, + "learning_rate": 1.980608059877128e-06, + "loss": 43.0312, + "step": 9604 + }, + { + "epoch": 0.09092113857309189, + "grad_norm": 488.20806884765625, + "learning_rate": 1.9806020510167925e-06, + "loss": 49.4844, + "step": 9605 + }, + { + "epoch": 0.09093060459480694, + "grad_norm": 171.5394744873047, + "learning_rate": 1.9805960412347553e-06, + "loss": 16.6367, + "step": 9606 + }, + { + "epoch": 0.090940070616522, + "grad_norm": 355.5564880371094, + "learning_rate": 1.9805900305310217e-06, + "loss": 43.5938, + "step": 9607 + }, + { + "epoch": 0.09094953663823704, + "grad_norm": 399.0274658203125, + "learning_rate": 1.9805840189055968e-06, + "loss": 25.5859, + "step": 9608 + }, + { + "epoch": 0.0909590026599521, + "grad_norm": 424.30426025390625, + "learning_rate": 1.980578006358487e-06, + "loss": 32.5859, + "step": 9609 + }, + { + "epoch": 0.09096846868166715, + "grad_norm": 368.096923828125, + "learning_rate": 1.9805719928896978e-06, + "loss": 17.8359, + "step": 9610 + }, + { + "epoch": 0.09097793470338221, + "grad_norm": 361.4423522949219, + "learning_rate": 1.9805659784992346e-06, + "loss": 48.5, + "step": 9611 + }, + { + "epoch": 0.09098740072509727, + "grad_norm": 466.4801940917969, + "learning_rate": 1.980559963187103e-06, + "loss": 40.2656, + "step": 9612 + }, + { + "epoch": 0.09099686674681232, + "grad_norm": 762.5079345703125, + "learning_rate": 1.9805539469533095e-06, + "loss": 60.8867, + "step": 9613 + }, + { + "epoch": 0.09100633276852738, + "grad_norm": 3.375452756881714, + "learning_rate": 1.9805479297978585e-06, + "loss": 0.8892, + "step": 9614 + }, + { + "epoch": 0.09101579879024242, + "grad_norm": 184.2909698486328, + "learning_rate": 1.9805419117207565e-06, + "loss": 18.9453, + "step": 9615 + }, + { + "epoch": 0.09102526481195748, + "grad_norm": 409.1004333496094, + "learning_rate": 1.9805358927220084e-06, + "loss": 17.3984, + "step": 9616 + }, + { + "epoch": 0.09103473083367253, + "grad_norm": 454.9047546386719, + "learning_rate": 1.980529872801621e-06, + "loss": 32.4219, + "step": 9617 + }, + { + "epoch": 0.09104419685538759, + "grad_norm": 362.0184326171875, + "learning_rate": 1.9805238519595987e-06, + "loss": 33.4766, + "step": 9618 + }, + { + "epoch": 0.09105366287710263, + "grad_norm": 370.3946228027344, + "learning_rate": 1.9805178301959484e-06, + "loss": 27.4453, + "step": 9619 + }, + { + "epoch": 0.0910631288988177, + "grad_norm": 303.4541931152344, + "learning_rate": 1.9805118075106746e-06, + "loss": 38.7422, + "step": 9620 + }, + { + "epoch": 0.09107259492053275, + "grad_norm": 162.36831665039062, + "learning_rate": 1.9805057839037836e-06, + "loss": 23.0703, + "step": 9621 + }, + { + "epoch": 0.0910820609422478, + "grad_norm": 287.47918701171875, + "learning_rate": 1.980499759375281e-06, + "loss": 24.3438, + "step": 9622 + }, + { + "epoch": 0.09109152696396286, + "grad_norm": 2.8562445640563965, + "learning_rate": 1.980493733925172e-06, + "loss": 0.853, + "step": 9623 + }, + { + "epoch": 0.0911009929856779, + "grad_norm": 516.0043334960938, + "learning_rate": 1.9804877075534635e-06, + "loss": 18.0508, + "step": 9624 + }, + { + "epoch": 0.09111045900739297, + "grad_norm": 285.7124328613281, + "learning_rate": 1.9804816802601598e-06, + "loss": 37.9102, + "step": 9625 + }, + { + "epoch": 0.09111992502910801, + "grad_norm": 361.998779296875, + "learning_rate": 1.9804756520452673e-06, + "loss": 40.6719, + "step": 9626 + }, + { + "epoch": 0.09112939105082307, + "grad_norm": 659.02392578125, + "learning_rate": 1.9804696229087914e-06, + "loss": 13.4453, + "step": 9627 + }, + { + "epoch": 0.09113885707253812, + "grad_norm": 370.3154296875, + "learning_rate": 1.980463592850738e-06, + "loss": 21.9688, + "step": 9628 + }, + { + "epoch": 0.09114832309425318, + "grad_norm": 438.93408203125, + "learning_rate": 1.9804575618711124e-06, + "loss": 37.5547, + "step": 9629 + }, + { + "epoch": 0.09115778911596824, + "grad_norm": 468.5584411621094, + "learning_rate": 1.9804515299699207e-06, + "loss": 36.5781, + "step": 9630 + }, + { + "epoch": 0.09116725513768328, + "grad_norm": 270.81787109375, + "learning_rate": 1.9804454971471676e-06, + "loss": 20.5039, + "step": 9631 + }, + { + "epoch": 0.09117672115939834, + "grad_norm": 485.3475646972656, + "learning_rate": 1.9804394634028605e-06, + "loss": 21.8984, + "step": 9632 + }, + { + "epoch": 0.09118618718111339, + "grad_norm": 736.6694946289062, + "learning_rate": 1.9804334287370036e-06, + "loss": 69.5, + "step": 9633 + }, + { + "epoch": 0.09119565320282845, + "grad_norm": 233.01043701171875, + "learning_rate": 1.9804273931496032e-06, + "loss": 20.9297, + "step": 9634 + }, + { + "epoch": 0.0912051192245435, + "grad_norm": 531.791015625, + "learning_rate": 1.9804213566406645e-06, + "loss": 22.7109, + "step": 9635 + }, + { + "epoch": 0.09121458524625856, + "grad_norm": 326.66162109375, + "learning_rate": 1.980415319210194e-06, + "loss": 18.1016, + "step": 9636 + }, + { + "epoch": 0.0912240512679736, + "grad_norm": 637.7732543945312, + "learning_rate": 1.9804092808581963e-06, + "loss": 21.668, + "step": 9637 + }, + { + "epoch": 0.09123351728968866, + "grad_norm": 737.5419921875, + "learning_rate": 1.9804032415846784e-06, + "loss": 59.1094, + "step": 9638 + }, + { + "epoch": 0.09124298331140372, + "grad_norm": 436.31658935546875, + "learning_rate": 1.980397201389645e-06, + "loss": 23.0234, + "step": 9639 + }, + { + "epoch": 0.09125244933311877, + "grad_norm": 3.3757376670837402, + "learning_rate": 1.9803911602731015e-06, + "loss": 1.0073, + "step": 9640 + }, + { + "epoch": 0.09126191535483383, + "grad_norm": 335.6438903808594, + "learning_rate": 1.9803851182350548e-06, + "loss": 22.4141, + "step": 9641 + }, + { + "epoch": 0.09127138137654887, + "grad_norm": 925.059814453125, + "learning_rate": 1.9803790752755095e-06, + "loss": 51.1328, + "step": 9642 + }, + { + "epoch": 0.09128084739826393, + "grad_norm": 471.06585693359375, + "learning_rate": 1.9803730313944715e-06, + "loss": 18.1562, + "step": 9643 + }, + { + "epoch": 0.09129031341997898, + "grad_norm": 283.82000732421875, + "learning_rate": 1.980366986591947e-06, + "loss": 20.5938, + "step": 9644 + }, + { + "epoch": 0.09129977944169404, + "grad_norm": 264.7506103515625, + "learning_rate": 1.9803609408679406e-06, + "loss": 27.9531, + "step": 9645 + }, + { + "epoch": 0.0913092454634091, + "grad_norm": 1463.376953125, + "learning_rate": 1.9803548942224594e-06, + "loss": 61.6562, + "step": 9646 + }, + { + "epoch": 0.09131871148512415, + "grad_norm": 575.117431640625, + "learning_rate": 1.980348846655508e-06, + "loss": 23.2188, + "step": 9647 + }, + { + "epoch": 0.0913281775068392, + "grad_norm": 256.2195129394531, + "learning_rate": 1.9803427981670924e-06, + "loss": 32.4531, + "step": 9648 + }, + { + "epoch": 0.09133764352855425, + "grad_norm": 206.14244079589844, + "learning_rate": 1.980336748757219e-06, + "loss": 18.5156, + "step": 9649 + }, + { + "epoch": 0.09134710955026931, + "grad_norm": 3.2712817192077637, + "learning_rate": 1.9803306984258925e-06, + "loss": 0.9463, + "step": 9650 + }, + { + "epoch": 0.09135657557198436, + "grad_norm": 333.7142333984375, + "learning_rate": 1.9803246471731184e-06, + "loss": 22.2969, + "step": 9651 + }, + { + "epoch": 0.09136604159369942, + "grad_norm": 375.9570617675781, + "learning_rate": 1.9803185949989032e-06, + "loss": 34.1328, + "step": 9652 + }, + { + "epoch": 0.09137550761541446, + "grad_norm": 236.3732452392578, + "learning_rate": 1.9803125419032526e-06, + "loss": 18.7031, + "step": 9653 + }, + { + "epoch": 0.09138497363712952, + "grad_norm": 443.5666809082031, + "learning_rate": 1.980306487886172e-06, + "loss": 34.2969, + "step": 9654 + }, + { + "epoch": 0.09139443965884458, + "grad_norm": 228.01783752441406, + "learning_rate": 1.9803004329476666e-06, + "loss": 19.6562, + "step": 9655 + }, + { + "epoch": 0.09140390568055963, + "grad_norm": 439.1605224609375, + "learning_rate": 1.980294377087743e-06, + "loss": 27.9062, + "step": 9656 + }, + { + "epoch": 0.09141337170227469, + "grad_norm": 622.3037109375, + "learning_rate": 1.9802883203064064e-06, + "loss": 42.6562, + "step": 9657 + }, + { + "epoch": 0.09142283772398974, + "grad_norm": 714.6419067382812, + "learning_rate": 1.9802822626036623e-06, + "loss": 25.6406, + "step": 9658 + }, + { + "epoch": 0.0914323037457048, + "grad_norm": 646.4647216796875, + "learning_rate": 1.980276203979517e-06, + "loss": 52.3906, + "step": 9659 + }, + { + "epoch": 0.09144176976741984, + "grad_norm": 481.072509765625, + "learning_rate": 1.9802701444339752e-06, + "loss": 32.8281, + "step": 9660 + }, + { + "epoch": 0.0914512357891349, + "grad_norm": 302.57757568359375, + "learning_rate": 1.9802640839670437e-06, + "loss": 12.7578, + "step": 9661 + }, + { + "epoch": 0.09146070181084995, + "grad_norm": 232.9759063720703, + "learning_rate": 1.980258022578728e-06, + "loss": 21.9844, + "step": 9662 + }, + { + "epoch": 0.09147016783256501, + "grad_norm": 273.5087585449219, + "learning_rate": 1.980251960269033e-06, + "loss": 14.0391, + "step": 9663 + }, + { + "epoch": 0.09147963385428007, + "grad_norm": 2.9000864028930664, + "learning_rate": 1.9802458970379653e-06, + "loss": 0.8613, + "step": 9664 + }, + { + "epoch": 0.09148909987599511, + "grad_norm": 173.2218475341797, + "learning_rate": 1.98023983288553e-06, + "loss": 15.8828, + "step": 9665 + }, + { + "epoch": 0.09149856589771017, + "grad_norm": 451.5128479003906, + "learning_rate": 1.980233767811733e-06, + "loss": 23.1484, + "step": 9666 + }, + { + "epoch": 0.09150803191942522, + "grad_norm": 330.98675537109375, + "learning_rate": 1.9802277018165805e-06, + "loss": 22.3828, + "step": 9667 + }, + { + "epoch": 0.09151749794114028, + "grad_norm": 749.626953125, + "learning_rate": 1.980221634900077e-06, + "loss": 34.0781, + "step": 9668 + }, + { + "epoch": 0.09152696396285533, + "grad_norm": 225.1514434814453, + "learning_rate": 1.9802155670622293e-06, + "loss": 15.0508, + "step": 9669 + }, + { + "epoch": 0.09153642998457039, + "grad_norm": 378.3317565917969, + "learning_rate": 1.980209498303043e-06, + "loss": 18.043, + "step": 9670 + }, + { + "epoch": 0.09154589600628543, + "grad_norm": 320.65655517578125, + "learning_rate": 1.980203428622523e-06, + "loss": 25.5938, + "step": 9671 + }, + { + "epoch": 0.09155536202800049, + "grad_norm": 422.22314453125, + "learning_rate": 1.980197358020676e-06, + "loss": 45.8906, + "step": 9672 + }, + { + "epoch": 0.09156482804971555, + "grad_norm": 212.3396453857422, + "learning_rate": 1.980191286497507e-06, + "loss": 21.0, + "step": 9673 + }, + { + "epoch": 0.0915742940714306, + "grad_norm": 329.428955078125, + "learning_rate": 1.9801852140530218e-06, + "loss": 33.5312, + "step": 9674 + }, + { + "epoch": 0.09158376009314566, + "grad_norm": 202.3539581298828, + "learning_rate": 1.980179140687227e-06, + "loss": 15.7812, + "step": 9675 + }, + { + "epoch": 0.0915932261148607, + "grad_norm": 472.1769714355469, + "learning_rate": 1.980173066400127e-06, + "loss": 26.1406, + "step": 9676 + }, + { + "epoch": 0.09160269213657576, + "grad_norm": 3.1342339515686035, + "learning_rate": 1.980166991191728e-06, + "loss": 0.8774, + "step": 9677 + }, + { + "epoch": 0.09161215815829081, + "grad_norm": 533.1991577148438, + "learning_rate": 1.980160915062036e-06, + "loss": 42.2734, + "step": 9678 + }, + { + "epoch": 0.09162162418000587, + "grad_norm": 358.7475891113281, + "learning_rate": 1.9801548380110563e-06, + "loss": 27.8516, + "step": 9679 + }, + { + "epoch": 0.09163109020172092, + "grad_norm": 437.4605407714844, + "learning_rate": 1.980148760038795e-06, + "loss": 22.1719, + "step": 9680 + }, + { + "epoch": 0.09164055622343598, + "grad_norm": 3.3634350299835205, + "learning_rate": 1.9801426811452577e-06, + "loss": 0.9243, + "step": 9681 + }, + { + "epoch": 0.09165002224515104, + "grad_norm": 610.99853515625, + "learning_rate": 1.9801366013304496e-06, + "loss": 36.3281, + "step": 9682 + }, + { + "epoch": 0.09165948826686608, + "grad_norm": 753.858642578125, + "learning_rate": 1.980130520594377e-06, + "loss": 23.9297, + "step": 9683 + }, + { + "epoch": 0.09166895428858114, + "grad_norm": 257.5164794921875, + "learning_rate": 1.980124438937046e-06, + "loss": 17.25, + "step": 9684 + }, + { + "epoch": 0.09167842031029619, + "grad_norm": 185.4252471923828, + "learning_rate": 1.9801183563584614e-06, + "loss": 21.8594, + "step": 9685 + }, + { + "epoch": 0.09168788633201125, + "grad_norm": 368.43896484375, + "learning_rate": 1.980112272858629e-06, + "loss": 24.6641, + "step": 9686 + }, + { + "epoch": 0.0916973523537263, + "grad_norm": 385.1996154785156, + "learning_rate": 1.9801061884375555e-06, + "loss": 41.6562, + "step": 9687 + }, + { + "epoch": 0.09170681837544135, + "grad_norm": 250.1849822998047, + "learning_rate": 1.9801001030952454e-06, + "loss": 23.125, + "step": 9688 + }, + { + "epoch": 0.09171628439715641, + "grad_norm": 527.6453857421875, + "learning_rate": 1.9800940168317053e-06, + "loss": 27.9062, + "step": 9689 + }, + { + "epoch": 0.09172575041887146, + "grad_norm": 360.0325622558594, + "learning_rate": 1.9800879296469405e-06, + "loss": 14.5039, + "step": 9690 + }, + { + "epoch": 0.09173521644058652, + "grad_norm": 270.4943542480469, + "learning_rate": 1.9800818415409567e-06, + "loss": 18.1484, + "step": 9691 + }, + { + "epoch": 0.09174468246230157, + "grad_norm": 242.7377166748047, + "learning_rate": 1.98007575251376e-06, + "loss": 19.2812, + "step": 9692 + }, + { + "epoch": 0.09175414848401663, + "grad_norm": 245.8706817626953, + "learning_rate": 1.9800696625653553e-06, + "loss": 9.918, + "step": 9693 + }, + { + "epoch": 0.09176361450573167, + "grad_norm": 259.55059814453125, + "learning_rate": 1.980063571695749e-06, + "loss": 20.5703, + "step": 9694 + }, + { + "epoch": 0.09177308052744673, + "grad_norm": 835.9336547851562, + "learning_rate": 1.980057479904947e-06, + "loss": 73.0312, + "step": 9695 + }, + { + "epoch": 0.09178254654916178, + "grad_norm": 2.726426601409912, + "learning_rate": 1.980051387192955e-06, + "loss": 0.8862, + "step": 9696 + }, + { + "epoch": 0.09179201257087684, + "grad_norm": 543.9826049804688, + "learning_rate": 1.980045293559778e-06, + "loss": 61.4531, + "step": 9697 + }, + { + "epoch": 0.0918014785925919, + "grad_norm": 309.509521484375, + "learning_rate": 1.9800391990054224e-06, + "loss": 26.3672, + "step": 9698 + }, + { + "epoch": 0.09181094461430694, + "grad_norm": 165.83055114746094, + "learning_rate": 1.9800331035298932e-06, + "loss": 23.9766, + "step": 9699 + }, + { + "epoch": 0.091820410636022, + "grad_norm": 3.6539928913116455, + "learning_rate": 1.980027007133197e-06, + "loss": 0.8931, + "step": 9700 + }, + { + "epoch": 0.09182987665773705, + "grad_norm": 975.7266235351562, + "learning_rate": 1.9800209098153395e-06, + "loss": 45.4062, + "step": 9701 + }, + { + "epoch": 0.09183934267945211, + "grad_norm": 279.7576599121094, + "learning_rate": 1.9800148115763256e-06, + "loss": 20.8594, + "step": 9702 + }, + { + "epoch": 0.09184880870116716, + "grad_norm": 1440.291748046875, + "learning_rate": 1.9800087124161616e-06, + "loss": 41.7422, + "step": 9703 + }, + { + "epoch": 0.09185827472288222, + "grad_norm": 3.6639065742492676, + "learning_rate": 1.9800026123348536e-06, + "loss": 0.8794, + "step": 9704 + }, + { + "epoch": 0.09186774074459726, + "grad_norm": 466.2031555175781, + "learning_rate": 1.9799965113324065e-06, + "loss": 42.125, + "step": 9705 + }, + { + "epoch": 0.09187720676631232, + "grad_norm": 2.749011516571045, + "learning_rate": 1.9799904094088263e-06, + "loss": 0.792, + "step": 9706 + }, + { + "epoch": 0.09188667278802738, + "grad_norm": 628.20703125, + "learning_rate": 1.9799843065641194e-06, + "loss": 56.3438, + "step": 9707 + }, + { + "epoch": 0.09189613880974243, + "grad_norm": 271.0714416503906, + "learning_rate": 1.9799782027982905e-06, + "loss": 23.4219, + "step": 9708 + }, + { + "epoch": 0.09190560483145749, + "grad_norm": 2.986212730407715, + "learning_rate": 1.979972098111346e-06, + "loss": 0.8164, + "step": 9709 + }, + { + "epoch": 0.09191507085317253, + "grad_norm": 3.315554618835449, + "learning_rate": 1.9799659925032913e-06, + "loss": 0.8569, + "step": 9710 + }, + { + "epoch": 0.0919245368748876, + "grad_norm": 492.279541015625, + "learning_rate": 1.9799598859741325e-06, + "loss": 46.0469, + "step": 9711 + }, + { + "epoch": 0.09193400289660264, + "grad_norm": 271.823486328125, + "learning_rate": 1.9799537785238754e-06, + "loss": 19.2109, + "step": 9712 + }, + { + "epoch": 0.0919434689183177, + "grad_norm": 625.6539306640625, + "learning_rate": 1.9799476701525254e-06, + "loss": 50.625, + "step": 9713 + }, + { + "epoch": 0.09195293494003275, + "grad_norm": 738.1665649414062, + "learning_rate": 1.979941560860088e-06, + "loss": 36.2617, + "step": 9714 + }, + { + "epoch": 0.0919624009617478, + "grad_norm": 238.42332458496094, + "learning_rate": 1.9799354506465694e-06, + "loss": 17.4922, + "step": 9715 + }, + { + "epoch": 0.09197186698346287, + "grad_norm": 3.135193347930908, + "learning_rate": 1.9799293395119756e-06, + "loss": 0.9038, + "step": 9716 + }, + { + "epoch": 0.09198133300517791, + "grad_norm": 2.9406158924102783, + "learning_rate": 1.979923227456312e-06, + "loss": 0.8701, + "step": 9717 + }, + { + "epoch": 0.09199079902689297, + "grad_norm": 533.9811401367188, + "learning_rate": 1.9799171144795836e-06, + "loss": 33.8281, + "step": 9718 + }, + { + "epoch": 0.09200026504860802, + "grad_norm": 698.6226196289062, + "learning_rate": 1.9799110005817973e-06, + "loss": 45.6836, + "step": 9719 + }, + { + "epoch": 0.09200973107032308, + "grad_norm": 274.27496337890625, + "learning_rate": 1.9799048857629583e-06, + "loss": 26.8594, + "step": 9720 + }, + { + "epoch": 0.09201919709203812, + "grad_norm": 306.318115234375, + "learning_rate": 1.9798987700230726e-06, + "loss": 26.3945, + "step": 9721 + }, + { + "epoch": 0.09202866311375318, + "grad_norm": 815.236572265625, + "learning_rate": 1.9798926533621458e-06, + "loss": 54.2188, + "step": 9722 + }, + { + "epoch": 0.09203812913546823, + "grad_norm": 376.8554382324219, + "learning_rate": 1.9798865357801837e-06, + "loss": 24.8984, + "step": 9723 + }, + { + "epoch": 0.09204759515718329, + "grad_norm": 202.79550170898438, + "learning_rate": 1.979880417277192e-06, + "loss": 17.1641, + "step": 9724 + }, + { + "epoch": 0.09205706117889835, + "grad_norm": 1925.5526123046875, + "learning_rate": 1.9798742978531758e-06, + "loss": 36.3281, + "step": 9725 + }, + { + "epoch": 0.0920665272006134, + "grad_norm": 356.54632568359375, + "learning_rate": 1.9798681775081422e-06, + "loss": 34.5938, + "step": 9726 + }, + { + "epoch": 0.09207599322232846, + "grad_norm": 666.2531127929688, + "learning_rate": 1.9798620562420963e-06, + "loss": 57.5469, + "step": 9727 + }, + { + "epoch": 0.0920854592440435, + "grad_norm": 528.625244140625, + "learning_rate": 1.9798559340550435e-06, + "loss": 33.7656, + "step": 9728 + }, + { + "epoch": 0.09209492526575856, + "grad_norm": 986.4994506835938, + "learning_rate": 1.97984981094699e-06, + "loss": 98.5625, + "step": 9729 + }, + { + "epoch": 0.09210439128747361, + "grad_norm": 3.0931286811828613, + "learning_rate": 1.9798436869179414e-06, + "loss": 0.793, + "step": 9730 + }, + { + "epoch": 0.09211385730918867, + "grad_norm": 400.24285888671875, + "learning_rate": 1.9798375619679036e-06, + "loss": 26.2031, + "step": 9731 + }, + { + "epoch": 0.09212332333090371, + "grad_norm": 645.6535034179688, + "learning_rate": 1.979831436096882e-06, + "loss": 41.125, + "step": 9732 + }, + { + "epoch": 0.09213278935261877, + "grad_norm": 800.4039916992188, + "learning_rate": 1.9798253093048826e-06, + "loss": 34.6875, + "step": 9733 + }, + { + "epoch": 0.09214225537433383, + "grad_norm": 321.9671936035156, + "learning_rate": 1.9798191815919114e-06, + "loss": 13.9141, + "step": 9734 + }, + { + "epoch": 0.09215172139604888, + "grad_norm": 476.40679931640625, + "learning_rate": 1.9798130529579735e-06, + "loss": 20.0508, + "step": 9735 + }, + { + "epoch": 0.09216118741776394, + "grad_norm": 327.8306579589844, + "learning_rate": 1.9798069234030758e-06, + "loss": 26.6562, + "step": 9736 + }, + { + "epoch": 0.09217065343947899, + "grad_norm": 382.8904724121094, + "learning_rate": 1.9798007929272224e-06, + "loss": 37.8672, + "step": 9737 + }, + { + "epoch": 0.09218011946119405, + "grad_norm": 564.7841186523438, + "learning_rate": 1.9797946615304206e-06, + "loss": 40.0, + "step": 9738 + }, + { + "epoch": 0.09218958548290909, + "grad_norm": 530.4746704101562, + "learning_rate": 1.9797885292126754e-06, + "loss": 22.0703, + "step": 9739 + }, + { + "epoch": 0.09219905150462415, + "grad_norm": 1095.3746337890625, + "learning_rate": 1.979782395973993e-06, + "loss": 59.7656, + "step": 9740 + }, + { + "epoch": 0.09220851752633921, + "grad_norm": 339.0370178222656, + "learning_rate": 1.9797762618143784e-06, + "loss": 29.2578, + "step": 9741 + }, + { + "epoch": 0.09221798354805426, + "grad_norm": 685.6427612304688, + "learning_rate": 1.9797701267338384e-06, + "loss": 15.6758, + "step": 9742 + }, + { + "epoch": 0.09222744956976932, + "grad_norm": 407.2082214355469, + "learning_rate": 1.9797639907323774e-06, + "loss": 9.793, + "step": 9743 + }, + { + "epoch": 0.09223691559148436, + "grad_norm": 341.4816589355469, + "learning_rate": 1.9797578538100028e-06, + "loss": 49.1875, + "step": 9744 + }, + { + "epoch": 0.09224638161319942, + "grad_norm": 420.548095703125, + "learning_rate": 1.979751715966719e-06, + "loss": 21.0469, + "step": 9745 + }, + { + "epoch": 0.09225584763491447, + "grad_norm": 969.6497192382812, + "learning_rate": 1.9797455772025327e-06, + "loss": 25.7656, + "step": 9746 + }, + { + "epoch": 0.09226531365662953, + "grad_norm": 1156.1646728515625, + "learning_rate": 1.979739437517449e-06, + "loss": 84.7266, + "step": 9747 + }, + { + "epoch": 0.09227477967834458, + "grad_norm": 437.7890625, + "learning_rate": 1.9797332969114743e-06, + "loss": 34.8047, + "step": 9748 + }, + { + "epoch": 0.09228424570005964, + "grad_norm": 317.7425842285156, + "learning_rate": 1.979727155384614e-06, + "loss": 21.5938, + "step": 9749 + }, + { + "epoch": 0.0922937117217747, + "grad_norm": 517.2509155273438, + "learning_rate": 1.9797210129368738e-06, + "loss": 53.1719, + "step": 9750 + }, + { + "epoch": 0.09230317774348974, + "grad_norm": 513.4829711914062, + "learning_rate": 1.9797148695682595e-06, + "loss": 28.875, + "step": 9751 + }, + { + "epoch": 0.0923126437652048, + "grad_norm": 931.4481811523438, + "learning_rate": 1.979708725278777e-06, + "loss": 45.4844, + "step": 9752 + }, + { + "epoch": 0.09232210978691985, + "grad_norm": 782.07177734375, + "learning_rate": 1.979702580068432e-06, + "loss": 60.3125, + "step": 9753 + }, + { + "epoch": 0.09233157580863491, + "grad_norm": 442.12860107421875, + "learning_rate": 1.97969643393723e-06, + "loss": 27.75, + "step": 9754 + }, + { + "epoch": 0.09234104183034995, + "grad_norm": 177.65625, + "learning_rate": 1.979690286885178e-06, + "loss": 18.9453, + "step": 9755 + }, + { + "epoch": 0.09235050785206501, + "grad_norm": 255.34902954101562, + "learning_rate": 1.97968413891228e-06, + "loss": 12.8555, + "step": 9756 + }, + { + "epoch": 0.09235997387378006, + "grad_norm": 216.99131774902344, + "learning_rate": 1.979677990018543e-06, + "loss": 25.7656, + "step": 9757 + }, + { + "epoch": 0.09236943989549512, + "grad_norm": 3.2548162937164307, + "learning_rate": 1.979671840203972e-06, + "loss": 0.9922, + "step": 9758 + }, + { + "epoch": 0.09237890591721018, + "grad_norm": 165.5908203125, + "learning_rate": 1.9796656894685738e-06, + "loss": 20.5234, + "step": 9759 + }, + { + "epoch": 0.09238837193892523, + "grad_norm": 622.5933837890625, + "learning_rate": 1.9796595378123537e-06, + "loss": 38.5469, + "step": 9760 + }, + { + "epoch": 0.09239783796064029, + "grad_norm": 752.8024291992188, + "learning_rate": 1.979653385235317e-06, + "loss": 58.7188, + "step": 9761 + }, + { + "epoch": 0.09240730398235533, + "grad_norm": 465.4765625, + "learning_rate": 1.9796472317374696e-06, + "loss": 46.0469, + "step": 9762 + }, + { + "epoch": 0.09241677000407039, + "grad_norm": 483.5605773925781, + "learning_rate": 1.979641077318818e-06, + "loss": 14.0664, + "step": 9763 + }, + { + "epoch": 0.09242623602578544, + "grad_norm": 378.791015625, + "learning_rate": 1.9796349219793675e-06, + "loss": 36.6484, + "step": 9764 + }, + { + "epoch": 0.0924357020475005, + "grad_norm": 215.74502563476562, + "learning_rate": 1.9796287657191235e-06, + "loss": 18.2891, + "step": 9765 + }, + { + "epoch": 0.09244516806921554, + "grad_norm": 1186.552734375, + "learning_rate": 1.9796226085380924e-06, + "loss": 55.9531, + "step": 9766 + }, + { + "epoch": 0.0924546340909306, + "grad_norm": 214.33616638183594, + "learning_rate": 1.97961645043628e-06, + "loss": 14.3281, + "step": 9767 + }, + { + "epoch": 0.09246410011264566, + "grad_norm": 260.2725830078125, + "learning_rate": 1.9796102914136916e-06, + "loss": 20.2812, + "step": 9768 + }, + { + "epoch": 0.09247356613436071, + "grad_norm": 1186.65966796875, + "learning_rate": 1.979604131470333e-06, + "loss": 29.9688, + "step": 9769 + }, + { + "epoch": 0.09248303215607577, + "grad_norm": 395.4815368652344, + "learning_rate": 1.9795979706062106e-06, + "loss": 48.6875, + "step": 9770 + }, + { + "epoch": 0.09249249817779082, + "grad_norm": 934.4725341796875, + "learning_rate": 1.9795918088213297e-06, + "loss": 60.2109, + "step": 9771 + }, + { + "epoch": 0.09250196419950588, + "grad_norm": 319.33758544921875, + "learning_rate": 1.9795856461156965e-06, + "loss": 30.2031, + "step": 9772 + }, + { + "epoch": 0.09251143022122092, + "grad_norm": 658.2109985351562, + "learning_rate": 1.979579482489316e-06, + "loss": 54.2188, + "step": 9773 + }, + { + "epoch": 0.09252089624293598, + "grad_norm": 458.41168212890625, + "learning_rate": 1.979573317942195e-06, + "loss": 49.2812, + "step": 9774 + }, + { + "epoch": 0.09253036226465103, + "grad_norm": 288.0232238769531, + "learning_rate": 1.979567152474339e-06, + "loss": 40.1094, + "step": 9775 + }, + { + "epoch": 0.09253982828636609, + "grad_norm": 394.9178771972656, + "learning_rate": 1.979560986085753e-06, + "loss": 40.4688, + "step": 9776 + }, + { + "epoch": 0.09254929430808115, + "grad_norm": 415.4179992675781, + "learning_rate": 1.9795548187764436e-06, + "loss": 30.0312, + "step": 9777 + }, + { + "epoch": 0.0925587603297962, + "grad_norm": 965.8900756835938, + "learning_rate": 1.9795486505464167e-06, + "loss": 49.5625, + "step": 9778 + }, + { + "epoch": 0.09256822635151125, + "grad_norm": 365.87799072265625, + "learning_rate": 1.9795424813956776e-06, + "loss": 46.875, + "step": 9779 + }, + { + "epoch": 0.0925776923732263, + "grad_norm": 475.97589111328125, + "learning_rate": 1.979536311324232e-06, + "loss": 29.5469, + "step": 9780 + }, + { + "epoch": 0.09258715839494136, + "grad_norm": 584.5625, + "learning_rate": 1.979530140332086e-06, + "loss": 25.1055, + "step": 9781 + }, + { + "epoch": 0.0925966244166564, + "grad_norm": 297.8659362792969, + "learning_rate": 1.9795239684192457e-06, + "loss": 20.2969, + "step": 9782 + }, + { + "epoch": 0.09260609043837147, + "grad_norm": 882.7657470703125, + "learning_rate": 1.9795177955857166e-06, + "loss": 45.3281, + "step": 9783 + }, + { + "epoch": 0.09261555646008653, + "grad_norm": 390.9586486816406, + "learning_rate": 1.9795116218315043e-06, + "loss": 22.7969, + "step": 9784 + }, + { + "epoch": 0.09262502248180157, + "grad_norm": 3.84670352935791, + "learning_rate": 1.979505447156615e-06, + "loss": 0.7783, + "step": 9785 + }, + { + "epoch": 0.09263448850351663, + "grad_norm": 355.2179260253906, + "learning_rate": 1.979499271561054e-06, + "loss": 11.4453, + "step": 9786 + }, + { + "epoch": 0.09264395452523168, + "grad_norm": 1213.56005859375, + "learning_rate": 1.979493095044828e-06, + "loss": 53.5234, + "step": 9787 + }, + { + "epoch": 0.09265342054694674, + "grad_norm": 295.291015625, + "learning_rate": 1.9794869176079415e-06, + "loss": 19.0469, + "step": 9788 + }, + { + "epoch": 0.09266288656866178, + "grad_norm": 373.337158203125, + "learning_rate": 1.9794807392504015e-06, + "loss": 26.2656, + "step": 9789 + }, + { + "epoch": 0.09267235259037684, + "grad_norm": 418.4180908203125, + "learning_rate": 1.979474559972213e-06, + "loss": 30.7812, + "step": 9790 + }, + { + "epoch": 0.09268181861209189, + "grad_norm": 172.42074584960938, + "learning_rate": 1.979468379773382e-06, + "loss": 24.2812, + "step": 9791 + }, + { + "epoch": 0.09269128463380695, + "grad_norm": 800.954345703125, + "learning_rate": 1.9794621986539147e-06, + "loss": 49.5938, + "step": 9792 + }, + { + "epoch": 0.09270075065552201, + "grad_norm": 755.182373046875, + "learning_rate": 1.9794560166138172e-06, + "loss": 36.4219, + "step": 9793 + }, + { + "epoch": 0.09271021667723706, + "grad_norm": 651.0326538085938, + "learning_rate": 1.979449833653094e-06, + "loss": 22.0703, + "step": 9794 + }, + { + "epoch": 0.09271968269895212, + "grad_norm": 574.7891845703125, + "learning_rate": 1.979443649771752e-06, + "loss": 23.6094, + "step": 9795 + }, + { + "epoch": 0.09272914872066716, + "grad_norm": 495.2911376953125, + "learning_rate": 1.9794374649697964e-06, + "loss": 42.5, + "step": 9796 + }, + { + "epoch": 0.09273861474238222, + "grad_norm": 300.8608093261719, + "learning_rate": 1.979431279247234e-06, + "loss": 20.7266, + "step": 9797 + }, + { + "epoch": 0.09274808076409727, + "grad_norm": 3.0941474437713623, + "learning_rate": 1.979425092604069e-06, + "loss": 0.873, + "step": 9798 + }, + { + "epoch": 0.09275754678581233, + "grad_norm": 3.620229721069336, + "learning_rate": 1.9794189050403085e-06, + "loss": 0.856, + "step": 9799 + }, + { + "epoch": 0.09276701280752737, + "grad_norm": 887.5234375, + "learning_rate": 1.979412716555958e-06, + "loss": 56.1875, + "step": 9800 + }, + { + "epoch": 0.09277647882924243, + "grad_norm": 380.1490783691406, + "learning_rate": 1.979406527151023e-06, + "loss": 21.2188, + "step": 9801 + }, + { + "epoch": 0.0927859448509575, + "grad_norm": 382.5151062011719, + "learning_rate": 1.9794003368255103e-06, + "loss": 27.9141, + "step": 9802 + }, + { + "epoch": 0.09279541087267254, + "grad_norm": 424.9163513183594, + "learning_rate": 1.979394145579424e-06, + "loss": 18.4297, + "step": 9803 + }, + { + "epoch": 0.0928048768943876, + "grad_norm": 166.53240966796875, + "learning_rate": 1.9793879534127717e-06, + "loss": 23.1875, + "step": 9804 + }, + { + "epoch": 0.09281434291610265, + "grad_norm": 342.6606750488281, + "learning_rate": 1.9793817603255583e-06, + "loss": 31.1953, + "step": 9805 + }, + { + "epoch": 0.0928238089378177, + "grad_norm": 387.8049011230469, + "learning_rate": 1.9793755663177895e-06, + "loss": 31.375, + "step": 9806 + }, + { + "epoch": 0.09283327495953275, + "grad_norm": 705.0557250976562, + "learning_rate": 1.9793693713894717e-06, + "loss": 29.7422, + "step": 9807 + }, + { + "epoch": 0.09284274098124781, + "grad_norm": 433.0641784667969, + "learning_rate": 1.9793631755406103e-06, + "loss": 45.9531, + "step": 9808 + }, + { + "epoch": 0.09285220700296286, + "grad_norm": 240.56919860839844, + "learning_rate": 1.979356978771211e-06, + "loss": 21.3047, + "step": 9809 + }, + { + "epoch": 0.09286167302467792, + "grad_norm": 453.6480407714844, + "learning_rate": 1.97935078108128e-06, + "loss": 31.5938, + "step": 9810 + }, + { + "epoch": 0.09287113904639298, + "grad_norm": 502.16766357421875, + "learning_rate": 1.979344582470823e-06, + "loss": 29.625, + "step": 9811 + }, + { + "epoch": 0.09288060506810802, + "grad_norm": 245.7512664794922, + "learning_rate": 1.9793383829398457e-06, + "loss": 21.0312, + "step": 9812 + }, + { + "epoch": 0.09289007108982308, + "grad_norm": 316.44244384765625, + "learning_rate": 1.9793321824883538e-06, + "loss": 28.5, + "step": 9813 + }, + { + "epoch": 0.09289953711153813, + "grad_norm": 494.3861083984375, + "learning_rate": 1.979325981116354e-06, + "loss": 18.6875, + "step": 9814 + }, + { + "epoch": 0.09290900313325319, + "grad_norm": 272.5976867675781, + "learning_rate": 1.9793197788238507e-06, + "loss": 17.1211, + "step": 9815 + }, + { + "epoch": 0.09291846915496824, + "grad_norm": 444.637939453125, + "learning_rate": 1.9793135756108514e-06, + "loss": 21.0703, + "step": 9816 + }, + { + "epoch": 0.0929279351766833, + "grad_norm": 515.8795166015625, + "learning_rate": 1.9793073714773607e-06, + "loss": 22.457, + "step": 9817 + }, + { + "epoch": 0.09293740119839834, + "grad_norm": 661.4091796875, + "learning_rate": 1.9793011664233844e-06, + "loss": 35.9492, + "step": 9818 + }, + { + "epoch": 0.0929468672201134, + "grad_norm": 274.75390625, + "learning_rate": 1.979294960448929e-06, + "loss": 28.6016, + "step": 9819 + }, + { + "epoch": 0.09295633324182846, + "grad_norm": 299.4797058105469, + "learning_rate": 1.9792887535540002e-06, + "loss": 18.625, + "step": 9820 + }, + { + "epoch": 0.09296579926354351, + "grad_norm": 177.4140625, + "learning_rate": 1.9792825457386034e-06, + "loss": 19.8125, + "step": 9821 + }, + { + "epoch": 0.09297526528525857, + "grad_norm": 605.3327026367188, + "learning_rate": 1.979276337002745e-06, + "loss": 45.5156, + "step": 9822 + }, + { + "epoch": 0.09298473130697361, + "grad_norm": 176.46192932128906, + "learning_rate": 1.97927012734643e-06, + "loss": 20.3359, + "step": 9823 + }, + { + "epoch": 0.09299419732868867, + "grad_norm": 524.7268676757812, + "learning_rate": 1.9792639167696656e-06, + "loss": 14.0469, + "step": 9824 + }, + { + "epoch": 0.09300366335040372, + "grad_norm": 3.2733848094940186, + "learning_rate": 1.9792577052724562e-06, + "loss": 0.8525, + "step": 9825 + }, + { + "epoch": 0.09301312937211878, + "grad_norm": 470.464599609375, + "learning_rate": 1.9792514928548083e-06, + "loss": 18.3359, + "step": 9826 + }, + { + "epoch": 0.09302259539383384, + "grad_norm": 239.26205444335938, + "learning_rate": 1.979245279516728e-06, + "loss": 21.8438, + "step": 9827 + }, + { + "epoch": 0.09303206141554889, + "grad_norm": 224.10159301757812, + "learning_rate": 1.9792390652582207e-06, + "loss": 18.2734, + "step": 9828 + }, + { + "epoch": 0.09304152743726395, + "grad_norm": 276.6976013183594, + "learning_rate": 1.979232850079293e-06, + "loss": 18.6797, + "step": 9829 + }, + { + "epoch": 0.09305099345897899, + "grad_norm": 434.7705383300781, + "learning_rate": 1.979226633979949e-06, + "loss": 25.8672, + "step": 9830 + }, + { + "epoch": 0.09306045948069405, + "grad_norm": 466.1347961425781, + "learning_rate": 1.979220416960196e-06, + "loss": 33.9375, + "step": 9831 + }, + { + "epoch": 0.0930699255024091, + "grad_norm": 440.1026611328125, + "learning_rate": 1.97921419902004e-06, + "loss": 22.7812, + "step": 9832 + }, + { + "epoch": 0.09307939152412416, + "grad_norm": 213.80934143066406, + "learning_rate": 1.979207980159486e-06, + "loss": 21.1328, + "step": 9833 + }, + { + "epoch": 0.0930888575458392, + "grad_norm": 377.20458984375, + "learning_rate": 1.9792017603785404e-06, + "loss": 49.3438, + "step": 9834 + }, + { + "epoch": 0.09309832356755426, + "grad_norm": 1610.53759765625, + "learning_rate": 1.9791955396772084e-06, + "loss": 10.875, + "step": 9835 + }, + { + "epoch": 0.09310778958926932, + "grad_norm": 178.00071716308594, + "learning_rate": 1.979189318055497e-06, + "loss": 17.7891, + "step": 9836 + }, + { + "epoch": 0.09311725561098437, + "grad_norm": 263.30853271484375, + "learning_rate": 1.9791830955134106e-06, + "loss": 24.6172, + "step": 9837 + }, + { + "epoch": 0.09312672163269943, + "grad_norm": 184.97491455078125, + "learning_rate": 1.9791768720509562e-06, + "loss": 19.1797, + "step": 9838 + }, + { + "epoch": 0.09313618765441448, + "grad_norm": 156.04193115234375, + "learning_rate": 1.979170647668139e-06, + "loss": 15.8867, + "step": 9839 + }, + { + "epoch": 0.09314565367612954, + "grad_norm": 761.3607177734375, + "learning_rate": 1.9791644223649657e-06, + "loss": 56.625, + "step": 9840 + }, + { + "epoch": 0.09315511969784458, + "grad_norm": 186.2762908935547, + "learning_rate": 1.9791581961414405e-06, + "loss": 20.7734, + "step": 9841 + }, + { + "epoch": 0.09316458571955964, + "grad_norm": 596.5491943359375, + "learning_rate": 1.979151968997571e-06, + "loss": 58.1562, + "step": 9842 + }, + { + "epoch": 0.09317405174127469, + "grad_norm": 729.2365112304688, + "learning_rate": 1.9791457409333627e-06, + "loss": 31.3711, + "step": 9843 + }, + { + "epoch": 0.09318351776298975, + "grad_norm": 406.8029479980469, + "learning_rate": 1.97913951194882e-06, + "loss": 25.4844, + "step": 9844 + }, + { + "epoch": 0.09319298378470481, + "grad_norm": 241.2714080810547, + "learning_rate": 1.979133282043951e-06, + "loss": 17.6055, + "step": 9845 + }, + { + "epoch": 0.09320244980641985, + "grad_norm": 406.24609375, + "learning_rate": 1.9791270512187596e-06, + "loss": 30.625, + "step": 9846 + }, + { + "epoch": 0.09321191582813491, + "grad_norm": 230.16476440429688, + "learning_rate": 1.9791208194732526e-06, + "loss": 25.1875, + "step": 9847 + }, + { + "epoch": 0.09322138184984996, + "grad_norm": 176.7630615234375, + "learning_rate": 1.9791145868074363e-06, + "loss": 26.4766, + "step": 9848 + }, + { + "epoch": 0.09323084787156502, + "grad_norm": 474.333251953125, + "learning_rate": 1.9791083532213153e-06, + "loss": 30.25, + "step": 9849 + }, + { + "epoch": 0.09324031389328007, + "grad_norm": 211.48951721191406, + "learning_rate": 1.9791021187148967e-06, + "loss": 21.4766, + "step": 9850 + }, + { + "epoch": 0.09324977991499513, + "grad_norm": 266.3599853515625, + "learning_rate": 1.9790958832881854e-06, + "loss": 18.9609, + "step": 9851 + }, + { + "epoch": 0.09325924593671017, + "grad_norm": 192.31561279296875, + "learning_rate": 1.9790896469411876e-06, + "loss": 16.9531, + "step": 9852 + }, + { + "epoch": 0.09326871195842523, + "grad_norm": 388.3170166015625, + "learning_rate": 1.9790834096739097e-06, + "loss": 30.9531, + "step": 9853 + }, + { + "epoch": 0.09327817798014029, + "grad_norm": 3.4114723205566406, + "learning_rate": 1.9790771714863567e-06, + "loss": 0.9966, + "step": 9854 + }, + { + "epoch": 0.09328764400185534, + "grad_norm": 348.1025695800781, + "learning_rate": 1.979070932378535e-06, + "loss": 34.2578, + "step": 9855 + }, + { + "epoch": 0.0932971100235704, + "grad_norm": 3.0652918815612793, + "learning_rate": 1.97906469235045e-06, + "loss": 0.9351, + "step": 9856 + }, + { + "epoch": 0.09330657604528544, + "grad_norm": 802.1818237304688, + "learning_rate": 1.979058451402108e-06, + "loss": 63.9805, + "step": 9857 + }, + { + "epoch": 0.0933160420670005, + "grad_norm": 584.939697265625, + "learning_rate": 1.979052209533515e-06, + "loss": 41.8125, + "step": 9858 + }, + { + "epoch": 0.09332550808871555, + "grad_norm": 578.8598022460938, + "learning_rate": 1.9790459667446768e-06, + "loss": 53.1719, + "step": 9859 + }, + { + "epoch": 0.09333497411043061, + "grad_norm": 2247.509765625, + "learning_rate": 1.9790397230355985e-06, + "loss": 14.0039, + "step": 9860 + }, + { + "epoch": 0.09334444013214566, + "grad_norm": 239.22776794433594, + "learning_rate": 1.979033478406287e-06, + "loss": 20.3125, + "step": 9861 + }, + { + "epoch": 0.09335390615386072, + "grad_norm": 511.43280029296875, + "learning_rate": 1.9790272328567477e-06, + "loss": 27.4375, + "step": 9862 + }, + { + "epoch": 0.09336337217557578, + "grad_norm": 314.92962646484375, + "learning_rate": 1.9790209863869866e-06, + "loss": 18.4688, + "step": 9863 + }, + { + "epoch": 0.09337283819729082, + "grad_norm": 235.13534545898438, + "learning_rate": 1.979014738997009e-06, + "loss": 22.3906, + "step": 9864 + }, + { + "epoch": 0.09338230421900588, + "grad_norm": 793.1541137695312, + "learning_rate": 1.9790084906868214e-06, + "loss": 34.7656, + "step": 9865 + }, + { + "epoch": 0.09339177024072093, + "grad_norm": 220.02383422851562, + "learning_rate": 1.9790022414564296e-06, + "loss": 21.9844, + "step": 9866 + }, + { + "epoch": 0.09340123626243599, + "grad_norm": 405.81500244140625, + "learning_rate": 1.9789959913058394e-06, + "loss": 26.0, + "step": 9867 + }, + { + "epoch": 0.09341070228415103, + "grad_norm": 260.3953857421875, + "learning_rate": 1.978989740235057e-06, + "loss": 31.3906, + "step": 9868 + }, + { + "epoch": 0.0934201683058661, + "grad_norm": 224.79229736328125, + "learning_rate": 1.978983488244087e-06, + "loss": 24.1094, + "step": 9869 + }, + { + "epoch": 0.09342963432758115, + "grad_norm": 852.2725830078125, + "learning_rate": 1.9789772353329366e-06, + "loss": 63.2031, + "step": 9870 + }, + { + "epoch": 0.0934391003492962, + "grad_norm": 175.42343139648438, + "learning_rate": 1.9789709815016118e-06, + "loss": 17.1992, + "step": 9871 + }, + { + "epoch": 0.09344856637101126, + "grad_norm": 161.52371215820312, + "learning_rate": 1.9789647267501173e-06, + "loss": 19.3281, + "step": 9872 + }, + { + "epoch": 0.0934580323927263, + "grad_norm": 243.2836456298828, + "learning_rate": 1.97895847107846e-06, + "loss": 15.8828, + "step": 9873 + }, + { + "epoch": 0.09346749841444137, + "grad_norm": 281.77191162109375, + "learning_rate": 1.978952214486645e-06, + "loss": 24.7031, + "step": 9874 + }, + { + "epoch": 0.09347696443615641, + "grad_norm": 276.2549133300781, + "learning_rate": 1.978945956974679e-06, + "loss": 9.7578, + "step": 9875 + }, + { + "epoch": 0.09348643045787147, + "grad_norm": 307.3795166015625, + "learning_rate": 1.978939698542568e-06, + "loss": 18.6953, + "step": 9876 + }, + { + "epoch": 0.09349589647958652, + "grad_norm": 645.7089233398438, + "learning_rate": 1.9789334391903165e-06, + "loss": 28.2109, + "step": 9877 + }, + { + "epoch": 0.09350536250130158, + "grad_norm": 477.6346435546875, + "learning_rate": 1.9789271789179316e-06, + "loss": 40.1094, + "step": 9878 + }, + { + "epoch": 0.09351482852301664, + "grad_norm": 801.1768188476562, + "learning_rate": 1.978920917725419e-06, + "loss": 20.8047, + "step": 9879 + }, + { + "epoch": 0.09352429454473168, + "grad_norm": 346.8614807128906, + "learning_rate": 1.978914655612784e-06, + "loss": 31.0469, + "step": 9880 + }, + { + "epoch": 0.09353376056644674, + "grad_norm": 307.6673889160156, + "learning_rate": 1.978908392580033e-06, + "loss": 23.2266, + "step": 9881 + }, + { + "epoch": 0.09354322658816179, + "grad_norm": 363.0068359375, + "learning_rate": 1.978902128627172e-06, + "loss": 52.8281, + "step": 9882 + }, + { + "epoch": 0.09355269260987685, + "grad_norm": 558.7035522460938, + "learning_rate": 1.9788958637542068e-06, + "loss": 63.0312, + "step": 9883 + }, + { + "epoch": 0.0935621586315919, + "grad_norm": 447.5443420410156, + "learning_rate": 1.9788895979611428e-06, + "loss": 51.5703, + "step": 9884 + }, + { + "epoch": 0.09357162465330696, + "grad_norm": 311.1153564453125, + "learning_rate": 1.9788833312479863e-06, + "loss": 30.7188, + "step": 9885 + }, + { + "epoch": 0.093581090675022, + "grad_norm": 377.9522399902344, + "learning_rate": 1.978877063614743e-06, + "loss": 20.5312, + "step": 9886 + }, + { + "epoch": 0.09359055669673706, + "grad_norm": 2.7749695777893066, + "learning_rate": 1.97887079506142e-06, + "loss": 0.7898, + "step": 9887 + }, + { + "epoch": 0.09360002271845212, + "grad_norm": 272.0263977050781, + "learning_rate": 1.978864525588021e-06, + "loss": 44.3125, + "step": 9888 + }, + { + "epoch": 0.09360948874016717, + "grad_norm": 208.9983673095703, + "learning_rate": 1.978858255194554e-06, + "loss": 20.5703, + "step": 9889 + }, + { + "epoch": 0.09361895476188223, + "grad_norm": 741.0450439453125, + "learning_rate": 1.978851983881023e-06, + "loss": 25.5156, + "step": 9890 + }, + { + "epoch": 0.09362842078359727, + "grad_norm": 510.52508544921875, + "learning_rate": 1.978845711647435e-06, + "loss": 46.7578, + "step": 9891 + }, + { + "epoch": 0.09363788680531233, + "grad_norm": 415.8538513183594, + "learning_rate": 1.9788394384937958e-06, + "loss": 41.9375, + "step": 9892 + }, + { + "epoch": 0.09364735282702738, + "grad_norm": 309.2653503417969, + "learning_rate": 1.978833164420111e-06, + "loss": 29.5312, + "step": 9893 + }, + { + "epoch": 0.09365681884874244, + "grad_norm": 632.9796142578125, + "learning_rate": 1.9788268894263867e-06, + "loss": 30.2891, + "step": 9894 + }, + { + "epoch": 0.09366628487045749, + "grad_norm": 323.8116760253906, + "learning_rate": 1.978820613512629e-06, + "loss": 20.5938, + "step": 9895 + }, + { + "epoch": 0.09367575089217255, + "grad_norm": 224.24293518066406, + "learning_rate": 1.9788143366788443e-06, + "loss": 18.6641, + "step": 9896 + }, + { + "epoch": 0.0936852169138876, + "grad_norm": 250.4203643798828, + "learning_rate": 1.978808058925037e-06, + "loss": 11.1875, + "step": 9897 + }, + { + "epoch": 0.09369468293560265, + "grad_norm": 462.9657287597656, + "learning_rate": 1.9788017802512136e-06, + "loss": 19.5625, + "step": 9898 + }, + { + "epoch": 0.09370414895731771, + "grad_norm": 319.6261901855469, + "learning_rate": 1.9787955006573808e-06, + "loss": 25.6484, + "step": 9899 + }, + { + "epoch": 0.09371361497903276, + "grad_norm": 503.3995361328125, + "learning_rate": 1.9787892201435436e-06, + "loss": 41.1406, + "step": 9900 + }, + { + "epoch": 0.09372308100074782, + "grad_norm": 436.50360107421875, + "learning_rate": 1.9787829387097083e-06, + "loss": 21.8047, + "step": 9901 + }, + { + "epoch": 0.09373254702246286, + "grad_norm": 444.47564697265625, + "learning_rate": 1.9787766563558805e-06, + "loss": 49.7188, + "step": 9902 + }, + { + "epoch": 0.09374201304417792, + "grad_norm": 485.2574157714844, + "learning_rate": 1.9787703730820665e-06, + "loss": 38.6133, + "step": 9903 + }, + { + "epoch": 0.09375147906589297, + "grad_norm": 204.71176147460938, + "learning_rate": 1.9787640888882718e-06, + "loss": 18.1328, + "step": 9904 + }, + { + "epoch": 0.09376094508760803, + "grad_norm": 755.2777099609375, + "learning_rate": 1.9787578037745027e-06, + "loss": 82.4766, + "step": 9905 + }, + { + "epoch": 0.09377041110932309, + "grad_norm": 427.2696838378906, + "learning_rate": 1.9787515177407653e-06, + "loss": 28.9531, + "step": 9906 + }, + { + "epoch": 0.09377987713103814, + "grad_norm": 381.16754150390625, + "learning_rate": 1.9787452307870645e-06, + "loss": 26.2266, + "step": 9907 + }, + { + "epoch": 0.0937893431527532, + "grad_norm": 3.0898449420928955, + "learning_rate": 1.978738942913407e-06, + "loss": 0.9766, + "step": 9908 + }, + { + "epoch": 0.09379880917446824, + "grad_norm": 387.1581726074219, + "learning_rate": 1.978732654119799e-06, + "loss": 23.5781, + "step": 9909 + }, + { + "epoch": 0.0938082751961833, + "grad_norm": 253.9192352294922, + "learning_rate": 1.9787263644062456e-06, + "loss": 23.5, + "step": 9910 + }, + { + "epoch": 0.09381774121789835, + "grad_norm": 204.65267944335938, + "learning_rate": 1.9787200737727533e-06, + "loss": 19.0859, + "step": 9911 + }, + { + "epoch": 0.09382720723961341, + "grad_norm": 373.2955322265625, + "learning_rate": 1.9787137822193278e-06, + "loss": 31.3281, + "step": 9912 + }, + { + "epoch": 0.09383667326132847, + "grad_norm": 278.4083251953125, + "learning_rate": 1.978707489745975e-06, + "loss": 25.4688, + "step": 9913 + }, + { + "epoch": 0.09384613928304351, + "grad_norm": 369.2250671386719, + "learning_rate": 1.9787011963527006e-06, + "loss": 20.1797, + "step": 9914 + }, + { + "epoch": 0.09385560530475857, + "grad_norm": 964.0662231445312, + "learning_rate": 1.978694902039511e-06, + "loss": 53.625, + "step": 9915 + }, + { + "epoch": 0.09386507132647362, + "grad_norm": 509.7144775390625, + "learning_rate": 1.9786886068064115e-06, + "loss": 45.7656, + "step": 9916 + }, + { + "epoch": 0.09387453734818868, + "grad_norm": 824.7598266601562, + "learning_rate": 1.9786823106534086e-06, + "loss": 32.4297, + "step": 9917 + }, + { + "epoch": 0.09388400336990373, + "grad_norm": 443.66961669921875, + "learning_rate": 1.978676013580508e-06, + "loss": 19.2656, + "step": 9918 + }, + { + "epoch": 0.09389346939161879, + "grad_norm": 284.842041015625, + "learning_rate": 1.9786697155877157e-06, + "loss": 25.2734, + "step": 9919 + }, + { + "epoch": 0.09390293541333383, + "grad_norm": 318.54193115234375, + "learning_rate": 1.9786634166750375e-06, + "loss": 22.7188, + "step": 9920 + }, + { + "epoch": 0.09391240143504889, + "grad_norm": 412.5765075683594, + "learning_rate": 1.9786571168424794e-06, + "loss": 10.0469, + "step": 9921 + }, + { + "epoch": 0.09392186745676395, + "grad_norm": 299.0815734863281, + "learning_rate": 1.978650816090047e-06, + "loss": 38.0469, + "step": 9922 + }, + { + "epoch": 0.093931333478479, + "grad_norm": 279.6338195800781, + "learning_rate": 1.978644514417747e-06, + "loss": 25.2188, + "step": 9923 + }, + { + "epoch": 0.09394079950019406, + "grad_norm": 459.0413818359375, + "learning_rate": 1.978638211825584e-06, + "loss": 44.3125, + "step": 9924 + }, + { + "epoch": 0.0939502655219091, + "grad_norm": 300.9391784667969, + "learning_rate": 1.9786319083135654e-06, + "loss": 20.0312, + "step": 9925 + }, + { + "epoch": 0.09395973154362416, + "grad_norm": 213.55482482910156, + "learning_rate": 1.9786256038816963e-06, + "loss": 20.6875, + "step": 9926 + }, + { + "epoch": 0.09396919756533921, + "grad_norm": 427.4632263183594, + "learning_rate": 1.978619298529983e-06, + "loss": 15.9141, + "step": 9927 + }, + { + "epoch": 0.09397866358705427, + "grad_norm": 295.53802490234375, + "learning_rate": 1.9786129922584307e-06, + "loss": 21.4609, + "step": 9928 + }, + { + "epoch": 0.09398812960876932, + "grad_norm": 445.13519287109375, + "learning_rate": 1.9786066850670465e-06, + "loss": 40.6875, + "step": 9929 + }, + { + "epoch": 0.09399759563048438, + "grad_norm": 256.2403259277344, + "learning_rate": 1.9786003769558353e-06, + "loss": 34.1016, + "step": 9930 + }, + { + "epoch": 0.09400706165219944, + "grad_norm": 240.35830688476562, + "learning_rate": 1.9785940679248035e-06, + "loss": 31.9297, + "step": 9931 + }, + { + "epoch": 0.09401652767391448, + "grad_norm": 321.9465637207031, + "learning_rate": 1.978587757973957e-06, + "loss": 18.4844, + "step": 9932 + }, + { + "epoch": 0.09402599369562954, + "grad_norm": 690.6354370117188, + "learning_rate": 1.9785814471033017e-06, + "loss": 32.7344, + "step": 9933 + }, + { + "epoch": 0.09403545971734459, + "grad_norm": 2.996217966079712, + "learning_rate": 1.9785751353128433e-06, + "loss": 0.8867, + "step": 9934 + }, + { + "epoch": 0.09404492573905965, + "grad_norm": 425.3062438964844, + "learning_rate": 1.978568822602588e-06, + "loss": 37.0547, + "step": 9935 + }, + { + "epoch": 0.0940543917607747, + "grad_norm": 393.1328430175781, + "learning_rate": 1.9785625089725416e-06, + "loss": 32.9219, + "step": 9936 + }, + { + "epoch": 0.09406385778248975, + "grad_norm": 294.85009765625, + "learning_rate": 1.9785561944227102e-06, + "loss": 27.7344, + "step": 9937 + }, + { + "epoch": 0.0940733238042048, + "grad_norm": 294.02862548828125, + "learning_rate": 1.9785498789530998e-06, + "loss": 34.9609, + "step": 9938 + }, + { + "epoch": 0.09408278982591986, + "grad_norm": 1009.8809204101562, + "learning_rate": 1.9785435625637157e-06, + "loss": 41.2031, + "step": 9939 + }, + { + "epoch": 0.09409225584763492, + "grad_norm": 559.0762939453125, + "learning_rate": 1.9785372452545644e-06, + "loss": 61.4219, + "step": 9940 + }, + { + "epoch": 0.09410172186934997, + "grad_norm": 255.0807342529297, + "learning_rate": 1.9785309270256522e-06, + "loss": 15.4922, + "step": 9941 + }, + { + "epoch": 0.09411118789106503, + "grad_norm": 537.6625366210938, + "learning_rate": 1.9785246078769842e-06, + "loss": 20.9375, + "step": 9942 + }, + { + "epoch": 0.09412065391278007, + "grad_norm": 269.2287902832031, + "learning_rate": 1.978518287808567e-06, + "loss": 14.6875, + "step": 9943 + }, + { + "epoch": 0.09413011993449513, + "grad_norm": 3.811760663986206, + "learning_rate": 1.978511966820406e-06, + "loss": 1.0659, + "step": 9944 + }, + { + "epoch": 0.09413958595621018, + "grad_norm": 3.2331528663635254, + "learning_rate": 1.9785056449125077e-06, + "loss": 0.9336, + "step": 9945 + }, + { + "epoch": 0.09414905197792524, + "grad_norm": 328.1126403808594, + "learning_rate": 1.9784993220848774e-06, + "loss": 32.9062, + "step": 9946 + }, + { + "epoch": 0.09415851799964028, + "grad_norm": 766.6487426757812, + "learning_rate": 1.9784929983375215e-06, + "loss": 49.4531, + "step": 9947 + }, + { + "epoch": 0.09416798402135534, + "grad_norm": 3.4066975116729736, + "learning_rate": 1.978486673670446e-06, + "loss": 0.9458, + "step": 9948 + }, + { + "epoch": 0.0941774500430704, + "grad_norm": 267.7322998046875, + "learning_rate": 1.978480348083657e-06, + "loss": 38.9453, + "step": 9949 + }, + { + "epoch": 0.09418691606478545, + "grad_norm": 998.2938232421875, + "learning_rate": 1.9784740215771592e-06, + "loss": 56.2031, + "step": 9950 + }, + { + "epoch": 0.09419638208650051, + "grad_norm": 3.1486639976501465, + "learning_rate": 1.9784676941509604e-06, + "loss": 0.8584, + "step": 9951 + }, + { + "epoch": 0.09420584810821556, + "grad_norm": 485.8135681152344, + "learning_rate": 1.978461365805065e-06, + "loss": 21.9766, + "step": 9952 + }, + { + "epoch": 0.09421531412993062, + "grad_norm": 354.8214416503906, + "learning_rate": 1.9784550365394797e-06, + "loss": 21.6094, + "step": 9953 + }, + { + "epoch": 0.09422478015164566, + "grad_norm": 520.8807373046875, + "learning_rate": 1.9784487063542104e-06, + "loss": 34.8125, + "step": 9954 + }, + { + "epoch": 0.09423424617336072, + "grad_norm": 233.48196411132812, + "learning_rate": 1.9784423752492632e-06, + "loss": 22.4688, + "step": 9955 + }, + { + "epoch": 0.09424371219507578, + "grad_norm": 486.7530822753906, + "learning_rate": 1.9784360432246437e-06, + "loss": 33.9219, + "step": 9956 + }, + { + "epoch": 0.09425317821679083, + "grad_norm": 348.7976989746094, + "learning_rate": 1.9784297102803577e-06, + "loss": 19.4766, + "step": 9957 + }, + { + "epoch": 0.09426264423850589, + "grad_norm": 199.7154541015625, + "learning_rate": 1.9784233764164116e-06, + "loss": 24.5703, + "step": 9958 + }, + { + "epoch": 0.09427211026022093, + "grad_norm": 981.094970703125, + "learning_rate": 1.9784170416328114e-06, + "loss": 28.5625, + "step": 9959 + }, + { + "epoch": 0.094281576281936, + "grad_norm": 225.95791625976562, + "learning_rate": 1.9784107059295625e-06, + "loss": 17.0156, + "step": 9960 + }, + { + "epoch": 0.09429104230365104, + "grad_norm": 636.669921875, + "learning_rate": 1.9784043693066713e-06, + "loss": 37.9219, + "step": 9961 + }, + { + "epoch": 0.0943005083253661, + "grad_norm": 256.5860595703125, + "learning_rate": 1.9783980317641437e-06, + "loss": 23.7812, + "step": 9962 + }, + { + "epoch": 0.09430997434708115, + "grad_norm": 222.3970489501953, + "learning_rate": 1.9783916933019853e-06, + "loss": 23.0469, + "step": 9963 + }, + { + "epoch": 0.0943194403687962, + "grad_norm": 219.58612060546875, + "learning_rate": 1.9783853539202027e-06, + "loss": 31.1875, + "step": 9964 + }, + { + "epoch": 0.09432890639051127, + "grad_norm": 588.8779907226562, + "learning_rate": 1.978379013618801e-06, + "loss": 29.3594, + "step": 9965 + }, + { + "epoch": 0.09433837241222631, + "grad_norm": 2.935382604598999, + "learning_rate": 1.9783726723977874e-06, + "loss": 0.9565, + "step": 9966 + }, + { + "epoch": 0.09434783843394137, + "grad_norm": 392.7066955566406, + "learning_rate": 1.978366330257167e-06, + "loss": 35.3125, + "step": 9967 + }, + { + "epoch": 0.09435730445565642, + "grad_norm": 194.5018310546875, + "learning_rate": 1.9783599871969455e-06, + "loss": 21.9844, + "step": 9968 + }, + { + "epoch": 0.09436677047737148, + "grad_norm": 670.9620971679688, + "learning_rate": 1.9783536432171293e-06, + "loss": 17.5742, + "step": 9969 + }, + { + "epoch": 0.09437623649908652, + "grad_norm": 2.9602293968200684, + "learning_rate": 1.9783472983177246e-06, + "loss": 0.7705, + "step": 9970 + }, + { + "epoch": 0.09438570252080158, + "grad_norm": 144.0406951904297, + "learning_rate": 1.978340952498737e-06, + "loss": 21.1172, + "step": 9971 + }, + { + "epoch": 0.09439516854251663, + "grad_norm": 381.0456848144531, + "learning_rate": 1.9783346057601725e-06, + "loss": 25.6953, + "step": 9972 + }, + { + "epoch": 0.09440463456423169, + "grad_norm": 212.4360809326172, + "learning_rate": 1.9783282581020374e-06, + "loss": 11.6641, + "step": 9973 + }, + { + "epoch": 0.09441410058594675, + "grad_norm": 338.9324035644531, + "learning_rate": 1.978321909524337e-06, + "loss": 38.1094, + "step": 9974 + }, + { + "epoch": 0.0944235666076618, + "grad_norm": 296.6690979003906, + "learning_rate": 1.978315560027078e-06, + "loss": 16.5547, + "step": 9975 + }, + { + "epoch": 0.09443303262937686, + "grad_norm": 337.0851745605469, + "learning_rate": 1.9783092096102652e-06, + "loss": 21.3125, + "step": 9976 + }, + { + "epoch": 0.0944424986510919, + "grad_norm": 413.2040710449219, + "learning_rate": 1.9783028582739065e-06, + "loss": 41.3438, + "step": 9977 + }, + { + "epoch": 0.09445196467280696, + "grad_norm": 167.70407104492188, + "learning_rate": 1.978296506018006e-06, + "loss": 15.0781, + "step": 9978 + }, + { + "epoch": 0.09446143069452201, + "grad_norm": 189.37026977539062, + "learning_rate": 1.978290152842571e-06, + "loss": 21.6406, + "step": 9979 + }, + { + "epoch": 0.09447089671623707, + "grad_norm": 394.66619873046875, + "learning_rate": 1.978283798747606e-06, + "loss": 46.3594, + "step": 9980 + }, + { + "epoch": 0.09448036273795211, + "grad_norm": 914.3612670898438, + "learning_rate": 1.9782774437331187e-06, + "loss": 37.0, + "step": 9981 + }, + { + "epoch": 0.09448982875966717, + "grad_norm": 487.1379089355469, + "learning_rate": 1.9782710877991138e-06, + "loss": 26.1328, + "step": 9982 + }, + { + "epoch": 0.09449929478138223, + "grad_norm": 635.7474975585938, + "learning_rate": 1.978264730945598e-06, + "loss": 55.9062, + "step": 9983 + }, + { + "epoch": 0.09450876080309728, + "grad_norm": 371.2217712402344, + "learning_rate": 1.9782583731725765e-06, + "loss": 21.0156, + "step": 9984 + }, + { + "epoch": 0.09451822682481234, + "grad_norm": 205.2873992919922, + "learning_rate": 1.9782520144800563e-06, + "loss": 16.3008, + "step": 9985 + }, + { + "epoch": 0.09452769284652739, + "grad_norm": 501.9367980957031, + "learning_rate": 1.978245654868043e-06, + "loss": 45.707, + "step": 9986 + }, + { + "epoch": 0.09453715886824245, + "grad_norm": 474.3633117675781, + "learning_rate": 1.978239294336542e-06, + "loss": 43.1562, + "step": 9987 + }, + { + "epoch": 0.09454662488995749, + "grad_norm": 470.89031982421875, + "learning_rate": 1.9782329328855594e-06, + "loss": 48.6562, + "step": 9988 + }, + { + "epoch": 0.09455609091167255, + "grad_norm": 299.90972900390625, + "learning_rate": 1.9782265705151023e-06, + "loss": 26.5, + "step": 9989 + }, + { + "epoch": 0.0945655569333876, + "grad_norm": 224.48031616210938, + "learning_rate": 1.9782202072251753e-06, + "loss": 18.7578, + "step": 9990 + }, + { + "epoch": 0.09457502295510266, + "grad_norm": 536.8344116210938, + "learning_rate": 1.978213843015785e-06, + "loss": 41.8281, + "step": 9991 + }, + { + "epoch": 0.09458448897681772, + "grad_norm": 331.03155517578125, + "learning_rate": 1.978207477886938e-06, + "loss": 17.0938, + "step": 9992 + }, + { + "epoch": 0.09459395499853276, + "grad_norm": 521.6146850585938, + "learning_rate": 1.9782011118386387e-06, + "loss": 16.7891, + "step": 9993 + }, + { + "epoch": 0.09460342102024782, + "grad_norm": 304.19012451171875, + "learning_rate": 1.9781947448708942e-06, + "loss": 16.9219, + "step": 9994 + }, + { + "epoch": 0.09461288704196287, + "grad_norm": 178.88265991210938, + "learning_rate": 1.9781883769837103e-06, + "loss": 20.5312, + "step": 9995 + }, + { + "epoch": 0.09462235306367793, + "grad_norm": 168.55499267578125, + "learning_rate": 1.9781820081770933e-06, + "loss": 22.3203, + "step": 9996 + }, + { + "epoch": 0.09463181908539298, + "grad_norm": 480.4566955566406, + "learning_rate": 1.9781756384510488e-06, + "loss": 28.8828, + "step": 9997 + }, + { + "epoch": 0.09464128510710804, + "grad_norm": 548.2232666015625, + "learning_rate": 1.9781692678055825e-06, + "loss": 43.2656, + "step": 9998 + }, + { + "epoch": 0.0946507511288231, + "grad_norm": 349.5177917480469, + "learning_rate": 1.978162896240701e-06, + "loss": 23.7344, + "step": 9999 + }, + { + "epoch": 0.09466021715053814, + "grad_norm": 518.6321411132812, + "learning_rate": 1.9781565237564096e-06, + "loss": 19.0312, + "step": 10000 + }, + { + "epoch": 0.0946696831722532, + "grad_norm": 292.95159912109375, + "learning_rate": 1.978150150352715e-06, + "loss": 37.6172, + "step": 10001 + }, + { + "epoch": 0.09467914919396825, + "grad_norm": 1163.2376708984375, + "learning_rate": 1.978143776029623e-06, + "loss": 54.8281, + "step": 10002 + }, + { + "epoch": 0.09468861521568331, + "grad_norm": 407.2154235839844, + "learning_rate": 1.978137400787139e-06, + "loss": 21.8906, + "step": 10003 + }, + { + "epoch": 0.09469808123739835, + "grad_norm": 281.943603515625, + "learning_rate": 1.97813102462527e-06, + "loss": 23.9688, + "step": 10004 + }, + { + "epoch": 0.09470754725911341, + "grad_norm": 3.344081163406372, + "learning_rate": 1.978124647544021e-06, + "loss": 0.812, + "step": 10005 + }, + { + "epoch": 0.09471701328082846, + "grad_norm": 213.2633056640625, + "learning_rate": 1.978118269543399e-06, + "loss": 20.25, + "step": 10006 + }, + { + "epoch": 0.09472647930254352, + "grad_norm": 370.82110595703125, + "learning_rate": 1.9781118906234094e-06, + "loss": 22.2031, + "step": 10007 + }, + { + "epoch": 0.09473594532425858, + "grad_norm": 438.2336120605469, + "learning_rate": 1.978105510784058e-06, + "loss": 31.8438, + "step": 10008 + }, + { + "epoch": 0.09474541134597363, + "grad_norm": 398.14361572265625, + "learning_rate": 1.978099130025351e-06, + "loss": 41.4062, + "step": 10009 + }, + { + "epoch": 0.09475487736768869, + "grad_norm": 2.7759616374969482, + "learning_rate": 1.978092748347295e-06, + "loss": 0.7622, + "step": 10010 + }, + { + "epoch": 0.09476434338940373, + "grad_norm": 869.1846313476562, + "learning_rate": 1.978086365749895e-06, + "loss": 32.1562, + "step": 10011 + }, + { + "epoch": 0.09477380941111879, + "grad_norm": 427.522216796875, + "learning_rate": 1.978079982233157e-06, + "loss": 48.0312, + "step": 10012 + }, + { + "epoch": 0.09478327543283384, + "grad_norm": 530.4539794921875, + "learning_rate": 1.978073597797088e-06, + "loss": 55.2344, + "step": 10013 + }, + { + "epoch": 0.0947927414545489, + "grad_norm": 853.9423217773438, + "learning_rate": 1.9780672124416936e-06, + "loss": 45.1484, + "step": 10014 + }, + { + "epoch": 0.09480220747626394, + "grad_norm": 321.0423583984375, + "learning_rate": 1.978060826166979e-06, + "loss": 28.4219, + "step": 10015 + }, + { + "epoch": 0.094811673497979, + "grad_norm": 678.094482421875, + "learning_rate": 1.9780544389729514e-06, + "loss": 43.5, + "step": 10016 + }, + { + "epoch": 0.09482113951969406, + "grad_norm": 454.8870849609375, + "learning_rate": 1.9780480508596164e-06, + "loss": 34.1875, + "step": 10017 + }, + { + "epoch": 0.09483060554140911, + "grad_norm": 509.7919616699219, + "learning_rate": 1.9780416618269796e-06, + "loss": 24.5469, + "step": 10018 + }, + { + "epoch": 0.09484007156312417, + "grad_norm": 259.4820251464844, + "learning_rate": 1.978035271875047e-06, + "loss": 25.4609, + "step": 10019 + }, + { + "epoch": 0.09484953758483922, + "grad_norm": 3.7149767875671387, + "learning_rate": 1.978028881003825e-06, + "loss": 0.9604, + "step": 10020 + }, + { + "epoch": 0.09485900360655428, + "grad_norm": 611.75341796875, + "learning_rate": 1.97802248921332e-06, + "loss": 31.8203, + "step": 10021 + }, + { + "epoch": 0.09486846962826932, + "grad_norm": 2.648486375808716, + "learning_rate": 1.978016096503537e-06, + "loss": 0.8774, + "step": 10022 + }, + { + "epoch": 0.09487793564998438, + "grad_norm": 652.892333984375, + "learning_rate": 1.978009702874482e-06, + "loss": 50.8125, + "step": 10023 + }, + { + "epoch": 0.09488740167169943, + "grad_norm": 208.9322509765625, + "learning_rate": 1.9780033083261624e-06, + "loss": 18.8281, + "step": 10024 + }, + { + "epoch": 0.09489686769341449, + "grad_norm": 234.25628662109375, + "learning_rate": 1.977996912858583e-06, + "loss": 22.4219, + "step": 10025 + }, + { + "epoch": 0.09490633371512955, + "grad_norm": 662.9414672851562, + "learning_rate": 1.9779905164717498e-06, + "loss": 34.9062, + "step": 10026 + }, + { + "epoch": 0.0949157997368446, + "grad_norm": 901.8578491210938, + "learning_rate": 1.9779841191656695e-06, + "loss": 32.3281, + "step": 10027 + }, + { + "epoch": 0.09492526575855965, + "grad_norm": 662.2747802734375, + "learning_rate": 1.977977720940348e-06, + "loss": 33.1602, + "step": 10028 + }, + { + "epoch": 0.0949347317802747, + "grad_norm": 491.9393005371094, + "learning_rate": 1.9779713217957904e-06, + "loss": 50.5469, + "step": 10029 + }, + { + "epoch": 0.09494419780198976, + "grad_norm": 415.9501037597656, + "learning_rate": 1.977964921732004e-06, + "loss": 52.9844, + "step": 10030 + }, + { + "epoch": 0.0949536638237048, + "grad_norm": 2.2428982257843018, + "learning_rate": 1.977958520748994e-06, + "loss": 0.8091, + "step": 10031 + }, + { + "epoch": 0.09496312984541987, + "grad_norm": 1308.3697509765625, + "learning_rate": 1.9779521188467662e-06, + "loss": 69.4766, + "step": 10032 + }, + { + "epoch": 0.09497259586713491, + "grad_norm": 328.7098388671875, + "learning_rate": 1.9779457160253275e-06, + "loss": 24.2031, + "step": 10033 + }, + { + "epoch": 0.09498206188884997, + "grad_norm": 355.07696533203125, + "learning_rate": 1.977939312284683e-06, + "loss": 31.8906, + "step": 10034 + }, + { + "epoch": 0.09499152791056503, + "grad_norm": 381.4848327636719, + "learning_rate": 1.9779329076248395e-06, + "loss": 30.1094, + "step": 10035 + }, + { + "epoch": 0.09500099393228008, + "grad_norm": 660.3407592773438, + "learning_rate": 1.977926502045803e-06, + "loss": 24.6406, + "step": 10036 + }, + { + "epoch": 0.09501045995399514, + "grad_norm": 1022.7686767578125, + "learning_rate": 1.9779200955475787e-06, + "loss": 86.6875, + "step": 10037 + }, + { + "epoch": 0.09501992597571018, + "grad_norm": 143.43589782714844, + "learning_rate": 1.977913688130173e-06, + "loss": 23.9922, + "step": 10038 + }, + { + "epoch": 0.09502939199742524, + "grad_norm": 646.296142578125, + "learning_rate": 1.9779072797935928e-06, + "loss": 29.8672, + "step": 10039 + }, + { + "epoch": 0.09503885801914029, + "grad_norm": 259.0724792480469, + "learning_rate": 1.9779008705378428e-06, + "loss": 22.9453, + "step": 10040 + }, + { + "epoch": 0.09504832404085535, + "grad_norm": 671.2141723632812, + "learning_rate": 1.9778944603629294e-06, + "loss": 43.5781, + "step": 10041 + }, + { + "epoch": 0.09505779006257041, + "grad_norm": 276.0722351074219, + "learning_rate": 1.9778880492688596e-06, + "loss": 16.25, + "step": 10042 + }, + { + "epoch": 0.09506725608428546, + "grad_norm": 420.24859619140625, + "learning_rate": 1.977881637255638e-06, + "loss": 27.2344, + "step": 10043 + }, + { + "epoch": 0.09507672210600052, + "grad_norm": 400.2782897949219, + "learning_rate": 1.9778752243232715e-06, + "loss": 27.0781, + "step": 10044 + }, + { + "epoch": 0.09508618812771556, + "grad_norm": 319.6298522949219, + "learning_rate": 1.977868810471766e-06, + "loss": 21.0859, + "step": 10045 + }, + { + "epoch": 0.09509565414943062, + "grad_norm": 259.08172607421875, + "learning_rate": 1.977862395701127e-06, + "loss": 22.2188, + "step": 10046 + }, + { + "epoch": 0.09510512017114567, + "grad_norm": 297.62109375, + "learning_rate": 1.9778559800113614e-06, + "loss": 25.75, + "step": 10047 + }, + { + "epoch": 0.09511458619286073, + "grad_norm": 389.01165771484375, + "learning_rate": 1.9778495634024747e-06, + "loss": 20.6953, + "step": 10048 + }, + { + "epoch": 0.09512405221457577, + "grad_norm": 284.9661560058594, + "learning_rate": 1.977843145874473e-06, + "loss": 16.8828, + "step": 10049 + }, + { + "epoch": 0.09513351823629083, + "grad_norm": 261.7856750488281, + "learning_rate": 1.9778367274273623e-06, + "loss": 24.9922, + "step": 10050 + }, + { + "epoch": 0.0951429842580059, + "grad_norm": 503.2874450683594, + "learning_rate": 1.9778303080611483e-06, + "loss": 33.0781, + "step": 10051 + }, + { + "epoch": 0.09515245027972094, + "grad_norm": 412.4489440917969, + "learning_rate": 1.977823887775838e-06, + "loss": 40.6562, + "step": 10052 + }, + { + "epoch": 0.095161916301436, + "grad_norm": 246.4237060546875, + "learning_rate": 1.9778174665714368e-06, + "loss": 26.0312, + "step": 10053 + }, + { + "epoch": 0.09517138232315105, + "grad_norm": 221.5636749267578, + "learning_rate": 1.9778110444479506e-06, + "loss": 29.9062, + "step": 10054 + }, + { + "epoch": 0.0951808483448661, + "grad_norm": 502.3572082519531, + "learning_rate": 1.977804621405386e-06, + "loss": 39.1406, + "step": 10055 + }, + { + "epoch": 0.09519031436658115, + "grad_norm": 307.1854248046875, + "learning_rate": 1.9777981974437483e-06, + "loss": 16.0781, + "step": 10056 + }, + { + "epoch": 0.09519978038829621, + "grad_norm": 382.7138977050781, + "learning_rate": 1.977791772563044e-06, + "loss": 38.9922, + "step": 10057 + }, + { + "epoch": 0.09520924641001126, + "grad_norm": 373.169921875, + "learning_rate": 1.977785346763279e-06, + "loss": 44.7969, + "step": 10058 + }, + { + "epoch": 0.09521871243172632, + "grad_norm": 486.9596862792969, + "learning_rate": 1.9777789200444596e-06, + "loss": 28.1094, + "step": 10059 + }, + { + "epoch": 0.09522817845344138, + "grad_norm": 560.360107421875, + "learning_rate": 1.977772492406591e-06, + "loss": 16.8516, + "step": 10060 + }, + { + "epoch": 0.09523764447515642, + "grad_norm": 440.0010681152344, + "learning_rate": 1.9777660638496805e-06, + "loss": 39.8203, + "step": 10061 + }, + { + "epoch": 0.09524711049687148, + "grad_norm": 519.1611328125, + "learning_rate": 1.9777596343737332e-06, + "loss": 43.2188, + "step": 10062 + }, + { + "epoch": 0.09525657651858653, + "grad_norm": 649.6676025390625, + "learning_rate": 1.977753203978756e-06, + "loss": 35.4844, + "step": 10063 + }, + { + "epoch": 0.09526604254030159, + "grad_norm": 223.1390380859375, + "learning_rate": 1.9777467726647535e-06, + "loss": 21.6562, + "step": 10064 + }, + { + "epoch": 0.09527550856201664, + "grad_norm": 2.735426664352417, + "learning_rate": 1.977740340431733e-06, + "loss": 0.8384, + "step": 10065 + }, + { + "epoch": 0.0952849745837317, + "grad_norm": 532.4666748046875, + "learning_rate": 1.9777339072797004e-06, + "loss": 35.4844, + "step": 10066 + }, + { + "epoch": 0.09529444060544674, + "grad_norm": 451.3540344238281, + "learning_rate": 1.9777274732086617e-06, + "loss": 43.0625, + "step": 10067 + }, + { + "epoch": 0.0953039066271618, + "grad_norm": 268.7657775878906, + "learning_rate": 1.9777210382186226e-06, + "loss": 24.5703, + "step": 10068 + }, + { + "epoch": 0.09531337264887686, + "grad_norm": 604.065185546875, + "learning_rate": 1.977714602309589e-06, + "loss": 7.5312, + "step": 10069 + }, + { + "epoch": 0.09532283867059191, + "grad_norm": 550.9915771484375, + "learning_rate": 1.977708165481568e-06, + "loss": 43.0391, + "step": 10070 + }, + { + "epoch": 0.09533230469230697, + "grad_norm": 572.9027099609375, + "learning_rate": 1.977701727734564e-06, + "loss": 35.8125, + "step": 10071 + }, + { + "epoch": 0.09534177071402201, + "grad_norm": 730.685791015625, + "learning_rate": 1.977695289068584e-06, + "loss": 49.9688, + "step": 10072 + }, + { + "epoch": 0.09535123673573707, + "grad_norm": 485.31219482421875, + "learning_rate": 1.977688849483635e-06, + "loss": 49.6406, + "step": 10073 + }, + { + "epoch": 0.09536070275745212, + "grad_norm": 664.1962280273438, + "learning_rate": 1.9776824089797214e-06, + "loss": 39.9688, + "step": 10074 + }, + { + "epoch": 0.09537016877916718, + "grad_norm": 395.1653747558594, + "learning_rate": 1.9776759675568504e-06, + "loss": 29.5234, + "step": 10075 + }, + { + "epoch": 0.09537963480088223, + "grad_norm": 408.0954284667969, + "learning_rate": 1.977669525215027e-06, + "loss": 39.4375, + "step": 10076 + }, + { + "epoch": 0.09538910082259729, + "grad_norm": 192.48715209960938, + "learning_rate": 1.977663081954258e-06, + "loss": 26.5547, + "step": 10077 + }, + { + "epoch": 0.09539856684431235, + "grad_norm": 702.787353515625, + "learning_rate": 1.9776566377745497e-06, + "loss": 39.0, + "step": 10078 + }, + { + "epoch": 0.09540803286602739, + "grad_norm": 549.51904296875, + "learning_rate": 1.977650192675907e-06, + "loss": 24.5938, + "step": 10079 + }, + { + "epoch": 0.09541749888774245, + "grad_norm": 704.75537109375, + "learning_rate": 1.9776437466583373e-06, + "loss": 37.6875, + "step": 10080 + }, + { + "epoch": 0.0954269649094575, + "grad_norm": 347.5857238769531, + "learning_rate": 1.977637299721846e-06, + "loss": 27.7109, + "step": 10081 + }, + { + "epoch": 0.09543643093117256, + "grad_norm": 236.8549346923828, + "learning_rate": 1.9776308518664394e-06, + "loss": 17.0781, + "step": 10082 + }, + { + "epoch": 0.0954458969528876, + "grad_norm": 433.4521179199219, + "learning_rate": 1.9776244030921233e-06, + "loss": 33.625, + "step": 10083 + }, + { + "epoch": 0.09545536297460266, + "grad_norm": 615.6995849609375, + "learning_rate": 1.9776179533989035e-06, + "loss": 59.2188, + "step": 10084 + }, + { + "epoch": 0.09546482899631772, + "grad_norm": 364.0931396484375, + "learning_rate": 1.977611502786787e-06, + "loss": 36.4375, + "step": 10085 + }, + { + "epoch": 0.09547429501803277, + "grad_norm": 226.42694091796875, + "learning_rate": 1.9776050512557787e-06, + "loss": 14.9883, + "step": 10086 + }, + { + "epoch": 0.09548376103974783, + "grad_norm": 385.0859375, + "learning_rate": 1.9775985988058855e-06, + "loss": 27.0938, + "step": 10087 + }, + { + "epoch": 0.09549322706146288, + "grad_norm": 873.1299438476562, + "learning_rate": 1.9775921454371136e-06, + "loss": 61.4531, + "step": 10088 + }, + { + "epoch": 0.09550269308317794, + "grad_norm": 441.3066101074219, + "learning_rate": 1.977585691149468e-06, + "loss": 37.0, + "step": 10089 + }, + { + "epoch": 0.09551215910489298, + "grad_norm": 435.6531982421875, + "learning_rate": 1.9775792359429557e-06, + "loss": 27.6094, + "step": 10090 + }, + { + "epoch": 0.09552162512660804, + "grad_norm": 479.2162170410156, + "learning_rate": 1.977572779817583e-06, + "loss": 41.5391, + "step": 10091 + }, + { + "epoch": 0.09553109114832309, + "grad_norm": 329.98065185546875, + "learning_rate": 1.9775663227733546e-06, + "loss": 20.6719, + "step": 10092 + }, + { + "epoch": 0.09554055717003815, + "grad_norm": 423.5835876464844, + "learning_rate": 1.977559864810278e-06, + "loss": 57.9375, + "step": 10093 + }, + { + "epoch": 0.09555002319175321, + "grad_norm": 453.1203918457031, + "learning_rate": 1.977553405928359e-06, + "loss": 33.3984, + "step": 10094 + }, + { + "epoch": 0.09555948921346825, + "grad_norm": 582.3023681640625, + "learning_rate": 1.9775469461276025e-06, + "loss": 53.0938, + "step": 10095 + }, + { + "epoch": 0.09556895523518331, + "grad_norm": 430.0511169433594, + "learning_rate": 1.977540485408016e-06, + "loss": 43.0625, + "step": 10096 + }, + { + "epoch": 0.09557842125689836, + "grad_norm": 828.994873046875, + "learning_rate": 1.9775340237696052e-06, + "loss": 48.5312, + "step": 10097 + }, + { + "epoch": 0.09558788727861342, + "grad_norm": 367.1300354003906, + "learning_rate": 1.9775275612123758e-06, + "loss": 20.3594, + "step": 10098 + }, + { + "epoch": 0.09559735330032847, + "grad_norm": 2.951322555541992, + "learning_rate": 1.9775210977363345e-06, + "loss": 0.9575, + "step": 10099 + }, + { + "epoch": 0.09560681932204353, + "grad_norm": 578.4172973632812, + "learning_rate": 1.9775146333414866e-06, + "loss": 62.2344, + "step": 10100 + }, + { + "epoch": 0.09561628534375857, + "grad_norm": 296.2647399902344, + "learning_rate": 1.9775081680278383e-06, + "loss": 35.2344, + "step": 10101 + }, + { + "epoch": 0.09562575136547363, + "grad_norm": 174.0088653564453, + "learning_rate": 1.977501701795396e-06, + "loss": 30.4688, + "step": 10102 + }, + { + "epoch": 0.09563521738718869, + "grad_norm": 407.6733703613281, + "learning_rate": 1.977495234644166e-06, + "loss": 37.3594, + "step": 10103 + }, + { + "epoch": 0.09564468340890374, + "grad_norm": 638.0780029296875, + "learning_rate": 1.977488766574154e-06, + "loss": 43.5391, + "step": 10104 + }, + { + "epoch": 0.0956541494306188, + "grad_norm": 307.7032775878906, + "learning_rate": 1.9774822975853656e-06, + "loss": 18.8164, + "step": 10105 + }, + { + "epoch": 0.09566361545233384, + "grad_norm": 325.3033752441406, + "learning_rate": 1.9774758276778083e-06, + "loss": 16.1875, + "step": 10106 + }, + { + "epoch": 0.0956730814740489, + "grad_norm": 687.14892578125, + "learning_rate": 1.9774693568514866e-06, + "loss": 29.375, + "step": 10107 + }, + { + "epoch": 0.09568254749576395, + "grad_norm": 439.8672180175781, + "learning_rate": 1.9774628851064077e-06, + "loss": 35.875, + "step": 10108 + }, + { + "epoch": 0.09569201351747901, + "grad_norm": 3.1484732627868652, + "learning_rate": 1.9774564124425772e-06, + "loss": 1.0366, + "step": 10109 + }, + { + "epoch": 0.09570147953919406, + "grad_norm": 253.1479034423828, + "learning_rate": 1.977449938860001e-06, + "loss": 28.0391, + "step": 10110 + }, + { + "epoch": 0.09571094556090912, + "grad_norm": 223.69471740722656, + "learning_rate": 1.977443464358686e-06, + "loss": 21.5938, + "step": 10111 + }, + { + "epoch": 0.09572041158262418, + "grad_norm": 699.9251708984375, + "learning_rate": 1.9774369889386373e-06, + "loss": 37.1719, + "step": 10112 + }, + { + "epoch": 0.09572987760433922, + "grad_norm": 345.5001220703125, + "learning_rate": 1.9774305125998616e-06, + "loss": 22.9766, + "step": 10113 + }, + { + "epoch": 0.09573934362605428, + "grad_norm": 493.0315856933594, + "learning_rate": 1.9774240353423647e-06, + "loss": 51.0469, + "step": 10114 + }, + { + "epoch": 0.09574880964776933, + "grad_norm": 406.1888122558594, + "learning_rate": 1.9774175571661527e-06, + "loss": 33.6094, + "step": 10115 + }, + { + "epoch": 0.09575827566948439, + "grad_norm": 258.1922607421875, + "learning_rate": 1.9774110780712317e-06, + "loss": 9.8242, + "step": 10116 + }, + { + "epoch": 0.09576774169119943, + "grad_norm": 375.6810607910156, + "learning_rate": 1.9774045980576083e-06, + "loss": 39.8906, + "step": 10117 + }, + { + "epoch": 0.0957772077129145, + "grad_norm": 373.4457092285156, + "learning_rate": 1.9773981171252878e-06, + "loss": 32.8438, + "step": 10118 + }, + { + "epoch": 0.09578667373462954, + "grad_norm": 437.98419189453125, + "learning_rate": 1.9773916352742766e-06, + "loss": 23.9922, + "step": 10119 + }, + { + "epoch": 0.0957961397563446, + "grad_norm": 218.09103393554688, + "learning_rate": 1.977385152504581e-06, + "loss": 17.8828, + "step": 10120 + }, + { + "epoch": 0.09580560577805966, + "grad_norm": 2.469261407852173, + "learning_rate": 1.977378668816207e-06, + "loss": 0.9097, + "step": 10121 + }, + { + "epoch": 0.0958150717997747, + "grad_norm": 292.5363464355469, + "learning_rate": 1.9773721842091606e-06, + "loss": 22.1875, + "step": 10122 + }, + { + "epoch": 0.09582453782148977, + "grad_norm": 641.5153198242188, + "learning_rate": 1.9773656986834482e-06, + "loss": 53.4961, + "step": 10123 + }, + { + "epoch": 0.09583400384320481, + "grad_norm": 638.8525390625, + "learning_rate": 1.9773592122390752e-06, + "loss": 19.5234, + "step": 10124 + }, + { + "epoch": 0.09584346986491987, + "grad_norm": 380.65447998046875, + "learning_rate": 1.977352724876048e-06, + "loss": 25.4922, + "step": 10125 + }, + { + "epoch": 0.09585293588663492, + "grad_norm": 156.1951446533203, + "learning_rate": 1.9773462365943733e-06, + "loss": 18.4453, + "step": 10126 + }, + { + "epoch": 0.09586240190834998, + "grad_norm": 311.4060974121094, + "learning_rate": 1.9773397473940566e-06, + "loss": 21.8281, + "step": 10127 + }, + { + "epoch": 0.09587186793006504, + "grad_norm": 329.5367736816406, + "learning_rate": 1.977333257275104e-06, + "loss": 37.6406, + "step": 10128 + }, + { + "epoch": 0.09588133395178008, + "grad_norm": 424.7969970703125, + "learning_rate": 1.977326766237522e-06, + "loss": 38.9453, + "step": 10129 + }, + { + "epoch": 0.09589079997349514, + "grad_norm": 265.6376037597656, + "learning_rate": 1.977320274281316e-06, + "loss": 24.0469, + "step": 10130 + }, + { + "epoch": 0.09590026599521019, + "grad_norm": 381.9410400390625, + "learning_rate": 1.977313781406493e-06, + "loss": 25.4688, + "step": 10131 + }, + { + "epoch": 0.09590973201692525, + "grad_norm": 227.8025665283203, + "learning_rate": 1.977307287613058e-06, + "loss": 33.2109, + "step": 10132 + }, + { + "epoch": 0.0959191980386403, + "grad_norm": 4.259149074554443, + "learning_rate": 1.9773007929010178e-06, + "loss": 1.061, + "step": 10133 + }, + { + "epoch": 0.09592866406035536, + "grad_norm": 212.0712127685547, + "learning_rate": 1.977294297270379e-06, + "loss": 16.8203, + "step": 10134 + }, + { + "epoch": 0.0959381300820704, + "grad_norm": 1055.3990478515625, + "learning_rate": 1.977287800721147e-06, + "loss": 55.9453, + "step": 10135 + }, + { + "epoch": 0.09594759610378546, + "grad_norm": 328.3143615722656, + "learning_rate": 1.9772813032533274e-06, + "loss": 24.1133, + "step": 10136 + }, + { + "epoch": 0.09595706212550052, + "grad_norm": 204.0445556640625, + "learning_rate": 1.9772748048669274e-06, + "loss": 24.3984, + "step": 10137 + }, + { + "epoch": 0.09596652814721557, + "grad_norm": 822.7428588867188, + "learning_rate": 1.977268305561953e-06, + "loss": 27.3203, + "step": 10138 + }, + { + "epoch": 0.09597599416893063, + "grad_norm": 3.1003026962280273, + "learning_rate": 1.9772618053384095e-06, + "loss": 0.9048, + "step": 10139 + }, + { + "epoch": 0.09598546019064567, + "grad_norm": 461.15826416015625, + "learning_rate": 1.9772553041963035e-06, + "loss": 56.5703, + "step": 10140 + }, + { + "epoch": 0.09599492621236073, + "grad_norm": 418.2093505859375, + "learning_rate": 1.9772488021356414e-06, + "loss": 33.1641, + "step": 10141 + }, + { + "epoch": 0.09600439223407578, + "grad_norm": 236.4571533203125, + "learning_rate": 1.9772422991564285e-06, + "loss": 22.7344, + "step": 10142 + }, + { + "epoch": 0.09601385825579084, + "grad_norm": 290.4241027832031, + "learning_rate": 1.977235795258672e-06, + "loss": 23.3516, + "step": 10143 + }, + { + "epoch": 0.09602332427750589, + "grad_norm": 219.86947631835938, + "learning_rate": 1.977229290442377e-06, + "loss": 10.4961, + "step": 10144 + }, + { + "epoch": 0.09603279029922095, + "grad_norm": 371.1127624511719, + "learning_rate": 1.9772227847075503e-06, + "loss": 33.3906, + "step": 10145 + }, + { + "epoch": 0.096042256320936, + "grad_norm": 183.10922241210938, + "learning_rate": 1.9772162780541973e-06, + "loss": 21.9219, + "step": 10146 + }, + { + "epoch": 0.09605172234265105, + "grad_norm": 265.3681945800781, + "learning_rate": 1.977209770482325e-06, + "loss": 21.8516, + "step": 10147 + }, + { + "epoch": 0.09606118836436611, + "grad_norm": 320.51373291015625, + "learning_rate": 1.977203261991939e-06, + "loss": 28.5469, + "step": 10148 + }, + { + "epoch": 0.09607065438608116, + "grad_norm": 195.042724609375, + "learning_rate": 1.9771967525830454e-06, + "loss": 24.5781, + "step": 10149 + }, + { + "epoch": 0.09608012040779622, + "grad_norm": 188.8480987548828, + "learning_rate": 1.9771902422556505e-06, + "loss": 20.9219, + "step": 10150 + }, + { + "epoch": 0.09608958642951126, + "grad_norm": 927.0878295898438, + "learning_rate": 1.97718373100976e-06, + "loss": 42.8281, + "step": 10151 + }, + { + "epoch": 0.09609905245122632, + "grad_norm": 438.2912292480469, + "learning_rate": 1.977177218845381e-06, + "loss": 27.7773, + "step": 10152 + }, + { + "epoch": 0.09610851847294137, + "grad_norm": 512.8429565429688, + "learning_rate": 1.9771707057625188e-06, + "loss": 34.2969, + "step": 10153 + }, + { + "epoch": 0.09611798449465643, + "grad_norm": 451.39404296875, + "learning_rate": 1.9771641917611795e-06, + "loss": 19.0625, + "step": 10154 + }, + { + "epoch": 0.09612745051637149, + "grad_norm": 781.2681884765625, + "learning_rate": 1.9771576768413698e-06, + "loss": 26.2969, + "step": 10155 + }, + { + "epoch": 0.09613691653808654, + "grad_norm": 154.34764099121094, + "learning_rate": 1.977151161003095e-06, + "loss": 18.9141, + "step": 10156 + }, + { + "epoch": 0.0961463825598016, + "grad_norm": 332.841064453125, + "learning_rate": 1.977144644246362e-06, + "loss": 29.3047, + "step": 10157 + }, + { + "epoch": 0.09615584858151664, + "grad_norm": 418.6829528808594, + "learning_rate": 1.9771381265711765e-06, + "loss": 38.2031, + "step": 10158 + }, + { + "epoch": 0.0961653146032317, + "grad_norm": 262.9169006347656, + "learning_rate": 1.9771316079775447e-06, + "loss": 26.1641, + "step": 10159 + }, + { + "epoch": 0.09617478062494675, + "grad_norm": 665.6204833984375, + "learning_rate": 1.9771250884654726e-06, + "loss": 40.3281, + "step": 10160 + }, + { + "epoch": 0.09618424664666181, + "grad_norm": 266.84619140625, + "learning_rate": 1.9771185680349665e-06, + "loss": 24.0781, + "step": 10161 + }, + { + "epoch": 0.09619371266837685, + "grad_norm": 659.6334838867188, + "learning_rate": 1.977112046686033e-06, + "loss": 56.0469, + "step": 10162 + }, + { + "epoch": 0.09620317869009191, + "grad_norm": 888.281005859375, + "learning_rate": 1.977105524418677e-06, + "loss": 13.8711, + "step": 10163 + }, + { + "epoch": 0.09621264471180697, + "grad_norm": 513.14599609375, + "learning_rate": 1.977099001232906e-06, + "loss": 30.5938, + "step": 10164 + }, + { + "epoch": 0.09622211073352202, + "grad_norm": 695.2803955078125, + "learning_rate": 1.977092477128725e-06, + "loss": 55.4297, + "step": 10165 + }, + { + "epoch": 0.09623157675523708, + "grad_norm": 579.484619140625, + "learning_rate": 1.9770859521061412e-06, + "loss": 54.5, + "step": 10166 + }, + { + "epoch": 0.09624104277695213, + "grad_norm": 600.60400390625, + "learning_rate": 1.9770794261651598e-06, + "loss": 68.9688, + "step": 10167 + }, + { + "epoch": 0.09625050879866719, + "grad_norm": 977.4208374023438, + "learning_rate": 1.9770728993057875e-06, + "loss": 18.6172, + "step": 10168 + }, + { + "epoch": 0.09625997482038223, + "grad_norm": 167.001220703125, + "learning_rate": 1.97706637152803e-06, + "loss": 21.4844, + "step": 10169 + }, + { + "epoch": 0.09626944084209729, + "grad_norm": 596.1156005859375, + "learning_rate": 1.977059842831894e-06, + "loss": 50.5312, + "step": 10170 + }, + { + "epoch": 0.09627890686381235, + "grad_norm": 546.8007202148438, + "learning_rate": 1.977053313217385e-06, + "loss": 23.9297, + "step": 10171 + }, + { + "epoch": 0.0962883728855274, + "grad_norm": 371.7882995605469, + "learning_rate": 1.9770467826845093e-06, + "loss": 16.5547, + "step": 10172 + }, + { + "epoch": 0.09629783890724246, + "grad_norm": 452.8056335449219, + "learning_rate": 1.9770402512332736e-06, + "loss": 29.8594, + "step": 10173 + }, + { + "epoch": 0.0963073049289575, + "grad_norm": 3.31783390045166, + "learning_rate": 1.9770337188636835e-06, + "loss": 0.916, + "step": 10174 + }, + { + "epoch": 0.09631677095067256, + "grad_norm": 328.3194580078125, + "learning_rate": 1.977027185575745e-06, + "loss": 28.4297, + "step": 10175 + }, + { + "epoch": 0.09632623697238761, + "grad_norm": 928.585693359375, + "learning_rate": 1.977020651369465e-06, + "loss": 59.4922, + "step": 10176 + }, + { + "epoch": 0.09633570299410267, + "grad_norm": 408.3916931152344, + "learning_rate": 1.9770141162448487e-06, + "loss": 51.5938, + "step": 10177 + }, + { + "epoch": 0.09634516901581772, + "grad_norm": 564.3809814453125, + "learning_rate": 1.977007580201903e-06, + "loss": 25.9609, + "step": 10178 + }, + { + "epoch": 0.09635463503753278, + "grad_norm": 1496.8934326171875, + "learning_rate": 1.9770010432406335e-06, + "loss": 40.6875, + "step": 10179 + }, + { + "epoch": 0.09636410105924784, + "grad_norm": 554.2822875976562, + "learning_rate": 1.9769945053610467e-06, + "loss": 17.8008, + "step": 10180 + }, + { + "epoch": 0.09637356708096288, + "grad_norm": 236.53863525390625, + "learning_rate": 1.9769879665631482e-06, + "loss": 27.3125, + "step": 10181 + }, + { + "epoch": 0.09638303310267794, + "grad_norm": 514.442138671875, + "learning_rate": 1.9769814268469453e-06, + "loss": 41.2344, + "step": 10182 + }, + { + "epoch": 0.09639249912439299, + "grad_norm": 940.480712890625, + "learning_rate": 1.976974886212443e-06, + "loss": 17.9727, + "step": 10183 + }, + { + "epoch": 0.09640196514610805, + "grad_norm": 244.68309020996094, + "learning_rate": 1.9769683446596483e-06, + "loss": 22.082, + "step": 10184 + }, + { + "epoch": 0.0964114311678231, + "grad_norm": 497.4513854980469, + "learning_rate": 1.9769618021885665e-06, + "loss": 30.2891, + "step": 10185 + }, + { + "epoch": 0.09642089718953815, + "grad_norm": 301.9384460449219, + "learning_rate": 1.976955258799204e-06, + "loss": 20.8125, + "step": 10186 + }, + { + "epoch": 0.0964303632112532, + "grad_norm": 385.69873046875, + "learning_rate": 1.9769487144915675e-06, + "loss": 20.7266, + "step": 10187 + }, + { + "epoch": 0.09643982923296826, + "grad_norm": 385.4007873535156, + "learning_rate": 1.9769421692656626e-06, + "loss": 26.2031, + "step": 10188 + }, + { + "epoch": 0.09644929525468332, + "grad_norm": 624.696044921875, + "learning_rate": 1.9769356231214953e-06, + "loss": 49.0625, + "step": 10189 + }, + { + "epoch": 0.09645876127639837, + "grad_norm": 453.164794921875, + "learning_rate": 1.9769290760590726e-06, + "loss": 44.2656, + "step": 10190 + }, + { + "epoch": 0.09646822729811343, + "grad_norm": 300.3827819824219, + "learning_rate": 1.9769225280783997e-06, + "loss": 17.9102, + "step": 10191 + }, + { + "epoch": 0.09647769331982847, + "grad_norm": 2059.104736328125, + "learning_rate": 1.9769159791794834e-06, + "loss": 67.7891, + "step": 10192 + }, + { + "epoch": 0.09648715934154353, + "grad_norm": 189.70501708984375, + "learning_rate": 1.9769094293623297e-06, + "loss": 21.6719, + "step": 10193 + }, + { + "epoch": 0.09649662536325858, + "grad_norm": 492.15753173828125, + "learning_rate": 1.9769028786269443e-06, + "loss": 42.125, + "step": 10194 + }, + { + "epoch": 0.09650609138497364, + "grad_norm": 248.82940673828125, + "learning_rate": 1.976896326973334e-06, + "loss": 30.3438, + "step": 10195 + }, + { + "epoch": 0.09651555740668868, + "grad_norm": 330.384033203125, + "learning_rate": 1.976889774401505e-06, + "loss": 21.5234, + "step": 10196 + }, + { + "epoch": 0.09652502342840374, + "grad_norm": 222.09324645996094, + "learning_rate": 1.9768832209114627e-06, + "loss": 13.9453, + "step": 10197 + }, + { + "epoch": 0.0965344894501188, + "grad_norm": 250.0201873779297, + "learning_rate": 1.976876666503214e-06, + "loss": 30.8594, + "step": 10198 + }, + { + "epoch": 0.09654395547183385, + "grad_norm": 1146.67138671875, + "learning_rate": 1.976870111176765e-06, + "loss": 44.8555, + "step": 10199 + }, + { + "epoch": 0.09655342149354891, + "grad_norm": 2.9639101028442383, + "learning_rate": 1.976863554932121e-06, + "loss": 0.9492, + "step": 10200 + }, + { + "epoch": 0.09656288751526396, + "grad_norm": 450.9801940917969, + "learning_rate": 1.976856997769289e-06, + "loss": 43.5469, + "step": 10201 + }, + { + "epoch": 0.09657235353697902, + "grad_norm": 315.6958312988281, + "learning_rate": 1.9768504396882752e-06, + "loss": 55.3594, + "step": 10202 + }, + { + "epoch": 0.09658181955869406, + "grad_norm": 370.0879821777344, + "learning_rate": 1.9768438806890857e-06, + "loss": 26.0312, + "step": 10203 + }, + { + "epoch": 0.09659128558040912, + "grad_norm": 899.25830078125, + "learning_rate": 1.9768373207717263e-06, + "loss": 57.8047, + "step": 10204 + }, + { + "epoch": 0.09660075160212417, + "grad_norm": 355.3663024902344, + "learning_rate": 1.9768307599362032e-06, + "loss": 17.6953, + "step": 10205 + }, + { + "epoch": 0.09661021762383923, + "grad_norm": 213.57276916503906, + "learning_rate": 1.976824198182523e-06, + "loss": 23.8594, + "step": 10206 + }, + { + "epoch": 0.09661968364555429, + "grad_norm": 432.8179016113281, + "learning_rate": 1.9768176355106914e-06, + "loss": 21.8438, + "step": 10207 + }, + { + "epoch": 0.09662914966726933, + "grad_norm": 543.3475341796875, + "learning_rate": 1.9768110719207145e-06, + "loss": 26.3984, + "step": 10208 + }, + { + "epoch": 0.0966386156889844, + "grad_norm": 215.85740661621094, + "learning_rate": 1.9768045074125993e-06, + "loss": 22.3125, + "step": 10209 + }, + { + "epoch": 0.09664808171069944, + "grad_norm": 636.7998657226562, + "learning_rate": 1.9767979419863516e-06, + "loss": 55.375, + "step": 10210 + }, + { + "epoch": 0.0966575477324145, + "grad_norm": 197.2063446044922, + "learning_rate": 1.9767913756419765e-06, + "loss": 22.4219, + "step": 10211 + }, + { + "epoch": 0.09666701375412955, + "grad_norm": 323.0161437988281, + "learning_rate": 1.976784808379482e-06, + "loss": 46.1562, + "step": 10212 + }, + { + "epoch": 0.0966764797758446, + "grad_norm": 634.7105102539062, + "learning_rate": 1.9767782401988724e-06, + "loss": 35.4375, + "step": 10213 + }, + { + "epoch": 0.09668594579755967, + "grad_norm": 880.4744873046875, + "learning_rate": 1.976771671100155e-06, + "loss": 84.9688, + "step": 10214 + }, + { + "epoch": 0.09669541181927471, + "grad_norm": 297.1836242675781, + "learning_rate": 1.9767651010833364e-06, + "loss": 19.5547, + "step": 10215 + }, + { + "epoch": 0.09670487784098977, + "grad_norm": 396.7928771972656, + "learning_rate": 1.9767585301484218e-06, + "loss": 30.4375, + "step": 10216 + }, + { + "epoch": 0.09671434386270482, + "grad_norm": 158.03799438476562, + "learning_rate": 1.9767519582954174e-06, + "loss": 21.125, + "step": 10217 + }, + { + "epoch": 0.09672380988441988, + "grad_norm": 753.822509765625, + "learning_rate": 1.97674538552433e-06, + "loss": 54.4922, + "step": 10218 + }, + { + "epoch": 0.09673327590613492, + "grad_norm": 265.46746826171875, + "learning_rate": 1.9767388118351655e-06, + "loss": 9.7578, + "step": 10219 + }, + { + "epoch": 0.09674274192784998, + "grad_norm": 768.5626220703125, + "learning_rate": 1.9767322372279302e-06, + "loss": 43.2812, + "step": 10220 + }, + { + "epoch": 0.09675220794956503, + "grad_norm": 1312.8638916015625, + "learning_rate": 1.97672566170263e-06, + "loss": 58.125, + "step": 10221 + }, + { + "epoch": 0.09676167397128009, + "grad_norm": 322.19110107421875, + "learning_rate": 1.976719085259271e-06, + "loss": 21.375, + "step": 10222 + }, + { + "epoch": 0.09677113999299515, + "grad_norm": 285.6373291015625, + "learning_rate": 1.97671250789786e-06, + "loss": 18.0781, + "step": 10223 + }, + { + "epoch": 0.0967806060147102, + "grad_norm": 3.025853395462036, + "learning_rate": 1.9767059296184025e-06, + "loss": 0.8579, + "step": 10224 + }, + { + "epoch": 0.09679007203642526, + "grad_norm": 302.6317443847656, + "learning_rate": 1.9766993504209047e-06, + "loss": 25.9531, + "step": 10225 + }, + { + "epoch": 0.0967995380581403, + "grad_norm": 735.88916015625, + "learning_rate": 1.9766927703053735e-06, + "loss": 51.5938, + "step": 10226 + }, + { + "epoch": 0.09680900407985536, + "grad_norm": 315.39599609375, + "learning_rate": 1.9766861892718144e-06, + "loss": 21.2109, + "step": 10227 + }, + { + "epoch": 0.09681847010157041, + "grad_norm": 284.0198059082031, + "learning_rate": 1.9766796073202342e-06, + "loss": 24.7891, + "step": 10228 + }, + { + "epoch": 0.09682793612328547, + "grad_norm": 455.1190185546875, + "learning_rate": 1.976673024450638e-06, + "loss": 27.9688, + "step": 10229 + }, + { + "epoch": 0.09683740214500051, + "grad_norm": 635.066650390625, + "learning_rate": 1.9766664406630335e-06, + "loss": 33.0547, + "step": 10230 + }, + { + "epoch": 0.09684686816671557, + "grad_norm": 3.0402534008026123, + "learning_rate": 1.9766598559574253e-06, + "loss": 0.7998, + "step": 10231 + }, + { + "epoch": 0.09685633418843063, + "grad_norm": 720.6796264648438, + "learning_rate": 1.9766532703338214e-06, + "loss": 38.9844, + "step": 10232 + }, + { + "epoch": 0.09686580021014568, + "grad_norm": 308.8890075683594, + "learning_rate": 1.976646683792226e-06, + "loss": 23.0156, + "step": 10233 + }, + { + "epoch": 0.09687526623186074, + "grad_norm": 295.5926513671875, + "learning_rate": 1.976640096332647e-06, + "loss": 26.4688, + "step": 10234 + }, + { + "epoch": 0.09688473225357579, + "grad_norm": 264.51055908203125, + "learning_rate": 1.976633507955089e-06, + "loss": 22.0781, + "step": 10235 + }, + { + "epoch": 0.09689419827529085, + "grad_norm": 253.84999084472656, + "learning_rate": 1.9766269186595594e-06, + "loss": 21.9453, + "step": 10236 + }, + { + "epoch": 0.09690366429700589, + "grad_norm": 609.7420654296875, + "learning_rate": 1.976620328446064e-06, + "loss": 29.0781, + "step": 10237 + }, + { + "epoch": 0.09691313031872095, + "grad_norm": 299.04571533203125, + "learning_rate": 1.9766137373146092e-06, + "loss": 20.3906, + "step": 10238 + }, + { + "epoch": 0.096922596340436, + "grad_norm": 671.076416015625, + "learning_rate": 1.976607145265201e-06, + "loss": 30.8906, + "step": 10239 + }, + { + "epoch": 0.09693206236215106, + "grad_norm": 485.3672790527344, + "learning_rate": 1.9766005522978453e-06, + "loss": 44.7812, + "step": 10240 + }, + { + "epoch": 0.09694152838386612, + "grad_norm": 179.59483337402344, + "learning_rate": 1.9765939584125493e-06, + "loss": 25.2266, + "step": 10241 + }, + { + "epoch": 0.09695099440558116, + "grad_norm": 456.4469299316406, + "learning_rate": 1.976587363609318e-06, + "loss": 46.5469, + "step": 10242 + }, + { + "epoch": 0.09696046042729622, + "grad_norm": 442.0279541015625, + "learning_rate": 1.976580767888158e-06, + "loss": 26.0078, + "step": 10243 + }, + { + "epoch": 0.09696992644901127, + "grad_norm": 351.6238098144531, + "learning_rate": 1.976574171249076e-06, + "loss": 11.3984, + "step": 10244 + }, + { + "epoch": 0.09697939247072633, + "grad_norm": 670.4993286132812, + "learning_rate": 1.9765675736920774e-06, + "loss": 47.8672, + "step": 10245 + }, + { + "epoch": 0.09698885849244138, + "grad_norm": 361.2619323730469, + "learning_rate": 1.9765609752171693e-06, + "loss": 40.8984, + "step": 10246 + }, + { + "epoch": 0.09699832451415644, + "grad_norm": 438.58123779296875, + "learning_rate": 1.976554375824357e-06, + "loss": 43.1797, + "step": 10247 + }, + { + "epoch": 0.09700779053587148, + "grad_norm": 219.15370178222656, + "learning_rate": 1.9765477755136474e-06, + "loss": 24.2656, + "step": 10248 + }, + { + "epoch": 0.09701725655758654, + "grad_norm": 1067.553466796875, + "learning_rate": 1.976541174285046e-06, + "loss": 42.8516, + "step": 10249 + }, + { + "epoch": 0.0970267225793016, + "grad_norm": 360.8633117675781, + "learning_rate": 1.97653457213856e-06, + "loss": 26.3047, + "step": 10250 + }, + { + "epoch": 0.09703618860101665, + "grad_norm": 1043.8182373046875, + "learning_rate": 1.9765279690741943e-06, + "loss": 61.0781, + "step": 10251 + }, + { + "epoch": 0.09704565462273171, + "grad_norm": 427.0685729980469, + "learning_rate": 1.9765213650919564e-06, + "loss": 48.4844, + "step": 10252 + }, + { + "epoch": 0.09705512064444675, + "grad_norm": 155.36795043945312, + "learning_rate": 1.976514760191852e-06, + "loss": 17.9297, + "step": 10253 + }, + { + "epoch": 0.09706458666616181, + "grad_norm": 284.4605407714844, + "learning_rate": 1.976508154373887e-06, + "loss": 24.7031, + "step": 10254 + }, + { + "epoch": 0.09707405268787686, + "grad_norm": 490.9344177246094, + "learning_rate": 1.976501547638068e-06, + "loss": 27.1719, + "step": 10255 + }, + { + "epoch": 0.09708351870959192, + "grad_norm": 225.09307861328125, + "learning_rate": 1.976494939984401e-06, + "loss": 24.25, + "step": 10256 + }, + { + "epoch": 0.09709298473130698, + "grad_norm": 352.8570861816406, + "learning_rate": 1.9764883314128923e-06, + "loss": 33.9688, + "step": 10257 + }, + { + "epoch": 0.09710245075302203, + "grad_norm": 556.2406005859375, + "learning_rate": 1.976481721923548e-06, + "loss": 42.2812, + "step": 10258 + }, + { + "epoch": 0.09711191677473709, + "grad_norm": 186.9109344482422, + "learning_rate": 1.9764751115163747e-06, + "loss": 23.9531, + "step": 10259 + }, + { + "epoch": 0.09712138279645213, + "grad_norm": 997.3425903320312, + "learning_rate": 1.9764685001913775e-06, + "loss": 18.3984, + "step": 10260 + }, + { + "epoch": 0.09713084881816719, + "grad_norm": 274.87506103515625, + "learning_rate": 1.9764618879485642e-06, + "loss": 19.1641, + "step": 10261 + }, + { + "epoch": 0.09714031483988224, + "grad_norm": 227.7298126220703, + "learning_rate": 1.9764552747879403e-06, + "loss": 21.5, + "step": 10262 + }, + { + "epoch": 0.0971497808615973, + "grad_norm": 393.2655029296875, + "learning_rate": 1.976448660709512e-06, + "loss": 41.1875, + "step": 10263 + }, + { + "epoch": 0.09715924688331234, + "grad_norm": 454.8700256347656, + "learning_rate": 1.976442045713285e-06, + "loss": 28.918, + "step": 10264 + }, + { + "epoch": 0.0971687129050274, + "grad_norm": 342.0788269042969, + "learning_rate": 1.976435429799266e-06, + "loss": 29.3359, + "step": 10265 + }, + { + "epoch": 0.09717817892674246, + "grad_norm": 847.6720581054688, + "learning_rate": 1.9764288129674617e-06, + "loss": 33.3438, + "step": 10266 + }, + { + "epoch": 0.09718764494845751, + "grad_norm": 2.8608405590057373, + "learning_rate": 1.976422195217877e-06, + "loss": 0.8462, + "step": 10267 + }, + { + "epoch": 0.09719711097017257, + "grad_norm": 207.92356872558594, + "learning_rate": 1.9764155765505198e-06, + "loss": 19.2656, + "step": 10268 + }, + { + "epoch": 0.09720657699188762, + "grad_norm": 577.9781494140625, + "learning_rate": 1.9764089569653953e-06, + "loss": 43.6094, + "step": 10269 + }, + { + "epoch": 0.09721604301360268, + "grad_norm": 288.1922912597656, + "learning_rate": 1.97640233646251e-06, + "loss": 20.375, + "step": 10270 + }, + { + "epoch": 0.09722550903531772, + "grad_norm": 547.9834594726562, + "learning_rate": 1.9763957150418697e-06, + "loss": 50.0234, + "step": 10271 + }, + { + "epoch": 0.09723497505703278, + "grad_norm": 281.6842956542969, + "learning_rate": 1.976389092703481e-06, + "loss": 22.1875, + "step": 10272 + }, + { + "epoch": 0.09724444107874783, + "grad_norm": 1090.2130126953125, + "learning_rate": 1.97638246944735e-06, + "loss": 58.9062, + "step": 10273 + }, + { + "epoch": 0.09725390710046289, + "grad_norm": 534.2918701171875, + "learning_rate": 1.976375845273483e-06, + "loss": 51.6406, + "step": 10274 + }, + { + "epoch": 0.09726337312217795, + "grad_norm": 450.3020324707031, + "learning_rate": 1.9763692201818868e-06, + "loss": 31.1875, + "step": 10275 + }, + { + "epoch": 0.097272839143893, + "grad_norm": 375.4912109375, + "learning_rate": 1.9763625941725667e-06, + "loss": 10.3828, + "step": 10276 + }, + { + "epoch": 0.09728230516560805, + "grad_norm": 418.9878845214844, + "learning_rate": 1.976355967245529e-06, + "loss": 20.0391, + "step": 10277 + }, + { + "epoch": 0.0972917711873231, + "grad_norm": 980.3253173828125, + "learning_rate": 1.9763493394007804e-06, + "loss": 62.9219, + "step": 10278 + }, + { + "epoch": 0.09730123720903816, + "grad_norm": 446.166259765625, + "learning_rate": 1.976342710638327e-06, + "loss": 18.5195, + "step": 10279 + }, + { + "epoch": 0.0973107032307532, + "grad_norm": 375.0148010253906, + "learning_rate": 1.9763360809581747e-06, + "loss": 21.0938, + "step": 10280 + }, + { + "epoch": 0.09732016925246827, + "grad_norm": 429.34149169921875, + "learning_rate": 1.9763294503603303e-06, + "loss": 41.4062, + "step": 10281 + }, + { + "epoch": 0.09732963527418331, + "grad_norm": 176.91094970703125, + "learning_rate": 1.9763228188447998e-06, + "loss": 18.8047, + "step": 10282 + }, + { + "epoch": 0.09733910129589837, + "grad_norm": 297.7659912109375, + "learning_rate": 1.976316186411589e-06, + "loss": 22.5859, + "step": 10283 + }, + { + "epoch": 0.09734856731761343, + "grad_norm": 1132.12060546875, + "learning_rate": 1.9763095530607046e-06, + "loss": 53.25, + "step": 10284 + }, + { + "epoch": 0.09735803333932848, + "grad_norm": 677.9464111328125, + "learning_rate": 1.976302918792153e-06, + "loss": 47.8516, + "step": 10285 + }, + { + "epoch": 0.09736749936104354, + "grad_norm": 191.21070861816406, + "learning_rate": 1.97629628360594e-06, + "loss": 18.6758, + "step": 10286 + }, + { + "epoch": 0.09737696538275858, + "grad_norm": 823.8433837890625, + "learning_rate": 1.976289647502072e-06, + "loss": 38.0469, + "step": 10287 + }, + { + "epoch": 0.09738643140447364, + "grad_norm": 199.12391662597656, + "learning_rate": 1.976283010480555e-06, + "loss": 24.7734, + "step": 10288 + }, + { + "epoch": 0.09739589742618869, + "grad_norm": 239.36695861816406, + "learning_rate": 1.976276372541396e-06, + "loss": 29.3906, + "step": 10289 + }, + { + "epoch": 0.09740536344790375, + "grad_norm": 464.7518615722656, + "learning_rate": 1.9762697336846002e-06, + "loss": 49.668, + "step": 10290 + }, + { + "epoch": 0.0974148294696188, + "grad_norm": 304.86895751953125, + "learning_rate": 1.976263093910175e-06, + "loss": 19.7188, + "step": 10291 + }, + { + "epoch": 0.09742429549133386, + "grad_norm": 578.18603515625, + "learning_rate": 1.9762564532181253e-06, + "loss": 19.2422, + "step": 10292 + }, + { + "epoch": 0.09743376151304892, + "grad_norm": 548.3297119140625, + "learning_rate": 1.9762498116084583e-06, + "loss": 47.8906, + "step": 10293 + }, + { + "epoch": 0.09744322753476396, + "grad_norm": 195.54144287109375, + "learning_rate": 1.97624316908118e-06, + "loss": 21.9844, + "step": 10294 + }, + { + "epoch": 0.09745269355647902, + "grad_norm": 244.40379333496094, + "learning_rate": 1.9762365256362967e-06, + "loss": 14.1523, + "step": 10295 + }, + { + "epoch": 0.09746215957819407, + "grad_norm": 417.9362487792969, + "learning_rate": 1.976229881273815e-06, + "loss": 62.0469, + "step": 10296 + }, + { + "epoch": 0.09747162559990913, + "grad_norm": 368.09381103515625, + "learning_rate": 1.9762232359937403e-06, + "loss": 36.0977, + "step": 10297 + }, + { + "epoch": 0.09748109162162417, + "grad_norm": 270.1180419921875, + "learning_rate": 1.976216589796079e-06, + "loss": 14.8477, + "step": 10298 + }, + { + "epoch": 0.09749055764333923, + "grad_norm": 175.67605590820312, + "learning_rate": 1.9762099426808376e-06, + "loss": 23.8281, + "step": 10299 + }, + { + "epoch": 0.0975000236650543, + "grad_norm": 508.4403381347656, + "learning_rate": 1.9762032946480227e-06, + "loss": 31.3438, + "step": 10300 + }, + { + "epoch": 0.09750948968676934, + "grad_norm": 207.54005432128906, + "learning_rate": 1.97619664569764e-06, + "loss": 21.4219, + "step": 10301 + }, + { + "epoch": 0.0975189557084844, + "grad_norm": 863.822998046875, + "learning_rate": 1.9761899958296964e-06, + "loss": 10.3906, + "step": 10302 + }, + { + "epoch": 0.09752842173019945, + "grad_norm": 434.83148193359375, + "learning_rate": 1.976183345044197e-06, + "loss": 22.3906, + "step": 10303 + }, + { + "epoch": 0.0975378877519145, + "grad_norm": 1844.4630126953125, + "learning_rate": 1.9761766933411496e-06, + "loss": 26.9414, + "step": 10304 + }, + { + "epoch": 0.09754735377362955, + "grad_norm": 1036.388916015625, + "learning_rate": 1.976170040720559e-06, + "loss": 71.8125, + "step": 10305 + }, + { + "epoch": 0.09755681979534461, + "grad_norm": 3.502329111099243, + "learning_rate": 1.976163387182432e-06, + "loss": 0.8857, + "step": 10306 + }, + { + "epoch": 0.09756628581705966, + "grad_norm": 207.48321533203125, + "learning_rate": 1.976156732726775e-06, + "loss": 19.8438, + "step": 10307 + }, + { + "epoch": 0.09757575183877472, + "grad_norm": 547.0567016601562, + "learning_rate": 1.9761500773535946e-06, + "loss": 40.5, + "step": 10308 + }, + { + "epoch": 0.09758521786048978, + "grad_norm": 456.09967041015625, + "learning_rate": 1.9761434210628963e-06, + "loss": 21.8828, + "step": 10309 + }, + { + "epoch": 0.09759468388220482, + "grad_norm": 709.7697143554688, + "learning_rate": 1.976136763854687e-06, + "loss": 62.6875, + "step": 10310 + }, + { + "epoch": 0.09760414990391988, + "grad_norm": 300.9504699707031, + "learning_rate": 1.976130105728972e-06, + "loss": 23.2383, + "step": 10311 + }, + { + "epoch": 0.09761361592563493, + "grad_norm": 534.18212890625, + "learning_rate": 1.976123446685759e-06, + "loss": 46.7344, + "step": 10312 + }, + { + "epoch": 0.09762308194734999, + "grad_norm": 442.36505126953125, + "learning_rate": 1.9761167867250527e-06, + "loss": 46.3281, + "step": 10313 + }, + { + "epoch": 0.09763254796906504, + "grad_norm": 3.2587411403656006, + "learning_rate": 1.9761101258468604e-06, + "loss": 0.9272, + "step": 10314 + }, + { + "epoch": 0.0976420139907801, + "grad_norm": 414.86376953125, + "learning_rate": 1.976103464051188e-06, + "loss": 17.1953, + "step": 10315 + }, + { + "epoch": 0.09765148001249514, + "grad_norm": 221.00144958496094, + "learning_rate": 1.976096801338042e-06, + "loss": 25.7188, + "step": 10316 + }, + { + "epoch": 0.0976609460342102, + "grad_norm": 452.2099914550781, + "learning_rate": 1.9760901377074285e-06, + "loss": 16.5, + "step": 10317 + }, + { + "epoch": 0.09767041205592526, + "grad_norm": 457.12786865234375, + "learning_rate": 1.9760834731593537e-06, + "loss": 13.4375, + "step": 10318 + }, + { + "epoch": 0.09767987807764031, + "grad_norm": 502.55218505859375, + "learning_rate": 1.976076807693824e-06, + "loss": 46.2188, + "step": 10319 + }, + { + "epoch": 0.09768934409935537, + "grad_norm": 1163.345458984375, + "learning_rate": 1.976070141310846e-06, + "loss": 40.3672, + "step": 10320 + }, + { + "epoch": 0.09769881012107041, + "grad_norm": 472.6195983886719, + "learning_rate": 1.976063474010425e-06, + "loss": 25.2188, + "step": 10321 + }, + { + "epoch": 0.09770827614278547, + "grad_norm": 3.055809736251831, + "learning_rate": 1.976056805792568e-06, + "loss": 0.9045, + "step": 10322 + }, + { + "epoch": 0.09771774216450052, + "grad_norm": 346.5358581542969, + "learning_rate": 1.976050136657281e-06, + "loss": 23.0781, + "step": 10323 + }, + { + "epoch": 0.09772720818621558, + "grad_norm": 332.6968688964844, + "learning_rate": 1.9760434666045707e-06, + "loss": 23.7266, + "step": 10324 + }, + { + "epoch": 0.09773667420793063, + "grad_norm": 471.40740966796875, + "learning_rate": 1.9760367956344425e-06, + "loss": 60.1875, + "step": 10325 + }, + { + "epoch": 0.09774614022964569, + "grad_norm": 502.49267578125, + "learning_rate": 1.9760301237469034e-06, + "loss": 46.4688, + "step": 10326 + }, + { + "epoch": 0.09775560625136075, + "grad_norm": 189.1688690185547, + "learning_rate": 1.97602345094196e-06, + "loss": 18.1641, + "step": 10327 + }, + { + "epoch": 0.09776507227307579, + "grad_norm": 711.5901489257812, + "learning_rate": 1.9760167772196174e-06, + "loss": 40.9844, + "step": 10328 + }, + { + "epoch": 0.09777453829479085, + "grad_norm": 505.90411376953125, + "learning_rate": 1.976010102579883e-06, + "loss": 44.2656, + "step": 10329 + }, + { + "epoch": 0.0977840043165059, + "grad_norm": 157.70034790039062, + "learning_rate": 1.976003427022762e-06, + "loss": 16.5586, + "step": 10330 + }, + { + "epoch": 0.09779347033822096, + "grad_norm": 310.81268310546875, + "learning_rate": 1.9759967505482616e-06, + "loss": 24.7188, + "step": 10331 + }, + { + "epoch": 0.097802936359936, + "grad_norm": 372.4911804199219, + "learning_rate": 1.975990073156388e-06, + "loss": 40.6406, + "step": 10332 + }, + { + "epoch": 0.09781240238165106, + "grad_norm": 384.564208984375, + "learning_rate": 1.9759833948471468e-06, + "loss": 19.543, + "step": 10333 + }, + { + "epoch": 0.09782186840336611, + "grad_norm": 277.5809631347656, + "learning_rate": 1.975976715620545e-06, + "loss": 30.6172, + "step": 10334 + }, + { + "epoch": 0.09783133442508117, + "grad_norm": 2.8912665843963623, + "learning_rate": 1.9759700354765886e-06, + "loss": 0.855, + "step": 10335 + }, + { + "epoch": 0.09784080044679623, + "grad_norm": 955.6618041992188, + "learning_rate": 1.975963354415284e-06, + "loss": 59.0312, + "step": 10336 + }, + { + "epoch": 0.09785026646851128, + "grad_norm": 395.9432678222656, + "learning_rate": 1.9759566724366363e-06, + "loss": 33.8984, + "step": 10337 + }, + { + "epoch": 0.09785973249022634, + "grad_norm": 367.2672119140625, + "learning_rate": 1.975949989540654e-06, + "loss": 34.5625, + "step": 10338 + }, + { + "epoch": 0.09786919851194138, + "grad_norm": 535.1878051757812, + "learning_rate": 1.975943305727342e-06, + "loss": 36.9688, + "step": 10339 + }, + { + "epoch": 0.09787866453365644, + "grad_norm": 345.7509460449219, + "learning_rate": 1.9759366209967065e-06, + "loss": 19.0469, + "step": 10340 + }, + { + "epoch": 0.09788813055537149, + "grad_norm": 283.3561706542969, + "learning_rate": 1.975929935348754e-06, + "loss": 26.3008, + "step": 10341 + }, + { + "epoch": 0.09789759657708655, + "grad_norm": 365.5733642578125, + "learning_rate": 1.975923248783491e-06, + "loss": 51.1406, + "step": 10342 + }, + { + "epoch": 0.09790706259880161, + "grad_norm": 788.6690673828125, + "learning_rate": 1.9759165613009235e-06, + "loss": 54.4141, + "step": 10343 + }, + { + "epoch": 0.09791652862051665, + "grad_norm": 494.7263488769531, + "learning_rate": 1.9759098729010582e-06, + "loss": 54.2188, + "step": 10344 + }, + { + "epoch": 0.09792599464223171, + "grad_norm": 1290.26904296875, + "learning_rate": 1.9759031835839007e-06, + "loss": 41.4219, + "step": 10345 + }, + { + "epoch": 0.09793546066394676, + "grad_norm": 1141.926513671875, + "learning_rate": 1.975896493349458e-06, + "loss": 37.0156, + "step": 10346 + }, + { + "epoch": 0.09794492668566182, + "grad_norm": 498.5381164550781, + "learning_rate": 1.975889802197736e-06, + "loss": 50.0781, + "step": 10347 + }, + { + "epoch": 0.09795439270737687, + "grad_norm": 424.741943359375, + "learning_rate": 1.9758831101287413e-06, + "loss": 48.7969, + "step": 10348 + }, + { + "epoch": 0.09796385872909193, + "grad_norm": 503.9120178222656, + "learning_rate": 1.9758764171424797e-06, + "loss": 34.6094, + "step": 10349 + }, + { + "epoch": 0.09797332475080697, + "grad_norm": 591.324462890625, + "learning_rate": 1.9758697232389577e-06, + "loss": 15.7344, + "step": 10350 + }, + { + "epoch": 0.09798279077252203, + "grad_norm": 630.6304321289062, + "learning_rate": 1.975863028418182e-06, + "loss": 49.3125, + "step": 10351 + }, + { + "epoch": 0.09799225679423709, + "grad_norm": 793.0771484375, + "learning_rate": 1.975856332680158e-06, + "loss": 43.2227, + "step": 10352 + }, + { + "epoch": 0.09800172281595214, + "grad_norm": 189.85208129882812, + "learning_rate": 1.975849636024893e-06, + "loss": 17.3398, + "step": 10353 + }, + { + "epoch": 0.0980111888376672, + "grad_norm": 431.83062744140625, + "learning_rate": 1.9758429384523926e-06, + "loss": 27.9531, + "step": 10354 + }, + { + "epoch": 0.09802065485938224, + "grad_norm": 495.9781494140625, + "learning_rate": 1.975836239962663e-06, + "loss": 24.3281, + "step": 10355 + }, + { + "epoch": 0.0980301208810973, + "grad_norm": 3.032191276550293, + "learning_rate": 1.9758295405557114e-06, + "loss": 0.9224, + "step": 10356 + }, + { + "epoch": 0.09803958690281235, + "grad_norm": 1230.7496337890625, + "learning_rate": 1.9758228402315433e-06, + "loss": 31.8516, + "step": 10357 + }, + { + "epoch": 0.09804905292452741, + "grad_norm": 3.1814911365509033, + "learning_rate": 1.9758161389901655e-06, + "loss": 0.9316, + "step": 10358 + }, + { + "epoch": 0.09805851894624246, + "grad_norm": 272.021240234375, + "learning_rate": 1.9758094368315836e-06, + "loss": 19.8516, + "step": 10359 + }, + { + "epoch": 0.09806798496795752, + "grad_norm": 1309.969970703125, + "learning_rate": 1.9758027337558044e-06, + "loss": 63.3438, + "step": 10360 + }, + { + "epoch": 0.09807745098967258, + "grad_norm": 194.75299072265625, + "learning_rate": 1.975796029762834e-06, + "loss": 15.4883, + "step": 10361 + }, + { + "epoch": 0.09808691701138762, + "grad_norm": 339.1365051269531, + "learning_rate": 1.9757893248526793e-06, + "loss": 22.418, + "step": 10362 + }, + { + "epoch": 0.09809638303310268, + "grad_norm": 221.52230834960938, + "learning_rate": 1.9757826190253453e-06, + "loss": 23.4531, + "step": 10363 + }, + { + "epoch": 0.09810584905481773, + "grad_norm": 465.6391296386719, + "learning_rate": 1.9757759122808398e-06, + "loss": 36.8906, + "step": 10364 + }, + { + "epoch": 0.09811531507653279, + "grad_norm": 2.949362277984619, + "learning_rate": 1.9757692046191683e-06, + "loss": 1.0225, + "step": 10365 + }, + { + "epoch": 0.09812478109824783, + "grad_norm": 166.07484436035156, + "learning_rate": 1.975762496040337e-06, + "loss": 16.6562, + "step": 10366 + }, + { + "epoch": 0.0981342471199629, + "grad_norm": 445.81207275390625, + "learning_rate": 1.9757557865443526e-06, + "loss": 24.7969, + "step": 10367 + }, + { + "epoch": 0.09814371314167794, + "grad_norm": 358.4668884277344, + "learning_rate": 1.975749076131221e-06, + "loss": 22.8984, + "step": 10368 + }, + { + "epoch": 0.098153179163393, + "grad_norm": 499.86639404296875, + "learning_rate": 1.975742364800949e-06, + "loss": 24.7188, + "step": 10369 + }, + { + "epoch": 0.09816264518510806, + "grad_norm": 178.606689453125, + "learning_rate": 1.9757356525535425e-06, + "loss": 22.9141, + "step": 10370 + }, + { + "epoch": 0.0981721112068231, + "grad_norm": 445.4112243652344, + "learning_rate": 1.9757289393890084e-06, + "loss": 22.3359, + "step": 10371 + }, + { + "epoch": 0.09818157722853817, + "grad_norm": 428.4311828613281, + "learning_rate": 1.975722225307352e-06, + "loss": 45.9375, + "step": 10372 + }, + { + "epoch": 0.09819104325025321, + "grad_norm": 455.4181823730469, + "learning_rate": 1.9757155103085806e-06, + "loss": 44.1094, + "step": 10373 + }, + { + "epoch": 0.09820050927196827, + "grad_norm": 399.91021728515625, + "learning_rate": 1.9757087943927e-06, + "loss": 47.2734, + "step": 10374 + }, + { + "epoch": 0.09820997529368332, + "grad_norm": 361.8592834472656, + "learning_rate": 1.975702077559716e-06, + "loss": 30.4375, + "step": 10375 + }, + { + "epoch": 0.09821944131539838, + "grad_norm": 256.8457946777344, + "learning_rate": 1.9756953598096362e-06, + "loss": 17.9922, + "step": 10376 + }, + { + "epoch": 0.09822890733711342, + "grad_norm": 413.14788818359375, + "learning_rate": 1.975688641142466e-06, + "loss": 45.7422, + "step": 10377 + }, + { + "epoch": 0.09823837335882848, + "grad_norm": 284.602783203125, + "learning_rate": 1.975681921558212e-06, + "loss": 12.6172, + "step": 10378 + }, + { + "epoch": 0.09824783938054354, + "grad_norm": 4.253300666809082, + "learning_rate": 1.9756752010568805e-06, + "loss": 0.896, + "step": 10379 + }, + { + "epoch": 0.09825730540225859, + "grad_norm": 375.0534362792969, + "learning_rate": 1.975668479638478e-06, + "loss": 42.4531, + "step": 10380 + }, + { + "epoch": 0.09826677142397365, + "grad_norm": 321.6159362792969, + "learning_rate": 1.9756617573030104e-06, + "loss": 42.3906, + "step": 10381 + }, + { + "epoch": 0.0982762374456887, + "grad_norm": 237.150634765625, + "learning_rate": 1.975655034050484e-06, + "loss": 13.9609, + "step": 10382 + }, + { + "epoch": 0.09828570346740376, + "grad_norm": 673.5106811523438, + "learning_rate": 1.9756483098809058e-06, + "loss": 25.1406, + "step": 10383 + }, + { + "epoch": 0.0982951694891188, + "grad_norm": 729.9949951171875, + "learning_rate": 1.9756415847942813e-06, + "loss": 35.2891, + "step": 10384 + }, + { + "epoch": 0.09830463551083386, + "grad_norm": 438.16729736328125, + "learning_rate": 1.9756348587906174e-06, + "loss": 30.3047, + "step": 10385 + }, + { + "epoch": 0.09831410153254892, + "grad_norm": 315.14886474609375, + "learning_rate": 1.97562813186992e-06, + "loss": 21.3672, + "step": 10386 + }, + { + "epoch": 0.09832356755426397, + "grad_norm": 437.479248046875, + "learning_rate": 1.9756214040321956e-06, + "loss": 9.4688, + "step": 10387 + }, + { + "epoch": 0.09833303357597903, + "grad_norm": 324.9654541015625, + "learning_rate": 1.975614675277451e-06, + "loss": 38.9531, + "step": 10388 + }, + { + "epoch": 0.09834249959769407, + "grad_norm": 222.1731414794922, + "learning_rate": 1.9756079456056913e-06, + "loss": 9.25, + "step": 10389 + }, + { + "epoch": 0.09835196561940913, + "grad_norm": 2.940930128097534, + "learning_rate": 1.975601215016924e-06, + "loss": 0.9014, + "step": 10390 + }, + { + "epoch": 0.09836143164112418, + "grad_norm": 579.0230712890625, + "learning_rate": 1.975594483511155e-06, + "loss": 21.2188, + "step": 10391 + }, + { + "epoch": 0.09837089766283924, + "grad_norm": 2.8820202350616455, + "learning_rate": 1.9755877510883908e-06, + "loss": 0.8867, + "step": 10392 + }, + { + "epoch": 0.09838036368455429, + "grad_norm": 328.38739013671875, + "learning_rate": 1.9755810177486376e-06, + "loss": 22.0703, + "step": 10393 + }, + { + "epoch": 0.09838982970626935, + "grad_norm": 362.91259765625, + "learning_rate": 1.9755742834919014e-06, + "loss": 38.6562, + "step": 10394 + }, + { + "epoch": 0.0983992957279844, + "grad_norm": 1231.831787109375, + "learning_rate": 1.975567548318189e-06, + "loss": 35.8984, + "step": 10395 + }, + { + "epoch": 0.09840876174969945, + "grad_norm": 228.1195526123047, + "learning_rate": 1.9755608122275066e-06, + "loss": 14.7656, + "step": 10396 + }, + { + "epoch": 0.09841822777141451, + "grad_norm": 494.0980224609375, + "learning_rate": 1.9755540752198607e-06, + "loss": 21.9844, + "step": 10397 + }, + { + "epoch": 0.09842769379312956, + "grad_norm": 162.33761596679688, + "learning_rate": 1.975547337295257e-06, + "loss": 19.5625, + "step": 10398 + }, + { + "epoch": 0.09843715981484462, + "grad_norm": 408.5000915527344, + "learning_rate": 1.9755405984537027e-06, + "loss": 30.2031, + "step": 10399 + }, + { + "epoch": 0.09844662583655966, + "grad_norm": 2.7992103099823, + "learning_rate": 1.9755338586952033e-06, + "loss": 0.9224, + "step": 10400 + }, + { + "epoch": 0.09845609185827472, + "grad_norm": 288.8086242675781, + "learning_rate": 1.975527118019766e-06, + "loss": 27.9062, + "step": 10401 + }, + { + "epoch": 0.09846555787998977, + "grad_norm": 221.4352569580078, + "learning_rate": 1.975520376427396e-06, + "loss": 35.1953, + "step": 10402 + }, + { + "epoch": 0.09847502390170483, + "grad_norm": 490.8182678222656, + "learning_rate": 1.975513633918101e-06, + "loss": 42.9609, + "step": 10403 + }, + { + "epoch": 0.09848448992341989, + "grad_norm": 423.80902099609375, + "learning_rate": 1.975506890491886e-06, + "loss": 17.6172, + "step": 10404 + }, + { + "epoch": 0.09849395594513494, + "grad_norm": 231.14669799804688, + "learning_rate": 1.9755001461487588e-06, + "loss": 23.0859, + "step": 10405 + }, + { + "epoch": 0.09850342196685, + "grad_norm": 628.6065063476562, + "learning_rate": 1.975493400888724e-06, + "loss": 25.6797, + "step": 10406 + }, + { + "epoch": 0.09851288798856504, + "grad_norm": 305.96466064453125, + "learning_rate": 1.97548665471179e-06, + "loss": 16.8984, + "step": 10407 + }, + { + "epoch": 0.0985223540102801, + "grad_norm": 351.3771667480469, + "learning_rate": 1.975479907617961e-06, + "loss": 28.7656, + "step": 10408 + }, + { + "epoch": 0.09853182003199515, + "grad_norm": 294.8346252441406, + "learning_rate": 1.975473159607245e-06, + "loss": 32.3281, + "step": 10409 + }, + { + "epoch": 0.09854128605371021, + "grad_norm": 386.4176025390625, + "learning_rate": 1.975466410679647e-06, + "loss": 17.4961, + "step": 10410 + }, + { + "epoch": 0.09855075207542525, + "grad_norm": 484.5563049316406, + "learning_rate": 1.9754596608351742e-06, + "loss": 51.5156, + "step": 10411 + }, + { + "epoch": 0.09856021809714031, + "grad_norm": 219.4167022705078, + "learning_rate": 1.975452910073833e-06, + "loss": 15.6094, + "step": 10412 + }, + { + "epoch": 0.09856968411885537, + "grad_norm": 619.6824951171875, + "learning_rate": 1.9754461583956297e-06, + "loss": 41.2266, + "step": 10413 + }, + { + "epoch": 0.09857915014057042, + "grad_norm": 634.3948364257812, + "learning_rate": 1.97543940580057e-06, + "loss": 24.2344, + "step": 10414 + }, + { + "epoch": 0.09858861616228548, + "grad_norm": 261.8423767089844, + "learning_rate": 1.9754326522886612e-06, + "loss": 24.3906, + "step": 10415 + }, + { + "epoch": 0.09859808218400053, + "grad_norm": 408.32220458984375, + "learning_rate": 1.975425897859909e-06, + "loss": 25.2578, + "step": 10416 + }, + { + "epoch": 0.09860754820571559, + "grad_norm": 492.8872375488281, + "learning_rate": 1.97541914251432e-06, + "loss": 19.6484, + "step": 10417 + }, + { + "epoch": 0.09861701422743063, + "grad_norm": 3.365710973739624, + "learning_rate": 1.9754123862519e-06, + "loss": 0.9604, + "step": 10418 + }, + { + "epoch": 0.09862648024914569, + "grad_norm": 277.56634521484375, + "learning_rate": 1.975405629072656e-06, + "loss": 23.5469, + "step": 10419 + }, + { + "epoch": 0.09863594627086074, + "grad_norm": 866.2472534179688, + "learning_rate": 1.9753988709765944e-06, + "loss": 38.1562, + "step": 10420 + }, + { + "epoch": 0.0986454122925758, + "grad_norm": 404.5047607421875, + "learning_rate": 1.975392111963721e-06, + "loss": 22.0312, + "step": 10421 + }, + { + "epoch": 0.09865487831429086, + "grad_norm": 498.59259033203125, + "learning_rate": 1.975385352034043e-06, + "loss": 19.375, + "step": 10422 + }, + { + "epoch": 0.0986643443360059, + "grad_norm": 298.3697814941406, + "learning_rate": 1.975378591187566e-06, + "loss": 15.9297, + "step": 10423 + }, + { + "epoch": 0.09867381035772096, + "grad_norm": 291.7547607421875, + "learning_rate": 1.975371829424296e-06, + "loss": 32.0547, + "step": 10424 + }, + { + "epoch": 0.09868327637943601, + "grad_norm": 320.02288818359375, + "learning_rate": 1.9753650667442407e-06, + "loss": 17.2578, + "step": 10425 + }, + { + "epoch": 0.09869274240115107, + "grad_norm": 548.949462890625, + "learning_rate": 1.975358303147405e-06, + "loss": 60.9844, + "step": 10426 + }, + { + "epoch": 0.09870220842286612, + "grad_norm": 469.86529541015625, + "learning_rate": 1.9753515386337966e-06, + "loss": 22.0117, + "step": 10427 + }, + { + "epoch": 0.09871167444458118, + "grad_norm": 462.38055419921875, + "learning_rate": 1.975344773203421e-06, + "loss": 47.7188, + "step": 10428 + }, + { + "epoch": 0.09872114046629622, + "grad_norm": 307.0365905761719, + "learning_rate": 1.9753380068562846e-06, + "loss": 26.2812, + "step": 10429 + }, + { + "epoch": 0.09873060648801128, + "grad_norm": 278.0803527832031, + "learning_rate": 1.975331239592394e-06, + "loss": 20.5938, + "step": 10430 + }, + { + "epoch": 0.09874007250972634, + "grad_norm": 627.2938232421875, + "learning_rate": 1.9753244714117557e-06, + "loss": 13.2734, + "step": 10431 + }, + { + "epoch": 0.09874953853144139, + "grad_norm": 358.4927062988281, + "learning_rate": 1.975317702314376e-06, + "loss": 21.8047, + "step": 10432 + }, + { + "epoch": 0.09875900455315645, + "grad_norm": 647.2894897460938, + "learning_rate": 1.9753109323002603e-06, + "loss": 56.3125, + "step": 10433 + }, + { + "epoch": 0.0987684705748715, + "grad_norm": 325.6732482910156, + "learning_rate": 1.975304161369416e-06, + "loss": 31.4531, + "step": 10434 + }, + { + "epoch": 0.09877793659658655, + "grad_norm": 319.8504638671875, + "learning_rate": 1.9752973895218495e-06, + "loss": 27.2969, + "step": 10435 + }, + { + "epoch": 0.0987874026183016, + "grad_norm": 286.9356384277344, + "learning_rate": 1.975290616757567e-06, + "loss": 22.6875, + "step": 10436 + }, + { + "epoch": 0.09879686864001666, + "grad_norm": 940.7423706054688, + "learning_rate": 1.9752838430765746e-06, + "loss": 46.2109, + "step": 10437 + }, + { + "epoch": 0.09880633466173172, + "grad_norm": 187.41110229492188, + "learning_rate": 1.975277068478879e-06, + "loss": 19.2578, + "step": 10438 + }, + { + "epoch": 0.09881580068344677, + "grad_norm": 399.7290344238281, + "learning_rate": 1.9752702929644865e-06, + "loss": 14.6406, + "step": 10439 + }, + { + "epoch": 0.09882526670516183, + "grad_norm": 416.6968688964844, + "learning_rate": 1.975263516533403e-06, + "loss": 20.3359, + "step": 10440 + }, + { + "epoch": 0.09883473272687687, + "grad_norm": 225.5126953125, + "learning_rate": 1.9752567391856356e-06, + "loss": 10.5156, + "step": 10441 + }, + { + "epoch": 0.09884419874859193, + "grad_norm": 138.52012634277344, + "learning_rate": 1.97524996092119e-06, + "loss": 20.3672, + "step": 10442 + }, + { + "epoch": 0.09885366477030698, + "grad_norm": 426.450927734375, + "learning_rate": 1.975243181740073e-06, + "loss": 17.0547, + "step": 10443 + }, + { + "epoch": 0.09886313079202204, + "grad_norm": 303.82513427734375, + "learning_rate": 1.9752364016422906e-06, + "loss": 32.5625, + "step": 10444 + }, + { + "epoch": 0.09887259681373708, + "grad_norm": 612.205078125, + "learning_rate": 1.9752296206278497e-06, + "loss": 37.7188, + "step": 10445 + }, + { + "epoch": 0.09888206283545214, + "grad_norm": 225.19180297851562, + "learning_rate": 1.9752228386967564e-06, + "loss": 19.8984, + "step": 10446 + }, + { + "epoch": 0.0988915288571672, + "grad_norm": 195.0120086669922, + "learning_rate": 1.9752160558490168e-06, + "loss": 20.5312, + "step": 10447 + }, + { + "epoch": 0.09890099487888225, + "grad_norm": 502.1038513183594, + "learning_rate": 1.975209272084638e-06, + "loss": 45.5, + "step": 10448 + }, + { + "epoch": 0.09891046090059731, + "grad_norm": 179.06121826171875, + "learning_rate": 1.9752024874036256e-06, + "loss": 16.4844, + "step": 10449 + }, + { + "epoch": 0.09891992692231236, + "grad_norm": 233.52622985839844, + "learning_rate": 1.9751957018059863e-06, + "loss": 8.5352, + "step": 10450 + }, + { + "epoch": 0.09892939294402742, + "grad_norm": 295.1914978027344, + "learning_rate": 1.975188915291727e-06, + "loss": 11.5078, + "step": 10451 + }, + { + "epoch": 0.09893885896574246, + "grad_norm": 808.3541259765625, + "learning_rate": 1.975182127860853e-06, + "loss": 15.918, + "step": 10452 + }, + { + "epoch": 0.09894832498745752, + "grad_norm": 392.3846435546875, + "learning_rate": 1.975175339513371e-06, + "loss": 42.4531, + "step": 10453 + }, + { + "epoch": 0.09895779100917257, + "grad_norm": 424.0802917480469, + "learning_rate": 1.9751685502492884e-06, + "loss": 25.4609, + "step": 10454 + }, + { + "epoch": 0.09896725703088763, + "grad_norm": 222.34893798828125, + "learning_rate": 1.9751617600686105e-06, + "loss": 17.8906, + "step": 10455 + }, + { + "epoch": 0.09897672305260269, + "grad_norm": 1006.8344116210938, + "learning_rate": 1.975154968971344e-06, + "loss": 32.3281, + "step": 10456 + }, + { + "epoch": 0.09898618907431773, + "grad_norm": 201.7296142578125, + "learning_rate": 1.9751481769574947e-06, + "loss": 24.8594, + "step": 10457 + }, + { + "epoch": 0.0989956550960328, + "grad_norm": 464.29888916015625, + "learning_rate": 1.9751413840270705e-06, + "loss": 35.4375, + "step": 10458 + }, + { + "epoch": 0.09900512111774784, + "grad_norm": 579.51220703125, + "learning_rate": 1.975134590180076e-06, + "loss": 38.5156, + "step": 10459 + }, + { + "epoch": 0.0990145871394629, + "grad_norm": 327.7087097167969, + "learning_rate": 1.975127795416519e-06, + "loss": 33.8438, + "step": 10460 + }, + { + "epoch": 0.09902405316117795, + "grad_norm": 3.0270233154296875, + "learning_rate": 1.975120999736405e-06, + "loss": 0.8538, + "step": 10461 + }, + { + "epoch": 0.099033519182893, + "grad_norm": 592.593505859375, + "learning_rate": 1.975114203139741e-06, + "loss": 41.1875, + "step": 10462 + }, + { + "epoch": 0.09904298520460805, + "grad_norm": 253.55532836914062, + "learning_rate": 1.9751074056265325e-06, + "loss": 18.6875, + "step": 10463 + }, + { + "epoch": 0.09905245122632311, + "grad_norm": 616.60986328125, + "learning_rate": 1.9751006071967866e-06, + "loss": 19.25, + "step": 10464 + }, + { + "epoch": 0.09906191724803817, + "grad_norm": 528.144775390625, + "learning_rate": 1.97509380785051e-06, + "loss": 22.125, + "step": 10465 + }, + { + "epoch": 0.09907138326975322, + "grad_norm": 1471.817138671875, + "learning_rate": 1.9750870075877082e-06, + "loss": 15.9805, + "step": 10466 + }, + { + "epoch": 0.09908084929146828, + "grad_norm": 3.264573097229004, + "learning_rate": 1.975080206408389e-06, + "loss": 0.9453, + "step": 10467 + }, + { + "epoch": 0.09909031531318332, + "grad_norm": 313.4655456542969, + "learning_rate": 1.975073404312557e-06, + "loss": 20.9727, + "step": 10468 + }, + { + "epoch": 0.09909978133489838, + "grad_norm": 460.9433898925781, + "learning_rate": 1.975066601300219e-06, + "loss": 9.1973, + "step": 10469 + }, + { + "epoch": 0.09910924735661343, + "grad_norm": 518.381591796875, + "learning_rate": 1.9750597973713826e-06, + "loss": 45.6562, + "step": 10470 + }, + { + "epoch": 0.09911871337832849, + "grad_norm": 497.022216796875, + "learning_rate": 1.9750529925260533e-06, + "loss": 37.6016, + "step": 10471 + }, + { + "epoch": 0.09912817940004354, + "grad_norm": 1456.469970703125, + "learning_rate": 1.9750461867642378e-06, + "loss": 33.7734, + "step": 10472 + }, + { + "epoch": 0.0991376454217586, + "grad_norm": 770.9595336914062, + "learning_rate": 1.975039380085942e-06, + "loss": 39.6562, + "step": 10473 + }, + { + "epoch": 0.09914711144347366, + "grad_norm": 1448.3214111328125, + "learning_rate": 1.9750325724911725e-06, + "loss": 24.2266, + "step": 10474 + }, + { + "epoch": 0.0991565774651887, + "grad_norm": 259.16278076171875, + "learning_rate": 1.9750257639799363e-06, + "loss": 26.6406, + "step": 10475 + }, + { + "epoch": 0.09916604348690376, + "grad_norm": 201.31863403320312, + "learning_rate": 1.9750189545522385e-06, + "loss": 27.9375, + "step": 10476 + }, + { + "epoch": 0.09917550950861881, + "grad_norm": 207.7871551513672, + "learning_rate": 1.9750121442080874e-06, + "loss": 18.5781, + "step": 10477 + }, + { + "epoch": 0.09918497553033387, + "grad_norm": 292.58953857421875, + "learning_rate": 1.9750053329474873e-06, + "loss": 11.6406, + "step": 10478 + }, + { + "epoch": 0.09919444155204891, + "grad_norm": 308.9752502441406, + "learning_rate": 1.974998520770446e-06, + "loss": 12.4141, + "step": 10479 + }, + { + "epoch": 0.09920390757376397, + "grad_norm": 494.34814453125, + "learning_rate": 1.9749917076769697e-06, + "loss": 25.6719, + "step": 10480 + }, + { + "epoch": 0.09921337359547903, + "grad_norm": 363.6687927246094, + "learning_rate": 1.974984893667064e-06, + "loss": 14.4219, + "step": 10481 + }, + { + "epoch": 0.09922283961719408, + "grad_norm": 1657.044921875, + "learning_rate": 1.9749780787407367e-06, + "loss": 53.3438, + "step": 10482 + }, + { + "epoch": 0.09923230563890914, + "grad_norm": 824.8363647460938, + "learning_rate": 1.9749712628979933e-06, + "loss": 40.875, + "step": 10483 + }, + { + "epoch": 0.09924177166062419, + "grad_norm": 982.764404296875, + "learning_rate": 1.97496444613884e-06, + "loss": 54.6641, + "step": 10484 + }, + { + "epoch": 0.09925123768233925, + "grad_norm": 374.8672180175781, + "learning_rate": 1.974957628463284e-06, + "loss": 21.582, + "step": 10485 + }, + { + "epoch": 0.09926070370405429, + "grad_norm": 264.6556396484375, + "learning_rate": 1.974950809871331e-06, + "loss": 12.0664, + "step": 10486 + }, + { + "epoch": 0.09927016972576935, + "grad_norm": 225.75660705566406, + "learning_rate": 1.9749439903629875e-06, + "loss": 22.3672, + "step": 10487 + }, + { + "epoch": 0.0992796357474844, + "grad_norm": 377.72064208984375, + "learning_rate": 1.97493716993826e-06, + "loss": 14.6406, + "step": 10488 + }, + { + "epoch": 0.09928910176919946, + "grad_norm": 580.0833129882812, + "learning_rate": 1.974930348597155e-06, + "loss": 44.9062, + "step": 10489 + }, + { + "epoch": 0.09929856779091452, + "grad_norm": 4.108983516693115, + "learning_rate": 1.974923526339679e-06, + "loss": 1.0034, + "step": 10490 + }, + { + "epoch": 0.09930803381262956, + "grad_norm": 195.62484741210938, + "learning_rate": 1.9749167031658385e-06, + "loss": 33.1719, + "step": 10491 + }, + { + "epoch": 0.09931749983434462, + "grad_norm": 174.94029235839844, + "learning_rate": 1.97490987907564e-06, + "loss": 20.9297, + "step": 10492 + }, + { + "epoch": 0.09932696585605967, + "grad_norm": 457.2878112792969, + "learning_rate": 1.9749030540690893e-06, + "loss": 50.6406, + "step": 10493 + }, + { + "epoch": 0.09933643187777473, + "grad_norm": 345.0350341796875, + "learning_rate": 1.974896228146193e-06, + "loss": 34.7969, + "step": 10494 + }, + { + "epoch": 0.09934589789948978, + "grad_norm": 457.0674133300781, + "learning_rate": 1.974889401306958e-06, + "loss": 19.6406, + "step": 10495 + }, + { + "epoch": 0.09935536392120484, + "grad_norm": 360.1155090332031, + "learning_rate": 1.9748825735513898e-06, + "loss": 45.5703, + "step": 10496 + }, + { + "epoch": 0.09936482994291988, + "grad_norm": 286.20135498046875, + "learning_rate": 1.9748757448794955e-06, + "loss": 35.5312, + "step": 10497 + }, + { + "epoch": 0.09937429596463494, + "grad_norm": 598.2994995117188, + "learning_rate": 1.9748689152912823e-06, + "loss": 66.7188, + "step": 10498 + }, + { + "epoch": 0.09938376198635, + "grad_norm": 274.84857177734375, + "learning_rate": 1.9748620847867547e-06, + "loss": 22.8672, + "step": 10499 + }, + { + "epoch": 0.09939322800806505, + "grad_norm": 282.0081787109375, + "learning_rate": 1.974855253365921e-06, + "loss": 16.25, + "step": 10500 + }, + { + "epoch": 0.09940269402978011, + "grad_norm": 536.1817626953125, + "learning_rate": 1.9748484210287863e-06, + "loss": 31.6484, + "step": 10501 + }, + { + "epoch": 0.09941216005149515, + "grad_norm": 358.7933654785156, + "learning_rate": 1.9748415877753573e-06, + "loss": 29.0938, + "step": 10502 + }, + { + "epoch": 0.09942162607321021, + "grad_norm": 275.96142578125, + "learning_rate": 1.9748347536056407e-06, + "loss": 26.5312, + "step": 10503 + }, + { + "epoch": 0.09943109209492526, + "grad_norm": 282.97637939453125, + "learning_rate": 1.9748279185196433e-06, + "loss": 23.5938, + "step": 10504 + }, + { + "epoch": 0.09944055811664032, + "grad_norm": 225.2004852294922, + "learning_rate": 1.9748210825173704e-06, + "loss": 19.3125, + "step": 10505 + }, + { + "epoch": 0.09945002413835537, + "grad_norm": 280.664794921875, + "learning_rate": 1.97481424559883e-06, + "loss": 40.7969, + "step": 10506 + }, + { + "epoch": 0.09945949016007043, + "grad_norm": 2.9684271812438965, + "learning_rate": 1.974807407764027e-06, + "loss": 0.9756, + "step": 10507 + }, + { + "epoch": 0.09946895618178549, + "grad_norm": 635.0742797851562, + "learning_rate": 1.9748005690129685e-06, + "loss": 33.0273, + "step": 10508 + }, + { + "epoch": 0.09947842220350053, + "grad_norm": 484.6530456542969, + "learning_rate": 1.974793729345661e-06, + "loss": 23.6719, + "step": 10509 + }, + { + "epoch": 0.09948788822521559, + "grad_norm": 311.8675231933594, + "learning_rate": 1.9747868887621107e-06, + "loss": 28.1094, + "step": 10510 + }, + { + "epoch": 0.09949735424693064, + "grad_norm": 327.2730407714844, + "learning_rate": 1.9747800472623243e-06, + "loss": 34.2031, + "step": 10511 + }, + { + "epoch": 0.0995068202686457, + "grad_norm": 3.4789772033691406, + "learning_rate": 1.974773204846308e-06, + "loss": 0.9238, + "step": 10512 + }, + { + "epoch": 0.09951628629036074, + "grad_norm": 257.3350830078125, + "learning_rate": 1.974766361514068e-06, + "loss": 18.5625, + "step": 10513 + }, + { + "epoch": 0.0995257523120758, + "grad_norm": 556.5375366210938, + "learning_rate": 1.9747595172656115e-06, + "loss": 23.3359, + "step": 10514 + }, + { + "epoch": 0.09953521833379085, + "grad_norm": 574.2821044921875, + "learning_rate": 1.9747526721009443e-06, + "loss": 58.2656, + "step": 10515 + }, + { + "epoch": 0.09954468435550591, + "grad_norm": 457.4751892089844, + "learning_rate": 1.974745826020073e-06, + "loss": 57.4844, + "step": 10516 + }, + { + "epoch": 0.09955415037722097, + "grad_norm": 587.476318359375, + "learning_rate": 1.974738979023004e-06, + "loss": 39.3906, + "step": 10517 + }, + { + "epoch": 0.09956361639893602, + "grad_norm": 280.83831787109375, + "learning_rate": 1.974732131109744e-06, + "loss": 24.7734, + "step": 10518 + }, + { + "epoch": 0.09957308242065108, + "grad_norm": 280.8338928222656, + "learning_rate": 1.974725282280299e-06, + "loss": 25.7031, + "step": 10519 + }, + { + "epoch": 0.09958254844236612, + "grad_norm": 324.6818542480469, + "learning_rate": 1.9747184325346754e-06, + "loss": 20.9219, + "step": 10520 + }, + { + "epoch": 0.09959201446408118, + "grad_norm": 265.00360107421875, + "learning_rate": 1.97471158187288e-06, + "loss": 21.2656, + "step": 10521 + }, + { + "epoch": 0.09960148048579623, + "grad_norm": 446.798828125, + "learning_rate": 1.9747047302949194e-06, + "loss": 35.1719, + "step": 10522 + }, + { + "epoch": 0.09961094650751129, + "grad_norm": 335.78973388671875, + "learning_rate": 1.9746978778007995e-06, + "loss": 10.3828, + "step": 10523 + }, + { + "epoch": 0.09962041252922635, + "grad_norm": 708.5814208984375, + "learning_rate": 1.9746910243905275e-06, + "loss": 55.9844, + "step": 10524 + }, + { + "epoch": 0.0996298785509414, + "grad_norm": 308.51129150390625, + "learning_rate": 1.9746841700641084e-06, + "loss": 33.1562, + "step": 10525 + }, + { + "epoch": 0.09963934457265645, + "grad_norm": 1825.2071533203125, + "learning_rate": 1.9746773148215504e-06, + "loss": 40.4766, + "step": 10526 + }, + { + "epoch": 0.0996488105943715, + "grad_norm": 3.9277267456054688, + "learning_rate": 1.974670458662859e-06, + "loss": 0.9819, + "step": 10527 + }, + { + "epoch": 0.09965827661608656, + "grad_norm": 717.6543579101562, + "learning_rate": 1.9746636015880403e-06, + "loss": 35.0859, + "step": 10528 + }, + { + "epoch": 0.0996677426378016, + "grad_norm": 376.1217956542969, + "learning_rate": 1.9746567435971017e-06, + "loss": 31.4258, + "step": 10529 + }, + { + "epoch": 0.09967720865951667, + "grad_norm": 585.8643188476562, + "learning_rate": 1.974649884690049e-06, + "loss": 53.5, + "step": 10530 + }, + { + "epoch": 0.09968667468123171, + "grad_norm": 663.4803466796875, + "learning_rate": 1.974643024866889e-06, + "loss": 95.0781, + "step": 10531 + }, + { + "epoch": 0.09969614070294677, + "grad_norm": 226.31419372558594, + "learning_rate": 1.9746361641276275e-06, + "loss": 16.9453, + "step": 10532 + }, + { + "epoch": 0.09970560672466183, + "grad_norm": 456.3667907714844, + "learning_rate": 1.9746293024722716e-06, + "loss": 48.4766, + "step": 10533 + }, + { + "epoch": 0.09971507274637688, + "grad_norm": 313.5391845703125, + "learning_rate": 1.974622439900828e-06, + "loss": 22.9375, + "step": 10534 + }, + { + "epoch": 0.09972453876809194, + "grad_norm": 991.4757690429688, + "learning_rate": 1.9746155764133024e-06, + "loss": 46.3516, + "step": 10535 + }, + { + "epoch": 0.09973400478980698, + "grad_norm": 442.83349609375, + "learning_rate": 1.9746087120097014e-06, + "loss": 30.8281, + "step": 10536 + }, + { + "epoch": 0.09974347081152204, + "grad_norm": 343.6889343261719, + "learning_rate": 1.974601846690032e-06, + "loss": 22.0625, + "step": 10537 + }, + { + "epoch": 0.09975293683323709, + "grad_norm": 444.3009338378906, + "learning_rate": 1.9745949804543e-06, + "loss": 24.0703, + "step": 10538 + }, + { + "epoch": 0.09976240285495215, + "grad_norm": 223.16622924804688, + "learning_rate": 1.974588113302512e-06, + "loss": 22.3047, + "step": 10539 + }, + { + "epoch": 0.0997718688766672, + "grad_norm": 255.51681518554688, + "learning_rate": 1.974581245234675e-06, + "loss": 21.2344, + "step": 10540 + }, + { + "epoch": 0.09978133489838226, + "grad_norm": 411.4266052246094, + "learning_rate": 1.9745743762507945e-06, + "loss": 24.5078, + "step": 10541 + }, + { + "epoch": 0.09979080092009732, + "grad_norm": 629.9450073242188, + "learning_rate": 1.9745675063508777e-06, + "loss": 48.2891, + "step": 10542 + }, + { + "epoch": 0.09980026694181236, + "grad_norm": 788.22705078125, + "learning_rate": 1.974560635534931e-06, + "loss": 48.6406, + "step": 10543 + }, + { + "epoch": 0.09980973296352742, + "grad_norm": 433.68218994140625, + "learning_rate": 1.9745537638029608e-06, + "loss": 20.7266, + "step": 10544 + }, + { + "epoch": 0.09981919898524247, + "grad_norm": 190.0010223388672, + "learning_rate": 1.974546891154973e-06, + "loss": 10.2266, + "step": 10545 + }, + { + "epoch": 0.09982866500695753, + "grad_norm": 434.3814697265625, + "learning_rate": 1.9745400175909746e-06, + "loss": 36.4531, + "step": 10546 + }, + { + "epoch": 0.09983813102867257, + "grad_norm": 208.67723083496094, + "learning_rate": 1.9745331431109723e-06, + "loss": 16.8828, + "step": 10547 + }, + { + "epoch": 0.09984759705038763, + "grad_norm": 341.0684814453125, + "learning_rate": 1.974526267714972e-06, + "loss": 34.125, + "step": 10548 + }, + { + "epoch": 0.09985706307210268, + "grad_norm": 166.9879150390625, + "learning_rate": 1.9745193914029806e-06, + "loss": 21.9375, + "step": 10549 + }, + { + "epoch": 0.09986652909381774, + "grad_norm": 313.43023681640625, + "learning_rate": 1.974512514175004e-06, + "loss": 47.1016, + "step": 10550 + }, + { + "epoch": 0.0998759951155328, + "grad_norm": 322.5186767578125, + "learning_rate": 1.9745056360310493e-06, + "loss": 37.9844, + "step": 10551 + }, + { + "epoch": 0.09988546113724785, + "grad_norm": 306.283203125, + "learning_rate": 1.974498756971123e-06, + "loss": 17.1836, + "step": 10552 + }, + { + "epoch": 0.0998949271589629, + "grad_norm": 305.0131530761719, + "learning_rate": 1.974491876995231e-06, + "loss": 29.4766, + "step": 10553 + }, + { + "epoch": 0.09990439318067795, + "grad_norm": 524.6256103515625, + "learning_rate": 1.9744849961033796e-06, + "loss": 52.8438, + "step": 10554 + }, + { + "epoch": 0.09991385920239301, + "grad_norm": 389.64788818359375, + "learning_rate": 1.9744781142955763e-06, + "loss": 13.2891, + "step": 10555 + }, + { + "epoch": 0.09992332522410806, + "grad_norm": 902.099853515625, + "learning_rate": 1.9744712315718267e-06, + "loss": 30.7344, + "step": 10556 + }, + { + "epoch": 0.09993279124582312, + "grad_norm": 428.68670654296875, + "learning_rate": 1.9744643479321376e-06, + "loss": 15.207, + "step": 10557 + }, + { + "epoch": 0.09994225726753816, + "grad_norm": 546.7999877929688, + "learning_rate": 1.9744574633765153e-06, + "loss": 48.5, + "step": 10558 + }, + { + "epoch": 0.09995172328925322, + "grad_norm": 385.1988830566406, + "learning_rate": 1.9744505779049667e-06, + "loss": 34.8594, + "step": 10559 + }, + { + "epoch": 0.09996118931096828, + "grad_norm": 710.0870361328125, + "learning_rate": 1.9744436915174976e-06, + "loss": 38.6797, + "step": 10560 + }, + { + "epoch": 0.09997065533268333, + "grad_norm": 734.7847900390625, + "learning_rate": 1.974436804214115e-06, + "loss": 66.3125, + "step": 10561 + }, + { + "epoch": 0.09998012135439839, + "grad_norm": 548.478515625, + "learning_rate": 1.974429915994825e-06, + "loss": 24.125, + "step": 10562 + }, + { + "epoch": 0.09998958737611344, + "grad_norm": 514.9948120117188, + "learning_rate": 1.9744230268596347e-06, + "loss": 28.1094, + "step": 10563 + }, + { + "epoch": 0.0999990533978285, + "grad_norm": 789.7698364257812, + "learning_rate": 1.97441613680855e-06, + "loss": 37.1953, + "step": 10564 + }, + { + "epoch": 0.10000851941954354, + "grad_norm": 335.65789794921875, + "learning_rate": 1.9744092458415773e-06, + "loss": 23.2188, + "step": 10565 + }, + { + "epoch": 0.1000179854412586, + "grad_norm": 295.2977294921875, + "learning_rate": 1.974402353958723e-06, + "loss": 24.332, + "step": 10566 + }, + { + "epoch": 0.10002745146297366, + "grad_norm": 272.85845947265625, + "learning_rate": 1.9743954611599944e-06, + "loss": 16.4141, + "step": 10567 + }, + { + "epoch": 0.10003691748468871, + "grad_norm": 3.7741830348968506, + "learning_rate": 1.9743885674453973e-06, + "loss": 1.0093, + "step": 10568 + }, + { + "epoch": 0.10004638350640377, + "grad_norm": 639.1832275390625, + "learning_rate": 1.9743816728149386e-06, + "loss": 35.7188, + "step": 10569 + }, + { + "epoch": 0.10005584952811881, + "grad_norm": 465.90826416015625, + "learning_rate": 1.974374777268624e-06, + "loss": 24.875, + "step": 10570 + }, + { + "epoch": 0.10006531554983387, + "grad_norm": 318.6462707519531, + "learning_rate": 1.974367880806461e-06, + "loss": 27.3984, + "step": 10571 + }, + { + "epoch": 0.10007478157154892, + "grad_norm": 343.479248046875, + "learning_rate": 1.974360983428455e-06, + "loss": 24.4141, + "step": 10572 + }, + { + "epoch": 0.10008424759326398, + "grad_norm": 410.5221862792969, + "learning_rate": 1.9743540851346133e-06, + "loss": 41.2891, + "step": 10573 + }, + { + "epoch": 0.10009371361497903, + "grad_norm": 405.2139587402344, + "learning_rate": 1.9743471859249425e-06, + "loss": 52.7656, + "step": 10574 + }, + { + "epoch": 0.10010317963669409, + "grad_norm": 417.7200622558594, + "learning_rate": 1.974340285799448e-06, + "loss": 42.5078, + "step": 10575 + }, + { + "epoch": 0.10011264565840915, + "grad_norm": 621.2086791992188, + "learning_rate": 1.974333384758138e-06, + "loss": 34.6484, + "step": 10576 + }, + { + "epoch": 0.10012211168012419, + "grad_norm": 824.2508544921875, + "learning_rate": 1.9743264828010175e-06, + "loss": 64.3125, + "step": 10577 + }, + { + "epoch": 0.10013157770183925, + "grad_norm": 711.673583984375, + "learning_rate": 1.9743195799280934e-06, + "loss": 53.8594, + "step": 10578 + }, + { + "epoch": 0.1001410437235543, + "grad_norm": 195.89825439453125, + "learning_rate": 1.974312676139372e-06, + "loss": 19.7344, + "step": 10579 + }, + { + "epoch": 0.10015050974526936, + "grad_norm": 433.01593017578125, + "learning_rate": 1.9743057714348605e-06, + "loss": 44.7344, + "step": 10580 + }, + { + "epoch": 0.1001599757669844, + "grad_norm": 277.92962646484375, + "learning_rate": 1.974298865814565e-06, + "loss": 19.8125, + "step": 10581 + }, + { + "epoch": 0.10016944178869946, + "grad_norm": 461.06976318359375, + "learning_rate": 1.9742919592784918e-06, + "loss": 36.6406, + "step": 10582 + }, + { + "epoch": 0.10017890781041451, + "grad_norm": 3.0607473850250244, + "learning_rate": 1.974285051826648e-06, + "loss": 0.918, + "step": 10583 + }, + { + "epoch": 0.10018837383212957, + "grad_norm": 381.955810546875, + "learning_rate": 1.974278143459039e-06, + "loss": 58.6875, + "step": 10584 + }, + { + "epoch": 0.10019783985384463, + "grad_norm": 270.6435546875, + "learning_rate": 1.974271234175672e-06, + "loss": 25.8906, + "step": 10585 + }, + { + "epoch": 0.10020730587555968, + "grad_norm": 509.0980224609375, + "learning_rate": 1.9742643239765533e-06, + "loss": 51.3906, + "step": 10586 + }, + { + "epoch": 0.10021677189727474, + "grad_norm": 294.2142639160156, + "learning_rate": 1.97425741286169e-06, + "loss": 16.6094, + "step": 10587 + }, + { + "epoch": 0.10022623791898978, + "grad_norm": 944.1612548828125, + "learning_rate": 1.974250500831088e-06, + "loss": 44.6016, + "step": 10588 + }, + { + "epoch": 0.10023570394070484, + "grad_norm": 470.3285217285156, + "learning_rate": 1.9742435878847538e-06, + "loss": 54.125, + "step": 10589 + }, + { + "epoch": 0.10024516996241989, + "grad_norm": 348.0058898925781, + "learning_rate": 1.974236674022694e-06, + "loss": 17.2188, + "step": 10590 + }, + { + "epoch": 0.10025463598413495, + "grad_norm": 435.2326354980469, + "learning_rate": 1.974229759244915e-06, + "loss": 28.3516, + "step": 10591 + }, + { + "epoch": 0.10026410200585, + "grad_norm": 253.0084228515625, + "learning_rate": 1.9742228435514236e-06, + "loss": 17.668, + "step": 10592 + }, + { + "epoch": 0.10027356802756505, + "grad_norm": 319.3443908691406, + "learning_rate": 1.974215926942226e-06, + "loss": 39.2188, + "step": 10593 + }, + { + "epoch": 0.10028303404928011, + "grad_norm": 264.6986083984375, + "learning_rate": 1.9742090094173285e-06, + "loss": 25.4062, + "step": 10594 + }, + { + "epoch": 0.10029250007099516, + "grad_norm": 278.1637268066406, + "learning_rate": 1.9742020909767385e-06, + "loss": 26.3438, + "step": 10595 + }, + { + "epoch": 0.10030196609271022, + "grad_norm": 496.13433837890625, + "learning_rate": 1.974195171620462e-06, + "loss": 43.4219, + "step": 10596 + }, + { + "epoch": 0.10031143211442527, + "grad_norm": 498.82684326171875, + "learning_rate": 1.9741882513485044e-06, + "loss": 44.7812, + "step": 10597 + }, + { + "epoch": 0.10032089813614033, + "grad_norm": 523.537841796875, + "learning_rate": 1.974181330160874e-06, + "loss": 40.6406, + "step": 10598 + }, + { + "epoch": 0.10033036415785537, + "grad_norm": 279.31036376953125, + "learning_rate": 1.9741744080575762e-06, + "loss": 35.0547, + "step": 10599 + }, + { + "epoch": 0.10033983017957043, + "grad_norm": 401.4710388183594, + "learning_rate": 1.974167485038618e-06, + "loss": 36.7266, + "step": 10600 + }, + { + "epoch": 0.10034929620128548, + "grad_norm": 907.2523803710938, + "learning_rate": 1.974160561104006e-06, + "loss": 49.4766, + "step": 10601 + }, + { + "epoch": 0.10035876222300054, + "grad_norm": 3.494854211807251, + "learning_rate": 1.974153636253746e-06, + "loss": 1.1094, + "step": 10602 + }, + { + "epoch": 0.1003682282447156, + "grad_norm": 851.9781494140625, + "learning_rate": 1.9741467104878454e-06, + "loss": 36.6797, + "step": 10603 + }, + { + "epoch": 0.10037769426643064, + "grad_norm": 313.6498107910156, + "learning_rate": 1.97413978380631e-06, + "loss": 22.4297, + "step": 10604 + }, + { + "epoch": 0.1003871602881457, + "grad_norm": 370.8446044921875, + "learning_rate": 1.9741328562091465e-06, + "loss": 37.25, + "step": 10605 + }, + { + "epoch": 0.10039662630986075, + "grad_norm": 392.18585205078125, + "learning_rate": 1.974125927696362e-06, + "loss": 23.2969, + "step": 10606 + }, + { + "epoch": 0.10040609233157581, + "grad_norm": 582.4617309570312, + "learning_rate": 1.974118998267962e-06, + "loss": 59.2969, + "step": 10607 + }, + { + "epoch": 0.10041555835329086, + "grad_norm": 417.8567810058594, + "learning_rate": 1.9741120679239534e-06, + "loss": 15.3477, + "step": 10608 + }, + { + "epoch": 0.10042502437500592, + "grad_norm": 718.2811279296875, + "learning_rate": 1.974105136664343e-06, + "loss": 32.4766, + "step": 10609 + }, + { + "epoch": 0.10043449039672098, + "grad_norm": 314.06793212890625, + "learning_rate": 1.974098204489137e-06, + "loss": 27.4688, + "step": 10610 + }, + { + "epoch": 0.10044395641843602, + "grad_norm": 2.930375099182129, + "learning_rate": 1.9740912713983423e-06, + "loss": 0.9126, + "step": 10611 + }, + { + "epoch": 0.10045342244015108, + "grad_norm": 169.0237579345703, + "learning_rate": 1.974084337391965e-06, + "loss": 16.5703, + "step": 10612 + }, + { + "epoch": 0.10046288846186613, + "grad_norm": 514.50146484375, + "learning_rate": 1.974077402470012e-06, + "loss": 18.5859, + "step": 10613 + }, + { + "epoch": 0.10047235448358119, + "grad_norm": 1485.6439208984375, + "learning_rate": 1.9740704666324898e-06, + "loss": 34.7891, + "step": 10614 + }, + { + "epoch": 0.10048182050529623, + "grad_norm": 754.9489135742188, + "learning_rate": 1.974063529879404e-06, + "loss": 25.6406, + "step": 10615 + }, + { + "epoch": 0.1004912865270113, + "grad_norm": 325.19146728515625, + "learning_rate": 1.9740565922107625e-06, + "loss": 22.8359, + "step": 10616 + }, + { + "epoch": 0.10050075254872634, + "grad_norm": 420.18408203125, + "learning_rate": 1.974049653626571e-06, + "loss": 30.0547, + "step": 10617 + }, + { + "epoch": 0.1005102185704414, + "grad_norm": 328.6214599609375, + "learning_rate": 1.974042714126836e-06, + "loss": 20.2891, + "step": 10618 + }, + { + "epoch": 0.10051968459215646, + "grad_norm": 416.1221923828125, + "learning_rate": 1.974035773711564e-06, + "loss": 22.8125, + "step": 10619 + }, + { + "epoch": 0.1005291506138715, + "grad_norm": 295.1413269042969, + "learning_rate": 1.9740288323807625e-06, + "loss": 23.0312, + "step": 10620 + }, + { + "epoch": 0.10053861663558657, + "grad_norm": 483.65618896484375, + "learning_rate": 1.974021890134437e-06, + "loss": 49.375, + "step": 10621 + }, + { + "epoch": 0.10054808265730161, + "grad_norm": 409.1482849121094, + "learning_rate": 1.974014946972594e-06, + "loss": 40.0938, + "step": 10622 + }, + { + "epoch": 0.10055754867901667, + "grad_norm": 384.18988037109375, + "learning_rate": 1.9740080028952402e-06, + "loss": 27.2266, + "step": 10623 + }, + { + "epoch": 0.10056701470073172, + "grad_norm": 426.2543029785156, + "learning_rate": 1.9740010579023827e-06, + "loss": 41.5781, + "step": 10624 + }, + { + "epoch": 0.10057648072244678, + "grad_norm": 779.4286499023438, + "learning_rate": 1.9739941119940275e-06, + "loss": 8.5586, + "step": 10625 + }, + { + "epoch": 0.10058594674416182, + "grad_norm": 518.2972412109375, + "learning_rate": 1.9739871651701808e-06, + "loss": 41.1484, + "step": 10626 + }, + { + "epoch": 0.10059541276587688, + "grad_norm": 188.68101501464844, + "learning_rate": 1.97398021743085e-06, + "loss": 19.0938, + "step": 10627 + }, + { + "epoch": 0.10060487878759194, + "grad_norm": 1193.4737548828125, + "learning_rate": 1.9739732687760407e-06, + "loss": 54.6875, + "step": 10628 + }, + { + "epoch": 0.10061434480930699, + "grad_norm": 288.6430358886719, + "learning_rate": 1.9739663192057604e-06, + "loss": 18.1875, + "step": 10629 + }, + { + "epoch": 0.10062381083102205, + "grad_norm": 233.88282775878906, + "learning_rate": 1.9739593687200145e-06, + "loss": 18.8906, + "step": 10630 + }, + { + "epoch": 0.1006332768527371, + "grad_norm": 303.88177490234375, + "learning_rate": 1.9739524173188107e-06, + "loss": 28.1484, + "step": 10631 + }, + { + "epoch": 0.10064274287445216, + "grad_norm": 2106.4033203125, + "learning_rate": 1.9739454650021548e-06, + "loss": 42.3633, + "step": 10632 + }, + { + "epoch": 0.1006522088961672, + "grad_norm": 1197.361328125, + "learning_rate": 1.9739385117700532e-06, + "loss": 48.2539, + "step": 10633 + }, + { + "epoch": 0.10066167491788226, + "grad_norm": 287.88623046875, + "learning_rate": 1.973931557622513e-06, + "loss": 23.2969, + "step": 10634 + }, + { + "epoch": 0.10067114093959731, + "grad_norm": 608.188720703125, + "learning_rate": 1.973924602559541e-06, + "loss": 63.0938, + "step": 10635 + }, + { + "epoch": 0.10068060696131237, + "grad_norm": 279.9298095703125, + "learning_rate": 1.9739176465811427e-06, + "loss": 28.7656, + "step": 10636 + }, + { + "epoch": 0.10069007298302743, + "grad_norm": 173.24404907226562, + "learning_rate": 1.9739106896873253e-06, + "loss": 19.3125, + "step": 10637 + }, + { + "epoch": 0.10069953900474247, + "grad_norm": 448.0154724121094, + "learning_rate": 1.9739037318780953e-06, + "loss": 19.4141, + "step": 10638 + }, + { + "epoch": 0.10070900502645753, + "grad_norm": 304.6627502441406, + "learning_rate": 1.973896773153459e-06, + "loss": 29.9531, + "step": 10639 + }, + { + "epoch": 0.10071847104817258, + "grad_norm": 423.02191162109375, + "learning_rate": 1.973889813513423e-06, + "loss": 40.9922, + "step": 10640 + }, + { + "epoch": 0.10072793706988764, + "grad_norm": 675.7747192382812, + "learning_rate": 1.973882852957994e-06, + "loss": 37.9258, + "step": 10641 + }, + { + "epoch": 0.10073740309160269, + "grad_norm": 317.4502258300781, + "learning_rate": 1.9738758914871784e-06, + "loss": 36.1719, + "step": 10642 + }, + { + "epoch": 0.10074686911331775, + "grad_norm": 265.65771484375, + "learning_rate": 1.973868929100983e-06, + "loss": 20.5859, + "step": 10643 + }, + { + "epoch": 0.10075633513503279, + "grad_norm": 490.9431457519531, + "learning_rate": 1.973861965799414e-06, + "loss": 30.4922, + "step": 10644 + }, + { + "epoch": 0.10076580115674785, + "grad_norm": 245.01992797851562, + "learning_rate": 1.9738550015824783e-06, + "loss": 21.5234, + "step": 10645 + }, + { + "epoch": 0.10077526717846291, + "grad_norm": 462.2872009277344, + "learning_rate": 1.973848036450182e-06, + "loss": 28.4688, + "step": 10646 + }, + { + "epoch": 0.10078473320017796, + "grad_norm": 411.36083984375, + "learning_rate": 1.9738410704025323e-06, + "loss": 28.1406, + "step": 10647 + }, + { + "epoch": 0.10079419922189302, + "grad_norm": 486.3639831542969, + "learning_rate": 1.9738341034395353e-06, + "loss": 43.0234, + "step": 10648 + }, + { + "epoch": 0.10080366524360806, + "grad_norm": 796.4903564453125, + "learning_rate": 1.973827135561197e-06, + "loss": 42.6953, + "step": 10649 + }, + { + "epoch": 0.10081313126532312, + "grad_norm": 432.8517761230469, + "learning_rate": 1.973820166767525e-06, + "loss": 29.1719, + "step": 10650 + }, + { + "epoch": 0.10082259728703817, + "grad_norm": 1389.22802734375, + "learning_rate": 1.9738131970585253e-06, + "loss": 45.1875, + "step": 10651 + }, + { + "epoch": 0.10083206330875323, + "grad_norm": 460.63262939453125, + "learning_rate": 1.9738062264342047e-06, + "loss": 32.9688, + "step": 10652 + }, + { + "epoch": 0.10084152933046829, + "grad_norm": 329.6923828125, + "learning_rate": 1.9737992548945694e-06, + "loss": 24.2812, + "step": 10653 + }, + { + "epoch": 0.10085099535218334, + "grad_norm": 611.152587890625, + "learning_rate": 1.9737922824396264e-06, + "loss": 42.7344, + "step": 10654 + }, + { + "epoch": 0.1008604613738984, + "grad_norm": 370.48974609375, + "learning_rate": 1.9737853090693818e-06, + "loss": 34.3359, + "step": 10655 + }, + { + "epoch": 0.10086992739561344, + "grad_norm": 285.9647216796875, + "learning_rate": 1.973778334783842e-06, + "loss": 21.8086, + "step": 10656 + }, + { + "epoch": 0.1008793934173285, + "grad_norm": 697.1549682617188, + "learning_rate": 1.9737713595830145e-06, + "loss": 60.5625, + "step": 10657 + }, + { + "epoch": 0.10088885943904355, + "grad_norm": 409.978271484375, + "learning_rate": 1.973764383466905e-06, + "loss": 24.3945, + "step": 10658 + }, + { + "epoch": 0.10089832546075861, + "grad_norm": 327.5806884765625, + "learning_rate": 1.9737574064355205e-06, + "loss": 17.918, + "step": 10659 + }, + { + "epoch": 0.10090779148247365, + "grad_norm": 2.785609006881714, + "learning_rate": 1.9737504284888674e-06, + "loss": 0.8062, + "step": 10660 + }, + { + "epoch": 0.10091725750418871, + "grad_norm": 408.5553283691406, + "learning_rate": 1.973743449626952e-06, + "loss": 55.3594, + "step": 10661 + }, + { + "epoch": 0.10092672352590377, + "grad_norm": 661.19677734375, + "learning_rate": 1.9737364698497815e-06, + "loss": 51.1875, + "step": 10662 + }, + { + "epoch": 0.10093618954761882, + "grad_norm": 282.2568664550781, + "learning_rate": 1.9737294891573618e-06, + "loss": 16.7344, + "step": 10663 + }, + { + "epoch": 0.10094565556933388, + "grad_norm": 358.90899658203125, + "learning_rate": 1.9737225075496997e-06, + "loss": 23.7812, + "step": 10664 + }, + { + "epoch": 0.10095512159104893, + "grad_norm": 728.069091796875, + "learning_rate": 1.9737155250268015e-06, + "loss": 31.3516, + "step": 10665 + }, + { + "epoch": 0.10096458761276399, + "grad_norm": 399.13763427734375, + "learning_rate": 1.9737085415886746e-06, + "loss": 11.0391, + "step": 10666 + }, + { + "epoch": 0.10097405363447903, + "grad_norm": 269.24957275390625, + "learning_rate": 1.9737015572353247e-06, + "loss": 19.5859, + "step": 10667 + }, + { + "epoch": 0.10098351965619409, + "grad_norm": 623.5556030273438, + "learning_rate": 1.9736945719667587e-06, + "loss": 15.3359, + "step": 10668 + }, + { + "epoch": 0.10099298567790914, + "grad_norm": 219.8771209716797, + "learning_rate": 1.9736875857829833e-06, + "loss": 21.3594, + "step": 10669 + }, + { + "epoch": 0.1010024516996242, + "grad_norm": 225.57615661621094, + "learning_rate": 1.973680598684005e-06, + "loss": 25.0703, + "step": 10670 + }, + { + "epoch": 0.10101191772133926, + "grad_norm": 1792.068603515625, + "learning_rate": 1.97367361066983e-06, + "loss": 30.9922, + "step": 10671 + }, + { + "epoch": 0.1010213837430543, + "grad_norm": 3.4837355613708496, + "learning_rate": 1.973666621740465e-06, + "loss": 0.9434, + "step": 10672 + }, + { + "epoch": 0.10103084976476936, + "grad_norm": 218.24822998046875, + "learning_rate": 1.973659631895917e-06, + "loss": 17.0078, + "step": 10673 + }, + { + "epoch": 0.10104031578648441, + "grad_norm": 496.6743469238281, + "learning_rate": 1.973652641136192e-06, + "loss": 19.5312, + "step": 10674 + }, + { + "epoch": 0.10104978180819947, + "grad_norm": 275.8001403808594, + "learning_rate": 1.9736456494612975e-06, + "loss": 16.0078, + "step": 10675 + }, + { + "epoch": 0.10105924782991452, + "grad_norm": 632.2824096679688, + "learning_rate": 1.9736386568712387e-06, + "loss": 46.9531, + "step": 10676 + }, + { + "epoch": 0.10106871385162958, + "grad_norm": 256.0386047363281, + "learning_rate": 1.9736316633660235e-06, + "loss": 25.6797, + "step": 10677 + }, + { + "epoch": 0.10107817987334462, + "grad_norm": 414.1674499511719, + "learning_rate": 1.9736246689456574e-06, + "loss": 37.7031, + "step": 10678 + }, + { + "epoch": 0.10108764589505968, + "grad_norm": 360.83160400390625, + "learning_rate": 1.9736176736101476e-06, + "loss": 42.6719, + "step": 10679 + }, + { + "epoch": 0.10109711191677474, + "grad_norm": 278.8284606933594, + "learning_rate": 1.973610677359501e-06, + "loss": 22.4922, + "step": 10680 + }, + { + "epoch": 0.10110657793848979, + "grad_norm": 156.84400939941406, + "learning_rate": 1.9736036801937226e-06, + "loss": 15.3164, + "step": 10681 + }, + { + "epoch": 0.10111604396020485, + "grad_norm": 561.5140991210938, + "learning_rate": 1.973596682112821e-06, + "loss": 58.9062, + "step": 10682 + }, + { + "epoch": 0.1011255099819199, + "grad_norm": 227.15115356445312, + "learning_rate": 1.9735896831168016e-06, + "loss": 28.3828, + "step": 10683 + }, + { + "epoch": 0.10113497600363495, + "grad_norm": 175.51441955566406, + "learning_rate": 1.9735826832056715e-06, + "loss": 20.0781, + "step": 10684 + }, + { + "epoch": 0.10114444202535, + "grad_norm": 199.6274871826172, + "learning_rate": 1.9735756823794366e-06, + "loss": 16.8906, + "step": 10685 + }, + { + "epoch": 0.10115390804706506, + "grad_norm": 3.341701030731201, + "learning_rate": 1.973568680638104e-06, + "loss": 1.0479, + "step": 10686 + }, + { + "epoch": 0.10116337406878011, + "grad_norm": 281.3661193847656, + "learning_rate": 1.9735616779816805e-06, + "loss": 19.0469, + "step": 10687 + }, + { + "epoch": 0.10117284009049517, + "grad_norm": 441.92596435546875, + "learning_rate": 1.973554674410172e-06, + "loss": 42.1562, + "step": 10688 + }, + { + "epoch": 0.10118230611221023, + "grad_norm": 310.4792785644531, + "learning_rate": 1.9735476699235855e-06, + "loss": 34.9844, + "step": 10689 + }, + { + "epoch": 0.10119177213392527, + "grad_norm": 464.5519104003906, + "learning_rate": 1.9735406645219277e-06, + "loss": 28.2812, + "step": 10690 + }, + { + "epoch": 0.10120123815564033, + "grad_norm": 3.1908648014068604, + "learning_rate": 1.9735336582052045e-06, + "loss": 1.0747, + "step": 10691 + }, + { + "epoch": 0.10121070417735538, + "grad_norm": 516.6564331054688, + "learning_rate": 1.9735266509734235e-06, + "loss": 32.5859, + "step": 10692 + }, + { + "epoch": 0.10122017019907044, + "grad_norm": 463.9186096191406, + "learning_rate": 1.9735196428265904e-06, + "loss": 19.8672, + "step": 10693 + }, + { + "epoch": 0.10122963622078548, + "grad_norm": 187.6327362060547, + "learning_rate": 1.9735126337647126e-06, + "loss": 18.8828, + "step": 10694 + }, + { + "epoch": 0.10123910224250054, + "grad_norm": 303.098388671875, + "learning_rate": 1.973505623787796e-06, + "loss": 20.8711, + "step": 10695 + }, + { + "epoch": 0.1012485682642156, + "grad_norm": 397.9830322265625, + "learning_rate": 1.9734986128958473e-06, + "loss": 39.6953, + "step": 10696 + }, + { + "epoch": 0.10125803428593065, + "grad_norm": 458.7879943847656, + "learning_rate": 1.973491601088874e-06, + "loss": 45.4062, + "step": 10697 + }, + { + "epoch": 0.10126750030764571, + "grad_norm": 508.9710998535156, + "learning_rate": 1.973484588366881e-06, + "loss": 26.8125, + "step": 10698 + }, + { + "epoch": 0.10127696632936076, + "grad_norm": 229.2833709716797, + "learning_rate": 1.9734775747298764e-06, + "loss": 17.2266, + "step": 10699 + }, + { + "epoch": 0.10128643235107582, + "grad_norm": 438.72442626953125, + "learning_rate": 1.9734705601778657e-06, + "loss": 22.9102, + "step": 10700 + }, + { + "epoch": 0.10129589837279086, + "grad_norm": 555.96435546875, + "learning_rate": 1.9734635447108564e-06, + "loss": 23.457, + "step": 10701 + }, + { + "epoch": 0.10130536439450592, + "grad_norm": 664.8400268554688, + "learning_rate": 1.9734565283288547e-06, + "loss": 26.4648, + "step": 10702 + }, + { + "epoch": 0.10131483041622097, + "grad_norm": 335.2774353027344, + "learning_rate": 1.973449511031867e-06, + "loss": 36.9531, + "step": 10703 + }, + { + "epoch": 0.10132429643793603, + "grad_norm": 172.12205505371094, + "learning_rate": 1.9734424928199004e-06, + "loss": 20.125, + "step": 10704 + }, + { + "epoch": 0.10133376245965109, + "grad_norm": 753.755615234375, + "learning_rate": 1.9734354736929605e-06, + "loss": 18.8047, + "step": 10705 + }, + { + "epoch": 0.10134322848136613, + "grad_norm": 508.3937683105469, + "learning_rate": 1.973428453651055e-06, + "loss": 58.2188, + "step": 10706 + }, + { + "epoch": 0.1013526945030812, + "grad_norm": 3.337949514389038, + "learning_rate": 1.9734214326941904e-06, + "loss": 0.8726, + "step": 10707 + }, + { + "epoch": 0.10136216052479624, + "grad_norm": 605.645751953125, + "learning_rate": 1.9734144108223724e-06, + "loss": 39.75, + "step": 10708 + }, + { + "epoch": 0.1013716265465113, + "grad_norm": 317.1656799316406, + "learning_rate": 1.973407388035609e-06, + "loss": 15.918, + "step": 10709 + }, + { + "epoch": 0.10138109256822635, + "grad_norm": 777.4223022460938, + "learning_rate": 1.973400364333905e-06, + "loss": 62.5938, + "step": 10710 + }, + { + "epoch": 0.1013905585899414, + "grad_norm": 941.7799072265625, + "learning_rate": 1.9733933397172685e-06, + "loss": 41.7266, + "step": 10711 + }, + { + "epoch": 0.10140002461165645, + "grad_norm": 2.9774844646453857, + "learning_rate": 1.9733863141857053e-06, + "loss": 0.7925, + "step": 10712 + }, + { + "epoch": 0.10140949063337151, + "grad_norm": 279.4832763671875, + "learning_rate": 1.9733792877392226e-06, + "loss": 28.4375, + "step": 10713 + }, + { + "epoch": 0.10141895665508657, + "grad_norm": 387.12420654296875, + "learning_rate": 1.973372260377827e-06, + "loss": 32.4766, + "step": 10714 + }, + { + "epoch": 0.10142842267680162, + "grad_norm": 553.7158203125, + "learning_rate": 1.973365232101524e-06, + "loss": 25.5781, + "step": 10715 + }, + { + "epoch": 0.10143788869851668, + "grad_norm": 406.6733703613281, + "learning_rate": 1.9733582029103214e-06, + "loss": 31.0469, + "step": 10716 + }, + { + "epoch": 0.10144735472023172, + "grad_norm": 183.98226928710938, + "learning_rate": 1.9733511728042255e-06, + "loss": 16.5469, + "step": 10717 + }, + { + "epoch": 0.10145682074194678, + "grad_norm": 587.1603393554688, + "learning_rate": 1.9733441417832426e-06, + "loss": 23.5547, + "step": 10718 + }, + { + "epoch": 0.10146628676366183, + "grad_norm": 281.7250061035156, + "learning_rate": 1.97333710984738e-06, + "loss": 38.75, + "step": 10719 + }, + { + "epoch": 0.10147575278537689, + "grad_norm": 551.3875122070312, + "learning_rate": 1.973330076996643e-06, + "loss": 25.1406, + "step": 10720 + }, + { + "epoch": 0.10148521880709194, + "grad_norm": 169.3487548828125, + "learning_rate": 1.9733230432310397e-06, + "loss": 10.3867, + "step": 10721 + }, + { + "epoch": 0.101494684828807, + "grad_norm": 499.8358459472656, + "learning_rate": 1.973316008550576e-06, + "loss": 44.4531, + "step": 10722 + }, + { + "epoch": 0.10150415085052206, + "grad_norm": 3.4561665058135986, + "learning_rate": 1.9733089729552585e-06, + "loss": 0.957, + "step": 10723 + }, + { + "epoch": 0.1015136168722371, + "grad_norm": 254.56167602539062, + "learning_rate": 1.9733019364450934e-06, + "loss": 20.5547, + "step": 10724 + }, + { + "epoch": 0.10152308289395216, + "grad_norm": 644.0747680664062, + "learning_rate": 1.9732948990200887e-06, + "loss": 54.375, + "step": 10725 + }, + { + "epoch": 0.10153254891566721, + "grad_norm": 385.3824462890625, + "learning_rate": 1.97328786068025e-06, + "loss": 22.2578, + "step": 10726 + }, + { + "epoch": 0.10154201493738227, + "grad_norm": 782.6631469726562, + "learning_rate": 1.9732808214255832e-06, + "loss": 35.9531, + "step": 10727 + }, + { + "epoch": 0.10155148095909731, + "grad_norm": 350.6466369628906, + "learning_rate": 1.973273781256096e-06, + "loss": 25.375, + "step": 10728 + }, + { + "epoch": 0.10156094698081237, + "grad_norm": 615.5086059570312, + "learning_rate": 1.973266740171795e-06, + "loss": 27.3984, + "step": 10729 + }, + { + "epoch": 0.10157041300252742, + "grad_norm": 195.8678741455078, + "learning_rate": 1.9732596981726867e-06, + "loss": 13.4453, + "step": 10730 + }, + { + "epoch": 0.10157987902424248, + "grad_norm": 376.080322265625, + "learning_rate": 1.9732526552587776e-06, + "loss": 43.9375, + "step": 10731 + }, + { + "epoch": 0.10158934504595754, + "grad_norm": 167.0189208984375, + "learning_rate": 1.9732456114300738e-06, + "loss": 17.9805, + "step": 10732 + }, + { + "epoch": 0.10159881106767259, + "grad_norm": 430.85760498046875, + "learning_rate": 1.9732385666865832e-06, + "loss": 39.6875, + "step": 10733 + }, + { + "epoch": 0.10160827708938765, + "grad_norm": 292.11053466796875, + "learning_rate": 1.973231521028311e-06, + "loss": 19.0312, + "step": 10734 + }, + { + "epoch": 0.10161774311110269, + "grad_norm": 185.2101593017578, + "learning_rate": 1.973224474455265e-06, + "loss": 19.6953, + "step": 10735 + }, + { + "epoch": 0.10162720913281775, + "grad_norm": 472.912841796875, + "learning_rate": 1.973217426967451e-06, + "loss": 19.4453, + "step": 10736 + }, + { + "epoch": 0.1016366751545328, + "grad_norm": 294.7665100097656, + "learning_rate": 1.973210378564876e-06, + "loss": 19.8047, + "step": 10737 + }, + { + "epoch": 0.10164614117624786, + "grad_norm": 219.18197631835938, + "learning_rate": 1.9732033292475464e-06, + "loss": 19.8281, + "step": 10738 + }, + { + "epoch": 0.10165560719796292, + "grad_norm": 3.7228434085845947, + "learning_rate": 1.973196279015469e-06, + "loss": 1.001, + "step": 10739 + }, + { + "epoch": 0.10166507321967796, + "grad_norm": 353.1515197753906, + "learning_rate": 1.973189227868651e-06, + "loss": 19.3828, + "step": 10740 + }, + { + "epoch": 0.10167453924139302, + "grad_norm": 326.2034912109375, + "learning_rate": 1.9731821758070977e-06, + "loss": 25.6172, + "step": 10741 + }, + { + "epoch": 0.10168400526310807, + "grad_norm": 765.2726440429688, + "learning_rate": 1.9731751228308167e-06, + "loss": 46.1094, + "step": 10742 + }, + { + "epoch": 0.10169347128482313, + "grad_norm": 389.43524169921875, + "learning_rate": 1.9731680689398146e-06, + "loss": 35.2969, + "step": 10743 + }, + { + "epoch": 0.10170293730653818, + "grad_norm": 456.8645324707031, + "learning_rate": 1.9731610141340974e-06, + "loss": 25.2266, + "step": 10744 + }, + { + "epoch": 0.10171240332825324, + "grad_norm": 297.10345458984375, + "learning_rate": 1.9731539584136728e-06, + "loss": 17.8828, + "step": 10745 + }, + { + "epoch": 0.10172186934996828, + "grad_norm": 172.04150390625, + "learning_rate": 1.973146901778546e-06, + "loss": 17.7734, + "step": 10746 + }, + { + "epoch": 0.10173133537168334, + "grad_norm": 420.06500244140625, + "learning_rate": 1.973139844228725e-06, + "loss": 45.7656, + "step": 10747 + }, + { + "epoch": 0.1017408013933984, + "grad_norm": 426.1719970703125, + "learning_rate": 1.9731327857642156e-06, + "loss": 28.8438, + "step": 10748 + }, + { + "epoch": 0.10175026741511345, + "grad_norm": 3.1038930416107178, + "learning_rate": 1.973125726385025e-06, + "loss": 0.9253, + "step": 10749 + }, + { + "epoch": 0.10175973343682851, + "grad_norm": 659.4366455078125, + "learning_rate": 1.9731186660911593e-06, + "loss": 11.875, + "step": 10750 + }, + { + "epoch": 0.10176919945854355, + "grad_norm": 1159.806640625, + "learning_rate": 1.9731116048826252e-06, + "loss": 48.6406, + "step": 10751 + }, + { + "epoch": 0.10177866548025861, + "grad_norm": 378.3949890136719, + "learning_rate": 1.9731045427594294e-06, + "loss": 35.5625, + "step": 10752 + }, + { + "epoch": 0.10178813150197366, + "grad_norm": 902.2179565429688, + "learning_rate": 1.973097479721579e-06, + "loss": 60.9922, + "step": 10753 + }, + { + "epoch": 0.10179759752368872, + "grad_norm": 222.41650390625, + "learning_rate": 1.9730904157690804e-06, + "loss": 26.5156, + "step": 10754 + }, + { + "epoch": 0.10180706354540377, + "grad_norm": 593.7284545898438, + "learning_rate": 1.97308335090194e-06, + "loss": 33.7891, + "step": 10755 + }, + { + "epoch": 0.10181652956711883, + "grad_norm": 558.3494873046875, + "learning_rate": 1.973076285120164e-06, + "loss": 24.9336, + "step": 10756 + }, + { + "epoch": 0.10182599558883389, + "grad_norm": 757.4963989257812, + "learning_rate": 1.97306921842376e-06, + "loss": 48.3906, + "step": 10757 + }, + { + "epoch": 0.10183546161054893, + "grad_norm": 262.8924865722656, + "learning_rate": 1.9730621508127344e-06, + "loss": 20.8594, + "step": 10758 + }, + { + "epoch": 0.10184492763226399, + "grad_norm": 143.2061767578125, + "learning_rate": 1.9730550822870935e-06, + "loss": 21.0312, + "step": 10759 + }, + { + "epoch": 0.10185439365397904, + "grad_norm": 301.7262268066406, + "learning_rate": 1.973048012846844e-06, + "loss": 34.5156, + "step": 10760 + }, + { + "epoch": 0.1018638596756941, + "grad_norm": 217.05828857421875, + "learning_rate": 1.973040942491993e-06, + "loss": 20.1406, + "step": 10761 + }, + { + "epoch": 0.10187332569740915, + "grad_norm": 237.01048278808594, + "learning_rate": 1.9730338712225466e-06, + "loss": 19.75, + "step": 10762 + }, + { + "epoch": 0.1018827917191242, + "grad_norm": 336.124267578125, + "learning_rate": 1.9730267990385115e-06, + "loss": 21.0625, + "step": 10763 + }, + { + "epoch": 0.10189225774083925, + "grad_norm": 249.41673278808594, + "learning_rate": 1.973019725939895e-06, + "loss": 18.7969, + "step": 10764 + }, + { + "epoch": 0.10190172376255431, + "grad_norm": 470.5046081542969, + "learning_rate": 1.973012651926703e-06, + "loss": 22.1797, + "step": 10765 + }, + { + "epoch": 0.10191118978426937, + "grad_norm": 360.1184997558594, + "learning_rate": 1.9730055769989423e-06, + "loss": 23.9688, + "step": 10766 + }, + { + "epoch": 0.10192065580598442, + "grad_norm": 279.7814025878906, + "learning_rate": 1.9729985011566197e-06, + "loss": 21.6797, + "step": 10767 + }, + { + "epoch": 0.10193012182769948, + "grad_norm": 500.1244201660156, + "learning_rate": 1.9729914243997418e-06, + "loss": 18.2148, + "step": 10768 + }, + { + "epoch": 0.10193958784941452, + "grad_norm": 374.2088317871094, + "learning_rate": 1.9729843467283155e-06, + "loss": 17.9297, + "step": 10769 + }, + { + "epoch": 0.10194905387112958, + "grad_norm": 255.31495666503906, + "learning_rate": 1.9729772681423466e-06, + "loss": 24.9062, + "step": 10770 + }, + { + "epoch": 0.10195851989284463, + "grad_norm": 264.9049377441406, + "learning_rate": 1.972970188641843e-06, + "loss": 28.0781, + "step": 10771 + }, + { + "epoch": 0.10196798591455969, + "grad_norm": 608.18505859375, + "learning_rate": 1.9729631082268104e-06, + "loss": 25.1211, + "step": 10772 + }, + { + "epoch": 0.10197745193627474, + "grad_norm": 317.8977966308594, + "learning_rate": 1.972956026897256e-06, + "loss": 39.375, + "step": 10773 + }, + { + "epoch": 0.1019869179579898, + "grad_norm": 394.0249938964844, + "learning_rate": 1.972948944653186e-06, + "loss": 11.0469, + "step": 10774 + }, + { + "epoch": 0.10199638397970485, + "grad_norm": 831.8424682617188, + "learning_rate": 1.9729418614946074e-06, + "loss": 33.3438, + "step": 10775 + }, + { + "epoch": 0.1020058500014199, + "grad_norm": 911.8816528320312, + "learning_rate": 1.9729347774215264e-06, + "loss": 50.5859, + "step": 10776 + }, + { + "epoch": 0.10201531602313496, + "grad_norm": 429.0124206542969, + "learning_rate": 1.9729276924339505e-06, + "loss": 46.9062, + "step": 10777 + }, + { + "epoch": 0.10202478204485001, + "grad_norm": 1202.1380615234375, + "learning_rate": 1.9729206065318856e-06, + "loss": 61.8594, + "step": 10778 + }, + { + "epoch": 0.10203424806656507, + "grad_norm": 539.4385986328125, + "learning_rate": 1.9729135197153386e-06, + "loss": 30.2812, + "step": 10779 + }, + { + "epoch": 0.10204371408828011, + "grad_norm": 272.08056640625, + "learning_rate": 1.9729064319843166e-06, + "loss": 29.4688, + "step": 10780 + }, + { + "epoch": 0.10205318010999517, + "grad_norm": 306.85858154296875, + "learning_rate": 1.9728993433388255e-06, + "loss": 8.832, + "step": 10781 + }, + { + "epoch": 0.10206264613171023, + "grad_norm": 336.7789001464844, + "learning_rate": 1.972892253778872e-06, + "loss": 26.7969, + "step": 10782 + }, + { + "epoch": 0.10207211215342528, + "grad_norm": 212.7386474609375, + "learning_rate": 1.9728851633044637e-06, + "loss": 45.2656, + "step": 10783 + }, + { + "epoch": 0.10208157817514034, + "grad_norm": 186.25457763671875, + "learning_rate": 1.9728780719156066e-06, + "loss": 10.0664, + "step": 10784 + }, + { + "epoch": 0.10209104419685539, + "grad_norm": 309.6431579589844, + "learning_rate": 1.972870979612307e-06, + "loss": 21.5234, + "step": 10785 + }, + { + "epoch": 0.10210051021857044, + "grad_norm": 343.0778503417969, + "learning_rate": 1.972863886394572e-06, + "loss": 30.4844, + "step": 10786 + }, + { + "epoch": 0.10210997624028549, + "grad_norm": 578.17724609375, + "learning_rate": 1.9728567922624084e-06, + "loss": 52.2656, + "step": 10787 + }, + { + "epoch": 0.10211944226200055, + "grad_norm": 645.7352905273438, + "learning_rate": 1.972849697215823e-06, + "loss": 32.4766, + "step": 10788 + }, + { + "epoch": 0.1021289082837156, + "grad_norm": 458.2528076171875, + "learning_rate": 1.9728426012548216e-06, + "loss": 38.3984, + "step": 10789 + }, + { + "epoch": 0.10213837430543066, + "grad_norm": 633.7694091796875, + "learning_rate": 1.9728355043794115e-06, + "loss": 48.5312, + "step": 10790 + }, + { + "epoch": 0.10214784032714572, + "grad_norm": 240.54690551757812, + "learning_rate": 1.9728284065895996e-06, + "loss": 23.0625, + "step": 10791 + }, + { + "epoch": 0.10215730634886076, + "grad_norm": 182.63925170898438, + "learning_rate": 1.9728213078853924e-06, + "loss": 16.5781, + "step": 10792 + }, + { + "epoch": 0.10216677237057582, + "grad_norm": 257.3024597167969, + "learning_rate": 1.972814208266796e-06, + "loss": 14.9297, + "step": 10793 + }, + { + "epoch": 0.10217623839229087, + "grad_norm": 761.2980346679688, + "learning_rate": 1.9728071077338177e-06, + "loss": 44.7812, + "step": 10794 + }, + { + "epoch": 0.10218570441400593, + "grad_norm": 601.8558959960938, + "learning_rate": 1.9728000062864642e-06, + "loss": 24.25, + "step": 10795 + }, + { + "epoch": 0.10219517043572098, + "grad_norm": 551.3482055664062, + "learning_rate": 1.9727929039247416e-06, + "loss": 43.7812, + "step": 10796 + }, + { + "epoch": 0.10220463645743603, + "grad_norm": 252.5516815185547, + "learning_rate": 1.9727858006486575e-06, + "loss": 21.0547, + "step": 10797 + }, + { + "epoch": 0.10221410247915108, + "grad_norm": 3.5308241844177246, + "learning_rate": 1.9727786964582175e-06, + "loss": 1.063, + "step": 10798 + }, + { + "epoch": 0.10222356850086614, + "grad_norm": 205.68643188476562, + "learning_rate": 1.972771591353429e-06, + "loss": 20.1172, + "step": 10799 + }, + { + "epoch": 0.1022330345225812, + "grad_norm": 442.6908874511719, + "learning_rate": 1.9727644853342987e-06, + "loss": 43.4531, + "step": 10800 + }, + { + "epoch": 0.10224250054429625, + "grad_norm": 519.674072265625, + "learning_rate": 1.972757378400833e-06, + "loss": 55.8086, + "step": 10801 + }, + { + "epoch": 0.10225196656601131, + "grad_norm": 410.145751953125, + "learning_rate": 1.9727502705530386e-06, + "loss": 40.9688, + "step": 10802 + }, + { + "epoch": 0.10226143258772635, + "grad_norm": 524.9154052734375, + "learning_rate": 1.972743161790922e-06, + "loss": 8.7695, + "step": 10803 + }, + { + "epoch": 0.10227089860944141, + "grad_norm": 252.77191162109375, + "learning_rate": 1.97273605211449e-06, + "loss": 22.9297, + "step": 10804 + }, + { + "epoch": 0.10228036463115646, + "grad_norm": 281.7939453125, + "learning_rate": 1.97272894152375e-06, + "loss": 19.0156, + "step": 10805 + }, + { + "epoch": 0.10228983065287152, + "grad_norm": 342.2960205078125, + "learning_rate": 1.9727218300187075e-06, + "loss": 22.8359, + "step": 10806 + }, + { + "epoch": 0.10229929667458657, + "grad_norm": 680.4656982421875, + "learning_rate": 1.97271471759937e-06, + "loss": 19.5, + "step": 10807 + }, + { + "epoch": 0.10230876269630163, + "grad_norm": 1994.168701171875, + "learning_rate": 1.972707604265744e-06, + "loss": 41.0625, + "step": 10808 + }, + { + "epoch": 0.10231822871801668, + "grad_norm": 350.072998046875, + "learning_rate": 1.972700490017836e-06, + "loss": 40.4844, + "step": 10809 + }, + { + "epoch": 0.10232769473973173, + "grad_norm": 346.6784362792969, + "learning_rate": 1.9726933748556533e-06, + "loss": 24.2852, + "step": 10810 + }, + { + "epoch": 0.10233716076144679, + "grad_norm": 2.8702890872955322, + "learning_rate": 1.9726862587792014e-06, + "loss": 0.8506, + "step": 10811 + }, + { + "epoch": 0.10234662678316184, + "grad_norm": 296.68426513671875, + "learning_rate": 1.972679141788488e-06, + "loss": 33.8047, + "step": 10812 + }, + { + "epoch": 0.1023560928048769, + "grad_norm": 279.8361511230469, + "learning_rate": 1.9726720238835198e-06, + "loss": 12.7031, + "step": 10813 + }, + { + "epoch": 0.10236555882659194, + "grad_norm": 14966.8642578125, + "learning_rate": 1.9726649050643023e-06, + "loss": 26.2969, + "step": 10814 + }, + { + "epoch": 0.102375024848307, + "grad_norm": 282.9493103027344, + "learning_rate": 1.972657785330844e-06, + "loss": 32.9688, + "step": 10815 + }, + { + "epoch": 0.10238449087002205, + "grad_norm": 802.9174194335938, + "learning_rate": 1.9726506646831502e-06, + "loss": 77.75, + "step": 10816 + }, + { + "epoch": 0.10239395689173711, + "grad_norm": 577.6806030273438, + "learning_rate": 1.9726435431212283e-06, + "loss": 50.6094, + "step": 10817 + }, + { + "epoch": 0.10240342291345217, + "grad_norm": 364.91754150390625, + "learning_rate": 1.9726364206450846e-06, + "loss": 44.7812, + "step": 10818 + }, + { + "epoch": 0.10241288893516722, + "grad_norm": 2.9025769233703613, + "learning_rate": 1.9726292972547255e-06, + "loss": 0.9263, + "step": 10819 + }, + { + "epoch": 0.10242235495688227, + "grad_norm": 236.49420166015625, + "learning_rate": 1.9726221729501587e-06, + "loss": 31.5312, + "step": 10820 + }, + { + "epoch": 0.10243182097859732, + "grad_norm": 510.47271728515625, + "learning_rate": 1.97261504773139e-06, + "loss": 23.8438, + "step": 10821 + }, + { + "epoch": 0.10244128700031238, + "grad_norm": 3.306743621826172, + "learning_rate": 1.972607921598427e-06, + "loss": 0.96, + "step": 10822 + }, + { + "epoch": 0.10245075302202743, + "grad_norm": 758.3485107421875, + "learning_rate": 1.9726007945512754e-06, + "loss": 42.8594, + "step": 10823 + }, + { + "epoch": 0.10246021904374249, + "grad_norm": 1055.95654296875, + "learning_rate": 1.972593666589942e-06, + "loss": 43.7109, + "step": 10824 + }, + { + "epoch": 0.10246968506545755, + "grad_norm": 1080.4608154296875, + "learning_rate": 1.9725865377144343e-06, + "loss": 29.2031, + "step": 10825 + }, + { + "epoch": 0.10247915108717259, + "grad_norm": 387.9617614746094, + "learning_rate": 1.9725794079247583e-06, + "loss": 29.5469, + "step": 10826 + }, + { + "epoch": 0.10248861710888765, + "grad_norm": 323.5856628417969, + "learning_rate": 1.972572277220921e-06, + "loss": 29.3438, + "step": 10827 + }, + { + "epoch": 0.1024980831306027, + "grad_norm": 252.15255737304688, + "learning_rate": 1.972565145602929e-06, + "loss": 17.9922, + "step": 10828 + }, + { + "epoch": 0.10250754915231776, + "grad_norm": 529.5326538085938, + "learning_rate": 1.9725580130707895e-06, + "loss": 47.4766, + "step": 10829 + }, + { + "epoch": 0.1025170151740328, + "grad_norm": 633.2676391601562, + "learning_rate": 1.9725508796245083e-06, + "loss": 32.5078, + "step": 10830 + }, + { + "epoch": 0.10252648119574787, + "grad_norm": 248.73562622070312, + "learning_rate": 1.9725437452640925e-06, + "loss": 23.4531, + "step": 10831 + }, + { + "epoch": 0.10253594721746291, + "grad_norm": 3.68914794921875, + "learning_rate": 1.9725366099895487e-06, + "loss": 0.9048, + "step": 10832 + }, + { + "epoch": 0.10254541323917797, + "grad_norm": 289.8896789550781, + "learning_rate": 1.9725294738008843e-06, + "loss": 22.0469, + "step": 10833 + }, + { + "epoch": 0.10255487926089303, + "grad_norm": 726.901611328125, + "learning_rate": 1.972522336698105e-06, + "loss": 68.6875, + "step": 10834 + }, + { + "epoch": 0.10256434528260808, + "grad_norm": 362.4858093261719, + "learning_rate": 1.9725151986812182e-06, + "loss": 35.6875, + "step": 10835 + }, + { + "epoch": 0.10257381130432314, + "grad_norm": 243.82470703125, + "learning_rate": 1.97250805975023e-06, + "loss": 20.125, + "step": 10836 + }, + { + "epoch": 0.10258327732603818, + "grad_norm": 286.18585205078125, + "learning_rate": 1.972500919905148e-06, + "loss": 25.5781, + "step": 10837 + }, + { + "epoch": 0.10259274334775324, + "grad_norm": 295.905029296875, + "learning_rate": 1.9724937791459782e-06, + "loss": 17.7891, + "step": 10838 + }, + { + "epoch": 0.10260220936946829, + "grad_norm": 275.5675354003906, + "learning_rate": 1.9724866374727273e-06, + "loss": 24.4844, + "step": 10839 + }, + { + "epoch": 0.10261167539118335, + "grad_norm": 519.4190063476562, + "learning_rate": 1.972479494885403e-06, + "loss": 24.3281, + "step": 10840 + }, + { + "epoch": 0.1026211414128984, + "grad_norm": 437.50689697265625, + "learning_rate": 1.97247235138401e-06, + "loss": 34.9531, + "step": 10841 + }, + { + "epoch": 0.10263060743461346, + "grad_norm": 3.3383407592773438, + "learning_rate": 1.972465206968557e-06, + "loss": 0.9778, + "step": 10842 + }, + { + "epoch": 0.10264007345632851, + "grad_norm": 475.2577209472656, + "learning_rate": 1.97245806163905e-06, + "loss": 19.0391, + "step": 10843 + }, + { + "epoch": 0.10264953947804356, + "grad_norm": 3.287160873413086, + "learning_rate": 1.9724509153954955e-06, + "loss": 0.9097, + "step": 10844 + }, + { + "epoch": 0.10265900549975862, + "grad_norm": 760.5127563476562, + "learning_rate": 1.9724437682379005e-06, + "loss": 51.1719, + "step": 10845 + }, + { + "epoch": 0.10266847152147367, + "grad_norm": 302.37518310546875, + "learning_rate": 1.9724366201662715e-06, + "loss": 24.4688, + "step": 10846 + }, + { + "epoch": 0.10267793754318873, + "grad_norm": 543.580810546875, + "learning_rate": 1.972429471180615e-06, + "loss": 47.8125, + "step": 10847 + }, + { + "epoch": 0.10268740356490377, + "grad_norm": 490.3222961425781, + "learning_rate": 1.972422321280939e-06, + "loss": 31.1953, + "step": 10848 + }, + { + "epoch": 0.10269686958661883, + "grad_norm": 315.1514587402344, + "learning_rate": 1.9724151704672485e-06, + "loss": 24.0, + "step": 10849 + }, + { + "epoch": 0.10270633560833388, + "grad_norm": 419.42413330078125, + "learning_rate": 1.9724080187395513e-06, + "loss": 46.3438, + "step": 10850 + }, + { + "epoch": 0.10271580163004894, + "grad_norm": 572.6223754882812, + "learning_rate": 1.972400866097854e-06, + "loss": 57.7578, + "step": 10851 + }, + { + "epoch": 0.102725267651764, + "grad_norm": 604.9544677734375, + "learning_rate": 1.972393712542163e-06, + "loss": 72.8828, + "step": 10852 + }, + { + "epoch": 0.10273473367347905, + "grad_norm": 411.4793395996094, + "learning_rate": 1.972386558072485e-06, + "loss": 32.8438, + "step": 10853 + }, + { + "epoch": 0.1027441996951941, + "grad_norm": 379.739990234375, + "learning_rate": 1.9723794026888267e-06, + "loss": 23.6562, + "step": 10854 + }, + { + "epoch": 0.10275366571690915, + "grad_norm": 459.8060302734375, + "learning_rate": 1.9723722463911953e-06, + "loss": 33.0156, + "step": 10855 + }, + { + "epoch": 0.10276313173862421, + "grad_norm": 251.7030792236328, + "learning_rate": 1.9723650891795975e-06, + "loss": 19.4375, + "step": 10856 + }, + { + "epoch": 0.10277259776033926, + "grad_norm": 253.3026123046875, + "learning_rate": 1.9723579310540394e-06, + "loss": 22.8984, + "step": 10857 + }, + { + "epoch": 0.10278206378205432, + "grad_norm": 420.1177062988281, + "learning_rate": 1.9723507720145284e-06, + "loss": 38.7188, + "step": 10858 + }, + { + "epoch": 0.10279152980376936, + "grad_norm": 320.0526123046875, + "learning_rate": 1.9723436120610706e-06, + "loss": 20.4922, + "step": 10859 + }, + { + "epoch": 0.10280099582548442, + "grad_norm": 222.70547485351562, + "learning_rate": 1.972336451193673e-06, + "loss": 10.8789, + "step": 10860 + }, + { + "epoch": 0.10281046184719948, + "grad_norm": 275.14556884765625, + "learning_rate": 1.972329289412343e-06, + "loss": 22.2344, + "step": 10861 + }, + { + "epoch": 0.10281992786891453, + "grad_norm": 531.1494140625, + "learning_rate": 1.9723221267170864e-06, + "loss": 37.2656, + "step": 10862 + }, + { + "epoch": 0.10282939389062959, + "grad_norm": 354.58489990234375, + "learning_rate": 1.9723149631079098e-06, + "loss": 39.9609, + "step": 10863 + }, + { + "epoch": 0.10283885991234464, + "grad_norm": 170.89991760253906, + "learning_rate": 1.972307798584821e-06, + "loss": 18.3359, + "step": 10864 + }, + { + "epoch": 0.1028483259340597, + "grad_norm": 334.2052917480469, + "learning_rate": 1.9723006331478257e-06, + "loss": 22.375, + "step": 10865 + }, + { + "epoch": 0.10285779195577474, + "grad_norm": 393.3051452636719, + "learning_rate": 1.9722934667969313e-06, + "loss": 12.4414, + "step": 10866 + }, + { + "epoch": 0.1028672579774898, + "grad_norm": 748.0123291015625, + "learning_rate": 1.9722862995321443e-06, + "loss": 25.1953, + "step": 10867 + }, + { + "epoch": 0.10287672399920486, + "grad_norm": 221.02259826660156, + "learning_rate": 1.9722791313534715e-06, + "loss": 15.9648, + "step": 10868 + }, + { + "epoch": 0.10288619002091991, + "grad_norm": 699.7174682617188, + "learning_rate": 1.9722719622609195e-06, + "loss": 32.9531, + "step": 10869 + }, + { + "epoch": 0.10289565604263497, + "grad_norm": 453.2724609375, + "learning_rate": 1.9722647922544948e-06, + "loss": 36.6953, + "step": 10870 + }, + { + "epoch": 0.10290512206435001, + "grad_norm": 515.581787109375, + "learning_rate": 1.972257621334205e-06, + "loss": 19.6836, + "step": 10871 + }, + { + "epoch": 0.10291458808606507, + "grad_norm": 429.34686279296875, + "learning_rate": 1.9722504495000555e-06, + "loss": 24.4297, + "step": 10872 + }, + { + "epoch": 0.10292405410778012, + "grad_norm": 165.5006561279297, + "learning_rate": 1.9722432767520545e-06, + "loss": 19.4453, + "step": 10873 + }, + { + "epoch": 0.10293352012949518, + "grad_norm": 321.20294189453125, + "learning_rate": 1.972236103090208e-06, + "loss": 26.5938, + "step": 10874 + }, + { + "epoch": 0.10294298615121023, + "grad_norm": 394.6074523925781, + "learning_rate": 1.9722289285145224e-06, + "loss": 18.8867, + "step": 10875 + }, + { + "epoch": 0.10295245217292529, + "grad_norm": 539.1472778320312, + "learning_rate": 1.9722217530250052e-06, + "loss": 41.4141, + "step": 10876 + }, + { + "epoch": 0.10296191819464035, + "grad_norm": 201.51284790039062, + "learning_rate": 1.9722145766216628e-06, + "loss": 22.6797, + "step": 10877 + }, + { + "epoch": 0.10297138421635539, + "grad_norm": 290.1072082519531, + "learning_rate": 1.9722073993045018e-06, + "loss": 25.7578, + "step": 10878 + }, + { + "epoch": 0.10298085023807045, + "grad_norm": 344.3453369140625, + "learning_rate": 1.972200221073529e-06, + "loss": 26.5234, + "step": 10879 + }, + { + "epoch": 0.1029903162597855, + "grad_norm": 234.2140350341797, + "learning_rate": 1.9721930419287517e-06, + "loss": 19.8906, + "step": 10880 + }, + { + "epoch": 0.10299978228150056, + "grad_norm": 485.43719482421875, + "learning_rate": 1.972185861870176e-06, + "loss": 48.0, + "step": 10881 + }, + { + "epoch": 0.1030092483032156, + "grad_norm": 583.1027221679688, + "learning_rate": 1.9721786808978084e-06, + "loss": 30.2578, + "step": 10882 + }, + { + "epoch": 0.10301871432493066, + "grad_norm": 285.80999755859375, + "learning_rate": 1.9721714990116564e-06, + "loss": 45.7891, + "step": 10883 + }, + { + "epoch": 0.10302818034664571, + "grad_norm": 160.91310119628906, + "learning_rate": 1.9721643162117266e-06, + "loss": 21.1406, + "step": 10884 + }, + { + "epoch": 0.10303764636836077, + "grad_norm": 378.9137878417969, + "learning_rate": 1.9721571324980256e-06, + "loss": 44.1719, + "step": 10885 + }, + { + "epoch": 0.10304711239007583, + "grad_norm": 426.5187683105469, + "learning_rate": 1.9721499478705595e-06, + "loss": 65.7344, + "step": 10886 + }, + { + "epoch": 0.10305657841179088, + "grad_norm": 420.0162048339844, + "learning_rate": 1.972142762329336e-06, + "loss": 44.8281, + "step": 10887 + }, + { + "epoch": 0.10306604443350594, + "grad_norm": 175.10191345214844, + "learning_rate": 1.972135575874362e-06, + "loss": 19.5781, + "step": 10888 + }, + { + "epoch": 0.10307551045522098, + "grad_norm": 412.2738952636719, + "learning_rate": 1.9721283885056437e-06, + "loss": 44.7578, + "step": 10889 + }, + { + "epoch": 0.10308497647693604, + "grad_norm": 379.93157958984375, + "learning_rate": 1.9721212002231876e-06, + "loss": 23.8594, + "step": 10890 + }, + { + "epoch": 0.10309444249865109, + "grad_norm": 595.22216796875, + "learning_rate": 1.972114011027001e-06, + "loss": 52.2422, + "step": 10891 + }, + { + "epoch": 0.10310390852036615, + "grad_norm": 513.4304809570312, + "learning_rate": 1.9721068209170905e-06, + "loss": 50.4062, + "step": 10892 + }, + { + "epoch": 0.1031133745420812, + "grad_norm": 193.1838836669922, + "learning_rate": 1.9720996298934626e-06, + "loss": 21.4766, + "step": 10893 + }, + { + "epoch": 0.10312284056379625, + "grad_norm": 342.3955993652344, + "learning_rate": 1.9720924379561246e-06, + "loss": 17.8906, + "step": 10894 + }, + { + "epoch": 0.10313230658551131, + "grad_norm": 565.2291870117188, + "learning_rate": 1.9720852451050828e-06, + "loss": 29.8281, + "step": 10895 + }, + { + "epoch": 0.10314177260722636, + "grad_norm": 746.2498168945312, + "learning_rate": 1.972078051340344e-06, + "loss": 57.4219, + "step": 10896 + }, + { + "epoch": 0.10315123862894142, + "grad_norm": 331.8751220703125, + "learning_rate": 1.9720708566619155e-06, + "loss": 21.3438, + "step": 10897 + }, + { + "epoch": 0.10316070465065647, + "grad_norm": 192.48326110839844, + "learning_rate": 1.972063661069803e-06, + "loss": 15.1406, + "step": 10898 + }, + { + "epoch": 0.10317017067237153, + "grad_norm": 289.55340576171875, + "learning_rate": 1.9720564645640144e-06, + "loss": 32.5469, + "step": 10899 + }, + { + "epoch": 0.10317963669408657, + "grad_norm": 282.59759521484375, + "learning_rate": 1.972049267144556e-06, + "loss": 45.4375, + "step": 10900 + }, + { + "epoch": 0.10318910271580163, + "grad_norm": 786.8599243164062, + "learning_rate": 1.972042068811434e-06, + "loss": 40.1094, + "step": 10901 + }, + { + "epoch": 0.10319856873751668, + "grad_norm": 387.32440185546875, + "learning_rate": 1.972034869564656e-06, + "loss": 41.1172, + "step": 10902 + }, + { + "epoch": 0.10320803475923174, + "grad_norm": 282.9812927246094, + "learning_rate": 1.9720276694042285e-06, + "loss": 17.0625, + "step": 10903 + }, + { + "epoch": 0.1032175007809468, + "grad_norm": 440.28076171875, + "learning_rate": 1.9720204683301583e-06, + "loss": 32.0, + "step": 10904 + }, + { + "epoch": 0.10322696680266184, + "grad_norm": 245.8771514892578, + "learning_rate": 1.972013266342452e-06, + "loss": 11.7422, + "step": 10905 + }, + { + "epoch": 0.1032364328243769, + "grad_norm": 317.5594787597656, + "learning_rate": 1.972006063441116e-06, + "loss": 31.3281, + "step": 10906 + }, + { + "epoch": 0.10324589884609195, + "grad_norm": 158.54681396484375, + "learning_rate": 1.9719988596261582e-06, + "loss": 16.2344, + "step": 10907 + }, + { + "epoch": 0.10325536486780701, + "grad_norm": 189.99205017089844, + "learning_rate": 1.9719916548975847e-06, + "loss": 21.25, + "step": 10908 + }, + { + "epoch": 0.10326483088952206, + "grad_norm": 437.78851318359375, + "learning_rate": 1.9719844492554018e-06, + "loss": 31.25, + "step": 10909 + }, + { + "epoch": 0.10327429691123712, + "grad_norm": 365.56842041015625, + "learning_rate": 1.971977242699617e-06, + "loss": 31.1719, + "step": 10910 + }, + { + "epoch": 0.10328376293295218, + "grad_norm": 663.56103515625, + "learning_rate": 1.971970035230237e-06, + "loss": 46.3594, + "step": 10911 + }, + { + "epoch": 0.10329322895466722, + "grad_norm": 890.317138671875, + "learning_rate": 1.971962826847268e-06, + "loss": 40.9355, + "step": 10912 + }, + { + "epoch": 0.10330269497638228, + "grad_norm": 3.5380702018737793, + "learning_rate": 1.9719556175507173e-06, + "loss": 0.9487, + "step": 10913 + }, + { + "epoch": 0.10331216099809733, + "grad_norm": 331.2292175292969, + "learning_rate": 1.9719484073405916e-06, + "loss": 20.1953, + "step": 10914 + }, + { + "epoch": 0.10332162701981239, + "grad_norm": 199.7902374267578, + "learning_rate": 1.971941196216898e-06, + "loss": 25.6406, + "step": 10915 + }, + { + "epoch": 0.10333109304152743, + "grad_norm": 303.60113525390625, + "learning_rate": 1.9719339841796427e-06, + "loss": 11.2148, + "step": 10916 + }, + { + "epoch": 0.1033405590632425, + "grad_norm": 502.7160339355469, + "learning_rate": 1.9719267712288322e-06, + "loss": 40.4219, + "step": 10917 + }, + { + "epoch": 0.10335002508495754, + "grad_norm": 158.38868713378906, + "learning_rate": 1.9719195573644743e-06, + "loss": 16.2031, + "step": 10918 + }, + { + "epoch": 0.1033594911066726, + "grad_norm": 486.94378662109375, + "learning_rate": 1.9719123425865748e-06, + "loss": 23.7422, + "step": 10919 + }, + { + "epoch": 0.10336895712838766, + "grad_norm": 264.3819885253906, + "learning_rate": 1.9719051268951413e-06, + "loss": 34.5547, + "step": 10920 + }, + { + "epoch": 0.1033784231501027, + "grad_norm": 315.072021484375, + "learning_rate": 1.97189791029018e-06, + "loss": 19.7344, + "step": 10921 + }, + { + "epoch": 0.10338788917181777, + "grad_norm": 419.4021911621094, + "learning_rate": 1.971890692771698e-06, + "loss": 44.25, + "step": 10922 + }, + { + "epoch": 0.10339735519353281, + "grad_norm": 523.1376342773438, + "learning_rate": 1.9718834743397022e-06, + "loss": 54.5625, + "step": 10923 + }, + { + "epoch": 0.10340682121524787, + "grad_norm": 207.40463256835938, + "learning_rate": 1.9718762549941987e-06, + "loss": 25.7969, + "step": 10924 + }, + { + "epoch": 0.10341628723696292, + "grad_norm": 468.511474609375, + "learning_rate": 1.9718690347351948e-06, + "loss": 42.8125, + "step": 10925 + }, + { + "epoch": 0.10342575325867798, + "grad_norm": 2.9573404788970947, + "learning_rate": 1.9718618135626975e-06, + "loss": 0.957, + "step": 10926 + }, + { + "epoch": 0.10343521928039302, + "grad_norm": 432.2933044433594, + "learning_rate": 1.9718545914767134e-06, + "loss": 40.4219, + "step": 10927 + }, + { + "epoch": 0.10344468530210808, + "grad_norm": 1119.9229736328125, + "learning_rate": 1.971847368477249e-06, + "loss": 48.9961, + "step": 10928 + }, + { + "epoch": 0.10345415132382314, + "grad_norm": 527.8988647460938, + "learning_rate": 1.9718401445643114e-06, + "loss": 40.1953, + "step": 10929 + }, + { + "epoch": 0.10346361734553819, + "grad_norm": 316.88433837890625, + "learning_rate": 1.971832919737907e-06, + "loss": 47.5781, + "step": 10930 + }, + { + "epoch": 0.10347308336725325, + "grad_norm": 287.9429931640625, + "learning_rate": 1.971825693998043e-06, + "loss": 29.375, + "step": 10931 + }, + { + "epoch": 0.1034825493889683, + "grad_norm": 447.82244873046875, + "learning_rate": 1.9718184673447264e-06, + "loss": 28.7266, + "step": 10932 + }, + { + "epoch": 0.10349201541068336, + "grad_norm": 256.868896484375, + "learning_rate": 1.9718112397779635e-06, + "loss": 18.3594, + "step": 10933 + }, + { + "epoch": 0.1035014814323984, + "grad_norm": 525.4481201171875, + "learning_rate": 1.971804011297761e-06, + "loss": 44.7812, + "step": 10934 + }, + { + "epoch": 0.10351094745411346, + "grad_norm": 324.1067810058594, + "learning_rate": 1.971796781904126e-06, + "loss": 43.4375, + "step": 10935 + }, + { + "epoch": 0.10352041347582851, + "grad_norm": 199.42681884765625, + "learning_rate": 1.9717895515970657e-06, + "loss": 21.7422, + "step": 10936 + }, + { + "epoch": 0.10352987949754357, + "grad_norm": 189.02706909179688, + "learning_rate": 1.971782320376586e-06, + "loss": 17.9922, + "step": 10937 + }, + { + "epoch": 0.10353934551925863, + "grad_norm": 215.96365356445312, + "learning_rate": 1.971775088242694e-06, + "loss": 17.3906, + "step": 10938 + }, + { + "epoch": 0.10354881154097367, + "grad_norm": 436.6141662597656, + "learning_rate": 1.971767855195397e-06, + "loss": 47.6641, + "step": 10939 + }, + { + "epoch": 0.10355827756268873, + "grad_norm": 454.8377685546875, + "learning_rate": 1.971760621234701e-06, + "loss": 41.2266, + "step": 10940 + }, + { + "epoch": 0.10356774358440378, + "grad_norm": 453.27093505859375, + "learning_rate": 1.9717533863606137e-06, + "loss": 12.2656, + "step": 10941 + }, + { + "epoch": 0.10357720960611884, + "grad_norm": 279.400634765625, + "learning_rate": 1.9717461505731415e-06, + "loss": 10.459, + "step": 10942 + }, + { + "epoch": 0.10358667562783389, + "grad_norm": 788.4459838867188, + "learning_rate": 1.9717389138722908e-06, + "loss": 43.1172, + "step": 10943 + }, + { + "epoch": 0.10359614164954895, + "grad_norm": 360.5593566894531, + "learning_rate": 1.9717316762580687e-06, + "loss": 14.8945, + "step": 10944 + }, + { + "epoch": 0.10360560767126399, + "grad_norm": 3.157219409942627, + "learning_rate": 1.971724437730482e-06, + "loss": 0.9614, + "step": 10945 + }, + { + "epoch": 0.10361507369297905, + "grad_norm": 325.6812744140625, + "learning_rate": 1.971717198289538e-06, + "loss": 21.3594, + "step": 10946 + }, + { + "epoch": 0.10362453971469411, + "grad_norm": 455.7628479003906, + "learning_rate": 1.9717099579352427e-06, + "loss": 37.3906, + "step": 10947 + }, + { + "epoch": 0.10363400573640916, + "grad_norm": 185.26690673828125, + "learning_rate": 1.9717027166676036e-06, + "loss": 26.0781, + "step": 10948 + }, + { + "epoch": 0.10364347175812422, + "grad_norm": 219.37246704101562, + "learning_rate": 1.9716954744866268e-06, + "loss": 20.6016, + "step": 10949 + }, + { + "epoch": 0.10365293777983926, + "grad_norm": 672.3784790039062, + "learning_rate": 1.9716882313923193e-06, + "loss": 61.0469, + "step": 10950 + }, + { + "epoch": 0.10366240380155432, + "grad_norm": 283.1695861816406, + "learning_rate": 1.9716809873846883e-06, + "loss": 26.3203, + "step": 10951 + }, + { + "epoch": 0.10367186982326937, + "grad_norm": 567.36865234375, + "learning_rate": 1.9716737424637407e-06, + "loss": 52.7148, + "step": 10952 + }, + { + "epoch": 0.10368133584498443, + "grad_norm": 503.0396423339844, + "learning_rate": 1.971666496629483e-06, + "loss": 23.1641, + "step": 10953 + }, + { + "epoch": 0.10369080186669949, + "grad_norm": 385.7420349121094, + "learning_rate": 1.971659249881921e-06, + "loss": 24.9609, + "step": 10954 + }, + { + "epoch": 0.10370026788841454, + "grad_norm": 295.7552795410156, + "learning_rate": 1.9716520022210636e-06, + "loss": 24.1172, + "step": 10955 + }, + { + "epoch": 0.1037097339101296, + "grad_norm": 577.8611450195312, + "learning_rate": 1.9716447536469163e-06, + "loss": 33.3438, + "step": 10956 + }, + { + "epoch": 0.10371919993184464, + "grad_norm": 260.78131103515625, + "learning_rate": 1.9716375041594857e-06, + "loss": 37.9531, + "step": 10957 + }, + { + "epoch": 0.1037286659535597, + "grad_norm": 206.6540985107422, + "learning_rate": 1.9716302537587796e-06, + "loss": 25.7109, + "step": 10958 + }, + { + "epoch": 0.10373813197527475, + "grad_norm": 478.281982421875, + "learning_rate": 1.971623002444804e-06, + "loss": 49.7969, + "step": 10959 + }, + { + "epoch": 0.10374759799698981, + "grad_norm": 604.0131225585938, + "learning_rate": 1.9716157502175656e-06, + "loss": 22.1484, + "step": 10960 + }, + { + "epoch": 0.10375706401870485, + "grad_norm": 470.69366455078125, + "learning_rate": 1.971608497077072e-06, + "loss": 50.8438, + "step": 10961 + }, + { + "epoch": 0.10376653004041991, + "grad_norm": 309.73309326171875, + "learning_rate": 1.97160124302333e-06, + "loss": 17.6406, + "step": 10962 + }, + { + "epoch": 0.10377599606213497, + "grad_norm": 378.7859802246094, + "learning_rate": 1.9715939880563456e-06, + "loss": 20.9297, + "step": 10963 + }, + { + "epoch": 0.10378546208385002, + "grad_norm": 335.626708984375, + "learning_rate": 1.971586732176126e-06, + "loss": 27.0234, + "step": 10964 + }, + { + "epoch": 0.10379492810556508, + "grad_norm": 448.3037109375, + "learning_rate": 1.971579475382678e-06, + "loss": 26.9766, + "step": 10965 + }, + { + "epoch": 0.10380439412728013, + "grad_norm": 420.7528076171875, + "learning_rate": 1.9715722176760088e-06, + "loss": 25.3984, + "step": 10966 + }, + { + "epoch": 0.10381386014899519, + "grad_norm": 350.0543212890625, + "learning_rate": 1.9715649590561248e-06, + "loss": 25.5391, + "step": 10967 + }, + { + "epoch": 0.10382332617071023, + "grad_norm": 616.3115234375, + "learning_rate": 1.971557699523033e-06, + "loss": 34.5508, + "step": 10968 + }, + { + "epoch": 0.10383279219242529, + "grad_norm": 681.7471923828125, + "learning_rate": 1.9715504390767405e-06, + "loss": 44.6016, + "step": 10969 + }, + { + "epoch": 0.10384225821414034, + "grad_norm": 916.4519653320312, + "learning_rate": 1.9715431777172534e-06, + "loss": 75.75, + "step": 10970 + }, + { + "epoch": 0.1038517242358554, + "grad_norm": 383.5513916015625, + "learning_rate": 1.971535915444579e-06, + "loss": 29.5781, + "step": 10971 + }, + { + "epoch": 0.10386119025757046, + "grad_norm": 676.7057495117188, + "learning_rate": 1.9715286522587235e-06, + "loss": 53.2109, + "step": 10972 + }, + { + "epoch": 0.1038706562792855, + "grad_norm": 465.93243408203125, + "learning_rate": 1.971521388159695e-06, + "loss": 46.3594, + "step": 10973 + }, + { + "epoch": 0.10388012230100056, + "grad_norm": 385.6876525878906, + "learning_rate": 1.971514123147499e-06, + "loss": 29.5781, + "step": 10974 + }, + { + "epoch": 0.10388958832271561, + "grad_norm": 431.7419738769531, + "learning_rate": 1.9715068572221436e-06, + "loss": 18.6328, + "step": 10975 + }, + { + "epoch": 0.10389905434443067, + "grad_norm": 292.94146728515625, + "learning_rate": 1.9714995903836344e-06, + "loss": 19.5664, + "step": 10976 + }, + { + "epoch": 0.10390852036614572, + "grad_norm": 211.20188903808594, + "learning_rate": 1.9714923226319793e-06, + "loss": 23.9062, + "step": 10977 + }, + { + "epoch": 0.10391798638786078, + "grad_norm": 332.8619079589844, + "learning_rate": 1.9714850539671846e-06, + "loss": 23.8359, + "step": 10978 + }, + { + "epoch": 0.10392745240957582, + "grad_norm": 2.751103162765503, + "learning_rate": 1.9714777843892565e-06, + "loss": 0.8911, + "step": 10979 + }, + { + "epoch": 0.10393691843129088, + "grad_norm": 376.7247314453125, + "learning_rate": 1.971470513898203e-06, + "loss": 18.5078, + "step": 10980 + }, + { + "epoch": 0.10394638445300594, + "grad_norm": 289.2521667480469, + "learning_rate": 1.9714632424940302e-06, + "loss": 20.625, + "step": 10981 + }, + { + "epoch": 0.10395585047472099, + "grad_norm": 405.21942138671875, + "learning_rate": 1.971455970176745e-06, + "loss": 25.1562, + "step": 10982 + }, + { + "epoch": 0.10396531649643605, + "grad_norm": 430.47265625, + "learning_rate": 1.9714486969463547e-06, + "loss": 33.0312, + "step": 10983 + }, + { + "epoch": 0.1039747825181511, + "grad_norm": 398.8013000488281, + "learning_rate": 1.971441422802866e-06, + "loss": 30.1719, + "step": 10984 + }, + { + "epoch": 0.10398424853986615, + "grad_norm": 2.8812148571014404, + "learning_rate": 1.9714341477462853e-06, + "loss": 0.9639, + "step": 10985 + }, + { + "epoch": 0.1039937145615812, + "grad_norm": 650.38330078125, + "learning_rate": 1.9714268717766196e-06, + "loss": 37.0625, + "step": 10986 + }, + { + "epoch": 0.10400318058329626, + "grad_norm": 258.11212158203125, + "learning_rate": 1.971419594893876e-06, + "loss": 24.7344, + "step": 10987 + }, + { + "epoch": 0.1040126466050113, + "grad_norm": 222.7620086669922, + "learning_rate": 1.971412317098061e-06, + "loss": 23.5703, + "step": 10988 + }, + { + "epoch": 0.10402211262672637, + "grad_norm": 370.5278015136719, + "learning_rate": 1.9714050383891817e-06, + "loss": 61.6719, + "step": 10989 + }, + { + "epoch": 0.10403157864844143, + "grad_norm": 302.0345458984375, + "learning_rate": 1.971397758767245e-06, + "loss": 15.5078, + "step": 10990 + }, + { + "epoch": 0.10404104467015647, + "grad_norm": 833.75244140625, + "learning_rate": 1.971390478232257e-06, + "loss": 40.1875, + "step": 10991 + }, + { + "epoch": 0.10405051069187153, + "grad_norm": 3.6264429092407227, + "learning_rate": 1.971383196784226e-06, + "loss": 0.8867, + "step": 10992 + }, + { + "epoch": 0.10405997671358658, + "grad_norm": 435.81390380859375, + "learning_rate": 1.9713759144231577e-06, + "loss": 48.75, + "step": 10993 + }, + { + "epoch": 0.10406944273530164, + "grad_norm": 535.5218505859375, + "learning_rate": 1.971368631149059e-06, + "loss": 30.2188, + "step": 10994 + }, + { + "epoch": 0.10407890875701668, + "grad_norm": 364.15069580078125, + "learning_rate": 1.971361346961937e-06, + "loss": 46.8438, + "step": 10995 + }, + { + "epoch": 0.10408837477873174, + "grad_norm": 570.130615234375, + "learning_rate": 1.9713540618617984e-06, + "loss": 50.8906, + "step": 10996 + }, + { + "epoch": 0.1040978408004468, + "grad_norm": 223.6854705810547, + "learning_rate": 1.971346775848651e-06, + "loss": 14.7266, + "step": 10997 + }, + { + "epoch": 0.10410730682216185, + "grad_norm": 183.22982788085938, + "learning_rate": 1.9713394889225e-06, + "loss": 20.5156, + "step": 10998 + }, + { + "epoch": 0.10411677284387691, + "grad_norm": 430.8686828613281, + "learning_rate": 1.9713322010833534e-06, + "loss": 20.7891, + "step": 10999 + }, + { + "epoch": 0.10412623886559196, + "grad_norm": 188.73277282714844, + "learning_rate": 1.9713249123312177e-06, + "loss": 24.25, + "step": 11000 + }, + { + "epoch": 0.10413570488730702, + "grad_norm": 257.7251281738281, + "learning_rate": 1.9713176226660994e-06, + "loss": 13.7031, + "step": 11001 + }, + { + "epoch": 0.10414517090902206, + "grad_norm": 1169.9783935546875, + "learning_rate": 1.971310332088006e-06, + "loss": 47.7109, + "step": 11002 + }, + { + "epoch": 0.10415463693073712, + "grad_norm": 292.08551025390625, + "learning_rate": 1.971303040596944e-06, + "loss": 31.2578, + "step": 11003 + }, + { + "epoch": 0.10416410295245217, + "grad_norm": 563.0948486328125, + "learning_rate": 1.9712957481929206e-06, + "loss": 44.1562, + "step": 11004 + }, + { + "epoch": 0.10417356897416723, + "grad_norm": 280.7180480957031, + "learning_rate": 1.971288454875942e-06, + "loss": 20.1992, + "step": 11005 + }, + { + "epoch": 0.10418303499588229, + "grad_norm": 674.0655517578125, + "learning_rate": 1.971281160646016e-06, + "loss": 35.1875, + "step": 11006 + }, + { + "epoch": 0.10419250101759733, + "grad_norm": 353.2453918457031, + "learning_rate": 1.9712738655031486e-06, + "loss": 29.0312, + "step": 11007 + }, + { + "epoch": 0.1042019670393124, + "grad_norm": 326.4766540527344, + "learning_rate": 1.9712665694473467e-06, + "loss": 21.2109, + "step": 11008 + }, + { + "epoch": 0.10421143306102744, + "grad_norm": 250.00648498535156, + "learning_rate": 1.971259272478618e-06, + "loss": 21.0625, + "step": 11009 + }, + { + "epoch": 0.1042208990827425, + "grad_norm": 685.5781860351562, + "learning_rate": 1.9712519745969682e-06, + "loss": 47.5938, + "step": 11010 + }, + { + "epoch": 0.10423036510445755, + "grad_norm": 1284.687255859375, + "learning_rate": 1.9712446758024047e-06, + "loss": 55.9219, + "step": 11011 + }, + { + "epoch": 0.1042398311261726, + "grad_norm": 230.6253204345703, + "learning_rate": 1.9712373760949345e-06, + "loss": 16.832, + "step": 11012 + }, + { + "epoch": 0.10424929714788765, + "grad_norm": 257.70953369140625, + "learning_rate": 1.9712300754745644e-06, + "loss": 17.6094, + "step": 11013 + }, + { + "epoch": 0.10425876316960271, + "grad_norm": 314.79168701171875, + "learning_rate": 1.971222773941301e-06, + "loss": 16.5938, + "step": 11014 + }, + { + "epoch": 0.10426822919131777, + "grad_norm": 701.1805419921875, + "learning_rate": 1.971215471495152e-06, + "loss": 43.0469, + "step": 11015 + }, + { + "epoch": 0.10427769521303282, + "grad_norm": 627.4496459960938, + "learning_rate": 1.971208168136123e-06, + "loss": 23.0, + "step": 11016 + }, + { + "epoch": 0.10428716123474788, + "grad_norm": 243.2913360595703, + "learning_rate": 1.971200863864222e-06, + "loss": 18.0391, + "step": 11017 + }, + { + "epoch": 0.10429662725646292, + "grad_norm": 341.4163818359375, + "learning_rate": 1.971193558679455e-06, + "loss": 26.0859, + "step": 11018 + }, + { + "epoch": 0.10430609327817798, + "grad_norm": 225.21926879882812, + "learning_rate": 1.9711862525818294e-06, + "loss": 17.9922, + "step": 11019 + }, + { + "epoch": 0.10431555929989303, + "grad_norm": 412.05401611328125, + "learning_rate": 1.9711789455713517e-06, + "loss": 42.8438, + "step": 11020 + }, + { + "epoch": 0.10432502532160809, + "grad_norm": 511.51654052734375, + "learning_rate": 1.9711716376480295e-06, + "loss": 54.9219, + "step": 11021 + }, + { + "epoch": 0.10433449134332314, + "grad_norm": 362.22491455078125, + "learning_rate": 1.971164328811869e-06, + "loss": 26.0469, + "step": 11022 + }, + { + "epoch": 0.1043439573650382, + "grad_norm": 926.6913452148438, + "learning_rate": 1.971157019062877e-06, + "loss": 47.8125, + "step": 11023 + }, + { + "epoch": 0.10435342338675326, + "grad_norm": 258.46148681640625, + "learning_rate": 1.9711497084010603e-06, + "loss": 16.3281, + "step": 11024 + }, + { + "epoch": 0.1043628894084683, + "grad_norm": 2.614239454269409, + "learning_rate": 1.9711423968264265e-06, + "loss": 0.853, + "step": 11025 + }, + { + "epoch": 0.10437235543018336, + "grad_norm": 305.5497131347656, + "learning_rate": 1.9711350843389816e-06, + "loss": 24.1641, + "step": 11026 + }, + { + "epoch": 0.10438182145189841, + "grad_norm": 3.1954874992370605, + "learning_rate": 1.9711277709387334e-06, + "loss": 0.9634, + "step": 11027 + }, + { + "epoch": 0.10439128747361347, + "grad_norm": 182.5242156982422, + "learning_rate": 1.9711204566256877e-06, + "loss": 23.2188, + "step": 11028 + }, + { + "epoch": 0.10440075349532851, + "grad_norm": 531.1272583007812, + "learning_rate": 1.9711131413998527e-06, + "loss": 53.4844, + "step": 11029 + }, + { + "epoch": 0.10441021951704357, + "grad_norm": 679.1480102539062, + "learning_rate": 1.9711058252612338e-06, + "loss": 20.7031, + "step": 11030 + }, + { + "epoch": 0.10441968553875862, + "grad_norm": 1006.1559448242188, + "learning_rate": 1.971098508209839e-06, + "loss": 56.3203, + "step": 11031 + }, + { + "epoch": 0.10442915156047368, + "grad_norm": 542.111083984375, + "learning_rate": 1.971091190245675e-06, + "loss": 45.4531, + "step": 11032 + }, + { + "epoch": 0.10443861758218874, + "grad_norm": 993.37109375, + "learning_rate": 1.971083871368748e-06, + "loss": 27.9219, + "step": 11033 + }, + { + "epoch": 0.10444808360390379, + "grad_norm": 469.634521484375, + "learning_rate": 1.971076551579065e-06, + "loss": 22.2188, + "step": 11034 + }, + { + "epoch": 0.10445754962561885, + "grad_norm": 669.7723999023438, + "learning_rate": 1.971069230876634e-06, + "loss": 58.0, + "step": 11035 + }, + { + "epoch": 0.10446701564733389, + "grad_norm": 547.9564819335938, + "learning_rate": 1.9710619092614603e-06, + "loss": 47.3906, + "step": 11036 + }, + { + "epoch": 0.10447648166904895, + "grad_norm": 764.3327026367188, + "learning_rate": 1.9710545867335524e-06, + "loss": 38.2969, + "step": 11037 + }, + { + "epoch": 0.104485947690764, + "grad_norm": 255.91648864746094, + "learning_rate": 1.9710472632929157e-06, + "loss": 21.0078, + "step": 11038 + }, + { + "epoch": 0.10449541371247906, + "grad_norm": 779.333984375, + "learning_rate": 1.9710399389395583e-06, + "loss": 35.5234, + "step": 11039 + }, + { + "epoch": 0.10450487973419412, + "grad_norm": 282.8353576660156, + "learning_rate": 1.9710326136734864e-06, + "loss": 19.7969, + "step": 11040 + }, + { + "epoch": 0.10451434575590916, + "grad_norm": 993.9248657226562, + "learning_rate": 1.971025287494707e-06, + "loss": 81.3359, + "step": 11041 + }, + { + "epoch": 0.10452381177762422, + "grad_norm": 162.68218994140625, + "learning_rate": 1.9710179604032264e-06, + "loss": 22.5312, + "step": 11042 + }, + { + "epoch": 0.10453327779933927, + "grad_norm": 224.70155334472656, + "learning_rate": 1.9710106323990528e-06, + "loss": 19.8125, + "step": 11043 + }, + { + "epoch": 0.10454274382105433, + "grad_norm": 419.9261779785156, + "learning_rate": 1.9710033034821922e-06, + "loss": 42.0, + "step": 11044 + }, + { + "epoch": 0.10455220984276938, + "grad_norm": 907.1172485351562, + "learning_rate": 1.9709959736526515e-06, + "loss": 47.0, + "step": 11045 + }, + { + "epoch": 0.10456167586448444, + "grad_norm": 258.5594787597656, + "learning_rate": 1.970988642910438e-06, + "loss": 25.2344, + "step": 11046 + }, + { + "epoch": 0.10457114188619948, + "grad_norm": 670.270263671875, + "learning_rate": 1.970981311255558e-06, + "loss": 46.7969, + "step": 11047 + }, + { + "epoch": 0.10458060790791454, + "grad_norm": 538.0924072265625, + "learning_rate": 1.9709739786880187e-06, + "loss": 24.6719, + "step": 11048 + }, + { + "epoch": 0.1045900739296296, + "grad_norm": 343.294189453125, + "learning_rate": 1.9709666452078275e-06, + "loss": 26.1797, + "step": 11049 + }, + { + "epoch": 0.10459953995134465, + "grad_norm": 391.7842712402344, + "learning_rate": 1.970959310814991e-06, + "loss": 39.3906, + "step": 11050 + }, + { + "epoch": 0.10460900597305971, + "grad_norm": 381.0661926269531, + "learning_rate": 1.970951975509515e-06, + "loss": 24.3047, + "step": 11051 + }, + { + "epoch": 0.10461847199477475, + "grad_norm": 395.5987548828125, + "learning_rate": 1.970944639291408e-06, + "loss": 26.6016, + "step": 11052 + }, + { + "epoch": 0.10462793801648981, + "grad_norm": 209.2889404296875, + "learning_rate": 1.970937302160676e-06, + "loss": 19.5391, + "step": 11053 + }, + { + "epoch": 0.10463740403820486, + "grad_norm": 451.6080017089844, + "learning_rate": 1.970929964117326e-06, + "loss": 17.3516, + "step": 11054 + }, + { + "epoch": 0.10464687005991992, + "grad_norm": 364.6752624511719, + "learning_rate": 1.970922625161365e-06, + "loss": 34.4062, + "step": 11055 + }, + { + "epoch": 0.10465633608163497, + "grad_norm": 295.34613037109375, + "learning_rate": 1.9709152852928e-06, + "loss": 37.2969, + "step": 11056 + }, + { + "epoch": 0.10466580210335003, + "grad_norm": 384.87994384765625, + "learning_rate": 1.9709079445116376e-06, + "loss": 23.3828, + "step": 11057 + }, + { + "epoch": 0.10467526812506509, + "grad_norm": 252.2899169921875, + "learning_rate": 1.970900602817885e-06, + "loss": 19.6719, + "step": 11058 + }, + { + "epoch": 0.10468473414678013, + "grad_norm": 3.295797824859619, + "learning_rate": 1.970893260211549e-06, + "loss": 0.9912, + "step": 11059 + }, + { + "epoch": 0.10469420016849519, + "grad_norm": 325.1255798339844, + "learning_rate": 1.970885916692637e-06, + "loss": 22.0117, + "step": 11060 + }, + { + "epoch": 0.10470366619021024, + "grad_norm": 2.886643409729004, + "learning_rate": 1.970878572261155e-06, + "loss": 0.8691, + "step": 11061 + }, + { + "epoch": 0.1047131322119253, + "grad_norm": 296.6607971191406, + "learning_rate": 1.97087122691711e-06, + "loss": 17.3516, + "step": 11062 + }, + { + "epoch": 0.10472259823364034, + "grad_norm": 598.5655517578125, + "learning_rate": 1.97086388066051e-06, + "loss": 49.6562, + "step": 11063 + }, + { + "epoch": 0.1047320642553554, + "grad_norm": 3.0879268646240234, + "learning_rate": 1.9708565334913602e-06, + "loss": 1.0122, + "step": 11064 + }, + { + "epoch": 0.10474153027707045, + "grad_norm": 394.7765808105469, + "learning_rate": 1.9708491854096686e-06, + "loss": 26.4375, + "step": 11065 + }, + { + "epoch": 0.10475099629878551, + "grad_norm": 188.1182861328125, + "learning_rate": 1.970841836415442e-06, + "loss": 23.1602, + "step": 11066 + }, + { + "epoch": 0.10476046232050057, + "grad_norm": 168.74896240234375, + "learning_rate": 1.970834486508688e-06, + "loss": 23.4062, + "step": 11067 + }, + { + "epoch": 0.10476992834221562, + "grad_norm": 3.2329797744750977, + "learning_rate": 1.970827135689412e-06, + "loss": 1.0718, + "step": 11068 + }, + { + "epoch": 0.10477939436393068, + "grad_norm": 336.25469970703125, + "learning_rate": 1.9708197839576217e-06, + "loss": 17.8555, + "step": 11069 + }, + { + "epoch": 0.10478886038564572, + "grad_norm": 270.8706359863281, + "learning_rate": 1.970812431313324e-06, + "loss": 19.7734, + "step": 11070 + }, + { + "epoch": 0.10479832640736078, + "grad_norm": 250.5419158935547, + "learning_rate": 1.9708050777565257e-06, + "loss": 17.2578, + "step": 11071 + }, + { + "epoch": 0.10480779242907583, + "grad_norm": 3.028418779373169, + "learning_rate": 1.9707977232872337e-06, + "loss": 0.8308, + "step": 11072 + }, + { + "epoch": 0.10481725845079089, + "grad_norm": 271.75396728515625, + "learning_rate": 1.9707903679054555e-06, + "loss": 23.2422, + "step": 11073 + }, + { + "epoch": 0.10482672447250593, + "grad_norm": 277.798828125, + "learning_rate": 1.970783011611197e-06, + "loss": 20.4062, + "step": 11074 + }, + { + "epoch": 0.104836190494221, + "grad_norm": 1366.8245849609375, + "learning_rate": 1.9707756544044657e-06, + "loss": 45.7344, + "step": 11075 + }, + { + "epoch": 0.10484565651593605, + "grad_norm": 495.7766418457031, + "learning_rate": 1.9707682962852684e-06, + "loss": 23.8281, + "step": 11076 + }, + { + "epoch": 0.1048551225376511, + "grad_norm": 440.3424987792969, + "learning_rate": 1.970760937253612e-06, + "loss": 18.5938, + "step": 11077 + }, + { + "epoch": 0.10486458855936616, + "grad_norm": 4.059333324432373, + "learning_rate": 1.970753577309504e-06, + "loss": 0.9746, + "step": 11078 + }, + { + "epoch": 0.1048740545810812, + "grad_norm": 295.55401611328125, + "learning_rate": 1.9707462164529504e-06, + "loss": 20.625, + "step": 11079 + }, + { + "epoch": 0.10488352060279627, + "grad_norm": 155.9383544921875, + "learning_rate": 1.9707388546839585e-06, + "loss": 23.2109, + "step": 11080 + }, + { + "epoch": 0.10489298662451131, + "grad_norm": 266.9360656738281, + "learning_rate": 1.970731492002535e-06, + "loss": 21.0312, + "step": 11081 + }, + { + "epoch": 0.10490245264622637, + "grad_norm": 323.8378601074219, + "learning_rate": 1.9707241284086878e-06, + "loss": 42.8125, + "step": 11082 + }, + { + "epoch": 0.10491191866794143, + "grad_norm": 583.0111083984375, + "learning_rate": 1.9707167639024225e-06, + "loss": 67.625, + "step": 11083 + }, + { + "epoch": 0.10492138468965648, + "grad_norm": 502.0019226074219, + "learning_rate": 1.9707093984837465e-06, + "loss": 19.8945, + "step": 11084 + }, + { + "epoch": 0.10493085071137154, + "grad_norm": 577.9376831054688, + "learning_rate": 1.9707020321526673e-06, + "loss": 27.4609, + "step": 11085 + }, + { + "epoch": 0.10494031673308658, + "grad_norm": 262.5251770019531, + "learning_rate": 1.970694664909191e-06, + "loss": 19.2266, + "step": 11086 + }, + { + "epoch": 0.10494978275480164, + "grad_norm": 356.4413757324219, + "learning_rate": 1.970687296753325e-06, + "loss": 19.625, + "step": 11087 + }, + { + "epoch": 0.10495924877651669, + "grad_norm": 1665.1514892578125, + "learning_rate": 1.9706799276850758e-06, + "loss": 42.6758, + "step": 11088 + }, + { + "epoch": 0.10496871479823175, + "grad_norm": 318.0236511230469, + "learning_rate": 1.9706725577044505e-06, + "loss": 22.6836, + "step": 11089 + }, + { + "epoch": 0.1049781808199468, + "grad_norm": 285.42578125, + "learning_rate": 1.970665186811457e-06, + "loss": 10.8867, + "step": 11090 + }, + { + "epoch": 0.10498764684166186, + "grad_norm": 408.74188232421875, + "learning_rate": 1.9706578150061003e-06, + "loss": 27.2656, + "step": 11091 + }, + { + "epoch": 0.10499711286337692, + "grad_norm": 353.296630859375, + "learning_rate": 1.9706504422883894e-06, + "loss": 42.3828, + "step": 11092 + }, + { + "epoch": 0.10500657888509196, + "grad_norm": 365.7181396484375, + "learning_rate": 1.9706430686583294e-06, + "loss": 20.2422, + "step": 11093 + }, + { + "epoch": 0.10501604490680702, + "grad_norm": 297.9637451171875, + "learning_rate": 1.9706356941159282e-06, + "loss": 18.0391, + "step": 11094 + }, + { + "epoch": 0.10502551092852207, + "grad_norm": 435.6208801269531, + "learning_rate": 1.970628318661193e-06, + "loss": 21.1406, + "step": 11095 + }, + { + "epoch": 0.10503497695023713, + "grad_norm": 917.4715576171875, + "learning_rate": 1.97062094229413e-06, + "loss": 52.4531, + "step": 11096 + }, + { + "epoch": 0.10504444297195217, + "grad_norm": 528.5549926757812, + "learning_rate": 1.9706135650147466e-06, + "loss": 42.6562, + "step": 11097 + }, + { + "epoch": 0.10505390899366723, + "grad_norm": 312.6387939453125, + "learning_rate": 1.9706061868230497e-06, + "loss": 18.8906, + "step": 11098 + }, + { + "epoch": 0.10506337501538228, + "grad_norm": 973.0550537109375, + "learning_rate": 1.970598807719046e-06, + "loss": 48.7305, + "step": 11099 + }, + { + "epoch": 0.10507284103709734, + "grad_norm": 368.74652099609375, + "learning_rate": 1.9705914277027427e-06, + "loss": 28.9062, + "step": 11100 + }, + { + "epoch": 0.1050823070588124, + "grad_norm": 225.9493408203125, + "learning_rate": 1.9705840467741464e-06, + "loss": 27.4297, + "step": 11101 + }, + { + "epoch": 0.10509177308052745, + "grad_norm": 615.1433715820312, + "learning_rate": 1.970576664933264e-06, + "loss": 41.6172, + "step": 11102 + }, + { + "epoch": 0.1051012391022425, + "grad_norm": 370.9982604980469, + "learning_rate": 1.9705692821801033e-06, + "loss": 16.2109, + "step": 11103 + }, + { + "epoch": 0.10511070512395755, + "grad_norm": 424.4244384765625, + "learning_rate": 1.9705618985146702e-06, + "loss": 32.7891, + "step": 11104 + }, + { + "epoch": 0.10512017114567261, + "grad_norm": 366.71148681640625, + "learning_rate": 1.970554513936972e-06, + "loss": 48.8281, + "step": 11105 + }, + { + "epoch": 0.10512963716738766, + "grad_norm": 194.59536743164062, + "learning_rate": 1.970547128447016e-06, + "loss": 19.4922, + "step": 11106 + }, + { + "epoch": 0.10513910318910272, + "grad_norm": 244.11044311523438, + "learning_rate": 1.970539742044809e-06, + "loss": 15.293, + "step": 11107 + }, + { + "epoch": 0.10514856921081776, + "grad_norm": 478.781982421875, + "learning_rate": 1.970532354730357e-06, + "loss": 51.0703, + "step": 11108 + }, + { + "epoch": 0.10515803523253282, + "grad_norm": 458.2375183105469, + "learning_rate": 1.9705249665036684e-06, + "loss": 25.7773, + "step": 11109 + }, + { + "epoch": 0.10516750125424788, + "grad_norm": 217.58035278320312, + "learning_rate": 1.970517577364749e-06, + "loss": 19.3125, + "step": 11110 + }, + { + "epoch": 0.10517696727596293, + "grad_norm": 191.0109100341797, + "learning_rate": 1.9705101873136066e-06, + "loss": 18.7266, + "step": 11111 + }, + { + "epoch": 0.10518643329767799, + "grad_norm": 543.9049072265625, + "learning_rate": 1.9705027963502477e-06, + "loss": 49.4844, + "step": 11112 + }, + { + "epoch": 0.10519589931939304, + "grad_norm": 672.0602416992188, + "learning_rate": 1.9704954044746792e-06, + "loss": 41.7578, + "step": 11113 + }, + { + "epoch": 0.1052053653411081, + "grad_norm": 179.30120849609375, + "learning_rate": 1.970488011686908e-06, + "loss": 22.0781, + "step": 11114 + }, + { + "epoch": 0.10521483136282314, + "grad_norm": 180.05706787109375, + "learning_rate": 1.9704806179869415e-06, + "loss": 26.2734, + "step": 11115 + }, + { + "epoch": 0.1052242973845382, + "grad_norm": 256.25732421875, + "learning_rate": 1.9704732233747862e-06, + "loss": 22.5547, + "step": 11116 + }, + { + "epoch": 0.10523376340625325, + "grad_norm": 660.0482788085938, + "learning_rate": 1.9704658278504493e-06, + "loss": 41.0547, + "step": 11117 + }, + { + "epoch": 0.10524322942796831, + "grad_norm": 355.1515808105469, + "learning_rate": 1.9704584314139375e-06, + "loss": 33.0781, + "step": 11118 + }, + { + "epoch": 0.10525269544968337, + "grad_norm": 305.4573669433594, + "learning_rate": 1.9704510340652576e-06, + "loss": 19.0859, + "step": 11119 + }, + { + "epoch": 0.10526216147139841, + "grad_norm": 145.98382568359375, + "learning_rate": 1.9704436358044177e-06, + "loss": 20.1797, + "step": 11120 + }, + { + "epoch": 0.10527162749311347, + "grad_norm": 224.48069763183594, + "learning_rate": 1.970436236631423e-06, + "loss": 18.9062, + "step": 11121 + }, + { + "epoch": 0.10528109351482852, + "grad_norm": 622.5853271484375, + "learning_rate": 1.9704288365462823e-06, + "loss": 31.2969, + "step": 11122 + }, + { + "epoch": 0.10529055953654358, + "grad_norm": 156.7534942626953, + "learning_rate": 1.970421435549001e-06, + "loss": 17.4922, + "step": 11123 + }, + { + "epoch": 0.10530002555825863, + "grad_norm": 374.2091369628906, + "learning_rate": 1.9704140336395867e-06, + "loss": 25.2656, + "step": 11124 + }, + { + "epoch": 0.10530949157997369, + "grad_norm": 3.594177484512329, + "learning_rate": 1.9704066308180466e-06, + "loss": 0.9155, + "step": 11125 + }, + { + "epoch": 0.10531895760168875, + "grad_norm": 313.22039794921875, + "learning_rate": 1.9703992270843874e-06, + "loss": 44.3906, + "step": 11126 + }, + { + "epoch": 0.10532842362340379, + "grad_norm": 303.8727722167969, + "learning_rate": 1.970391822438616e-06, + "loss": 17.1094, + "step": 11127 + }, + { + "epoch": 0.10533788964511885, + "grad_norm": 359.5357971191406, + "learning_rate": 1.9703844168807394e-06, + "loss": 32.3906, + "step": 11128 + }, + { + "epoch": 0.1053473556668339, + "grad_norm": 242.88963317871094, + "learning_rate": 1.9703770104107644e-06, + "loss": 22.5938, + "step": 11129 + }, + { + "epoch": 0.10535682168854896, + "grad_norm": 2.9905214309692383, + "learning_rate": 1.970369603028698e-06, + "loss": 0.74, + "step": 11130 + }, + { + "epoch": 0.105366287710264, + "grad_norm": 426.96624755859375, + "learning_rate": 1.9703621947345475e-06, + "loss": 18.5, + "step": 11131 + }, + { + "epoch": 0.10537575373197906, + "grad_norm": 630.611083984375, + "learning_rate": 1.97035478552832e-06, + "loss": 40.3594, + "step": 11132 + }, + { + "epoch": 0.10538521975369411, + "grad_norm": 499.47808837890625, + "learning_rate": 1.970347375410022e-06, + "loss": 23.2188, + "step": 11133 + }, + { + "epoch": 0.10539468577540917, + "grad_norm": 313.9397888183594, + "learning_rate": 1.97033996437966e-06, + "loss": 39.0156, + "step": 11134 + }, + { + "epoch": 0.10540415179712423, + "grad_norm": 375.9655456542969, + "learning_rate": 1.970332552437242e-06, + "loss": 47.9219, + "step": 11135 + }, + { + "epoch": 0.10541361781883928, + "grad_norm": 700.2702026367188, + "learning_rate": 1.970325139582775e-06, + "loss": 43.0547, + "step": 11136 + }, + { + "epoch": 0.10542308384055434, + "grad_norm": 536.7632446289062, + "learning_rate": 1.9703177258162647e-06, + "loss": 21.9961, + "step": 11137 + }, + { + "epoch": 0.10543254986226938, + "grad_norm": 347.0466613769531, + "learning_rate": 1.9703103111377193e-06, + "loss": 34.4023, + "step": 11138 + }, + { + "epoch": 0.10544201588398444, + "grad_norm": 532.3734130859375, + "learning_rate": 1.970302895547145e-06, + "loss": 42.5078, + "step": 11139 + }, + { + "epoch": 0.10545148190569949, + "grad_norm": 588.6854858398438, + "learning_rate": 1.9702954790445495e-06, + "loss": 18.2266, + "step": 11140 + }, + { + "epoch": 0.10546094792741455, + "grad_norm": 375.93255615234375, + "learning_rate": 1.9702880616299395e-06, + "loss": 42.7188, + "step": 11141 + }, + { + "epoch": 0.1054704139491296, + "grad_norm": 146.7203369140625, + "learning_rate": 1.9702806433033214e-06, + "loss": 14.1367, + "step": 11142 + }, + { + "epoch": 0.10547987997084465, + "grad_norm": 283.5679626464844, + "learning_rate": 1.9702732240647026e-06, + "loss": 12.8203, + "step": 11143 + }, + { + "epoch": 0.10548934599255971, + "grad_norm": 364.35205078125, + "learning_rate": 1.97026580391409e-06, + "loss": 22.2969, + "step": 11144 + }, + { + "epoch": 0.10549881201427476, + "grad_norm": 222.1013641357422, + "learning_rate": 1.9702583828514913e-06, + "loss": 10.9062, + "step": 11145 + }, + { + "epoch": 0.10550827803598982, + "grad_norm": 1103.510986328125, + "learning_rate": 1.970250960876912e-06, + "loss": 61.1172, + "step": 11146 + }, + { + "epoch": 0.10551774405770487, + "grad_norm": 389.6826171875, + "learning_rate": 1.9702435379903607e-06, + "loss": 39.6797, + "step": 11147 + }, + { + "epoch": 0.10552721007941993, + "grad_norm": 385.320556640625, + "learning_rate": 1.9702361141918434e-06, + "loss": 52.1328, + "step": 11148 + }, + { + "epoch": 0.10553667610113497, + "grad_norm": 2.58056902885437, + "learning_rate": 1.9702286894813667e-06, + "loss": 0.8652, + "step": 11149 + }, + { + "epoch": 0.10554614212285003, + "grad_norm": 269.1755676269531, + "learning_rate": 1.9702212638589388e-06, + "loss": 18.1406, + "step": 11150 + }, + { + "epoch": 0.10555560814456508, + "grad_norm": 428.0118408203125, + "learning_rate": 1.970213837324566e-06, + "loss": 53.3125, + "step": 11151 + }, + { + "epoch": 0.10556507416628014, + "grad_norm": 318.7144775390625, + "learning_rate": 1.970206409878255e-06, + "loss": 16.6016, + "step": 11152 + }, + { + "epoch": 0.1055745401879952, + "grad_norm": 302.85284423828125, + "learning_rate": 1.9701989815200132e-06, + "loss": 20.5469, + "step": 11153 + }, + { + "epoch": 0.10558400620971024, + "grad_norm": 537.037841796875, + "learning_rate": 1.9701915522498473e-06, + "loss": 32.4922, + "step": 11154 + }, + { + "epoch": 0.1055934722314253, + "grad_norm": 172.33468627929688, + "learning_rate": 1.9701841220677648e-06, + "loss": 25.4766, + "step": 11155 + }, + { + "epoch": 0.10560293825314035, + "grad_norm": 381.8321838378906, + "learning_rate": 1.970176690973772e-06, + "loss": 33.8438, + "step": 11156 + }, + { + "epoch": 0.10561240427485541, + "grad_norm": 278.1851501464844, + "learning_rate": 1.9701692589678765e-06, + "loss": 19.4844, + "step": 11157 + }, + { + "epoch": 0.10562187029657046, + "grad_norm": 2.884880781173706, + "learning_rate": 1.9701618260500846e-06, + "loss": 1.0259, + "step": 11158 + }, + { + "epoch": 0.10563133631828552, + "grad_norm": 3.8089451789855957, + "learning_rate": 1.9701543922204043e-06, + "loss": 0.8555, + "step": 11159 + }, + { + "epoch": 0.10564080234000056, + "grad_norm": 228.39881896972656, + "learning_rate": 1.9701469574788417e-06, + "loss": 24.3359, + "step": 11160 + }, + { + "epoch": 0.10565026836171562, + "grad_norm": 234.229248046875, + "learning_rate": 1.970139521825404e-06, + "loss": 19.3672, + "step": 11161 + }, + { + "epoch": 0.10565973438343068, + "grad_norm": 290.1262512207031, + "learning_rate": 1.9701320852600986e-06, + "loss": 26.2266, + "step": 11162 + }, + { + "epoch": 0.10566920040514573, + "grad_norm": 246.56866455078125, + "learning_rate": 1.9701246477829313e-06, + "loss": 19.5859, + "step": 11163 + }, + { + "epoch": 0.10567866642686079, + "grad_norm": 445.8070068359375, + "learning_rate": 1.9701172093939106e-06, + "loss": 45.6719, + "step": 11164 + }, + { + "epoch": 0.10568813244857583, + "grad_norm": 183.25245666503906, + "learning_rate": 1.970109770093043e-06, + "loss": 17.1562, + "step": 11165 + }, + { + "epoch": 0.1056975984702909, + "grad_norm": 1346.407470703125, + "learning_rate": 1.970102329880335e-06, + "loss": 50.5938, + "step": 11166 + }, + { + "epoch": 0.10570706449200594, + "grad_norm": 473.24560546875, + "learning_rate": 1.970094888755794e-06, + "loss": 24.4766, + "step": 11167 + }, + { + "epoch": 0.105716530513721, + "grad_norm": 962.2197875976562, + "learning_rate": 1.970087446719427e-06, + "loss": 32.668, + "step": 11168 + }, + { + "epoch": 0.10572599653543605, + "grad_norm": 336.3603515625, + "learning_rate": 1.9700800037712404e-06, + "loss": 27.9297, + "step": 11169 + }, + { + "epoch": 0.1057354625571511, + "grad_norm": 222.56292724609375, + "learning_rate": 1.9700725599112424e-06, + "loss": 17.5938, + "step": 11170 + }, + { + "epoch": 0.10574492857886617, + "grad_norm": 848.1484375, + "learning_rate": 1.9700651151394387e-06, + "loss": 25.3281, + "step": 11171 + }, + { + "epoch": 0.10575439460058121, + "grad_norm": 3.0120837688446045, + "learning_rate": 1.9700576694558376e-06, + "loss": 0.9092, + "step": 11172 + }, + { + "epoch": 0.10576386062229627, + "grad_norm": 759.2112426757812, + "learning_rate": 1.970050222860445e-06, + "loss": 40.2812, + "step": 11173 + }, + { + "epoch": 0.10577332664401132, + "grad_norm": 250.86260986328125, + "learning_rate": 1.9700427753532683e-06, + "loss": 23.6406, + "step": 11174 + }, + { + "epoch": 0.10578279266572638, + "grad_norm": 3.4811244010925293, + "learning_rate": 1.9700353269343144e-06, + "loss": 0.9614, + "step": 11175 + }, + { + "epoch": 0.10579225868744142, + "grad_norm": 376.2232971191406, + "learning_rate": 1.9700278776035906e-06, + "loss": 41.5234, + "step": 11176 + }, + { + "epoch": 0.10580172470915648, + "grad_norm": 560.048095703125, + "learning_rate": 1.9700204273611036e-06, + "loss": 46.7656, + "step": 11177 + }, + { + "epoch": 0.10581119073087154, + "grad_norm": 284.73040771484375, + "learning_rate": 1.9700129762068605e-06, + "loss": 21.0938, + "step": 11178 + }, + { + "epoch": 0.10582065675258659, + "grad_norm": 404.44927978515625, + "learning_rate": 1.9700055241408683e-06, + "loss": 20.4297, + "step": 11179 + }, + { + "epoch": 0.10583012277430165, + "grad_norm": 391.1131286621094, + "learning_rate": 1.969998071163134e-06, + "loss": 37.875, + "step": 11180 + }, + { + "epoch": 0.1058395887960167, + "grad_norm": 577.6972045898438, + "learning_rate": 1.969990617273665e-06, + "loss": 41.9219, + "step": 11181 + }, + { + "epoch": 0.10584905481773176, + "grad_norm": 684.9730834960938, + "learning_rate": 1.969983162472467e-06, + "loss": 60.4453, + "step": 11182 + }, + { + "epoch": 0.1058585208394468, + "grad_norm": 829.9010620117188, + "learning_rate": 1.969975706759549e-06, + "loss": 50.25, + "step": 11183 + }, + { + "epoch": 0.10586798686116186, + "grad_norm": 537.1361694335938, + "learning_rate": 1.969968250134916e-06, + "loss": 34.1016, + "step": 11184 + }, + { + "epoch": 0.10587745288287691, + "grad_norm": 211.88499450683594, + "learning_rate": 1.9699607925985764e-06, + "loss": 21.25, + "step": 11185 + }, + { + "epoch": 0.10588691890459197, + "grad_norm": 299.1098937988281, + "learning_rate": 1.969953334150537e-06, + "loss": 19.6094, + "step": 11186 + }, + { + "epoch": 0.10589638492630703, + "grad_norm": 317.7273254394531, + "learning_rate": 1.969945874790804e-06, + "loss": 49.1562, + "step": 11187 + }, + { + "epoch": 0.10590585094802207, + "grad_norm": 242.1336669921875, + "learning_rate": 1.9699384145193856e-06, + "loss": 14.4609, + "step": 11188 + }, + { + "epoch": 0.10591531696973713, + "grad_norm": 293.61627197265625, + "learning_rate": 1.9699309533362877e-06, + "loss": 18.4141, + "step": 11189 + }, + { + "epoch": 0.10592478299145218, + "grad_norm": 986.9085083007812, + "learning_rate": 1.9699234912415176e-06, + "loss": 34.7031, + "step": 11190 + }, + { + "epoch": 0.10593424901316724, + "grad_norm": 549.4795532226562, + "learning_rate": 1.969916028235083e-06, + "loss": 49.2891, + "step": 11191 + }, + { + "epoch": 0.10594371503488229, + "grad_norm": 345.14276123046875, + "learning_rate": 1.9699085643169903e-06, + "loss": 32.4219, + "step": 11192 + }, + { + "epoch": 0.10595318105659735, + "grad_norm": 437.192626953125, + "learning_rate": 1.9699010994872466e-06, + "loss": 46.0781, + "step": 11193 + }, + { + "epoch": 0.10596264707831239, + "grad_norm": 376.1905822753906, + "learning_rate": 1.969893633745859e-06, + "loss": 12.9863, + "step": 11194 + }, + { + "epoch": 0.10597211310002745, + "grad_norm": 425.7456970214844, + "learning_rate": 1.9698861670928347e-06, + "loss": 13.6523, + "step": 11195 + }, + { + "epoch": 0.10598157912174251, + "grad_norm": 203.59129333496094, + "learning_rate": 1.96987869952818e-06, + "loss": 23.375, + "step": 11196 + }, + { + "epoch": 0.10599104514345756, + "grad_norm": 2.9906961917877197, + "learning_rate": 1.9698712310519027e-06, + "loss": 0.9756, + "step": 11197 + }, + { + "epoch": 0.10600051116517262, + "grad_norm": 478.7656555175781, + "learning_rate": 1.9698637616640096e-06, + "loss": 27.5391, + "step": 11198 + }, + { + "epoch": 0.10600997718688766, + "grad_norm": 238.276611328125, + "learning_rate": 1.969856291364508e-06, + "loss": 14.332, + "step": 11199 + }, + { + "epoch": 0.10601944320860272, + "grad_norm": 431.1570129394531, + "learning_rate": 1.969848820153404e-06, + "loss": 20.5156, + "step": 11200 + }, + { + "epoch": 0.10602890923031777, + "grad_norm": 351.4415588378906, + "learning_rate": 1.969841348030705e-06, + "loss": 35.3281, + "step": 11201 + }, + { + "epoch": 0.10603837525203283, + "grad_norm": 383.7952880859375, + "learning_rate": 1.969833874996419e-06, + "loss": 43.1641, + "step": 11202 + }, + { + "epoch": 0.10604784127374788, + "grad_norm": 560.71826171875, + "learning_rate": 1.9698264010505515e-06, + "loss": 48.2656, + "step": 11203 + }, + { + "epoch": 0.10605730729546294, + "grad_norm": 253.08343505859375, + "learning_rate": 1.9698189261931112e-06, + "loss": 36.7969, + "step": 11204 + }, + { + "epoch": 0.106066773317178, + "grad_norm": 245.6465606689453, + "learning_rate": 1.969811450424103e-06, + "loss": 16.6562, + "step": 11205 + }, + { + "epoch": 0.10607623933889304, + "grad_norm": 305.86505126953125, + "learning_rate": 1.969803973743536e-06, + "loss": 29.625, + "step": 11206 + }, + { + "epoch": 0.1060857053606081, + "grad_norm": 248.66102600097656, + "learning_rate": 1.969796496151416e-06, + "loss": 20.6016, + "step": 11207 + }, + { + "epoch": 0.10609517138232315, + "grad_norm": 207.6014404296875, + "learning_rate": 1.9697890176477507e-06, + "loss": 17.9922, + "step": 11208 + }, + { + "epoch": 0.10610463740403821, + "grad_norm": 320.77337646484375, + "learning_rate": 1.9697815382325465e-06, + "loss": 25.2812, + "step": 11209 + }, + { + "epoch": 0.10611410342575325, + "grad_norm": 530.2566528320312, + "learning_rate": 1.9697740579058107e-06, + "loss": 40.25, + "step": 11210 + }, + { + "epoch": 0.10612356944746831, + "grad_norm": 450.7305908203125, + "learning_rate": 1.969766576667551e-06, + "loss": 36.8906, + "step": 11211 + }, + { + "epoch": 0.10613303546918336, + "grad_norm": 3.4505257606506348, + "learning_rate": 1.9697590945177733e-06, + "loss": 0.9863, + "step": 11212 + }, + { + "epoch": 0.10614250149089842, + "grad_norm": 197.18539428710938, + "learning_rate": 1.9697516114564848e-06, + "loss": 17.2656, + "step": 11213 + }, + { + "epoch": 0.10615196751261348, + "grad_norm": 841.001953125, + "learning_rate": 1.9697441274836936e-06, + "loss": 46.7539, + "step": 11214 + }, + { + "epoch": 0.10616143353432853, + "grad_norm": 467.5375061035156, + "learning_rate": 1.9697366425994055e-06, + "loss": 25.4219, + "step": 11215 + }, + { + "epoch": 0.10617089955604359, + "grad_norm": 325.1080627441406, + "learning_rate": 1.9697291568036283e-06, + "loss": 9.3711, + "step": 11216 + }, + { + "epoch": 0.10618036557775863, + "grad_norm": 370.6496887207031, + "learning_rate": 1.9697216700963686e-06, + "loss": 18.9492, + "step": 11217 + }, + { + "epoch": 0.10618983159947369, + "grad_norm": 298.5588684082031, + "learning_rate": 1.9697141824776334e-06, + "loss": 12.8047, + "step": 11218 + }, + { + "epoch": 0.10619929762118874, + "grad_norm": 439.714599609375, + "learning_rate": 1.9697066939474305e-06, + "loss": 34.4727, + "step": 11219 + }, + { + "epoch": 0.1062087636429038, + "grad_norm": 571.2153930664062, + "learning_rate": 1.969699204505766e-06, + "loss": 47.8906, + "step": 11220 + }, + { + "epoch": 0.10621822966461886, + "grad_norm": 175.71792602539062, + "learning_rate": 1.969691714152648e-06, + "loss": 9.9961, + "step": 11221 + }, + { + "epoch": 0.1062276956863339, + "grad_norm": 227.92332458496094, + "learning_rate": 1.969684222888082e-06, + "loss": 22.6016, + "step": 11222 + }, + { + "epoch": 0.10623716170804896, + "grad_norm": 450.0300598144531, + "learning_rate": 1.969676730712076e-06, + "loss": 43.625, + "step": 11223 + }, + { + "epoch": 0.10624662772976401, + "grad_norm": 727.7874755859375, + "learning_rate": 1.9696692376246373e-06, + "loss": 56.0469, + "step": 11224 + }, + { + "epoch": 0.10625609375147907, + "grad_norm": 318.4004211425781, + "learning_rate": 1.9696617436257725e-06, + "loss": 25.1094, + "step": 11225 + }, + { + "epoch": 0.10626555977319412, + "grad_norm": 277.0768737792969, + "learning_rate": 1.9696542487154887e-06, + "loss": 27.6406, + "step": 11226 + }, + { + "epoch": 0.10627502579490918, + "grad_norm": 628.3143310546875, + "learning_rate": 1.969646752893793e-06, + "loss": 28.4062, + "step": 11227 + }, + { + "epoch": 0.10628449181662422, + "grad_norm": 483.32611083984375, + "learning_rate": 1.9696392561606926e-06, + "loss": 63.0156, + "step": 11228 + }, + { + "epoch": 0.10629395783833928, + "grad_norm": 493.4352722167969, + "learning_rate": 1.969631758516194e-06, + "loss": 49.2344, + "step": 11229 + }, + { + "epoch": 0.10630342386005434, + "grad_norm": 1045.99072265625, + "learning_rate": 1.969624259960305e-06, + "loss": 43.8203, + "step": 11230 + }, + { + "epoch": 0.10631288988176939, + "grad_norm": 398.0616455078125, + "learning_rate": 1.9696167604930324e-06, + "loss": 39.1172, + "step": 11231 + }, + { + "epoch": 0.10632235590348445, + "grad_norm": 435.5480651855469, + "learning_rate": 1.9696092601143824e-06, + "loss": 21.7891, + "step": 11232 + }, + { + "epoch": 0.1063318219251995, + "grad_norm": 178.97152709960938, + "learning_rate": 1.9696017588243634e-06, + "loss": 19.5547, + "step": 11233 + }, + { + "epoch": 0.10634128794691455, + "grad_norm": 291.32354736328125, + "learning_rate": 1.969594256622982e-06, + "loss": 19.8047, + "step": 11234 + }, + { + "epoch": 0.1063507539686296, + "grad_norm": 402.9325256347656, + "learning_rate": 1.9695867535102445e-06, + "loss": 44.3125, + "step": 11235 + }, + { + "epoch": 0.10636021999034466, + "grad_norm": 208.78424072265625, + "learning_rate": 1.9695792494861585e-06, + "loss": 16.9375, + "step": 11236 + }, + { + "epoch": 0.1063696860120597, + "grad_norm": 289.7113037109375, + "learning_rate": 1.9695717445507312e-06, + "loss": 15.0742, + "step": 11237 + }, + { + "epoch": 0.10637915203377477, + "grad_norm": 196.8052520751953, + "learning_rate": 1.9695642387039698e-06, + "loss": 18.2578, + "step": 11238 + }, + { + "epoch": 0.10638861805548983, + "grad_norm": 322.812744140625, + "learning_rate": 1.969556731945881e-06, + "loss": 16.1016, + "step": 11239 + }, + { + "epoch": 0.10639808407720487, + "grad_norm": 295.0429992675781, + "learning_rate": 1.9695492242764718e-06, + "loss": 11.0156, + "step": 11240 + }, + { + "epoch": 0.10640755009891993, + "grad_norm": 594.0059204101562, + "learning_rate": 1.9695417156957497e-06, + "loss": 39.0, + "step": 11241 + }, + { + "epoch": 0.10641701612063498, + "grad_norm": 539.0309448242188, + "learning_rate": 1.9695342062037213e-06, + "loss": 39.6016, + "step": 11242 + }, + { + "epoch": 0.10642648214235004, + "grad_norm": 877.5274658203125, + "learning_rate": 1.969526695800394e-06, + "loss": 46.5898, + "step": 11243 + }, + { + "epoch": 0.10643594816406508, + "grad_norm": 310.42144775390625, + "learning_rate": 1.9695191844857743e-06, + "loss": 28.9609, + "step": 11244 + }, + { + "epoch": 0.10644541418578014, + "grad_norm": 358.73187255859375, + "learning_rate": 1.9695116722598696e-06, + "loss": 45.9375, + "step": 11245 + }, + { + "epoch": 0.10645488020749519, + "grad_norm": 200.55050659179688, + "learning_rate": 1.969504159122687e-06, + "loss": 23.5625, + "step": 11246 + }, + { + "epoch": 0.10646434622921025, + "grad_norm": 372.4938659667969, + "learning_rate": 1.969496645074234e-06, + "loss": 36.2656, + "step": 11247 + }, + { + "epoch": 0.10647381225092531, + "grad_norm": 245.9890899658203, + "learning_rate": 1.9694891301145173e-06, + "loss": 18.7031, + "step": 11248 + }, + { + "epoch": 0.10648327827264036, + "grad_norm": 349.2228698730469, + "learning_rate": 1.969481614243543e-06, + "loss": 26.0547, + "step": 11249 + }, + { + "epoch": 0.10649274429435542, + "grad_norm": 289.35845947265625, + "learning_rate": 1.96947409746132e-06, + "loss": 17.9219, + "step": 11250 + }, + { + "epoch": 0.10650221031607046, + "grad_norm": 185.69879150390625, + "learning_rate": 1.969466579767854e-06, + "loss": 16.8555, + "step": 11251 + }, + { + "epoch": 0.10651167633778552, + "grad_norm": 280.4979248046875, + "learning_rate": 1.9694590611631525e-06, + "loss": 23.7734, + "step": 11252 + }, + { + "epoch": 0.10652114235950057, + "grad_norm": 245.7454833984375, + "learning_rate": 1.9694515416472228e-06, + "loss": 23.3828, + "step": 11253 + }, + { + "epoch": 0.10653060838121563, + "grad_norm": 1181.4847412109375, + "learning_rate": 1.969444021220071e-06, + "loss": 27.5, + "step": 11254 + }, + { + "epoch": 0.10654007440293067, + "grad_norm": 319.011474609375, + "learning_rate": 1.9694364998817055e-06, + "loss": 25.6562, + "step": 11255 + }, + { + "epoch": 0.10654954042464573, + "grad_norm": 984.9982299804688, + "learning_rate": 1.9694289776321328e-06, + "loss": 49.5664, + "step": 11256 + }, + { + "epoch": 0.1065590064463608, + "grad_norm": 1552.0279541015625, + "learning_rate": 1.9694214544713597e-06, + "loss": 59.125, + "step": 11257 + }, + { + "epoch": 0.10656847246807584, + "grad_norm": 1067.95947265625, + "learning_rate": 1.969413930399394e-06, + "loss": 73.5156, + "step": 11258 + }, + { + "epoch": 0.1065779384897909, + "grad_norm": 313.3894958496094, + "learning_rate": 1.9694064054162413e-06, + "loss": 52.4375, + "step": 11259 + }, + { + "epoch": 0.10658740451150595, + "grad_norm": 216.41946411132812, + "learning_rate": 1.9693988795219104e-06, + "loss": 10.8086, + "step": 11260 + }, + { + "epoch": 0.106596870533221, + "grad_norm": 3.134725570678711, + "learning_rate": 1.9693913527164075e-06, + "loss": 0.9849, + "step": 11261 + }, + { + "epoch": 0.10660633655493605, + "grad_norm": 401.1539306640625, + "learning_rate": 1.9693838249997394e-06, + "loss": 25.6094, + "step": 11262 + }, + { + "epoch": 0.10661580257665111, + "grad_norm": 437.25848388671875, + "learning_rate": 1.969376296371914e-06, + "loss": 32.2891, + "step": 11263 + }, + { + "epoch": 0.10662526859836617, + "grad_norm": 234.0981903076172, + "learning_rate": 1.9693687668329376e-06, + "loss": 21.0625, + "step": 11264 + }, + { + "epoch": 0.10663473462008122, + "grad_norm": 251.52645874023438, + "learning_rate": 1.969361236382818e-06, + "loss": 23.375, + "step": 11265 + }, + { + "epoch": 0.10664420064179628, + "grad_norm": 360.71771240234375, + "learning_rate": 1.9693537050215615e-06, + "loss": 25.6875, + "step": 11266 + }, + { + "epoch": 0.10665366666351132, + "grad_norm": 391.6448059082031, + "learning_rate": 1.969346172749176e-06, + "loss": 19.3906, + "step": 11267 + }, + { + "epoch": 0.10666313268522638, + "grad_norm": 479.71954345703125, + "learning_rate": 1.969338639565668e-06, + "loss": 36.0625, + "step": 11268 + }, + { + "epoch": 0.10667259870694143, + "grad_norm": 193.28500366210938, + "learning_rate": 1.9693311054710448e-06, + "loss": 23.2969, + "step": 11269 + }, + { + "epoch": 0.10668206472865649, + "grad_norm": 241.6361541748047, + "learning_rate": 1.969323570465313e-06, + "loss": 24.7773, + "step": 11270 + }, + { + "epoch": 0.10669153075037154, + "grad_norm": 1389.0191650390625, + "learning_rate": 1.9693160345484804e-06, + "loss": 23.0078, + "step": 11271 + }, + { + "epoch": 0.1067009967720866, + "grad_norm": 324.25177001953125, + "learning_rate": 1.9693084977205537e-06, + "loss": 22.5781, + "step": 11272 + }, + { + "epoch": 0.10671046279380166, + "grad_norm": 408.4810791015625, + "learning_rate": 1.9693009599815406e-06, + "loss": 30.0469, + "step": 11273 + }, + { + "epoch": 0.1067199288155167, + "grad_norm": 196.2861785888672, + "learning_rate": 1.969293421331447e-06, + "loss": 8.6406, + "step": 11274 + }, + { + "epoch": 0.10672939483723176, + "grad_norm": 383.9305725097656, + "learning_rate": 1.969285881770281e-06, + "loss": 31.8164, + "step": 11275 + }, + { + "epoch": 0.10673886085894681, + "grad_norm": 386.2887268066406, + "learning_rate": 1.969278341298049e-06, + "loss": 43.5312, + "step": 11276 + }, + { + "epoch": 0.10674832688066187, + "grad_norm": 261.6397705078125, + "learning_rate": 1.9692707999147587e-06, + "loss": 22.7969, + "step": 11277 + }, + { + "epoch": 0.10675779290237691, + "grad_norm": 337.3226623535156, + "learning_rate": 1.969263257620417e-06, + "loss": 21.2773, + "step": 11278 + }, + { + "epoch": 0.10676725892409197, + "grad_norm": 433.3535461425781, + "learning_rate": 1.9692557144150305e-06, + "loss": 28.1406, + "step": 11279 + }, + { + "epoch": 0.10677672494580702, + "grad_norm": 211.93223571777344, + "learning_rate": 1.9692481702986064e-06, + "loss": 15.793, + "step": 11280 + }, + { + "epoch": 0.10678619096752208, + "grad_norm": 149.2986602783203, + "learning_rate": 1.969240625271153e-06, + "loss": 17.7656, + "step": 11281 + }, + { + "epoch": 0.10679565698923714, + "grad_norm": 253.97288513183594, + "learning_rate": 1.969233079332676e-06, + "loss": 20.6562, + "step": 11282 + }, + { + "epoch": 0.10680512301095219, + "grad_norm": 406.56121826171875, + "learning_rate": 1.969225532483183e-06, + "loss": 22.6797, + "step": 11283 + }, + { + "epoch": 0.10681458903266725, + "grad_norm": 310.1609191894531, + "learning_rate": 1.969217984722681e-06, + "loss": 30.5391, + "step": 11284 + }, + { + "epoch": 0.10682405505438229, + "grad_norm": 413.9848327636719, + "learning_rate": 1.969210436051177e-06, + "loss": 47.8438, + "step": 11285 + }, + { + "epoch": 0.10683352107609735, + "grad_norm": 225.36187744140625, + "learning_rate": 1.9692028864686784e-06, + "loss": 18.5312, + "step": 11286 + }, + { + "epoch": 0.1068429870978124, + "grad_norm": 533.88037109375, + "learning_rate": 1.9691953359751923e-06, + "loss": 44.5, + "step": 11287 + }, + { + "epoch": 0.10685245311952746, + "grad_norm": 927.4794311523438, + "learning_rate": 1.9691877845707256e-06, + "loss": 67.2461, + "step": 11288 + }, + { + "epoch": 0.1068619191412425, + "grad_norm": 397.68035888671875, + "learning_rate": 1.969180232255285e-06, + "loss": 22.9453, + "step": 11289 + }, + { + "epoch": 0.10687138516295756, + "grad_norm": 294.1354064941406, + "learning_rate": 1.9691726790288786e-06, + "loss": 9.4609, + "step": 11290 + }, + { + "epoch": 0.10688085118467262, + "grad_norm": 361.87591552734375, + "learning_rate": 1.9691651248915126e-06, + "loss": 25.6016, + "step": 11291 + }, + { + "epoch": 0.10689031720638767, + "grad_norm": 214.62594604492188, + "learning_rate": 1.9691575698431945e-06, + "loss": 14.1484, + "step": 11292 + }, + { + "epoch": 0.10689978322810273, + "grad_norm": 236.98716735839844, + "learning_rate": 1.9691500138839314e-06, + "loss": 20.2734, + "step": 11293 + }, + { + "epoch": 0.10690924924981778, + "grad_norm": 282.70465087890625, + "learning_rate": 1.96914245701373e-06, + "loss": 20.9453, + "step": 11294 + }, + { + "epoch": 0.10691871527153284, + "grad_norm": 264.4690246582031, + "learning_rate": 1.969134899232598e-06, + "loss": 21.6172, + "step": 11295 + }, + { + "epoch": 0.10692818129324788, + "grad_norm": 534.4027709960938, + "learning_rate": 1.9691273405405425e-06, + "loss": 26.5312, + "step": 11296 + }, + { + "epoch": 0.10693764731496294, + "grad_norm": 204.4641876220703, + "learning_rate": 1.9691197809375703e-06, + "loss": 20.1406, + "step": 11297 + }, + { + "epoch": 0.10694711333667799, + "grad_norm": 189.16372680664062, + "learning_rate": 1.969112220423688e-06, + "loss": 20.3906, + "step": 11298 + }, + { + "epoch": 0.10695657935839305, + "grad_norm": 202.27232360839844, + "learning_rate": 1.969104658998904e-06, + "loss": 15.125, + "step": 11299 + }, + { + "epoch": 0.10696604538010811, + "grad_norm": 370.5359191894531, + "learning_rate": 1.9690970966632242e-06, + "loss": 38.4297, + "step": 11300 + }, + { + "epoch": 0.10697551140182315, + "grad_norm": 3.106902599334717, + "learning_rate": 1.969089533416656e-06, + "loss": 0.8115, + "step": 11301 + }, + { + "epoch": 0.10698497742353821, + "grad_norm": 581.0082397460938, + "learning_rate": 1.969081969259207e-06, + "loss": 53.6484, + "step": 11302 + }, + { + "epoch": 0.10699444344525326, + "grad_norm": 436.8468017578125, + "learning_rate": 1.9690744041908838e-06, + "loss": 49.4375, + "step": 11303 + }, + { + "epoch": 0.10700390946696832, + "grad_norm": 180.16818237304688, + "learning_rate": 1.969066838211694e-06, + "loss": 19.3281, + "step": 11304 + }, + { + "epoch": 0.10701337548868337, + "grad_norm": 634.3945922851562, + "learning_rate": 1.9690592713216443e-06, + "loss": 39.0938, + "step": 11305 + }, + { + "epoch": 0.10702284151039843, + "grad_norm": 900.6057739257812, + "learning_rate": 1.969051703520742e-06, + "loss": 45.4609, + "step": 11306 + }, + { + "epoch": 0.10703230753211349, + "grad_norm": 204.68109130859375, + "learning_rate": 1.969044134808994e-06, + "loss": 17.0625, + "step": 11307 + }, + { + "epoch": 0.10704177355382853, + "grad_norm": 197.55027770996094, + "learning_rate": 1.9690365651864075e-06, + "loss": 16.7266, + "step": 11308 + }, + { + "epoch": 0.10705123957554359, + "grad_norm": 760.6207275390625, + "learning_rate": 1.9690289946529896e-06, + "loss": 42.2969, + "step": 11309 + }, + { + "epoch": 0.10706070559725864, + "grad_norm": 241.17259216308594, + "learning_rate": 1.9690214232087475e-06, + "loss": 25.7656, + "step": 11310 + }, + { + "epoch": 0.1070701716189737, + "grad_norm": 902.7659301757812, + "learning_rate": 1.9690138508536883e-06, + "loss": 51.375, + "step": 11311 + }, + { + "epoch": 0.10707963764068874, + "grad_norm": 385.6280212402344, + "learning_rate": 1.9690062775878196e-06, + "loss": 22.6562, + "step": 11312 + }, + { + "epoch": 0.1070891036624038, + "grad_norm": 491.4438781738281, + "learning_rate": 1.9689987034111474e-06, + "loss": 23.6562, + "step": 11313 + }, + { + "epoch": 0.10709856968411885, + "grad_norm": 488.6046142578125, + "learning_rate": 1.96899112832368e-06, + "loss": 11.0898, + "step": 11314 + }, + { + "epoch": 0.10710803570583391, + "grad_norm": 158.42694091796875, + "learning_rate": 1.968983552325424e-06, + "loss": 18.75, + "step": 11315 + }, + { + "epoch": 0.10711750172754897, + "grad_norm": 394.5203857421875, + "learning_rate": 1.9689759754163856e-06, + "loss": 45.125, + "step": 11316 + }, + { + "epoch": 0.10712696774926402, + "grad_norm": 407.99981689453125, + "learning_rate": 1.9689683975965735e-06, + "loss": 23.9922, + "step": 11317 + }, + { + "epoch": 0.10713643377097908, + "grad_norm": 725.556640625, + "learning_rate": 1.9689608188659943e-06, + "loss": 9.7109, + "step": 11318 + }, + { + "epoch": 0.10714589979269412, + "grad_norm": 1000.3292236328125, + "learning_rate": 1.9689532392246547e-06, + "loss": 29.5859, + "step": 11319 + }, + { + "epoch": 0.10715536581440918, + "grad_norm": 467.39947509765625, + "learning_rate": 1.968945658672562e-06, + "loss": 22.1836, + "step": 11320 + }, + { + "epoch": 0.10716483183612423, + "grad_norm": 2.79194712638855, + "learning_rate": 1.9689380772097235e-06, + "loss": 0.8257, + "step": 11321 + }, + { + "epoch": 0.10717429785783929, + "grad_norm": 389.38311767578125, + "learning_rate": 1.9689304948361462e-06, + "loss": 26.9531, + "step": 11322 + }, + { + "epoch": 0.10718376387955433, + "grad_norm": 756.8054809570312, + "learning_rate": 1.9689229115518374e-06, + "loss": 21.6719, + "step": 11323 + }, + { + "epoch": 0.1071932299012694, + "grad_norm": 378.751708984375, + "learning_rate": 1.968915327356804e-06, + "loss": 51.5547, + "step": 11324 + }, + { + "epoch": 0.10720269592298445, + "grad_norm": 594.526611328125, + "learning_rate": 1.968907742251053e-06, + "loss": 51.2266, + "step": 11325 + }, + { + "epoch": 0.1072121619446995, + "grad_norm": 277.5216064453125, + "learning_rate": 1.968900156234592e-06, + "loss": 26.9844, + "step": 11326 + }, + { + "epoch": 0.10722162796641456, + "grad_norm": 329.41217041015625, + "learning_rate": 1.968892569307428e-06, + "loss": 31.4062, + "step": 11327 + }, + { + "epoch": 0.1072310939881296, + "grad_norm": 339.146240234375, + "learning_rate": 1.968884981469568e-06, + "loss": 26.6406, + "step": 11328 + }, + { + "epoch": 0.10724056000984467, + "grad_norm": 711.0836181640625, + "learning_rate": 1.968877392721019e-06, + "loss": 20.8594, + "step": 11329 + }, + { + "epoch": 0.10725002603155971, + "grad_norm": 3.548253059387207, + "learning_rate": 1.9688698030617887e-06, + "loss": 0.9653, + "step": 11330 + }, + { + "epoch": 0.10725949205327477, + "grad_norm": 567.5795288085938, + "learning_rate": 1.968862212491883e-06, + "loss": 38.3594, + "step": 11331 + }, + { + "epoch": 0.10726895807498982, + "grad_norm": 1340.634521484375, + "learning_rate": 1.9688546210113108e-06, + "loss": 39.0078, + "step": 11332 + }, + { + "epoch": 0.10727842409670488, + "grad_norm": 416.36309814453125, + "learning_rate": 1.9688470286200777e-06, + "loss": 22.9922, + "step": 11333 + }, + { + "epoch": 0.10728789011841994, + "grad_norm": 595.3240966796875, + "learning_rate": 1.9688394353181914e-06, + "loss": 25.1484, + "step": 11334 + }, + { + "epoch": 0.10729735614013498, + "grad_norm": 2013.709228515625, + "learning_rate": 1.968831841105659e-06, + "loss": 31.8125, + "step": 11335 + }, + { + "epoch": 0.10730682216185004, + "grad_norm": 460.8668212890625, + "learning_rate": 1.9688242459824878e-06, + "loss": 22.9219, + "step": 11336 + }, + { + "epoch": 0.10731628818356509, + "grad_norm": 299.69268798828125, + "learning_rate": 1.968816649948685e-06, + "loss": 10.5273, + "step": 11337 + }, + { + "epoch": 0.10732575420528015, + "grad_norm": 1813.3956298828125, + "learning_rate": 1.9688090530042574e-06, + "loss": 10.3672, + "step": 11338 + }, + { + "epoch": 0.1073352202269952, + "grad_norm": 241.71771240234375, + "learning_rate": 1.9688014551492125e-06, + "loss": 9.0547, + "step": 11339 + }, + { + "epoch": 0.10734468624871026, + "grad_norm": 495.4393615722656, + "learning_rate": 1.968793856383557e-06, + "loss": 31.0781, + "step": 11340 + }, + { + "epoch": 0.1073541522704253, + "grad_norm": 458.2211608886719, + "learning_rate": 1.9687862567072986e-06, + "loss": 38.0625, + "step": 11341 + }, + { + "epoch": 0.10736361829214036, + "grad_norm": 580.509033203125, + "learning_rate": 1.968778656120444e-06, + "loss": 27.7812, + "step": 11342 + }, + { + "epoch": 0.10737308431385542, + "grad_norm": 249.2209014892578, + "learning_rate": 1.9687710546230007e-06, + "loss": 19.6328, + "step": 11343 + }, + { + "epoch": 0.10738255033557047, + "grad_norm": 359.7764892578125, + "learning_rate": 1.9687634522149754e-06, + "loss": 43.4688, + "step": 11344 + }, + { + "epoch": 0.10739201635728553, + "grad_norm": 333.9904479980469, + "learning_rate": 1.968755848896375e-06, + "loss": 18.4766, + "step": 11345 + }, + { + "epoch": 0.10740148237900057, + "grad_norm": 474.66229248046875, + "learning_rate": 1.968748244667208e-06, + "loss": 16.2734, + "step": 11346 + }, + { + "epoch": 0.10741094840071563, + "grad_norm": 999.7936401367188, + "learning_rate": 1.96874063952748e-06, + "loss": 48.3203, + "step": 11347 + }, + { + "epoch": 0.10742041442243068, + "grad_norm": 1148.3453369140625, + "learning_rate": 1.9687330334771995e-06, + "loss": 70.5469, + "step": 11348 + }, + { + "epoch": 0.10742988044414574, + "grad_norm": 224.39486694335938, + "learning_rate": 1.9687254265163725e-06, + "loss": 12.9648, + "step": 11349 + }, + { + "epoch": 0.1074393464658608, + "grad_norm": 334.9405822753906, + "learning_rate": 1.9687178186450066e-06, + "loss": 23.6875, + "step": 11350 + }, + { + "epoch": 0.10744881248757585, + "grad_norm": 412.22113037109375, + "learning_rate": 1.968710209863109e-06, + "loss": 54.5781, + "step": 11351 + }, + { + "epoch": 0.1074582785092909, + "grad_norm": 685.9068603515625, + "learning_rate": 1.968702600170687e-06, + "loss": 23.7266, + "step": 11352 + }, + { + "epoch": 0.10746774453100595, + "grad_norm": 393.84375, + "learning_rate": 1.9686949895677474e-06, + "loss": 35.3516, + "step": 11353 + }, + { + "epoch": 0.10747721055272101, + "grad_norm": 3.255270481109619, + "learning_rate": 1.968687378054298e-06, + "loss": 0.918, + "step": 11354 + }, + { + "epoch": 0.10748667657443606, + "grad_norm": 473.4698181152344, + "learning_rate": 1.9686797656303446e-06, + "loss": 24.7656, + "step": 11355 + }, + { + "epoch": 0.10749614259615112, + "grad_norm": 303.4213562011719, + "learning_rate": 1.9686721522958954e-06, + "loss": 19.4922, + "step": 11356 + }, + { + "epoch": 0.10750560861786616, + "grad_norm": 416.7363586425781, + "learning_rate": 1.9686645380509577e-06, + "loss": 40.3125, + "step": 11357 + }, + { + "epoch": 0.10751507463958122, + "grad_norm": 280.58758544921875, + "learning_rate": 1.968656922895539e-06, + "loss": 20.2891, + "step": 11358 + }, + { + "epoch": 0.10752454066129628, + "grad_norm": 449.6608581542969, + "learning_rate": 1.9686493068296446e-06, + "loss": 49.8359, + "step": 11359 + }, + { + "epoch": 0.10753400668301133, + "grad_norm": 675.7167358398438, + "learning_rate": 1.9686416898532836e-06, + "loss": 53.9375, + "step": 11360 + }, + { + "epoch": 0.10754347270472639, + "grad_norm": 492.70855712890625, + "learning_rate": 1.968634071966462e-06, + "loss": 42.0938, + "step": 11361 + }, + { + "epoch": 0.10755293872644144, + "grad_norm": 166.78985595703125, + "learning_rate": 1.968626453169188e-06, + "loss": 23.75, + "step": 11362 + }, + { + "epoch": 0.1075624047481565, + "grad_norm": 228.62852478027344, + "learning_rate": 1.9686188334614675e-06, + "loss": 19.9844, + "step": 11363 + }, + { + "epoch": 0.10757187076987154, + "grad_norm": 166.3992919921875, + "learning_rate": 1.9686112128433086e-06, + "loss": 13.8633, + "step": 11364 + }, + { + "epoch": 0.1075813367915866, + "grad_norm": 398.0650939941406, + "learning_rate": 1.968603591314718e-06, + "loss": 36.4062, + "step": 11365 + }, + { + "epoch": 0.10759080281330165, + "grad_norm": 389.8878479003906, + "learning_rate": 1.9685959688757036e-06, + "loss": 22.4922, + "step": 11366 + }, + { + "epoch": 0.10760026883501671, + "grad_norm": 394.0623779296875, + "learning_rate": 1.9685883455262714e-06, + "loss": 25.5, + "step": 11367 + }, + { + "epoch": 0.10760973485673177, + "grad_norm": 304.0694580078125, + "learning_rate": 1.9685807212664295e-06, + "loss": 20.1719, + "step": 11368 + }, + { + "epoch": 0.10761920087844681, + "grad_norm": 179.82281494140625, + "learning_rate": 1.9685730960961843e-06, + "loss": 9.8477, + "step": 11369 + }, + { + "epoch": 0.10762866690016187, + "grad_norm": 736.1580200195312, + "learning_rate": 1.968565470015544e-06, + "loss": 37.2383, + "step": 11370 + }, + { + "epoch": 0.10763813292187692, + "grad_norm": 815.4160766601562, + "learning_rate": 1.9685578430245146e-06, + "loss": 36.1719, + "step": 11371 + }, + { + "epoch": 0.10764759894359198, + "grad_norm": 222.85003662109375, + "learning_rate": 1.968550215123104e-06, + "loss": 25.1406, + "step": 11372 + }, + { + "epoch": 0.10765706496530703, + "grad_norm": 348.6305236816406, + "learning_rate": 1.9685425863113197e-06, + "loss": 13.5039, + "step": 11373 + }, + { + "epoch": 0.10766653098702209, + "grad_norm": 629.8975830078125, + "learning_rate": 1.9685349565891678e-06, + "loss": 51.375, + "step": 11374 + }, + { + "epoch": 0.10767599700873713, + "grad_norm": 375.9354553222656, + "learning_rate": 1.968527325956656e-06, + "loss": 25.3438, + "step": 11375 + }, + { + "epoch": 0.10768546303045219, + "grad_norm": 325.79132080078125, + "learning_rate": 1.9685196944137916e-06, + "loss": 34.2969, + "step": 11376 + }, + { + "epoch": 0.10769492905216725, + "grad_norm": 234.2823028564453, + "learning_rate": 1.9685120619605823e-06, + "loss": 18.5625, + "step": 11377 + }, + { + "epoch": 0.1077043950738823, + "grad_norm": 442.72015380859375, + "learning_rate": 1.968504428597034e-06, + "loss": 25.6328, + "step": 11378 + }, + { + "epoch": 0.10771386109559736, + "grad_norm": 383.73187255859375, + "learning_rate": 1.968496794323155e-06, + "loss": 39.0391, + "step": 11379 + }, + { + "epoch": 0.1077233271173124, + "grad_norm": 396.77838134765625, + "learning_rate": 1.9684891591389516e-06, + "loss": 42.875, + "step": 11380 + }, + { + "epoch": 0.10773279313902746, + "grad_norm": 570.2440185546875, + "learning_rate": 1.9684815230444314e-06, + "loss": 32.3906, + "step": 11381 + }, + { + "epoch": 0.10774225916074251, + "grad_norm": 500.7940368652344, + "learning_rate": 1.968473886039602e-06, + "loss": 41.6094, + "step": 11382 + }, + { + "epoch": 0.10775172518245757, + "grad_norm": 769.3854370117188, + "learning_rate": 1.9684662481244696e-06, + "loss": 50.0703, + "step": 11383 + }, + { + "epoch": 0.10776119120417262, + "grad_norm": 597.0076293945312, + "learning_rate": 1.9684586092990424e-06, + "loss": 40.5703, + "step": 11384 + }, + { + "epoch": 0.10777065722588768, + "grad_norm": 169.088134765625, + "learning_rate": 1.968450969563327e-06, + "loss": 16.7266, + "step": 11385 + }, + { + "epoch": 0.10778012324760274, + "grad_norm": 400.74468994140625, + "learning_rate": 1.9684433289173305e-06, + "loss": 24.2266, + "step": 11386 + }, + { + "epoch": 0.10778958926931778, + "grad_norm": 489.7159423828125, + "learning_rate": 1.9684356873610605e-06, + "loss": 31.7188, + "step": 11387 + }, + { + "epoch": 0.10779905529103284, + "grad_norm": 625.556396484375, + "learning_rate": 1.9684280448945237e-06, + "loss": 48.2188, + "step": 11388 + }, + { + "epoch": 0.10780852131274789, + "grad_norm": 1159.969970703125, + "learning_rate": 1.968420401517728e-06, + "loss": 39.3438, + "step": 11389 + }, + { + "epoch": 0.10781798733446295, + "grad_norm": 222.9630584716797, + "learning_rate": 1.9684127572306798e-06, + "loss": 18.1484, + "step": 11390 + }, + { + "epoch": 0.107827453356178, + "grad_norm": 514.8803100585938, + "learning_rate": 1.9684051120333866e-06, + "loss": 34.0469, + "step": 11391 + }, + { + "epoch": 0.10783691937789305, + "grad_norm": 235.42002868652344, + "learning_rate": 1.9683974659258554e-06, + "loss": 7.1328, + "step": 11392 + }, + { + "epoch": 0.10784638539960811, + "grad_norm": 260.0542907714844, + "learning_rate": 1.968389818908094e-06, + "loss": 19.2031, + "step": 11393 + }, + { + "epoch": 0.10785585142132316, + "grad_norm": 378.1548767089844, + "learning_rate": 1.9683821709801086e-06, + "loss": 38.2734, + "step": 11394 + }, + { + "epoch": 0.10786531744303822, + "grad_norm": 893.2279052734375, + "learning_rate": 1.9683745221419074e-06, + "loss": 50.875, + "step": 11395 + }, + { + "epoch": 0.10787478346475327, + "grad_norm": 382.3701477050781, + "learning_rate": 1.9683668723934974e-06, + "loss": 36.6953, + "step": 11396 + }, + { + "epoch": 0.10788424948646833, + "grad_norm": 444.77960205078125, + "learning_rate": 1.968359221734885e-06, + "loss": 60.2031, + "step": 11397 + }, + { + "epoch": 0.10789371550818337, + "grad_norm": 416.14447021484375, + "learning_rate": 1.968351570166078e-06, + "loss": 29.2812, + "step": 11398 + }, + { + "epoch": 0.10790318152989843, + "grad_norm": 200.78590393066406, + "learning_rate": 1.9683439176870837e-06, + "loss": 15.6641, + "step": 11399 + }, + { + "epoch": 0.10791264755161348, + "grad_norm": 258.99658203125, + "learning_rate": 1.9683362642979092e-06, + "loss": 19.0625, + "step": 11400 + }, + { + "epoch": 0.10792211357332854, + "grad_norm": 2.8006341457366943, + "learning_rate": 1.9683286099985614e-06, + "loss": 0.7822, + "step": 11401 + }, + { + "epoch": 0.1079315795950436, + "grad_norm": 348.53839111328125, + "learning_rate": 1.9683209547890475e-06, + "loss": 29.5156, + "step": 11402 + }, + { + "epoch": 0.10794104561675864, + "grad_norm": 616.1009521484375, + "learning_rate": 1.9683132986693754e-06, + "loss": 19.2441, + "step": 11403 + }, + { + "epoch": 0.1079505116384737, + "grad_norm": 321.23309326171875, + "learning_rate": 1.9683056416395517e-06, + "loss": 52.2344, + "step": 11404 + }, + { + "epoch": 0.10795997766018875, + "grad_norm": 386.47265625, + "learning_rate": 1.9682979836995834e-06, + "loss": 48.8438, + "step": 11405 + }, + { + "epoch": 0.10796944368190381, + "grad_norm": 3.4781322479248047, + "learning_rate": 1.968290324849478e-06, + "loss": 1.1235, + "step": 11406 + }, + { + "epoch": 0.10797890970361886, + "grad_norm": 358.7845153808594, + "learning_rate": 1.968282665089243e-06, + "loss": 9.2031, + "step": 11407 + }, + { + "epoch": 0.10798837572533392, + "grad_norm": 355.1993408203125, + "learning_rate": 1.968275004418885e-06, + "loss": 28.8594, + "step": 11408 + }, + { + "epoch": 0.10799784174704896, + "grad_norm": 744.41064453125, + "learning_rate": 1.9682673428384116e-06, + "loss": 65.7031, + "step": 11409 + }, + { + "epoch": 0.10800730776876402, + "grad_norm": 655.889892578125, + "learning_rate": 1.9682596803478296e-06, + "loss": 41.0547, + "step": 11410 + }, + { + "epoch": 0.10801677379047908, + "grad_norm": 301.79888916015625, + "learning_rate": 1.968252016947147e-06, + "loss": 20.3359, + "step": 11411 + }, + { + "epoch": 0.10802623981219413, + "grad_norm": 175.65245056152344, + "learning_rate": 1.96824435263637e-06, + "loss": 21.5703, + "step": 11412 + }, + { + "epoch": 0.10803570583390919, + "grad_norm": 421.4400939941406, + "learning_rate": 1.9682366874155068e-06, + "loss": 31.0, + "step": 11413 + }, + { + "epoch": 0.10804517185562423, + "grad_norm": 483.7490539550781, + "learning_rate": 1.968229021284564e-06, + "loss": 39.3125, + "step": 11414 + }, + { + "epoch": 0.1080546378773393, + "grad_norm": 758.927001953125, + "learning_rate": 1.9682213542435485e-06, + "loss": 22.0781, + "step": 11415 + }, + { + "epoch": 0.10806410389905434, + "grad_norm": 228.1251678466797, + "learning_rate": 1.968213686292468e-06, + "loss": 17.9688, + "step": 11416 + }, + { + "epoch": 0.1080735699207694, + "grad_norm": 194.93600463867188, + "learning_rate": 1.96820601743133e-06, + "loss": 20.25, + "step": 11417 + }, + { + "epoch": 0.10808303594248445, + "grad_norm": 964.3837280273438, + "learning_rate": 1.968198347660141e-06, + "loss": 27.3125, + "step": 11418 + }, + { + "epoch": 0.1080925019641995, + "grad_norm": 991.326904296875, + "learning_rate": 1.968190676978909e-06, + "loss": 19.8984, + "step": 11419 + }, + { + "epoch": 0.10810196798591457, + "grad_norm": 394.7591857910156, + "learning_rate": 1.96818300538764e-06, + "loss": 45.1719, + "step": 11420 + }, + { + "epoch": 0.10811143400762961, + "grad_norm": 412.7296447753906, + "learning_rate": 1.968175332886343e-06, + "loss": 26.1094, + "step": 11421 + }, + { + "epoch": 0.10812090002934467, + "grad_norm": 373.0243225097656, + "learning_rate": 1.968167659475023e-06, + "loss": 22.2383, + "step": 11422 + }, + { + "epoch": 0.10813036605105972, + "grad_norm": 703.442626953125, + "learning_rate": 1.968159985153689e-06, + "loss": 41.1406, + "step": 11423 + }, + { + "epoch": 0.10813983207277478, + "grad_norm": 513.9996948242188, + "learning_rate": 1.9681523099223475e-06, + "loss": 29.6094, + "step": 11424 + }, + { + "epoch": 0.10814929809448982, + "grad_norm": 382.1614074707031, + "learning_rate": 1.9681446337810057e-06, + "loss": 29.3594, + "step": 11425 + }, + { + "epoch": 0.10815876411620488, + "grad_norm": 229.53228759765625, + "learning_rate": 1.968136956729671e-06, + "loss": 22.0391, + "step": 11426 + }, + { + "epoch": 0.10816823013791993, + "grad_norm": 399.0087585449219, + "learning_rate": 1.9681292787683507e-06, + "loss": 22.6172, + "step": 11427 + }, + { + "epoch": 0.10817769615963499, + "grad_norm": 988.6678466796875, + "learning_rate": 1.968121599897052e-06, + "loss": 49.9219, + "step": 11428 + }, + { + "epoch": 0.10818716218135005, + "grad_norm": 365.75970458984375, + "learning_rate": 1.9681139201157817e-06, + "loss": 24.1719, + "step": 11429 + }, + { + "epoch": 0.1081966282030651, + "grad_norm": 267.1945495605469, + "learning_rate": 1.9681062394245474e-06, + "loss": 33.2969, + "step": 11430 + }, + { + "epoch": 0.10820609422478016, + "grad_norm": 303.700927734375, + "learning_rate": 1.9680985578233563e-06, + "loss": 22.1094, + "step": 11431 + }, + { + "epoch": 0.1082155602464952, + "grad_norm": 244.3128204345703, + "learning_rate": 1.9680908753122154e-06, + "loss": 25.3633, + "step": 11432 + }, + { + "epoch": 0.10822502626821026, + "grad_norm": 849.0704956054688, + "learning_rate": 1.968083191891132e-06, + "loss": 28.5, + "step": 11433 + }, + { + "epoch": 0.10823449228992531, + "grad_norm": 201.23194885253906, + "learning_rate": 1.9680755075601133e-06, + "loss": 19.2891, + "step": 11434 + }, + { + "epoch": 0.10824395831164037, + "grad_norm": 756.1685791015625, + "learning_rate": 1.9680678223191665e-06, + "loss": 29.4766, + "step": 11435 + }, + { + "epoch": 0.10825342433335543, + "grad_norm": 978.5477294921875, + "learning_rate": 1.968060136168299e-06, + "loss": 69.1641, + "step": 11436 + }, + { + "epoch": 0.10826289035507047, + "grad_norm": 583.02392578125, + "learning_rate": 1.968052449107518e-06, + "loss": 40.2812, + "step": 11437 + }, + { + "epoch": 0.10827235637678553, + "grad_norm": 215.78196716308594, + "learning_rate": 1.968044761136831e-06, + "loss": 14.5586, + "step": 11438 + }, + { + "epoch": 0.10828182239850058, + "grad_norm": 174.60348510742188, + "learning_rate": 1.9680370722562447e-06, + "loss": 14.7891, + "step": 11439 + }, + { + "epoch": 0.10829128842021564, + "grad_norm": 513.251708984375, + "learning_rate": 1.9680293824657666e-06, + "loss": 18.9141, + "step": 11440 + }, + { + "epoch": 0.10830075444193069, + "grad_norm": 461.840087890625, + "learning_rate": 1.9680216917654036e-06, + "loss": 26.9609, + "step": 11441 + }, + { + "epoch": 0.10831022046364575, + "grad_norm": 199.66143798828125, + "learning_rate": 1.9680140001551633e-06, + "loss": 20.3672, + "step": 11442 + }, + { + "epoch": 0.10831968648536079, + "grad_norm": 250.4950408935547, + "learning_rate": 1.968006307635053e-06, + "loss": 15.9766, + "step": 11443 + }, + { + "epoch": 0.10832915250707585, + "grad_norm": 969.6625366210938, + "learning_rate": 1.9679986142050797e-06, + "loss": 13.1836, + "step": 11444 + }, + { + "epoch": 0.10833861852879091, + "grad_norm": 343.18890380859375, + "learning_rate": 1.9679909198652503e-06, + "loss": 23.0078, + "step": 11445 + }, + { + "epoch": 0.10834808455050596, + "grad_norm": 338.71136474609375, + "learning_rate": 1.967983224615573e-06, + "loss": 20.1484, + "step": 11446 + }, + { + "epoch": 0.10835755057222102, + "grad_norm": 245.13954162597656, + "learning_rate": 1.9679755284560538e-06, + "loss": 18.2812, + "step": 11447 + }, + { + "epoch": 0.10836701659393606, + "grad_norm": 2.889066457748413, + "learning_rate": 1.967967831386701e-06, + "loss": 0.832, + "step": 11448 + }, + { + "epoch": 0.10837648261565112, + "grad_norm": 927.7006225585938, + "learning_rate": 1.9679601334075213e-06, + "loss": 17.6172, + "step": 11449 + }, + { + "epoch": 0.10838594863736617, + "grad_norm": 397.9100341796875, + "learning_rate": 1.967952434518522e-06, + "loss": 27.0234, + "step": 11450 + }, + { + "epoch": 0.10839541465908123, + "grad_norm": 429.01123046875, + "learning_rate": 1.9679447347197104e-06, + "loss": 28.5, + "step": 11451 + }, + { + "epoch": 0.10840488068079628, + "grad_norm": 865.3442993164062, + "learning_rate": 1.9679370340110937e-06, + "loss": 52.2891, + "step": 11452 + }, + { + "epoch": 0.10841434670251134, + "grad_norm": 423.3039245605469, + "learning_rate": 1.9679293323926793e-06, + "loss": 29.6641, + "step": 11453 + }, + { + "epoch": 0.1084238127242264, + "grad_norm": 336.04656982421875, + "learning_rate": 1.967921629864474e-06, + "loss": 21.8125, + "step": 11454 + }, + { + "epoch": 0.10843327874594144, + "grad_norm": 147.23292541503906, + "learning_rate": 1.9679139264264855e-06, + "loss": 17.9219, + "step": 11455 + }, + { + "epoch": 0.1084427447676565, + "grad_norm": 365.6387939453125, + "learning_rate": 1.967906222078721e-06, + "loss": 44.8594, + "step": 11456 + }, + { + "epoch": 0.10845221078937155, + "grad_norm": 282.4096374511719, + "learning_rate": 1.9678985168211874e-06, + "loss": 19.7188, + "step": 11457 + }, + { + "epoch": 0.10846167681108661, + "grad_norm": 962.9359130859375, + "learning_rate": 1.9678908106538927e-06, + "loss": 58.4688, + "step": 11458 + }, + { + "epoch": 0.10847114283280165, + "grad_norm": 738.0048828125, + "learning_rate": 1.967883103576843e-06, + "loss": 47.8438, + "step": 11459 + }, + { + "epoch": 0.10848060885451671, + "grad_norm": 232.5845489501953, + "learning_rate": 1.967875395590046e-06, + "loss": 16.7969, + "step": 11460 + }, + { + "epoch": 0.10849007487623176, + "grad_norm": 314.80572509765625, + "learning_rate": 1.9678676866935096e-06, + "loss": 20.1484, + "step": 11461 + }, + { + "epoch": 0.10849954089794682, + "grad_norm": 183.26531982421875, + "learning_rate": 1.9678599768872402e-06, + "loss": 26.1016, + "step": 11462 + }, + { + "epoch": 0.10850900691966188, + "grad_norm": 296.9437255859375, + "learning_rate": 1.9678522661712456e-06, + "loss": 22.3203, + "step": 11463 + }, + { + "epoch": 0.10851847294137693, + "grad_norm": 309.3370666503906, + "learning_rate": 1.967844554545533e-06, + "loss": 32.5703, + "step": 11464 + }, + { + "epoch": 0.10852793896309199, + "grad_norm": 421.884765625, + "learning_rate": 1.9678368420101094e-06, + "loss": 20.2812, + "step": 11465 + }, + { + "epoch": 0.10853740498480703, + "grad_norm": 298.2139587402344, + "learning_rate": 1.967829128564982e-06, + "loss": 19.5, + "step": 11466 + }, + { + "epoch": 0.10854687100652209, + "grad_norm": 392.583740234375, + "learning_rate": 1.9678214142101578e-06, + "loss": 30.1406, + "step": 11467 + }, + { + "epoch": 0.10855633702823714, + "grad_norm": 380.10015869140625, + "learning_rate": 1.967813698945645e-06, + "loss": 17.7344, + "step": 11468 + }, + { + "epoch": 0.1085658030499522, + "grad_norm": 522.3056640625, + "learning_rate": 1.96780598277145e-06, + "loss": 46.6406, + "step": 11469 + }, + { + "epoch": 0.10857526907166724, + "grad_norm": 474.6283264160156, + "learning_rate": 1.9677982656875803e-06, + "loss": 19.4688, + "step": 11470 + }, + { + "epoch": 0.1085847350933823, + "grad_norm": 3.5505824089050293, + "learning_rate": 1.9677905476940433e-06, + "loss": 0.9346, + "step": 11471 + }, + { + "epoch": 0.10859420111509736, + "grad_norm": 232.61639404296875, + "learning_rate": 1.9677828287908463e-06, + "loss": 19.0, + "step": 11472 + }, + { + "epoch": 0.10860366713681241, + "grad_norm": 567.9075927734375, + "learning_rate": 1.9677751089779965e-06, + "loss": 63.1562, + "step": 11473 + }, + { + "epoch": 0.10861313315852747, + "grad_norm": 574.2100830078125, + "learning_rate": 1.9677673882555006e-06, + "loss": 30.8672, + "step": 11474 + }, + { + "epoch": 0.10862259918024252, + "grad_norm": 280.9361877441406, + "learning_rate": 1.9677596666233663e-06, + "loss": 26.625, + "step": 11475 + }, + { + "epoch": 0.10863206520195758, + "grad_norm": 222.94708251953125, + "learning_rate": 1.9677519440816012e-06, + "loss": 21.9688, + "step": 11476 + }, + { + "epoch": 0.10864153122367262, + "grad_norm": 409.7029113769531, + "learning_rate": 1.967744220630212e-06, + "loss": 24.4688, + "step": 11477 + }, + { + "epoch": 0.10865099724538768, + "grad_norm": 784.4891357421875, + "learning_rate": 1.9677364962692066e-06, + "loss": 23.3438, + "step": 11478 + }, + { + "epoch": 0.10866046326710274, + "grad_norm": 346.03448486328125, + "learning_rate": 1.9677287709985915e-06, + "loss": 26.7656, + "step": 11479 + }, + { + "epoch": 0.10866992928881779, + "grad_norm": 368.8300476074219, + "learning_rate": 1.9677210448183744e-06, + "loss": 29.9219, + "step": 11480 + }, + { + "epoch": 0.10867939531053285, + "grad_norm": 294.06048583984375, + "learning_rate": 1.9677133177285624e-06, + "loss": 7.1152, + "step": 11481 + }, + { + "epoch": 0.1086888613322479, + "grad_norm": 251.40577697753906, + "learning_rate": 1.967705589729163e-06, + "loss": 21.8594, + "step": 11482 + }, + { + "epoch": 0.10869832735396295, + "grad_norm": 3.6285340785980225, + "learning_rate": 1.967697860820183e-06, + "loss": 0.998, + "step": 11483 + }, + { + "epoch": 0.108707793375678, + "grad_norm": 249.27459716796875, + "learning_rate": 1.9676901310016302e-06, + "loss": 23.0703, + "step": 11484 + }, + { + "epoch": 0.10871725939739306, + "grad_norm": 151.68264770507812, + "learning_rate": 1.9676824002735115e-06, + "loss": 12.0469, + "step": 11485 + }, + { + "epoch": 0.1087267254191081, + "grad_norm": 544.0054321289062, + "learning_rate": 1.9676746686358344e-06, + "loss": 26.8906, + "step": 11486 + }, + { + "epoch": 0.10873619144082317, + "grad_norm": 423.39715576171875, + "learning_rate": 1.967666936088606e-06, + "loss": 37.1016, + "step": 11487 + }, + { + "epoch": 0.10874565746253823, + "grad_norm": 703.2177124023438, + "learning_rate": 1.967659202631834e-06, + "loss": 57.4844, + "step": 11488 + }, + { + "epoch": 0.10875512348425327, + "grad_norm": 699.1116333007812, + "learning_rate": 1.9676514682655244e-06, + "loss": 50.1719, + "step": 11489 + }, + { + "epoch": 0.10876458950596833, + "grad_norm": 544.3113403320312, + "learning_rate": 1.967643732989686e-06, + "loss": 52.0312, + "step": 11490 + }, + { + "epoch": 0.10877405552768338, + "grad_norm": 562.1798706054688, + "learning_rate": 1.9676359968043253e-06, + "loss": 48.3438, + "step": 11491 + }, + { + "epoch": 0.10878352154939844, + "grad_norm": 241.47555541992188, + "learning_rate": 1.96762825970945e-06, + "loss": 11.9219, + "step": 11492 + }, + { + "epoch": 0.10879298757111348, + "grad_norm": 270.0235290527344, + "learning_rate": 1.9676205217050666e-06, + "loss": 18.7734, + "step": 11493 + }, + { + "epoch": 0.10880245359282854, + "grad_norm": 294.74530029296875, + "learning_rate": 1.9676127827911834e-06, + "loss": 31.7578, + "step": 11494 + }, + { + "epoch": 0.10881191961454359, + "grad_norm": 3.1433558464050293, + "learning_rate": 1.967605042967807e-06, + "loss": 0.9985, + "step": 11495 + }, + { + "epoch": 0.10882138563625865, + "grad_norm": 335.44659423828125, + "learning_rate": 1.9675973022349447e-06, + "loss": 30.7969, + "step": 11496 + }, + { + "epoch": 0.10883085165797371, + "grad_norm": 256.3725280761719, + "learning_rate": 1.9675895605926036e-06, + "loss": 20.0391, + "step": 11497 + }, + { + "epoch": 0.10884031767968876, + "grad_norm": 298.67584228515625, + "learning_rate": 1.9675818180407913e-06, + "loss": 34.0156, + "step": 11498 + }, + { + "epoch": 0.10884978370140382, + "grad_norm": 206.30792236328125, + "learning_rate": 1.967574074579515e-06, + "loss": 17.3828, + "step": 11499 + }, + { + "epoch": 0.10885924972311886, + "grad_norm": 660.9520874023438, + "learning_rate": 1.9675663302087827e-06, + "loss": 42.5469, + "step": 11500 + }, + { + "epoch": 0.10886871574483392, + "grad_norm": 292.3359069824219, + "learning_rate": 1.9675585849286007e-06, + "loss": 27.5938, + "step": 11501 + }, + { + "epoch": 0.10887818176654897, + "grad_norm": 184.54737854003906, + "learning_rate": 1.9675508387389763e-06, + "loss": 20.5234, + "step": 11502 + }, + { + "epoch": 0.10888764778826403, + "grad_norm": 433.817626953125, + "learning_rate": 1.9675430916399172e-06, + "loss": 25.1719, + "step": 11503 + }, + { + "epoch": 0.10889711380997907, + "grad_norm": 225.734619140625, + "learning_rate": 1.9675353436314307e-06, + "loss": 16.0469, + "step": 11504 + }, + { + "epoch": 0.10890657983169413, + "grad_norm": 388.60565185546875, + "learning_rate": 1.9675275947135233e-06, + "loss": 18.5312, + "step": 11505 + }, + { + "epoch": 0.1089160458534092, + "grad_norm": 549.512939453125, + "learning_rate": 1.9675198448862033e-06, + "loss": 43.0703, + "step": 11506 + }, + { + "epoch": 0.10892551187512424, + "grad_norm": 647.7848510742188, + "learning_rate": 1.967512094149478e-06, + "loss": 40.4688, + "step": 11507 + }, + { + "epoch": 0.1089349778968393, + "grad_norm": 304.9736328125, + "learning_rate": 1.9675043425033537e-06, + "loss": 33.9922, + "step": 11508 + }, + { + "epoch": 0.10894444391855435, + "grad_norm": 220.94847106933594, + "learning_rate": 1.9674965899478384e-06, + "loss": 17.8203, + "step": 11509 + }, + { + "epoch": 0.1089539099402694, + "grad_norm": 363.2182922363281, + "learning_rate": 1.9674888364829396e-06, + "loss": 35.6406, + "step": 11510 + }, + { + "epoch": 0.10896337596198445, + "grad_norm": 484.2433166503906, + "learning_rate": 1.9674810821086637e-06, + "loss": 37.0156, + "step": 11511 + }, + { + "epoch": 0.10897284198369951, + "grad_norm": 279.92529296875, + "learning_rate": 1.967473326825019e-06, + "loss": 25.7344, + "step": 11512 + }, + { + "epoch": 0.10898230800541456, + "grad_norm": 262.6466064453125, + "learning_rate": 1.967465570632012e-06, + "loss": 12.3203, + "step": 11513 + }, + { + "epoch": 0.10899177402712962, + "grad_norm": 3.218191146850586, + "learning_rate": 1.96745781352965e-06, + "loss": 1.0122, + "step": 11514 + }, + { + "epoch": 0.10900124004884468, + "grad_norm": 490.5176696777344, + "learning_rate": 1.967450055517941e-06, + "loss": 24.3711, + "step": 11515 + }, + { + "epoch": 0.10901070607055972, + "grad_norm": 376.39208984375, + "learning_rate": 1.967442296596892e-06, + "loss": 29.2578, + "step": 11516 + }, + { + "epoch": 0.10902017209227478, + "grad_norm": 538.0573120117188, + "learning_rate": 1.96743453676651e-06, + "loss": 54.6953, + "step": 11517 + }, + { + "epoch": 0.10902963811398983, + "grad_norm": 378.45849609375, + "learning_rate": 1.9674267760268024e-06, + "loss": 20.8047, + "step": 11518 + }, + { + "epoch": 0.10903910413570489, + "grad_norm": 273.3712158203125, + "learning_rate": 1.967419014377777e-06, + "loss": 44.5391, + "step": 11519 + }, + { + "epoch": 0.10904857015741994, + "grad_norm": 313.4364318847656, + "learning_rate": 1.9674112518194403e-06, + "loss": 21.8828, + "step": 11520 + }, + { + "epoch": 0.109058036179135, + "grad_norm": 623.07421875, + "learning_rate": 1.9674034883518e-06, + "loss": 30.0156, + "step": 11521 + }, + { + "epoch": 0.10906750220085006, + "grad_norm": 472.95703125, + "learning_rate": 1.9673957239748628e-06, + "loss": 50.875, + "step": 11522 + }, + { + "epoch": 0.1090769682225651, + "grad_norm": 222.1414031982422, + "learning_rate": 1.967387958688637e-06, + "loss": 17.4609, + "step": 11523 + }, + { + "epoch": 0.10908643424428016, + "grad_norm": 479.81671142578125, + "learning_rate": 1.9673801924931296e-06, + "loss": 46.375, + "step": 11524 + }, + { + "epoch": 0.10909590026599521, + "grad_norm": 184.447021484375, + "learning_rate": 1.967372425388348e-06, + "loss": 16.6172, + "step": 11525 + }, + { + "epoch": 0.10910536628771027, + "grad_norm": 231.922119140625, + "learning_rate": 1.967364657374299e-06, + "loss": 18.9375, + "step": 11526 + }, + { + "epoch": 0.10911483230942531, + "grad_norm": 945.27099609375, + "learning_rate": 1.96735688845099e-06, + "loss": 52.2188, + "step": 11527 + }, + { + "epoch": 0.10912429833114037, + "grad_norm": 473.9387512207031, + "learning_rate": 1.9673491186184284e-06, + "loss": 35.1641, + "step": 11528 + }, + { + "epoch": 0.10913376435285542, + "grad_norm": 285.6877746582031, + "learning_rate": 1.967341347876622e-06, + "loss": 26.6875, + "step": 11529 + }, + { + "epoch": 0.10914323037457048, + "grad_norm": 327.85968017578125, + "learning_rate": 1.967333576225577e-06, + "loss": 23.8594, + "step": 11530 + }, + { + "epoch": 0.10915269639628554, + "grad_norm": 242.3740997314453, + "learning_rate": 1.967325803665302e-06, + "loss": 25.8711, + "step": 11531 + }, + { + "epoch": 0.10916216241800059, + "grad_norm": 4.1103034019470215, + "learning_rate": 1.9673180301958033e-06, + "loss": 1.0596, + "step": 11532 + }, + { + "epoch": 0.10917162843971565, + "grad_norm": 255.63111877441406, + "learning_rate": 1.9673102558170885e-06, + "loss": 28.2188, + "step": 11533 + }, + { + "epoch": 0.10918109446143069, + "grad_norm": 378.79034423828125, + "learning_rate": 1.9673024805291655e-06, + "loss": 32.6406, + "step": 11534 + }, + { + "epoch": 0.10919056048314575, + "grad_norm": 644.8316650390625, + "learning_rate": 1.9672947043320408e-06, + "loss": 25.0625, + "step": 11535 + }, + { + "epoch": 0.1092000265048608, + "grad_norm": 995.300537109375, + "learning_rate": 1.967286927225722e-06, + "loss": 56.0352, + "step": 11536 + }, + { + "epoch": 0.10920949252657586, + "grad_norm": 568.790283203125, + "learning_rate": 1.967279149210216e-06, + "loss": 24.5, + "step": 11537 + }, + { + "epoch": 0.1092189585482909, + "grad_norm": 176.45436096191406, + "learning_rate": 1.9672713702855313e-06, + "loss": 21.8906, + "step": 11538 + }, + { + "epoch": 0.10922842457000596, + "grad_norm": 564.360107421875, + "learning_rate": 1.967263590451674e-06, + "loss": 32.1016, + "step": 11539 + }, + { + "epoch": 0.10923789059172102, + "grad_norm": 243.02090454101562, + "learning_rate": 1.967255809708652e-06, + "loss": 7.9375, + "step": 11540 + }, + { + "epoch": 0.10924735661343607, + "grad_norm": 510.9360656738281, + "learning_rate": 1.9672480280564723e-06, + "loss": 55.0625, + "step": 11541 + }, + { + "epoch": 0.10925682263515113, + "grad_norm": 194.0226287841797, + "learning_rate": 1.967240245495142e-06, + "loss": 21.4844, + "step": 11542 + }, + { + "epoch": 0.10926628865686618, + "grad_norm": 910.73486328125, + "learning_rate": 1.9672324620246696e-06, + "loss": 20.9297, + "step": 11543 + }, + { + "epoch": 0.10927575467858124, + "grad_norm": 3.4038467407226562, + "learning_rate": 1.9672246776450614e-06, + "loss": 0.8677, + "step": 11544 + }, + { + "epoch": 0.10928522070029628, + "grad_norm": 227.6520538330078, + "learning_rate": 1.967216892356324e-06, + "loss": 14.1719, + "step": 11545 + }, + { + "epoch": 0.10929468672201134, + "grad_norm": 412.49053955078125, + "learning_rate": 1.967209106158467e-06, + "loss": 37.8125, + "step": 11546 + }, + { + "epoch": 0.10930415274372639, + "grad_norm": 264.4400329589844, + "learning_rate": 1.9672013190514956e-06, + "loss": 21.0469, + "step": 11547 + }, + { + "epoch": 0.10931361876544145, + "grad_norm": 375.57049560546875, + "learning_rate": 1.967193531035418e-06, + "loss": 13.5781, + "step": 11548 + }, + { + "epoch": 0.10932308478715651, + "grad_norm": 598.000732421875, + "learning_rate": 1.9671857421102415e-06, + "loss": 39.3281, + "step": 11549 + }, + { + "epoch": 0.10933255080887155, + "grad_norm": 280.05438232421875, + "learning_rate": 1.9671779522759736e-06, + "loss": 28.3594, + "step": 11550 + }, + { + "epoch": 0.10934201683058661, + "grad_norm": 3.0023956298828125, + "learning_rate": 1.9671701615326207e-06, + "loss": 0.8809, + "step": 11551 + }, + { + "epoch": 0.10935148285230166, + "grad_norm": 577.3534545898438, + "learning_rate": 1.967162369880191e-06, + "loss": 47.7344, + "step": 11552 + }, + { + "epoch": 0.10936094887401672, + "grad_norm": 289.90289306640625, + "learning_rate": 1.9671545773186918e-06, + "loss": 32.2656, + "step": 11553 + }, + { + "epoch": 0.10937041489573177, + "grad_norm": 709.934326171875, + "learning_rate": 1.96714678384813e-06, + "loss": 33.5, + "step": 11554 + }, + { + "epoch": 0.10937988091744683, + "grad_norm": 413.7619934082031, + "learning_rate": 1.9671389894685135e-06, + "loss": 50.4062, + "step": 11555 + }, + { + "epoch": 0.10938934693916187, + "grad_norm": 600.8081665039062, + "learning_rate": 1.967131194179849e-06, + "loss": 28.7734, + "step": 11556 + }, + { + "epoch": 0.10939881296087693, + "grad_norm": 882.328857421875, + "learning_rate": 1.967123397982144e-06, + "loss": 50.1094, + "step": 11557 + }, + { + "epoch": 0.10940827898259199, + "grad_norm": 2.7335166931152344, + "learning_rate": 1.967115600875406e-06, + "loss": 0.812, + "step": 11558 + }, + { + "epoch": 0.10941774500430704, + "grad_norm": 289.65081787109375, + "learning_rate": 1.967107802859642e-06, + "loss": 19.8203, + "step": 11559 + }, + { + "epoch": 0.1094272110260221, + "grad_norm": 262.42901611328125, + "learning_rate": 1.9671000039348604e-06, + "loss": 22.5156, + "step": 11560 + }, + { + "epoch": 0.10943667704773714, + "grad_norm": 508.3982238769531, + "learning_rate": 1.9670922041010666e-06, + "loss": 42.3281, + "step": 11561 + }, + { + "epoch": 0.1094461430694522, + "grad_norm": 431.0394287109375, + "learning_rate": 1.96708440335827e-06, + "loss": 45.0312, + "step": 11562 + }, + { + "epoch": 0.10945560909116725, + "grad_norm": 325.6947937011719, + "learning_rate": 1.967076601706476e-06, + "loss": 37.7656, + "step": 11563 + }, + { + "epoch": 0.10946507511288231, + "grad_norm": 477.89453125, + "learning_rate": 1.9670687991456937e-06, + "loss": 10.1406, + "step": 11564 + }, + { + "epoch": 0.10947454113459737, + "grad_norm": 248.9674072265625, + "learning_rate": 1.9670609956759296e-06, + "loss": 20.2812, + "step": 11565 + }, + { + "epoch": 0.10948400715631242, + "grad_norm": 236.36842346191406, + "learning_rate": 1.9670531912971907e-06, + "loss": 9.1641, + "step": 11566 + }, + { + "epoch": 0.10949347317802748, + "grad_norm": 799.955078125, + "learning_rate": 1.967045386009485e-06, + "loss": 62.5156, + "step": 11567 + }, + { + "epoch": 0.10950293919974252, + "grad_norm": 180.75726318359375, + "learning_rate": 1.9670375798128196e-06, + "loss": 31.4219, + "step": 11568 + }, + { + "epoch": 0.10951240522145758, + "grad_norm": 571.602294921875, + "learning_rate": 1.9670297727072018e-06, + "loss": 22.8672, + "step": 11569 + }, + { + "epoch": 0.10952187124317263, + "grad_norm": 559.058349609375, + "learning_rate": 1.9670219646926383e-06, + "loss": 32.1719, + "step": 11570 + }, + { + "epoch": 0.10953133726488769, + "grad_norm": 459.8942565917969, + "learning_rate": 1.967014155769138e-06, + "loss": 23.5195, + "step": 11571 + }, + { + "epoch": 0.10954080328660273, + "grad_norm": 354.49169921875, + "learning_rate": 1.9670063459367066e-06, + "loss": 21.25, + "step": 11572 + }, + { + "epoch": 0.1095502693083178, + "grad_norm": 558.8107299804688, + "learning_rate": 1.9669985351953523e-06, + "loss": 33.1562, + "step": 11573 + }, + { + "epoch": 0.10955973533003285, + "grad_norm": 508.0396423339844, + "learning_rate": 1.9669907235450825e-06, + "loss": 26.0312, + "step": 11574 + }, + { + "epoch": 0.1095692013517479, + "grad_norm": 500.3486328125, + "learning_rate": 1.966982910985904e-06, + "loss": 20.6562, + "step": 11575 + }, + { + "epoch": 0.10957866737346296, + "grad_norm": 770.6831665039062, + "learning_rate": 1.966975097517825e-06, + "loss": 36.2578, + "step": 11576 + }, + { + "epoch": 0.109588133395178, + "grad_norm": 325.6020812988281, + "learning_rate": 1.966967283140852e-06, + "loss": 18.5156, + "step": 11577 + }, + { + "epoch": 0.10959759941689307, + "grad_norm": 184.6699676513672, + "learning_rate": 1.9669594678549925e-06, + "loss": 17.1875, + "step": 11578 + }, + { + "epoch": 0.10960706543860811, + "grad_norm": 272.4907531738281, + "learning_rate": 1.966951651660254e-06, + "loss": 33.6406, + "step": 11579 + }, + { + "epoch": 0.10961653146032317, + "grad_norm": 489.2724609375, + "learning_rate": 1.966943834556644e-06, + "loss": 40.2969, + "step": 11580 + }, + { + "epoch": 0.10962599748203822, + "grad_norm": 832.9443969726562, + "learning_rate": 1.9669360165441695e-06, + "loss": 24.3438, + "step": 11581 + }, + { + "epoch": 0.10963546350375328, + "grad_norm": 313.80914306640625, + "learning_rate": 1.9669281976228384e-06, + "loss": 16.6484, + "step": 11582 + }, + { + "epoch": 0.10964492952546834, + "grad_norm": 2.9804930686950684, + "learning_rate": 1.9669203777926575e-06, + "loss": 0.9102, + "step": 11583 + }, + { + "epoch": 0.10965439554718338, + "grad_norm": 622.3955078125, + "learning_rate": 1.9669125570536344e-06, + "loss": 39.5898, + "step": 11584 + }, + { + "epoch": 0.10966386156889844, + "grad_norm": 328.3230285644531, + "learning_rate": 1.9669047354057763e-06, + "loss": 41.9688, + "step": 11585 + }, + { + "epoch": 0.10967332759061349, + "grad_norm": 447.8307189941406, + "learning_rate": 1.966896912849091e-06, + "loss": 20.5, + "step": 11586 + }, + { + "epoch": 0.10968279361232855, + "grad_norm": 268.0477294921875, + "learning_rate": 1.966889089383585e-06, + "loss": 27.4844, + "step": 11587 + }, + { + "epoch": 0.1096922596340436, + "grad_norm": 459.071533203125, + "learning_rate": 1.9668812650092664e-06, + "loss": 40.4453, + "step": 11588 + }, + { + "epoch": 0.10970172565575866, + "grad_norm": 226.7638397216797, + "learning_rate": 1.9668734397261424e-06, + "loss": 18.4453, + "step": 11589 + }, + { + "epoch": 0.1097111916774737, + "grad_norm": 1165.430908203125, + "learning_rate": 1.96686561353422e-06, + "loss": 30.3281, + "step": 11590 + }, + { + "epoch": 0.10972065769918876, + "grad_norm": 460.531005859375, + "learning_rate": 1.966857786433507e-06, + "loss": 30.0, + "step": 11591 + }, + { + "epoch": 0.10973012372090382, + "grad_norm": 565.0679931640625, + "learning_rate": 1.9668499584240104e-06, + "loss": 35.5312, + "step": 11592 + }, + { + "epoch": 0.10973958974261887, + "grad_norm": 226.12013244628906, + "learning_rate": 1.9668421295057377e-06, + "loss": 17.4023, + "step": 11593 + }, + { + "epoch": 0.10974905576433393, + "grad_norm": 180.57003784179688, + "learning_rate": 1.966834299678697e-06, + "loss": 9.3379, + "step": 11594 + }, + { + "epoch": 0.10975852178604897, + "grad_norm": 272.1738586425781, + "learning_rate": 1.966826468942894e-06, + "loss": 16.9766, + "step": 11595 + }, + { + "epoch": 0.10976798780776403, + "grad_norm": 418.64141845703125, + "learning_rate": 1.9668186372983374e-06, + "loss": 42.4688, + "step": 11596 + }, + { + "epoch": 0.10977745382947908, + "grad_norm": 220.49183654785156, + "learning_rate": 1.9668108047450343e-06, + "loss": 24.7891, + "step": 11597 + }, + { + "epoch": 0.10978691985119414, + "grad_norm": 266.99713134765625, + "learning_rate": 1.9668029712829923e-06, + "loss": 24.2969, + "step": 11598 + }, + { + "epoch": 0.10979638587290919, + "grad_norm": 426.0027160644531, + "learning_rate": 1.9667951369122177e-06, + "loss": 26.3906, + "step": 11599 + }, + { + "epoch": 0.10980585189462425, + "grad_norm": 1140.570068359375, + "learning_rate": 1.9667873016327187e-06, + "loss": 63.7891, + "step": 11600 + }, + { + "epoch": 0.1098153179163393, + "grad_norm": 456.0509948730469, + "learning_rate": 1.966779465444503e-06, + "loss": 37.3281, + "step": 11601 + }, + { + "epoch": 0.10982478393805435, + "grad_norm": 277.34869384765625, + "learning_rate": 1.966771628347577e-06, + "loss": 25.2266, + "step": 11602 + }, + { + "epoch": 0.10983424995976941, + "grad_norm": 913.8724975585938, + "learning_rate": 1.9667637903419486e-06, + "loss": 65.4141, + "step": 11603 + }, + { + "epoch": 0.10984371598148446, + "grad_norm": 349.3119201660156, + "learning_rate": 1.9667559514276254e-06, + "loss": 21.4375, + "step": 11604 + }, + { + "epoch": 0.10985318200319952, + "grad_norm": 359.8923034667969, + "learning_rate": 1.966748111604614e-06, + "loss": 32.9844, + "step": 11605 + }, + { + "epoch": 0.10986264802491456, + "grad_norm": 549.0721435546875, + "learning_rate": 1.966740270872923e-06, + "loss": 35.6602, + "step": 11606 + }, + { + "epoch": 0.10987211404662962, + "grad_norm": 442.5870056152344, + "learning_rate": 1.9667324292325588e-06, + "loss": 42.082, + "step": 11607 + }, + { + "epoch": 0.10988158006834468, + "grad_norm": 287.224365234375, + "learning_rate": 1.9667245866835287e-06, + "loss": 13.5312, + "step": 11608 + }, + { + "epoch": 0.10989104609005973, + "grad_norm": 3.158830165863037, + "learning_rate": 1.9667167432258406e-06, + "loss": 0.9302, + "step": 11609 + }, + { + "epoch": 0.10990051211177479, + "grad_norm": 237.39846801757812, + "learning_rate": 1.9667088988595017e-06, + "loss": 22.0469, + "step": 11610 + }, + { + "epoch": 0.10990997813348984, + "grad_norm": 403.6527404785156, + "learning_rate": 1.966701053584519e-06, + "loss": 34.7344, + "step": 11611 + }, + { + "epoch": 0.1099194441552049, + "grad_norm": 305.6593017578125, + "learning_rate": 1.9666932074009005e-06, + "loss": 17.2227, + "step": 11612 + }, + { + "epoch": 0.10992891017691994, + "grad_norm": 3.8422977924346924, + "learning_rate": 1.966685360308653e-06, + "loss": 1.0122, + "step": 11613 + }, + { + "epoch": 0.109938376198635, + "grad_norm": 2.7512478828430176, + "learning_rate": 1.966677512307784e-06, + "loss": 0.8711, + "step": 11614 + }, + { + "epoch": 0.10994784222035005, + "grad_norm": 161.09190368652344, + "learning_rate": 1.966669663398302e-06, + "loss": 18.8047, + "step": 11615 + }, + { + "epoch": 0.10995730824206511, + "grad_norm": 484.0468444824219, + "learning_rate": 1.9666618135802125e-06, + "loss": 18.332, + "step": 11616 + }, + { + "epoch": 0.10996677426378017, + "grad_norm": 1319.0252685546875, + "learning_rate": 1.966653962853524e-06, + "loss": 72.4219, + "step": 11617 + }, + { + "epoch": 0.10997624028549521, + "grad_norm": 360.25128173828125, + "learning_rate": 1.9666461112182436e-06, + "loss": 42.125, + "step": 11618 + }, + { + "epoch": 0.10998570630721027, + "grad_norm": 442.1338195800781, + "learning_rate": 1.966638258674379e-06, + "loss": 46.7461, + "step": 11619 + }, + { + "epoch": 0.10999517232892532, + "grad_norm": 254.5100555419922, + "learning_rate": 1.966630405221937e-06, + "loss": 16.2266, + "step": 11620 + }, + { + "epoch": 0.11000463835064038, + "grad_norm": 157.24710083007812, + "learning_rate": 1.9666225508609258e-06, + "loss": 18.7734, + "step": 11621 + }, + { + "epoch": 0.11001410437235543, + "grad_norm": 550.8811645507812, + "learning_rate": 1.966614695591352e-06, + "loss": 46.832, + "step": 11622 + }, + { + "epoch": 0.11002357039407049, + "grad_norm": 978.1143188476562, + "learning_rate": 1.9666068394132228e-06, + "loss": 37.7344, + "step": 11623 + }, + { + "epoch": 0.11003303641578553, + "grad_norm": 206.0538787841797, + "learning_rate": 1.9665989823265467e-06, + "loss": 17.2422, + "step": 11624 + }, + { + "epoch": 0.11004250243750059, + "grad_norm": 306.9471130371094, + "learning_rate": 1.9665911243313304e-06, + "loss": 18.9375, + "step": 11625 + }, + { + "epoch": 0.11005196845921565, + "grad_norm": 289.1575012207031, + "learning_rate": 1.966583265427581e-06, + "loss": 44.6719, + "step": 11626 + }, + { + "epoch": 0.1100614344809307, + "grad_norm": 222.7789306640625, + "learning_rate": 1.9665754056153063e-06, + "loss": 20.1172, + "step": 11627 + }, + { + "epoch": 0.11007090050264576, + "grad_norm": 751.2871704101562, + "learning_rate": 1.9665675448945137e-06, + "loss": 36.5, + "step": 11628 + }, + { + "epoch": 0.1100803665243608, + "grad_norm": 378.0991516113281, + "learning_rate": 1.96655968326521e-06, + "loss": 30.0234, + "step": 11629 + }, + { + "epoch": 0.11008983254607586, + "grad_norm": 197.96597290039062, + "learning_rate": 1.966551820727404e-06, + "loss": 21.1719, + "step": 11630 + }, + { + "epoch": 0.11009929856779091, + "grad_norm": 371.2069091796875, + "learning_rate": 1.9665439572811017e-06, + "loss": 26.2734, + "step": 11631 + }, + { + "epoch": 0.11010876458950597, + "grad_norm": 3.2116434574127197, + "learning_rate": 1.9665360929263108e-06, + "loss": 1.0586, + "step": 11632 + }, + { + "epoch": 0.11011823061122102, + "grad_norm": 362.7815246582031, + "learning_rate": 1.966528227663039e-06, + "loss": 21.7578, + "step": 11633 + }, + { + "epoch": 0.11012769663293608, + "grad_norm": 279.2822265625, + "learning_rate": 1.9665203614912937e-06, + "loss": 13.0078, + "step": 11634 + }, + { + "epoch": 0.11013716265465114, + "grad_norm": 347.5174560546875, + "learning_rate": 1.966512494411082e-06, + "loss": 10.7305, + "step": 11635 + }, + { + "epoch": 0.11014662867636618, + "grad_norm": 3.0758490562438965, + "learning_rate": 1.9665046264224113e-06, + "loss": 0.874, + "step": 11636 + }, + { + "epoch": 0.11015609469808124, + "grad_norm": 230.75973510742188, + "learning_rate": 1.966496757525289e-06, + "loss": 17.3516, + "step": 11637 + }, + { + "epoch": 0.11016556071979629, + "grad_norm": 664.6534423828125, + "learning_rate": 1.966488887719723e-06, + "loss": 22.4102, + "step": 11638 + }, + { + "epoch": 0.11017502674151135, + "grad_norm": 763.2094116210938, + "learning_rate": 1.9664810170057202e-06, + "loss": 20.0469, + "step": 11639 + }, + { + "epoch": 0.1101844927632264, + "grad_norm": 587.9544067382812, + "learning_rate": 1.966473145383288e-06, + "loss": 57.6562, + "step": 11640 + }, + { + "epoch": 0.11019395878494145, + "grad_norm": 430.2040100097656, + "learning_rate": 1.966465272852434e-06, + "loss": 18.8203, + "step": 11641 + }, + { + "epoch": 0.1102034248066565, + "grad_norm": 183.57037353515625, + "learning_rate": 1.9664573994131656e-06, + "loss": 18.1992, + "step": 11642 + }, + { + "epoch": 0.11021289082837156, + "grad_norm": 347.7760009765625, + "learning_rate": 1.96644952506549e-06, + "loss": 17.5469, + "step": 11643 + }, + { + "epoch": 0.11022235685008662, + "grad_norm": 371.8285827636719, + "learning_rate": 1.9664416498094147e-06, + "loss": 25.0625, + "step": 11644 + }, + { + "epoch": 0.11023182287180167, + "grad_norm": 299.53643798828125, + "learning_rate": 1.966433773644947e-06, + "loss": 19.0391, + "step": 11645 + }, + { + "epoch": 0.11024128889351673, + "grad_norm": 562.6380004882812, + "learning_rate": 1.966425896572095e-06, + "loss": 45.9531, + "step": 11646 + }, + { + "epoch": 0.11025075491523177, + "grad_norm": 1181.1348876953125, + "learning_rate": 1.9664180185908646e-06, + "loss": 36.7578, + "step": 11647 + }, + { + "epoch": 0.11026022093694683, + "grad_norm": 261.9545593261719, + "learning_rate": 1.966410139701265e-06, + "loss": 27.9531, + "step": 11648 + }, + { + "epoch": 0.11026968695866188, + "grad_norm": 3.9275825023651123, + "learning_rate": 1.9664022599033024e-06, + "loss": 0.9023, + "step": 11649 + }, + { + "epoch": 0.11027915298037694, + "grad_norm": 269.3408508300781, + "learning_rate": 1.9663943791969843e-06, + "loss": 17.1484, + "step": 11650 + }, + { + "epoch": 0.110288619002092, + "grad_norm": 238.88340759277344, + "learning_rate": 1.9663864975823186e-06, + "loss": 18.0938, + "step": 11651 + }, + { + "epoch": 0.11029808502380704, + "grad_norm": 1023.1051025390625, + "learning_rate": 1.9663786150593126e-06, + "loss": 65.2578, + "step": 11652 + }, + { + "epoch": 0.1103075510455221, + "grad_norm": 356.8802490234375, + "learning_rate": 1.966370731627973e-06, + "loss": 24.7188, + "step": 11653 + }, + { + "epoch": 0.11031701706723715, + "grad_norm": 447.6301574707031, + "learning_rate": 1.9663628472883084e-06, + "loss": 14.7031, + "step": 11654 + }, + { + "epoch": 0.11032648308895221, + "grad_norm": 422.50262451171875, + "learning_rate": 1.9663549620403254e-06, + "loss": 43.5, + "step": 11655 + }, + { + "epoch": 0.11033594911066726, + "grad_norm": 806.9295043945312, + "learning_rate": 1.9663470758840317e-06, + "loss": 61.0859, + "step": 11656 + }, + { + "epoch": 0.11034541513238232, + "grad_norm": 227.2339630126953, + "learning_rate": 1.9663391888194345e-06, + "loss": 19.9141, + "step": 11657 + }, + { + "epoch": 0.11035488115409736, + "grad_norm": 295.7070007324219, + "learning_rate": 1.966331300846541e-06, + "loss": 27.1016, + "step": 11658 + }, + { + "epoch": 0.11036434717581242, + "grad_norm": 188.67742919921875, + "learning_rate": 1.9663234119653597e-06, + "loss": 11.2461, + "step": 11659 + }, + { + "epoch": 0.11037381319752748, + "grad_norm": 245.34625244140625, + "learning_rate": 1.966315522175897e-06, + "loss": 21.6172, + "step": 11660 + }, + { + "epoch": 0.11038327921924253, + "grad_norm": 338.973388671875, + "learning_rate": 1.96630763147816e-06, + "loss": 19.5469, + "step": 11661 + }, + { + "epoch": 0.11039274524095759, + "grad_norm": 500.7450256347656, + "learning_rate": 1.966299739872157e-06, + "loss": 45.9609, + "step": 11662 + }, + { + "epoch": 0.11040221126267263, + "grad_norm": 1381.1904296875, + "learning_rate": 1.9662918473578954e-06, + "loss": 54.1484, + "step": 11663 + }, + { + "epoch": 0.1104116772843877, + "grad_norm": 239.47686767578125, + "learning_rate": 1.966283953935382e-06, + "loss": 20.2109, + "step": 11664 + }, + { + "epoch": 0.11042114330610274, + "grad_norm": 395.47686767578125, + "learning_rate": 1.966276059604625e-06, + "loss": 50.6094, + "step": 11665 + }, + { + "epoch": 0.1104306093278178, + "grad_norm": 546.701904296875, + "learning_rate": 1.9662681643656305e-06, + "loss": 41.3203, + "step": 11666 + }, + { + "epoch": 0.11044007534953285, + "grad_norm": 3.654832124710083, + "learning_rate": 1.9662602682184074e-06, + "loss": 1.0054, + "step": 11667 + }, + { + "epoch": 0.1104495413712479, + "grad_norm": 687.7979125976562, + "learning_rate": 1.9662523711629626e-06, + "loss": 44.5625, + "step": 11668 + }, + { + "epoch": 0.11045900739296297, + "grad_norm": 301.586181640625, + "learning_rate": 1.9662444731993032e-06, + "loss": 31.2656, + "step": 11669 + }, + { + "epoch": 0.11046847341467801, + "grad_norm": 291.43878173828125, + "learning_rate": 1.966236574327437e-06, + "loss": 17.6016, + "step": 11670 + }, + { + "epoch": 0.11047793943639307, + "grad_norm": 206.71287536621094, + "learning_rate": 1.9662286745473713e-06, + "loss": 24.5664, + "step": 11671 + }, + { + "epoch": 0.11048740545810812, + "grad_norm": 2.7949883937835693, + "learning_rate": 1.966220773859113e-06, + "loss": 0.845, + "step": 11672 + }, + { + "epoch": 0.11049687147982318, + "grad_norm": 466.47650146484375, + "learning_rate": 1.9662128722626704e-06, + "loss": 48.4375, + "step": 11673 + }, + { + "epoch": 0.11050633750153822, + "grad_norm": 438.54937744140625, + "learning_rate": 1.966204969758051e-06, + "loss": 9.4336, + "step": 11674 + }, + { + "epoch": 0.11051580352325328, + "grad_norm": 2.5155436992645264, + "learning_rate": 1.966197066345261e-06, + "loss": 0.7583, + "step": 11675 + }, + { + "epoch": 0.11052526954496833, + "grad_norm": 238.4827423095703, + "learning_rate": 1.9661891620243088e-06, + "loss": 22.7539, + "step": 11676 + }, + { + "epoch": 0.11053473556668339, + "grad_norm": 648.9135131835938, + "learning_rate": 1.9661812567952023e-06, + "loss": 37.2344, + "step": 11677 + }, + { + "epoch": 0.11054420158839845, + "grad_norm": 1042.5692138671875, + "learning_rate": 1.9661733506579473e-06, + "loss": 58.5312, + "step": 11678 + }, + { + "epoch": 0.1105536676101135, + "grad_norm": 438.1525573730469, + "learning_rate": 1.966165443612553e-06, + "loss": 27.7344, + "step": 11679 + }, + { + "epoch": 0.11056313363182856, + "grad_norm": 398.8329772949219, + "learning_rate": 1.9661575356590256e-06, + "loss": 22.6328, + "step": 11680 + }, + { + "epoch": 0.1105725996535436, + "grad_norm": 414.911376953125, + "learning_rate": 1.966149626797373e-06, + "loss": 43.8594, + "step": 11681 + }, + { + "epoch": 0.11058206567525866, + "grad_norm": 4.330409049987793, + "learning_rate": 1.966141717027603e-06, + "loss": 1.0498, + "step": 11682 + }, + { + "epoch": 0.11059153169697371, + "grad_norm": 381.53631591796875, + "learning_rate": 1.9661338063497223e-06, + "loss": 23.957, + "step": 11683 + }, + { + "epoch": 0.11060099771868877, + "grad_norm": 355.8726806640625, + "learning_rate": 1.9661258947637385e-06, + "loss": 37.4922, + "step": 11684 + }, + { + "epoch": 0.11061046374040381, + "grad_norm": 3.0289359092712402, + "learning_rate": 1.966117982269659e-06, + "loss": 0.9995, + "step": 11685 + }, + { + "epoch": 0.11061992976211887, + "grad_norm": 261.149658203125, + "learning_rate": 1.9661100688674924e-06, + "loss": 8.25, + "step": 11686 + }, + { + "epoch": 0.11062939578383393, + "grad_norm": 395.0982971191406, + "learning_rate": 1.9661021545572444e-06, + "loss": 27.1562, + "step": 11687 + }, + { + "epoch": 0.11063886180554898, + "grad_norm": 297.8149719238281, + "learning_rate": 1.966094239338924e-06, + "loss": 30.7031, + "step": 11688 + }, + { + "epoch": 0.11064832782726404, + "grad_norm": 480.4171142578125, + "learning_rate": 1.966086323212537e-06, + "loss": 44.0938, + "step": 11689 + }, + { + "epoch": 0.11065779384897909, + "grad_norm": 2.9270427227020264, + "learning_rate": 1.966078406178092e-06, + "loss": 0.8354, + "step": 11690 + }, + { + "epoch": 0.11066725987069415, + "grad_norm": 152.99765014648438, + "learning_rate": 1.966070488235596e-06, + "loss": 15.9688, + "step": 11691 + }, + { + "epoch": 0.11067672589240919, + "grad_norm": 446.4729919433594, + "learning_rate": 1.966062569385057e-06, + "loss": 31.1484, + "step": 11692 + }, + { + "epoch": 0.11068619191412425, + "grad_norm": 252.77015686035156, + "learning_rate": 1.9660546496264815e-06, + "loss": 19.7422, + "step": 11693 + }, + { + "epoch": 0.11069565793583931, + "grad_norm": 160.29949951171875, + "learning_rate": 1.966046728959878e-06, + "loss": 18.4766, + "step": 11694 + }, + { + "epoch": 0.11070512395755436, + "grad_norm": 195.86155700683594, + "learning_rate": 1.966038807385253e-06, + "loss": 20.1953, + "step": 11695 + }, + { + "epoch": 0.11071458997926942, + "grad_norm": 286.2187194824219, + "learning_rate": 1.966030884902615e-06, + "loss": 20.3984, + "step": 11696 + }, + { + "epoch": 0.11072405600098446, + "grad_norm": 363.47283935546875, + "learning_rate": 1.96602296151197e-06, + "loss": 18.6953, + "step": 11697 + }, + { + "epoch": 0.11073352202269952, + "grad_norm": 753.0455932617188, + "learning_rate": 1.9660150372133266e-06, + "loss": 47.5625, + "step": 11698 + }, + { + "epoch": 0.11074298804441457, + "grad_norm": 242.1217498779297, + "learning_rate": 1.9660071120066923e-06, + "loss": 19.8828, + "step": 11699 + }, + { + "epoch": 0.11075245406612963, + "grad_norm": 186.0167236328125, + "learning_rate": 1.9659991858920737e-06, + "loss": 17.7109, + "step": 11700 + }, + { + "epoch": 0.11076192008784468, + "grad_norm": 208.97320556640625, + "learning_rate": 1.9659912588694786e-06, + "loss": 16.8828, + "step": 11701 + }, + { + "epoch": 0.11077138610955974, + "grad_norm": 275.5387268066406, + "learning_rate": 1.965983330938915e-06, + "loss": 20.8906, + "step": 11702 + }, + { + "epoch": 0.1107808521312748, + "grad_norm": 3.3300933837890625, + "learning_rate": 1.9659754021003897e-06, + "loss": 0.9321, + "step": 11703 + }, + { + "epoch": 0.11079031815298984, + "grad_norm": 274.82476806640625, + "learning_rate": 1.9659674723539103e-06, + "loss": 21.8359, + "step": 11704 + }, + { + "epoch": 0.1107997841747049, + "grad_norm": 430.6251220703125, + "learning_rate": 1.9659595416994845e-06, + "loss": 25.9453, + "step": 11705 + }, + { + "epoch": 0.11080925019641995, + "grad_norm": 3.1388654708862305, + "learning_rate": 1.9659516101371193e-06, + "loss": 0.9863, + "step": 11706 + }, + { + "epoch": 0.11081871621813501, + "grad_norm": 332.71490478515625, + "learning_rate": 1.9659436776668225e-06, + "loss": 22.4766, + "step": 11707 + }, + { + "epoch": 0.11082818223985005, + "grad_norm": 3.371743679046631, + "learning_rate": 1.9659357442886017e-06, + "loss": 0.9219, + "step": 11708 + }, + { + "epoch": 0.11083764826156511, + "grad_norm": 353.05029296875, + "learning_rate": 1.965927810002464e-06, + "loss": 40.1406, + "step": 11709 + }, + { + "epoch": 0.11084711428328016, + "grad_norm": 355.4607849121094, + "learning_rate": 1.9659198748084167e-06, + "loss": 30.2891, + "step": 11710 + }, + { + "epoch": 0.11085658030499522, + "grad_norm": 618.358642578125, + "learning_rate": 1.965911938706468e-06, + "loss": 32.8672, + "step": 11711 + }, + { + "epoch": 0.11086604632671028, + "grad_norm": 527.87841796875, + "learning_rate": 1.9659040016966246e-06, + "loss": 38.2891, + "step": 11712 + }, + { + "epoch": 0.11087551234842533, + "grad_norm": 3.5069689750671387, + "learning_rate": 1.9658960637788946e-06, + "loss": 1.0132, + "step": 11713 + }, + { + "epoch": 0.11088497837014039, + "grad_norm": 602.0235595703125, + "learning_rate": 1.9658881249532846e-06, + "loss": 19.6094, + "step": 11714 + }, + { + "epoch": 0.11089444439185543, + "grad_norm": 512.498291015625, + "learning_rate": 1.965880185219803e-06, + "loss": 41.9062, + "step": 11715 + }, + { + "epoch": 0.11090391041357049, + "grad_norm": 819.5086059570312, + "learning_rate": 1.9658722445784572e-06, + "loss": 52.1094, + "step": 11716 + }, + { + "epoch": 0.11091337643528554, + "grad_norm": 3.581167221069336, + "learning_rate": 1.965864303029254e-06, + "loss": 0.8657, + "step": 11717 + }, + { + "epoch": 0.1109228424570006, + "grad_norm": 194.72799682617188, + "learning_rate": 1.9658563605722006e-06, + "loss": 21.1172, + "step": 11718 + }, + { + "epoch": 0.11093230847871564, + "grad_norm": 678.2125244140625, + "learning_rate": 1.965848417207306e-06, + "loss": 47.6094, + "step": 11719 + }, + { + "epoch": 0.1109417745004307, + "grad_norm": 430.13421630859375, + "learning_rate": 1.965840472934576e-06, + "loss": 31.6172, + "step": 11720 + }, + { + "epoch": 0.11095124052214576, + "grad_norm": 826.6094360351562, + "learning_rate": 1.9658325277540192e-06, + "loss": 31.9805, + "step": 11721 + }, + { + "epoch": 0.11096070654386081, + "grad_norm": 2.929476499557495, + "learning_rate": 1.9658245816656425e-06, + "loss": 0.8794, + "step": 11722 + }, + { + "epoch": 0.11097017256557587, + "grad_norm": 316.6615295410156, + "learning_rate": 1.9658166346694536e-06, + "loss": 24.0469, + "step": 11723 + }, + { + "epoch": 0.11097963858729092, + "grad_norm": 3.102555990219116, + "learning_rate": 1.9658086867654597e-06, + "loss": 0.9526, + "step": 11724 + }, + { + "epoch": 0.11098910460900598, + "grad_norm": 176.4745330810547, + "learning_rate": 1.9658007379536684e-06, + "loss": 17.9609, + "step": 11725 + }, + { + "epoch": 0.11099857063072102, + "grad_norm": 550.6502685546875, + "learning_rate": 1.9657927882340877e-06, + "loss": 34.9297, + "step": 11726 + }, + { + "epoch": 0.11100803665243608, + "grad_norm": 330.0228576660156, + "learning_rate": 1.9657848376067244e-06, + "loss": 20.5859, + "step": 11727 + }, + { + "epoch": 0.11101750267415113, + "grad_norm": 427.6939697265625, + "learning_rate": 1.9657768860715862e-06, + "loss": 40.7578, + "step": 11728 + }, + { + "epoch": 0.11102696869586619, + "grad_norm": 351.5674743652344, + "learning_rate": 1.9657689336286803e-06, + "loss": 22.6797, + "step": 11729 + }, + { + "epoch": 0.11103643471758125, + "grad_norm": 297.2141418457031, + "learning_rate": 1.9657609802780146e-06, + "loss": 16.2109, + "step": 11730 + }, + { + "epoch": 0.1110459007392963, + "grad_norm": 311.5366516113281, + "learning_rate": 1.9657530260195965e-06, + "loss": 17.9062, + "step": 11731 + }, + { + "epoch": 0.11105536676101135, + "grad_norm": 335.42236328125, + "learning_rate": 1.9657450708534334e-06, + "loss": 25.8047, + "step": 11732 + }, + { + "epoch": 0.1110648327827264, + "grad_norm": 242.67848205566406, + "learning_rate": 1.9657371147795327e-06, + "loss": 17.7656, + "step": 11733 + }, + { + "epoch": 0.11107429880444146, + "grad_norm": 401.8386535644531, + "learning_rate": 1.965729157797902e-06, + "loss": 30.1094, + "step": 11734 + }, + { + "epoch": 0.1110837648261565, + "grad_norm": 454.2254333496094, + "learning_rate": 1.9657211999085487e-06, + "loss": 47.2031, + "step": 11735 + }, + { + "epoch": 0.11109323084787157, + "grad_norm": 703.9826049804688, + "learning_rate": 1.9657132411114803e-06, + "loss": 49.7344, + "step": 11736 + }, + { + "epoch": 0.11110269686958663, + "grad_norm": 390.07159423828125, + "learning_rate": 1.965705281406704e-06, + "loss": 30.9375, + "step": 11737 + }, + { + "epoch": 0.11111216289130167, + "grad_norm": 240.27542114257812, + "learning_rate": 1.9656973207942277e-06, + "loss": 19.6719, + "step": 11738 + }, + { + "epoch": 0.11112162891301673, + "grad_norm": 651.0078125, + "learning_rate": 1.965689359274059e-06, + "loss": 27.9844, + "step": 11739 + }, + { + "epoch": 0.11113109493473178, + "grad_norm": 532.5540771484375, + "learning_rate": 1.965681396846205e-06, + "loss": 27.3281, + "step": 11740 + }, + { + "epoch": 0.11114056095644684, + "grad_norm": 296.7336120605469, + "learning_rate": 1.965673433510673e-06, + "loss": 30.3672, + "step": 11741 + }, + { + "epoch": 0.11115002697816188, + "grad_norm": 587.2009887695312, + "learning_rate": 1.965665469267471e-06, + "loss": 55.1719, + "step": 11742 + }, + { + "epoch": 0.11115949299987694, + "grad_norm": 720.6033325195312, + "learning_rate": 1.9656575041166065e-06, + "loss": 55.0469, + "step": 11743 + }, + { + "epoch": 0.11116895902159199, + "grad_norm": 611.6846923828125, + "learning_rate": 1.9656495380580863e-06, + "loss": 53.7812, + "step": 11744 + }, + { + "epoch": 0.11117842504330705, + "grad_norm": 386.1188659667969, + "learning_rate": 1.9656415710919187e-06, + "loss": 33.3125, + "step": 11745 + }, + { + "epoch": 0.11118789106502211, + "grad_norm": 753.240966796875, + "learning_rate": 1.965633603218111e-06, + "loss": 38.8594, + "step": 11746 + }, + { + "epoch": 0.11119735708673716, + "grad_norm": 377.1036071777344, + "learning_rate": 1.9656256344366704e-06, + "loss": 20.5547, + "step": 11747 + }, + { + "epoch": 0.11120682310845222, + "grad_norm": 290.7760314941406, + "learning_rate": 1.965617664747604e-06, + "loss": 32.0781, + "step": 11748 + }, + { + "epoch": 0.11121628913016726, + "grad_norm": 495.35089111328125, + "learning_rate": 1.96560969415092e-06, + "loss": 26.1953, + "step": 11749 + }, + { + "epoch": 0.11122575515188232, + "grad_norm": 3.341252088546753, + "learning_rate": 1.965601722646626e-06, + "loss": 0.9492, + "step": 11750 + }, + { + "epoch": 0.11123522117359737, + "grad_norm": 280.80511474609375, + "learning_rate": 1.9655937502347296e-06, + "loss": 25.7266, + "step": 11751 + }, + { + "epoch": 0.11124468719531243, + "grad_norm": 636.5368041992188, + "learning_rate": 1.9655857769152372e-06, + "loss": 47.5781, + "step": 11752 + }, + { + "epoch": 0.11125415321702747, + "grad_norm": 212.32244873046875, + "learning_rate": 1.965577802688157e-06, + "loss": 18.4453, + "step": 11753 + }, + { + "epoch": 0.11126361923874253, + "grad_norm": 2.9727799892425537, + "learning_rate": 1.9655698275534973e-06, + "loss": 0.897, + "step": 11754 + }, + { + "epoch": 0.1112730852604576, + "grad_norm": 207.33998107910156, + "learning_rate": 1.965561851511264e-06, + "loss": 9.6055, + "step": 11755 + }, + { + "epoch": 0.11128255128217264, + "grad_norm": 287.4629821777344, + "learning_rate": 1.9655538745614656e-06, + "loss": 9.375, + "step": 11756 + }, + { + "epoch": 0.1112920173038877, + "grad_norm": 528.9722900390625, + "learning_rate": 1.9655458967041094e-06, + "loss": 53.125, + "step": 11757 + }, + { + "epoch": 0.11130148332560275, + "grad_norm": 360.6204833984375, + "learning_rate": 1.9655379179392027e-06, + "loss": 24.5859, + "step": 11758 + }, + { + "epoch": 0.1113109493473178, + "grad_norm": 405.0380859375, + "learning_rate": 1.965529938266753e-06, + "loss": 10.4844, + "step": 11759 + }, + { + "epoch": 0.11132041536903285, + "grad_norm": 659.9009399414062, + "learning_rate": 1.9655219576867684e-06, + "loss": 15.0195, + "step": 11760 + }, + { + "epoch": 0.11132988139074791, + "grad_norm": 528.8104248046875, + "learning_rate": 1.9655139761992556e-06, + "loss": 48.7188, + "step": 11761 + }, + { + "epoch": 0.11133934741246296, + "grad_norm": 258.91217041015625, + "learning_rate": 1.9655059938042227e-06, + "loss": 25.4375, + "step": 11762 + }, + { + "epoch": 0.11134881343417802, + "grad_norm": 768.3623046875, + "learning_rate": 1.965498010501677e-06, + "loss": 20.0078, + "step": 11763 + }, + { + "epoch": 0.11135827945589308, + "grad_norm": 159.1790008544922, + "learning_rate": 1.965490026291626e-06, + "loss": 24.7891, + "step": 11764 + }, + { + "epoch": 0.11136774547760812, + "grad_norm": 428.470703125, + "learning_rate": 1.965482041174077e-06, + "loss": 22.3203, + "step": 11765 + }, + { + "epoch": 0.11137721149932318, + "grad_norm": 221.975341796875, + "learning_rate": 1.965474055149038e-06, + "loss": 23.4531, + "step": 11766 + }, + { + "epoch": 0.11138667752103823, + "grad_norm": 523.70263671875, + "learning_rate": 1.9654660682165162e-06, + "loss": 36.918, + "step": 11767 + }, + { + "epoch": 0.11139614354275329, + "grad_norm": 558.5667724609375, + "learning_rate": 1.9654580803765185e-06, + "loss": 21.3047, + "step": 11768 + }, + { + "epoch": 0.11140560956446834, + "grad_norm": 569.7611083984375, + "learning_rate": 1.9654500916290537e-06, + "loss": 28.3203, + "step": 11769 + }, + { + "epoch": 0.1114150755861834, + "grad_norm": 292.2543029785156, + "learning_rate": 1.965442101974128e-06, + "loss": 13.1289, + "step": 11770 + }, + { + "epoch": 0.11142454160789844, + "grad_norm": 294.1295166015625, + "learning_rate": 1.96543411141175e-06, + "loss": 22.7539, + "step": 11771 + }, + { + "epoch": 0.1114340076296135, + "grad_norm": 666.3610229492188, + "learning_rate": 1.9654261199419267e-06, + "loss": 34.6914, + "step": 11772 + }, + { + "epoch": 0.11144347365132856, + "grad_norm": 257.47320556640625, + "learning_rate": 1.9654181275646657e-06, + "loss": 31.8281, + "step": 11773 + }, + { + "epoch": 0.11145293967304361, + "grad_norm": 612.99755859375, + "learning_rate": 1.965410134279974e-06, + "loss": 58.1875, + "step": 11774 + }, + { + "epoch": 0.11146240569475867, + "grad_norm": 487.3499450683594, + "learning_rate": 1.96540214008786e-06, + "loss": 48.5469, + "step": 11775 + }, + { + "epoch": 0.11147187171647371, + "grad_norm": 362.91583251953125, + "learning_rate": 1.965394144988331e-06, + "loss": 18.7266, + "step": 11776 + }, + { + "epoch": 0.11148133773818877, + "grad_norm": 186.30996704101562, + "learning_rate": 1.965386148981394e-06, + "loss": 23.0625, + "step": 11777 + }, + { + "epoch": 0.11149080375990382, + "grad_norm": 269.5071105957031, + "learning_rate": 1.9653781520670564e-06, + "loss": 23.6875, + "step": 11778 + }, + { + "epoch": 0.11150026978161888, + "grad_norm": 900.319091796875, + "learning_rate": 1.965370154245327e-06, + "loss": 45.4219, + "step": 11779 + }, + { + "epoch": 0.11150973580333394, + "grad_norm": 416.7370910644531, + "learning_rate": 1.965362155516212e-06, + "loss": 23.8516, + "step": 11780 + }, + { + "epoch": 0.11151920182504899, + "grad_norm": 486.7370910644531, + "learning_rate": 1.965354155879719e-06, + "loss": 57.6875, + "step": 11781 + }, + { + "epoch": 0.11152866784676405, + "grad_norm": 3.192990779876709, + "learning_rate": 1.9653461553358565e-06, + "loss": 0.9414, + "step": 11782 + }, + { + "epoch": 0.11153813386847909, + "grad_norm": 442.8413391113281, + "learning_rate": 1.9653381538846313e-06, + "loss": 15.5352, + "step": 11783 + }, + { + "epoch": 0.11154759989019415, + "grad_norm": 333.4385681152344, + "learning_rate": 1.965330151526051e-06, + "loss": 8.5781, + "step": 11784 + }, + { + "epoch": 0.1115570659119092, + "grad_norm": 2.7087290287017822, + "learning_rate": 1.965322148260123e-06, + "loss": 0.8677, + "step": 11785 + }, + { + "epoch": 0.11156653193362426, + "grad_norm": 2.9596996307373047, + "learning_rate": 1.965314144086855e-06, + "loss": 0.9326, + "step": 11786 + }, + { + "epoch": 0.1115759979553393, + "grad_norm": 266.14312744140625, + "learning_rate": 1.9653061390062545e-06, + "loss": 36.7656, + "step": 11787 + }, + { + "epoch": 0.11158546397705436, + "grad_norm": 512.4434814453125, + "learning_rate": 1.9652981330183293e-06, + "loss": 39.1172, + "step": 11788 + }, + { + "epoch": 0.11159492999876942, + "grad_norm": 372.62200927734375, + "learning_rate": 1.965290126123086e-06, + "loss": 30.6016, + "step": 11789 + }, + { + "epoch": 0.11160439602048447, + "grad_norm": 369.6979675292969, + "learning_rate": 1.9652821183205336e-06, + "loss": 17.7656, + "step": 11790 + }, + { + "epoch": 0.11161386204219953, + "grad_norm": 301.2774963378906, + "learning_rate": 1.9652741096106786e-06, + "loss": 18.2305, + "step": 11791 + }, + { + "epoch": 0.11162332806391458, + "grad_norm": 291.8656311035156, + "learning_rate": 1.965266099993528e-06, + "loss": 23.2266, + "step": 11792 + }, + { + "epoch": 0.11163279408562964, + "grad_norm": 291.867919921875, + "learning_rate": 1.965258089469091e-06, + "loss": 21.3633, + "step": 11793 + }, + { + "epoch": 0.11164226010734468, + "grad_norm": 484.25115966796875, + "learning_rate": 1.9652500780373737e-06, + "loss": 30.9258, + "step": 11794 + }, + { + "epoch": 0.11165172612905974, + "grad_norm": 354.8265075683594, + "learning_rate": 1.9652420656983844e-06, + "loss": 54.3125, + "step": 11795 + }, + { + "epoch": 0.11166119215077479, + "grad_norm": 269.8039245605469, + "learning_rate": 1.96523405245213e-06, + "loss": 22.9336, + "step": 11796 + }, + { + "epoch": 0.11167065817248985, + "grad_norm": 741.570556640625, + "learning_rate": 1.965226038298619e-06, + "loss": 48.0156, + "step": 11797 + }, + { + "epoch": 0.11168012419420491, + "grad_norm": 3.0399882793426514, + "learning_rate": 1.9652180232378575e-06, + "loss": 0.9702, + "step": 11798 + }, + { + "epoch": 0.11168959021591995, + "grad_norm": 321.8238525390625, + "learning_rate": 1.9652100072698544e-06, + "loss": 22.3672, + "step": 11799 + }, + { + "epoch": 0.11169905623763501, + "grad_norm": 477.90716552734375, + "learning_rate": 1.9652019903946166e-06, + "loss": 27.3359, + "step": 11800 + }, + { + "epoch": 0.11170852225935006, + "grad_norm": 301.3017272949219, + "learning_rate": 1.9651939726121516e-06, + "loss": 25.875, + "step": 11801 + }, + { + "epoch": 0.11171798828106512, + "grad_norm": 373.2320251464844, + "learning_rate": 1.965185953922467e-06, + "loss": 35.7344, + "step": 11802 + }, + { + "epoch": 0.11172745430278017, + "grad_norm": 428.96234130859375, + "learning_rate": 1.9651779343255703e-06, + "loss": 51.5781, + "step": 11803 + }, + { + "epoch": 0.11173692032449523, + "grad_norm": 434.5393981933594, + "learning_rate": 1.9651699138214693e-06, + "loss": 39.2734, + "step": 11804 + }, + { + "epoch": 0.11174638634621027, + "grad_norm": 399.5402526855469, + "learning_rate": 1.9651618924101716e-06, + "loss": 39.1094, + "step": 11805 + }, + { + "epoch": 0.11175585236792533, + "grad_norm": 266.142333984375, + "learning_rate": 1.965153870091684e-06, + "loss": 10.5078, + "step": 11806 + }, + { + "epoch": 0.11176531838964039, + "grad_norm": 739.8113403320312, + "learning_rate": 1.965145846866015e-06, + "loss": 38.1953, + "step": 11807 + }, + { + "epoch": 0.11177478441135544, + "grad_norm": 246.14907836914062, + "learning_rate": 1.9651378227331716e-06, + "loss": 15.375, + "step": 11808 + }, + { + "epoch": 0.1117842504330705, + "grad_norm": 324.5140380859375, + "learning_rate": 1.9651297976931614e-06, + "loss": 33.2656, + "step": 11809 + }, + { + "epoch": 0.11179371645478554, + "grad_norm": 603.7135009765625, + "learning_rate": 1.965121771745992e-06, + "loss": 19.3164, + "step": 11810 + }, + { + "epoch": 0.1118031824765006, + "grad_norm": 2.937551736831665, + "learning_rate": 1.965113744891671e-06, + "loss": 0.7651, + "step": 11811 + }, + { + "epoch": 0.11181264849821565, + "grad_norm": 304.43438720703125, + "learning_rate": 1.965105717130206e-06, + "loss": 22.75, + "step": 11812 + }, + { + "epoch": 0.11182211451993071, + "grad_norm": 703.4104614257812, + "learning_rate": 1.965097688461604e-06, + "loss": 25.5078, + "step": 11813 + }, + { + "epoch": 0.11183158054164576, + "grad_norm": 360.3617858886719, + "learning_rate": 1.965089658885873e-06, + "loss": 20.4414, + "step": 11814 + }, + { + "epoch": 0.11184104656336082, + "grad_norm": 326.96905517578125, + "learning_rate": 1.9650816284030207e-06, + "loss": 33.9375, + "step": 11815 + }, + { + "epoch": 0.11185051258507588, + "grad_norm": 392.4375305175781, + "learning_rate": 1.9650735970130543e-06, + "loss": 34.4062, + "step": 11816 + }, + { + "epoch": 0.11185997860679092, + "grad_norm": 369.2729797363281, + "learning_rate": 1.9650655647159813e-06, + "loss": 47.4062, + "step": 11817 + }, + { + "epoch": 0.11186944462850598, + "grad_norm": 183.10122680664062, + "learning_rate": 1.9650575315118097e-06, + "loss": 17.7969, + "step": 11818 + }, + { + "epoch": 0.11187891065022103, + "grad_norm": 266.5460510253906, + "learning_rate": 1.9650494974005468e-06, + "loss": 20.7188, + "step": 11819 + }, + { + "epoch": 0.11188837667193609, + "grad_norm": 554.1602172851562, + "learning_rate": 1.9650414623822004e-06, + "loss": 43.4375, + "step": 11820 + }, + { + "epoch": 0.11189784269365113, + "grad_norm": 3.0168724060058594, + "learning_rate": 1.9650334264567775e-06, + "loss": 0.9741, + "step": 11821 + }, + { + "epoch": 0.1119073087153662, + "grad_norm": 252.2909393310547, + "learning_rate": 1.965025389624286e-06, + "loss": 25.4453, + "step": 11822 + }, + { + "epoch": 0.11191677473708125, + "grad_norm": 195.7599334716797, + "learning_rate": 1.9650173518847333e-06, + "loss": 17.4141, + "step": 11823 + }, + { + "epoch": 0.1119262407587963, + "grad_norm": 324.9488220214844, + "learning_rate": 1.9650093132381273e-06, + "loss": 49.9062, + "step": 11824 + }, + { + "epoch": 0.11193570678051136, + "grad_norm": 359.6677551269531, + "learning_rate": 1.9650012736844756e-06, + "loss": 37.2969, + "step": 11825 + }, + { + "epoch": 0.1119451728022264, + "grad_norm": 378.8276672363281, + "learning_rate": 1.964993233223785e-06, + "loss": 20.5156, + "step": 11826 + }, + { + "epoch": 0.11195463882394147, + "grad_norm": 364.17291259765625, + "learning_rate": 1.9649851918560636e-06, + "loss": 22.8281, + "step": 11827 + }, + { + "epoch": 0.11196410484565651, + "grad_norm": 551.3377075195312, + "learning_rate": 1.964977149581319e-06, + "loss": 36.7812, + "step": 11828 + }, + { + "epoch": 0.11197357086737157, + "grad_norm": 432.240234375, + "learning_rate": 1.9649691063995583e-06, + "loss": 27.75, + "step": 11829 + }, + { + "epoch": 0.11198303688908662, + "grad_norm": 728.6136474609375, + "learning_rate": 1.9649610623107898e-06, + "loss": 53.4062, + "step": 11830 + }, + { + "epoch": 0.11199250291080168, + "grad_norm": 531.7673950195312, + "learning_rate": 1.9649530173150204e-06, + "loss": 29.3906, + "step": 11831 + }, + { + "epoch": 0.11200196893251674, + "grad_norm": 446.7733154296875, + "learning_rate": 1.9649449714122583e-06, + "loss": 58.4531, + "step": 11832 + }, + { + "epoch": 0.11201143495423178, + "grad_norm": 802.1394653320312, + "learning_rate": 1.9649369246025108e-06, + "loss": 19.2188, + "step": 11833 + }, + { + "epoch": 0.11202090097594684, + "grad_norm": 3.3575124740600586, + "learning_rate": 1.964928876885785e-06, + "loss": 0.8633, + "step": 11834 + }, + { + "epoch": 0.11203036699766189, + "grad_norm": 262.9703674316406, + "learning_rate": 1.9649208282620887e-06, + "loss": 20.8516, + "step": 11835 + }, + { + "epoch": 0.11203983301937695, + "grad_norm": 183.20506286621094, + "learning_rate": 1.96491277873143e-06, + "loss": 14.1211, + "step": 11836 + }, + { + "epoch": 0.112049299041092, + "grad_norm": 3.117825984954834, + "learning_rate": 1.9649047282938156e-06, + "loss": 0.9126, + "step": 11837 + }, + { + "epoch": 0.11205876506280706, + "grad_norm": 1039.4307861328125, + "learning_rate": 1.964896676949254e-06, + "loss": 50.5, + "step": 11838 + }, + { + "epoch": 0.1120682310845221, + "grad_norm": 486.6468505859375, + "learning_rate": 1.964888624697752e-06, + "loss": 53.6562, + "step": 11839 + }, + { + "epoch": 0.11207769710623716, + "grad_norm": 279.1471252441406, + "learning_rate": 1.9648805715393177e-06, + "loss": 22.7578, + "step": 11840 + }, + { + "epoch": 0.11208716312795222, + "grad_norm": 560.871337890625, + "learning_rate": 1.9648725174739583e-06, + "loss": 16.8477, + "step": 11841 + }, + { + "epoch": 0.11209662914966727, + "grad_norm": 427.2511901855469, + "learning_rate": 1.9648644625016816e-06, + "loss": 27.1484, + "step": 11842 + }, + { + "epoch": 0.11210609517138233, + "grad_norm": 510.57733154296875, + "learning_rate": 1.9648564066224947e-06, + "loss": 47.1719, + "step": 11843 + }, + { + "epoch": 0.11211556119309737, + "grad_norm": 562.2008666992188, + "learning_rate": 1.964848349836406e-06, + "loss": 55.6406, + "step": 11844 + }, + { + "epoch": 0.11212502721481243, + "grad_norm": 843.8475952148438, + "learning_rate": 1.9648402921434224e-06, + "loss": 64.5156, + "step": 11845 + }, + { + "epoch": 0.11213449323652748, + "grad_norm": 3.1747097969055176, + "learning_rate": 1.964832233543552e-06, + "loss": 0.9487, + "step": 11846 + }, + { + "epoch": 0.11214395925824254, + "grad_norm": 279.4598388671875, + "learning_rate": 1.9648241740368015e-06, + "loss": 7.1758, + "step": 11847 + }, + { + "epoch": 0.11215342527995759, + "grad_norm": 591.0211791992188, + "learning_rate": 1.964816113623179e-06, + "loss": 47.2031, + "step": 11848 + }, + { + "epoch": 0.11216289130167265, + "grad_norm": 208.06138610839844, + "learning_rate": 1.964808052302693e-06, + "loss": 26.2031, + "step": 11849 + }, + { + "epoch": 0.1121723573233877, + "grad_norm": 165.39552307128906, + "learning_rate": 1.9647999900753495e-06, + "loss": 18.6953, + "step": 11850 + }, + { + "epoch": 0.11218182334510275, + "grad_norm": 574.6751098632812, + "learning_rate": 1.964791926941157e-06, + "loss": 41.6875, + "step": 11851 + }, + { + "epoch": 0.11219128936681781, + "grad_norm": 285.316650390625, + "learning_rate": 1.9647838629001224e-06, + "loss": 18.8047, + "step": 11852 + }, + { + "epoch": 0.11220075538853286, + "grad_norm": 434.1623840332031, + "learning_rate": 1.9647757979522543e-06, + "loss": 21.5234, + "step": 11853 + }, + { + "epoch": 0.11221022141024792, + "grad_norm": 1851.37744140625, + "learning_rate": 1.9647677320975596e-06, + "loss": 63.375, + "step": 11854 + }, + { + "epoch": 0.11221968743196296, + "grad_norm": 591.4910278320312, + "learning_rate": 1.964759665336046e-06, + "loss": 43.2188, + "step": 11855 + }, + { + "epoch": 0.11222915345367802, + "grad_norm": 140.3282470703125, + "learning_rate": 1.964751597667721e-06, + "loss": 19.7109, + "step": 11856 + }, + { + "epoch": 0.11223861947539307, + "grad_norm": 755.3479614257812, + "learning_rate": 1.9647435290925924e-06, + "loss": 36.9922, + "step": 11857 + }, + { + "epoch": 0.11224808549710813, + "grad_norm": 3.2923851013183594, + "learning_rate": 1.964735459610667e-06, + "loss": 0.9375, + "step": 11858 + }, + { + "epoch": 0.11225755151882319, + "grad_norm": 458.12451171875, + "learning_rate": 1.9647273892219537e-06, + "loss": 28.5, + "step": 11859 + }, + { + "epoch": 0.11226701754053824, + "grad_norm": 207.51177978515625, + "learning_rate": 1.9647193179264593e-06, + "loss": 12.75, + "step": 11860 + }, + { + "epoch": 0.1122764835622533, + "grad_norm": 3.5491461753845215, + "learning_rate": 1.9647112457241916e-06, + "loss": 0.9761, + "step": 11861 + }, + { + "epoch": 0.11228594958396834, + "grad_norm": 255.55099487304688, + "learning_rate": 1.9647031726151578e-06, + "loss": 22.5156, + "step": 11862 + }, + { + "epoch": 0.1122954156056834, + "grad_norm": 886.084228515625, + "learning_rate": 1.964695098599366e-06, + "loss": 33.25, + "step": 11863 + }, + { + "epoch": 0.11230488162739845, + "grad_norm": 465.6686706542969, + "learning_rate": 1.9646870236768234e-06, + "loss": 35.625, + "step": 11864 + }, + { + "epoch": 0.11231434764911351, + "grad_norm": 527.2566528320312, + "learning_rate": 1.9646789478475378e-06, + "loss": 50.4453, + "step": 11865 + }, + { + "epoch": 0.11232381367082855, + "grad_norm": 229.45413208007812, + "learning_rate": 1.964670871111517e-06, + "loss": 16.9609, + "step": 11866 + }, + { + "epoch": 0.11233327969254361, + "grad_norm": 845.857421875, + "learning_rate": 1.964662793468768e-06, + "loss": 21.1953, + "step": 11867 + }, + { + "epoch": 0.11234274571425867, + "grad_norm": 290.71746826171875, + "learning_rate": 1.9646547149192987e-06, + "loss": 19.2969, + "step": 11868 + }, + { + "epoch": 0.11235221173597372, + "grad_norm": 270.8349304199219, + "learning_rate": 1.9646466354631166e-06, + "loss": 24.3359, + "step": 11869 + }, + { + "epoch": 0.11236167775768878, + "grad_norm": 476.2601318359375, + "learning_rate": 1.96463855510023e-06, + "loss": 39.0625, + "step": 11870 + }, + { + "epoch": 0.11237114377940383, + "grad_norm": 328.3622131347656, + "learning_rate": 1.9646304738306455e-06, + "loss": 35.3125, + "step": 11871 + }, + { + "epoch": 0.11238060980111889, + "grad_norm": 3.325679302215576, + "learning_rate": 1.9646223916543713e-06, + "loss": 1.0088, + "step": 11872 + }, + { + "epoch": 0.11239007582283393, + "grad_norm": 822.779296875, + "learning_rate": 1.964614308571415e-06, + "loss": 46.9062, + "step": 11873 + }, + { + "epoch": 0.11239954184454899, + "grad_norm": 231.6111602783203, + "learning_rate": 1.9646062245817832e-06, + "loss": 20.8164, + "step": 11874 + }, + { + "epoch": 0.11240900786626405, + "grad_norm": 601.4114990234375, + "learning_rate": 1.964598139685485e-06, + "loss": 64.7344, + "step": 11875 + }, + { + "epoch": 0.1124184738879791, + "grad_norm": 3.3167943954467773, + "learning_rate": 1.964590053882527e-06, + "loss": 0.9282, + "step": 11876 + }, + { + "epoch": 0.11242793990969416, + "grad_norm": 3.037031888961792, + "learning_rate": 1.9645819671729172e-06, + "loss": 0.9048, + "step": 11877 + }, + { + "epoch": 0.1124374059314092, + "grad_norm": 175.35647583007812, + "learning_rate": 1.9645738795566633e-06, + "loss": 24.1484, + "step": 11878 + }, + { + "epoch": 0.11244687195312426, + "grad_norm": 267.2883605957031, + "learning_rate": 1.9645657910337724e-06, + "loss": 18.0664, + "step": 11879 + }, + { + "epoch": 0.11245633797483931, + "grad_norm": 270.5318908691406, + "learning_rate": 1.9645577016042525e-06, + "loss": 15.3906, + "step": 11880 + }, + { + "epoch": 0.11246580399655437, + "grad_norm": 647.7953491210938, + "learning_rate": 1.964549611268111e-06, + "loss": 55.3438, + "step": 11881 + }, + { + "epoch": 0.11247527001826942, + "grad_norm": 3.0943639278411865, + "learning_rate": 1.9645415200253558e-06, + "loss": 1.0674, + "step": 11882 + }, + { + "epoch": 0.11248473603998448, + "grad_norm": 127.84879302978516, + "learning_rate": 1.9645334278759945e-06, + "loss": 14.8984, + "step": 11883 + }, + { + "epoch": 0.11249420206169954, + "grad_norm": 627.2319946289062, + "learning_rate": 1.964525334820034e-06, + "loss": 49.6719, + "step": 11884 + }, + { + "epoch": 0.11250366808341458, + "grad_norm": 764.001220703125, + "learning_rate": 1.9645172408574826e-06, + "loss": 42.3125, + "step": 11885 + }, + { + "epoch": 0.11251313410512964, + "grad_norm": 238.67355346679688, + "learning_rate": 1.964509145988348e-06, + "loss": 22.5234, + "step": 11886 + }, + { + "epoch": 0.11252260012684469, + "grad_norm": 413.6554260253906, + "learning_rate": 1.9645010502126372e-06, + "loss": 41.1719, + "step": 11887 + }, + { + "epoch": 0.11253206614855975, + "grad_norm": 889.3638305664062, + "learning_rate": 1.9644929535303584e-06, + "loss": 30.4844, + "step": 11888 + }, + { + "epoch": 0.1125415321702748, + "grad_norm": 221.21202087402344, + "learning_rate": 1.964484855941519e-06, + "loss": 19.875, + "step": 11889 + }, + { + "epoch": 0.11255099819198985, + "grad_norm": 533.1671752929688, + "learning_rate": 1.9644767574461266e-06, + "loss": 19.6953, + "step": 11890 + }, + { + "epoch": 0.1125604642137049, + "grad_norm": 239.32391357421875, + "learning_rate": 1.964468658044189e-06, + "loss": 17.9453, + "step": 11891 + }, + { + "epoch": 0.11256993023541996, + "grad_norm": 352.6235046386719, + "learning_rate": 1.964460557735713e-06, + "loss": 26.7852, + "step": 11892 + }, + { + "epoch": 0.11257939625713502, + "grad_norm": 546.8124389648438, + "learning_rate": 1.964452456520707e-06, + "loss": 45.8906, + "step": 11893 + }, + { + "epoch": 0.11258886227885007, + "grad_norm": 436.5029602050781, + "learning_rate": 1.9644443543991786e-06, + "loss": 45.0469, + "step": 11894 + }, + { + "epoch": 0.11259832830056513, + "grad_norm": 202.69366455078125, + "learning_rate": 1.964436251371135e-06, + "loss": 16.1094, + "step": 11895 + }, + { + "epoch": 0.11260779432228017, + "grad_norm": 593.6011962890625, + "learning_rate": 1.9644281474365844e-06, + "loss": 29.793, + "step": 11896 + }, + { + "epoch": 0.11261726034399523, + "grad_norm": 398.5366516113281, + "learning_rate": 1.9644200425955343e-06, + "loss": 36.2266, + "step": 11897 + }, + { + "epoch": 0.11262672636571028, + "grad_norm": 1065.91650390625, + "learning_rate": 1.9644119368479912e-06, + "loss": 35.8594, + "step": 11898 + }, + { + "epoch": 0.11263619238742534, + "grad_norm": 216.24664306640625, + "learning_rate": 1.9644038301939645e-06, + "loss": 18.0234, + "step": 11899 + }, + { + "epoch": 0.11264565840914038, + "grad_norm": 481.7897033691406, + "learning_rate": 1.9643957226334605e-06, + "loss": 18.3867, + "step": 11900 + }, + { + "epoch": 0.11265512443085544, + "grad_norm": 402.0020751953125, + "learning_rate": 1.9643876141664872e-06, + "loss": 47.9531, + "step": 11901 + }, + { + "epoch": 0.1126645904525705, + "grad_norm": 182.9598388671875, + "learning_rate": 1.9643795047930528e-06, + "loss": 21.125, + "step": 11902 + }, + { + "epoch": 0.11267405647428555, + "grad_norm": 484.8282165527344, + "learning_rate": 1.964371394513164e-06, + "loss": 48.1719, + "step": 11903 + }, + { + "epoch": 0.11268352249600061, + "grad_norm": 767.2601928710938, + "learning_rate": 1.9643632833268286e-06, + "loss": 46.5195, + "step": 11904 + }, + { + "epoch": 0.11269298851771566, + "grad_norm": 496.21356201171875, + "learning_rate": 1.964355171234055e-06, + "loss": 38.0156, + "step": 11905 + }, + { + "epoch": 0.11270245453943072, + "grad_norm": 360.0345153808594, + "learning_rate": 1.96434705823485e-06, + "loss": 21.6328, + "step": 11906 + }, + { + "epoch": 0.11271192056114576, + "grad_norm": 841.19873046875, + "learning_rate": 1.964338944329221e-06, + "loss": 40.8516, + "step": 11907 + }, + { + "epoch": 0.11272138658286082, + "grad_norm": 739.2568359375, + "learning_rate": 1.964330829517177e-06, + "loss": 40.0781, + "step": 11908 + }, + { + "epoch": 0.11273085260457587, + "grad_norm": 3.9100003242492676, + "learning_rate": 1.9643227137987242e-06, + "loss": 0.9985, + "step": 11909 + }, + { + "epoch": 0.11274031862629093, + "grad_norm": 538.384521484375, + "learning_rate": 1.964314597173871e-06, + "loss": 29.3281, + "step": 11910 + }, + { + "epoch": 0.11274978464800599, + "grad_norm": 2.951982021331787, + "learning_rate": 1.9643064796426247e-06, + "loss": 0.9468, + "step": 11911 + }, + { + "epoch": 0.11275925066972103, + "grad_norm": 330.00006103515625, + "learning_rate": 1.9642983612049933e-06, + "loss": 25.1523, + "step": 11912 + }, + { + "epoch": 0.1127687166914361, + "grad_norm": 462.856689453125, + "learning_rate": 1.964290241860984e-06, + "loss": 38.0781, + "step": 11913 + }, + { + "epoch": 0.11277818271315114, + "grad_norm": 218.53709411621094, + "learning_rate": 1.9642821216106043e-06, + "loss": 23.7031, + "step": 11914 + }, + { + "epoch": 0.1127876487348662, + "grad_norm": 1141.9334716796875, + "learning_rate": 1.964274000453863e-06, + "loss": 45.9766, + "step": 11915 + }, + { + "epoch": 0.11279711475658125, + "grad_norm": 302.6029052734375, + "learning_rate": 1.964265878390766e-06, + "loss": 30.0391, + "step": 11916 + }, + { + "epoch": 0.1128065807782963, + "grad_norm": 917.7401123046875, + "learning_rate": 1.964257755421322e-06, + "loss": 27.7734, + "step": 11917 + }, + { + "epoch": 0.11281604680001137, + "grad_norm": 161.87667846679688, + "learning_rate": 1.9642496315455387e-06, + "loss": 18.5859, + "step": 11918 + }, + { + "epoch": 0.11282551282172641, + "grad_norm": 342.9834899902344, + "learning_rate": 1.964241506763423e-06, + "loss": 40.0312, + "step": 11919 + }, + { + "epoch": 0.11283497884344147, + "grad_norm": 623.6222534179688, + "learning_rate": 1.9642333810749836e-06, + "loss": 37.4609, + "step": 11920 + }, + { + "epoch": 0.11284444486515652, + "grad_norm": 395.8309020996094, + "learning_rate": 1.964225254480227e-06, + "loss": 34.0078, + "step": 11921 + }, + { + "epoch": 0.11285391088687158, + "grad_norm": 719.568603515625, + "learning_rate": 1.964217126979162e-06, + "loss": 38.0938, + "step": 11922 + }, + { + "epoch": 0.11286337690858662, + "grad_norm": 416.4065246582031, + "learning_rate": 1.964208998571795e-06, + "loss": 41.8438, + "step": 11923 + }, + { + "epoch": 0.11287284293030168, + "grad_norm": 552.0810546875, + "learning_rate": 1.964200869258135e-06, + "loss": 39.4375, + "step": 11924 + }, + { + "epoch": 0.11288230895201673, + "grad_norm": 4.05979585647583, + "learning_rate": 1.964192739038188e-06, + "loss": 1.0059, + "step": 11925 + }, + { + "epoch": 0.11289177497373179, + "grad_norm": 2.875101089477539, + "learning_rate": 1.9641846079119635e-06, + "loss": 0.9043, + "step": 11926 + }, + { + "epoch": 0.11290124099544685, + "grad_norm": 931.3070678710938, + "learning_rate": 1.9641764758794676e-06, + "loss": 46.2031, + "step": 11927 + }, + { + "epoch": 0.1129107070171619, + "grad_norm": 272.2159729003906, + "learning_rate": 1.9641683429407083e-06, + "loss": 19.2812, + "step": 11928 + }, + { + "epoch": 0.11292017303887696, + "grad_norm": 307.9062805175781, + "learning_rate": 1.9641602090956937e-06, + "loss": 33.7812, + "step": 11929 + }, + { + "epoch": 0.112929639060592, + "grad_norm": 2.820709705352783, + "learning_rate": 1.9641520743444317e-06, + "loss": 0.8848, + "step": 11930 + }, + { + "epoch": 0.11293910508230706, + "grad_norm": 194.04608154296875, + "learning_rate": 1.964143938686929e-06, + "loss": 15.8047, + "step": 11931 + }, + { + "epoch": 0.11294857110402211, + "grad_norm": 2.7401630878448486, + "learning_rate": 1.9641358021231938e-06, + "loss": 0.8071, + "step": 11932 + }, + { + "epoch": 0.11295803712573717, + "grad_norm": 262.0780029296875, + "learning_rate": 1.9641276646532334e-06, + "loss": 21.8125, + "step": 11933 + }, + { + "epoch": 0.11296750314745221, + "grad_norm": 440.6603088378906, + "learning_rate": 1.9641195262770563e-06, + "loss": 61.875, + "step": 11934 + }, + { + "epoch": 0.11297696916916727, + "grad_norm": 242.6040496826172, + "learning_rate": 1.964111386994669e-06, + "loss": 27.6094, + "step": 11935 + }, + { + "epoch": 0.11298643519088233, + "grad_norm": 959.8777465820312, + "learning_rate": 1.9641032468060803e-06, + "loss": 46.5469, + "step": 11936 + }, + { + "epoch": 0.11299590121259738, + "grad_norm": 197.1245574951172, + "learning_rate": 1.964095105711297e-06, + "loss": 22.6016, + "step": 11937 + }, + { + "epoch": 0.11300536723431244, + "grad_norm": 661.4392700195312, + "learning_rate": 1.964086963710327e-06, + "loss": 76.5938, + "step": 11938 + }, + { + "epoch": 0.11301483325602749, + "grad_norm": 981.5134887695312, + "learning_rate": 1.964078820803178e-06, + "loss": 21.1914, + "step": 11939 + }, + { + "epoch": 0.11302429927774255, + "grad_norm": 219.07411193847656, + "learning_rate": 1.9640706769898574e-06, + "loss": 21.7969, + "step": 11940 + }, + { + "epoch": 0.11303376529945759, + "grad_norm": 493.24407958984375, + "learning_rate": 1.9640625322703733e-06, + "loss": 26.0391, + "step": 11941 + }, + { + "epoch": 0.11304323132117265, + "grad_norm": 406.1639099121094, + "learning_rate": 1.9640543866447334e-06, + "loss": 43.3828, + "step": 11942 + }, + { + "epoch": 0.1130526973428877, + "grad_norm": 347.8591613769531, + "learning_rate": 1.964046240112945e-06, + "loss": 30.8828, + "step": 11943 + }, + { + "epoch": 0.11306216336460276, + "grad_norm": 189.30755615234375, + "learning_rate": 1.9640380926750152e-06, + "loss": 18.3828, + "step": 11944 + }, + { + "epoch": 0.11307162938631782, + "grad_norm": 360.943359375, + "learning_rate": 1.964029944330953e-06, + "loss": 13.5645, + "step": 11945 + }, + { + "epoch": 0.11308109540803286, + "grad_norm": 439.37384033203125, + "learning_rate": 1.9640217950807647e-06, + "loss": 38.7344, + "step": 11946 + }, + { + "epoch": 0.11309056142974792, + "grad_norm": 306.2808532714844, + "learning_rate": 1.9640136449244595e-06, + "loss": 48.3438, + "step": 11947 + }, + { + "epoch": 0.11310002745146297, + "grad_norm": 289.4149169921875, + "learning_rate": 1.9640054938620433e-06, + "loss": 18.8516, + "step": 11948 + }, + { + "epoch": 0.11310949347317803, + "grad_norm": 271.3688049316406, + "learning_rate": 1.9639973418935255e-06, + "loss": 18.7383, + "step": 11949 + }, + { + "epoch": 0.11311895949489308, + "grad_norm": 2.99210262298584, + "learning_rate": 1.9639891890189124e-06, + "loss": 0.9092, + "step": 11950 + }, + { + "epoch": 0.11312842551660814, + "grad_norm": 300.530029296875, + "learning_rate": 1.9639810352382124e-06, + "loss": 21.1797, + "step": 11951 + }, + { + "epoch": 0.11313789153832318, + "grad_norm": 765.3668823242188, + "learning_rate": 1.9639728805514325e-06, + "loss": 49.8828, + "step": 11952 + }, + { + "epoch": 0.11314735756003824, + "grad_norm": 208.53614807128906, + "learning_rate": 1.963964724958581e-06, + "loss": 16.6094, + "step": 11953 + }, + { + "epoch": 0.1131568235817533, + "grad_norm": 885.7110595703125, + "learning_rate": 1.9639565684596655e-06, + "loss": 15.0586, + "step": 11954 + }, + { + "epoch": 0.11316628960346835, + "grad_norm": 387.4890441894531, + "learning_rate": 1.9639484110546937e-06, + "loss": 43.5312, + "step": 11955 + }, + { + "epoch": 0.11317575562518341, + "grad_norm": 483.27313232421875, + "learning_rate": 1.963940252743673e-06, + "loss": 40.6016, + "step": 11956 + }, + { + "epoch": 0.11318522164689845, + "grad_norm": 818.569091796875, + "learning_rate": 1.963932093526611e-06, + "loss": 49.8125, + "step": 11957 + }, + { + "epoch": 0.11319468766861351, + "grad_norm": 269.51361083984375, + "learning_rate": 1.9639239334035157e-06, + "loss": 34.6562, + "step": 11958 + }, + { + "epoch": 0.11320415369032856, + "grad_norm": 644.0307006835938, + "learning_rate": 1.9639157723743945e-06, + "loss": 51.0625, + "step": 11959 + }, + { + "epoch": 0.11321361971204362, + "grad_norm": 442.998779296875, + "learning_rate": 1.963907610439255e-06, + "loss": 48.25, + "step": 11960 + }, + { + "epoch": 0.11322308573375868, + "grad_norm": 308.0646057128906, + "learning_rate": 1.9638994475981054e-06, + "loss": 19.8047, + "step": 11961 + }, + { + "epoch": 0.11323255175547373, + "grad_norm": 179.66009521484375, + "learning_rate": 1.963891283850953e-06, + "loss": 8.3184, + "step": 11962 + }, + { + "epoch": 0.11324201777718879, + "grad_norm": 639.522705078125, + "learning_rate": 1.9638831191978053e-06, + "loss": 15.875, + "step": 11963 + }, + { + "epoch": 0.11325148379890383, + "grad_norm": 367.2482604980469, + "learning_rate": 1.96387495363867e-06, + "loss": 13.8203, + "step": 11964 + }, + { + "epoch": 0.11326094982061889, + "grad_norm": 325.9070739746094, + "learning_rate": 1.9638667871735556e-06, + "loss": 41.9219, + "step": 11965 + }, + { + "epoch": 0.11327041584233394, + "grad_norm": 747.8284912109375, + "learning_rate": 1.9638586198024683e-06, + "loss": 51.9062, + "step": 11966 + }, + { + "epoch": 0.113279881864049, + "grad_norm": 287.3350524902344, + "learning_rate": 1.9638504515254174e-06, + "loss": 17.3281, + "step": 11967 + }, + { + "epoch": 0.11328934788576404, + "grad_norm": 3.2877156734466553, + "learning_rate": 1.9638422823424093e-06, + "loss": 0.8279, + "step": 11968 + }, + { + "epoch": 0.1132988139074791, + "grad_norm": 295.4991149902344, + "learning_rate": 1.9638341122534524e-06, + "loss": 17.4688, + "step": 11969 + }, + { + "epoch": 0.11330827992919416, + "grad_norm": 294.7041931152344, + "learning_rate": 1.963825941258554e-06, + "loss": 21.0703, + "step": 11970 + }, + { + "epoch": 0.11331774595090921, + "grad_norm": 251.45005798339844, + "learning_rate": 1.963817769357722e-06, + "loss": 30.75, + "step": 11971 + }, + { + "epoch": 0.11332721197262427, + "grad_norm": 239.039306640625, + "learning_rate": 1.963809596550964e-06, + "loss": 20.4297, + "step": 11972 + }, + { + "epoch": 0.11333667799433932, + "grad_norm": 341.2623291015625, + "learning_rate": 1.9638014228382874e-06, + "loss": 36.9062, + "step": 11973 + }, + { + "epoch": 0.11334614401605438, + "grad_norm": 1380.5069580078125, + "learning_rate": 1.9637932482197002e-06, + "loss": 25.1406, + "step": 11974 + }, + { + "epoch": 0.11335561003776942, + "grad_norm": 644.3963623046875, + "learning_rate": 1.96378507269521e-06, + "loss": 55.0312, + "step": 11975 + }, + { + "epoch": 0.11336507605948448, + "grad_norm": 371.4204406738281, + "learning_rate": 1.963776896264825e-06, + "loss": 45.0938, + "step": 11976 + }, + { + "epoch": 0.11337454208119953, + "grad_norm": 330.2667236328125, + "learning_rate": 1.9637687189285522e-06, + "loss": 21.2031, + "step": 11977 + }, + { + "epoch": 0.11338400810291459, + "grad_norm": 3.3172640800476074, + "learning_rate": 1.9637605406863997e-06, + "loss": 0.9326, + "step": 11978 + }, + { + "epoch": 0.11339347412462965, + "grad_norm": 321.4062805175781, + "learning_rate": 1.9637523615383746e-06, + "loss": 19.5234, + "step": 11979 + }, + { + "epoch": 0.1134029401463447, + "grad_norm": 398.2757873535156, + "learning_rate": 1.963744181484485e-06, + "loss": 19.1953, + "step": 11980 + }, + { + "epoch": 0.11341240616805975, + "grad_norm": 311.7292785644531, + "learning_rate": 1.9637360005247387e-06, + "loss": 21.1328, + "step": 11981 + }, + { + "epoch": 0.1134218721897748, + "grad_norm": 507.2645568847656, + "learning_rate": 1.963727818659143e-06, + "loss": 22.9688, + "step": 11982 + }, + { + "epoch": 0.11343133821148986, + "grad_norm": 178.75479125976562, + "learning_rate": 1.9637196358877063e-06, + "loss": 15.0352, + "step": 11983 + }, + { + "epoch": 0.1134408042332049, + "grad_norm": 229.23953247070312, + "learning_rate": 1.9637114522104356e-06, + "loss": 15.7188, + "step": 11984 + }, + { + "epoch": 0.11345027025491997, + "grad_norm": 245.79791259765625, + "learning_rate": 1.963703267627339e-06, + "loss": 21.2969, + "step": 11985 + }, + { + "epoch": 0.11345973627663501, + "grad_norm": 187.72689819335938, + "learning_rate": 1.963695082138424e-06, + "loss": 17.8203, + "step": 11986 + }, + { + "epoch": 0.11346920229835007, + "grad_norm": 899.3807373046875, + "learning_rate": 1.9636868957436983e-06, + "loss": 46.0391, + "step": 11987 + }, + { + "epoch": 0.11347866832006513, + "grad_norm": 389.66705322265625, + "learning_rate": 1.963678708443169e-06, + "loss": 10.4805, + "step": 11988 + }, + { + "epoch": 0.11348813434178018, + "grad_norm": 212.05381774902344, + "learning_rate": 1.963670520236845e-06, + "loss": 9.9805, + "step": 11989 + }, + { + "epoch": 0.11349760036349524, + "grad_norm": 231.63568115234375, + "learning_rate": 1.9636623311247334e-06, + "loss": 21.5586, + "step": 11990 + }, + { + "epoch": 0.11350706638521028, + "grad_norm": 335.967529296875, + "learning_rate": 1.9636541411068417e-06, + "loss": 41.8906, + "step": 11991 + }, + { + "epoch": 0.11351653240692534, + "grad_norm": 3.038712501525879, + "learning_rate": 1.963645950183178e-06, + "loss": 0.8408, + "step": 11992 + }, + { + "epoch": 0.11352599842864039, + "grad_norm": 226.14663696289062, + "learning_rate": 1.96363775835375e-06, + "loss": 14.0, + "step": 11993 + }, + { + "epoch": 0.11353546445035545, + "grad_norm": 491.6762390136719, + "learning_rate": 1.9636295656185646e-06, + "loss": 34.4531, + "step": 11994 + }, + { + "epoch": 0.1135449304720705, + "grad_norm": 302.8894958496094, + "learning_rate": 1.9636213719776304e-06, + "loss": 17.7578, + "step": 11995 + }, + { + "epoch": 0.11355439649378556, + "grad_norm": 462.54620361328125, + "learning_rate": 1.9636131774309547e-06, + "loss": 13.0273, + "step": 11996 + }, + { + "epoch": 0.11356386251550062, + "grad_norm": 554.3950805664062, + "learning_rate": 1.9636049819785457e-06, + "loss": 60.9062, + "step": 11997 + }, + { + "epoch": 0.11357332853721566, + "grad_norm": 524.7840576171875, + "learning_rate": 1.9635967856204104e-06, + "loss": 33.0625, + "step": 11998 + }, + { + "epoch": 0.11358279455893072, + "grad_norm": 631.2916870117188, + "learning_rate": 1.9635885883565566e-06, + "loss": 54.3203, + "step": 11999 + }, + { + "epoch": 0.11359226058064577, + "grad_norm": 165.8914794921875, + "learning_rate": 1.963580390186992e-06, + "loss": 20.8984, + "step": 12000 + }, + { + "epoch": 0.11360172660236083, + "grad_norm": 307.2727355957031, + "learning_rate": 1.9635721911117255e-06, + "loss": 21.4062, + "step": 12001 + }, + { + "epoch": 0.11361119262407587, + "grad_norm": 238.94908142089844, + "learning_rate": 1.963563991130763e-06, + "loss": 17.2969, + "step": 12002 + }, + { + "epoch": 0.11362065864579093, + "grad_norm": 260.98370361328125, + "learning_rate": 1.9635557902441133e-06, + "loss": 20.0547, + "step": 12003 + }, + { + "epoch": 0.113630124667506, + "grad_norm": 281.5532531738281, + "learning_rate": 1.963547588451784e-06, + "loss": 23.1172, + "step": 12004 + }, + { + "epoch": 0.11363959068922104, + "grad_norm": 255.92117309570312, + "learning_rate": 1.9635393857537826e-06, + "loss": 18.1328, + "step": 12005 + }, + { + "epoch": 0.1136490567109361, + "grad_norm": 3.3863608837127686, + "learning_rate": 1.9635311821501164e-06, + "loss": 1.0786, + "step": 12006 + }, + { + "epoch": 0.11365852273265115, + "grad_norm": 2.821307897567749, + "learning_rate": 1.963522977640794e-06, + "loss": 0.7754, + "step": 12007 + }, + { + "epoch": 0.1136679887543662, + "grad_norm": 416.58697509765625, + "learning_rate": 1.963514772225823e-06, + "loss": 14.3359, + "step": 12008 + }, + { + "epoch": 0.11367745477608125, + "grad_norm": 523.8429565429688, + "learning_rate": 1.96350656590521e-06, + "loss": 20.7188, + "step": 12009 + }, + { + "epoch": 0.11368692079779631, + "grad_norm": 465.7044982910156, + "learning_rate": 1.963498358678964e-06, + "loss": 40.9062, + "step": 12010 + }, + { + "epoch": 0.11369638681951136, + "grad_norm": 236.51358032226562, + "learning_rate": 1.963490150547092e-06, + "loss": 18.7773, + "step": 12011 + }, + { + "epoch": 0.11370585284122642, + "grad_norm": 178.36740112304688, + "learning_rate": 1.963481941509602e-06, + "loss": 19.8281, + "step": 12012 + }, + { + "epoch": 0.11371531886294148, + "grad_norm": 3.340667247772217, + "learning_rate": 1.9634737315665016e-06, + "loss": 1.0933, + "step": 12013 + }, + { + "epoch": 0.11372478488465652, + "grad_norm": 265.5243835449219, + "learning_rate": 1.963465520717799e-06, + "loss": 10.4336, + "step": 12014 + }, + { + "epoch": 0.11373425090637158, + "grad_norm": 252.8907928466797, + "learning_rate": 1.963457308963501e-06, + "loss": 22.375, + "step": 12015 + }, + { + "epoch": 0.11374371692808663, + "grad_norm": 267.5673522949219, + "learning_rate": 1.963449096303616e-06, + "loss": 17.9141, + "step": 12016 + }, + { + "epoch": 0.11375318294980169, + "grad_norm": 301.9613342285156, + "learning_rate": 1.9634408827381515e-06, + "loss": 44.4688, + "step": 12017 + }, + { + "epoch": 0.11376264897151674, + "grad_norm": 3.0691921710968018, + "learning_rate": 1.9634326682671153e-06, + "loss": 0.9629, + "step": 12018 + }, + { + "epoch": 0.1137721149932318, + "grad_norm": 269.1675109863281, + "learning_rate": 1.9634244528905153e-06, + "loss": 24.4141, + "step": 12019 + }, + { + "epoch": 0.11378158101494684, + "grad_norm": 3.11049222946167, + "learning_rate": 1.9634162366083583e-06, + "loss": 0.9253, + "step": 12020 + }, + { + "epoch": 0.1137910470366619, + "grad_norm": 335.5738830566406, + "learning_rate": 1.963408019420653e-06, + "loss": 29.5, + "step": 12021 + }, + { + "epoch": 0.11380051305837696, + "grad_norm": 410.2436828613281, + "learning_rate": 1.963399801327407e-06, + "loss": 31.875, + "step": 12022 + }, + { + "epoch": 0.11380997908009201, + "grad_norm": 3.105776786804199, + "learning_rate": 1.963391582328628e-06, + "loss": 1.0317, + "step": 12023 + }, + { + "epoch": 0.11381944510180707, + "grad_norm": 291.3841552734375, + "learning_rate": 1.9633833624243234e-06, + "loss": 28.3867, + "step": 12024 + }, + { + "epoch": 0.11382891112352211, + "grad_norm": 368.7658996582031, + "learning_rate": 1.963375141614501e-06, + "loss": 25.2109, + "step": 12025 + }, + { + "epoch": 0.11383837714523717, + "grad_norm": 347.1217956542969, + "learning_rate": 1.9633669198991688e-06, + "loss": 25.6875, + "step": 12026 + }, + { + "epoch": 0.11384784316695222, + "grad_norm": 602.4761962890625, + "learning_rate": 1.9633586972783343e-06, + "loss": 39.5, + "step": 12027 + }, + { + "epoch": 0.11385730918866728, + "grad_norm": 233.7889404296875, + "learning_rate": 1.963350473752005e-06, + "loss": 21.0703, + "step": 12028 + }, + { + "epoch": 0.11386677521038233, + "grad_norm": 475.7135314941406, + "learning_rate": 1.9633422493201896e-06, + "loss": 42.2188, + "step": 12029 + }, + { + "epoch": 0.11387624123209739, + "grad_norm": 3.4550209045410156, + "learning_rate": 1.9633340239828947e-06, + "loss": 0.8687, + "step": 12030 + }, + { + "epoch": 0.11388570725381245, + "grad_norm": 492.0217590332031, + "learning_rate": 1.9633257977401287e-06, + "loss": 13.3164, + "step": 12031 + }, + { + "epoch": 0.11389517327552749, + "grad_norm": 272.2711181640625, + "learning_rate": 1.963317570591899e-06, + "loss": 30.5, + "step": 12032 + }, + { + "epoch": 0.11390463929724255, + "grad_norm": 213.5362548828125, + "learning_rate": 1.9633093425382133e-06, + "loss": 27.9062, + "step": 12033 + }, + { + "epoch": 0.1139141053189576, + "grad_norm": 703.5894775390625, + "learning_rate": 1.9633011135790796e-06, + "loss": 48.6719, + "step": 12034 + }, + { + "epoch": 0.11392357134067266, + "grad_norm": 1098.709228515625, + "learning_rate": 1.9632928837145054e-06, + "loss": 48.5312, + "step": 12035 + }, + { + "epoch": 0.1139330373623877, + "grad_norm": 260.1205749511719, + "learning_rate": 1.963284652944499e-06, + "loss": 19.5078, + "step": 12036 + }, + { + "epoch": 0.11394250338410276, + "grad_norm": 301.3098449707031, + "learning_rate": 1.963276421269067e-06, + "loss": 31.2969, + "step": 12037 + }, + { + "epoch": 0.11395196940581781, + "grad_norm": 873.833984375, + "learning_rate": 1.9632681886882186e-06, + "loss": 45.1953, + "step": 12038 + }, + { + "epoch": 0.11396143542753287, + "grad_norm": 219.25933837890625, + "learning_rate": 1.9632599552019606e-06, + "loss": 17.875, + "step": 12039 + }, + { + "epoch": 0.11397090144924793, + "grad_norm": 492.94964599609375, + "learning_rate": 1.9632517208103e-06, + "loss": 43.7969, + "step": 12040 + }, + { + "epoch": 0.11398036747096298, + "grad_norm": 291.4289855957031, + "learning_rate": 1.9632434855132467e-06, + "loss": 25.7656, + "step": 12041 + }, + { + "epoch": 0.11398983349267804, + "grad_norm": 415.943359375, + "learning_rate": 1.963235249310807e-06, + "loss": 50.9062, + "step": 12042 + }, + { + "epoch": 0.11399929951439308, + "grad_norm": 3.094031810760498, + "learning_rate": 1.963227012202988e-06, + "loss": 0.8647, + "step": 12043 + }, + { + "epoch": 0.11400876553610814, + "grad_norm": 364.7358703613281, + "learning_rate": 1.9632187741897987e-06, + "loss": 25.9609, + "step": 12044 + }, + { + "epoch": 0.11401823155782319, + "grad_norm": 2207.76416015625, + "learning_rate": 1.9632105352712465e-06, + "loss": 50.9141, + "step": 12045 + }, + { + "epoch": 0.11402769757953825, + "grad_norm": 1373.6331787109375, + "learning_rate": 1.963202295447339e-06, + "loss": 17.4844, + "step": 12046 + }, + { + "epoch": 0.11403716360125331, + "grad_norm": 252.42738342285156, + "learning_rate": 1.963194054718084e-06, + "loss": 16.8242, + "step": 12047 + }, + { + "epoch": 0.11404662962296835, + "grad_norm": 394.9511413574219, + "learning_rate": 1.9631858130834893e-06, + "loss": 23.3672, + "step": 12048 + }, + { + "epoch": 0.11405609564468341, + "grad_norm": 617.8291625976562, + "learning_rate": 1.9631775705435625e-06, + "loss": 25.4922, + "step": 12049 + }, + { + "epoch": 0.11406556166639846, + "grad_norm": 853.3154907226562, + "learning_rate": 1.9631693270983112e-06, + "loss": 27.0312, + "step": 12050 + }, + { + "epoch": 0.11407502768811352, + "grad_norm": 337.7823181152344, + "learning_rate": 1.963161082747744e-06, + "loss": 40.9531, + "step": 12051 + }, + { + "epoch": 0.11408449370982857, + "grad_norm": 731.5675659179688, + "learning_rate": 1.963152837491868e-06, + "loss": 43.6094, + "step": 12052 + }, + { + "epoch": 0.11409395973154363, + "grad_norm": 179.36810302734375, + "learning_rate": 1.9631445913306905e-06, + "loss": 9.8398, + "step": 12053 + }, + { + "epoch": 0.11410342575325867, + "grad_norm": 386.2151184082031, + "learning_rate": 1.9631363442642197e-06, + "loss": 24.5, + "step": 12054 + }, + { + "epoch": 0.11411289177497373, + "grad_norm": 486.63775634765625, + "learning_rate": 1.9631280962924637e-06, + "loss": 14.625, + "step": 12055 + }, + { + "epoch": 0.11412235779668879, + "grad_norm": 167.93017578125, + "learning_rate": 1.96311984741543e-06, + "loss": 23.0898, + "step": 12056 + }, + { + "epoch": 0.11413182381840384, + "grad_norm": 304.01715087890625, + "learning_rate": 1.963111597633126e-06, + "loss": 19.9297, + "step": 12057 + }, + { + "epoch": 0.1141412898401189, + "grad_norm": 214.80670166015625, + "learning_rate": 1.96310334694556e-06, + "loss": 18.9062, + "step": 12058 + }, + { + "epoch": 0.11415075586183394, + "grad_norm": 220.2945556640625, + "learning_rate": 1.9630950953527392e-06, + "loss": 20.5234, + "step": 12059 + }, + { + "epoch": 0.114160221883549, + "grad_norm": 592.93798828125, + "learning_rate": 1.963086842854672e-06, + "loss": 38.3906, + "step": 12060 + }, + { + "epoch": 0.11416968790526405, + "grad_norm": 3.267629861831665, + "learning_rate": 1.963078589451366e-06, + "loss": 0.9287, + "step": 12061 + }, + { + "epoch": 0.11417915392697911, + "grad_norm": 257.01629638671875, + "learning_rate": 1.9630703351428288e-06, + "loss": 15.5, + "step": 12062 + }, + { + "epoch": 0.11418861994869416, + "grad_norm": 613.304443359375, + "learning_rate": 1.9630620799290677e-06, + "loss": 35.0781, + "step": 12063 + }, + { + "epoch": 0.11419808597040922, + "grad_norm": 430.0739440917969, + "learning_rate": 1.963053823810091e-06, + "loss": 44.1562, + "step": 12064 + }, + { + "epoch": 0.11420755199212428, + "grad_norm": 554.4951782226562, + "learning_rate": 1.9630455667859064e-06, + "loss": 53.3516, + "step": 12065 + }, + { + "epoch": 0.11421701801383932, + "grad_norm": 772.4570922851562, + "learning_rate": 1.9630373088565213e-06, + "loss": 33.3438, + "step": 12066 + }, + { + "epoch": 0.11422648403555438, + "grad_norm": 420.44586181640625, + "learning_rate": 1.9630290500219443e-06, + "loss": 27.5234, + "step": 12067 + }, + { + "epoch": 0.11423595005726943, + "grad_norm": 560.6339111328125, + "learning_rate": 1.9630207902821826e-06, + "loss": 17.3281, + "step": 12068 + }, + { + "epoch": 0.11424541607898449, + "grad_norm": 187.31687927246094, + "learning_rate": 1.9630125296372438e-06, + "loss": 19.8828, + "step": 12069 + }, + { + "epoch": 0.11425488210069953, + "grad_norm": 320.87164306640625, + "learning_rate": 1.963004268087136e-06, + "loss": 48.5, + "step": 12070 + }, + { + "epoch": 0.1142643481224146, + "grad_norm": 321.5313720703125, + "learning_rate": 1.9629960056318666e-06, + "loss": 32.1562, + "step": 12071 + }, + { + "epoch": 0.11427381414412964, + "grad_norm": 398.90252685546875, + "learning_rate": 1.962987742271444e-06, + "loss": 61.0, + "step": 12072 + }, + { + "epoch": 0.1142832801658447, + "grad_norm": 566.2442626953125, + "learning_rate": 1.962979478005875e-06, + "loss": 39.5156, + "step": 12073 + }, + { + "epoch": 0.11429274618755976, + "grad_norm": 1031.2513427734375, + "learning_rate": 1.9629712128351684e-06, + "loss": 20.2422, + "step": 12074 + }, + { + "epoch": 0.1143022122092748, + "grad_norm": 396.77020263671875, + "learning_rate": 1.9629629467593313e-06, + "loss": 43.0156, + "step": 12075 + }, + { + "epoch": 0.11431167823098987, + "grad_norm": 572.1468505859375, + "learning_rate": 1.9629546797783717e-06, + "loss": 36.2812, + "step": 12076 + }, + { + "epoch": 0.11432114425270491, + "grad_norm": 484.68695068359375, + "learning_rate": 1.9629464118922973e-06, + "loss": 40.9688, + "step": 12077 + }, + { + "epoch": 0.11433061027441997, + "grad_norm": 375.64642333984375, + "learning_rate": 1.962938143101116e-06, + "loss": 35.7188, + "step": 12078 + }, + { + "epoch": 0.11434007629613502, + "grad_norm": 814.9924926757812, + "learning_rate": 1.9629298734048357e-06, + "loss": 57.3281, + "step": 12079 + }, + { + "epoch": 0.11434954231785008, + "grad_norm": 586.5043334960938, + "learning_rate": 1.9629216028034638e-06, + "loss": 60.1719, + "step": 12080 + }, + { + "epoch": 0.11435900833956512, + "grad_norm": 883.0030517578125, + "learning_rate": 1.962913331297008e-06, + "loss": 43.0391, + "step": 12081 + }, + { + "epoch": 0.11436847436128018, + "grad_norm": 150.07948303222656, + "learning_rate": 1.9629050588854766e-06, + "loss": 17.1094, + "step": 12082 + }, + { + "epoch": 0.11437794038299524, + "grad_norm": 208.9055938720703, + "learning_rate": 1.962896785568877e-06, + "loss": 24.8359, + "step": 12083 + }, + { + "epoch": 0.11438740640471029, + "grad_norm": 228.0472412109375, + "learning_rate": 1.962888511347217e-06, + "loss": 23.3984, + "step": 12084 + }, + { + "epoch": 0.11439687242642535, + "grad_norm": 774.8074951171875, + "learning_rate": 1.9628802362205045e-06, + "loss": 46.0234, + "step": 12085 + }, + { + "epoch": 0.1144063384481404, + "grad_norm": 162.78488159179688, + "learning_rate": 1.962871960188747e-06, + "loss": 21.2344, + "step": 12086 + }, + { + "epoch": 0.11441580446985546, + "grad_norm": 600.254150390625, + "learning_rate": 1.962863683251953e-06, + "loss": 31.5234, + "step": 12087 + }, + { + "epoch": 0.1144252704915705, + "grad_norm": 276.975341796875, + "learning_rate": 1.9628554054101293e-06, + "loss": 25.3203, + "step": 12088 + }, + { + "epoch": 0.11443473651328556, + "grad_norm": 863.9351196289062, + "learning_rate": 1.962847126663284e-06, + "loss": 40.7031, + "step": 12089 + }, + { + "epoch": 0.11444420253500062, + "grad_norm": 230.25442504882812, + "learning_rate": 1.9628388470114257e-06, + "loss": 20.3594, + "step": 12090 + }, + { + "epoch": 0.11445366855671567, + "grad_norm": 317.7967224121094, + "learning_rate": 1.9628305664545613e-06, + "loss": 14.9844, + "step": 12091 + }, + { + "epoch": 0.11446313457843073, + "grad_norm": 3.2705509662628174, + "learning_rate": 1.962822284992699e-06, + "loss": 0.918, + "step": 12092 + }, + { + "epoch": 0.11447260060014577, + "grad_norm": 191.15463256835938, + "learning_rate": 1.962814002625846e-06, + "loss": 21.1875, + "step": 12093 + }, + { + "epoch": 0.11448206662186083, + "grad_norm": 336.5748291015625, + "learning_rate": 1.9628057193540104e-06, + "loss": 30.5234, + "step": 12094 + }, + { + "epoch": 0.11449153264357588, + "grad_norm": 411.33221435546875, + "learning_rate": 1.9627974351772e-06, + "loss": 39.1406, + "step": 12095 + }, + { + "epoch": 0.11450099866529094, + "grad_norm": 870.3285522460938, + "learning_rate": 1.962789150095423e-06, + "loss": 51.1172, + "step": 12096 + }, + { + "epoch": 0.11451046468700599, + "grad_norm": 240.18226623535156, + "learning_rate": 1.9627808641086867e-06, + "loss": 28.7578, + "step": 12097 + }, + { + "epoch": 0.11451993070872105, + "grad_norm": 846.9229125976562, + "learning_rate": 1.962772577216999e-06, + "loss": 44.4375, + "step": 12098 + }, + { + "epoch": 0.1145293967304361, + "grad_norm": 403.7541809082031, + "learning_rate": 1.9627642894203677e-06, + "loss": 38.7031, + "step": 12099 + }, + { + "epoch": 0.11453886275215115, + "grad_norm": 561.1138305664062, + "learning_rate": 1.9627560007188008e-06, + "loss": 8.8047, + "step": 12100 + }, + { + "epoch": 0.11454832877386621, + "grad_norm": 722.9204711914062, + "learning_rate": 1.962747711112306e-06, + "loss": 29.2812, + "step": 12101 + }, + { + "epoch": 0.11455779479558126, + "grad_norm": 335.15142822265625, + "learning_rate": 1.9627394206008904e-06, + "loss": 40.5938, + "step": 12102 + }, + { + "epoch": 0.11456726081729632, + "grad_norm": 293.01318359375, + "learning_rate": 1.9627311291845626e-06, + "loss": 22.5586, + "step": 12103 + }, + { + "epoch": 0.11457672683901136, + "grad_norm": 414.8636169433594, + "learning_rate": 1.96272283686333e-06, + "loss": 20.2422, + "step": 12104 + }, + { + "epoch": 0.11458619286072642, + "grad_norm": 433.44744873046875, + "learning_rate": 1.962714543637201e-06, + "loss": 38.4453, + "step": 12105 + }, + { + "epoch": 0.11459565888244147, + "grad_norm": 3.233431577682495, + "learning_rate": 1.9627062495061826e-06, + "loss": 0.9001, + "step": 12106 + }, + { + "epoch": 0.11460512490415653, + "grad_norm": 198.54722595214844, + "learning_rate": 1.962697954470283e-06, + "loss": 16.6094, + "step": 12107 + }, + { + "epoch": 0.11461459092587159, + "grad_norm": 206.85162353515625, + "learning_rate": 1.96268965852951e-06, + "loss": 17.4258, + "step": 12108 + }, + { + "epoch": 0.11462405694758664, + "grad_norm": 735.930419921875, + "learning_rate": 1.9626813616838714e-06, + "loss": 36.3984, + "step": 12109 + }, + { + "epoch": 0.1146335229693017, + "grad_norm": 836.337158203125, + "learning_rate": 1.9626730639333753e-06, + "loss": 17.3984, + "step": 12110 + }, + { + "epoch": 0.11464298899101674, + "grad_norm": 404.1311340332031, + "learning_rate": 1.9626647652780287e-06, + "loss": 56.9531, + "step": 12111 + }, + { + "epoch": 0.1146524550127318, + "grad_norm": 226.58010864257812, + "learning_rate": 1.9626564657178403e-06, + "loss": 22.4609, + "step": 12112 + }, + { + "epoch": 0.11466192103444685, + "grad_norm": 511.6348571777344, + "learning_rate": 1.9626481652528167e-06, + "loss": 50.9844, + "step": 12113 + }, + { + "epoch": 0.11467138705616191, + "grad_norm": 3.15378737449646, + "learning_rate": 1.962639863882967e-06, + "loss": 0.9165, + "step": 12114 + }, + { + "epoch": 0.11468085307787695, + "grad_norm": 251.5749969482422, + "learning_rate": 1.962631561608298e-06, + "loss": 17.5859, + "step": 12115 + }, + { + "epoch": 0.11469031909959201, + "grad_norm": 682.1864624023438, + "learning_rate": 1.9626232584288184e-06, + "loss": 46.125, + "step": 12116 + }, + { + "epoch": 0.11469978512130707, + "grad_norm": 473.5752868652344, + "learning_rate": 1.9626149543445356e-06, + "loss": 34.7188, + "step": 12117 + }, + { + "epoch": 0.11470925114302212, + "grad_norm": 515.7091064453125, + "learning_rate": 1.962606649355457e-06, + "loss": 41.9062, + "step": 12118 + }, + { + "epoch": 0.11471871716473718, + "grad_norm": 469.5472106933594, + "learning_rate": 1.962598343461591e-06, + "loss": 46.625, + "step": 12119 + }, + { + "epoch": 0.11472818318645223, + "grad_norm": 727.4461059570312, + "learning_rate": 1.962590036662945e-06, + "loss": 53.6406, + "step": 12120 + }, + { + "epoch": 0.11473764920816729, + "grad_norm": 244.74041748046875, + "learning_rate": 1.9625817289595272e-06, + "loss": 19.8672, + "step": 12121 + }, + { + "epoch": 0.11474711522988233, + "grad_norm": 213.21151733398438, + "learning_rate": 1.9625734203513452e-06, + "loss": 25.9531, + "step": 12122 + }, + { + "epoch": 0.11475658125159739, + "grad_norm": 219.12930297851562, + "learning_rate": 1.9625651108384067e-06, + "loss": 9.1328, + "step": 12123 + }, + { + "epoch": 0.11476604727331244, + "grad_norm": 555.3278198242188, + "learning_rate": 1.9625568004207196e-06, + "loss": 46.0547, + "step": 12124 + }, + { + "epoch": 0.1147755132950275, + "grad_norm": 326.5265808105469, + "learning_rate": 1.962548489098292e-06, + "loss": 32.8438, + "step": 12125 + }, + { + "epoch": 0.11478497931674256, + "grad_norm": 255.4576416015625, + "learning_rate": 1.9625401768711314e-06, + "loss": 14.4023, + "step": 12126 + }, + { + "epoch": 0.1147944453384576, + "grad_norm": 566.1110229492188, + "learning_rate": 1.9625318637392454e-06, + "loss": 26.7266, + "step": 12127 + }, + { + "epoch": 0.11480391136017266, + "grad_norm": 351.2911682128906, + "learning_rate": 1.962523549702642e-06, + "loss": 21.5156, + "step": 12128 + }, + { + "epoch": 0.11481337738188771, + "grad_norm": 315.3689270019531, + "learning_rate": 1.962515234761329e-06, + "loss": 26.4609, + "step": 12129 + }, + { + "epoch": 0.11482284340360277, + "grad_norm": 687.9088134765625, + "learning_rate": 1.962506918915314e-06, + "loss": 40.1875, + "step": 12130 + }, + { + "epoch": 0.11483230942531782, + "grad_norm": 242.49520874023438, + "learning_rate": 1.962498602164606e-06, + "loss": 21.8984, + "step": 12131 + }, + { + "epoch": 0.11484177544703288, + "grad_norm": 2.913031816482544, + "learning_rate": 1.9624902845092114e-06, + "loss": 0.9771, + "step": 12132 + }, + { + "epoch": 0.11485124146874794, + "grad_norm": 1345.6357421875, + "learning_rate": 1.962481965949139e-06, + "loss": 24.168, + "step": 12133 + }, + { + "epoch": 0.11486070749046298, + "grad_norm": 632.3096923828125, + "learning_rate": 1.9624736464843954e-06, + "loss": 48.25, + "step": 12134 + }, + { + "epoch": 0.11487017351217804, + "grad_norm": 417.4035949707031, + "learning_rate": 1.96246532611499e-06, + "loss": 44.875, + "step": 12135 + }, + { + "epoch": 0.11487963953389309, + "grad_norm": 483.35125732421875, + "learning_rate": 1.9624570048409294e-06, + "loss": 36.5, + "step": 12136 + }, + { + "epoch": 0.11488910555560815, + "grad_norm": 206.81112670898438, + "learning_rate": 1.9624486826622216e-06, + "loss": 17.6406, + "step": 12137 + }, + { + "epoch": 0.1148985715773232, + "grad_norm": 346.3813781738281, + "learning_rate": 1.962440359578875e-06, + "loss": 20.9258, + "step": 12138 + }, + { + "epoch": 0.11490803759903825, + "grad_norm": 903.63134765625, + "learning_rate": 1.962432035590897e-06, + "loss": 79.4141, + "step": 12139 + }, + { + "epoch": 0.1149175036207533, + "grad_norm": 307.0149841308594, + "learning_rate": 1.9624237106982955e-06, + "loss": 21.6875, + "step": 12140 + }, + { + "epoch": 0.11492696964246836, + "grad_norm": 210.011474609375, + "learning_rate": 1.9624153849010783e-06, + "loss": 21.3906, + "step": 12141 + }, + { + "epoch": 0.11493643566418342, + "grad_norm": 3.392737865447998, + "learning_rate": 1.962407058199253e-06, + "loss": 1.0942, + "step": 12142 + }, + { + "epoch": 0.11494590168589847, + "grad_norm": 291.4000549316406, + "learning_rate": 1.962398730592828e-06, + "loss": 26.5234, + "step": 12143 + }, + { + "epoch": 0.11495536770761353, + "grad_norm": 395.94708251953125, + "learning_rate": 1.962390402081811e-06, + "loss": 37.5625, + "step": 12144 + }, + { + "epoch": 0.11496483372932857, + "grad_norm": 485.7785949707031, + "learning_rate": 1.9623820726662094e-06, + "loss": 52.6719, + "step": 12145 + }, + { + "epoch": 0.11497429975104363, + "grad_norm": 377.10809326171875, + "learning_rate": 1.962373742346031e-06, + "loss": 28.2891, + "step": 12146 + }, + { + "epoch": 0.11498376577275868, + "grad_norm": 232.33392333984375, + "learning_rate": 1.962365411121284e-06, + "loss": 22.6641, + "step": 12147 + }, + { + "epoch": 0.11499323179447374, + "grad_norm": 369.1182861328125, + "learning_rate": 1.9623570789919764e-06, + "loss": 36.7422, + "step": 12148 + }, + { + "epoch": 0.11500269781618878, + "grad_norm": 551.7356567382812, + "learning_rate": 1.9623487459581158e-06, + "loss": 33.3828, + "step": 12149 + }, + { + "epoch": 0.11501216383790384, + "grad_norm": 731.4528198242188, + "learning_rate": 1.9623404120197095e-06, + "loss": 74.4688, + "step": 12150 + }, + { + "epoch": 0.1150216298596189, + "grad_norm": 425.5931091308594, + "learning_rate": 1.962332077176766e-06, + "loss": 33.5859, + "step": 12151 + }, + { + "epoch": 0.11503109588133395, + "grad_norm": 550.836669921875, + "learning_rate": 1.962323741429293e-06, + "loss": 51.8125, + "step": 12152 + }, + { + "epoch": 0.11504056190304901, + "grad_norm": 363.13775634765625, + "learning_rate": 1.9623154047772986e-06, + "loss": 33.0625, + "step": 12153 + }, + { + "epoch": 0.11505002792476406, + "grad_norm": 737.3048095703125, + "learning_rate": 1.96230706722079e-06, + "loss": 38.2969, + "step": 12154 + }, + { + "epoch": 0.11505949394647912, + "grad_norm": 921.6763305664062, + "learning_rate": 1.9622987287597753e-06, + "loss": 69.7188, + "step": 12155 + }, + { + "epoch": 0.11506895996819416, + "grad_norm": 572.6326293945312, + "learning_rate": 1.9622903893942625e-06, + "loss": 55.3906, + "step": 12156 + }, + { + "epoch": 0.11507842598990922, + "grad_norm": 3.0013480186462402, + "learning_rate": 1.962282049124259e-06, + "loss": 0.8535, + "step": 12157 + }, + { + "epoch": 0.11508789201162427, + "grad_norm": 332.5553283691406, + "learning_rate": 1.9622737079497737e-06, + "loss": 50.3047, + "step": 12158 + }, + { + "epoch": 0.11509735803333933, + "grad_norm": 270.02545166015625, + "learning_rate": 1.962265365870813e-06, + "loss": 19.3125, + "step": 12159 + }, + { + "epoch": 0.11510682405505439, + "grad_norm": 198.59228515625, + "learning_rate": 1.962257022887386e-06, + "loss": 23.5312, + "step": 12160 + }, + { + "epoch": 0.11511629007676943, + "grad_norm": 274.89154052734375, + "learning_rate": 1.9622486789994995e-06, + "loss": 17.8281, + "step": 12161 + }, + { + "epoch": 0.1151257560984845, + "grad_norm": 600.837158203125, + "learning_rate": 1.9622403342071623e-06, + "loss": 43.4531, + "step": 12162 + }, + { + "epoch": 0.11513522212019954, + "grad_norm": 204.60365295410156, + "learning_rate": 1.9622319885103814e-06, + "loss": 15.8984, + "step": 12163 + }, + { + "epoch": 0.1151446881419146, + "grad_norm": 338.4901123046875, + "learning_rate": 1.9622236419091655e-06, + "loss": 20.0703, + "step": 12164 + }, + { + "epoch": 0.11515415416362965, + "grad_norm": 340.3957214355469, + "learning_rate": 1.9622152944035217e-06, + "loss": 20.5938, + "step": 12165 + }, + { + "epoch": 0.1151636201853447, + "grad_norm": 496.2470703125, + "learning_rate": 1.962206945993458e-06, + "loss": 22.5859, + "step": 12166 + }, + { + "epoch": 0.11517308620705975, + "grad_norm": 176.51889038085938, + "learning_rate": 1.9621985966789825e-06, + "loss": 21.9688, + "step": 12167 + }, + { + "epoch": 0.11518255222877481, + "grad_norm": 235.48876953125, + "learning_rate": 1.962190246460103e-06, + "loss": 20.6562, + "step": 12168 + }, + { + "epoch": 0.11519201825048987, + "grad_norm": 260.2770080566406, + "learning_rate": 1.962181895336827e-06, + "loss": 17.9531, + "step": 12169 + }, + { + "epoch": 0.11520148427220492, + "grad_norm": 459.59600830078125, + "learning_rate": 1.9621735433091627e-06, + "loss": 31.8281, + "step": 12170 + }, + { + "epoch": 0.11521095029391998, + "grad_norm": 3.08390212059021, + "learning_rate": 1.962165190377118e-06, + "loss": 0.8511, + "step": 12171 + }, + { + "epoch": 0.11522041631563502, + "grad_norm": 270.1244812011719, + "learning_rate": 1.9621568365407003e-06, + "loss": 21.5078, + "step": 12172 + }, + { + "epoch": 0.11522988233735008, + "grad_norm": 446.3852844238281, + "learning_rate": 1.9621484817999182e-06, + "loss": 20.2578, + "step": 12173 + }, + { + "epoch": 0.11523934835906513, + "grad_norm": 598.8528442382812, + "learning_rate": 1.962140126154779e-06, + "loss": 16.7656, + "step": 12174 + }, + { + "epoch": 0.11524881438078019, + "grad_norm": 316.7774353027344, + "learning_rate": 1.9621317696052903e-06, + "loss": 16.8281, + "step": 12175 + }, + { + "epoch": 0.11525828040249525, + "grad_norm": 155.33457946777344, + "learning_rate": 1.962123412151461e-06, + "loss": 18.9453, + "step": 12176 + }, + { + "epoch": 0.1152677464242103, + "grad_norm": 477.1852722167969, + "learning_rate": 1.9621150537932977e-06, + "loss": 25.8281, + "step": 12177 + }, + { + "epoch": 0.11527721244592536, + "grad_norm": 304.2066650390625, + "learning_rate": 1.962106694530809e-06, + "loss": 28.3906, + "step": 12178 + }, + { + "epoch": 0.1152866784676404, + "grad_norm": 199.75355529785156, + "learning_rate": 1.9620983343640026e-06, + "loss": 14.2109, + "step": 12179 + }, + { + "epoch": 0.11529614448935546, + "grad_norm": 183.22239685058594, + "learning_rate": 1.9620899732928864e-06, + "loss": 14.0625, + "step": 12180 + }, + { + "epoch": 0.11530561051107051, + "grad_norm": 331.8613586425781, + "learning_rate": 1.9620816113174685e-06, + "loss": 19.1797, + "step": 12181 + }, + { + "epoch": 0.11531507653278557, + "grad_norm": 564.6083374023438, + "learning_rate": 1.962073248437756e-06, + "loss": 44.3594, + "step": 12182 + }, + { + "epoch": 0.11532454255450061, + "grad_norm": 429.9804382324219, + "learning_rate": 1.9620648846537577e-06, + "loss": 18.3984, + "step": 12183 + }, + { + "epoch": 0.11533400857621567, + "grad_norm": 159.52195739746094, + "learning_rate": 1.9620565199654805e-06, + "loss": 20.5078, + "step": 12184 + }, + { + "epoch": 0.11534347459793073, + "grad_norm": 297.75299072265625, + "learning_rate": 1.962048154372933e-06, + "loss": 19.5625, + "step": 12185 + }, + { + "epoch": 0.11535294061964578, + "grad_norm": 419.9986877441406, + "learning_rate": 1.9620397878761226e-06, + "loss": 27.8984, + "step": 12186 + }, + { + "epoch": 0.11536240664136084, + "grad_norm": 202.77101135253906, + "learning_rate": 1.9620314204750577e-06, + "loss": 19.3672, + "step": 12187 + }, + { + "epoch": 0.11537187266307589, + "grad_norm": 538.3274536132812, + "learning_rate": 1.9620230521697456e-06, + "loss": 39.5234, + "step": 12188 + }, + { + "epoch": 0.11538133868479095, + "grad_norm": 3.5200321674346924, + "learning_rate": 1.9620146829601946e-06, + "loss": 0.8359, + "step": 12189 + }, + { + "epoch": 0.11539080470650599, + "grad_norm": 261.3374328613281, + "learning_rate": 1.9620063128464125e-06, + "loss": 15.4375, + "step": 12190 + }, + { + "epoch": 0.11540027072822105, + "grad_norm": 255.47482299804688, + "learning_rate": 1.9619979418284068e-06, + "loss": 23.4141, + "step": 12191 + }, + { + "epoch": 0.1154097367499361, + "grad_norm": 391.22808837890625, + "learning_rate": 1.9619895699061857e-06, + "loss": 36.6875, + "step": 12192 + }, + { + "epoch": 0.11541920277165116, + "grad_norm": 245.4217529296875, + "learning_rate": 1.961981197079757e-06, + "loss": 18.125, + "step": 12193 + }, + { + "epoch": 0.11542866879336622, + "grad_norm": 530.6443481445312, + "learning_rate": 1.9619728233491286e-06, + "loss": 20.5547, + "step": 12194 + }, + { + "epoch": 0.11543813481508126, + "grad_norm": 245.76358032226562, + "learning_rate": 1.961964448714308e-06, + "loss": 19.25, + "step": 12195 + }, + { + "epoch": 0.11544760083679632, + "grad_norm": 3.511868476867676, + "learning_rate": 1.961956073175304e-06, + "loss": 1.0122, + "step": 12196 + }, + { + "epoch": 0.11545706685851137, + "grad_norm": 1996.687255859375, + "learning_rate": 1.961947696732123e-06, + "loss": 33.2773, + "step": 12197 + }, + { + "epoch": 0.11546653288022643, + "grad_norm": 216.2938690185547, + "learning_rate": 1.9619393193847747e-06, + "loss": 19.625, + "step": 12198 + }, + { + "epoch": 0.11547599890194148, + "grad_norm": 234.44168090820312, + "learning_rate": 1.9619309411332655e-06, + "loss": 26.1406, + "step": 12199 + }, + { + "epoch": 0.11548546492365654, + "grad_norm": 225.90411376953125, + "learning_rate": 1.9619225619776037e-06, + "loss": 15.1562, + "step": 12200 + }, + { + "epoch": 0.11549493094537158, + "grad_norm": 162.80125427246094, + "learning_rate": 1.9619141819177974e-06, + "loss": 9.9219, + "step": 12201 + }, + { + "epoch": 0.11550439696708664, + "grad_norm": 244.09152221679688, + "learning_rate": 1.9619058009538545e-06, + "loss": 27.5938, + "step": 12202 + }, + { + "epoch": 0.1155138629888017, + "grad_norm": 163.40553283691406, + "learning_rate": 1.9618974190857827e-06, + "loss": 15.3438, + "step": 12203 + }, + { + "epoch": 0.11552332901051675, + "grad_norm": 186.61459350585938, + "learning_rate": 1.9618890363135896e-06, + "loss": 26.5156, + "step": 12204 + }, + { + "epoch": 0.11553279503223181, + "grad_norm": 283.16082763671875, + "learning_rate": 1.961880652637284e-06, + "loss": 29.3438, + "step": 12205 + }, + { + "epoch": 0.11554226105394685, + "grad_norm": 264.1119079589844, + "learning_rate": 1.9618722680568728e-06, + "loss": 19.9375, + "step": 12206 + }, + { + "epoch": 0.11555172707566191, + "grad_norm": 166.08387756347656, + "learning_rate": 1.9618638825723637e-06, + "loss": 24.5312, + "step": 12207 + }, + { + "epoch": 0.11556119309737696, + "grad_norm": 373.44757080078125, + "learning_rate": 1.961855496183766e-06, + "loss": 34.6719, + "step": 12208 + }, + { + "epoch": 0.11557065911909202, + "grad_norm": 491.7212829589844, + "learning_rate": 1.961847108891086e-06, + "loss": 34.6719, + "step": 12209 + }, + { + "epoch": 0.11558012514080707, + "grad_norm": 237.3571319580078, + "learning_rate": 1.9618387206943327e-06, + "loss": 22.5625, + "step": 12210 + }, + { + "epoch": 0.11558959116252213, + "grad_norm": 518.815673828125, + "learning_rate": 1.9618303315935135e-06, + "loss": 50.4219, + "step": 12211 + }, + { + "epoch": 0.11559905718423719, + "grad_norm": 312.156005859375, + "learning_rate": 1.9618219415886365e-06, + "loss": 19.0156, + "step": 12212 + }, + { + "epoch": 0.11560852320595223, + "grad_norm": 238.16180419921875, + "learning_rate": 1.961813550679709e-06, + "loss": 26.75, + "step": 12213 + }, + { + "epoch": 0.11561798922766729, + "grad_norm": 286.8798828125, + "learning_rate": 1.96180515886674e-06, + "loss": 18.2031, + "step": 12214 + }, + { + "epoch": 0.11562745524938234, + "grad_norm": 398.097900390625, + "learning_rate": 1.9617967661497363e-06, + "loss": 19.0312, + "step": 12215 + }, + { + "epoch": 0.1156369212710974, + "grad_norm": 2262.36572265625, + "learning_rate": 1.9617883725287063e-06, + "loss": 48.8242, + "step": 12216 + }, + { + "epoch": 0.11564638729281244, + "grad_norm": 273.3660888671875, + "learning_rate": 1.9617799780036575e-06, + "loss": 19.9453, + "step": 12217 + }, + { + "epoch": 0.1156558533145275, + "grad_norm": 226.1705780029297, + "learning_rate": 1.9617715825745984e-06, + "loss": 19.5625, + "step": 12218 + }, + { + "epoch": 0.11566531933624256, + "grad_norm": 2.943511962890625, + "learning_rate": 1.9617631862415366e-06, + "loss": 0.8208, + "step": 12219 + }, + { + "epoch": 0.11567478535795761, + "grad_norm": 552.800048828125, + "learning_rate": 1.9617547890044798e-06, + "loss": 53.3047, + "step": 12220 + }, + { + "epoch": 0.11568425137967267, + "grad_norm": 718.3345336914062, + "learning_rate": 1.9617463908634363e-06, + "loss": 21.7422, + "step": 12221 + }, + { + "epoch": 0.11569371740138772, + "grad_norm": 271.1273498535156, + "learning_rate": 1.9617379918184135e-06, + "loss": 18.9922, + "step": 12222 + }, + { + "epoch": 0.11570318342310278, + "grad_norm": 293.2017822265625, + "learning_rate": 1.9617295918694197e-06, + "loss": 21.8516, + "step": 12223 + }, + { + "epoch": 0.11571264944481782, + "grad_norm": 608.1657104492188, + "learning_rate": 1.961721191016463e-06, + "loss": 42.9844, + "step": 12224 + }, + { + "epoch": 0.11572211546653288, + "grad_norm": 854.9348754882812, + "learning_rate": 1.9617127892595503e-06, + "loss": 44.2344, + "step": 12225 + }, + { + "epoch": 0.11573158148824793, + "grad_norm": 190.7877197265625, + "learning_rate": 1.9617043865986903e-06, + "loss": 19.2031, + "step": 12226 + }, + { + "epoch": 0.11574104750996299, + "grad_norm": 305.3858337402344, + "learning_rate": 1.9616959830338908e-06, + "loss": 19.8438, + "step": 12227 + }, + { + "epoch": 0.11575051353167805, + "grad_norm": 713.9384765625, + "learning_rate": 1.9616875785651597e-06, + "loss": 41.0078, + "step": 12228 + }, + { + "epoch": 0.1157599795533931, + "grad_norm": 248.88536071777344, + "learning_rate": 1.9616791731925047e-06, + "loss": 22.5977, + "step": 12229 + }, + { + "epoch": 0.11576944557510815, + "grad_norm": 275.8935546875, + "learning_rate": 1.9616707669159342e-06, + "loss": 12.625, + "step": 12230 + }, + { + "epoch": 0.1157789115968232, + "grad_norm": 282.9179382324219, + "learning_rate": 1.9616623597354556e-06, + "loss": 18.1953, + "step": 12231 + }, + { + "epoch": 0.11578837761853826, + "grad_norm": 243.08316040039062, + "learning_rate": 1.9616539516510767e-06, + "loss": 16.0625, + "step": 12232 + }, + { + "epoch": 0.1157978436402533, + "grad_norm": 358.9370422363281, + "learning_rate": 1.961645542662806e-06, + "loss": 35.4062, + "step": 12233 + }, + { + "epoch": 0.11580730966196837, + "grad_norm": 409.30096435546875, + "learning_rate": 1.9616371327706505e-06, + "loss": 17.0234, + "step": 12234 + }, + { + "epoch": 0.11581677568368341, + "grad_norm": 171.73394775390625, + "learning_rate": 1.9616287219746193e-06, + "loss": 24.8906, + "step": 12235 + }, + { + "epoch": 0.11582624170539847, + "grad_norm": 762.1257934570312, + "learning_rate": 1.9616203102747192e-06, + "loss": 54.9531, + "step": 12236 + }, + { + "epoch": 0.11583570772711353, + "grad_norm": 628.5601806640625, + "learning_rate": 1.9616118976709588e-06, + "loss": 50.1797, + "step": 12237 + }, + { + "epoch": 0.11584517374882858, + "grad_norm": 283.4427795410156, + "learning_rate": 1.9616034841633456e-06, + "loss": 16.5781, + "step": 12238 + }, + { + "epoch": 0.11585463977054364, + "grad_norm": 991.2252807617188, + "learning_rate": 1.9615950697518876e-06, + "loss": 60.6406, + "step": 12239 + }, + { + "epoch": 0.11586410579225868, + "grad_norm": 240.96546936035156, + "learning_rate": 1.9615866544365935e-06, + "loss": 25.5469, + "step": 12240 + }, + { + "epoch": 0.11587357181397374, + "grad_norm": 280.15625, + "learning_rate": 1.96157823821747e-06, + "loss": 24.0938, + "step": 12241 + }, + { + "epoch": 0.11588303783568879, + "grad_norm": 602.9181518554688, + "learning_rate": 1.961569821094525e-06, + "loss": 61.0938, + "step": 12242 + }, + { + "epoch": 0.11589250385740385, + "grad_norm": 164.56219482421875, + "learning_rate": 1.9615614030677676e-06, + "loss": 20.3906, + "step": 12243 + }, + { + "epoch": 0.1159019698791189, + "grad_norm": 524.3125610351562, + "learning_rate": 1.9615529841372046e-06, + "loss": 26.3594, + "step": 12244 + }, + { + "epoch": 0.11591143590083396, + "grad_norm": 495.5349426269531, + "learning_rate": 1.961544564302845e-06, + "loss": 53.0, + "step": 12245 + }, + { + "epoch": 0.11592090192254902, + "grad_norm": 553.4345703125, + "learning_rate": 1.9615361435646954e-06, + "loss": 44.1562, + "step": 12246 + }, + { + "epoch": 0.11593036794426406, + "grad_norm": 519.32080078125, + "learning_rate": 1.9615277219227647e-06, + "loss": 16.5234, + "step": 12247 + }, + { + "epoch": 0.11593983396597912, + "grad_norm": 364.38525390625, + "learning_rate": 1.9615192993770605e-06, + "loss": 22.7734, + "step": 12248 + }, + { + "epoch": 0.11594929998769417, + "grad_norm": 558.1373291015625, + "learning_rate": 1.9615108759275904e-06, + "loss": 34.0625, + "step": 12249 + }, + { + "epoch": 0.11595876600940923, + "grad_norm": 734.47314453125, + "learning_rate": 1.961502451574363e-06, + "loss": 28.8047, + "step": 12250 + }, + { + "epoch": 0.11596823203112427, + "grad_norm": 3.3440606594085693, + "learning_rate": 1.9614940263173857e-06, + "loss": 0.9299, + "step": 12251 + }, + { + "epoch": 0.11597769805283933, + "grad_norm": 257.0387268066406, + "learning_rate": 1.9614856001566664e-06, + "loss": 32.5859, + "step": 12252 + }, + { + "epoch": 0.11598716407455438, + "grad_norm": 450.5187072753906, + "learning_rate": 1.9614771730922134e-06, + "loss": 27.7266, + "step": 12253 + }, + { + "epoch": 0.11599663009626944, + "grad_norm": 678.5233764648438, + "learning_rate": 1.961468745124034e-06, + "loss": 53.5078, + "step": 12254 + }, + { + "epoch": 0.1160060961179845, + "grad_norm": 366.83526611328125, + "learning_rate": 1.9614603162521374e-06, + "loss": 18.3555, + "step": 12255 + }, + { + "epoch": 0.11601556213969955, + "grad_norm": 378.1244812011719, + "learning_rate": 1.96145188647653e-06, + "loss": 23.7109, + "step": 12256 + }, + { + "epoch": 0.1160250281614146, + "grad_norm": 223.63829040527344, + "learning_rate": 1.9614434557972204e-06, + "loss": 21.9141, + "step": 12257 + }, + { + "epoch": 0.11603449418312965, + "grad_norm": 238.5475311279297, + "learning_rate": 1.9614350242142167e-06, + "loss": 20.3594, + "step": 12258 + }, + { + "epoch": 0.11604396020484471, + "grad_norm": 606.815185546875, + "learning_rate": 1.9614265917275267e-06, + "loss": 46.4375, + "step": 12259 + }, + { + "epoch": 0.11605342622655976, + "grad_norm": 734.6270141601562, + "learning_rate": 1.9614181583371576e-06, + "loss": 44.8594, + "step": 12260 + }, + { + "epoch": 0.11606289224827482, + "grad_norm": 3.1053690910339355, + "learning_rate": 1.9614097240431187e-06, + "loss": 0.8496, + "step": 12261 + }, + { + "epoch": 0.11607235826998988, + "grad_norm": 205.0736541748047, + "learning_rate": 1.961401288845417e-06, + "loss": 28.0469, + "step": 12262 + }, + { + "epoch": 0.11608182429170492, + "grad_norm": 137.6976776123047, + "learning_rate": 1.9613928527440607e-06, + "loss": 19.1953, + "step": 12263 + }, + { + "epoch": 0.11609129031341998, + "grad_norm": 576.6903686523438, + "learning_rate": 1.9613844157390577e-06, + "loss": 55.5156, + "step": 12264 + }, + { + "epoch": 0.11610075633513503, + "grad_norm": 400.9349670410156, + "learning_rate": 1.9613759778304154e-06, + "loss": 41.3359, + "step": 12265 + }, + { + "epoch": 0.11611022235685009, + "grad_norm": 253.93438720703125, + "learning_rate": 1.9613675390181427e-06, + "loss": 37.1641, + "step": 12266 + }, + { + "epoch": 0.11611968837856514, + "grad_norm": 158.55166625976562, + "learning_rate": 1.9613590993022467e-06, + "loss": 17.8984, + "step": 12267 + }, + { + "epoch": 0.1161291544002802, + "grad_norm": 159.2113800048828, + "learning_rate": 1.961350658682736e-06, + "loss": 21.6016, + "step": 12268 + }, + { + "epoch": 0.11613862042199524, + "grad_norm": 229.28143310546875, + "learning_rate": 1.961342217159618e-06, + "loss": 21.7422, + "step": 12269 + }, + { + "epoch": 0.1161480864437103, + "grad_norm": 472.16998291015625, + "learning_rate": 1.9613337747329013e-06, + "loss": 15.9453, + "step": 12270 + }, + { + "epoch": 0.11615755246542536, + "grad_norm": 331.57318115234375, + "learning_rate": 1.961325331402593e-06, + "loss": 17.8203, + "step": 12271 + }, + { + "epoch": 0.11616701848714041, + "grad_norm": 287.1399230957031, + "learning_rate": 1.9613168871687016e-06, + "loss": 17.1562, + "step": 12272 + }, + { + "epoch": 0.11617648450885547, + "grad_norm": 532.5968017578125, + "learning_rate": 1.9613084420312344e-06, + "loss": 23.0469, + "step": 12273 + }, + { + "epoch": 0.11618595053057051, + "grad_norm": 3.2366533279418945, + "learning_rate": 1.9612999959902004e-06, + "loss": 0.9258, + "step": 12274 + }, + { + "epoch": 0.11619541655228557, + "grad_norm": 312.8558044433594, + "learning_rate": 1.9612915490456067e-06, + "loss": 20.0078, + "step": 12275 + }, + { + "epoch": 0.11620488257400062, + "grad_norm": 495.7078552246094, + "learning_rate": 1.9612831011974613e-06, + "loss": 34.8906, + "step": 12276 + }, + { + "epoch": 0.11621434859571568, + "grad_norm": 454.44927978515625, + "learning_rate": 1.9612746524457724e-06, + "loss": 38.2773, + "step": 12277 + }, + { + "epoch": 0.11622381461743073, + "grad_norm": 406.00238037109375, + "learning_rate": 1.961266202790548e-06, + "loss": 21.9141, + "step": 12278 + }, + { + "epoch": 0.11623328063914579, + "grad_norm": 664.888427734375, + "learning_rate": 1.961257752231796e-06, + "loss": 54.2656, + "step": 12279 + }, + { + "epoch": 0.11624274666086085, + "grad_norm": 361.5773620605469, + "learning_rate": 1.961249300769524e-06, + "loss": 17.5703, + "step": 12280 + }, + { + "epoch": 0.11625221268257589, + "grad_norm": 3.6167778968811035, + "learning_rate": 1.9612408484037403e-06, + "loss": 0.9795, + "step": 12281 + }, + { + "epoch": 0.11626167870429095, + "grad_norm": 643.734130859375, + "learning_rate": 1.9612323951344528e-06, + "loss": 42.4688, + "step": 12282 + }, + { + "epoch": 0.116271144726006, + "grad_norm": 249.290771484375, + "learning_rate": 1.9612239409616695e-06, + "loss": 19.0781, + "step": 12283 + }, + { + "epoch": 0.11628061074772106, + "grad_norm": 243.54495239257812, + "learning_rate": 1.9612154858853977e-06, + "loss": 22.9688, + "step": 12284 + }, + { + "epoch": 0.1162900767694361, + "grad_norm": 317.0309753417969, + "learning_rate": 1.9612070299056463e-06, + "loss": 19.4609, + "step": 12285 + }, + { + "epoch": 0.11629954279115116, + "grad_norm": 160.337646484375, + "learning_rate": 1.961198573022423e-06, + "loss": 17.5859, + "step": 12286 + }, + { + "epoch": 0.11630900881286621, + "grad_norm": 978.6708374023438, + "learning_rate": 1.961190115235735e-06, + "loss": 66.375, + "step": 12287 + }, + { + "epoch": 0.11631847483458127, + "grad_norm": 350.0788879394531, + "learning_rate": 1.9611816565455916e-06, + "loss": 34.7969, + "step": 12288 + }, + { + "epoch": 0.11632794085629633, + "grad_norm": 221.34735107421875, + "learning_rate": 1.961173196951999e-06, + "loss": 24.1641, + "step": 12289 + }, + { + "epoch": 0.11633740687801138, + "grad_norm": 368.393798828125, + "learning_rate": 1.9611647364549667e-06, + "loss": 26.7656, + "step": 12290 + }, + { + "epoch": 0.11634687289972644, + "grad_norm": 353.0790100097656, + "learning_rate": 1.961156275054502e-06, + "loss": 43.8438, + "step": 12291 + }, + { + "epoch": 0.11635633892144148, + "grad_norm": 420.1966552734375, + "learning_rate": 1.961147812750613e-06, + "loss": 25.4688, + "step": 12292 + }, + { + "epoch": 0.11636580494315654, + "grad_norm": 241.77587890625, + "learning_rate": 1.9611393495433073e-06, + "loss": 10.6055, + "step": 12293 + }, + { + "epoch": 0.11637527096487159, + "grad_norm": 387.2259521484375, + "learning_rate": 1.9611308854325936e-06, + "loss": 31.6875, + "step": 12294 + }, + { + "epoch": 0.11638473698658665, + "grad_norm": 306.1914367675781, + "learning_rate": 1.961122420418479e-06, + "loss": 30.2656, + "step": 12295 + }, + { + "epoch": 0.1163942030083017, + "grad_norm": 264.3464660644531, + "learning_rate": 1.961113954500972e-06, + "loss": 22.4531, + "step": 12296 + }, + { + "epoch": 0.11640366903001675, + "grad_norm": 296.158203125, + "learning_rate": 1.9611054876800805e-06, + "loss": 21.5312, + "step": 12297 + }, + { + "epoch": 0.11641313505173181, + "grad_norm": 997.0905151367188, + "learning_rate": 1.9610970199558124e-06, + "loss": 58.6406, + "step": 12298 + }, + { + "epoch": 0.11642260107344686, + "grad_norm": 314.2774963378906, + "learning_rate": 1.9610885513281755e-06, + "loss": 23.8281, + "step": 12299 + }, + { + "epoch": 0.11643206709516192, + "grad_norm": 553.2461547851562, + "learning_rate": 1.9610800817971777e-06, + "loss": 38.3281, + "step": 12300 + }, + { + "epoch": 0.11644153311687697, + "grad_norm": 219.14501953125, + "learning_rate": 1.961071611362827e-06, + "loss": 18.4219, + "step": 12301 + }, + { + "epoch": 0.11645099913859203, + "grad_norm": 422.981689453125, + "learning_rate": 1.961063140025132e-06, + "loss": 16.5078, + "step": 12302 + }, + { + "epoch": 0.11646046516030707, + "grad_norm": 373.5162658691406, + "learning_rate": 1.9610546677841002e-06, + "loss": 32.6406, + "step": 12303 + }, + { + "epoch": 0.11646993118202213, + "grad_norm": 336.98138427734375, + "learning_rate": 1.9610461946397394e-06, + "loss": 15.4453, + "step": 12304 + }, + { + "epoch": 0.11647939720373719, + "grad_norm": 430.7448425292969, + "learning_rate": 1.961037720592058e-06, + "loss": 38.8125, + "step": 12305 + }, + { + "epoch": 0.11648886322545224, + "grad_norm": 360.6861877441406, + "learning_rate": 1.961029245641063e-06, + "loss": 9.3359, + "step": 12306 + }, + { + "epoch": 0.1164983292471673, + "grad_norm": 689.6343994140625, + "learning_rate": 1.961020769786763e-06, + "loss": 41.0469, + "step": 12307 + }, + { + "epoch": 0.11650779526888234, + "grad_norm": 442.7722473144531, + "learning_rate": 1.9610122930291667e-06, + "loss": 47.7969, + "step": 12308 + }, + { + "epoch": 0.1165172612905974, + "grad_norm": 475.2471923828125, + "learning_rate": 1.961003815368281e-06, + "loss": 42.6562, + "step": 12309 + }, + { + "epoch": 0.11652672731231245, + "grad_norm": 260.97808837890625, + "learning_rate": 1.960995336804114e-06, + "loss": 25.1484, + "step": 12310 + }, + { + "epoch": 0.11653619333402751, + "grad_norm": 399.7123107910156, + "learning_rate": 1.960986857336674e-06, + "loss": 27.4062, + "step": 12311 + }, + { + "epoch": 0.11654565935574256, + "grad_norm": 483.0610046386719, + "learning_rate": 1.960978376965969e-06, + "loss": 45.8906, + "step": 12312 + }, + { + "epoch": 0.11655512537745762, + "grad_norm": 242.87887573242188, + "learning_rate": 1.960969895692007e-06, + "loss": 22.8828, + "step": 12313 + }, + { + "epoch": 0.11656459139917268, + "grad_norm": 358.8490905761719, + "learning_rate": 1.9609614135147958e-06, + "loss": 34.2188, + "step": 12314 + }, + { + "epoch": 0.11657405742088772, + "grad_norm": 704.482177734375, + "learning_rate": 1.960952930434343e-06, + "loss": 37.9688, + "step": 12315 + }, + { + "epoch": 0.11658352344260278, + "grad_norm": 234.72039794921875, + "learning_rate": 1.9609444464506573e-06, + "loss": 19.2422, + "step": 12316 + }, + { + "epoch": 0.11659298946431783, + "grad_norm": 3.4725899696350098, + "learning_rate": 1.9609359615637462e-06, + "loss": 0.9775, + "step": 12317 + }, + { + "epoch": 0.11660245548603289, + "grad_norm": 426.37841796875, + "learning_rate": 1.960927475773618e-06, + "loss": 33.1094, + "step": 12318 + }, + { + "epoch": 0.11661192150774793, + "grad_norm": 370.58197021484375, + "learning_rate": 1.9609189890802804e-06, + "loss": 25.3438, + "step": 12319 + }, + { + "epoch": 0.116621387529463, + "grad_norm": 344.8568115234375, + "learning_rate": 1.960910501483741e-06, + "loss": 9.6523, + "step": 12320 + }, + { + "epoch": 0.11663085355117804, + "grad_norm": 683.9117431640625, + "learning_rate": 1.960902012984009e-06, + "loss": 23.6484, + "step": 12321 + }, + { + "epoch": 0.1166403195728931, + "grad_norm": 566.2504272460938, + "learning_rate": 1.960893523581091e-06, + "loss": 26.668, + "step": 12322 + }, + { + "epoch": 0.11664978559460816, + "grad_norm": 721.2491455078125, + "learning_rate": 1.9608850332749956e-06, + "loss": 44.7969, + "step": 12323 + }, + { + "epoch": 0.1166592516163232, + "grad_norm": 190.11752319335938, + "learning_rate": 1.960876542065731e-06, + "loss": 8.5742, + "step": 12324 + }, + { + "epoch": 0.11666871763803827, + "grad_norm": 572.5359497070312, + "learning_rate": 1.960868049953305e-06, + "loss": 29.2578, + "step": 12325 + }, + { + "epoch": 0.11667818365975331, + "grad_norm": 169.81248474121094, + "learning_rate": 1.9608595569377256e-06, + "loss": 17.9219, + "step": 12326 + }, + { + "epoch": 0.11668764968146837, + "grad_norm": 266.4734191894531, + "learning_rate": 1.960851063019001e-06, + "loss": 17.2344, + "step": 12327 + }, + { + "epoch": 0.11669711570318342, + "grad_norm": 315.4670715332031, + "learning_rate": 1.960842568197138e-06, + "loss": 36.5, + "step": 12328 + }, + { + "epoch": 0.11670658172489848, + "grad_norm": 2.9358694553375244, + "learning_rate": 1.960834072472146e-06, + "loss": 0.9717, + "step": 12329 + }, + { + "epoch": 0.11671604774661352, + "grad_norm": 323.96734619140625, + "learning_rate": 1.960825575844033e-06, + "loss": 24.7656, + "step": 12330 + }, + { + "epoch": 0.11672551376832858, + "grad_norm": 759.3746337890625, + "learning_rate": 1.9608170783128057e-06, + "loss": 38.5938, + "step": 12331 + }, + { + "epoch": 0.11673497979004364, + "grad_norm": 378.3561706542969, + "learning_rate": 1.9608085798784734e-06, + "loss": 11.418, + "step": 12332 + }, + { + "epoch": 0.11674444581175869, + "grad_norm": 988.5194702148438, + "learning_rate": 1.960800080541043e-06, + "loss": 20.2383, + "step": 12333 + }, + { + "epoch": 0.11675391183347375, + "grad_norm": 500.0921325683594, + "learning_rate": 1.9607915803005234e-06, + "loss": 45.8906, + "step": 12334 + }, + { + "epoch": 0.1167633778551888, + "grad_norm": 488.25640869140625, + "learning_rate": 1.960783079156922e-06, + "loss": 32.7969, + "step": 12335 + }, + { + "epoch": 0.11677284387690386, + "grad_norm": 376.09881591796875, + "learning_rate": 1.960774577110247e-06, + "loss": 33.5469, + "step": 12336 + }, + { + "epoch": 0.1167823098986189, + "grad_norm": 478.92889404296875, + "learning_rate": 1.9607660741605067e-06, + "loss": 45.5312, + "step": 12337 + }, + { + "epoch": 0.11679177592033396, + "grad_norm": 268.62835693359375, + "learning_rate": 1.9607575703077084e-06, + "loss": 9.0312, + "step": 12338 + }, + { + "epoch": 0.11680124194204901, + "grad_norm": 230.94735717773438, + "learning_rate": 1.960749065551861e-06, + "loss": 22.4219, + "step": 12339 + }, + { + "epoch": 0.11681070796376407, + "grad_norm": 219.03610229492188, + "learning_rate": 1.9607405598929718e-06, + "loss": 19.5547, + "step": 12340 + }, + { + "epoch": 0.11682017398547913, + "grad_norm": 212.8760223388672, + "learning_rate": 1.9607320533310487e-06, + "loss": 22.9141, + "step": 12341 + }, + { + "epoch": 0.11682964000719417, + "grad_norm": 294.4521789550781, + "learning_rate": 1.9607235458661e-06, + "loss": 12.8047, + "step": 12342 + }, + { + "epoch": 0.11683910602890923, + "grad_norm": 407.64306640625, + "learning_rate": 1.9607150374981337e-06, + "loss": 21.8359, + "step": 12343 + }, + { + "epoch": 0.11684857205062428, + "grad_norm": 281.12567138671875, + "learning_rate": 1.960706528227158e-06, + "loss": 18.8516, + "step": 12344 + }, + { + "epoch": 0.11685803807233934, + "grad_norm": 642.4627685546875, + "learning_rate": 1.9606980180531805e-06, + "loss": 30.1094, + "step": 12345 + }, + { + "epoch": 0.11686750409405439, + "grad_norm": 301.5412902832031, + "learning_rate": 1.9606895069762093e-06, + "loss": 19.1797, + "step": 12346 + }, + { + "epoch": 0.11687697011576945, + "grad_norm": 446.1008605957031, + "learning_rate": 1.9606809949962526e-06, + "loss": 42.5938, + "step": 12347 + }, + { + "epoch": 0.1168864361374845, + "grad_norm": 226.6786651611328, + "learning_rate": 1.9606724821133182e-06, + "loss": 22.4531, + "step": 12348 + }, + { + "epoch": 0.11689590215919955, + "grad_norm": 183.2830810546875, + "learning_rate": 1.960663968327414e-06, + "loss": 12.3359, + "step": 12349 + }, + { + "epoch": 0.11690536818091461, + "grad_norm": 166.5606231689453, + "learning_rate": 1.9606554536385483e-06, + "loss": 15.8516, + "step": 12350 + }, + { + "epoch": 0.11691483420262966, + "grad_norm": 268.03277587890625, + "learning_rate": 1.9606469380467293e-06, + "loss": 34.9531, + "step": 12351 + }, + { + "epoch": 0.11692430022434472, + "grad_norm": 226.70071411132812, + "learning_rate": 1.960638421551964e-06, + "loss": 17.6641, + "step": 12352 + }, + { + "epoch": 0.11693376624605976, + "grad_norm": 374.13177490234375, + "learning_rate": 1.9606299041542616e-06, + "loss": 30.2656, + "step": 12353 + }, + { + "epoch": 0.11694323226777482, + "grad_norm": 214.8070831298828, + "learning_rate": 1.9606213858536295e-06, + "loss": 19.2578, + "step": 12354 + }, + { + "epoch": 0.11695269828948987, + "grad_norm": 1758.8787841796875, + "learning_rate": 1.960612866650076e-06, + "loss": 25.8477, + "step": 12355 + }, + { + "epoch": 0.11696216431120493, + "grad_norm": 327.05633544921875, + "learning_rate": 1.9606043465436086e-06, + "loss": 22.0391, + "step": 12356 + }, + { + "epoch": 0.11697163033291999, + "grad_norm": 1172.8447265625, + "learning_rate": 1.9605958255342354e-06, + "loss": 54.375, + "step": 12357 + }, + { + "epoch": 0.11698109635463504, + "grad_norm": 451.2493896484375, + "learning_rate": 1.960587303621965e-06, + "loss": 17.5273, + "step": 12358 + }, + { + "epoch": 0.1169905623763501, + "grad_norm": 404.15252685546875, + "learning_rate": 1.960578780806805e-06, + "loss": 50.8594, + "step": 12359 + }, + { + "epoch": 0.11700002839806514, + "grad_norm": 586.1253662109375, + "learning_rate": 1.960570257088763e-06, + "loss": 43.6992, + "step": 12360 + }, + { + "epoch": 0.1170094944197802, + "grad_norm": 234.57632446289062, + "learning_rate": 1.9605617324678476e-06, + "loss": 18.8281, + "step": 12361 + }, + { + "epoch": 0.11701896044149525, + "grad_norm": 370.0729064941406, + "learning_rate": 1.960553206944067e-06, + "loss": 46.3633, + "step": 12362 + }, + { + "epoch": 0.11702842646321031, + "grad_norm": 158.50160217285156, + "learning_rate": 1.9605446805174287e-06, + "loss": 13.7773, + "step": 12363 + }, + { + "epoch": 0.11703789248492535, + "grad_norm": 433.57208251953125, + "learning_rate": 1.960536153187941e-06, + "loss": 24.3125, + "step": 12364 + }, + { + "epoch": 0.11704735850664041, + "grad_norm": 318.03680419921875, + "learning_rate": 1.9605276249556115e-06, + "loss": 19.3594, + "step": 12365 + }, + { + "epoch": 0.11705682452835547, + "grad_norm": 444.23065185546875, + "learning_rate": 1.960519095820449e-06, + "loss": 50.8906, + "step": 12366 + }, + { + "epoch": 0.11706629055007052, + "grad_norm": 259.27960205078125, + "learning_rate": 1.9605105657824605e-06, + "loss": 23.9219, + "step": 12367 + }, + { + "epoch": 0.11707575657178558, + "grad_norm": 625.4547119140625, + "learning_rate": 1.960502034841655e-06, + "loss": 40.8359, + "step": 12368 + }, + { + "epoch": 0.11708522259350063, + "grad_norm": 494.7817687988281, + "learning_rate": 1.9604935029980395e-06, + "loss": 33.0234, + "step": 12369 + }, + { + "epoch": 0.11709468861521569, + "grad_norm": 294.5086364746094, + "learning_rate": 1.960484970251623e-06, + "loss": 17.6562, + "step": 12370 + }, + { + "epoch": 0.11710415463693073, + "grad_norm": 3.6723670959472656, + "learning_rate": 1.960476436602413e-06, + "loss": 0.9531, + "step": 12371 + }, + { + "epoch": 0.11711362065864579, + "grad_norm": 1389.734130859375, + "learning_rate": 1.960467902050418e-06, + "loss": 61.9062, + "step": 12372 + }, + { + "epoch": 0.11712308668036084, + "grad_norm": 235.50791931152344, + "learning_rate": 1.960459366595645e-06, + "loss": 28.8047, + "step": 12373 + }, + { + "epoch": 0.1171325527020759, + "grad_norm": 1134.1956787109375, + "learning_rate": 1.960450830238103e-06, + "loss": 45.7031, + "step": 12374 + }, + { + "epoch": 0.11714201872379096, + "grad_norm": 240.77517700195312, + "learning_rate": 1.9604422929777996e-06, + "loss": 14.2852, + "step": 12375 + }, + { + "epoch": 0.117151484745506, + "grad_norm": 385.39898681640625, + "learning_rate": 1.9604337548147434e-06, + "loss": 42.7422, + "step": 12376 + }, + { + "epoch": 0.11716095076722106, + "grad_norm": 188.64707946777344, + "learning_rate": 1.9604252157489414e-06, + "loss": 23.2031, + "step": 12377 + }, + { + "epoch": 0.11717041678893611, + "grad_norm": 183.63308715820312, + "learning_rate": 1.9604166757804023e-06, + "loss": 16.9531, + "step": 12378 + }, + { + "epoch": 0.11717988281065117, + "grad_norm": 494.67193603515625, + "learning_rate": 1.960408134909134e-06, + "loss": 47.1016, + "step": 12379 + }, + { + "epoch": 0.11718934883236622, + "grad_norm": 306.45196533203125, + "learning_rate": 1.9603995931351445e-06, + "loss": 17.1094, + "step": 12380 + }, + { + "epoch": 0.11719881485408128, + "grad_norm": 635.1378784179688, + "learning_rate": 1.960391050458442e-06, + "loss": 47.6094, + "step": 12381 + }, + { + "epoch": 0.11720828087579632, + "grad_norm": 513.6050415039062, + "learning_rate": 1.960382506879035e-06, + "loss": 37.2188, + "step": 12382 + }, + { + "epoch": 0.11721774689751138, + "grad_norm": 495.2779541015625, + "learning_rate": 1.96037396239693e-06, + "loss": 41.418, + "step": 12383 + }, + { + "epoch": 0.11722721291922644, + "grad_norm": 176.64419555664062, + "learning_rate": 1.960365417012136e-06, + "loss": 18.2188, + "step": 12384 + }, + { + "epoch": 0.11723667894094149, + "grad_norm": 279.817138671875, + "learning_rate": 1.960356870724661e-06, + "loss": 11.1992, + "step": 12385 + }, + { + "epoch": 0.11724614496265655, + "grad_norm": 233.22576904296875, + "learning_rate": 1.9603483235345135e-06, + "loss": 16.5391, + "step": 12386 + }, + { + "epoch": 0.1172556109843716, + "grad_norm": 842.42333984375, + "learning_rate": 1.9603397754417007e-06, + "loss": 25.0078, + "step": 12387 + }, + { + "epoch": 0.11726507700608665, + "grad_norm": 321.392822265625, + "learning_rate": 1.9603312264462314e-06, + "loss": 24.7578, + "step": 12388 + }, + { + "epoch": 0.1172745430278017, + "grad_norm": 315.1275329589844, + "learning_rate": 1.9603226765481127e-06, + "loss": 19.3047, + "step": 12389 + }, + { + "epoch": 0.11728400904951676, + "grad_norm": 731.4098510742188, + "learning_rate": 1.9603141257473533e-06, + "loss": 52.3906, + "step": 12390 + }, + { + "epoch": 0.11729347507123182, + "grad_norm": 163.94154357910156, + "learning_rate": 1.9603055740439612e-06, + "loss": 20.75, + "step": 12391 + }, + { + "epoch": 0.11730294109294687, + "grad_norm": 586.4993286132812, + "learning_rate": 1.960297021437944e-06, + "loss": 38.6328, + "step": 12392 + }, + { + "epoch": 0.11731240711466193, + "grad_norm": 447.51849365234375, + "learning_rate": 1.9602884679293106e-06, + "loss": 39.5312, + "step": 12393 + }, + { + "epoch": 0.11732187313637697, + "grad_norm": 233.42306518554688, + "learning_rate": 1.960279913518068e-06, + "loss": 11.9688, + "step": 12394 + }, + { + "epoch": 0.11733133915809203, + "grad_norm": 487.4021301269531, + "learning_rate": 1.9602713582042253e-06, + "loss": 27.4492, + "step": 12395 + }, + { + "epoch": 0.11734080517980708, + "grad_norm": 376.8360595703125, + "learning_rate": 1.9602628019877897e-06, + "loss": 18.8359, + "step": 12396 + }, + { + "epoch": 0.11735027120152214, + "grad_norm": 542.5848388671875, + "learning_rate": 1.9602542448687696e-06, + "loss": 49.0, + "step": 12397 + }, + { + "epoch": 0.11735973722323718, + "grad_norm": 232.25555419921875, + "learning_rate": 1.960245686847173e-06, + "loss": 18.9844, + "step": 12398 + }, + { + "epoch": 0.11736920324495224, + "grad_norm": 285.953857421875, + "learning_rate": 1.9602371279230076e-06, + "loss": 19.0781, + "step": 12399 + }, + { + "epoch": 0.1173786692666673, + "grad_norm": 419.6654052734375, + "learning_rate": 1.9602285680962824e-06, + "loss": 22.8125, + "step": 12400 + }, + { + "epoch": 0.11738813528838235, + "grad_norm": 316.36590576171875, + "learning_rate": 1.9602200073670043e-06, + "loss": 28.875, + "step": 12401 + }, + { + "epoch": 0.11739760131009741, + "grad_norm": 3.524275779724121, + "learning_rate": 1.9602114457351816e-06, + "loss": 0.8794, + "step": 12402 + }, + { + "epoch": 0.11740706733181246, + "grad_norm": 522.2174682617188, + "learning_rate": 1.9602028832008233e-06, + "loss": 32.3906, + "step": 12403 + }, + { + "epoch": 0.11741653335352752, + "grad_norm": 682.33984375, + "learning_rate": 1.960194319763936e-06, + "loss": 60.8516, + "step": 12404 + }, + { + "epoch": 0.11742599937524256, + "grad_norm": 829.5569458007812, + "learning_rate": 1.960185755424529e-06, + "loss": 53.8594, + "step": 12405 + }, + { + "epoch": 0.11743546539695762, + "grad_norm": 296.19146728515625, + "learning_rate": 1.96017719018261e-06, + "loss": 28.7891, + "step": 12406 + }, + { + "epoch": 0.11744493141867267, + "grad_norm": 406.997314453125, + "learning_rate": 1.9601686240381868e-06, + "loss": 25.0234, + "step": 12407 + }, + { + "epoch": 0.11745439744038773, + "grad_norm": 3.209547996520996, + "learning_rate": 1.9601600569912672e-06, + "loss": 0.8462, + "step": 12408 + }, + { + "epoch": 0.11746386346210279, + "grad_norm": 950.633544921875, + "learning_rate": 1.96015148904186e-06, + "loss": 44.8203, + "step": 12409 + }, + { + "epoch": 0.11747332948381783, + "grad_norm": 393.5982360839844, + "learning_rate": 1.960142920189973e-06, + "loss": 23.1797, + "step": 12410 + }, + { + "epoch": 0.1174827955055329, + "grad_norm": 237.84645080566406, + "learning_rate": 1.9601343504356138e-06, + "loss": 15.4648, + "step": 12411 + }, + { + "epoch": 0.11749226152724794, + "grad_norm": 387.9979553222656, + "learning_rate": 1.9601257797787906e-06, + "loss": 38.1875, + "step": 12412 + }, + { + "epoch": 0.117501727548963, + "grad_norm": 429.833251953125, + "learning_rate": 1.960117208219512e-06, + "loss": 27.3438, + "step": 12413 + }, + { + "epoch": 0.11751119357067805, + "grad_norm": 483.8321838378906, + "learning_rate": 1.9601086357577855e-06, + "loss": 35.3125, + "step": 12414 + }, + { + "epoch": 0.1175206595923931, + "grad_norm": 374.3832702636719, + "learning_rate": 1.9601000623936193e-06, + "loss": 12.2461, + "step": 12415 + }, + { + "epoch": 0.11753012561410815, + "grad_norm": 268.9271545410156, + "learning_rate": 1.9600914881270216e-06, + "loss": 27.0625, + "step": 12416 + }, + { + "epoch": 0.11753959163582321, + "grad_norm": 551.5211181640625, + "learning_rate": 1.960082912958e-06, + "loss": 31.375, + "step": 12417 + }, + { + "epoch": 0.11754905765753827, + "grad_norm": 2.946882963180542, + "learning_rate": 1.9600743368865637e-06, + "loss": 0.9072, + "step": 12418 + }, + { + "epoch": 0.11755852367925332, + "grad_norm": 364.9281311035156, + "learning_rate": 1.9600657599127196e-06, + "loss": 38.5469, + "step": 12419 + }, + { + "epoch": 0.11756798970096838, + "grad_norm": 379.7557067871094, + "learning_rate": 1.9600571820364758e-06, + "loss": 22.1484, + "step": 12420 + }, + { + "epoch": 0.11757745572268342, + "grad_norm": 353.2918701171875, + "learning_rate": 1.9600486032578407e-06, + "loss": 30.1094, + "step": 12421 + }, + { + "epoch": 0.11758692174439848, + "grad_norm": 3.765556812286377, + "learning_rate": 1.960040023576823e-06, + "loss": 1.0791, + "step": 12422 + }, + { + "epoch": 0.11759638776611353, + "grad_norm": 321.40576171875, + "learning_rate": 1.9600314429934297e-06, + "loss": 21.2969, + "step": 12423 + }, + { + "epoch": 0.11760585378782859, + "grad_norm": 408.2489929199219, + "learning_rate": 1.9600228615076693e-06, + "loss": 29.7422, + "step": 12424 + }, + { + "epoch": 0.11761531980954364, + "grad_norm": 537.9639282226562, + "learning_rate": 1.9600142791195503e-06, + "loss": 38.9844, + "step": 12425 + }, + { + "epoch": 0.1176247858312587, + "grad_norm": 175.30056762695312, + "learning_rate": 1.96000569582908e-06, + "loss": 20.0391, + "step": 12426 + }, + { + "epoch": 0.11763425185297376, + "grad_norm": 391.8290710449219, + "learning_rate": 1.9599971116362662e-06, + "loss": 45.8906, + "step": 12427 + }, + { + "epoch": 0.1176437178746888, + "grad_norm": 3.3886630535125732, + "learning_rate": 1.9599885265411185e-06, + "loss": 0.9143, + "step": 12428 + }, + { + "epoch": 0.11765318389640386, + "grad_norm": 560.2987670898438, + "learning_rate": 1.9599799405436435e-06, + "loss": 38.5, + "step": 12429 + }, + { + "epoch": 0.11766264991811891, + "grad_norm": 192.4230194091797, + "learning_rate": 1.95997135364385e-06, + "loss": 24.8594, + "step": 12430 + }, + { + "epoch": 0.11767211593983397, + "grad_norm": 232.3142547607422, + "learning_rate": 1.959962765841746e-06, + "loss": 21.6172, + "step": 12431 + }, + { + "epoch": 0.11768158196154901, + "grad_norm": 307.54107666015625, + "learning_rate": 1.959954177137339e-06, + "loss": 25.4336, + "step": 12432 + }, + { + "epoch": 0.11769104798326407, + "grad_norm": 201.4305419921875, + "learning_rate": 1.959945587530638e-06, + "loss": 19.1328, + "step": 12433 + }, + { + "epoch": 0.11770051400497913, + "grad_norm": 366.0924072265625, + "learning_rate": 1.9599369970216505e-06, + "loss": 21.8906, + "step": 12434 + }, + { + "epoch": 0.11770998002669418, + "grad_norm": 282.861328125, + "learning_rate": 1.9599284056103847e-06, + "loss": 23.8594, + "step": 12435 + }, + { + "epoch": 0.11771944604840924, + "grad_norm": 877.1126708984375, + "learning_rate": 1.9599198132968485e-06, + "loss": 44.7031, + "step": 12436 + }, + { + "epoch": 0.11772891207012429, + "grad_norm": 353.4941711425781, + "learning_rate": 1.95991122008105e-06, + "loss": 11.7383, + "step": 12437 + }, + { + "epoch": 0.11773837809183935, + "grad_norm": 408.1835021972656, + "learning_rate": 1.9599026259629974e-06, + "loss": 38.9297, + "step": 12438 + }, + { + "epoch": 0.11774784411355439, + "grad_norm": 2.9579129219055176, + "learning_rate": 1.959894030942699e-06, + "loss": 0.936, + "step": 12439 + }, + { + "epoch": 0.11775731013526945, + "grad_norm": 3.474785327911377, + "learning_rate": 1.9598854350201626e-06, + "loss": 0.938, + "step": 12440 + }, + { + "epoch": 0.1177667761569845, + "grad_norm": 3.377340078353882, + "learning_rate": 1.9598768381953966e-06, + "loss": 0.9202, + "step": 12441 + }, + { + "epoch": 0.11777624217869956, + "grad_norm": 461.2964782714844, + "learning_rate": 1.9598682404684084e-06, + "loss": 18.6641, + "step": 12442 + }, + { + "epoch": 0.11778570820041462, + "grad_norm": 314.5745544433594, + "learning_rate": 1.959859641839207e-06, + "loss": 29.5, + "step": 12443 + }, + { + "epoch": 0.11779517422212966, + "grad_norm": 234.4283447265625, + "learning_rate": 1.959851042307799e-06, + "loss": 22.9531, + "step": 12444 + }, + { + "epoch": 0.11780464024384472, + "grad_norm": 589.3839111328125, + "learning_rate": 1.9598424418741947e-06, + "loss": 14.9609, + "step": 12445 + }, + { + "epoch": 0.11781410626555977, + "grad_norm": 837.8843383789062, + "learning_rate": 1.9598338405384e-06, + "loss": 48.7344, + "step": 12446 + }, + { + "epoch": 0.11782357228727483, + "grad_norm": 175.9969024658203, + "learning_rate": 1.9598252383004245e-06, + "loss": 16.2891, + "step": 12447 + }, + { + "epoch": 0.11783303830898988, + "grad_norm": 474.00469970703125, + "learning_rate": 1.9598166351602753e-06, + "loss": 17.6797, + "step": 12448 + }, + { + "epoch": 0.11784250433070494, + "grad_norm": 318.4284973144531, + "learning_rate": 1.959808031117961e-06, + "loss": 21.7031, + "step": 12449 + }, + { + "epoch": 0.11785197035241998, + "grad_norm": 238.19793701171875, + "learning_rate": 1.95979942617349e-06, + "loss": 21.0078, + "step": 12450 + }, + { + "epoch": 0.11786143637413504, + "grad_norm": 389.4842834472656, + "learning_rate": 1.959790820326869e-06, + "loss": 26.1797, + "step": 12451 + }, + { + "epoch": 0.1178709023958501, + "grad_norm": 345.2535400390625, + "learning_rate": 1.959782213578108e-06, + "loss": 23.2344, + "step": 12452 + }, + { + "epoch": 0.11788036841756515, + "grad_norm": 256.04901123046875, + "learning_rate": 1.959773605927214e-06, + "loss": 16.9023, + "step": 12453 + }, + { + "epoch": 0.11788983443928021, + "grad_norm": 647.462890625, + "learning_rate": 1.9597649973741953e-06, + "loss": 18.6797, + "step": 12454 + }, + { + "epoch": 0.11789930046099525, + "grad_norm": 403.5340270996094, + "learning_rate": 1.9597563879190597e-06, + "loss": 21.6953, + "step": 12455 + }, + { + "epoch": 0.11790876648271031, + "grad_norm": 364.8856201171875, + "learning_rate": 1.9597477775618154e-06, + "loss": 48.0547, + "step": 12456 + }, + { + "epoch": 0.11791823250442536, + "grad_norm": 342.1200866699219, + "learning_rate": 1.959739166302471e-06, + "loss": 39.0156, + "step": 12457 + }, + { + "epoch": 0.11792769852614042, + "grad_norm": 255.42831420898438, + "learning_rate": 1.959730554141034e-06, + "loss": 19.6719, + "step": 12458 + }, + { + "epoch": 0.11793716454785547, + "grad_norm": 250.64439392089844, + "learning_rate": 1.9597219410775128e-06, + "loss": 20.3242, + "step": 12459 + }, + { + "epoch": 0.11794663056957053, + "grad_norm": 481.6292419433594, + "learning_rate": 1.9597133271119153e-06, + "loss": 23.6641, + "step": 12460 + }, + { + "epoch": 0.11795609659128559, + "grad_norm": 307.3537292480469, + "learning_rate": 1.9597047122442498e-06, + "loss": 13.6953, + "step": 12461 + }, + { + "epoch": 0.11796556261300063, + "grad_norm": 340.2870788574219, + "learning_rate": 1.9596960964745242e-06, + "loss": 22.8281, + "step": 12462 + }, + { + "epoch": 0.11797502863471569, + "grad_norm": 341.7785949707031, + "learning_rate": 1.959687479802747e-06, + "loss": 21.625, + "step": 12463 + }, + { + "epoch": 0.11798449465643074, + "grad_norm": 417.7615051269531, + "learning_rate": 1.9596788622289257e-06, + "loss": 23.8047, + "step": 12464 + }, + { + "epoch": 0.1179939606781458, + "grad_norm": 289.7916259765625, + "learning_rate": 1.959670243753069e-06, + "loss": 18.418, + "step": 12465 + }, + { + "epoch": 0.11800342669986084, + "grad_norm": 248.77560424804688, + "learning_rate": 1.959661624375184e-06, + "loss": 30.1875, + "step": 12466 + }, + { + "epoch": 0.1180128927215759, + "grad_norm": 270.22125244140625, + "learning_rate": 1.9596530040952805e-06, + "loss": 17.8672, + "step": 12467 + }, + { + "epoch": 0.11802235874329095, + "grad_norm": 376.4422607421875, + "learning_rate": 1.959644382913365e-06, + "loss": 38.0938, + "step": 12468 + }, + { + "epoch": 0.11803182476500601, + "grad_norm": 666.9505004882812, + "learning_rate": 1.959635760829446e-06, + "loss": 72.0625, + "step": 12469 + }, + { + "epoch": 0.11804129078672107, + "grad_norm": 326.1933288574219, + "learning_rate": 1.959627137843532e-06, + "loss": 20.4336, + "step": 12470 + }, + { + "epoch": 0.11805075680843612, + "grad_norm": 428.2121887207031, + "learning_rate": 1.959618513955631e-06, + "loss": 27.3828, + "step": 12471 + }, + { + "epoch": 0.11806022283015118, + "grad_norm": 211.4190673828125, + "learning_rate": 1.9596098891657512e-06, + "loss": 8.2344, + "step": 12472 + }, + { + "epoch": 0.11806968885186622, + "grad_norm": 502.3570861816406, + "learning_rate": 1.9596012634739004e-06, + "loss": 48.3125, + "step": 12473 + }, + { + "epoch": 0.11807915487358128, + "grad_norm": 484.7079772949219, + "learning_rate": 1.959592636880087e-06, + "loss": 53.2188, + "step": 12474 + }, + { + "epoch": 0.11808862089529633, + "grad_norm": 643.5562133789062, + "learning_rate": 1.9595840093843185e-06, + "loss": 45.7344, + "step": 12475 + }, + { + "epoch": 0.11809808691701139, + "grad_norm": 355.9582824707031, + "learning_rate": 1.959575380986604e-06, + "loss": 38.0156, + "step": 12476 + }, + { + "epoch": 0.11810755293872645, + "grad_norm": 803.7570190429688, + "learning_rate": 1.9595667516869505e-06, + "loss": 41.2422, + "step": 12477 + }, + { + "epoch": 0.1181170189604415, + "grad_norm": 177.93548583984375, + "learning_rate": 1.959558121485367e-06, + "loss": 20.6875, + "step": 12478 + }, + { + "epoch": 0.11812648498215655, + "grad_norm": 3.6072168350219727, + "learning_rate": 1.9595494903818613e-06, + "loss": 1.0024, + "step": 12479 + }, + { + "epoch": 0.1181359510038716, + "grad_norm": 789.9855346679688, + "learning_rate": 1.9595408583764416e-06, + "loss": 23.6328, + "step": 12480 + }, + { + "epoch": 0.11814541702558666, + "grad_norm": 772.53173828125, + "learning_rate": 1.9595322254691156e-06, + "loss": 74.9375, + "step": 12481 + }, + { + "epoch": 0.1181548830473017, + "grad_norm": 224.50225830078125, + "learning_rate": 1.959523591659892e-06, + "loss": 19.9297, + "step": 12482 + }, + { + "epoch": 0.11816434906901677, + "grad_norm": 219.3191680908203, + "learning_rate": 1.9595149569487786e-06, + "loss": 19.5469, + "step": 12483 + }, + { + "epoch": 0.11817381509073181, + "grad_norm": 333.986328125, + "learning_rate": 1.9595063213357833e-06, + "loss": 30.2969, + "step": 12484 + }, + { + "epoch": 0.11818328111244687, + "grad_norm": 308.228515625, + "learning_rate": 1.959497684820915e-06, + "loss": 20.3438, + "step": 12485 + }, + { + "epoch": 0.11819274713416193, + "grad_norm": 340.3928527832031, + "learning_rate": 1.959489047404181e-06, + "loss": 34.4062, + "step": 12486 + }, + { + "epoch": 0.11820221315587698, + "grad_norm": 246.6578369140625, + "learning_rate": 1.9594804090855893e-06, + "loss": 27.0391, + "step": 12487 + }, + { + "epoch": 0.11821167917759204, + "grad_norm": 442.4824523925781, + "learning_rate": 1.959471769865149e-06, + "loss": 14.9062, + "step": 12488 + }, + { + "epoch": 0.11822114519930708, + "grad_norm": 3.0629212856292725, + "learning_rate": 1.9594631297428674e-06, + "loss": 0.8789, + "step": 12489 + }, + { + "epoch": 0.11823061122102214, + "grad_norm": 428.0545959472656, + "learning_rate": 1.9594544887187533e-06, + "loss": 24.7422, + "step": 12490 + }, + { + "epoch": 0.11824007724273719, + "grad_norm": 241.12693786621094, + "learning_rate": 1.959445846792814e-06, + "loss": 13.4219, + "step": 12491 + }, + { + "epoch": 0.11824954326445225, + "grad_norm": 490.22406005859375, + "learning_rate": 1.959437203965058e-06, + "loss": 38.0547, + "step": 12492 + }, + { + "epoch": 0.1182590092861673, + "grad_norm": 2.8312249183654785, + "learning_rate": 1.9594285602354935e-06, + "loss": 0.97, + "step": 12493 + }, + { + "epoch": 0.11826847530788236, + "grad_norm": 262.7889709472656, + "learning_rate": 1.9594199156041286e-06, + "loss": 15.9375, + "step": 12494 + }, + { + "epoch": 0.11827794132959742, + "grad_norm": 226.67605590820312, + "learning_rate": 1.9594112700709715e-06, + "loss": 22.1641, + "step": 12495 + }, + { + "epoch": 0.11828740735131246, + "grad_norm": 3.186171054840088, + "learning_rate": 1.95940262363603e-06, + "loss": 0.9941, + "step": 12496 + }, + { + "epoch": 0.11829687337302752, + "grad_norm": 567.8453979492188, + "learning_rate": 1.9593939762993126e-06, + "loss": 39.5156, + "step": 12497 + }, + { + "epoch": 0.11830633939474257, + "grad_norm": 338.14703369140625, + "learning_rate": 1.959385328060827e-06, + "loss": 33.6406, + "step": 12498 + }, + { + "epoch": 0.11831580541645763, + "grad_norm": 656.4912109375, + "learning_rate": 1.9593766789205822e-06, + "loss": 39.5352, + "step": 12499 + }, + { + "epoch": 0.11832527143817267, + "grad_norm": 606.309326171875, + "learning_rate": 1.9593680288785852e-06, + "loss": 20.9453, + "step": 12500 + }, + { + "epoch": 0.11833473745988773, + "grad_norm": 252.27044677734375, + "learning_rate": 1.9593593779348446e-06, + "loss": 15.2188, + "step": 12501 + }, + { + "epoch": 0.11834420348160278, + "grad_norm": 616.5584106445312, + "learning_rate": 1.959350726089369e-06, + "loss": 21.3203, + "step": 12502 + }, + { + "epoch": 0.11835366950331784, + "grad_norm": 270.822998046875, + "learning_rate": 1.959342073342166e-06, + "loss": 22.2578, + "step": 12503 + }, + { + "epoch": 0.1183631355250329, + "grad_norm": 339.8631591796875, + "learning_rate": 1.9593334196932438e-06, + "loss": 12.6992, + "step": 12504 + }, + { + "epoch": 0.11837260154674795, + "grad_norm": 3.312018394470215, + "learning_rate": 1.9593247651426107e-06, + "loss": 1.1294, + "step": 12505 + }, + { + "epoch": 0.118382067568463, + "grad_norm": 257.2930603027344, + "learning_rate": 1.9593161096902746e-06, + "loss": 24.2188, + "step": 12506 + }, + { + "epoch": 0.11839153359017805, + "grad_norm": 852.0159912109375, + "learning_rate": 1.959307453336244e-06, + "loss": 37.0781, + "step": 12507 + }, + { + "epoch": 0.11840099961189311, + "grad_norm": 251.69371032714844, + "learning_rate": 1.9592987960805265e-06, + "loss": 23.1758, + "step": 12508 + }, + { + "epoch": 0.11841046563360816, + "grad_norm": 475.1164245605469, + "learning_rate": 1.9592901379231305e-06, + "loss": 20.9688, + "step": 12509 + }, + { + "epoch": 0.11841993165532322, + "grad_norm": 630.6978759765625, + "learning_rate": 1.959281478864064e-06, + "loss": 26.0078, + "step": 12510 + }, + { + "epoch": 0.11842939767703826, + "grad_norm": 267.9891052246094, + "learning_rate": 1.959272818903336e-06, + "loss": 18.9609, + "step": 12511 + }, + { + "epoch": 0.11843886369875332, + "grad_norm": 352.7592468261719, + "learning_rate": 1.959264158040954e-06, + "loss": 20.1641, + "step": 12512 + }, + { + "epoch": 0.11844832972046838, + "grad_norm": 712.119140625, + "learning_rate": 1.959255496276925e-06, + "loss": 38.9844, + "step": 12513 + }, + { + "epoch": 0.11845779574218343, + "grad_norm": 708.5888061523438, + "learning_rate": 1.959246833611259e-06, + "loss": 43.8828, + "step": 12514 + }, + { + "epoch": 0.11846726176389849, + "grad_norm": 296.8065185546875, + "learning_rate": 1.959238170043963e-06, + "loss": 24.9844, + "step": 12515 + }, + { + "epoch": 0.11847672778561354, + "grad_norm": 188.07882690429688, + "learning_rate": 1.9592295055750464e-06, + "loss": 19.8125, + "step": 12516 + }, + { + "epoch": 0.1184861938073286, + "grad_norm": 387.41314697265625, + "learning_rate": 1.9592208402045157e-06, + "loss": 16.7734, + "step": 12517 + }, + { + "epoch": 0.11849565982904364, + "grad_norm": 559.8042602539062, + "learning_rate": 1.95921217393238e-06, + "loss": 27.8438, + "step": 12518 + }, + { + "epoch": 0.1185051258507587, + "grad_norm": 315.2022399902344, + "learning_rate": 1.9592035067586476e-06, + "loss": 32.5156, + "step": 12519 + }, + { + "epoch": 0.11851459187247376, + "grad_norm": 236.75318908691406, + "learning_rate": 1.9591948386833255e-06, + "loss": 15.6758, + "step": 12520 + }, + { + "epoch": 0.11852405789418881, + "grad_norm": 251.50946044921875, + "learning_rate": 1.9591861697064233e-06, + "loss": 15.4922, + "step": 12521 + }, + { + "epoch": 0.11853352391590387, + "grad_norm": 348.7432556152344, + "learning_rate": 1.959177499827948e-06, + "loss": 33.4688, + "step": 12522 + }, + { + "epoch": 0.11854298993761891, + "grad_norm": 1606.3087158203125, + "learning_rate": 1.9591688290479087e-06, + "loss": 63.625, + "step": 12523 + }, + { + "epoch": 0.11855245595933397, + "grad_norm": 241.6333770751953, + "learning_rate": 1.959160157366313e-06, + "loss": 22.7031, + "step": 12524 + }, + { + "epoch": 0.11856192198104902, + "grad_norm": 403.9260559082031, + "learning_rate": 1.959151484783169e-06, + "loss": 30.7188, + "step": 12525 + }, + { + "epoch": 0.11857138800276408, + "grad_norm": 346.64520263671875, + "learning_rate": 1.9591428112984855e-06, + "loss": 21.4688, + "step": 12526 + }, + { + "epoch": 0.11858085402447913, + "grad_norm": 267.21246337890625, + "learning_rate": 1.9591341369122695e-06, + "loss": 17.2734, + "step": 12527 + }, + { + "epoch": 0.11859032004619419, + "grad_norm": 291.94482421875, + "learning_rate": 1.9591254616245305e-06, + "loss": 36.8125, + "step": 12528 + }, + { + "epoch": 0.11859978606790925, + "grad_norm": 216.09115600585938, + "learning_rate": 1.9591167854352752e-06, + "loss": 11.1016, + "step": 12529 + }, + { + "epoch": 0.11860925208962429, + "grad_norm": 3.05702543258667, + "learning_rate": 1.959108108344513e-06, + "loss": 0.9248, + "step": 12530 + }, + { + "epoch": 0.11861871811133935, + "grad_norm": 400.4447326660156, + "learning_rate": 1.959099430352251e-06, + "loss": 21.0664, + "step": 12531 + }, + { + "epoch": 0.1186281841330544, + "grad_norm": 3.297555685043335, + "learning_rate": 1.9590907514584985e-06, + "loss": 0.8975, + "step": 12532 + }, + { + "epoch": 0.11863765015476946, + "grad_norm": 213.8272705078125, + "learning_rate": 1.959082071663263e-06, + "loss": 13.8203, + "step": 12533 + }, + { + "epoch": 0.1186471161764845, + "grad_norm": 590.6350708007812, + "learning_rate": 1.959073390966553e-06, + "loss": 60.9531, + "step": 12534 + }, + { + "epoch": 0.11865658219819956, + "grad_norm": 222.63308715820312, + "learning_rate": 1.9590647093683757e-06, + "loss": 24.3125, + "step": 12535 + }, + { + "epoch": 0.11866604821991461, + "grad_norm": 367.02911376953125, + "learning_rate": 1.9590560268687402e-06, + "loss": 16.875, + "step": 12536 + }, + { + "epoch": 0.11867551424162967, + "grad_norm": 3.512831211090088, + "learning_rate": 1.959047343467655e-06, + "loss": 0.9253, + "step": 12537 + }, + { + "epoch": 0.11868498026334473, + "grad_norm": 297.1220703125, + "learning_rate": 1.959038659165127e-06, + "loss": 13.5117, + "step": 12538 + }, + { + "epoch": 0.11869444628505978, + "grad_norm": 3.5804595947265625, + "learning_rate": 1.9590299739611655e-06, + "loss": 0.9209, + "step": 12539 + }, + { + "epoch": 0.11870391230677484, + "grad_norm": 995.9786987304688, + "learning_rate": 1.9590212878557778e-06, + "loss": 27.1016, + "step": 12540 + }, + { + "epoch": 0.11871337832848988, + "grad_norm": 437.1162109375, + "learning_rate": 1.959012600848973e-06, + "loss": 29.5, + "step": 12541 + }, + { + "epoch": 0.11872284435020494, + "grad_norm": 240.74998474121094, + "learning_rate": 1.9590039129407584e-06, + "loss": 18.2148, + "step": 12542 + }, + { + "epoch": 0.11873231037191999, + "grad_norm": 4.232676029205322, + "learning_rate": 1.9589952241311426e-06, + "loss": 1.1045, + "step": 12543 + }, + { + "epoch": 0.11874177639363505, + "grad_norm": 2.8563342094421387, + "learning_rate": 1.9589865344201337e-06, + "loss": 1.0029, + "step": 12544 + }, + { + "epoch": 0.1187512424153501, + "grad_norm": 510.369140625, + "learning_rate": 1.9589778438077395e-06, + "loss": 45.0625, + "step": 12545 + }, + { + "epoch": 0.11876070843706515, + "grad_norm": 604.0240478515625, + "learning_rate": 1.958969152293969e-06, + "loss": 34.625, + "step": 12546 + }, + { + "epoch": 0.11877017445878021, + "grad_norm": 388.9475402832031, + "learning_rate": 1.9589604598788293e-06, + "loss": 17.2539, + "step": 12547 + }, + { + "epoch": 0.11877964048049526, + "grad_norm": 298.2950134277344, + "learning_rate": 1.95895176656233e-06, + "loss": 15.8203, + "step": 12548 + }, + { + "epoch": 0.11878910650221032, + "grad_norm": 241.49867248535156, + "learning_rate": 1.9589430723444777e-06, + "loss": 29.8438, + "step": 12549 + }, + { + "epoch": 0.11879857252392537, + "grad_norm": 541.2489013671875, + "learning_rate": 1.9589343772252816e-06, + "loss": 38.6797, + "step": 12550 + }, + { + "epoch": 0.11880803854564043, + "grad_norm": 431.486083984375, + "learning_rate": 1.9589256812047493e-06, + "loss": 34.1719, + "step": 12551 + }, + { + "epoch": 0.11881750456735547, + "grad_norm": 212.97634887695312, + "learning_rate": 1.9589169842828897e-06, + "loss": 19.5312, + "step": 12552 + }, + { + "epoch": 0.11882697058907053, + "grad_norm": 207.6826171875, + "learning_rate": 1.9589082864597104e-06, + "loss": 12.875, + "step": 12553 + }, + { + "epoch": 0.11883643661078558, + "grad_norm": 415.3200988769531, + "learning_rate": 1.9588995877352194e-06, + "loss": 57.9531, + "step": 12554 + }, + { + "epoch": 0.11884590263250064, + "grad_norm": 702.1846313476562, + "learning_rate": 1.958890888109425e-06, + "loss": 56.9062, + "step": 12555 + }, + { + "epoch": 0.1188553686542157, + "grad_norm": 560.6973266601562, + "learning_rate": 1.9588821875823363e-06, + "loss": 41.2344, + "step": 12556 + }, + { + "epoch": 0.11886483467593074, + "grad_norm": 404.33599853515625, + "learning_rate": 1.9588734861539603e-06, + "loss": 39.7578, + "step": 12557 + }, + { + "epoch": 0.1188743006976458, + "grad_norm": 473.55450439453125, + "learning_rate": 1.9588647838243056e-06, + "loss": 18.4375, + "step": 12558 + }, + { + "epoch": 0.11888376671936085, + "grad_norm": 634.216552734375, + "learning_rate": 1.9588560805933804e-06, + "loss": 30.8906, + "step": 12559 + }, + { + "epoch": 0.11889323274107591, + "grad_norm": 753.8851318359375, + "learning_rate": 1.9588473764611926e-06, + "loss": 45.3906, + "step": 12560 + }, + { + "epoch": 0.11890269876279096, + "grad_norm": 374.0474853515625, + "learning_rate": 1.9588386714277508e-06, + "loss": 20.9297, + "step": 12561 + }, + { + "epoch": 0.11891216478450602, + "grad_norm": 316.0805358886719, + "learning_rate": 1.9588299654930633e-06, + "loss": 21.5, + "step": 12562 + }, + { + "epoch": 0.11892163080622108, + "grad_norm": 593.03564453125, + "learning_rate": 1.958821258657138e-06, + "loss": 47.9688, + "step": 12563 + }, + { + "epoch": 0.11893109682793612, + "grad_norm": 418.44781494140625, + "learning_rate": 1.958812550919983e-06, + "loss": 29.5781, + "step": 12564 + }, + { + "epoch": 0.11894056284965118, + "grad_norm": 474.8135070800781, + "learning_rate": 1.9588038422816062e-06, + "loss": 23.7734, + "step": 12565 + }, + { + "epoch": 0.11895002887136623, + "grad_norm": 317.1666564941406, + "learning_rate": 1.9587951327420164e-06, + "loss": 41.0938, + "step": 12566 + }, + { + "epoch": 0.11895949489308129, + "grad_norm": 193.60215759277344, + "learning_rate": 1.9587864223012217e-06, + "loss": 21.9531, + "step": 12567 + }, + { + "epoch": 0.11896896091479633, + "grad_norm": 242.1050567626953, + "learning_rate": 1.95877771095923e-06, + "loss": 16.4805, + "step": 12568 + }, + { + "epoch": 0.1189784269365114, + "grad_norm": 3.840545892715454, + "learning_rate": 1.9587689987160497e-06, + "loss": 1.0161, + "step": 12569 + }, + { + "epoch": 0.11898789295822644, + "grad_norm": 250.2222442626953, + "learning_rate": 1.958760285571689e-06, + "loss": 22.9766, + "step": 12570 + }, + { + "epoch": 0.1189973589799415, + "grad_norm": 259.39788818359375, + "learning_rate": 1.9587515715261557e-06, + "loss": 19.9062, + "step": 12571 + }, + { + "epoch": 0.11900682500165656, + "grad_norm": 198.34207153320312, + "learning_rate": 1.9587428565794582e-06, + "loss": 17.1797, + "step": 12572 + }, + { + "epoch": 0.1190162910233716, + "grad_norm": 3.1476030349731445, + "learning_rate": 1.9587341407316054e-06, + "loss": 0.9268, + "step": 12573 + }, + { + "epoch": 0.11902575704508667, + "grad_norm": 230.71533203125, + "learning_rate": 1.9587254239826044e-06, + "loss": 19.0, + "step": 12574 + }, + { + "epoch": 0.11903522306680171, + "grad_norm": 4.077130317687988, + "learning_rate": 1.958716706332464e-06, + "loss": 0.9453, + "step": 12575 + }, + { + "epoch": 0.11904468908851677, + "grad_norm": 523.7610473632812, + "learning_rate": 1.9587079877811925e-06, + "loss": 21.9453, + "step": 12576 + }, + { + "epoch": 0.11905415511023182, + "grad_norm": 209.10943603515625, + "learning_rate": 1.9586992683287973e-06, + "loss": 23.1719, + "step": 12577 + }, + { + "epoch": 0.11906362113194688, + "grad_norm": 559.7361450195312, + "learning_rate": 1.9586905479752874e-06, + "loss": 48.1562, + "step": 12578 + }, + { + "epoch": 0.11907308715366192, + "grad_norm": 690.4771118164062, + "learning_rate": 1.9586818267206712e-06, + "loss": 37.6758, + "step": 12579 + }, + { + "epoch": 0.11908255317537698, + "grad_norm": 863.7325439453125, + "learning_rate": 1.958673104564956e-06, + "loss": 53.5195, + "step": 12580 + }, + { + "epoch": 0.11909201919709204, + "grad_norm": 240.59878540039062, + "learning_rate": 1.9586643815081506e-06, + "loss": 20.6719, + "step": 12581 + }, + { + "epoch": 0.11910148521880709, + "grad_norm": 2239.25732421875, + "learning_rate": 1.958655657550263e-06, + "loss": 19.1797, + "step": 12582 + }, + { + "epoch": 0.11911095124052215, + "grad_norm": 283.17095947265625, + "learning_rate": 1.9586469326913015e-06, + "loss": 21.1875, + "step": 12583 + }, + { + "epoch": 0.1191204172622372, + "grad_norm": 350.9524841308594, + "learning_rate": 1.958638206931274e-06, + "loss": 15.8047, + "step": 12584 + }, + { + "epoch": 0.11912988328395226, + "grad_norm": 513.2554321289062, + "learning_rate": 1.958629480270189e-06, + "loss": 42.1719, + "step": 12585 + }, + { + "epoch": 0.1191393493056673, + "grad_norm": 463.993408203125, + "learning_rate": 1.958620752708055e-06, + "loss": 46.7031, + "step": 12586 + }, + { + "epoch": 0.11914881532738236, + "grad_norm": 215.9068603515625, + "learning_rate": 1.9586120242448795e-06, + "loss": 17.9102, + "step": 12587 + }, + { + "epoch": 0.11915828134909741, + "grad_norm": 599.3005981445312, + "learning_rate": 1.9586032948806712e-06, + "loss": 38.1797, + "step": 12588 + }, + { + "epoch": 0.11916774737081247, + "grad_norm": 438.133544921875, + "learning_rate": 1.958594564615438e-06, + "loss": 47.8281, + "step": 12589 + }, + { + "epoch": 0.11917721339252753, + "grad_norm": 414.16058349609375, + "learning_rate": 1.9585858334491883e-06, + "loss": 25.2812, + "step": 12590 + }, + { + "epoch": 0.11918667941424257, + "grad_norm": 278.4576110839844, + "learning_rate": 1.9585771013819306e-06, + "loss": 21.168, + "step": 12591 + }, + { + "epoch": 0.11919614543595763, + "grad_norm": 264.0628356933594, + "learning_rate": 1.9585683684136725e-06, + "loss": 20.5781, + "step": 12592 + }, + { + "epoch": 0.11920561145767268, + "grad_norm": 331.333251953125, + "learning_rate": 1.958559634544423e-06, + "loss": 17.4844, + "step": 12593 + }, + { + "epoch": 0.11921507747938774, + "grad_norm": 3.0175671577453613, + "learning_rate": 1.958550899774189e-06, + "loss": 0.8643, + "step": 12594 + }, + { + "epoch": 0.11922454350110279, + "grad_norm": 371.89935302734375, + "learning_rate": 1.95854216410298e-06, + "loss": 47.3438, + "step": 12595 + }, + { + "epoch": 0.11923400952281785, + "grad_norm": 1402.785888671875, + "learning_rate": 1.9585334275308036e-06, + "loss": 88.5312, + "step": 12596 + }, + { + "epoch": 0.11924347554453289, + "grad_norm": 450.6885986328125, + "learning_rate": 1.958524690057668e-06, + "loss": 23.1016, + "step": 12597 + }, + { + "epoch": 0.11925294156624795, + "grad_norm": 258.53363037109375, + "learning_rate": 1.9585159516835818e-06, + "loss": 22.7188, + "step": 12598 + }, + { + "epoch": 0.11926240758796301, + "grad_norm": 638.29443359375, + "learning_rate": 1.958507212408553e-06, + "loss": 51.5469, + "step": 12599 + }, + { + "epoch": 0.11927187360967806, + "grad_norm": 290.3292541503906, + "learning_rate": 1.9584984722325892e-06, + "loss": 27.0938, + "step": 12600 + }, + { + "epoch": 0.11928133963139312, + "grad_norm": 239.4757537841797, + "learning_rate": 1.9584897311557e-06, + "loss": 16.3516, + "step": 12601 + }, + { + "epoch": 0.11929080565310816, + "grad_norm": 572.6786499023438, + "learning_rate": 1.9584809891778925e-06, + "loss": 23.9062, + "step": 12602 + }, + { + "epoch": 0.11930027167482322, + "grad_norm": 457.0514831542969, + "learning_rate": 1.9584722462991753e-06, + "loss": 50.4375, + "step": 12603 + }, + { + "epoch": 0.11930973769653827, + "grad_norm": 305.94866943359375, + "learning_rate": 1.958463502519556e-06, + "loss": 20.2852, + "step": 12604 + }, + { + "epoch": 0.11931920371825333, + "grad_norm": 363.7744445800781, + "learning_rate": 1.9584547578390442e-06, + "loss": 20.2734, + "step": 12605 + }, + { + "epoch": 0.11932866973996838, + "grad_norm": 207.93844604492188, + "learning_rate": 1.958446012257647e-06, + "loss": 28.0625, + "step": 12606 + }, + { + "epoch": 0.11933813576168344, + "grad_norm": 950.7186279296875, + "learning_rate": 1.9584372657753724e-06, + "loss": 66.9609, + "step": 12607 + }, + { + "epoch": 0.1193476017833985, + "grad_norm": 360.4522705078125, + "learning_rate": 1.9584285183922293e-06, + "loss": 17.5156, + "step": 12608 + }, + { + "epoch": 0.11935706780511354, + "grad_norm": 363.8592529296875, + "learning_rate": 1.958419770108226e-06, + "loss": 36.8281, + "step": 12609 + }, + { + "epoch": 0.1193665338268286, + "grad_norm": 425.701904296875, + "learning_rate": 1.9584110209233704e-06, + "loss": 32.2031, + "step": 12610 + }, + { + "epoch": 0.11937599984854365, + "grad_norm": 242.68923950195312, + "learning_rate": 1.958402270837671e-06, + "loss": 19.2344, + "step": 12611 + }, + { + "epoch": 0.11938546587025871, + "grad_norm": 318.7239074707031, + "learning_rate": 1.958393519851135e-06, + "loss": 18.9375, + "step": 12612 + }, + { + "epoch": 0.11939493189197375, + "grad_norm": 296.7611999511719, + "learning_rate": 1.9583847679637724e-06, + "loss": 28.4844, + "step": 12613 + }, + { + "epoch": 0.11940439791368881, + "grad_norm": 461.30426025390625, + "learning_rate": 1.95837601517559e-06, + "loss": 29.6641, + "step": 12614 + }, + { + "epoch": 0.11941386393540387, + "grad_norm": 384.6173400878906, + "learning_rate": 1.9583672614865966e-06, + "loss": 42.4219, + "step": 12615 + }, + { + "epoch": 0.11942332995711892, + "grad_norm": 173.589111328125, + "learning_rate": 1.9583585068968e-06, + "loss": 21.5078, + "step": 12616 + }, + { + "epoch": 0.11943279597883398, + "grad_norm": 207.0529327392578, + "learning_rate": 1.958349751406209e-06, + "loss": 19.6172, + "step": 12617 + }, + { + "epoch": 0.11944226200054903, + "grad_norm": 233.64682006835938, + "learning_rate": 1.958340995014832e-06, + "loss": 15.1016, + "step": 12618 + }, + { + "epoch": 0.11945172802226409, + "grad_norm": 272.8251037597656, + "learning_rate": 1.958332237722676e-06, + "loss": 21.5781, + "step": 12619 + }, + { + "epoch": 0.11946119404397913, + "grad_norm": 428.06756591796875, + "learning_rate": 1.9583234795297506e-06, + "loss": 31.0938, + "step": 12620 + }, + { + "epoch": 0.11947066006569419, + "grad_norm": 360.3064880371094, + "learning_rate": 1.958314720436063e-06, + "loss": 23.3594, + "step": 12621 + }, + { + "epoch": 0.11948012608740924, + "grad_norm": 686.0059814453125, + "learning_rate": 1.9583059604416226e-06, + "loss": 28.9609, + "step": 12622 + }, + { + "epoch": 0.1194895921091243, + "grad_norm": 694.9517211914062, + "learning_rate": 1.9582971995464367e-06, + "loss": 34.6406, + "step": 12623 + }, + { + "epoch": 0.11949905813083936, + "grad_norm": 3.250584363937378, + "learning_rate": 1.9582884377505134e-06, + "loss": 0.9097, + "step": 12624 + }, + { + "epoch": 0.1195085241525544, + "grad_norm": 330.4075622558594, + "learning_rate": 1.9582796750538616e-06, + "loss": 17.1562, + "step": 12625 + }, + { + "epoch": 0.11951799017426946, + "grad_norm": 298.5489807128906, + "learning_rate": 1.9582709114564893e-06, + "loss": 17.5391, + "step": 12626 + }, + { + "epoch": 0.11952745619598451, + "grad_norm": 359.3769226074219, + "learning_rate": 1.9582621469584047e-06, + "loss": 23.4219, + "step": 12627 + }, + { + "epoch": 0.11953692221769957, + "grad_norm": 263.82977294921875, + "learning_rate": 1.9582533815596158e-06, + "loss": 38.4844, + "step": 12628 + }, + { + "epoch": 0.11954638823941462, + "grad_norm": 565.5381469726562, + "learning_rate": 1.9582446152601313e-06, + "loss": 40.9688, + "step": 12629 + }, + { + "epoch": 0.11955585426112968, + "grad_norm": 157.33409118652344, + "learning_rate": 1.958235848059959e-06, + "loss": 21.2734, + "step": 12630 + }, + { + "epoch": 0.11956532028284472, + "grad_norm": 283.2381591796875, + "learning_rate": 1.9582270799591075e-06, + "loss": 18.1016, + "step": 12631 + }, + { + "epoch": 0.11957478630455978, + "grad_norm": 310.2468566894531, + "learning_rate": 1.9582183109575846e-06, + "loss": 21.7266, + "step": 12632 + }, + { + "epoch": 0.11958425232627484, + "grad_norm": 497.8782653808594, + "learning_rate": 1.9582095410553993e-06, + "loss": 54.0, + "step": 12633 + }, + { + "epoch": 0.11959371834798989, + "grad_norm": 159.71453857421875, + "learning_rate": 1.9582007702525587e-06, + "loss": 13.1406, + "step": 12634 + }, + { + "epoch": 0.11960318436970495, + "grad_norm": 684.3792724609375, + "learning_rate": 1.9581919985490723e-06, + "loss": 62.8359, + "step": 12635 + }, + { + "epoch": 0.11961265039142, + "grad_norm": 442.0411071777344, + "learning_rate": 1.9581832259449476e-06, + "loss": 19.6172, + "step": 12636 + }, + { + "epoch": 0.11962211641313505, + "grad_norm": 3.187875986099243, + "learning_rate": 1.9581744524401927e-06, + "loss": 0.9219, + "step": 12637 + }, + { + "epoch": 0.1196315824348501, + "grad_norm": 939.1146850585938, + "learning_rate": 1.9581656780348164e-06, + "loss": 45.6875, + "step": 12638 + }, + { + "epoch": 0.11964104845656516, + "grad_norm": 3.0466225147247314, + "learning_rate": 1.958156902728827e-06, + "loss": 0.9688, + "step": 12639 + }, + { + "epoch": 0.1196505144782802, + "grad_norm": 722.2459106445312, + "learning_rate": 1.958148126522232e-06, + "loss": 35.75, + "step": 12640 + }, + { + "epoch": 0.11965998049999527, + "grad_norm": 485.54193115234375, + "learning_rate": 1.95813934941504e-06, + "loss": 20.7656, + "step": 12641 + }, + { + "epoch": 0.11966944652171033, + "grad_norm": 385.2447814941406, + "learning_rate": 1.9581305714072598e-06, + "loss": 40.8516, + "step": 12642 + }, + { + "epoch": 0.11967891254342537, + "grad_norm": 268.1156921386719, + "learning_rate": 1.958121792498899e-06, + "loss": 25.0312, + "step": 12643 + }, + { + "epoch": 0.11968837856514043, + "grad_norm": 378.1156311035156, + "learning_rate": 1.958113012689966e-06, + "loss": 9.6367, + "step": 12644 + }, + { + "epoch": 0.11969784458685548, + "grad_norm": 659.4326171875, + "learning_rate": 1.958104231980469e-06, + "loss": 19.2344, + "step": 12645 + }, + { + "epoch": 0.11970731060857054, + "grad_norm": 277.9979248046875, + "learning_rate": 1.9580954503704166e-06, + "loss": 31.3906, + "step": 12646 + }, + { + "epoch": 0.11971677663028558, + "grad_norm": 786.9117431640625, + "learning_rate": 1.9580866678598163e-06, + "loss": 22.6953, + "step": 12647 + }, + { + "epoch": 0.11972624265200064, + "grad_norm": 487.9312744140625, + "learning_rate": 1.9580778844486778e-06, + "loss": 15.7188, + "step": 12648 + }, + { + "epoch": 0.11973570867371569, + "grad_norm": 232.4349822998047, + "learning_rate": 1.9580691001370077e-06, + "loss": 28.7969, + "step": 12649 + }, + { + "epoch": 0.11974517469543075, + "grad_norm": 448.3601989746094, + "learning_rate": 1.958060314924815e-06, + "loss": 22.875, + "step": 12650 + }, + { + "epoch": 0.11975464071714581, + "grad_norm": 2662.25048828125, + "learning_rate": 1.958051528812108e-06, + "loss": 21.4531, + "step": 12651 + }, + { + "epoch": 0.11976410673886086, + "grad_norm": 471.4386291503906, + "learning_rate": 1.958042741798895e-06, + "loss": 36.2969, + "step": 12652 + }, + { + "epoch": 0.11977357276057592, + "grad_norm": 313.09295654296875, + "learning_rate": 1.958033953885184e-06, + "loss": 44.0781, + "step": 12653 + }, + { + "epoch": 0.11978303878229096, + "grad_norm": 271.98748779296875, + "learning_rate": 1.9580251650709837e-06, + "loss": 22.8867, + "step": 12654 + }, + { + "epoch": 0.11979250480400602, + "grad_norm": 346.9518127441406, + "learning_rate": 1.958016375356302e-06, + "loss": 25.1562, + "step": 12655 + }, + { + "epoch": 0.11980197082572107, + "grad_norm": 2.9797236919403076, + "learning_rate": 1.9580075847411468e-06, + "loss": 1.0059, + "step": 12656 + }, + { + "epoch": 0.11981143684743613, + "grad_norm": 3.3108537197113037, + "learning_rate": 1.9579987932255267e-06, + "loss": 0.8875, + "step": 12657 + }, + { + "epoch": 0.11982090286915119, + "grad_norm": 409.4576110839844, + "learning_rate": 1.957990000809451e-06, + "loss": 42.6875, + "step": 12658 + }, + { + "epoch": 0.11983036889086623, + "grad_norm": 273.18621826171875, + "learning_rate": 1.957981207492926e-06, + "loss": 29.0938, + "step": 12659 + }, + { + "epoch": 0.1198398349125813, + "grad_norm": 523.5150146484375, + "learning_rate": 1.9579724132759616e-06, + "loss": 48.1094, + "step": 12660 + }, + { + "epoch": 0.11984930093429634, + "grad_norm": 195.8870086669922, + "learning_rate": 1.957963618158565e-06, + "loss": 23.6641, + "step": 12661 + }, + { + "epoch": 0.1198587669560114, + "grad_norm": 3.6095056533813477, + "learning_rate": 1.9579548221407455e-06, + "loss": 0.8057, + "step": 12662 + }, + { + "epoch": 0.11986823297772645, + "grad_norm": 285.04339599609375, + "learning_rate": 1.9579460252225104e-06, + "loss": 18.3047, + "step": 12663 + }, + { + "epoch": 0.1198776989994415, + "grad_norm": 289.2338562011719, + "learning_rate": 1.9579372274038687e-06, + "loss": 20.9062, + "step": 12664 + }, + { + "epoch": 0.11988716502115655, + "grad_norm": 348.8600769042969, + "learning_rate": 1.957928428684828e-06, + "loss": 27.8125, + "step": 12665 + }, + { + "epoch": 0.11989663104287161, + "grad_norm": 374.7373352050781, + "learning_rate": 1.957919629065397e-06, + "loss": 39.875, + "step": 12666 + }, + { + "epoch": 0.11990609706458667, + "grad_norm": 298.4096984863281, + "learning_rate": 1.9579108285455835e-06, + "loss": 19.2031, + "step": 12667 + }, + { + "epoch": 0.11991556308630172, + "grad_norm": 364.70806884765625, + "learning_rate": 1.9579020271253966e-06, + "loss": 36.7969, + "step": 12668 + }, + { + "epoch": 0.11992502910801678, + "grad_norm": 253.83250427246094, + "learning_rate": 1.9578932248048443e-06, + "loss": 9.5078, + "step": 12669 + }, + { + "epoch": 0.11993449512973182, + "grad_norm": 694.76708984375, + "learning_rate": 1.957884421583934e-06, + "loss": 52.625, + "step": 12670 + }, + { + "epoch": 0.11994396115144688, + "grad_norm": 259.13482666015625, + "learning_rate": 1.957875617462675e-06, + "loss": 25.1875, + "step": 12671 + }, + { + "epoch": 0.11995342717316193, + "grad_norm": 706.8151245117188, + "learning_rate": 1.9578668124410753e-06, + "loss": 28.0078, + "step": 12672 + }, + { + "epoch": 0.11996289319487699, + "grad_norm": 506.1554870605469, + "learning_rate": 1.9578580065191434e-06, + "loss": 26.3125, + "step": 12673 + }, + { + "epoch": 0.11997235921659204, + "grad_norm": 355.3739929199219, + "learning_rate": 1.9578491996968866e-06, + "loss": 25.2188, + "step": 12674 + }, + { + "epoch": 0.1199818252383071, + "grad_norm": 487.9864807128906, + "learning_rate": 1.9578403919743143e-06, + "loss": 37.9062, + "step": 12675 + }, + { + "epoch": 0.11999129126002216, + "grad_norm": 226.1724090576172, + "learning_rate": 1.9578315833514344e-06, + "loss": 14.6992, + "step": 12676 + }, + { + "epoch": 0.1200007572817372, + "grad_norm": 528.4386596679688, + "learning_rate": 1.9578227738282547e-06, + "loss": 41.0938, + "step": 12677 + }, + { + "epoch": 0.12001022330345226, + "grad_norm": 229.46368408203125, + "learning_rate": 1.9578139634047844e-06, + "loss": 25.4453, + "step": 12678 + }, + { + "epoch": 0.12001968932516731, + "grad_norm": 2.8634090423583984, + "learning_rate": 1.957805152081031e-06, + "loss": 0.8379, + "step": 12679 + }, + { + "epoch": 0.12002915534688237, + "grad_norm": 290.8581848144531, + "learning_rate": 1.957796339857003e-06, + "loss": 19.8516, + "step": 12680 + }, + { + "epoch": 0.12003862136859741, + "grad_norm": 363.6197204589844, + "learning_rate": 1.957787526732709e-06, + "loss": 25.3906, + "step": 12681 + }, + { + "epoch": 0.12004808739031247, + "grad_norm": 784.3346557617188, + "learning_rate": 1.9577787127081572e-06, + "loss": 56.1719, + "step": 12682 + }, + { + "epoch": 0.12005755341202752, + "grad_norm": 191.24057006835938, + "learning_rate": 1.9577698977833552e-06, + "loss": 14.2578, + "step": 12683 + }, + { + "epoch": 0.12006701943374258, + "grad_norm": 358.1613464355469, + "learning_rate": 1.957761081958312e-06, + "loss": 50.0469, + "step": 12684 + }, + { + "epoch": 0.12007648545545764, + "grad_norm": 291.3686218261719, + "learning_rate": 1.9577522652330358e-06, + "loss": 20.1172, + "step": 12685 + }, + { + "epoch": 0.12008595147717269, + "grad_norm": 388.2040710449219, + "learning_rate": 1.957743447607535e-06, + "loss": 38.9844, + "step": 12686 + }, + { + "epoch": 0.12009541749888775, + "grad_norm": 337.4063415527344, + "learning_rate": 1.957734629081817e-06, + "loss": 44.8281, + "step": 12687 + }, + { + "epoch": 0.12010488352060279, + "grad_norm": 474.8493347167969, + "learning_rate": 1.957725809655891e-06, + "loss": 39.8125, + "step": 12688 + }, + { + "epoch": 0.12011434954231785, + "grad_norm": 549.4529418945312, + "learning_rate": 1.957716989329765e-06, + "loss": 32.8906, + "step": 12689 + }, + { + "epoch": 0.1201238155640329, + "grad_norm": 389.65789794921875, + "learning_rate": 1.9577081681034476e-06, + "loss": 23.2812, + "step": 12690 + }, + { + "epoch": 0.12013328158574796, + "grad_norm": 242.8768310546875, + "learning_rate": 1.957699345976947e-06, + "loss": 17.0195, + "step": 12691 + }, + { + "epoch": 0.120142747607463, + "grad_norm": 370.0252685546875, + "learning_rate": 1.957690522950271e-06, + "loss": 15.9648, + "step": 12692 + }, + { + "epoch": 0.12015221362917806, + "grad_norm": 3.5389516353607178, + "learning_rate": 1.957681699023428e-06, + "loss": 1.0073, + "step": 12693 + }, + { + "epoch": 0.12016167965089312, + "grad_norm": 358.8053283691406, + "learning_rate": 1.957672874196427e-06, + "loss": 9.1309, + "step": 12694 + }, + { + "epoch": 0.12017114567260817, + "grad_norm": 404.6819152832031, + "learning_rate": 1.9576640484692757e-06, + "loss": 15.2617, + "step": 12695 + }, + { + "epoch": 0.12018061169432323, + "grad_norm": 452.05731201171875, + "learning_rate": 1.9576552218419825e-06, + "loss": 34.1719, + "step": 12696 + }, + { + "epoch": 0.12019007771603828, + "grad_norm": 279.2704772949219, + "learning_rate": 1.957646394314555e-06, + "loss": 19.5938, + "step": 12697 + }, + { + "epoch": 0.12019954373775334, + "grad_norm": 174.36550903320312, + "learning_rate": 1.957637565887003e-06, + "loss": 17.4766, + "step": 12698 + }, + { + "epoch": 0.12020900975946838, + "grad_norm": 234.8516387939453, + "learning_rate": 1.9576287365593337e-06, + "loss": 19.4023, + "step": 12699 + }, + { + "epoch": 0.12021847578118344, + "grad_norm": 258.3994140625, + "learning_rate": 1.957619906331556e-06, + "loss": 17.3125, + "step": 12700 + }, + { + "epoch": 0.1202279418028985, + "grad_norm": 446.39788818359375, + "learning_rate": 1.9576110752036773e-06, + "loss": 12.6953, + "step": 12701 + }, + { + "epoch": 0.12023740782461355, + "grad_norm": 332.29638671875, + "learning_rate": 1.9576022431757067e-06, + "loss": 39.6875, + "step": 12702 + }, + { + "epoch": 0.12024687384632861, + "grad_norm": 585.9398193359375, + "learning_rate": 1.9575934102476522e-06, + "loss": 28.3203, + "step": 12703 + }, + { + "epoch": 0.12025633986804365, + "grad_norm": 1278.21923828125, + "learning_rate": 1.9575845764195224e-06, + "loss": 65.4688, + "step": 12704 + }, + { + "epoch": 0.12026580588975871, + "grad_norm": 272.8926696777344, + "learning_rate": 1.9575757416913255e-06, + "loss": 26.8125, + "step": 12705 + }, + { + "epoch": 0.12027527191147376, + "grad_norm": 232.4720001220703, + "learning_rate": 1.9575669060630698e-06, + "loss": 23.4492, + "step": 12706 + }, + { + "epoch": 0.12028473793318882, + "grad_norm": 2.6663765907287598, + "learning_rate": 1.957558069534763e-06, + "loss": 0.8984, + "step": 12707 + }, + { + "epoch": 0.12029420395490387, + "grad_norm": 316.2236633300781, + "learning_rate": 1.957549232106414e-06, + "loss": 18.9766, + "step": 12708 + }, + { + "epoch": 0.12030366997661893, + "grad_norm": 420.1238708496094, + "learning_rate": 1.9575403937780313e-06, + "loss": 37.8281, + "step": 12709 + }, + { + "epoch": 0.12031313599833399, + "grad_norm": 402.8753356933594, + "learning_rate": 1.957531554549623e-06, + "loss": 37.5625, + "step": 12710 + }, + { + "epoch": 0.12032260202004903, + "grad_norm": 3.206618070602417, + "learning_rate": 1.9575227144211968e-06, + "loss": 0.9663, + "step": 12711 + }, + { + "epoch": 0.12033206804176409, + "grad_norm": 869.1317749023438, + "learning_rate": 1.957513873392762e-06, + "loss": 37.9297, + "step": 12712 + }, + { + "epoch": 0.12034153406347914, + "grad_norm": 3.8954946994781494, + "learning_rate": 1.9575050314643266e-06, + "loss": 0.981, + "step": 12713 + }, + { + "epoch": 0.1203510000851942, + "grad_norm": 336.65765380859375, + "learning_rate": 1.957496188635898e-06, + "loss": 20.3516, + "step": 12714 + }, + { + "epoch": 0.12036046610690924, + "grad_norm": 320.8485107421875, + "learning_rate": 1.957487344907486e-06, + "loss": 20.9062, + "step": 12715 + }, + { + "epoch": 0.1203699321286243, + "grad_norm": 960.0255737304688, + "learning_rate": 1.957478500279098e-06, + "loss": 38.6562, + "step": 12716 + }, + { + "epoch": 0.12037939815033935, + "grad_norm": 579.8353271484375, + "learning_rate": 1.9574696547507424e-06, + "loss": 10.9961, + "step": 12717 + }, + { + "epoch": 0.12038886417205441, + "grad_norm": 420.3601379394531, + "learning_rate": 1.9574608083224274e-06, + "loss": 26.3438, + "step": 12718 + }, + { + "epoch": 0.12039833019376947, + "grad_norm": 466.46856689453125, + "learning_rate": 1.957451960994162e-06, + "loss": 31.75, + "step": 12719 + }, + { + "epoch": 0.12040779621548452, + "grad_norm": 3.0027525424957275, + "learning_rate": 1.957443112765954e-06, + "loss": 0.8657, + "step": 12720 + }, + { + "epoch": 0.12041726223719958, + "grad_norm": 573.90283203125, + "learning_rate": 1.957434263637811e-06, + "loss": 71.4688, + "step": 12721 + }, + { + "epoch": 0.12042672825891462, + "grad_norm": 311.4107666015625, + "learning_rate": 1.957425413609743e-06, + "loss": 28.0469, + "step": 12722 + }, + { + "epoch": 0.12043619428062968, + "grad_norm": 573.556884765625, + "learning_rate": 1.9574165626817567e-06, + "loss": 43.3125, + "step": 12723 + }, + { + "epoch": 0.12044566030234473, + "grad_norm": 286.65057373046875, + "learning_rate": 1.9574077108538616e-06, + "loss": 15.7891, + "step": 12724 + }, + { + "epoch": 0.12045512632405979, + "grad_norm": 368.9739990234375, + "learning_rate": 1.9573988581260656e-06, + "loss": 15.9375, + "step": 12725 + }, + { + "epoch": 0.12046459234577483, + "grad_norm": 622.53662109375, + "learning_rate": 1.9573900044983767e-06, + "loss": 45.1094, + "step": 12726 + }, + { + "epoch": 0.1204740583674899, + "grad_norm": 212.1096649169922, + "learning_rate": 1.9573811499708033e-06, + "loss": 12.4219, + "step": 12727 + }, + { + "epoch": 0.12048352438920495, + "grad_norm": 275.3179931640625, + "learning_rate": 1.9573722945433545e-06, + "loss": 30.9688, + "step": 12728 + }, + { + "epoch": 0.12049299041092, + "grad_norm": 344.63623046875, + "learning_rate": 1.9573634382160377e-06, + "loss": 24.7891, + "step": 12729 + }, + { + "epoch": 0.12050245643263506, + "grad_norm": 361.9876403808594, + "learning_rate": 1.957354580988861e-06, + "loss": 35.5781, + "step": 12730 + }, + { + "epoch": 0.1205119224543501, + "grad_norm": 235.75445556640625, + "learning_rate": 1.957345722861834e-06, + "loss": 20.0469, + "step": 12731 + }, + { + "epoch": 0.12052138847606517, + "grad_norm": 211.69769287109375, + "learning_rate": 1.9573368638349643e-06, + "loss": 17.1055, + "step": 12732 + }, + { + "epoch": 0.12053085449778021, + "grad_norm": 544.2467651367188, + "learning_rate": 1.95732800390826e-06, + "loss": 55.875, + "step": 12733 + }, + { + "epoch": 0.12054032051949527, + "grad_norm": 413.5046081542969, + "learning_rate": 1.9573191430817293e-06, + "loss": 25.9688, + "step": 12734 + }, + { + "epoch": 0.12054978654121032, + "grad_norm": 344.73724365234375, + "learning_rate": 1.9573102813553814e-06, + "loss": 20.9453, + "step": 12735 + }, + { + "epoch": 0.12055925256292538, + "grad_norm": 647.0028076171875, + "learning_rate": 1.957301418729224e-06, + "loss": 23.5156, + "step": 12736 + }, + { + "epoch": 0.12056871858464044, + "grad_norm": 336.85797119140625, + "learning_rate": 1.957292555203265e-06, + "loss": 20.5273, + "step": 12737 + }, + { + "epoch": 0.12057818460635548, + "grad_norm": 260.3353576660156, + "learning_rate": 1.957283690777514e-06, + "loss": 26.8594, + "step": 12738 + }, + { + "epoch": 0.12058765062807054, + "grad_norm": 480.9446105957031, + "learning_rate": 1.957274825451979e-06, + "loss": 33.8438, + "step": 12739 + }, + { + "epoch": 0.12059711664978559, + "grad_norm": 549.4722900390625, + "learning_rate": 1.957265959226667e-06, + "loss": 36.5469, + "step": 12740 + }, + { + "epoch": 0.12060658267150065, + "grad_norm": 523.4688720703125, + "learning_rate": 1.9572570921015873e-06, + "loss": 44.25, + "step": 12741 + }, + { + "epoch": 0.1206160486932157, + "grad_norm": 334.98406982421875, + "learning_rate": 1.9572482240767485e-06, + "loss": 34.9141, + "step": 12742 + }, + { + "epoch": 0.12062551471493076, + "grad_norm": 372.3293151855469, + "learning_rate": 1.9572393551521587e-06, + "loss": 24.9375, + "step": 12743 + }, + { + "epoch": 0.12063498073664582, + "grad_norm": 219.6245880126953, + "learning_rate": 1.9572304853278263e-06, + "loss": 30.75, + "step": 12744 + }, + { + "epoch": 0.12064444675836086, + "grad_norm": 290.9974365234375, + "learning_rate": 1.957221614603759e-06, + "loss": 21.3828, + "step": 12745 + }, + { + "epoch": 0.12065391278007592, + "grad_norm": 299.5206604003906, + "learning_rate": 1.9572127429799662e-06, + "loss": 23.2656, + "step": 12746 + }, + { + "epoch": 0.12066337880179097, + "grad_norm": 253.7771759033203, + "learning_rate": 1.9572038704564554e-06, + "loss": 16.8438, + "step": 12747 + }, + { + "epoch": 0.12067284482350603, + "grad_norm": 2.573437452316284, + "learning_rate": 1.9571949970332355e-06, + "loss": 0.9023, + "step": 12748 + }, + { + "epoch": 0.12068231084522107, + "grad_norm": 354.5247497558594, + "learning_rate": 1.9571861227103146e-06, + "loss": 26.8516, + "step": 12749 + }, + { + "epoch": 0.12069177686693613, + "grad_norm": 230.3568572998047, + "learning_rate": 1.9571772474877e-06, + "loss": 22.6719, + "step": 12750 + }, + { + "epoch": 0.12070124288865118, + "grad_norm": 422.4319152832031, + "learning_rate": 1.957168371365402e-06, + "loss": 43.5312, + "step": 12751 + }, + { + "epoch": 0.12071070891036624, + "grad_norm": 816.5394287109375, + "learning_rate": 1.957159494343428e-06, + "loss": 61.8906, + "step": 12752 + }, + { + "epoch": 0.1207201749320813, + "grad_norm": 222.64309692382812, + "learning_rate": 1.9571506164217865e-06, + "loss": 26.75, + "step": 12753 + }, + { + "epoch": 0.12072964095379635, + "grad_norm": 579.9384765625, + "learning_rate": 1.957141737600485e-06, + "loss": 56.1016, + "step": 12754 + }, + { + "epoch": 0.1207391069755114, + "grad_norm": 368.86871337890625, + "learning_rate": 1.957132857879533e-06, + "loss": 26.0078, + "step": 12755 + }, + { + "epoch": 0.12074857299722645, + "grad_norm": 577.5422973632812, + "learning_rate": 1.9571239772589383e-06, + "loss": 54.1406, + "step": 12756 + }, + { + "epoch": 0.12075803901894151, + "grad_norm": 203.0275115966797, + "learning_rate": 1.9571150957387094e-06, + "loss": 20.5977, + "step": 12757 + }, + { + "epoch": 0.12076750504065656, + "grad_norm": 610.0105590820312, + "learning_rate": 1.9571062133188544e-06, + "loss": 57.5781, + "step": 12758 + }, + { + "epoch": 0.12077697106237162, + "grad_norm": 2.9349350929260254, + "learning_rate": 1.9570973299993822e-06, + "loss": 0.998, + "step": 12759 + }, + { + "epoch": 0.12078643708408666, + "grad_norm": 479.77056884765625, + "learning_rate": 1.9570884457803005e-06, + "loss": 36.3906, + "step": 12760 + }, + { + "epoch": 0.12079590310580172, + "grad_norm": 343.5157470703125, + "learning_rate": 1.9570795606616177e-06, + "loss": 28.4922, + "step": 12761 + }, + { + "epoch": 0.12080536912751678, + "grad_norm": 1119.35986328125, + "learning_rate": 1.9570706746433427e-06, + "loss": 18.9375, + "step": 12762 + }, + { + "epoch": 0.12081483514923183, + "grad_norm": 2.911341428756714, + "learning_rate": 1.9570617877254836e-06, + "loss": 0.9526, + "step": 12763 + }, + { + "epoch": 0.12082430117094689, + "grad_norm": 238.97930908203125, + "learning_rate": 1.957052899908048e-06, + "loss": 15.5156, + "step": 12764 + }, + { + "epoch": 0.12083376719266194, + "grad_norm": 294.512939453125, + "learning_rate": 1.9570440111910455e-06, + "loss": 15.9844, + "step": 12765 + }, + { + "epoch": 0.120843233214377, + "grad_norm": 798.186279296875, + "learning_rate": 1.957035121574484e-06, + "loss": 49.2969, + "step": 12766 + }, + { + "epoch": 0.12085269923609204, + "grad_norm": 642.0094604492188, + "learning_rate": 1.957026231058371e-06, + "loss": 32.7812, + "step": 12767 + }, + { + "epoch": 0.1208621652578071, + "grad_norm": 646.843994140625, + "learning_rate": 1.9570173396427167e-06, + "loss": 31.9375, + "step": 12768 + }, + { + "epoch": 0.12087163127952215, + "grad_norm": 226.69871520996094, + "learning_rate": 1.957008447327528e-06, + "loss": 19.2188, + "step": 12769 + }, + { + "epoch": 0.12088109730123721, + "grad_norm": 390.6223449707031, + "learning_rate": 1.956999554112813e-06, + "loss": 31.9062, + "step": 12770 + }, + { + "epoch": 0.12089056332295227, + "grad_norm": 275.75982666015625, + "learning_rate": 1.9569906599985813e-06, + "loss": 21.5234, + "step": 12771 + }, + { + "epoch": 0.12090002934466731, + "grad_norm": 269.9547119140625, + "learning_rate": 1.9569817649848405e-06, + "loss": 18.9766, + "step": 12772 + }, + { + "epoch": 0.12090949536638237, + "grad_norm": 298.5823669433594, + "learning_rate": 1.956972869071599e-06, + "loss": 29.8125, + "step": 12773 + }, + { + "epoch": 0.12091896138809742, + "grad_norm": 589.9330444335938, + "learning_rate": 1.9569639722588654e-06, + "loss": 42.0508, + "step": 12774 + }, + { + "epoch": 0.12092842740981248, + "grad_norm": 574.8154907226562, + "learning_rate": 1.9569550745466475e-06, + "loss": 38.3516, + "step": 12775 + }, + { + "epoch": 0.12093789343152753, + "grad_norm": 389.780517578125, + "learning_rate": 1.9569461759349547e-06, + "loss": 38.3594, + "step": 12776 + }, + { + "epoch": 0.12094735945324259, + "grad_norm": 488.4895324707031, + "learning_rate": 1.9569372764237943e-06, + "loss": 43.0391, + "step": 12777 + }, + { + "epoch": 0.12095682547495763, + "grad_norm": 1126.3643798828125, + "learning_rate": 1.9569283760131755e-06, + "loss": 43.5117, + "step": 12778 + }, + { + "epoch": 0.12096629149667269, + "grad_norm": 1135.9547119140625, + "learning_rate": 1.956919474703106e-06, + "loss": 24.5312, + "step": 12779 + }, + { + "epoch": 0.12097575751838775, + "grad_norm": 462.3778991699219, + "learning_rate": 1.9569105724935946e-06, + "loss": 36.375, + "step": 12780 + }, + { + "epoch": 0.1209852235401028, + "grad_norm": 382.5169372558594, + "learning_rate": 1.9569016693846494e-06, + "loss": 22.5781, + "step": 12781 + }, + { + "epoch": 0.12099468956181786, + "grad_norm": 254.76707458496094, + "learning_rate": 1.9568927653762785e-06, + "loss": 20.1875, + "step": 12782 + }, + { + "epoch": 0.1210041555835329, + "grad_norm": 379.78948974609375, + "learning_rate": 1.9568838604684916e-06, + "loss": 23.6367, + "step": 12783 + }, + { + "epoch": 0.12101362160524796, + "grad_norm": 540.61669921875, + "learning_rate": 1.9568749546612954e-06, + "loss": 29.4219, + "step": 12784 + }, + { + "epoch": 0.12102308762696301, + "grad_norm": 337.7766418457031, + "learning_rate": 1.956866047954699e-06, + "loss": 23.3281, + "step": 12785 + }, + { + "epoch": 0.12103255364867807, + "grad_norm": 250.90341186523438, + "learning_rate": 1.956857140348711e-06, + "loss": 25.7578, + "step": 12786 + }, + { + "epoch": 0.12104201967039313, + "grad_norm": 385.5437316894531, + "learning_rate": 1.9568482318433397e-06, + "loss": 22.6797, + "step": 12787 + }, + { + "epoch": 0.12105148569210818, + "grad_norm": 374.1923522949219, + "learning_rate": 1.9568393224385926e-06, + "loss": 26.6094, + "step": 12788 + }, + { + "epoch": 0.12106095171382324, + "grad_norm": 254.6243896484375, + "learning_rate": 1.9568304121344794e-06, + "loss": 17.5547, + "step": 12789 + }, + { + "epoch": 0.12107041773553828, + "grad_norm": 423.20220947265625, + "learning_rate": 1.9568215009310075e-06, + "loss": 16.8086, + "step": 12790 + }, + { + "epoch": 0.12107988375725334, + "grad_norm": 445.0322265625, + "learning_rate": 1.956812588828186e-06, + "loss": 41.3125, + "step": 12791 + }, + { + "epoch": 0.12108934977896839, + "grad_norm": 525.0223388671875, + "learning_rate": 1.9568036758260227e-06, + "loss": 39.1094, + "step": 12792 + }, + { + "epoch": 0.12109881580068345, + "grad_norm": 259.3801574707031, + "learning_rate": 1.9567947619245263e-06, + "loss": 16.0078, + "step": 12793 + }, + { + "epoch": 0.1211082818223985, + "grad_norm": 491.8171081542969, + "learning_rate": 1.956785847123705e-06, + "loss": 30.9375, + "step": 12794 + }, + { + "epoch": 0.12111774784411355, + "grad_norm": 2.9948995113372803, + "learning_rate": 1.956776931423567e-06, + "loss": 0.9121, + "step": 12795 + }, + { + "epoch": 0.12112721386582861, + "grad_norm": 465.5750427246094, + "learning_rate": 1.9567680148241214e-06, + "loss": 52.0938, + "step": 12796 + }, + { + "epoch": 0.12113667988754366, + "grad_norm": 418.5020446777344, + "learning_rate": 1.956759097325376e-06, + "loss": 49.6094, + "step": 12797 + }, + { + "epoch": 0.12114614590925872, + "grad_norm": 344.0052795410156, + "learning_rate": 1.9567501789273392e-06, + "loss": 18.7188, + "step": 12798 + }, + { + "epoch": 0.12115561193097377, + "grad_norm": 3.202160596847534, + "learning_rate": 1.9567412596300194e-06, + "loss": 1.0024, + "step": 12799 + }, + { + "epoch": 0.12116507795268883, + "grad_norm": 429.97845458984375, + "learning_rate": 1.956732339433425e-06, + "loss": 25.0156, + "step": 12800 + }, + { + "epoch": 0.12117454397440387, + "grad_norm": 432.9606628417969, + "learning_rate": 1.9567234183375645e-06, + "loss": 32.7266, + "step": 12801 + }, + { + "epoch": 0.12118400999611893, + "grad_norm": 444.66912841796875, + "learning_rate": 1.9567144963424464e-06, + "loss": 18.6484, + "step": 12802 + }, + { + "epoch": 0.12119347601783398, + "grad_norm": 384.7479553222656, + "learning_rate": 1.9567055734480788e-06, + "loss": 20.4453, + "step": 12803 + }, + { + "epoch": 0.12120294203954904, + "grad_norm": 555.91455078125, + "learning_rate": 1.95669664965447e-06, + "loss": 23.0547, + "step": 12804 + }, + { + "epoch": 0.1212124080612641, + "grad_norm": 343.0887756347656, + "learning_rate": 1.956687724961629e-06, + "loss": 41.0469, + "step": 12805 + }, + { + "epoch": 0.12122187408297914, + "grad_norm": 394.61810302734375, + "learning_rate": 1.9566787993695636e-06, + "loss": 43.625, + "step": 12806 + }, + { + "epoch": 0.1212313401046942, + "grad_norm": 346.7062683105469, + "learning_rate": 1.9566698728782823e-06, + "loss": 20.7891, + "step": 12807 + }, + { + "epoch": 0.12124080612640925, + "grad_norm": 371.4486083984375, + "learning_rate": 1.9566609454877935e-06, + "loss": 37.0938, + "step": 12808 + }, + { + "epoch": 0.12125027214812431, + "grad_norm": 614.7605590820312, + "learning_rate": 1.9566520171981055e-06, + "loss": 22.8984, + "step": 12809 + }, + { + "epoch": 0.12125973816983936, + "grad_norm": 292.063720703125, + "learning_rate": 1.9566430880092272e-06, + "loss": 26.3281, + "step": 12810 + }, + { + "epoch": 0.12126920419155442, + "grad_norm": 221.84396362304688, + "learning_rate": 1.9566341579211663e-06, + "loss": 25.5, + "step": 12811 + }, + { + "epoch": 0.12127867021326946, + "grad_norm": 3.020693063735962, + "learning_rate": 1.956625226933932e-06, + "loss": 0.9543, + "step": 12812 + }, + { + "epoch": 0.12128813623498452, + "grad_norm": 859.072265625, + "learning_rate": 1.9566162950475316e-06, + "loss": 53.1328, + "step": 12813 + }, + { + "epoch": 0.12129760225669958, + "grad_norm": 386.50274658203125, + "learning_rate": 1.9566073622619744e-06, + "loss": 35.5078, + "step": 12814 + }, + { + "epoch": 0.12130706827841463, + "grad_norm": 188.4462127685547, + "learning_rate": 1.956598428577269e-06, + "loss": 19.3828, + "step": 12815 + }, + { + "epoch": 0.12131653430012969, + "grad_norm": 301.5638427734375, + "learning_rate": 1.9565894939934226e-06, + "loss": 20.0234, + "step": 12816 + }, + { + "epoch": 0.12132600032184473, + "grad_norm": 954.239013671875, + "learning_rate": 1.9565805585104445e-06, + "loss": 49.7656, + "step": 12817 + }, + { + "epoch": 0.1213354663435598, + "grad_norm": 197.0551300048828, + "learning_rate": 1.956571622128343e-06, + "loss": 23.1094, + "step": 12818 + }, + { + "epoch": 0.12134493236527484, + "grad_norm": 736.450439453125, + "learning_rate": 1.9565626848471263e-06, + "loss": 60.1875, + "step": 12819 + }, + { + "epoch": 0.1213543983869899, + "grad_norm": 302.23455810546875, + "learning_rate": 1.956553746666803e-06, + "loss": 24.1328, + "step": 12820 + }, + { + "epoch": 0.12136386440870495, + "grad_norm": 513.8224487304688, + "learning_rate": 1.956544807587381e-06, + "loss": 30.5469, + "step": 12821 + }, + { + "epoch": 0.12137333043042, + "grad_norm": 593.11474609375, + "learning_rate": 1.9565358676088697e-06, + "loss": 21.8438, + "step": 12822 + }, + { + "epoch": 0.12138279645213507, + "grad_norm": 283.11285400390625, + "learning_rate": 1.956526926731277e-06, + "loss": 23.0, + "step": 12823 + }, + { + "epoch": 0.12139226247385011, + "grad_norm": 254.65650939941406, + "learning_rate": 1.956517984954611e-06, + "loss": 16.1719, + "step": 12824 + }, + { + "epoch": 0.12140172849556517, + "grad_norm": 172.94398498535156, + "learning_rate": 1.9565090422788803e-06, + "loss": 20.6562, + "step": 12825 + }, + { + "epoch": 0.12141119451728022, + "grad_norm": 347.8392028808594, + "learning_rate": 1.956500098704093e-06, + "loss": 39.5938, + "step": 12826 + }, + { + "epoch": 0.12142066053899528, + "grad_norm": 464.4562072753906, + "learning_rate": 1.956491154230258e-06, + "loss": 51.8906, + "step": 12827 + }, + { + "epoch": 0.12143012656071032, + "grad_norm": 640.032958984375, + "learning_rate": 1.9564822088573838e-06, + "loss": 46.1016, + "step": 12828 + }, + { + "epoch": 0.12143959258242538, + "grad_norm": 419.6860656738281, + "learning_rate": 1.9564732625854785e-06, + "loss": 49.1641, + "step": 12829 + }, + { + "epoch": 0.12144905860414044, + "grad_norm": 615.396240234375, + "learning_rate": 1.9564643154145503e-06, + "loss": 14.3516, + "step": 12830 + }, + { + "epoch": 0.12145852462585549, + "grad_norm": 421.32958984375, + "learning_rate": 1.956455367344608e-06, + "loss": 41.5781, + "step": 12831 + }, + { + "epoch": 0.12146799064757055, + "grad_norm": 335.291748046875, + "learning_rate": 1.95644641837566e-06, + "loss": 21.8281, + "step": 12832 + }, + { + "epoch": 0.1214774566692856, + "grad_norm": 2.9722869396209717, + "learning_rate": 1.9564374685077145e-06, + "loss": 0.8809, + "step": 12833 + }, + { + "epoch": 0.12148692269100066, + "grad_norm": 194.58734130859375, + "learning_rate": 1.95642851774078e-06, + "loss": 21.5352, + "step": 12834 + }, + { + "epoch": 0.1214963887127157, + "grad_norm": 348.08148193359375, + "learning_rate": 1.9564195660748646e-06, + "loss": 28.9297, + "step": 12835 + }, + { + "epoch": 0.12150585473443076, + "grad_norm": 357.331787109375, + "learning_rate": 1.9564106135099772e-06, + "loss": 33.5938, + "step": 12836 + }, + { + "epoch": 0.12151532075614581, + "grad_norm": 495.7537536621094, + "learning_rate": 1.9564016600461266e-06, + "loss": 26.0156, + "step": 12837 + }, + { + "epoch": 0.12152478677786087, + "grad_norm": 627.365234375, + "learning_rate": 1.95639270568332e-06, + "loss": 44.0312, + "step": 12838 + }, + { + "epoch": 0.12153425279957593, + "grad_norm": 794.3563232421875, + "learning_rate": 1.9563837504215668e-06, + "loss": 64.625, + "step": 12839 + }, + { + "epoch": 0.12154371882129097, + "grad_norm": 357.7215576171875, + "learning_rate": 1.956374794260875e-06, + "loss": 22.8125, + "step": 12840 + }, + { + "epoch": 0.12155318484300603, + "grad_norm": 464.4410095214844, + "learning_rate": 1.956365837201253e-06, + "loss": 56.5938, + "step": 12841 + }, + { + "epoch": 0.12156265086472108, + "grad_norm": 288.0769348144531, + "learning_rate": 1.9563568792427092e-06, + "loss": 22.4688, + "step": 12842 + }, + { + "epoch": 0.12157211688643614, + "grad_norm": 259.2090759277344, + "learning_rate": 1.9563479203852526e-06, + "loss": 27.4688, + "step": 12843 + }, + { + "epoch": 0.12158158290815119, + "grad_norm": 375.5632019042969, + "learning_rate": 1.9563389606288907e-06, + "loss": 25.6406, + "step": 12844 + }, + { + "epoch": 0.12159104892986625, + "grad_norm": 213.68679809570312, + "learning_rate": 1.9563299999736325e-06, + "loss": 18.2734, + "step": 12845 + }, + { + "epoch": 0.12160051495158129, + "grad_norm": 296.1316223144531, + "learning_rate": 1.9563210384194864e-06, + "loss": 16.7188, + "step": 12846 + }, + { + "epoch": 0.12160998097329635, + "grad_norm": 361.642578125, + "learning_rate": 1.9563120759664613e-06, + "loss": 14.6055, + "step": 12847 + }, + { + "epoch": 0.12161944699501141, + "grad_norm": 443.8876647949219, + "learning_rate": 1.956303112614564e-06, + "loss": 39.4844, + "step": 12848 + }, + { + "epoch": 0.12162891301672646, + "grad_norm": 195.14927673339844, + "learning_rate": 1.9562941483638046e-06, + "loss": 18.9688, + "step": 12849 + }, + { + "epoch": 0.12163837903844152, + "grad_norm": 411.8863525390625, + "learning_rate": 1.9562851832141913e-06, + "loss": 30.6875, + "step": 12850 + }, + { + "epoch": 0.12164784506015656, + "grad_norm": 398.8284912109375, + "learning_rate": 1.9562762171657316e-06, + "loss": 23.3281, + "step": 12851 + }, + { + "epoch": 0.12165731108187162, + "grad_norm": 316.56463623046875, + "learning_rate": 1.9562672502184343e-06, + "loss": 28.7891, + "step": 12852 + }, + { + "epoch": 0.12166677710358667, + "grad_norm": 282.7109375, + "learning_rate": 1.956258282372308e-06, + "loss": 16.25, + "step": 12853 + }, + { + "epoch": 0.12167624312530173, + "grad_norm": 330.50115966796875, + "learning_rate": 1.9562493136273614e-06, + "loss": 23.0547, + "step": 12854 + }, + { + "epoch": 0.12168570914701678, + "grad_norm": 179.54234313964844, + "learning_rate": 1.9562403439836025e-06, + "loss": 21.4297, + "step": 12855 + }, + { + "epoch": 0.12169517516873184, + "grad_norm": 338.7828063964844, + "learning_rate": 1.95623137344104e-06, + "loss": 26.2812, + "step": 12856 + }, + { + "epoch": 0.1217046411904469, + "grad_norm": 654.740478515625, + "learning_rate": 1.956222401999682e-06, + "loss": 56.4375, + "step": 12857 + }, + { + "epoch": 0.12171410721216194, + "grad_norm": 935.6858520507812, + "learning_rate": 1.9562134296595375e-06, + "loss": 30.4531, + "step": 12858 + }, + { + "epoch": 0.121723573233877, + "grad_norm": 606.5556030273438, + "learning_rate": 1.9562044564206146e-06, + "loss": 33.4531, + "step": 12859 + }, + { + "epoch": 0.12173303925559205, + "grad_norm": 245.40435791015625, + "learning_rate": 1.9561954822829216e-06, + "loss": 11.9844, + "step": 12860 + }, + { + "epoch": 0.12174250527730711, + "grad_norm": 227.13917541503906, + "learning_rate": 1.956186507246467e-06, + "loss": 20.1328, + "step": 12861 + }, + { + "epoch": 0.12175197129902215, + "grad_norm": 280.1566467285156, + "learning_rate": 1.956177531311259e-06, + "loss": 24.2656, + "step": 12862 + }, + { + "epoch": 0.12176143732073721, + "grad_norm": 447.95849609375, + "learning_rate": 1.956168554477307e-06, + "loss": 36.6094, + "step": 12863 + }, + { + "epoch": 0.12177090334245226, + "grad_norm": 231.15951538085938, + "learning_rate": 1.956159576744618e-06, + "loss": 20.25, + "step": 12864 + }, + { + "epoch": 0.12178036936416732, + "grad_norm": 248.46237182617188, + "learning_rate": 1.956150598113202e-06, + "loss": 17.7266, + "step": 12865 + }, + { + "epoch": 0.12178983538588238, + "grad_norm": 470.26422119140625, + "learning_rate": 1.9561416185830663e-06, + "loss": 17.9297, + "step": 12866 + }, + { + "epoch": 0.12179930140759743, + "grad_norm": 3.353729009628296, + "learning_rate": 1.9561326381542195e-06, + "loss": 0.876, + "step": 12867 + }, + { + "epoch": 0.12180876742931249, + "grad_norm": 648.5407104492188, + "learning_rate": 1.9561236568266703e-06, + "loss": 33.9375, + "step": 12868 + }, + { + "epoch": 0.12181823345102753, + "grad_norm": 381.4530944824219, + "learning_rate": 1.956114674600427e-06, + "loss": 33.7344, + "step": 12869 + }, + { + "epoch": 0.12182769947274259, + "grad_norm": 300.2816467285156, + "learning_rate": 1.9561056914754982e-06, + "loss": 22.7969, + "step": 12870 + }, + { + "epoch": 0.12183716549445764, + "grad_norm": 568.40283203125, + "learning_rate": 1.9560967074518923e-06, + "loss": 25.75, + "step": 12871 + }, + { + "epoch": 0.1218466315161727, + "grad_norm": 264.9549255371094, + "learning_rate": 1.9560877225296174e-06, + "loss": 19.3672, + "step": 12872 + }, + { + "epoch": 0.12185609753788776, + "grad_norm": 338.7403869628906, + "learning_rate": 1.9560787367086826e-06, + "loss": 34.8281, + "step": 12873 + }, + { + "epoch": 0.1218655635596028, + "grad_norm": 562.49560546875, + "learning_rate": 1.956069749989096e-06, + "loss": 38.6328, + "step": 12874 + }, + { + "epoch": 0.12187502958131786, + "grad_norm": 297.9535217285156, + "learning_rate": 1.9560607623708657e-06, + "loss": 26.6875, + "step": 12875 + }, + { + "epoch": 0.12188449560303291, + "grad_norm": 253.1693115234375, + "learning_rate": 1.9560517738540007e-06, + "loss": 24.2578, + "step": 12876 + }, + { + "epoch": 0.12189396162474797, + "grad_norm": 633.6488647460938, + "learning_rate": 1.956042784438509e-06, + "loss": 22.1328, + "step": 12877 + }, + { + "epoch": 0.12190342764646302, + "grad_norm": 666.5842895507812, + "learning_rate": 1.9560337941243995e-06, + "loss": 29.9688, + "step": 12878 + }, + { + "epoch": 0.12191289366817808, + "grad_norm": 378.84356689453125, + "learning_rate": 1.9560248029116806e-06, + "loss": 23.2969, + "step": 12879 + }, + { + "epoch": 0.12192235968989312, + "grad_norm": 422.3988037109375, + "learning_rate": 1.9560158108003605e-06, + "loss": 9.3516, + "step": 12880 + }, + { + "epoch": 0.12193182571160818, + "grad_norm": 210.885498046875, + "learning_rate": 1.9560068177904474e-06, + "loss": 24.0156, + "step": 12881 + }, + { + "epoch": 0.12194129173332324, + "grad_norm": 200.56570434570312, + "learning_rate": 1.95599782388195e-06, + "loss": 15.5, + "step": 12882 + }, + { + "epoch": 0.12195075775503829, + "grad_norm": 286.90399169921875, + "learning_rate": 1.9559888290748773e-06, + "loss": 12.0117, + "step": 12883 + }, + { + "epoch": 0.12196022377675335, + "grad_norm": 3.1024329662323, + "learning_rate": 1.9559798333692373e-06, + "loss": 0.9419, + "step": 12884 + }, + { + "epoch": 0.1219696897984684, + "grad_norm": 196.38291931152344, + "learning_rate": 1.955970836765038e-06, + "loss": 13.1953, + "step": 12885 + }, + { + "epoch": 0.12197915582018345, + "grad_norm": 917.9822387695312, + "learning_rate": 1.9559618392622886e-06, + "loss": 42.7148, + "step": 12886 + }, + { + "epoch": 0.1219886218418985, + "grad_norm": 874.3707275390625, + "learning_rate": 1.955952840860997e-06, + "loss": 25.4141, + "step": 12887 + }, + { + "epoch": 0.12199808786361356, + "grad_norm": 497.7139587402344, + "learning_rate": 1.9559438415611723e-06, + "loss": 21.4844, + "step": 12888 + }, + { + "epoch": 0.1220075538853286, + "grad_norm": 743.5887451171875, + "learning_rate": 1.9559348413628226e-06, + "loss": 42.5078, + "step": 12889 + }, + { + "epoch": 0.12201701990704367, + "grad_norm": 734.49560546875, + "learning_rate": 1.955925840265956e-06, + "loss": 22.875, + "step": 12890 + }, + { + "epoch": 0.12202648592875873, + "grad_norm": 177.7028045654297, + "learning_rate": 1.9559168382705814e-06, + "loss": 22.6797, + "step": 12891 + }, + { + "epoch": 0.12203595195047377, + "grad_norm": 338.0777587890625, + "learning_rate": 1.9559078353767072e-06, + "loss": 12.0898, + "step": 12892 + }, + { + "epoch": 0.12204541797218883, + "grad_norm": 305.81951904296875, + "learning_rate": 1.955898831584342e-06, + "loss": 23.9453, + "step": 12893 + }, + { + "epoch": 0.12205488399390388, + "grad_norm": 443.8160095214844, + "learning_rate": 1.955889826893494e-06, + "loss": 26.6016, + "step": 12894 + }, + { + "epoch": 0.12206435001561894, + "grad_norm": 383.26995849609375, + "learning_rate": 1.9558808213041716e-06, + "loss": 24.4609, + "step": 12895 + }, + { + "epoch": 0.12207381603733398, + "grad_norm": 389.7901916503906, + "learning_rate": 1.9558718148163833e-06, + "loss": 20.8555, + "step": 12896 + }, + { + "epoch": 0.12208328205904904, + "grad_norm": 261.6724853515625, + "learning_rate": 1.955862807430138e-06, + "loss": 25.5938, + "step": 12897 + }, + { + "epoch": 0.12209274808076409, + "grad_norm": 1533.863525390625, + "learning_rate": 1.9558537991454436e-06, + "loss": 70.5156, + "step": 12898 + }, + { + "epoch": 0.12210221410247915, + "grad_norm": 302.7637939453125, + "learning_rate": 1.9558447899623088e-06, + "loss": 24.6797, + "step": 12899 + }, + { + "epoch": 0.12211168012419421, + "grad_norm": 3.008528709411621, + "learning_rate": 1.955835779880742e-06, + "loss": 0.834, + "step": 12900 + }, + { + "epoch": 0.12212114614590926, + "grad_norm": 514.1102905273438, + "learning_rate": 1.9558267689007523e-06, + "loss": 52.625, + "step": 12901 + }, + { + "epoch": 0.12213061216762432, + "grad_norm": 297.7421569824219, + "learning_rate": 1.955817757022347e-06, + "loss": 33.0469, + "step": 12902 + }, + { + "epoch": 0.12214007818933936, + "grad_norm": 347.09967041015625, + "learning_rate": 1.9558087442455357e-06, + "loss": 56.0156, + "step": 12903 + }, + { + "epoch": 0.12214954421105442, + "grad_norm": 375.9217529296875, + "learning_rate": 1.9557997305703264e-06, + "loss": 22.6406, + "step": 12904 + }, + { + "epoch": 0.12215901023276947, + "grad_norm": 481.5270080566406, + "learning_rate": 1.9557907159967272e-06, + "loss": 39.8281, + "step": 12905 + }, + { + "epoch": 0.12216847625448453, + "grad_norm": 180.36573791503906, + "learning_rate": 1.9557817005247467e-06, + "loss": 8.3867, + "step": 12906 + }, + { + "epoch": 0.12217794227619957, + "grad_norm": 182.1697540283203, + "learning_rate": 1.9557726841543938e-06, + "loss": 16.6719, + "step": 12907 + }, + { + "epoch": 0.12218740829791463, + "grad_norm": 247.44134521484375, + "learning_rate": 1.955763666885677e-06, + "loss": 13.5547, + "step": 12908 + }, + { + "epoch": 0.1221968743196297, + "grad_norm": 193.43699645996094, + "learning_rate": 1.955754648718604e-06, + "loss": 7.7031, + "step": 12909 + }, + { + "epoch": 0.12220634034134474, + "grad_norm": 257.4752502441406, + "learning_rate": 1.9557456296531844e-06, + "loss": 44.3984, + "step": 12910 + }, + { + "epoch": 0.1222158063630598, + "grad_norm": 339.0008850097656, + "learning_rate": 1.9557366096894254e-06, + "loss": 25.7422, + "step": 12911 + }, + { + "epoch": 0.12222527238477485, + "grad_norm": 475.4062805175781, + "learning_rate": 1.9557275888273368e-06, + "loss": 32.6406, + "step": 12912 + }, + { + "epoch": 0.1222347384064899, + "grad_norm": 3.213840961456299, + "learning_rate": 1.955718567066926e-06, + "loss": 0.9756, + "step": 12913 + }, + { + "epoch": 0.12224420442820495, + "grad_norm": 314.26116943359375, + "learning_rate": 1.955709544408202e-06, + "loss": 24.4375, + "step": 12914 + }, + { + "epoch": 0.12225367044992001, + "grad_norm": 239.54258728027344, + "learning_rate": 1.9557005208511736e-06, + "loss": 15.7266, + "step": 12915 + }, + { + "epoch": 0.12226313647163507, + "grad_norm": 662.0048828125, + "learning_rate": 1.955691496395849e-06, + "loss": 50.875, + "step": 12916 + }, + { + "epoch": 0.12227260249335012, + "grad_norm": 549.7974853515625, + "learning_rate": 1.955682471042236e-06, + "loss": 13.3789, + "step": 12917 + }, + { + "epoch": 0.12228206851506518, + "grad_norm": 362.7646484375, + "learning_rate": 1.9556734447903436e-06, + "loss": 32.1562, + "step": 12918 + }, + { + "epoch": 0.12229153453678022, + "grad_norm": 220.79637145996094, + "learning_rate": 1.9556644176401805e-06, + "loss": 19.8633, + "step": 12919 + }, + { + "epoch": 0.12230100055849528, + "grad_norm": 195.50770568847656, + "learning_rate": 1.955655389591755e-06, + "loss": 13.9492, + "step": 12920 + }, + { + "epoch": 0.12231046658021033, + "grad_norm": 485.8413391113281, + "learning_rate": 1.9556463606450757e-06, + "loss": 46.2031, + "step": 12921 + }, + { + "epoch": 0.12231993260192539, + "grad_norm": 317.5955505371094, + "learning_rate": 1.955637330800151e-06, + "loss": 46.6094, + "step": 12922 + }, + { + "epoch": 0.12232939862364044, + "grad_norm": 483.9559631347656, + "learning_rate": 1.9556283000569894e-06, + "loss": 25.0312, + "step": 12923 + }, + { + "epoch": 0.1223388646453555, + "grad_norm": 302.4587707519531, + "learning_rate": 1.9556192684155992e-06, + "loss": 17.9609, + "step": 12924 + }, + { + "epoch": 0.12234833066707056, + "grad_norm": 287.79644775390625, + "learning_rate": 1.955610235875989e-06, + "loss": 23.6328, + "step": 12925 + }, + { + "epoch": 0.1223577966887856, + "grad_norm": 280.3576965332031, + "learning_rate": 1.9556012024381675e-06, + "loss": 30.2109, + "step": 12926 + }, + { + "epoch": 0.12236726271050066, + "grad_norm": 486.7886047363281, + "learning_rate": 1.955592168102143e-06, + "loss": 17.0859, + "step": 12927 + }, + { + "epoch": 0.12237672873221571, + "grad_norm": 214.49075317382812, + "learning_rate": 1.9555831328679244e-06, + "loss": 23.3203, + "step": 12928 + }, + { + "epoch": 0.12238619475393077, + "grad_norm": 3.0250792503356934, + "learning_rate": 1.9555740967355194e-06, + "loss": 0.8203, + "step": 12929 + }, + { + "epoch": 0.12239566077564581, + "grad_norm": 305.53582763671875, + "learning_rate": 1.9555650597049368e-06, + "loss": 22.7188, + "step": 12930 + }, + { + "epoch": 0.12240512679736087, + "grad_norm": 756.9899291992188, + "learning_rate": 1.9555560217761854e-06, + "loss": 66.5625, + "step": 12931 + }, + { + "epoch": 0.12241459281907592, + "grad_norm": 401.5094299316406, + "learning_rate": 1.9555469829492733e-06, + "loss": 25.1641, + "step": 12932 + }, + { + "epoch": 0.12242405884079098, + "grad_norm": 453.7355651855469, + "learning_rate": 1.95553794322421e-06, + "loss": 36.0859, + "step": 12933 + }, + { + "epoch": 0.12243352486250604, + "grad_norm": 246.26846313476562, + "learning_rate": 1.9555289026010023e-06, + "loss": 26.3086, + "step": 12934 + }, + { + "epoch": 0.12244299088422109, + "grad_norm": 308.0872497558594, + "learning_rate": 1.9555198610796597e-06, + "loss": 46.5, + "step": 12935 + }, + { + "epoch": 0.12245245690593615, + "grad_norm": 301.7162780761719, + "learning_rate": 1.955510818660191e-06, + "loss": 22.6328, + "step": 12936 + }, + { + "epoch": 0.12246192292765119, + "grad_norm": 259.569091796875, + "learning_rate": 1.955501775342604e-06, + "loss": 21.9844, + "step": 12937 + }, + { + "epoch": 0.12247138894936625, + "grad_norm": 1096.7939453125, + "learning_rate": 1.9554927311269077e-06, + "loss": 50.8125, + "step": 12938 + }, + { + "epoch": 0.1224808549710813, + "grad_norm": 328.620849609375, + "learning_rate": 1.9554836860131104e-06, + "loss": 14.6719, + "step": 12939 + }, + { + "epoch": 0.12249032099279636, + "grad_norm": 217.0626678466797, + "learning_rate": 1.9554746400012202e-06, + "loss": 16.7344, + "step": 12940 + }, + { + "epoch": 0.1224997870145114, + "grad_norm": 171.02442932128906, + "learning_rate": 1.9554655930912464e-06, + "loss": 8.5625, + "step": 12941 + }, + { + "epoch": 0.12250925303622646, + "grad_norm": 257.93890380859375, + "learning_rate": 1.955456545283197e-06, + "loss": 19.1562, + "step": 12942 + }, + { + "epoch": 0.12251871905794152, + "grad_norm": 516.6427612304688, + "learning_rate": 1.95544749657708e-06, + "loss": 40.9688, + "step": 12943 + }, + { + "epoch": 0.12252818507965657, + "grad_norm": 294.7338562011719, + "learning_rate": 1.9554384469729053e-06, + "loss": 20.9375, + "step": 12944 + }, + { + "epoch": 0.12253765110137163, + "grad_norm": 305.6399230957031, + "learning_rate": 1.9554293964706804e-06, + "loss": 22.5078, + "step": 12945 + }, + { + "epoch": 0.12254711712308668, + "grad_norm": 646.1670532226562, + "learning_rate": 1.9554203450704142e-06, + "loss": 54.8438, + "step": 12946 + }, + { + "epoch": 0.12255658314480174, + "grad_norm": 492.43426513671875, + "learning_rate": 1.955411292772115e-06, + "loss": 27.1094, + "step": 12947 + }, + { + "epoch": 0.12256604916651678, + "grad_norm": 949.3375244140625, + "learning_rate": 1.955402239575791e-06, + "loss": 23.7422, + "step": 12948 + }, + { + "epoch": 0.12257551518823184, + "grad_norm": 269.2207336425781, + "learning_rate": 1.955393185481451e-06, + "loss": 16.875, + "step": 12949 + }, + { + "epoch": 0.12258498120994689, + "grad_norm": 335.1466369628906, + "learning_rate": 1.9553841304891037e-06, + "loss": 19.8125, + "step": 12950 + }, + { + "epoch": 0.12259444723166195, + "grad_norm": 386.48260498046875, + "learning_rate": 1.955375074598758e-06, + "loss": 16.3281, + "step": 12951 + }, + { + "epoch": 0.12260391325337701, + "grad_norm": 342.0567321777344, + "learning_rate": 1.955366017810421e-06, + "loss": 29.3047, + "step": 12952 + }, + { + "epoch": 0.12261337927509205, + "grad_norm": 401.46673583984375, + "learning_rate": 1.9553569601241027e-06, + "loss": 37.25, + "step": 12953 + }, + { + "epoch": 0.12262284529680711, + "grad_norm": 620.2420654296875, + "learning_rate": 1.955347901539811e-06, + "loss": 44.7031, + "step": 12954 + }, + { + "epoch": 0.12263231131852216, + "grad_norm": 379.7644958496094, + "learning_rate": 1.955338842057554e-06, + "loss": 29.7188, + "step": 12955 + }, + { + "epoch": 0.12264177734023722, + "grad_norm": 545.8215942382812, + "learning_rate": 1.955329781677341e-06, + "loss": 37.4062, + "step": 12956 + }, + { + "epoch": 0.12265124336195227, + "grad_norm": 485.3309020996094, + "learning_rate": 1.95532072039918e-06, + "loss": 53.4688, + "step": 12957 + }, + { + "epoch": 0.12266070938366733, + "grad_norm": 1237.2706298828125, + "learning_rate": 1.9553116582230796e-06, + "loss": 66.7734, + "step": 12958 + }, + { + "epoch": 0.12267017540538239, + "grad_norm": 505.22271728515625, + "learning_rate": 1.9553025951490485e-06, + "loss": 33.3516, + "step": 12959 + }, + { + "epoch": 0.12267964142709743, + "grad_norm": 209.1512908935547, + "learning_rate": 1.955293531177095e-06, + "loss": 21.1406, + "step": 12960 + }, + { + "epoch": 0.12268910744881249, + "grad_norm": 270.1877746582031, + "learning_rate": 1.955284466307228e-06, + "loss": 43.4062, + "step": 12961 + }, + { + "epoch": 0.12269857347052754, + "grad_norm": 2.953950881958008, + "learning_rate": 1.9552754005394557e-06, + "loss": 0.9653, + "step": 12962 + }, + { + "epoch": 0.1227080394922426, + "grad_norm": 171.2022247314453, + "learning_rate": 1.9552663338737864e-06, + "loss": 18.3203, + "step": 12963 + }, + { + "epoch": 0.12271750551395764, + "grad_norm": 656.1267700195312, + "learning_rate": 1.9552572663102287e-06, + "loss": 20.6953, + "step": 12964 + }, + { + "epoch": 0.1227269715356727, + "grad_norm": 191.995361328125, + "learning_rate": 1.955248197848792e-06, + "loss": 23.4062, + "step": 12965 + }, + { + "epoch": 0.12273643755738775, + "grad_norm": 498.6728210449219, + "learning_rate": 1.9552391284894834e-06, + "loss": 50.1016, + "step": 12966 + }, + { + "epoch": 0.12274590357910281, + "grad_norm": 295.5690002441406, + "learning_rate": 1.955230058232313e-06, + "loss": 23.7109, + "step": 12967 + }, + { + "epoch": 0.12275536960081787, + "grad_norm": 260.24365234375, + "learning_rate": 1.955220987077288e-06, + "loss": 15.4805, + "step": 12968 + }, + { + "epoch": 0.12276483562253292, + "grad_norm": 257.99053955078125, + "learning_rate": 1.9552119150244175e-06, + "loss": 25.5703, + "step": 12969 + }, + { + "epoch": 0.12277430164424798, + "grad_norm": 3.412170886993408, + "learning_rate": 1.9552028420737095e-06, + "loss": 0.9312, + "step": 12970 + }, + { + "epoch": 0.12278376766596302, + "grad_norm": 631.12646484375, + "learning_rate": 1.9551937682251737e-06, + "loss": 54.2578, + "step": 12971 + }, + { + "epoch": 0.12279323368767808, + "grad_norm": 397.65838623046875, + "learning_rate": 1.9551846934788173e-06, + "loss": 38.6719, + "step": 12972 + }, + { + "epoch": 0.12280269970939313, + "grad_norm": 707.7908325195312, + "learning_rate": 1.9551756178346496e-06, + "loss": 45.5312, + "step": 12973 + }, + { + "epoch": 0.12281216573110819, + "grad_norm": 354.79876708984375, + "learning_rate": 1.955166541292679e-06, + "loss": 26.8125, + "step": 12974 + }, + { + "epoch": 0.12282163175282323, + "grad_norm": 377.6138610839844, + "learning_rate": 1.9551574638529142e-06, + "loss": 40.1719, + "step": 12975 + }, + { + "epoch": 0.1228310977745383, + "grad_norm": 223.18370056152344, + "learning_rate": 1.9551483855153634e-06, + "loss": 22.2422, + "step": 12976 + }, + { + "epoch": 0.12284056379625335, + "grad_norm": 544.7758178710938, + "learning_rate": 1.9551393062800352e-06, + "loss": 21.3086, + "step": 12977 + }, + { + "epoch": 0.1228500298179684, + "grad_norm": 279.2581481933594, + "learning_rate": 1.955130226146938e-06, + "loss": 14.6133, + "step": 12978 + }, + { + "epoch": 0.12285949583968346, + "grad_norm": 492.7696533203125, + "learning_rate": 1.955121145116081e-06, + "loss": 37.4375, + "step": 12979 + }, + { + "epoch": 0.1228689618613985, + "grad_norm": 514.2024536132812, + "learning_rate": 1.9551120631874716e-06, + "loss": 46.2031, + "step": 12980 + }, + { + "epoch": 0.12287842788311357, + "grad_norm": 376.40789794921875, + "learning_rate": 1.9551029803611197e-06, + "loss": 31.25, + "step": 12981 + }, + { + "epoch": 0.12288789390482861, + "grad_norm": 753.2674560546875, + "learning_rate": 1.9550938966370327e-06, + "loss": 48.6719, + "step": 12982 + }, + { + "epoch": 0.12289735992654367, + "grad_norm": 324.7574157714844, + "learning_rate": 1.9550848120152196e-06, + "loss": 57.0312, + "step": 12983 + }, + { + "epoch": 0.12290682594825872, + "grad_norm": 497.81683349609375, + "learning_rate": 1.955075726495689e-06, + "loss": 38.4922, + "step": 12984 + }, + { + "epoch": 0.12291629196997378, + "grad_norm": 428.27984619140625, + "learning_rate": 1.955066640078449e-06, + "loss": 22.3281, + "step": 12985 + }, + { + "epoch": 0.12292575799168884, + "grad_norm": 300.864013671875, + "learning_rate": 1.955057552763509e-06, + "loss": 20.5195, + "step": 12986 + }, + { + "epoch": 0.12293522401340388, + "grad_norm": 234.44427490234375, + "learning_rate": 1.9550484645508767e-06, + "loss": 17.5469, + "step": 12987 + }, + { + "epoch": 0.12294469003511894, + "grad_norm": 238.20758056640625, + "learning_rate": 1.9550393754405614e-06, + "loss": 17.5, + "step": 12988 + }, + { + "epoch": 0.12295415605683399, + "grad_norm": 161.56642150878906, + "learning_rate": 1.9550302854325712e-06, + "loss": 14.7617, + "step": 12989 + }, + { + "epoch": 0.12296362207854905, + "grad_norm": 3.208735227584839, + "learning_rate": 1.955021194526914e-06, + "loss": 0.9297, + "step": 12990 + }, + { + "epoch": 0.1229730881002641, + "grad_norm": 1530.497802734375, + "learning_rate": 1.9550121027236e-06, + "loss": 61.7109, + "step": 12991 + }, + { + "epoch": 0.12298255412197916, + "grad_norm": 352.1361389160156, + "learning_rate": 1.9550030100226357e-06, + "loss": 19.2031, + "step": 12992 + }, + { + "epoch": 0.1229920201436942, + "grad_norm": 333.4760437011719, + "learning_rate": 1.9549939164240313e-06, + "loss": 9.8633, + "step": 12993 + }, + { + "epoch": 0.12300148616540926, + "grad_norm": 213.24217224121094, + "learning_rate": 1.9549848219277944e-06, + "loss": 18.4766, + "step": 12994 + }, + { + "epoch": 0.12301095218712432, + "grad_norm": 682.999267578125, + "learning_rate": 1.954975726533934e-06, + "loss": 35.375, + "step": 12995 + }, + { + "epoch": 0.12302041820883937, + "grad_norm": 297.1202697753906, + "learning_rate": 1.9549666302424587e-06, + "loss": 33.625, + "step": 12996 + }, + { + "epoch": 0.12302988423055443, + "grad_norm": 384.2682189941406, + "learning_rate": 1.954957533053377e-06, + "loss": 46.2188, + "step": 12997 + }, + { + "epoch": 0.12303935025226947, + "grad_norm": 755.5827026367188, + "learning_rate": 1.954948434966697e-06, + "loss": 31.1719, + "step": 12998 + }, + { + "epoch": 0.12304881627398453, + "grad_norm": 263.23236083984375, + "learning_rate": 1.954939335982428e-06, + "loss": 15.3867, + "step": 12999 + }, + { + "epoch": 0.12305828229569958, + "grad_norm": 441.36810302734375, + "learning_rate": 1.9549302361005777e-06, + "loss": 19.5391, + "step": 13000 + }, + { + "epoch": 0.12306774831741464, + "grad_norm": 580.060546875, + "learning_rate": 1.9549211353211553e-06, + "loss": 51.7969, + "step": 13001 + }, + { + "epoch": 0.1230772143391297, + "grad_norm": 605.5339965820312, + "learning_rate": 1.9549120336441694e-06, + "loss": 18.9766, + "step": 13002 + }, + { + "epoch": 0.12308668036084475, + "grad_norm": 288.1062316894531, + "learning_rate": 1.954902931069628e-06, + "loss": 18.1797, + "step": 13003 + }, + { + "epoch": 0.1230961463825598, + "grad_norm": 633.3331909179688, + "learning_rate": 1.95489382759754e-06, + "loss": 68.6094, + "step": 13004 + }, + { + "epoch": 0.12310561240427485, + "grad_norm": 827.410888671875, + "learning_rate": 1.9548847232279136e-06, + "loss": 36.3906, + "step": 13005 + }, + { + "epoch": 0.12311507842598991, + "grad_norm": 263.91851806640625, + "learning_rate": 1.9548756179607582e-06, + "loss": 26.5625, + "step": 13006 + }, + { + "epoch": 0.12312454444770496, + "grad_norm": 228.13125610351562, + "learning_rate": 1.9548665117960817e-06, + "loss": 14.3906, + "step": 13007 + }, + { + "epoch": 0.12313401046942002, + "grad_norm": 262.12353515625, + "learning_rate": 1.954857404733893e-06, + "loss": 18.1016, + "step": 13008 + }, + { + "epoch": 0.12314347649113506, + "grad_norm": 749.2021484375, + "learning_rate": 1.9548482967742e-06, + "loss": 45.5312, + "step": 13009 + }, + { + "epoch": 0.12315294251285012, + "grad_norm": 3.2964768409729004, + "learning_rate": 1.954839187917012e-06, + "loss": 0.9126, + "step": 13010 + }, + { + "epoch": 0.12316240853456518, + "grad_norm": 354.7107238769531, + "learning_rate": 1.954830078162337e-06, + "loss": 31.7188, + "step": 13011 + }, + { + "epoch": 0.12317187455628023, + "grad_norm": 364.32916259765625, + "learning_rate": 1.9548209675101843e-06, + "loss": 20.2734, + "step": 13012 + }, + { + "epoch": 0.12318134057799529, + "grad_norm": 206.02420043945312, + "learning_rate": 1.954811855960562e-06, + "loss": 18.0, + "step": 13013 + }, + { + "epoch": 0.12319080659971034, + "grad_norm": 718.8907470703125, + "learning_rate": 1.9548027435134784e-06, + "loss": 46.9531, + "step": 13014 + }, + { + "epoch": 0.1232002726214254, + "grad_norm": 286.7395935058594, + "learning_rate": 1.9547936301689423e-06, + "loss": 20.1875, + "step": 13015 + }, + { + "epoch": 0.12320973864314044, + "grad_norm": 452.09747314453125, + "learning_rate": 1.9547845159269625e-06, + "loss": 35.4688, + "step": 13016 + }, + { + "epoch": 0.1232192046648555, + "grad_norm": 220.58966064453125, + "learning_rate": 1.954775400787547e-06, + "loss": 20.8359, + "step": 13017 + }, + { + "epoch": 0.12322867068657055, + "grad_norm": 775.610595703125, + "learning_rate": 1.954766284750705e-06, + "loss": 53.75, + "step": 13018 + }, + { + "epoch": 0.12323813670828561, + "grad_norm": 352.4184265136719, + "learning_rate": 1.954757167816445e-06, + "loss": 18.9355, + "step": 13019 + }, + { + "epoch": 0.12324760273000067, + "grad_norm": 180.41339111328125, + "learning_rate": 1.954748049984775e-06, + "loss": 14.3477, + "step": 13020 + }, + { + "epoch": 0.12325706875171571, + "grad_norm": 394.61651611328125, + "learning_rate": 1.954738931255704e-06, + "loss": 35.875, + "step": 13021 + }, + { + "epoch": 0.12326653477343077, + "grad_norm": 708.9653930664062, + "learning_rate": 1.954729811629241e-06, + "loss": 51.0977, + "step": 13022 + }, + { + "epoch": 0.12327600079514582, + "grad_norm": 267.4477233886719, + "learning_rate": 1.9547206911053937e-06, + "loss": 14.2461, + "step": 13023 + }, + { + "epoch": 0.12328546681686088, + "grad_norm": 789.2860717773438, + "learning_rate": 1.954711569684171e-06, + "loss": 19.5859, + "step": 13024 + }, + { + "epoch": 0.12329493283857593, + "grad_norm": 500.17138671875, + "learning_rate": 1.9547024473655817e-06, + "loss": 47.3359, + "step": 13025 + }, + { + "epoch": 0.12330439886029099, + "grad_norm": 290.8778076171875, + "learning_rate": 1.954693324149634e-06, + "loss": 31.6094, + "step": 13026 + }, + { + "epoch": 0.12331386488200603, + "grad_norm": 248.3765869140625, + "learning_rate": 1.9546842000363364e-06, + "loss": 25.9062, + "step": 13027 + }, + { + "epoch": 0.12332333090372109, + "grad_norm": 2.7818009853363037, + "learning_rate": 1.9546750750256986e-06, + "loss": 0.918, + "step": 13028 + }, + { + "epoch": 0.12333279692543615, + "grad_norm": 559.4928588867188, + "learning_rate": 1.9546659491177276e-06, + "loss": 21.1875, + "step": 13029 + }, + { + "epoch": 0.1233422629471512, + "grad_norm": 620.3953857421875, + "learning_rate": 1.954656822312433e-06, + "loss": 34.8125, + "step": 13030 + }, + { + "epoch": 0.12335172896886626, + "grad_norm": 684.8478393554688, + "learning_rate": 1.954647694609823e-06, + "loss": 39.0312, + "step": 13031 + }, + { + "epoch": 0.1233611949905813, + "grad_norm": 305.3449401855469, + "learning_rate": 1.9546385660099067e-06, + "loss": 17.9219, + "step": 13032 + }, + { + "epoch": 0.12337066101229636, + "grad_norm": 379.7016296386719, + "learning_rate": 1.954629436512692e-06, + "loss": 17.3047, + "step": 13033 + }, + { + "epoch": 0.12338012703401141, + "grad_norm": 307.5968933105469, + "learning_rate": 1.9546203061181874e-06, + "loss": 20.9219, + "step": 13034 + }, + { + "epoch": 0.12338959305572647, + "grad_norm": 207.77284240722656, + "learning_rate": 1.954611174826402e-06, + "loss": 23.4844, + "step": 13035 + }, + { + "epoch": 0.12339905907744152, + "grad_norm": 286.9342041015625, + "learning_rate": 1.9546020426373443e-06, + "loss": 21.6094, + "step": 13036 + }, + { + "epoch": 0.12340852509915658, + "grad_norm": 340.11163330078125, + "learning_rate": 1.9545929095510228e-06, + "loss": 27.6875, + "step": 13037 + }, + { + "epoch": 0.12341799112087164, + "grad_norm": 247.5115509033203, + "learning_rate": 1.954583775567446e-06, + "loss": 10.4414, + "step": 13038 + }, + { + "epoch": 0.12342745714258668, + "grad_norm": 3.5297536849975586, + "learning_rate": 1.9545746406866225e-06, + "loss": 0.9082, + "step": 13039 + }, + { + "epoch": 0.12343692316430174, + "grad_norm": 539.1509399414062, + "learning_rate": 1.954565504908561e-06, + "loss": 43.5781, + "step": 13040 + }, + { + "epoch": 0.12344638918601679, + "grad_norm": 899.5726318359375, + "learning_rate": 1.95455636823327e-06, + "loss": 46.6016, + "step": 13041 + }, + { + "epoch": 0.12345585520773185, + "grad_norm": 590.5903930664062, + "learning_rate": 1.9545472306607582e-06, + "loss": 31.6562, + "step": 13042 + }, + { + "epoch": 0.1234653212294469, + "grad_norm": 584.5818481445312, + "learning_rate": 1.9545380921910337e-06, + "loss": 18.1758, + "step": 13043 + }, + { + "epoch": 0.12347478725116195, + "grad_norm": 171.8601531982422, + "learning_rate": 1.954528952824106e-06, + "loss": 14.3125, + "step": 13044 + }, + { + "epoch": 0.12348425327287701, + "grad_norm": 501.9474182128906, + "learning_rate": 1.954519812559983e-06, + "loss": 35.7812, + "step": 13045 + }, + { + "epoch": 0.12349371929459206, + "grad_norm": 243.0185089111328, + "learning_rate": 1.9545106713986734e-06, + "loss": 16.6094, + "step": 13046 + }, + { + "epoch": 0.12350318531630712, + "grad_norm": 703.3917846679688, + "learning_rate": 1.954501529340186e-06, + "loss": 44.5156, + "step": 13047 + }, + { + "epoch": 0.12351265133802217, + "grad_norm": 261.31951904296875, + "learning_rate": 1.9544923863845295e-06, + "loss": 17.1875, + "step": 13048 + }, + { + "epoch": 0.12352211735973723, + "grad_norm": 289.3937683105469, + "learning_rate": 1.954483242531712e-06, + "loss": 14.8516, + "step": 13049 + }, + { + "epoch": 0.12353158338145227, + "grad_norm": 598.662841796875, + "learning_rate": 1.954474097781742e-06, + "loss": 26.7578, + "step": 13050 + }, + { + "epoch": 0.12354104940316733, + "grad_norm": 297.1703796386719, + "learning_rate": 1.9544649521346287e-06, + "loss": 14.625, + "step": 13051 + }, + { + "epoch": 0.12355051542488238, + "grad_norm": 252.27920532226562, + "learning_rate": 1.9544558055903805e-06, + "loss": 22.3672, + "step": 13052 + }, + { + "epoch": 0.12355998144659744, + "grad_norm": 3.0631749629974365, + "learning_rate": 1.9544466581490057e-06, + "loss": 0.8076, + "step": 13053 + }, + { + "epoch": 0.1235694474683125, + "grad_norm": 134.58245849609375, + "learning_rate": 1.9544375098105134e-06, + "loss": 19.7969, + "step": 13054 + }, + { + "epoch": 0.12357891349002754, + "grad_norm": 427.4978332519531, + "learning_rate": 1.954428360574912e-06, + "loss": 49.6719, + "step": 13055 + }, + { + "epoch": 0.1235883795117426, + "grad_norm": 248.6312255859375, + "learning_rate": 1.9544192104422095e-06, + "loss": 18.7969, + "step": 13056 + }, + { + "epoch": 0.12359784553345765, + "grad_norm": 326.7376708984375, + "learning_rate": 1.9544100594124157e-06, + "loss": 32.0078, + "step": 13057 + }, + { + "epoch": 0.12360731155517271, + "grad_norm": 279.945556640625, + "learning_rate": 1.9544009074855382e-06, + "loss": 21.7266, + "step": 13058 + }, + { + "epoch": 0.12361677757688776, + "grad_norm": 421.259521484375, + "learning_rate": 1.954391754661586e-06, + "loss": 32.0312, + "step": 13059 + }, + { + "epoch": 0.12362624359860282, + "grad_norm": 292.13714599609375, + "learning_rate": 1.9543826009405677e-06, + "loss": 23.7969, + "step": 13060 + }, + { + "epoch": 0.12363570962031786, + "grad_norm": 829.6450805664062, + "learning_rate": 1.9543734463224916e-06, + "loss": 27.3594, + "step": 13061 + }, + { + "epoch": 0.12364517564203292, + "grad_norm": 246.29617309570312, + "learning_rate": 1.954364290807367e-06, + "loss": 15.4844, + "step": 13062 + }, + { + "epoch": 0.12365464166374798, + "grad_norm": 602.6741943359375, + "learning_rate": 1.954355134395202e-06, + "loss": 22.8359, + "step": 13063 + }, + { + "epoch": 0.12366410768546303, + "grad_norm": 398.6720275878906, + "learning_rate": 1.9543459770860046e-06, + "loss": 40.3125, + "step": 13064 + }, + { + "epoch": 0.12367357370717809, + "grad_norm": 288.4075622558594, + "learning_rate": 1.9543368188797843e-06, + "loss": 29.125, + "step": 13065 + }, + { + "epoch": 0.12368303972889313, + "grad_norm": 243.93240356445312, + "learning_rate": 1.9543276597765498e-06, + "loss": 19.7344, + "step": 13066 + }, + { + "epoch": 0.1236925057506082, + "grad_norm": 395.5603942871094, + "learning_rate": 1.9543184997763095e-06, + "loss": 23.0156, + "step": 13067 + }, + { + "epoch": 0.12370197177232324, + "grad_norm": 249.8463134765625, + "learning_rate": 1.9543093388790714e-06, + "loss": 9.1562, + "step": 13068 + }, + { + "epoch": 0.1237114377940383, + "grad_norm": 529.55859375, + "learning_rate": 1.954300177084845e-06, + "loss": 27.625, + "step": 13069 + }, + { + "epoch": 0.12372090381575335, + "grad_norm": 405.7126159667969, + "learning_rate": 1.954291014393638e-06, + "loss": 40.8594, + "step": 13070 + }, + { + "epoch": 0.1237303698374684, + "grad_norm": 509.0692443847656, + "learning_rate": 1.9542818508054602e-06, + "loss": 46.0, + "step": 13071 + }, + { + "epoch": 0.12373983585918347, + "grad_norm": 301.10443115234375, + "learning_rate": 1.9542726863203194e-06, + "loss": 21.3203, + "step": 13072 + }, + { + "epoch": 0.12374930188089851, + "grad_norm": 426.22998046875, + "learning_rate": 1.954263520938224e-06, + "loss": 39.6562, + "step": 13073 + }, + { + "epoch": 0.12375876790261357, + "grad_norm": 393.5097351074219, + "learning_rate": 1.954254354659183e-06, + "loss": 19.4766, + "step": 13074 + }, + { + "epoch": 0.12376823392432862, + "grad_norm": 590.9918212890625, + "learning_rate": 1.954245187483205e-06, + "loss": 42.0, + "step": 13075 + }, + { + "epoch": 0.12377769994604368, + "grad_norm": 319.7924499511719, + "learning_rate": 1.954236019410299e-06, + "loss": 28.9062, + "step": 13076 + }, + { + "epoch": 0.12378716596775872, + "grad_norm": 310.44000244140625, + "learning_rate": 1.954226850440473e-06, + "loss": 22.7031, + "step": 13077 + }, + { + "epoch": 0.12379663198947378, + "grad_norm": 235.49778747558594, + "learning_rate": 1.9542176805737357e-06, + "loss": 21.6797, + "step": 13078 + }, + { + "epoch": 0.12380609801118883, + "grad_norm": 237.10215759277344, + "learning_rate": 1.954208509810096e-06, + "loss": 23.7422, + "step": 13079 + }, + { + "epoch": 0.12381556403290389, + "grad_norm": 624.5804443359375, + "learning_rate": 1.954199338149562e-06, + "loss": 40.8906, + "step": 13080 + }, + { + "epoch": 0.12382503005461895, + "grad_norm": 293.75958251953125, + "learning_rate": 1.9541901655921434e-06, + "loss": 18.6953, + "step": 13081 + }, + { + "epoch": 0.123834496076334, + "grad_norm": 590.0864868164062, + "learning_rate": 1.9541809921378478e-06, + "loss": 40.1719, + "step": 13082 + }, + { + "epoch": 0.12384396209804906, + "grad_norm": 445.1694641113281, + "learning_rate": 1.954171817786684e-06, + "loss": 31.8438, + "step": 13083 + }, + { + "epoch": 0.1238534281197641, + "grad_norm": 342.6171875, + "learning_rate": 1.954162642538661e-06, + "loss": 29.6641, + "step": 13084 + }, + { + "epoch": 0.12386289414147916, + "grad_norm": 138.96507263183594, + "learning_rate": 1.9541534663937867e-06, + "loss": 15.0391, + "step": 13085 + }, + { + "epoch": 0.12387236016319421, + "grad_norm": 581.2713012695312, + "learning_rate": 1.9541442893520706e-06, + "loss": 19.957, + "step": 13086 + }, + { + "epoch": 0.12388182618490927, + "grad_norm": 339.0489807128906, + "learning_rate": 1.954135111413521e-06, + "loss": 16.3594, + "step": 13087 + }, + { + "epoch": 0.12389129220662433, + "grad_norm": 408.33056640625, + "learning_rate": 1.9541259325781463e-06, + "loss": 49.1094, + "step": 13088 + }, + { + "epoch": 0.12390075822833937, + "grad_norm": 237.13912963867188, + "learning_rate": 1.9541167528459554e-06, + "loss": 17.7852, + "step": 13089 + }, + { + "epoch": 0.12391022425005443, + "grad_norm": 417.1427307128906, + "learning_rate": 1.9541075722169565e-06, + "loss": 28.2188, + "step": 13090 + }, + { + "epoch": 0.12391969027176948, + "grad_norm": 462.71697998046875, + "learning_rate": 1.954098390691159e-06, + "loss": 31.8047, + "step": 13091 + }, + { + "epoch": 0.12392915629348454, + "grad_norm": 320.5393371582031, + "learning_rate": 1.9540892082685707e-06, + "loss": 12.5352, + "step": 13092 + }, + { + "epoch": 0.12393862231519959, + "grad_norm": 431.38067626953125, + "learning_rate": 1.9540800249492012e-06, + "loss": 43.625, + "step": 13093 + }, + { + "epoch": 0.12394808833691465, + "grad_norm": 464.5538635253906, + "learning_rate": 1.954070840733058e-06, + "loss": 25.7344, + "step": 13094 + }, + { + "epoch": 0.12395755435862969, + "grad_norm": 263.02667236328125, + "learning_rate": 1.9540616556201505e-06, + "loss": 8.4297, + "step": 13095 + }, + { + "epoch": 0.12396702038034475, + "grad_norm": 514.6752319335938, + "learning_rate": 1.954052469610487e-06, + "loss": 46.1797, + "step": 13096 + }, + { + "epoch": 0.12397648640205981, + "grad_norm": 3.259354591369629, + "learning_rate": 1.954043282704076e-06, + "loss": 0.9282, + "step": 13097 + }, + { + "epoch": 0.12398595242377486, + "grad_norm": 805.36865234375, + "learning_rate": 1.9540340949009268e-06, + "loss": 57.1641, + "step": 13098 + }, + { + "epoch": 0.12399541844548992, + "grad_norm": 438.97265625, + "learning_rate": 1.9540249062010473e-06, + "loss": 30.2031, + "step": 13099 + }, + { + "epoch": 0.12400488446720496, + "grad_norm": 444.8312683105469, + "learning_rate": 1.954015716604447e-06, + "loss": 16.0391, + "step": 13100 + }, + { + "epoch": 0.12401435048892002, + "grad_norm": 262.4571838378906, + "learning_rate": 1.9540065261111337e-06, + "loss": 30.125, + "step": 13101 + }, + { + "epoch": 0.12402381651063507, + "grad_norm": 2.806392192840576, + "learning_rate": 1.953997334721116e-06, + "loss": 0.8291, + "step": 13102 + }, + { + "epoch": 0.12403328253235013, + "grad_norm": 357.1257019042969, + "learning_rate": 1.953988142434403e-06, + "loss": 22.6328, + "step": 13103 + }, + { + "epoch": 0.12404274855406518, + "grad_norm": 366.5189208984375, + "learning_rate": 1.9539789492510034e-06, + "loss": 10.5195, + "step": 13104 + }, + { + "epoch": 0.12405221457578024, + "grad_norm": 201.60533142089844, + "learning_rate": 1.9539697551709257e-06, + "loss": 17.7812, + "step": 13105 + }, + { + "epoch": 0.1240616805974953, + "grad_norm": 311.5675964355469, + "learning_rate": 1.9539605601941784e-06, + "loss": 21.707, + "step": 13106 + }, + { + "epoch": 0.12407114661921034, + "grad_norm": 409.9303283691406, + "learning_rate": 1.95395136432077e-06, + "loss": 41.9062, + "step": 13107 + }, + { + "epoch": 0.1240806126409254, + "grad_norm": 216.3126678466797, + "learning_rate": 1.9539421675507097e-06, + "loss": 20.8906, + "step": 13108 + }, + { + "epoch": 0.12409007866264045, + "grad_norm": 442.1349182128906, + "learning_rate": 1.9539329698840056e-06, + "loss": 42.7812, + "step": 13109 + }, + { + "epoch": 0.12409954468435551, + "grad_norm": 361.0172119140625, + "learning_rate": 1.9539237713206668e-06, + "loss": 22.7969, + "step": 13110 + }, + { + "epoch": 0.12410901070607055, + "grad_norm": 233.58602905273438, + "learning_rate": 1.9539145718607016e-06, + "loss": 16.1289, + "step": 13111 + }, + { + "epoch": 0.12411847672778561, + "grad_norm": 2.850527763366699, + "learning_rate": 1.9539053715041186e-06, + "loss": 0.8804, + "step": 13112 + }, + { + "epoch": 0.12412794274950066, + "grad_norm": 601.7166137695312, + "learning_rate": 1.953896170250927e-06, + "loss": 57.2031, + "step": 13113 + }, + { + "epoch": 0.12413740877121572, + "grad_norm": 215.30331420898438, + "learning_rate": 1.9538869681011347e-06, + "loss": 27.6562, + "step": 13114 + }, + { + "epoch": 0.12414687479293078, + "grad_norm": 697.906982421875, + "learning_rate": 1.9538777650547507e-06, + "loss": 33.8203, + "step": 13115 + }, + { + "epoch": 0.12415634081464583, + "grad_norm": 598.7413940429688, + "learning_rate": 1.9538685611117836e-06, + "loss": 36.5156, + "step": 13116 + }, + { + "epoch": 0.12416580683636089, + "grad_norm": 382.5986022949219, + "learning_rate": 1.9538593562722423e-06, + "loss": 40.9219, + "step": 13117 + }, + { + "epoch": 0.12417527285807593, + "grad_norm": 298.8688659667969, + "learning_rate": 1.9538501505361353e-06, + "loss": 18.2734, + "step": 13118 + }, + { + "epoch": 0.12418473887979099, + "grad_norm": 326.5616455078125, + "learning_rate": 1.953840943903471e-06, + "loss": 25.9297, + "step": 13119 + }, + { + "epoch": 0.12419420490150604, + "grad_norm": 251.8900909423828, + "learning_rate": 1.9538317363742583e-06, + "loss": 17.1641, + "step": 13120 + }, + { + "epoch": 0.1242036709232211, + "grad_norm": 3.8504414558410645, + "learning_rate": 1.953822527948506e-06, + "loss": 1.0137, + "step": 13121 + }, + { + "epoch": 0.12421313694493614, + "grad_norm": 357.004638671875, + "learning_rate": 1.9538133186262225e-06, + "loss": 25.8359, + "step": 13122 + }, + { + "epoch": 0.1242226029666512, + "grad_norm": 462.01605224609375, + "learning_rate": 1.9538041084074164e-06, + "loss": 36.2344, + "step": 13123 + }, + { + "epoch": 0.12423206898836626, + "grad_norm": 196.34217834472656, + "learning_rate": 1.9537948972920966e-06, + "loss": 10.0977, + "step": 13124 + }, + { + "epoch": 0.12424153501008131, + "grad_norm": 239.2568359375, + "learning_rate": 1.953785685280272e-06, + "loss": 23.75, + "step": 13125 + }, + { + "epoch": 0.12425100103179637, + "grad_norm": 299.870849609375, + "learning_rate": 1.9537764723719504e-06, + "loss": 22.9219, + "step": 13126 + }, + { + "epoch": 0.12426046705351142, + "grad_norm": 783.8718872070312, + "learning_rate": 1.953767258567141e-06, + "loss": 43.1406, + "step": 13127 + }, + { + "epoch": 0.12426993307522648, + "grad_norm": 382.0538635253906, + "learning_rate": 1.9537580438658525e-06, + "loss": 22.5, + "step": 13128 + }, + { + "epoch": 0.12427939909694152, + "grad_norm": 3.5628857612609863, + "learning_rate": 1.9537488282680937e-06, + "loss": 0.9429, + "step": 13129 + }, + { + "epoch": 0.12428886511865658, + "grad_norm": 183.29832458496094, + "learning_rate": 1.953739611773873e-06, + "loss": 19.0391, + "step": 13130 + }, + { + "epoch": 0.12429833114037164, + "grad_norm": 1296.6287841796875, + "learning_rate": 1.9537303943831985e-06, + "loss": 8.3789, + "step": 13131 + }, + { + "epoch": 0.12430779716208669, + "grad_norm": 263.0072021484375, + "learning_rate": 1.95372117609608e-06, + "loss": 31.4062, + "step": 13132 + }, + { + "epoch": 0.12431726318380175, + "grad_norm": 284.3736572265625, + "learning_rate": 1.9537119569125257e-06, + "loss": 20.7812, + "step": 13133 + }, + { + "epoch": 0.1243267292055168, + "grad_norm": 531.0763549804688, + "learning_rate": 1.9537027368325442e-06, + "loss": 35.5703, + "step": 13134 + }, + { + "epoch": 0.12433619522723185, + "grad_norm": 234.2234649658203, + "learning_rate": 1.953693515856144e-06, + "loss": 11.1992, + "step": 13135 + }, + { + "epoch": 0.1243456612489469, + "grad_norm": 180.91871643066406, + "learning_rate": 1.953684293983334e-06, + "loss": 20.0, + "step": 13136 + }, + { + "epoch": 0.12435512727066196, + "grad_norm": 536.62548828125, + "learning_rate": 1.9536750712141226e-06, + "loss": 59.8906, + "step": 13137 + }, + { + "epoch": 0.12436459329237701, + "grad_norm": 221.06919860839844, + "learning_rate": 1.953665847548519e-06, + "loss": 16.7812, + "step": 13138 + }, + { + "epoch": 0.12437405931409207, + "grad_norm": 510.7951354980469, + "learning_rate": 1.9536566229865312e-06, + "loss": 64.0078, + "step": 13139 + }, + { + "epoch": 0.12438352533580713, + "grad_norm": 480.8776550292969, + "learning_rate": 1.9536473975281688e-06, + "loss": 24.6367, + "step": 13140 + }, + { + "epoch": 0.12439299135752217, + "grad_norm": 452.4254150390625, + "learning_rate": 1.9536381711734396e-06, + "loss": 14.4883, + "step": 13141 + }, + { + "epoch": 0.12440245737923723, + "grad_norm": 203.0320587158203, + "learning_rate": 1.9536289439223523e-06, + "loss": 18.6016, + "step": 13142 + }, + { + "epoch": 0.12441192340095228, + "grad_norm": 231.77450561523438, + "learning_rate": 1.953619715774916e-06, + "loss": 14.3828, + "step": 13143 + }, + { + "epoch": 0.12442138942266734, + "grad_norm": 451.0575866699219, + "learning_rate": 1.9536104867311393e-06, + "loss": 31.1094, + "step": 13144 + }, + { + "epoch": 0.12443085544438238, + "grad_norm": 557.9024047851562, + "learning_rate": 1.9536012567910306e-06, + "loss": 22.5625, + "step": 13145 + }, + { + "epoch": 0.12444032146609744, + "grad_norm": 415.41339111328125, + "learning_rate": 1.953592025954599e-06, + "loss": 38.4297, + "step": 13146 + }, + { + "epoch": 0.12444978748781249, + "grad_norm": 718.8209228515625, + "learning_rate": 1.9535827942218526e-06, + "loss": 28.0703, + "step": 13147 + }, + { + "epoch": 0.12445925350952755, + "grad_norm": 453.6223449707031, + "learning_rate": 1.9535735615928006e-06, + "loss": 34.3281, + "step": 13148 + }, + { + "epoch": 0.12446871953124261, + "grad_norm": 822.9364013671875, + "learning_rate": 1.9535643280674514e-06, + "loss": 34.8516, + "step": 13149 + }, + { + "epoch": 0.12447818555295766, + "grad_norm": 2.9270713329315186, + "learning_rate": 1.9535550936458138e-06, + "loss": 0.8555, + "step": 13150 + }, + { + "epoch": 0.12448765157467272, + "grad_norm": 448.3418884277344, + "learning_rate": 1.9535458583278963e-06, + "loss": 34.4141, + "step": 13151 + }, + { + "epoch": 0.12449711759638776, + "grad_norm": 310.74713134765625, + "learning_rate": 1.9535366221137083e-06, + "loss": 17.8594, + "step": 13152 + }, + { + "epoch": 0.12450658361810282, + "grad_norm": 267.63763427734375, + "learning_rate": 1.9535273850032574e-06, + "loss": 19.8516, + "step": 13153 + }, + { + "epoch": 0.12451604963981787, + "grad_norm": 564.1155395507812, + "learning_rate": 1.953518146996553e-06, + "loss": 61.4531, + "step": 13154 + }, + { + "epoch": 0.12452551566153293, + "grad_norm": 401.83380126953125, + "learning_rate": 1.9535089080936032e-06, + "loss": 20.9297, + "step": 13155 + }, + { + "epoch": 0.12453498168324798, + "grad_norm": 3.130861759185791, + "learning_rate": 1.9534996682944173e-06, + "loss": 0.9634, + "step": 13156 + }, + { + "epoch": 0.12454444770496303, + "grad_norm": 347.2401428222656, + "learning_rate": 1.953490427599004e-06, + "loss": 21.5312, + "step": 13157 + }, + { + "epoch": 0.1245539137266781, + "grad_norm": 356.8335876464844, + "learning_rate": 1.953481186007371e-06, + "loss": 33.8594, + "step": 13158 + }, + { + "epoch": 0.12456337974839314, + "grad_norm": 355.0369567871094, + "learning_rate": 1.9534719435195285e-06, + "loss": 34.1445, + "step": 13159 + }, + { + "epoch": 0.1245728457701082, + "grad_norm": 2.941953182220459, + "learning_rate": 1.953462700135484e-06, + "loss": 0.7041, + "step": 13160 + }, + { + "epoch": 0.12458231179182325, + "grad_norm": 310.69134521484375, + "learning_rate": 1.953453455855247e-06, + "loss": 29.4062, + "step": 13161 + }, + { + "epoch": 0.12459177781353831, + "grad_norm": 277.0561218261719, + "learning_rate": 1.953444210678825e-06, + "loss": 22.1094, + "step": 13162 + }, + { + "epoch": 0.12460124383525335, + "grad_norm": 381.04534912109375, + "learning_rate": 1.9534349646062283e-06, + "loss": 18.7734, + "step": 13163 + }, + { + "epoch": 0.12461070985696841, + "grad_norm": 519.9454345703125, + "learning_rate": 1.953425717637464e-06, + "loss": 42.3438, + "step": 13164 + }, + { + "epoch": 0.12462017587868346, + "grad_norm": 311.6893615722656, + "learning_rate": 1.9534164697725425e-06, + "loss": 23.7617, + "step": 13165 + }, + { + "epoch": 0.12462964190039852, + "grad_norm": 199.52073669433594, + "learning_rate": 1.9534072210114706e-06, + "loss": 23.375, + "step": 13166 + }, + { + "epoch": 0.12463910792211358, + "grad_norm": 219.6476287841797, + "learning_rate": 1.9533979713542586e-06, + "loss": 15.0078, + "step": 13167 + }, + { + "epoch": 0.12464857394382862, + "grad_norm": 565.2183227539062, + "learning_rate": 1.9533887208009145e-06, + "loss": 39.8125, + "step": 13168 + }, + { + "epoch": 0.12465803996554368, + "grad_norm": 166.05911254882812, + "learning_rate": 1.9533794693514464e-06, + "loss": 13.543, + "step": 13169 + }, + { + "epoch": 0.12466750598725873, + "grad_norm": 427.6209411621094, + "learning_rate": 1.953370217005864e-06, + "loss": 24.4688, + "step": 13170 + }, + { + "epoch": 0.12467697200897379, + "grad_norm": 434.6620788574219, + "learning_rate": 1.9533609637641755e-06, + "loss": 22.6641, + "step": 13171 + }, + { + "epoch": 0.12468643803068884, + "grad_norm": 186.1191864013672, + "learning_rate": 1.95335170962639e-06, + "loss": 23.2734, + "step": 13172 + }, + { + "epoch": 0.1246959040524039, + "grad_norm": 571.7578125, + "learning_rate": 1.9533424545925157e-06, + "loss": 35.25, + "step": 13173 + }, + { + "epoch": 0.12470537007411896, + "grad_norm": 995.1732788085938, + "learning_rate": 1.953333198662562e-06, + "loss": 50.3125, + "step": 13174 + }, + { + "epoch": 0.124714836095834, + "grad_norm": 341.9959411621094, + "learning_rate": 1.9533239418365364e-06, + "loss": 22.125, + "step": 13175 + }, + { + "epoch": 0.12472430211754906, + "grad_norm": 452.4346923828125, + "learning_rate": 1.9533146841144485e-06, + "loss": 25.2656, + "step": 13176 + }, + { + "epoch": 0.12473376813926411, + "grad_norm": 457.28985595703125, + "learning_rate": 1.9533054254963068e-06, + "loss": 44.7188, + "step": 13177 + }, + { + "epoch": 0.12474323416097917, + "grad_norm": 354.29058837890625, + "learning_rate": 1.9532961659821203e-06, + "loss": 42.75, + "step": 13178 + }, + { + "epoch": 0.12475270018269422, + "grad_norm": 2.9133503437042236, + "learning_rate": 1.9532869055718974e-06, + "loss": 0.9468, + "step": 13179 + }, + { + "epoch": 0.12476216620440927, + "grad_norm": 1407.248779296875, + "learning_rate": 1.9532776442656463e-06, + "loss": 17.1562, + "step": 13180 + }, + { + "epoch": 0.12477163222612432, + "grad_norm": 439.2509460449219, + "learning_rate": 1.9532683820633766e-06, + "loss": 32.8281, + "step": 13181 + }, + { + "epoch": 0.12478109824783938, + "grad_norm": 357.13507080078125, + "learning_rate": 1.9532591189650968e-06, + "loss": 33.7109, + "step": 13182 + }, + { + "epoch": 0.12479056426955444, + "grad_norm": 430.0354309082031, + "learning_rate": 1.9532498549708154e-06, + "loss": 22.3984, + "step": 13183 + }, + { + "epoch": 0.12480003029126949, + "grad_norm": 861.280029296875, + "learning_rate": 1.953240590080541e-06, + "loss": 22.7578, + "step": 13184 + }, + { + "epoch": 0.12480949631298455, + "grad_norm": 374.7713928222656, + "learning_rate": 1.953231324294282e-06, + "loss": 21.1172, + "step": 13185 + }, + { + "epoch": 0.12481896233469959, + "grad_norm": 262.70770263671875, + "learning_rate": 1.9532220576120483e-06, + "loss": 8.6133, + "step": 13186 + }, + { + "epoch": 0.12482842835641465, + "grad_norm": 555.9517822265625, + "learning_rate": 1.9532127900338477e-06, + "loss": 16.627, + "step": 13187 + }, + { + "epoch": 0.1248378943781297, + "grad_norm": 593.6358032226562, + "learning_rate": 1.953203521559689e-06, + "loss": 17.0703, + "step": 13188 + }, + { + "epoch": 0.12484736039984476, + "grad_norm": 560.9789428710938, + "learning_rate": 1.953194252189581e-06, + "loss": 46.0, + "step": 13189 + }, + { + "epoch": 0.1248568264215598, + "grad_norm": 309.37506103515625, + "learning_rate": 1.9531849819235326e-06, + "loss": 31.2812, + "step": 13190 + }, + { + "epoch": 0.12486629244327486, + "grad_norm": 297.4377136230469, + "learning_rate": 1.953175710761552e-06, + "loss": 19.9766, + "step": 13191 + }, + { + "epoch": 0.12487575846498992, + "grad_norm": 1354.6513671875, + "learning_rate": 1.9531664387036483e-06, + "loss": 33.0586, + "step": 13192 + }, + { + "epoch": 0.12488522448670497, + "grad_norm": 239.4543914794922, + "learning_rate": 1.95315716574983e-06, + "loss": 18.7578, + "step": 13193 + }, + { + "epoch": 0.12489469050842003, + "grad_norm": 285.770751953125, + "learning_rate": 1.9531478919001065e-06, + "loss": 17.5312, + "step": 13194 + }, + { + "epoch": 0.12490415653013508, + "grad_norm": 322.55169677734375, + "learning_rate": 1.953138617154486e-06, + "loss": 25.4922, + "step": 13195 + }, + { + "epoch": 0.12491362255185014, + "grad_norm": 275.6960754394531, + "learning_rate": 1.9531293415129767e-06, + "loss": 16.9375, + "step": 13196 + }, + { + "epoch": 0.12492308857356518, + "grad_norm": 672.6968383789062, + "learning_rate": 1.9531200649755875e-06, + "loss": 26.8984, + "step": 13197 + }, + { + "epoch": 0.12493255459528024, + "grad_norm": 446.4208984375, + "learning_rate": 1.9531107875423283e-06, + "loss": 28.0547, + "step": 13198 + }, + { + "epoch": 0.12494202061699529, + "grad_norm": 480.544189453125, + "learning_rate": 1.9531015092132065e-06, + "loss": 31.6172, + "step": 13199 + }, + { + "epoch": 0.12495148663871035, + "grad_norm": 209.5228271484375, + "learning_rate": 1.9530922299882313e-06, + "loss": 16.125, + "step": 13200 + }, + { + "epoch": 0.12496095266042541, + "grad_norm": 3.470914125442505, + "learning_rate": 1.9530829498674116e-06, + "loss": 1.1973, + "step": 13201 + }, + { + "epoch": 0.12497041868214046, + "grad_norm": 3.2747113704681396, + "learning_rate": 1.953073668850756e-06, + "loss": 0.8635, + "step": 13202 + }, + { + "epoch": 0.12497988470385551, + "grad_norm": 387.27508544921875, + "learning_rate": 1.953064386938273e-06, + "loss": 8.3945, + "step": 13203 + }, + { + "epoch": 0.12498935072557056, + "grad_norm": 178.3929901123047, + "learning_rate": 1.9530551041299715e-06, + "loss": 15.1719, + "step": 13204 + }, + { + "epoch": 0.12499881674728562, + "grad_norm": 293.6584167480469, + "learning_rate": 1.95304582042586e-06, + "loss": 20.2578, + "step": 13205 + }, + { + "epoch": 0.12500828276900067, + "grad_norm": 564.47216796875, + "learning_rate": 1.9530365358259476e-06, + "loss": 39.2656, + "step": 13206 + }, + { + "epoch": 0.12501774879071573, + "grad_norm": 195.0304412841797, + "learning_rate": 1.953027250330243e-06, + "loss": 23.3359, + "step": 13207 + }, + { + "epoch": 0.1250272148124308, + "grad_norm": 310.2759094238281, + "learning_rate": 1.9530179639387546e-06, + "loss": 21.0391, + "step": 13208 + }, + { + "epoch": 0.12503668083414585, + "grad_norm": 450.90380859375, + "learning_rate": 1.9530086766514915e-06, + "loss": 38.5625, + "step": 13209 + }, + { + "epoch": 0.12504614685586088, + "grad_norm": 363.09210205078125, + "learning_rate": 1.952999388468462e-06, + "loss": 25.8281, + "step": 13210 + }, + { + "epoch": 0.12505561287757594, + "grad_norm": 334.7377014160156, + "learning_rate": 1.9529900993896753e-06, + "loss": 48.3594, + "step": 13211 + }, + { + "epoch": 0.125065078899291, + "grad_norm": 171.18077087402344, + "learning_rate": 1.9529808094151397e-06, + "loss": 19.7852, + "step": 13212 + }, + { + "epoch": 0.12507454492100606, + "grad_norm": 2.8484442234039307, + "learning_rate": 1.952971518544864e-06, + "loss": 0.9966, + "step": 13213 + }, + { + "epoch": 0.1250840109427211, + "grad_norm": 186.64215087890625, + "learning_rate": 1.9529622267788576e-06, + "loss": 17.0078, + "step": 13214 + }, + { + "epoch": 0.12509347696443615, + "grad_norm": 245.75340270996094, + "learning_rate": 1.952952934117128e-06, + "loss": 18.3281, + "step": 13215 + }, + { + "epoch": 0.1251029429861512, + "grad_norm": 367.38629150390625, + "learning_rate": 1.952943640559685e-06, + "loss": 42.8594, + "step": 13216 + }, + { + "epoch": 0.12511240900786627, + "grad_norm": 400.872802734375, + "learning_rate": 1.952934346106537e-06, + "loss": 38.0312, + "step": 13217 + }, + { + "epoch": 0.12512187502958133, + "grad_norm": 405.500244140625, + "learning_rate": 1.952925050757693e-06, + "loss": 32.3594, + "step": 13218 + }, + { + "epoch": 0.12513134105129636, + "grad_norm": 644.8778686523438, + "learning_rate": 1.952915754513161e-06, + "loss": 24.1016, + "step": 13219 + }, + { + "epoch": 0.12514080707301142, + "grad_norm": 983.4671630859375, + "learning_rate": 1.9529064573729503e-06, + "loss": 66.1719, + "step": 13220 + }, + { + "epoch": 0.12515027309472648, + "grad_norm": 1231.219482421875, + "learning_rate": 1.95289715933707e-06, + "loss": 80.8125, + "step": 13221 + }, + { + "epoch": 0.12515973911644154, + "grad_norm": 287.96124267578125, + "learning_rate": 1.9528878604055276e-06, + "loss": 18.0547, + "step": 13222 + }, + { + "epoch": 0.12516920513815658, + "grad_norm": 180.53236389160156, + "learning_rate": 1.9528785605783326e-06, + "loss": 19.2891, + "step": 13223 + }, + { + "epoch": 0.12517867115987164, + "grad_norm": 301.969970703125, + "learning_rate": 1.952869259855494e-06, + "loss": 21.9688, + "step": 13224 + }, + { + "epoch": 0.1251881371815867, + "grad_norm": 835.1847534179688, + "learning_rate": 1.9528599582370208e-06, + "loss": 27.1445, + "step": 13225 + }, + { + "epoch": 0.12519760320330175, + "grad_norm": 730.84716796875, + "learning_rate": 1.9528506557229204e-06, + "loss": 16.6406, + "step": 13226 + }, + { + "epoch": 0.12520706922501681, + "grad_norm": 849.4359130859375, + "learning_rate": 1.952841352313203e-06, + "loss": 57.7188, + "step": 13227 + }, + { + "epoch": 0.12521653524673185, + "grad_norm": 181.75157165527344, + "learning_rate": 1.952832048007876e-06, + "loss": 17.2031, + "step": 13228 + }, + { + "epoch": 0.1252260012684469, + "grad_norm": 495.04486083984375, + "learning_rate": 1.9528227428069495e-06, + "loss": 35.5781, + "step": 13229 + }, + { + "epoch": 0.12523546729016197, + "grad_norm": 220.57504272460938, + "learning_rate": 1.9528134367104317e-06, + "loss": 26.3711, + "step": 13230 + }, + { + "epoch": 0.12524493331187703, + "grad_norm": 3.7046878337860107, + "learning_rate": 1.9528041297183308e-06, + "loss": 1.0625, + "step": 13231 + }, + { + "epoch": 0.12525439933359206, + "grad_norm": 862.0830688476562, + "learning_rate": 1.9527948218306563e-06, + "loss": 61.2188, + "step": 13232 + }, + { + "epoch": 0.12526386535530712, + "grad_norm": 209.0033416748047, + "learning_rate": 1.9527855130474165e-06, + "loss": 15.0312, + "step": 13233 + }, + { + "epoch": 0.12527333137702218, + "grad_norm": 167.66302490234375, + "learning_rate": 1.95277620336862e-06, + "loss": 18.0781, + "step": 13234 + }, + { + "epoch": 0.12528279739873724, + "grad_norm": 303.3922424316406, + "learning_rate": 1.9527668927942763e-06, + "loss": 17.6016, + "step": 13235 + }, + { + "epoch": 0.1252922634204523, + "grad_norm": 443.21124267578125, + "learning_rate": 1.9527575813243936e-06, + "loss": 19.0, + "step": 13236 + }, + { + "epoch": 0.12530172944216733, + "grad_norm": 228.22808837890625, + "learning_rate": 1.9527482689589808e-06, + "loss": 17.7891, + "step": 13237 + }, + { + "epoch": 0.1253111954638824, + "grad_norm": 472.8309020996094, + "learning_rate": 1.9527389556980465e-06, + "loss": 45.6562, + "step": 13238 + }, + { + "epoch": 0.12532066148559745, + "grad_norm": 204.62933349609375, + "learning_rate": 1.9527296415415994e-06, + "loss": 29.7969, + "step": 13239 + }, + { + "epoch": 0.1253301275073125, + "grad_norm": 3.2004945278167725, + "learning_rate": 1.9527203264896488e-06, + "loss": 1.0107, + "step": 13240 + }, + { + "epoch": 0.12533959352902754, + "grad_norm": 387.3117370605469, + "learning_rate": 1.952711010542203e-06, + "loss": 24.5625, + "step": 13241 + }, + { + "epoch": 0.1253490595507426, + "grad_norm": 460.4173583984375, + "learning_rate": 1.9527016936992707e-06, + "loss": 33.1953, + "step": 13242 + }, + { + "epoch": 0.12535852557245766, + "grad_norm": 992.9196166992188, + "learning_rate": 1.9526923759608606e-06, + "loss": 45.3594, + "step": 13243 + }, + { + "epoch": 0.12536799159417272, + "grad_norm": 271.3359069824219, + "learning_rate": 1.952683057326982e-06, + "loss": 16.0352, + "step": 13244 + }, + { + "epoch": 0.12537745761588778, + "grad_norm": 494.3929443359375, + "learning_rate": 1.952673737797643e-06, + "loss": 45.0938, + "step": 13245 + }, + { + "epoch": 0.12538692363760282, + "grad_norm": 323.263427734375, + "learning_rate": 1.952664417372853e-06, + "loss": 15.6172, + "step": 13246 + }, + { + "epoch": 0.12539638965931788, + "grad_norm": 562.8699340820312, + "learning_rate": 1.95265509605262e-06, + "loss": 46.75, + "step": 13247 + }, + { + "epoch": 0.12540585568103294, + "grad_norm": 631.8035278320312, + "learning_rate": 1.9526457738369533e-06, + "loss": 56.6172, + "step": 13248 + }, + { + "epoch": 0.125415321702748, + "grad_norm": 700.7542114257812, + "learning_rate": 1.952636450725862e-06, + "loss": 32.7344, + "step": 13249 + }, + { + "epoch": 0.12542478772446303, + "grad_norm": 367.641845703125, + "learning_rate": 1.952627126719354e-06, + "loss": 41.625, + "step": 13250 + }, + { + "epoch": 0.1254342537461781, + "grad_norm": 568.1749267578125, + "learning_rate": 1.9526178018174387e-06, + "loss": 14.9805, + "step": 13251 + }, + { + "epoch": 0.12544371976789315, + "grad_norm": 556.494873046875, + "learning_rate": 1.9526084760201242e-06, + "loss": 34.2656, + "step": 13252 + }, + { + "epoch": 0.1254531857896082, + "grad_norm": 3.4260456562042236, + "learning_rate": 1.9525991493274197e-06, + "loss": 1.0464, + "step": 13253 + }, + { + "epoch": 0.12546265181132327, + "grad_norm": 725.4529418945312, + "learning_rate": 1.9525898217393344e-06, + "loss": 17.2891, + "step": 13254 + }, + { + "epoch": 0.1254721178330383, + "grad_norm": 737.802978515625, + "learning_rate": 1.952580493255877e-06, + "loss": 44.0938, + "step": 13255 + }, + { + "epoch": 0.12548158385475336, + "grad_norm": 648.1580810546875, + "learning_rate": 1.952571163877055e-06, + "loss": 42.9531, + "step": 13256 + }, + { + "epoch": 0.12549104987646842, + "grad_norm": 272.7605895996094, + "learning_rate": 1.9525618336028783e-06, + "loss": 22.125, + "step": 13257 + }, + { + "epoch": 0.12550051589818348, + "grad_norm": 288.2515869140625, + "learning_rate": 1.9525525024333556e-06, + "loss": 15.4375, + "step": 13258 + }, + { + "epoch": 0.1255099819198985, + "grad_norm": 3.5435352325439453, + "learning_rate": 1.952543170368496e-06, + "loss": 0.9263, + "step": 13259 + }, + { + "epoch": 0.12551944794161357, + "grad_norm": 252.89366149902344, + "learning_rate": 1.9525338374083073e-06, + "loss": 17.5938, + "step": 13260 + }, + { + "epoch": 0.12552891396332863, + "grad_norm": 605.7354736328125, + "learning_rate": 1.9525245035527983e-06, + "loss": 15.3242, + "step": 13261 + }, + { + "epoch": 0.1255383799850437, + "grad_norm": 565.1336669921875, + "learning_rate": 1.952515168801979e-06, + "loss": 25.9453, + "step": 13262 + }, + { + "epoch": 0.12554784600675875, + "grad_norm": 297.6282043457031, + "learning_rate": 1.952505833155857e-06, + "loss": 18.1094, + "step": 13263 + }, + { + "epoch": 0.12555731202847378, + "grad_norm": 508.16650390625, + "learning_rate": 1.952496496614442e-06, + "loss": 19.3906, + "step": 13264 + }, + { + "epoch": 0.12556677805018884, + "grad_norm": 359.8111877441406, + "learning_rate": 1.9524871591777414e-06, + "loss": 21.6641, + "step": 13265 + }, + { + "epoch": 0.1255762440719039, + "grad_norm": 480.2612609863281, + "learning_rate": 1.9524778208457653e-06, + "loss": 20.4062, + "step": 13266 + }, + { + "epoch": 0.12558571009361896, + "grad_norm": 567.8670043945312, + "learning_rate": 1.952468481618522e-06, + "loss": 32.5508, + "step": 13267 + }, + { + "epoch": 0.125595176115334, + "grad_norm": 517.4393310546875, + "learning_rate": 1.9524591414960202e-06, + "loss": 29.75, + "step": 13268 + }, + { + "epoch": 0.12560464213704906, + "grad_norm": 363.524658203125, + "learning_rate": 1.952449800478269e-06, + "loss": 19.7227, + "step": 13269 + }, + { + "epoch": 0.12561410815876412, + "grad_norm": 568.1856689453125, + "learning_rate": 1.952440458565277e-06, + "loss": 18.8359, + "step": 13270 + }, + { + "epoch": 0.12562357418047918, + "grad_norm": 223.76426696777344, + "learning_rate": 1.9524311157570527e-06, + "loss": 10.7188, + "step": 13271 + }, + { + "epoch": 0.12563304020219423, + "grad_norm": 904.0764770507812, + "learning_rate": 1.9524217720536053e-06, + "loss": 47.5781, + "step": 13272 + }, + { + "epoch": 0.12564250622390927, + "grad_norm": 217.19467163085938, + "learning_rate": 1.952412427454943e-06, + "loss": 13.7695, + "step": 13273 + }, + { + "epoch": 0.12565197224562433, + "grad_norm": 266.0425109863281, + "learning_rate": 1.952403081961075e-06, + "loss": 30.375, + "step": 13274 + }, + { + "epoch": 0.1256614382673394, + "grad_norm": 570.1168212890625, + "learning_rate": 1.9523937355720106e-06, + "loss": 18.4844, + "step": 13275 + }, + { + "epoch": 0.12567090428905445, + "grad_norm": 342.38433837890625, + "learning_rate": 1.9523843882877577e-06, + "loss": 41.4844, + "step": 13276 + }, + { + "epoch": 0.12568037031076948, + "grad_norm": 733.8587036132812, + "learning_rate": 1.9523750401083256e-06, + "loss": 49.7812, + "step": 13277 + }, + { + "epoch": 0.12568983633248454, + "grad_norm": 375.4632873535156, + "learning_rate": 1.9523656910337224e-06, + "loss": 17.6719, + "step": 13278 + }, + { + "epoch": 0.1256993023541996, + "grad_norm": 460.0033874511719, + "learning_rate": 1.952356341063958e-06, + "loss": 62.5, + "step": 13279 + }, + { + "epoch": 0.12570876837591466, + "grad_norm": 457.8906555175781, + "learning_rate": 1.9523469901990404e-06, + "loss": 29.0938, + "step": 13280 + }, + { + "epoch": 0.12571823439762972, + "grad_norm": 241.46829223632812, + "learning_rate": 1.9523376384389785e-06, + "loss": 21.9375, + "step": 13281 + }, + { + "epoch": 0.12572770041934475, + "grad_norm": 218.89822387695312, + "learning_rate": 1.952328285783781e-06, + "loss": 10.332, + "step": 13282 + }, + { + "epoch": 0.1257371664410598, + "grad_norm": 486.468017578125, + "learning_rate": 1.952318932233457e-06, + "loss": 41.6094, + "step": 13283 + }, + { + "epoch": 0.12574663246277487, + "grad_norm": 161.72080993652344, + "learning_rate": 1.9523095777880158e-06, + "loss": 19.9844, + "step": 13284 + }, + { + "epoch": 0.12575609848448993, + "grad_norm": 649.1815795898438, + "learning_rate": 1.9523002224474647e-06, + "loss": 24.2734, + "step": 13285 + }, + { + "epoch": 0.125765564506205, + "grad_norm": 273.2587585449219, + "learning_rate": 1.9522908662118138e-06, + "loss": 27.8281, + "step": 13286 + }, + { + "epoch": 0.12577503052792002, + "grad_norm": 292.28741455078125, + "learning_rate": 1.9522815090810713e-06, + "loss": 10.3672, + "step": 13287 + }, + { + "epoch": 0.12578449654963508, + "grad_norm": 217.14840698242188, + "learning_rate": 1.952272151055246e-06, + "loss": 20.7188, + "step": 13288 + }, + { + "epoch": 0.12579396257135014, + "grad_norm": 441.19140625, + "learning_rate": 1.9522627921343467e-06, + "loss": 36.6406, + "step": 13289 + }, + { + "epoch": 0.1258034285930652, + "grad_norm": 289.10833740234375, + "learning_rate": 1.9522534323183827e-06, + "loss": 15.8203, + "step": 13290 + }, + { + "epoch": 0.12581289461478024, + "grad_norm": 212.0904541015625, + "learning_rate": 1.9522440716073624e-06, + "loss": 22.5, + "step": 13291 + }, + { + "epoch": 0.1258223606364953, + "grad_norm": 464.1608581542969, + "learning_rate": 1.9522347100012943e-06, + "loss": 35.5625, + "step": 13292 + }, + { + "epoch": 0.12583182665821036, + "grad_norm": 370.44012451171875, + "learning_rate": 1.952225347500188e-06, + "loss": 44.3984, + "step": 13293 + }, + { + "epoch": 0.12584129267992542, + "grad_norm": 233.53030395507812, + "learning_rate": 1.9522159841040513e-06, + "loss": 21.4609, + "step": 13294 + }, + { + "epoch": 0.12585075870164047, + "grad_norm": 362.10498046875, + "learning_rate": 1.952206619812894e-06, + "loss": 32.8359, + "step": 13295 + }, + { + "epoch": 0.1258602247233555, + "grad_norm": 332.592529296875, + "learning_rate": 1.952197254626724e-06, + "loss": 35.1016, + "step": 13296 + }, + { + "epoch": 0.12586969074507057, + "grad_norm": 468.2342834472656, + "learning_rate": 1.952187888545551e-06, + "loss": 27.0781, + "step": 13297 + }, + { + "epoch": 0.12587915676678563, + "grad_norm": 3.282233715057373, + "learning_rate": 1.9521785215693828e-06, + "loss": 0.9531, + "step": 13298 + }, + { + "epoch": 0.1258886227885007, + "grad_norm": 875.9244384765625, + "learning_rate": 1.9521691536982287e-06, + "loss": 57.5312, + "step": 13299 + }, + { + "epoch": 0.12589808881021572, + "grad_norm": 569.4166259765625, + "learning_rate": 1.952159784932098e-06, + "loss": 41.2188, + "step": 13300 + }, + { + "epoch": 0.12590755483193078, + "grad_norm": 251.54425048828125, + "learning_rate": 1.952150415270999e-06, + "loss": 21.4414, + "step": 13301 + }, + { + "epoch": 0.12591702085364584, + "grad_norm": 657.74267578125, + "learning_rate": 1.9521410447149404e-06, + "loss": 38.8906, + "step": 13302 + }, + { + "epoch": 0.1259264868753609, + "grad_norm": 878.3411865234375, + "learning_rate": 1.952131673263931e-06, + "loss": 68.6406, + "step": 13303 + }, + { + "epoch": 0.12593595289707596, + "grad_norm": 3.410975933074951, + "learning_rate": 1.95212230091798e-06, + "loss": 0.9141, + "step": 13304 + }, + { + "epoch": 0.125945418918791, + "grad_norm": 3.7468132972717285, + "learning_rate": 1.952112927677096e-06, + "loss": 1.1133, + "step": 13305 + }, + { + "epoch": 0.12595488494050605, + "grad_norm": 545.3897705078125, + "learning_rate": 1.9521035535412875e-06, + "loss": 8.6055, + "step": 13306 + }, + { + "epoch": 0.1259643509622211, + "grad_norm": 446.0579528808594, + "learning_rate": 1.9520941785105637e-06, + "loss": 37.4609, + "step": 13307 + }, + { + "epoch": 0.12597381698393617, + "grad_norm": 409.2267761230469, + "learning_rate": 1.952084802584934e-06, + "loss": 57.4062, + "step": 13308 + }, + { + "epoch": 0.1259832830056512, + "grad_norm": 339.2450866699219, + "learning_rate": 1.9520754257644058e-06, + "loss": 24.375, + "step": 13309 + }, + { + "epoch": 0.12599274902736626, + "grad_norm": 424.84552001953125, + "learning_rate": 1.9520660480489886e-06, + "loss": 26.5859, + "step": 13310 + }, + { + "epoch": 0.12600221504908132, + "grad_norm": 359.7779846191406, + "learning_rate": 1.9520566694386912e-06, + "loss": 37.4844, + "step": 13311 + }, + { + "epoch": 0.12601168107079638, + "grad_norm": 896.7194213867188, + "learning_rate": 1.9520472899335227e-06, + "loss": 45.2695, + "step": 13312 + }, + { + "epoch": 0.12602114709251144, + "grad_norm": 422.99725341796875, + "learning_rate": 1.9520379095334917e-06, + "loss": 20.2734, + "step": 13313 + }, + { + "epoch": 0.12603061311422648, + "grad_norm": 305.79986572265625, + "learning_rate": 1.952028528238607e-06, + "loss": 24.4375, + "step": 13314 + }, + { + "epoch": 0.12604007913594154, + "grad_norm": 349.8025207519531, + "learning_rate": 1.952019146048877e-06, + "loss": 22.0312, + "step": 13315 + }, + { + "epoch": 0.1260495451576566, + "grad_norm": 389.9148254394531, + "learning_rate": 1.9520097629643116e-06, + "loss": 16.8125, + "step": 13316 + }, + { + "epoch": 0.12605901117937166, + "grad_norm": 310.0596618652344, + "learning_rate": 1.952000378984919e-06, + "loss": 32.9531, + "step": 13317 + }, + { + "epoch": 0.1260684772010867, + "grad_norm": 200.6229705810547, + "learning_rate": 1.9519909941107075e-06, + "loss": 9.4102, + "step": 13318 + }, + { + "epoch": 0.12607794322280175, + "grad_norm": 878.3117065429688, + "learning_rate": 1.9519816083416866e-06, + "loss": 31.3672, + "step": 13319 + }, + { + "epoch": 0.1260874092445168, + "grad_norm": 449.05242919921875, + "learning_rate": 1.951972221677865e-06, + "loss": 45.8125, + "step": 13320 + }, + { + "epoch": 0.12609687526623187, + "grad_norm": 420.2989196777344, + "learning_rate": 1.951962834119251e-06, + "loss": 53.5, + "step": 13321 + }, + { + "epoch": 0.12610634128794693, + "grad_norm": 262.5774841308594, + "learning_rate": 1.951953445665854e-06, + "loss": 14.0586, + "step": 13322 + }, + { + "epoch": 0.12611580730966196, + "grad_norm": 3.17242693901062, + "learning_rate": 1.951944056317683e-06, + "loss": 0.8926, + "step": 13323 + }, + { + "epoch": 0.12612527333137702, + "grad_norm": 433.0061950683594, + "learning_rate": 1.9519346660747465e-06, + "loss": 49.8281, + "step": 13324 + }, + { + "epoch": 0.12613473935309208, + "grad_norm": 219.4008026123047, + "learning_rate": 1.9519252749370534e-06, + "loss": 29.5391, + "step": 13325 + }, + { + "epoch": 0.12614420537480714, + "grad_norm": 428.17291259765625, + "learning_rate": 1.9519158829046123e-06, + "loss": 47.8984, + "step": 13326 + }, + { + "epoch": 0.12615367139652217, + "grad_norm": 484.4280090332031, + "learning_rate": 1.951906489977432e-06, + "loss": 29.2109, + "step": 13327 + }, + { + "epoch": 0.12616313741823723, + "grad_norm": 340.69158935546875, + "learning_rate": 1.9518970961555216e-06, + "loss": 21.7734, + "step": 13328 + }, + { + "epoch": 0.1261726034399523, + "grad_norm": 585.2223510742188, + "learning_rate": 1.9518877014388898e-06, + "loss": 32.6953, + "step": 13329 + }, + { + "epoch": 0.12618206946166735, + "grad_norm": 372.00482177734375, + "learning_rate": 1.9518783058275455e-06, + "loss": 19.6641, + "step": 13330 + }, + { + "epoch": 0.1261915354833824, + "grad_norm": 3.1769609451293945, + "learning_rate": 1.951868909321498e-06, + "loss": 0.8889, + "step": 13331 + }, + { + "epoch": 0.12620100150509744, + "grad_norm": 386.7494812011719, + "learning_rate": 1.9518595119207554e-06, + "loss": 28.2188, + "step": 13332 + }, + { + "epoch": 0.1262104675268125, + "grad_norm": 774.1046752929688, + "learning_rate": 1.9518501136253264e-06, + "loss": 39.9219, + "step": 13333 + }, + { + "epoch": 0.12621993354852756, + "grad_norm": 352.9222717285156, + "learning_rate": 1.9518407144352205e-06, + "loss": 15.5859, + "step": 13334 + }, + { + "epoch": 0.12622939957024262, + "grad_norm": 294.5710754394531, + "learning_rate": 1.9518313143504463e-06, + "loss": 10.3398, + "step": 13335 + }, + { + "epoch": 0.12623886559195766, + "grad_norm": 348.34466552734375, + "learning_rate": 1.9518219133710126e-06, + "loss": 17.7266, + "step": 13336 + }, + { + "epoch": 0.12624833161367272, + "grad_norm": 326.5757751464844, + "learning_rate": 1.9518125114969282e-06, + "loss": 8.9531, + "step": 13337 + }, + { + "epoch": 0.12625779763538778, + "grad_norm": 534.9884033203125, + "learning_rate": 1.9518031087282018e-06, + "loss": 41.3906, + "step": 13338 + }, + { + "epoch": 0.12626726365710284, + "grad_norm": 651.5226440429688, + "learning_rate": 1.951793705064843e-06, + "loss": 41.9844, + "step": 13339 + }, + { + "epoch": 0.1262767296788179, + "grad_norm": 534.2345581054688, + "learning_rate": 1.9517843005068593e-06, + "loss": 43.5078, + "step": 13340 + }, + { + "epoch": 0.12628619570053293, + "grad_norm": 508.1007995605469, + "learning_rate": 1.9517748950542603e-06, + "loss": 46.8516, + "step": 13341 + }, + { + "epoch": 0.126295661722248, + "grad_norm": 264.42730712890625, + "learning_rate": 1.951765488707055e-06, + "loss": 18.5312, + "step": 13342 + }, + { + "epoch": 0.12630512774396305, + "grad_norm": 266.14459228515625, + "learning_rate": 1.9517560814652522e-06, + "loss": 25.4062, + "step": 13343 + }, + { + "epoch": 0.1263145937656781, + "grad_norm": 487.6197814941406, + "learning_rate": 1.9517466733288606e-06, + "loss": 33.0234, + "step": 13344 + }, + { + "epoch": 0.12632405978739314, + "grad_norm": 3.205129384994507, + "learning_rate": 1.9517372642978887e-06, + "loss": 0.8643, + "step": 13345 + }, + { + "epoch": 0.1263335258091082, + "grad_norm": 382.375, + "learning_rate": 1.9517278543723463e-06, + "loss": 32.9375, + "step": 13346 + }, + { + "epoch": 0.12634299183082326, + "grad_norm": 422.7896423339844, + "learning_rate": 1.9517184435522413e-06, + "loss": 40.8125, + "step": 13347 + }, + { + "epoch": 0.12635245785253832, + "grad_norm": 377.9723815917969, + "learning_rate": 1.9517090318375827e-06, + "loss": 20.6016, + "step": 13348 + }, + { + "epoch": 0.12636192387425338, + "grad_norm": 363.46612548828125, + "learning_rate": 1.95169961922838e-06, + "loss": 31.1562, + "step": 13349 + }, + { + "epoch": 0.1263713898959684, + "grad_norm": 754.9600219726562, + "learning_rate": 1.9516902057246407e-06, + "loss": 25.6367, + "step": 13350 + }, + { + "epoch": 0.12638085591768347, + "grad_norm": 488.4947814941406, + "learning_rate": 1.951680791326375e-06, + "loss": 61.3047, + "step": 13351 + }, + { + "epoch": 0.12639032193939853, + "grad_norm": 4.382075786590576, + "learning_rate": 1.9516713760335914e-06, + "loss": 0.9065, + "step": 13352 + }, + { + "epoch": 0.1263997879611136, + "grad_norm": 207.63162231445312, + "learning_rate": 1.9516619598462986e-06, + "loss": 18.3672, + "step": 13353 + }, + { + "epoch": 0.12640925398282862, + "grad_norm": 1005.9324951171875, + "learning_rate": 1.9516525427645056e-06, + "loss": 44.3281, + "step": 13354 + }, + { + "epoch": 0.12641872000454368, + "grad_norm": 695.0509033203125, + "learning_rate": 1.9516431247882207e-06, + "loss": 31.2812, + "step": 13355 + }, + { + "epoch": 0.12642818602625874, + "grad_norm": 756.3140258789062, + "learning_rate": 1.9516337059174534e-06, + "loss": 17.2148, + "step": 13356 + }, + { + "epoch": 0.1264376520479738, + "grad_norm": 599.3627319335938, + "learning_rate": 1.951624286152212e-06, + "loss": 25.3906, + "step": 13357 + }, + { + "epoch": 0.12644711806968886, + "grad_norm": 282.9970397949219, + "learning_rate": 1.951614865492506e-06, + "loss": 28.375, + "step": 13358 + }, + { + "epoch": 0.1264565840914039, + "grad_norm": 294.9689025878906, + "learning_rate": 1.951605443938344e-06, + "loss": 16.2266, + "step": 13359 + }, + { + "epoch": 0.12646605011311896, + "grad_norm": 197.6996307373047, + "learning_rate": 1.951596021489735e-06, + "loss": 23.625, + "step": 13360 + }, + { + "epoch": 0.12647551613483402, + "grad_norm": 180.89208984375, + "learning_rate": 1.9515865981466874e-06, + "loss": 15.0938, + "step": 13361 + }, + { + "epoch": 0.12648498215654908, + "grad_norm": 328.6690979003906, + "learning_rate": 1.95157717390921e-06, + "loss": 23.9609, + "step": 13362 + }, + { + "epoch": 0.1264944481782641, + "grad_norm": 298.8408203125, + "learning_rate": 1.9515677487773124e-06, + "loss": 39.5469, + "step": 13363 + }, + { + "epoch": 0.12650391419997917, + "grad_norm": 3.003593921661377, + "learning_rate": 1.9515583227510026e-06, + "loss": 0.9004, + "step": 13364 + }, + { + "epoch": 0.12651338022169423, + "grad_norm": 865.0154418945312, + "learning_rate": 1.95154889583029e-06, + "loss": 85.8242, + "step": 13365 + }, + { + "epoch": 0.1265228462434093, + "grad_norm": 292.0408020019531, + "learning_rate": 1.951539468015184e-06, + "loss": 22.1953, + "step": 13366 + }, + { + "epoch": 0.12653231226512435, + "grad_norm": 1150.379150390625, + "learning_rate": 1.9515300393056917e-06, + "loss": 34.1094, + "step": 13367 + }, + { + "epoch": 0.12654177828683938, + "grad_norm": 964.8262329101562, + "learning_rate": 1.951520609701824e-06, + "loss": 16.2305, + "step": 13368 + }, + { + "epoch": 0.12655124430855444, + "grad_norm": 446.04620361328125, + "learning_rate": 1.9515111792035885e-06, + "loss": 46.0312, + "step": 13369 + }, + { + "epoch": 0.1265607103302695, + "grad_norm": 396.1007995605469, + "learning_rate": 1.951501747810994e-06, + "loss": 9.375, + "step": 13370 + }, + { + "epoch": 0.12657017635198456, + "grad_norm": 2.9439613819122314, + "learning_rate": 1.9514923155240504e-06, + "loss": 1.0103, + "step": 13371 + }, + { + "epoch": 0.12657964237369962, + "grad_norm": 430.84954833984375, + "learning_rate": 1.9514828823427652e-06, + "loss": 24.3047, + "step": 13372 + }, + { + "epoch": 0.12658910839541465, + "grad_norm": 1150.6832275390625, + "learning_rate": 1.951473448267149e-06, + "loss": 38.0703, + "step": 13373 + }, + { + "epoch": 0.1265985744171297, + "grad_norm": 295.4119567871094, + "learning_rate": 1.9514640132972087e-06, + "loss": 12.6797, + "step": 13374 + }, + { + "epoch": 0.12660804043884477, + "grad_norm": 202.11000061035156, + "learning_rate": 1.9514545774329547e-06, + "loss": 19.0781, + "step": 13375 + }, + { + "epoch": 0.12661750646055983, + "grad_norm": 333.80206298828125, + "learning_rate": 1.951445140674395e-06, + "loss": 25.8203, + "step": 13376 + }, + { + "epoch": 0.12662697248227486, + "grad_norm": 376.2691650390625, + "learning_rate": 1.951435703021539e-06, + "loss": 18.7891, + "step": 13377 + }, + { + "epoch": 0.12663643850398992, + "grad_norm": 409.1540222167969, + "learning_rate": 1.9514262644743953e-06, + "loss": 36.1367, + "step": 13378 + }, + { + "epoch": 0.12664590452570498, + "grad_norm": 312.83770751953125, + "learning_rate": 1.9514168250329725e-06, + "loss": 23.2578, + "step": 13379 + }, + { + "epoch": 0.12665537054742004, + "grad_norm": 452.0498352050781, + "learning_rate": 1.95140738469728e-06, + "loss": 52.7969, + "step": 13380 + }, + { + "epoch": 0.1266648365691351, + "grad_norm": 323.39434814453125, + "learning_rate": 1.9513979434673266e-06, + "loss": 20.1641, + "step": 13381 + }, + { + "epoch": 0.12667430259085014, + "grad_norm": 237.63470458984375, + "learning_rate": 1.9513885013431205e-06, + "loss": 18.4922, + "step": 13382 + }, + { + "epoch": 0.1266837686125652, + "grad_norm": 263.7359619140625, + "learning_rate": 1.9513790583246712e-06, + "loss": 17.9219, + "step": 13383 + }, + { + "epoch": 0.12669323463428026, + "grad_norm": 337.0799560546875, + "learning_rate": 1.951369614411988e-06, + "loss": 44.1719, + "step": 13384 + }, + { + "epoch": 0.12670270065599532, + "grad_norm": 440.5296936035156, + "learning_rate": 1.951360169605079e-06, + "loss": 33.8438, + "step": 13385 + }, + { + "epoch": 0.12671216667771035, + "grad_norm": 184.5572052001953, + "learning_rate": 1.9513507239039533e-06, + "loss": 20.875, + "step": 13386 + }, + { + "epoch": 0.1267216326994254, + "grad_norm": 732.8403930664062, + "learning_rate": 1.95134127730862e-06, + "loss": 22.3711, + "step": 13387 + }, + { + "epoch": 0.12673109872114047, + "grad_norm": 3.4270551204681396, + "learning_rate": 1.9513318298190872e-06, + "loss": 1.1289, + "step": 13388 + }, + { + "epoch": 0.12674056474285553, + "grad_norm": 447.01361083984375, + "learning_rate": 1.951322381435365e-06, + "loss": 37.1016, + "step": 13389 + }, + { + "epoch": 0.1267500307645706, + "grad_norm": 834.6939086914062, + "learning_rate": 1.9513129321574615e-06, + "loss": 61.5391, + "step": 13390 + }, + { + "epoch": 0.12675949678628562, + "grad_norm": 701.85498046875, + "learning_rate": 1.9513034819853857e-06, + "loss": 30.5703, + "step": 13391 + }, + { + "epoch": 0.12676896280800068, + "grad_norm": 501.9645080566406, + "learning_rate": 1.951294030919146e-06, + "loss": 27.5078, + "step": 13392 + }, + { + "epoch": 0.12677842882971574, + "grad_norm": 479.9990539550781, + "learning_rate": 1.951284578958753e-06, + "loss": 48.8594, + "step": 13393 + }, + { + "epoch": 0.1267878948514308, + "grad_norm": 585.630126953125, + "learning_rate": 1.951275126104213e-06, + "loss": 53.3125, + "step": 13394 + }, + { + "epoch": 0.12679736087314583, + "grad_norm": 303.25579833984375, + "learning_rate": 1.9512656723555373e-06, + "loss": 33.4766, + "step": 13395 + }, + { + "epoch": 0.1268068268948609, + "grad_norm": 897.2579345703125, + "learning_rate": 1.951256217712733e-06, + "loss": 27.2188, + "step": 13396 + }, + { + "epoch": 0.12681629291657595, + "grad_norm": 670.3174438476562, + "learning_rate": 1.9512467621758102e-06, + "loss": 40.5, + "step": 13397 + }, + { + "epoch": 0.126825758938291, + "grad_norm": 209.9985809326172, + "learning_rate": 1.951237305744777e-06, + "loss": 8.6992, + "step": 13398 + }, + { + "epoch": 0.12683522496000607, + "grad_norm": 385.1164245605469, + "learning_rate": 1.9512278484196433e-06, + "loss": 22.3594, + "step": 13399 + }, + { + "epoch": 0.1268446909817211, + "grad_norm": 488.499267578125, + "learning_rate": 1.9512183902004165e-06, + "loss": 38.9336, + "step": 13400 + }, + { + "epoch": 0.12685415700343616, + "grad_norm": 180.74627685546875, + "learning_rate": 1.9512089310871067e-06, + "loss": 22.1797, + "step": 13401 + }, + { + "epoch": 0.12686362302515122, + "grad_norm": 823.2357788085938, + "learning_rate": 1.9511994710797223e-06, + "loss": 32.6797, + "step": 13402 + }, + { + "epoch": 0.12687308904686628, + "grad_norm": 480.73052978515625, + "learning_rate": 1.9511900101782725e-06, + "loss": 42.2344, + "step": 13403 + }, + { + "epoch": 0.12688255506858132, + "grad_norm": 630.1935424804688, + "learning_rate": 1.9511805483827655e-06, + "loss": 9.0156, + "step": 13404 + }, + { + "epoch": 0.12689202109029638, + "grad_norm": 218.1253662109375, + "learning_rate": 1.951171085693211e-06, + "loss": 17.625, + "step": 13405 + }, + { + "epoch": 0.12690148711201144, + "grad_norm": 610.4765014648438, + "learning_rate": 1.9511616221096175e-06, + "loss": 38.1875, + "step": 13406 + }, + { + "epoch": 0.1269109531337265, + "grad_norm": 300.9467468261719, + "learning_rate": 1.951152157631994e-06, + "loss": 21.2109, + "step": 13407 + }, + { + "epoch": 0.12692041915544156, + "grad_norm": 346.6846008300781, + "learning_rate": 1.9511426922603492e-06, + "loss": 30.8594, + "step": 13408 + }, + { + "epoch": 0.1269298851771566, + "grad_norm": 4.109763145446777, + "learning_rate": 1.9511332259946924e-06, + "loss": 1.1196, + "step": 13409 + }, + { + "epoch": 0.12693935119887165, + "grad_norm": 464.15228271484375, + "learning_rate": 1.951123758835032e-06, + "loss": 24.2031, + "step": 13410 + }, + { + "epoch": 0.1269488172205867, + "grad_norm": 316.1590270996094, + "learning_rate": 1.951114290781377e-06, + "loss": 20.1641, + "step": 13411 + }, + { + "epoch": 0.12695828324230177, + "grad_norm": 584.0719604492188, + "learning_rate": 1.951104821833737e-06, + "loss": 36.4219, + "step": 13412 + }, + { + "epoch": 0.1269677492640168, + "grad_norm": 2.997528314590454, + "learning_rate": 1.95109535199212e-06, + "loss": 0.9878, + "step": 13413 + }, + { + "epoch": 0.12697721528573186, + "grad_norm": 497.2213439941406, + "learning_rate": 1.951085881256535e-06, + "loss": 22.3438, + "step": 13414 + }, + { + "epoch": 0.12698668130744692, + "grad_norm": 310.8911437988281, + "learning_rate": 1.9510764096269913e-06, + "loss": 16.5078, + "step": 13415 + }, + { + "epoch": 0.12699614732916198, + "grad_norm": 246.42660522460938, + "learning_rate": 1.951066937103498e-06, + "loss": 31.5, + "step": 13416 + }, + { + "epoch": 0.12700561335087704, + "grad_norm": 821.3759155273438, + "learning_rate": 1.9510574636860635e-06, + "loss": 47.5, + "step": 13417 + }, + { + "epoch": 0.12701507937259207, + "grad_norm": 823.7952270507812, + "learning_rate": 1.951047989374697e-06, + "loss": 48.5469, + "step": 13418 + }, + { + "epoch": 0.12702454539430713, + "grad_norm": 289.78515625, + "learning_rate": 1.9510385141694064e-06, + "loss": 22.7578, + "step": 13419 + }, + { + "epoch": 0.1270340114160222, + "grad_norm": 212.28097534179688, + "learning_rate": 1.9510290380702022e-06, + "loss": 7.916, + "step": 13420 + }, + { + "epoch": 0.12704347743773725, + "grad_norm": 3.089507818222046, + "learning_rate": 1.9510195610770924e-06, + "loss": 0.9253, + "step": 13421 + }, + { + "epoch": 0.12705294345945228, + "grad_norm": 510.2340393066406, + "learning_rate": 1.9510100831900864e-06, + "loss": 52.3906, + "step": 13422 + }, + { + "epoch": 0.12706240948116734, + "grad_norm": 220.24234008789062, + "learning_rate": 1.9510006044091924e-06, + "loss": 14.0625, + "step": 13423 + }, + { + "epoch": 0.1270718755028824, + "grad_norm": 457.9831848144531, + "learning_rate": 1.95099112473442e-06, + "loss": 37.1562, + "step": 13424 + }, + { + "epoch": 0.12708134152459746, + "grad_norm": 264.0520935058594, + "learning_rate": 1.9509816441657774e-06, + "loss": 19.9609, + "step": 13425 + }, + { + "epoch": 0.12709080754631252, + "grad_norm": 358.56756591796875, + "learning_rate": 1.950972162703274e-06, + "loss": 28.5625, + "step": 13426 + }, + { + "epoch": 0.12710027356802756, + "grad_norm": 504.8966979980469, + "learning_rate": 1.9509626803469186e-06, + "loss": 50.4219, + "step": 13427 + }, + { + "epoch": 0.12710973958974262, + "grad_norm": 200.09573364257812, + "learning_rate": 1.9509531970967206e-06, + "loss": 16.9219, + "step": 13428 + }, + { + "epoch": 0.12711920561145768, + "grad_norm": 197.83074951171875, + "learning_rate": 1.950943712952688e-06, + "loss": 15.1328, + "step": 13429 + }, + { + "epoch": 0.12712867163317274, + "grad_norm": 479.802978515625, + "learning_rate": 1.9509342279148303e-06, + "loss": 55.9688, + "step": 13430 + }, + { + "epoch": 0.12713813765488777, + "grad_norm": 455.4777526855469, + "learning_rate": 1.9509247419831563e-06, + "loss": 36.1406, + "step": 13431 + }, + { + "epoch": 0.12714760367660283, + "grad_norm": 334.0008850097656, + "learning_rate": 1.950915255157675e-06, + "loss": 41.0, + "step": 13432 + }, + { + "epoch": 0.1271570696983179, + "grad_norm": 407.646240234375, + "learning_rate": 1.950905767438395e-06, + "loss": 16.9688, + "step": 13433 + }, + { + "epoch": 0.12716653572003295, + "grad_norm": 452.0950927734375, + "learning_rate": 1.9508962788253257e-06, + "loss": 21.8047, + "step": 13434 + }, + { + "epoch": 0.127176001741748, + "grad_norm": 391.02606201171875, + "learning_rate": 1.950886789318476e-06, + "loss": 10.6523, + "step": 13435 + }, + { + "epoch": 0.12718546776346304, + "grad_norm": 656.3055419921875, + "learning_rate": 1.950877298917854e-06, + "loss": 55.1406, + "step": 13436 + }, + { + "epoch": 0.1271949337851781, + "grad_norm": 556.2755737304688, + "learning_rate": 1.9508678076234696e-06, + "loss": 44.4766, + "step": 13437 + }, + { + "epoch": 0.12720439980689316, + "grad_norm": 686.4541015625, + "learning_rate": 1.950858315435331e-06, + "loss": 37.0156, + "step": 13438 + }, + { + "epoch": 0.12721386582860822, + "grad_norm": 231.46356201171875, + "learning_rate": 1.9508488223534477e-06, + "loss": 22.2188, + "step": 13439 + }, + { + "epoch": 0.12722333185032325, + "grad_norm": 598.1219482421875, + "learning_rate": 1.9508393283778283e-06, + "loss": 49.0469, + "step": 13440 + }, + { + "epoch": 0.1272327978720383, + "grad_norm": 547.489501953125, + "learning_rate": 1.9508298335084816e-06, + "loss": 22.6328, + "step": 13441 + }, + { + "epoch": 0.12724226389375337, + "grad_norm": 381.3787841796875, + "learning_rate": 1.950820337745417e-06, + "loss": 46.8281, + "step": 13442 + }, + { + "epoch": 0.12725172991546843, + "grad_norm": 838.1392211914062, + "learning_rate": 1.9508108410886427e-06, + "loss": 50.2969, + "step": 13443 + }, + { + "epoch": 0.1272611959371835, + "grad_norm": 471.1757507324219, + "learning_rate": 1.9508013435381687e-06, + "loss": 42.0391, + "step": 13444 + }, + { + "epoch": 0.12727066195889852, + "grad_norm": 200.93865966796875, + "learning_rate": 1.950791845094003e-06, + "loss": 15.9922, + "step": 13445 + }, + { + "epoch": 0.12728012798061358, + "grad_norm": 1862.4908447265625, + "learning_rate": 1.9507823457561546e-06, + "loss": 32.1953, + "step": 13446 + }, + { + "epoch": 0.12728959400232864, + "grad_norm": 745.8229370117188, + "learning_rate": 1.950772845524633e-06, + "loss": 8.7266, + "step": 13447 + }, + { + "epoch": 0.1272990600240437, + "grad_norm": 179.55271911621094, + "learning_rate": 1.9507633443994464e-06, + "loss": 22.25, + "step": 13448 + }, + { + "epoch": 0.12730852604575874, + "grad_norm": 243.8939208984375, + "learning_rate": 1.950753842380604e-06, + "loss": 31.7969, + "step": 13449 + }, + { + "epoch": 0.1273179920674738, + "grad_norm": 2.7916910648345947, + "learning_rate": 1.950744339468115e-06, + "loss": 0.8521, + "step": 13450 + }, + { + "epoch": 0.12732745808918886, + "grad_norm": 626.037353515625, + "learning_rate": 1.9507348356619885e-06, + "loss": 53.0234, + "step": 13451 + }, + { + "epoch": 0.12733692411090392, + "grad_norm": 307.92022705078125, + "learning_rate": 1.950725330962233e-06, + "loss": 22.7266, + "step": 13452 + }, + { + "epoch": 0.12734639013261898, + "grad_norm": 362.7856750488281, + "learning_rate": 1.9507158253688574e-06, + "loss": 28.4766, + "step": 13453 + }, + { + "epoch": 0.127355856154334, + "grad_norm": 286.762451171875, + "learning_rate": 1.950706318881871e-06, + "loss": 20.9844, + "step": 13454 + }, + { + "epoch": 0.12736532217604907, + "grad_norm": 328.85614013671875, + "learning_rate": 1.9506968115012823e-06, + "loss": 34.8906, + "step": 13455 + }, + { + "epoch": 0.12737478819776413, + "grad_norm": 665.508056640625, + "learning_rate": 1.9506873032271e-06, + "loss": 64.8359, + "step": 13456 + }, + { + "epoch": 0.1273842542194792, + "grad_norm": 1009.60498046875, + "learning_rate": 1.9506777940593346e-06, + "loss": 69.3867, + "step": 13457 + }, + { + "epoch": 0.12739372024119425, + "grad_norm": 422.6332702636719, + "learning_rate": 1.950668283997993e-06, + "loss": 36.6875, + "step": 13458 + }, + { + "epoch": 0.12740318626290928, + "grad_norm": 172.31216430664062, + "learning_rate": 1.9506587730430853e-06, + "loss": 28.2031, + "step": 13459 + }, + { + "epoch": 0.12741265228462434, + "grad_norm": 527.895751953125, + "learning_rate": 1.95064926119462e-06, + "loss": 30.9141, + "step": 13460 + }, + { + "epoch": 0.1274221183063394, + "grad_norm": 497.868896484375, + "learning_rate": 1.9506397484526067e-06, + "loss": 46.4062, + "step": 13461 + }, + { + "epoch": 0.12743158432805446, + "grad_norm": 1072.8255615234375, + "learning_rate": 1.9506302348170537e-06, + "loss": 49.125, + "step": 13462 + }, + { + "epoch": 0.1274410503497695, + "grad_norm": 434.7898864746094, + "learning_rate": 1.95062072028797e-06, + "loss": 37.1328, + "step": 13463 + }, + { + "epoch": 0.12745051637148455, + "grad_norm": 333.2665710449219, + "learning_rate": 1.950611204865365e-06, + "loss": 21.2422, + "step": 13464 + }, + { + "epoch": 0.1274599823931996, + "grad_norm": 297.4764099121094, + "learning_rate": 1.950601688549247e-06, + "loss": 18.8359, + "step": 13465 + }, + { + "epoch": 0.12746944841491467, + "grad_norm": 1034.263671875, + "learning_rate": 1.950592171339625e-06, + "loss": 39.25, + "step": 13466 + }, + { + "epoch": 0.12747891443662973, + "grad_norm": 3.4627208709716797, + "learning_rate": 1.9505826532365086e-06, + "loss": 0.9766, + "step": 13467 + }, + { + "epoch": 0.12748838045834476, + "grad_norm": 349.0146789550781, + "learning_rate": 1.950573134239906e-06, + "loss": 41.625, + "step": 13468 + }, + { + "epoch": 0.12749784648005982, + "grad_norm": 365.6046447753906, + "learning_rate": 1.9505636143498267e-06, + "loss": 30.1406, + "step": 13469 + }, + { + "epoch": 0.12750731250177488, + "grad_norm": 273.8702697753906, + "learning_rate": 1.9505540935662795e-06, + "loss": 16.5977, + "step": 13470 + }, + { + "epoch": 0.12751677852348994, + "grad_norm": 511.4125061035156, + "learning_rate": 1.9505445718892734e-06, + "loss": 46.75, + "step": 13471 + }, + { + "epoch": 0.12752624454520498, + "grad_norm": 3.337028980255127, + "learning_rate": 1.950535049318817e-06, + "loss": 0.9497, + "step": 13472 + }, + { + "epoch": 0.12753571056692004, + "grad_norm": 238.80531311035156, + "learning_rate": 1.9505255258549196e-06, + "loss": 10.6016, + "step": 13473 + }, + { + "epoch": 0.1275451765886351, + "grad_norm": 1093.4476318359375, + "learning_rate": 1.95051600149759e-06, + "loss": 56.6094, + "step": 13474 + }, + { + "epoch": 0.12755464261035016, + "grad_norm": 937.4810180664062, + "learning_rate": 1.9505064762468372e-06, + "loss": 64.2969, + "step": 13475 + }, + { + "epoch": 0.12756410863206522, + "grad_norm": 2.885780096054077, + "learning_rate": 1.9504969501026705e-06, + "loss": 1.0278, + "step": 13476 + }, + { + "epoch": 0.12757357465378025, + "grad_norm": 3.2552037239074707, + "learning_rate": 1.950487423065098e-06, + "loss": 0.959, + "step": 13477 + }, + { + "epoch": 0.1275830406754953, + "grad_norm": 224.10659790039062, + "learning_rate": 1.950477895134129e-06, + "loss": 22.1562, + "step": 13478 + }, + { + "epoch": 0.12759250669721037, + "grad_norm": 532.96337890625, + "learning_rate": 1.9504683663097735e-06, + "loss": 43.0625, + "step": 13479 + }, + { + "epoch": 0.12760197271892543, + "grad_norm": 743.4229125976562, + "learning_rate": 1.9504588365920387e-06, + "loss": 48.5586, + "step": 13480 + }, + { + "epoch": 0.12761143874064046, + "grad_norm": 700.3218994140625, + "learning_rate": 1.950449305980935e-06, + "loss": 30.9219, + "step": 13481 + }, + { + "epoch": 0.12762090476235552, + "grad_norm": 1029.405517578125, + "learning_rate": 1.95043977447647e-06, + "loss": 37.7109, + "step": 13482 + }, + { + "epoch": 0.12763037078407058, + "grad_norm": 240.9330291748047, + "learning_rate": 1.9504302420786544e-06, + "loss": 19.8086, + "step": 13483 + }, + { + "epoch": 0.12763983680578564, + "grad_norm": 284.7814025878906, + "learning_rate": 1.9504207087874956e-06, + "loss": 19.1328, + "step": 13484 + }, + { + "epoch": 0.1276493028275007, + "grad_norm": 243.15878295898438, + "learning_rate": 1.9504111746030033e-06, + "loss": 17.7656, + "step": 13485 + }, + { + "epoch": 0.12765876884921573, + "grad_norm": 521.3405151367188, + "learning_rate": 1.9504016395251864e-06, + "loss": 20.375, + "step": 13486 + }, + { + "epoch": 0.1276682348709308, + "grad_norm": 667.3245849609375, + "learning_rate": 1.9503921035540534e-06, + "loss": 70.5469, + "step": 13487 + }, + { + "epoch": 0.12767770089264585, + "grad_norm": 659.7975463867188, + "learning_rate": 1.9503825666896145e-06, + "loss": 49.5078, + "step": 13488 + }, + { + "epoch": 0.1276871669143609, + "grad_norm": 253.30731201171875, + "learning_rate": 1.9503730289318773e-06, + "loss": 19.1641, + "step": 13489 + }, + { + "epoch": 0.12769663293607594, + "grad_norm": 2.864840507507324, + "learning_rate": 1.950363490280851e-06, + "loss": 0.9688, + "step": 13490 + }, + { + "epoch": 0.127706098957791, + "grad_norm": 3.092064380645752, + "learning_rate": 1.950353950736545e-06, + "loss": 0.957, + "step": 13491 + }, + { + "epoch": 0.12771556497950606, + "grad_norm": 443.0016174316406, + "learning_rate": 1.9503444102989685e-06, + "loss": 9.1094, + "step": 13492 + }, + { + "epoch": 0.12772503100122112, + "grad_norm": 539.2684326171875, + "learning_rate": 1.9503348689681295e-06, + "loss": 48.2969, + "step": 13493 + }, + { + "epoch": 0.12773449702293618, + "grad_norm": 324.83251953125, + "learning_rate": 1.950325326744038e-06, + "loss": 21.5312, + "step": 13494 + }, + { + "epoch": 0.12774396304465122, + "grad_norm": 247.7030029296875, + "learning_rate": 1.9503157836267023e-06, + "loss": 14.9297, + "step": 13495 + }, + { + "epoch": 0.12775342906636628, + "grad_norm": 633.1151123046875, + "learning_rate": 1.950306239616132e-06, + "loss": 24.1094, + "step": 13496 + }, + { + "epoch": 0.12776289508808134, + "grad_norm": 670.89892578125, + "learning_rate": 1.950296694712335e-06, + "loss": 50.7539, + "step": 13497 + }, + { + "epoch": 0.1277723611097964, + "grad_norm": 488.889404296875, + "learning_rate": 1.9502871489153213e-06, + "loss": 27.5938, + "step": 13498 + }, + { + "epoch": 0.12778182713151143, + "grad_norm": 469.6299133300781, + "learning_rate": 1.9502776022250995e-06, + "loss": 19.5625, + "step": 13499 + }, + { + "epoch": 0.1277912931532265, + "grad_norm": 551.5736694335938, + "learning_rate": 1.9502680546416784e-06, + "loss": 32.875, + "step": 13500 + }, + { + "epoch": 0.12780075917494155, + "grad_norm": 529.877197265625, + "learning_rate": 1.9502585061650675e-06, + "loss": 28.75, + "step": 13501 + }, + { + "epoch": 0.1278102251966566, + "grad_norm": 462.35205078125, + "learning_rate": 1.950248956795275e-06, + "loss": 44.3125, + "step": 13502 + }, + { + "epoch": 0.12781969121837167, + "grad_norm": 342.6333312988281, + "learning_rate": 1.9502394065323106e-06, + "loss": 19.1094, + "step": 13503 + }, + { + "epoch": 0.1278291572400867, + "grad_norm": 283.75848388671875, + "learning_rate": 1.950229855376183e-06, + "loss": 18.7109, + "step": 13504 + }, + { + "epoch": 0.12783862326180176, + "grad_norm": 882.9357299804688, + "learning_rate": 1.9502203033269006e-06, + "loss": 9.2539, + "step": 13505 + }, + { + "epoch": 0.12784808928351682, + "grad_norm": 268.98297119140625, + "learning_rate": 1.9502107503844736e-06, + "loss": 19.9531, + "step": 13506 + }, + { + "epoch": 0.12785755530523188, + "grad_norm": 538.264892578125, + "learning_rate": 1.95020119654891e-06, + "loss": 53.8125, + "step": 13507 + }, + { + "epoch": 0.1278670213269469, + "grad_norm": 319.18878173828125, + "learning_rate": 1.9501916418202193e-06, + "loss": 27.9023, + "step": 13508 + }, + { + "epoch": 0.12787648734866197, + "grad_norm": 305.1922607421875, + "learning_rate": 1.95018208619841e-06, + "loss": 14.4141, + "step": 13509 + }, + { + "epoch": 0.12788595337037703, + "grad_norm": 243.99099731445312, + "learning_rate": 1.9501725296834914e-06, + "loss": 27.7812, + "step": 13510 + }, + { + "epoch": 0.1278954193920921, + "grad_norm": 2.8619587421417236, + "learning_rate": 1.950162972275473e-06, + "loss": 0.9468, + "step": 13511 + }, + { + "epoch": 0.12790488541380715, + "grad_norm": 690.0322265625, + "learning_rate": 1.950153413974362e-06, + "loss": 35.3047, + "step": 13512 + }, + { + "epoch": 0.12791435143552218, + "grad_norm": 3.724895715713501, + "learning_rate": 1.9501438547801692e-06, + "loss": 0.8164, + "step": 13513 + }, + { + "epoch": 0.12792381745723724, + "grad_norm": 904.3302612304688, + "learning_rate": 1.9501342946929037e-06, + "loss": 40.7109, + "step": 13514 + }, + { + "epoch": 0.1279332834789523, + "grad_norm": 169.0417022705078, + "learning_rate": 1.950124733712573e-06, + "loss": 21.5312, + "step": 13515 + }, + { + "epoch": 0.12794274950066736, + "grad_norm": 883.15771484375, + "learning_rate": 1.950115171839187e-06, + "loss": 41.875, + "step": 13516 + }, + { + "epoch": 0.1279522155223824, + "grad_norm": 836.0418701171875, + "learning_rate": 1.9501056090727547e-06, + "loss": 24.8984, + "step": 13517 + }, + { + "epoch": 0.12796168154409746, + "grad_norm": 3.4537808895111084, + "learning_rate": 1.9500960454132844e-06, + "loss": 0.8965, + "step": 13518 + }, + { + "epoch": 0.12797114756581252, + "grad_norm": 760.0667114257812, + "learning_rate": 1.9500864808607865e-06, + "loss": 46.7812, + "step": 13519 + }, + { + "epoch": 0.12798061358752758, + "grad_norm": 559.0947265625, + "learning_rate": 1.9500769154152685e-06, + "loss": 44.3828, + "step": 13520 + }, + { + "epoch": 0.12799007960924264, + "grad_norm": 405.2442626953125, + "learning_rate": 1.95006734907674e-06, + "loss": 31.5391, + "step": 13521 + }, + { + "epoch": 0.12799954563095767, + "grad_norm": 532.521728515625, + "learning_rate": 1.9500577818452104e-06, + "loss": 59.0859, + "step": 13522 + }, + { + "epoch": 0.12800901165267273, + "grad_norm": 146.82122802734375, + "learning_rate": 1.950048213720688e-06, + "loss": 17.9375, + "step": 13523 + }, + { + "epoch": 0.1280184776743878, + "grad_norm": 256.892822265625, + "learning_rate": 1.950038644703182e-06, + "loss": 23.6484, + "step": 13524 + }, + { + "epoch": 0.12802794369610285, + "grad_norm": 300.7062683105469, + "learning_rate": 1.950029074792702e-06, + "loss": 19.3984, + "step": 13525 + }, + { + "epoch": 0.12803740971781788, + "grad_norm": 244.6529083251953, + "learning_rate": 1.950019503989256e-06, + "loss": 21.5312, + "step": 13526 + }, + { + "epoch": 0.12804687573953294, + "grad_norm": 308.4326171875, + "learning_rate": 1.950009932292853e-06, + "loss": 8.0156, + "step": 13527 + }, + { + "epoch": 0.128056341761248, + "grad_norm": 399.4506530761719, + "learning_rate": 1.9500003597035033e-06, + "loss": 35.9062, + "step": 13528 + }, + { + "epoch": 0.12806580778296306, + "grad_norm": 260.57513427734375, + "learning_rate": 1.9499907862212147e-06, + "loss": 17.6328, + "step": 13529 + }, + { + "epoch": 0.12807527380467812, + "grad_norm": 511.81597900390625, + "learning_rate": 1.9499812118459967e-06, + "loss": 22.1641, + "step": 13530 + }, + { + "epoch": 0.12808473982639315, + "grad_norm": 333.87744140625, + "learning_rate": 1.9499716365778586e-06, + "loss": 31.1719, + "step": 13531 + }, + { + "epoch": 0.1280942058481082, + "grad_norm": 290.04736328125, + "learning_rate": 1.949962060416808e-06, + "loss": 11.2305, + "step": 13532 + }, + { + "epoch": 0.12810367186982327, + "grad_norm": 392.07940673828125, + "learning_rate": 1.9499524833628554e-06, + "loss": 28.1641, + "step": 13533 + }, + { + "epoch": 0.12811313789153833, + "grad_norm": 263.4495849609375, + "learning_rate": 1.949942905416009e-06, + "loss": 21.9766, + "step": 13534 + }, + { + "epoch": 0.12812260391325336, + "grad_norm": 757.2841186523438, + "learning_rate": 1.9499333265762786e-06, + "loss": 62.4805, + "step": 13535 + }, + { + "epoch": 0.12813206993496842, + "grad_norm": 204.74998474121094, + "learning_rate": 1.949923746843672e-06, + "loss": 20.4219, + "step": 13536 + }, + { + "epoch": 0.12814153595668348, + "grad_norm": 296.0246887207031, + "learning_rate": 1.9499141662181996e-06, + "loss": 16.4844, + "step": 13537 + }, + { + "epoch": 0.12815100197839854, + "grad_norm": 356.9522705078125, + "learning_rate": 1.9499045846998693e-06, + "loss": 11.5703, + "step": 13538 + }, + { + "epoch": 0.1281604680001136, + "grad_norm": 245.13720703125, + "learning_rate": 1.9498950022886905e-06, + "loss": 22.0781, + "step": 13539 + }, + { + "epoch": 0.12816993402182864, + "grad_norm": 441.192626953125, + "learning_rate": 1.949885418984672e-06, + "loss": 36.4062, + "step": 13540 + }, + { + "epoch": 0.1281794000435437, + "grad_norm": 478.0263366699219, + "learning_rate": 1.949875834787823e-06, + "loss": 31.0156, + "step": 13541 + }, + { + "epoch": 0.12818886606525876, + "grad_norm": 298.23956298828125, + "learning_rate": 1.949866249698153e-06, + "loss": 22.3594, + "step": 13542 + }, + { + "epoch": 0.12819833208697382, + "grad_norm": 224.57899475097656, + "learning_rate": 1.94985666371567e-06, + "loss": 19.5469, + "step": 13543 + }, + { + "epoch": 0.12820779810868888, + "grad_norm": 333.1011047363281, + "learning_rate": 1.9498470768403838e-06, + "loss": 30.5781, + "step": 13544 + }, + { + "epoch": 0.1282172641304039, + "grad_norm": 298.4156494140625, + "learning_rate": 1.949837489072303e-06, + "loss": 23.8125, + "step": 13545 + }, + { + "epoch": 0.12822673015211897, + "grad_norm": 174.7779541015625, + "learning_rate": 1.949827900411437e-06, + "loss": 13.8242, + "step": 13546 + }, + { + "epoch": 0.12823619617383403, + "grad_norm": 812.0833740234375, + "learning_rate": 1.949818310857794e-06, + "loss": 35.9844, + "step": 13547 + }, + { + "epoch": 0.1282456621955491, + "grad_norm": 403.6700439453125, + "learning_rate": 1.9498087204113842e-06, + "loss": 36.375, + "step": 13548 + }, + { + "epoch": 0.12825512821726412, + "grad_norm": 3.0747480392456055, + "learning_rate": 1.949799129072216e-06, + "loss": 0.9087, + "step": 13549 + }, + { + "epoch": 0.12826459423897918, + "grad_norm": 150.5733642578125, + "learning_rate": 1.949789536840298e-06, + "loss": 14.5625, + "step": 13550 + }, + { + "epoch": 0.12827406026069424, + "grad_norm": 583.00537109375, + "learning_rate": 1.9497799437156396e-06, + "loss": 13.4297, + "step": 13551 + }, + { + "epoch": 0.1282835262824093, + "grad_norm": 3.7243523597717285, + "learning_rate": 1.94977034969825e-06, + "loss": 0.9746, + "step": 13552 + }, + { + "epoch": 0.12829299230412436, + "grad_norm": 378.07427978515625, + "learning_rate": 1.9497607547881384e-06, + "loss": 18.5469, + "step": 13553 + }, + { + "epoch": 0.1283024583258394, + "grad_norm": 222.22792053222656, + "learning_rate": 1.949751158985313e-06, + "loss": 18.1094, + "step": 13554 + }, + { + "epoch": 0.12831192434755445, + "grad_norm": 223.33486938476562, + "learning_rate": 1.9497415622897835e-06, + "loss": 25.8047, + "step": 13555 + }, + { + "epoch": 0.1283213903692695, + "grad_norm": 929.2744140625, + "learning_rate": 1.9497319647015585e-06, + "loss": 55.5, + "step": 13556 + }, + { + "epoch": 0.12833085639098457, + "grad_norm": 159.63307189941406, + "learning_rate": 1.9497223662206476e-06, + "loss": 16.3672, + "step": 13557 + }, + { + "epoch": 0.1283403224126996, + "grad_norm": 494.2107849121094, + "learning_rate": 1.9497127668470593e-06, + "loss": 65.3281, + "step": 13558 + }, + { + "epoch": 0.12834978843441466, + "grad_norm": 572.326416015625, + "learning_rate": 1.949703166580803e-06, + "loss": 42.3438, + "step": 13559 + }, + { + "epoch": 0.12835925445612972, + "grad_norm": 204.17323303222656, + "learning_rate": 1.9496935654218868e-06, + "loss": 17.3281, + "step": 13560 + }, + { + "epoch": 0.12836872047784478, + "grad_norm": 198.6623077392578, + "learning_rate": 1.9496839633703212e-06, + "loss": 7.9375, + "step": 13561 + }, + { + "epoch": 0.12837818649955984, + "grad_norm": 4.00417423248291, + "learning_rate": 1.9496743604261142e-06, + "loss": 0.918, + "step": 13562 + }, + { + "epoch": 0.12838765252127488, + "grad_norm": 376.5587463378906, + "learning_rate": 1.9496647565892747e-06, + "loss": 23.2266, + "step": 13563 + }, + { + "epoch": 0.12839711854298994, + "grad_norm": 3.1182098388671875, + "learning_rate": 1.9496551518598128e-06, + "loss": 0.9751, + "step": 13564 + }, + { + "epoch": 0.128406584564705, + "grad_norm": 629.1312255859375, + "learning_rate": 1.949645546237736e-06, + "loss": 42.8555, + "step": 13565 + }, + { + "epoch": 0.12841605058642006, + "grad_norm": 448.35333251953125, + "learning_rate": 1.949635939723055e-06, + "loss": 45.0938, + "step": 13566 + }, + { + "epoch": 0.1284255166081351, + "grad_norm": 743.1513061523438, + "learning_rate": 1.9496263323157774e-06, + "loss": 55.5625, + "step": 13567 + }, + { + "epoch": 0.12843498262985015, + "grad_norm": 404.2366943359375, + "learning_rate": 1.9496167240159132e-06, + "loss": 45.125, + "step": 13568 + }, + { + "epoch": 0.1284444486515652, + "grad_norm": 149.61697387695312, + "learning_rate": 1.949607114823471e-06, + "loss": 21.3047, + "step": 13569 + }, + { + "epoch": 0.12845391467328027, + "grad_norm": 305.4753723144531, + "learning_rate": 1.9495975047384596e-06, + "loss": 20.3438, + "step": 13570 + }, + { + "epoch": 0.12846338069499533, + "grad_norm": 881.8472290039062, + "learning_rate": 1.9495878937608884e-06, + "loss": 38.1484, + "step": 13571 + }, + { + "epoch": 0.12847284671671036, + "grad_norm": 2.763662099838257, + "learning_rate": 1.9495782818907663e-06, + "loss": 0.8853, + "step": 13572 + }, + { + "epoch": 0.12848231273842542, + "grad_norm": 623.84716796875, + "learning_rate": 1.949568669128103e-06, + "loss": 22.8906, + "step": 13573 + }, + { + "epoch": 0.12849177876014048, + "grad_norm": 203.0635528564453, + "learning_rate": 1.9495590554729063e-06, + "loss": 23.5547, + "step": 13574 + }, + { + "epoch": 0.12850124478185554, + "grad_norm": 410.5312805175781, + "learning_rate": 1.949549440925186e-06, + "loss": 43.8125, + "step": 13575 + }, + { + "epoch": 0.12851071080357057, + "grad_norm": 541.30029296875, + "learning_rate": 1.949539825484951e-06, + "loss": 33.2188, + "step": 13576 + }, + { + "epoch": 0.12852017682528563, + "grad_norm": 4.006409168243408, + "learning_rate": 1.9495302091522105e-06, + "loss": 0.9697, + "step": 13577 + }, + { + "epoch": 0.1285296428470007, + "grad_norm": 554.4622192382812, + "learning_rate": 1.9495205919269734e-06, + "loss": 30.0078, + "step": 13578 + }, + { + "epoch": 0.12853910886871575, + "grad_norm": 336.1119079589844, + "learning_rate": 1.9495109738092485e-06, + "loss": 16.6875, + "step": 13579 + }, + { + "epoch": 0.1285485748904308, + "grad_norm": 311.0564270019531, + "learning_rate": 1.949501354799045e-06, + "loss": 21.9844, + "step": 13580 + }, + { + "epoch": 0.12855804091214584, + "grad_norm": 134.73875427246094, + "learning_rate": 1.949491734896372e-06, + "loss": 17.3242, + "step": 13581 + }, + { + "epoch": 0.1285675069338609, + "grad_norm": 388.9271240234375, + "learning_rate": 1.949482114101239e-06, + "loss": 52.6719, + "step": 13582 + }, + { + "epoch": 0.12857697295557596, + "grad_norm": 3.2687807083129883, + "learning_rate": 1.949472492413654e-06, + "loss": 0.8657, + "step": 13583 + }, + { + "epoch": 0.12858643897729102, + "grad_norm": 251.8640594482422, + "learning_rate": 1.949462869833627e-06, + "loss": 20.332, + "step": 13584 + }, + { + "epoch": 0.12859590499900606, + "grad_norm": 505.8257141113281, + "learning_rate": 1.9494532463611665e-06, + "loss": 39.8203, + "step": 13585 + }, + { + "epoch": 0.12860537102072112, + "grad_norm": 318.7044677734375, + "learning_rate": 1.9494436219962815e-06, + "loss": 24.3828, + "step": 13586 + }, + { + "epoch": 0.12861483704243618, + "grad_norm": 514.1201782226562, + "learning_rate": 1.9494339967389816e-06, + "loss": 59.1875, + "step": 13587 + }, + { + "epoch": 0.12862430306415124, + "grad_norm": 313.0684509277344, + "learning_rate": 1.9494243705892758e-06, + "loss": 20.9062, + "step": 13588 + }, + { + "epoch": 0.1286337690858663, + "grad_norm": 230.2190399169922, + "learning_rate": 1.949414743547172e-06, + "loss": 21.4453, + "step": 13589 + }, + { + "epoch": 0.12864323510758133, + "grad_norm": 434.331787109375, + "learning_rate": 1.949405115612681e-06, + "loss": 43.2969, + "step": 13590 + }, + { + "epoch": 0.1286527011292964, + "grad_norm": 270.87353515625, + "learning_rate": 1.94939548678581e-06, + "loss": 34.1875, + "step": 13591 + }, + { + "epoch": 0.12866216715101145, + "grad_norm": 1360.00439453125, + "learning_rate": 1.9493858570665698e-06, + "loss": 21.6445, + "step": 13592 + }, + { + "epoch": 0.1286716331727265, + "grad_norm": 450.4429626464844, + "learning_rate": 1.9493762264549684e-06, + "loss": 62.3906, + "step": 13593 + }, + { + "epoch": 0.12868109919444154, + "grad_norm": 309.5561218261719, + "learning_rate": 1.949366594951015e-06, + "loss": 24.4844, + "step": 13594 + }, + { + "epoch": 0.1286905652161566, + "grad_norm": 379.7567138671875, + "learning_rate": 1.9493569625547188e-06, + "loss": 19.3984, + "step": 13595 + }, + { + "epoch": 0.12870003123787166, + "grad_norm": 3.0304882526397705, + "learning_rate": 1.949347329266089e-06, + "loss": 0.8657, + "step": 13596 + }, + { + "epoch": 0.12870949725958672, + "grad_norm": 364.9585876464844, + "learning_rate": 1.949337695085134e-06, + "loss": 19.8281, + "step": 13597 + }, + { + "epoch": 0.12871896328130178, + "grad_norm": 910.9575805664062, + "learning_rate": 1.949328060011864e-06, + "loss": 47.4531, + "step": 13598 + }, + { + "epoch": 0.1287284293030168, + "grad_norm": 3.238677978515625, + "learning_rate": 1.949318424046287e-06, + "loss": 0.8199, + "step": 13599 + }, + { + "epoch": 0.12873789532473187, + "grad_norm": 214.78121948242188, + "learning_rate": 1.9493087871884122e-06, + "loss": 22.0078, + "step": 13600 + }, + { + "epoch": 0.12874736134644693, + "grad_norm": 316.46038818359375, + "learning_rate": 1.9492991494382497e-06, + "loss": 21.2422, + "step": 13601 + }, + { + "epoch": 0.128756827368162, + "grad_norm": 319.5392761230469, + "learning_rate": 1.9492895107958073e-06, + "loss": 22.3984, + "step": 13602 + }, + { + "epoch": 0.12876629338987702, + "grad_norm": 310.3583984375, + "learning_rate": 1.949279871261094e-06, + "loss": 32.125, + "step": 13603 + }, + { + "epoch": 0.12877575941159208, + "grad_norm": 334.4086608886719, + "learning_rate": 1.94927023083412e-06, + "loss": 20.2266, + "step": 13604 + }, + { + "epoch": 0.12878522543330714, + "grad_norm": 418.4665222167969, + "learning_rate": 1.9492605895148937e-06, + "loss": 45.0938, + "step": 13605 + }, + { + "epoch": 0.1287946914550222, + "grad_norm": 2.9726314544677734, + "learning_rate": 1.949250947303424e-06, + "loss": 0.8276, + "step": 13606 + }, + { + "epoch": 0.12880415747673726, + "grad_norm": 3.337656021118164, + "learning_rate": 1.94924130419972e-06, + "loss": 0.9199, + "step": 13607 + }, + { + "epoch": 0.1288136234984523, + "grad_norm": 379.2497253417969, + "learning_rate": 1.9492316602037914e-06, + "loss": 15.1953, + "step": 13608 + }, + { + "epoch": 0.12882308952016736, + "grad_norm": 200.9125213623047, + "learning_rate": 1.9492220153156467e-06, + "loss": 18.1641, + "step": 13609 + }, + { + "epoch": 0.12883255554188242, + "grad_norm": 1189.7049560546875, + "learning_rate": 1.949212369535295e-06, + "loss": 52.6719, + "step": 13610 + }, + { + "epoch": 0.12884202156359748, + "grad_norm": 425.1764831542969, + "learning_rate": 1.9492027228627452e-06, + "loss": 53.6875, + "step": 13611 + }, + { + "epoch": 0.1288514875853125, + "grad_norm": 662.7320556640625, + "learning_rate": 1.9491930752980064e-06, + "loss": 32.3594, + "step": 13612 + }, + { + "epoch": 0.12886095360702757, + "grad_norm": 191.12802124023438, + "learning_rate": 1.949183426841088e-06, + "loss": 18.6562, + "step": 13613 + }, + { + "epoch": 0.12887041962874263, + "grad_norm": 339.3927917480469, + "learning_rate": 1.9491737774919995e-06, + "loss": 25.3984, + "step": 13614 + }, + { + "epoch": 0.1288798856504577, + "grad_norm": 419.8470458984375, + "learning_rate": 1.949164127250749e-06, + "loss": 33.5078, + "step": 13615 + }, + { + "epoch": 0.12888935167217275, + "grad_norm": 903.5218505859375, + "learning_rate": 1.9491544761173458e-06, + "loss": 69.3984, + "step": 13616 + }, + { + "epoch": 0.12889881769388778, + "grad_norm": 315.34613037109375, + "learning_rate": 1.9491448240917994e-06, + "loss": 29.4062, + "step": 13617 + }, + { + "epoch": 0.12890828371560284, + "grad_norm": 444.4063415527344, + "learning_rate": 1.9491351711741185e-06, + "loss": 21.9062, + "step": 13618 + }, + { + "epoch": 0.1289177497373179, + "grad_norm": 346.6132507324219, + "learning_rate": 1.9491255173643125e-06, + "loss": 20.7188, + "step": 13619 + }, + { + "epoch": 0.12892721575903296, + "grad_norm": 453.7321472167969, + "learning_rate": 1.9491158626623898e-06, + "loss": 43.875, + "step": 13620 + }, + { + "epoch": 0.128936681780748, + "grad_norm": 3.2497897148132324, + "learning_rate": 1.94910620706836e-06, + "loss": 0.8289, + "step": 13621 + }, + { + "epoch": 0.12894614780246305, + "grad_norm": 422.89678955078125, + "learning_rate": 1.9490965505822327e-06, + "loss": 19.1484, + "step": 13622 + }, + { + "epoch": 0.1289556138241781, + "grad_norm": 458.1832580566406, + "learning_rate": 1.949086893204016e-06, + "loss": 55.9531, + "step": 13623 + }, + { + "epoch": 0.12896507984589317, + "grad_norm": 233.1451873779297, + "learning_rate": 1.949077234933719e-06, + "loss": 17.2969, + "step": 13624 + }, + { + "epoch": 0.12897454586760823, + "grad_norm": 246.15724182128906, + "learning_rate": 1.9490675757713517e-06, + "loss": 23.2578, + "step": 13625 + }, + { + "epoch": 0.12898401188932326, + "grad_norm": 228.79901123046875, + "learning_rate": 1.9490579157169224e-06, + "loss": 23.1641, + "step": 13626 + }, + { + "epoch": 0.12899347791103832, + "grad_norm": 428.5798034667969, + "learning_rate": 1.9490482547704406e-06, + "loss": 17.3945, + "step": 13627 + }, + { + "epoch": 0.12900294393275338, + "grad_norm": 731.0869750976562, + "learning_rate": 1.9490385929319147e-06, + "loss": 62.2656, + "step": 13628 + }, + { + "epoch": 0.12901240995446844, + "grad_norm": 367.5238952636719, + "learning_rate": 1.9490289302013546e-06, + "loss": 30.3594, + "step": 13629 + }, + { + "epoch": 0.1290218759761835, + "grad_norm": 3.461519479751587, + "learning_rate": 1.949019266578769e-06, + "loss": 0.8206, + "step": 13630 + }, + { + "epoch": 0.12903134199789854, + "grad_norm": 688.2706298828125, + "learning_rate": 1.949009602064167e-06, + "loss": 15.168, + "step": 13631 + }, + { + "epoch": 0.1290408080196136, + "grad_norm": 1077.6392822265625, + "learning_rate": 1.9489999366575577e-06, + "loss": 48.8281, + "step": 13632 + }, + { + "epoch": 0.12905027404132866, + "grad_norm": 886.3075561523438, + "learning_rate": 1.9489902703589505e-06, + "loss": 30.6758, + "step": 13633 + }, + { + "epoch": 0.12905974006304372, + "grad_norm": 499.2532653808594, + "learning_rate": 1.9489806031683537e-06, + "loss": 37.1484, + "step": 13634 + }, + { + "epoch": 0.12906920608475875, + "grad_norm": 618.4593505859375, + "learning_rate": 1.9489709350857774e-06, + "loss": 22.875, + "step": 13635 + }, + { + "epoch": 0.1290786721064738, + "grad_norm": 854.0451049804688, + "learning_rate": 1.9489612661112296e-06, + "loss": 57.6562, + "step": 13636 + }, + { + "epoch": 0.12908813812818887, + "grad_norm": 273.0581970214844, + "learning_rate": 1.94895159624472e-06, + "loss": 35.3359, + "step": 13637 + }, + { + "epoch": 0.12909760414990393, + "grad_norm": 3.0462872982025146, + "learning_rate": 1.948941925486258e-06, + "loss": 0.9614, + "step": 13638 + }, + { + "epoch": 0.129107070171619, + "grad_norm": 368.6888427734375, + "learning_rate": 1.948932253835852e-06, + "loss": 33.6797, + "step": 13639 + }, + { + "epoch": 0.12911653619333402, + "grad_norm": 1057.0565185546875, + "learning_rate": 1.948922581293512e-06, + "loss": 43.7578, + "step": 13640 + }, + { + "epoch": 0.12912600221504908, + "grad_norm": 3.7020790576934814, + "learning_rate": 1.948912907859246e-06, + "loss": 1.0762, + "step": 13641 + }, + { + "epoch": 0.12913546823676414, + "grad_norm": 211.9989013671875, + "learning_rate": 1.9489032335330635e-06, + "loss": 17.8594, + "step": 13642 + }, + { + "epoch": 0.1291449342584792, + "grad_norm": 401.5303955078125, + "learning_rate": 1.9488935583149737e-06, + "loss": 26.6328, + "step": 13643 + }, + { + "epoch": 0.12915440028019423, + "grad_norm": 547.540771484375, + "learning_rate": 1.9488838822049856e-06, + "loss": 33.7656, + "step": 13644 + }, + { + "epoch": 0.1291638663019093, + "grad_norm": 178.40289306640625, + "learning_rate": 1.9488742052031088e-06, + "loss": 19.4688, + "step": 13645 + }, + { + "epoch": 0.12917333232362435, + "grad_norm": 239.01910400390625, + "learning_rate": 1.948864527309352e-06, + "loss": 20.6914, + "step": 13646 + }, + { + "epoch": 0.1291827983453394, + "grad_norm": 322.00537109375, + "learning_rate": 1.948854848523724e-06, + "loss": 22.5117, + "step": 13647 + }, + { + "epoch": 0.12919226436705447, + "grad_norm": 369.0542297363281, + "learning_rate": 1.948845168846234e-06, + "loss": 8.6113, + "step": 13648 + }, + { + "epoch": 0.1292017303887695, + "grad_norm": 433.33465576171875, + "learning_rate": 1.9488354882768915e-06, + "loss": 16.7344, + "step": 13649 + }, + { + "epoch": 0.12921119641048456, + "grad_norm": 367.48516845703125, + "learning_rate": 1.9488258068157055e-06, + "loss": 25.7734, + "step": 13650 + }, + { + "epoch": 0.12922066243219962, + "grad_norm": 220.44580078125, + "learning_rate": 1.948816124462685e-06, + "loss": 21.0312, + "step": 13651 + }, + { + "epoch": 0.12923012845391468, + "grad_norm": 382.9694519042969, + "learning_rate": 1.9488064412178385e-06, + "loss": 42.0391, + "step": 13652 + }, + { + "epoch": 0.12923959447562972, + "grad_norm": 181.05711364746094, + "learning_rate": 1.948796757081176e-06, + "loss": 21.9531, + "step": 13653 + }, + { + "epoch": 0.12924906049734478, + "grad_norm": 512.38037109375, + "learning_rate": 1.9487870720527066e-06, + "loss": 24.4453, + "step": 13654 + }, + { + "epoch": 0.12925852651905984, + "grad_norm": 154.94015502929688, + "learning_rate": 1.948777386132439e-06, + "loss": 9.6094, + "step": 13655 + }, + { + "epoch": 0.1292679925407749, + "grad_norm": 260.02386474609375, + "learning_rate": 1.9487676993203823e-06, + "loss": 19.5312, + "step": 13656 + }, + { + "epoch": 0.12927745856248996, + "grad_norm": 760.7427368164062, + "learning_rate": 1.9487580116165452e-06, + "loss": 45.6719, + "step": 13657 + }, + { + "epoch": 0.129286924584205, + "grad_norm": 195.93162536621094, + "learning_rate": 1.9487483230209375e-06, + "loss": 17.7969, + "step": 13658 + }, + { + "epoch": 0.12929639060592005, + "grad_norm": 361.14410400390625, + "learning_rate": 1.9487386335335684e-06, + "loss": 35.5938, + "step": 13659 + }, + { + "epoch": 0.1293058566276351, + "grad_norm": 1141.0758056640625, + "learning_rate": 1.948728943154447e-06, + "loss": 51.4531, + "step": 13660 + }, + { + "epoch": 0.12931532264935017, + "grad_norm": 407.14886474609375, + "learning_rate": 1.9487192518835813e-06, + "loss": 20.4297, + "step": 13661 + }, + { + "epoch": 0.1293247886710652, + "grad_norm": 750.693115234375, + "learning_rate": 1.9487095597209814e-06, + "loss": 52.8125, + "step": 13662 + }, + { + "epoch": 0.12933425469278026, + "grad_norm": 339.9523010253906, + "learning_rate": 1.9486998666666567e-06, + "loss": 27.0156, + "step": 13663 + }, + { + "epoch": 0.12934372071449532, + "grad_norm": 503.78594970703125, + "learning_rate": 1.9486901727206154e-06, + "loss": 26.4141, + "step": 13664 + }, + { + "epoch": 0.12935318673621038, + "grad_norm": 372.2261047363281, + "learning_rate": 1.9486804778828674e-06, + "loss": 23.5391, + "step": 13665 + }, + { + "epoch": 0.12936265275792544, + "grad_norm": 312.7173156738281, + "learning_rate": 1.9486707821534216e-06, + "loss": 24.2383, + "step": 13666 + }, + { + "epoch": 0.12937211877964047, + "grad_norm": 3.214834213256836, + "learning_rate": 1.9486610855322864e-06, + "loss": 0.8867, + "step": 13667 + }, + { + "epoch": 0.12938158480135553, + "grad_norm": 754.0043334960938, + "learning_rate": 1.9486513880194715e-06, + "loss": 28.0312, + "step": 13668 + }, + { + "epoch": 0.1293910508230706, + "grad_norm": 429.3705749511719, + "learning_rate": 1.9486416896149864e-06, + "loss": 28.9453, + "step": 13669 + }, + { + "epoch": 0.12940051684478565, + "grad_norm": 495.70599365234375, + "learning_rate": 1.94863199031884e-06, + "loss": 46.5312, + "step": 13670 + }, + { + "epoch": 0.12940998286650068, + "grad_norm": 243.4773712158203, + "learning_rate": 1.9486222901310407e-06, + "loss": 12.1914, + "step": 13671 + }, + { + "epoch": 0.12941944888821574, + "grad_norm": 195.5849151611328, + "learning_rate": 1.9486125890515984e-06, + "loss": 13.582, + "step": 13672 + }, + { + "epoch": 0.1294289149099308, + "grad_norm": 478.37744140625, + "learning_rate": 1.948602887080522e-06, + "loss": 40.5938, + "step": 13673 + }, + { + "epoch": 0.12943838093164586, + "grad_norm": 563.712158203125, + "learning_rate": 1.9485931842178206e-06, + "loss": 47.8047, + "step": 13674 + }, + { + "epoch": 0.12944784695336092, + "grad_norm": 308.5523376464844, + "learning_rate": 1.9485834804635033e-06, + "loss": 28.4531, + "step": 13675 + }, + { + "epoch": 0.12945731297507596, + "grad_norm": 486.74371337890625, + "learning_rate": 1.948573775817579e-06, + "loss": 20.6016, + "step": 13676 + }, + { + "epoch": 0.12946677899679102, + "grad_norm": 1015.3807373046875, + "learning_rate": 1.9485640702800572e-06, + "loss": 52.4844, + "step": 13677 + }, + { + "epoch": 0.12947624501850608, + "grad_norm": 426.529296875, + "learning_rate": 1.948554363850947e-06, + "loss": 32.4375, + "step": 13678 + }, + { + "epoch": 0.12948571104022114, + "grad_norm": 3.178098440170288, + "learning_rate": 1.9485446565302577e-06, + "loss": 0.9868, + "step": 13679 + }, + { + "epoch": 0.12949517706193617, + "grad_norm": 397.0461120605469, + "learning_rate": 1.948534948317998e-06, + "loss": 46.6797, + "step": 13680 + }, + { + "epoch": 0.12950464308365123, + "grad_norm": 357.3697204589844, + "learning_rate": 1.9485252392141768e-06, + "loss": 14.3008, + "step": 13681 + }, + { + "epoch": 0.1295141091053663, + "grad_norm": 461.2657470703125, + "learning_rate": 1.9485155292188034e-06, + "loss": 32.7891, + "step": 13682 + }, + { + "epoch": 0.12952357512708135, + "grad_norm": 5.678762435913086, + "learning_rate": 1.9485058183318876e-06, + "loss": 0.9321, + "step": 13683 + }, + { + "epoch": 0.1295330411487964, + "grad_norm": 513.2185668945312, + "learning_rate": 1.948496106553438e-06, + "loss": 26.625, + "step": 13684 + }, + { + "epoch": 0.12954250717051144, + "grad_norm": 232.52882385253906, + "learning_rate": 1.9484863938834636e-06, + "loss": 23.8047, + "step": 13685 + }, + { + "epoch": 0.1295519731922265, + "grad_norm": 325.9921569824219, + "learning_rate": 1.9484766803219735e-06, + "loss": 16.6406, + "step": 13686 + }, + { + "epoch": 0.12956143921394156, + "grad_norm": 1387.1519775390625, + "learning_rate": 1.9484669658689774e-06, + "loss": 24.4648, + "step": 13687 + }, + { + "epoch": 0.12957090523565662, + "grad_norm": 305.33843994140625, + "learning_rate": 1.948457250524484e-06, + "loss": 19.332, + "step": 13688 + }, + { + "epoch": 0.12958037125737165, + "grad_norm": 188.04489135742188, + "learning_rate": 1.9484475342885025e-06, + "loss": 19.3047, + "step": 13689 + }, + { + "epoch": 0.1295898372790867, + "grad_norm": 197.13885498046875, + "learning_rate": 1.948437817161042e-06, + "loss": 19.9141, + "step": 13690 + }, + { + "epoch": 0.12959930330080177, + "grad_norm": 395.3216857910156, + "learning_rate": 1.948428099142111e-06, + "loss": 7.3438, + "step": 13691 + }, + { + "epoch": 0.12960876932251683, + "grad_norm": 408.4391784667969, + "learning_rate": 1.9484183802317203e-06, + "loss": 25.3398, + "step": 13692 + }, + { + "epoch": 0.1296182353442319, + "grad_norm": 248.34498596191406, + "learning_rate": 1.9484086604298776e-06, + "loss": 19.1172, + "step": 13693 + }, + { + "epoch": 0.12962770136594692, + "grad_norm": 564.5699462890625, + "learning_rate": 1.9483989397365925e-06, + "loss": 36.7188, + "step": 13694 + }, + { + "epoch": 0.12963716738766198, + "grad_norm": 454.06182861328125, + "learning_rate": 1.9483892181518738e-06, + "loss": 60.6094, + "step": 13695 + }, + { + "epoch": 0.12964663340937704, + "grad_norm": 702.4337768554688, + "learning_rate": 1.9483794956757312e-06, + "loss": 45.6094, + "step": 13696 + }, + { + "epoch": 0.1296560994310921, + "grad_norm": 234.65603637695312, + "learning_rate": 1.9483697723081738e-06, + "loss": 17.2344, + "step": 13697 + }, + { + "epoch": 0.12966556545280714, + "grad_norm": 460.9067687988281, + "learning_rate": 1.94836004804921e-06, + "loss": 30.1562, + "step": 13698 + }, + { + "epoch": 0.1296750314745222, + "grad_norm": 262.3610534667969, + "learning_rate": 1.9483503228988496e-06, + "loss": 20.3672, + "step": 13699 + }, + { + "epoch": 0.12968449749623726, + "grad_norm": 3.4084267616271973, + "learning_rate": 1.948340596857102e-06, + "loss": 0.8286, + "step": 13700 + }, + { + "epoch": 0.12969396351795232, + "grad_norm": 1155.7418212890625, + "learning_rate": 1.9483308699239754e-06, + "loss": 61.7188, + "step": 13701 + }, + { + "epoch": 0.12970342953966738, + "grad_norm": 3.316438913345337, + "learning_rate": 1.9483211420994797e-06, + "loss": 0.8977, + "step": 13702 + }, + { + "epoch": 0.1297128955613824, + "grad_norm": 290.56732177734375, + "learning_rate": 1.948311413383624e-06, + "loss": 21.5625, + "step": 13703 + }, + { + "epoch": 0.12972236158309747, + "grad_norm": 367.9682922363281, + "learning_rate": 1.948301683776417e-06, + "loss": 25.1172, + "step": 13704 + }, + { + "epoch": 0.12973182760481253, + "grad_norm": 712.9367065429688, + "learning_rate": 1.9482919532778683e-06, + "loss": 27.9219, + "step": 13705 + }, + { + "epoch": 0.1297412936265276, + "grad_norm": 587.9979248046875, + "learning_rate": 1.9482822218879864e-06, + "loss": 57.3281, + "step": 13706 + }, + { + "epoch": 0.12975075964824262, + "grad_norm": 405.4967956542969, + "learning_rate": 1.9482724896067815e-06, + "loss": 29.1797, + "step": 13707 + }, + { + "epoch": 0.12976022566995768, + "grad_norm": 311.7378845214844, + "learning_rate": 1.9482627564342616e-06, + "loss": 14.957, + "step": 13708 + }, + { + "epoch": 0.12976969169167274, + "grad_norm": 286.0611267089844, + "learning_rate": 1.948253022370437e-06, + "loss": 15.7695, + "step": 13709 + }, + { + "epoch": 0.1297791577133878, + "grad_norm": 497.6700134277344, + "learning_rate": 1.9482432874153158e-06, + "loss": 19.5859, + "step": 13710 + }, + { + "epoch": 0.12978862373510286, + "grad_norm": 793.4006958007812, + "learning_rate": 1.948233551568908e-06, + "loss": 25.125, + "step": 13711 + }, + { + "epoch": 0.1297980897568179, + "grad_norm": 384.1619567871094, + "learning_rate": 1.948223814831222e-06, + "loss": 35.5078, + "step": 13712 + }, + { + "epoch": 0.12980755577853295, + "grad_norm": 331.5847473144531, + "learning_rate": 1.948214077202267e-06, + "loss": 24.5703, + "step": 13713 + }, + { + "epoch": 0.129817021800248, + "grad_norm": 521.7432861328125, + "learning_rate": 1.948204338682053e-06, + "loss": 33.2031, + "step": 13714 + }, + { + "epoch": 0.12982648782196307, + "grad_norm": 235.61404418945312, + "learning_rate": 1.9481945992705887e-06, + "loss": 17.9062, + "step": 13715 + }, + { + "epoch": 0.1298359538436781, + "grad_norm": 502.8157958984375, + "learning_rate": 1.9481848589678827e-06, + "loss": 53.8906, + "step": 13716 + }, + { + "epoch": 0.12984541986539316, + "grad_norm": 148.44696044921875, + "learning_rate": 1.9481751177739447e-06, + "loss": 18.6719, + "step": 13717 + }, + { + "epoch": 0.12985488588710822, + "grad_norm": 501.31103515625, + "learning_rate": 1.948165375688784e-06, + "loss": 10.7539, + "step": 13718 + }, + { + "epoch": 0.12986435190882328, + "grad_norm": 553.6865844726562, + "learning_rate": 1.948155632712409e-06, + "loss": 28.75, + "step": 13719 + }, + { + "epoch": 0.12987381793053834, + "grad_norm": 3.2859911918640137, + "learning_rate": 1.9481458888448303e-06, + "loss": 0.9316, + "step": 13720 + }, + { + "epoch": 0.12988328395225338, + "grad_norm": 2.6172268390655518, + "learning_rate": 1.9481361440860554e-06, + "loss": 0.8486, + "step": 13721 + }, + { + "epoch": 0.12989274997396844, + "grad_norm": 374.82977294921875, + "learning_rate": 1.9481263984360944e-06, + "loss": 19.3281, + "step": 13722 + }, + { + "epoch": 0.1299022159956835, + "grad_norm": 300.0610046386719, + "learning_rate": 1.9481166518949564e-06, + "loss": 15.9297, + "step": 13723 + }, + { + "epoch": 0.12991168201739856, + "grad_norm": 363.79937744140625, + "learning_rate": 1.9481069044626508e-06, + "loss": 21.3438, + "step": 13724 + }, + { + "epoch": 0.12992114803911362, + "grad_norm": 265.367431640625, + "learning_rate": 1.9480971561391856e-06, + "loss": 30.625, + "step": 13725 + }, + { + "epoch": 0.12993061406082865, + "grad_norm": 354.7070007324219, + "learning_rate": 1.948087406924571e-06, + "loss": 35.4297, + "step": 13726 + }, + { + "epoch": 0.1299400800825437, + "grad_norm": 212.59730529785156, + "learning_rate": 1.948077656818816e-06, + "loss": 16.5078, + "step": 13727 + }, + { + "epoch": 0.12994954610425877, + "grad_norm": 1030.2716064453125, + "learning_rate": 1.94806790582193e-06, + "loss": 76.4766, + "step": 13728 + }, + { + "epoch": 0.12995901212597383, + "grad_norm": 395.3304443359375, + "learning_rate": 1.9480581539339213e-06, + "loss": 20.4219, + "step": 13729 + }, + { + "epoch": 0.12996847814768886, + "grad_norm": 431.3189392089844, + "learning_rate": 1.9480484011548e-06, + "loss": 19.9844, + "step": 13730 + }, + { + "epoch": 0.12997794416940392, + "grad_norm": 237.58885192871094, + "learning_rate": 1.9480386474845743e-06, + "loss": 25.9531, + "step": 13731 + }, + { + "epoch": 0.12998741019111898, + "grad_norm": 432.94049072265625, + "learning_rate": 1.9480288929232545e-06, + "loss": 32.0, + "step": 13732 + }, + { + "epoch": 0.12999687621283404, + "grad_norm": 515.9557495117188, + "learning_rate": 1.948019137470849e-06, + "loss": 29.6797, + "step": 13733 + }, + { + "epoch": 0.1300063422345491, + "grad_norm": 309.56378173828125, + "learning_rate": 1.9480093811273674e-06, + "loss": 25.5156, + "step": 13734 + }, + { + "epoch": 0.13001580825626413, + "grad_norm": 526.5560913085938, + "learning_rate": 1.9479996238928183e-06, + "loss": 59.0781, + "step": 13735 + }, + { + "epoch": 0.1300252742779792, + "grad_norm": 370.5536804199219, + "learning_rate": 1.9479898657672117e-06, + "loss": 18.3672, + "step": 13736 + }, + { + "epoch": 0.13003474029969425, + "grad_norm": 711.9592895507812, + "learning_rate": 1.9479801067505558e-06, + "loss": 25.4609, + "step": 13737 + }, + { + "epoch": 0.1300442063214093, + "grad_norm": 281.97222900390625, + "learning_rate": 1.947970346842861e-06, + "loss": 16.4375, + "step": 13738 + }, + { + "epoch": 0.13005367234312434, + "grad_norm": 686.52294921875, + "learning_rate": 1.947960586044135e-06, + "loss": 22.6094, + "step": 13739 + }, + { + "epoch": 0.1300631383648394, + "grad_norm": 215.41891479492188, + "learning_rate": 1.9479508243543877e-06, + "loss": 19.1953, + "step": 13740 + }, + { + "epoch": 0.13007260438655446, + "grad_norm": 1099.165283203125, + "learning_rate": 1.947941061773629e-06, + "loss": 24.9062, + "step": 13741 + }, + { + "epoch": 0.13008207040826952, + "grad_norm": 175.18055725097656, + "learning_rate": 1.9479312983018667e-06, + "loss": 18.4062, + "step": 13742 + }, + { + "epoch": 0.13009153642998458, + "grad_norm": 721.9402465820312, + "learning_rate": 1.9479215339391106e-06, + "loss": 52.1719, + "step": 13743 + }, + { + "epoch": 0.13010100245169962, + "grad_norm": 308.0867004394531, + "learning_rate": 1.9479117686853703e-06, + "loss": 13.4062, + "step": 13744 + }, + { + "epoch": 0.13011046847341468, + "grad_norm": 808.90771484375, + "learning_rate": 1.9479020025406546e-06, + "loss": 64.4922, + "step": 13745 + }, + { + "epoch": 0.13011993449512974, + "grad_norm": 281.0667724609375, + "learning_rate": 1.9478922355049726e-06, + "loss": 21.2109, + "step": 13746 + }, + { + "epoch": 0.1301294005168448, + "grad_norm": 512.1596069335938, + "learning_rate": 1.947882467578334e-06, + "loss": 30.25, + "step": 13747 + }, + { + "epoch": 0.13013886653855983, + "grad_norm": 170.10411071777344, + "learning_rate": 1.9478726987607464e-06, + "loss": 16.0469, + "step": 13748 + }, + { + "epoch": 0.1301483325602749, + "grad_norm": 300.6065368652344, + "learning_rate": 1.947862929052221e-06, + "loss": 35.1562, + "step": 13749 + }, + { + "epoch": 0.13015779858198995, + "grad_norm": 429.9144592285156, + "learning_rate": 1.947853158452766e-06, + "loss": 31.3125, + "step": 13750 + }, + { + "epoch": 0.130167264603705, + "grad_norm": 429.28594970703125, + "learning_rate": 1.9478433869623906e-06, + "loss": 16.0625, + "step": 13751 + }, + { + "epoch": 0.13017673062542007, + "grad_norm": 386.48052978515625, + "learning_rate": 1.947833614581104e-06, + "loss": 26.0547, + "step": 13752 + }, + { + "epoch": 0.1301861966471351, + "grad_norm": 328.0283203125, + "learning_rate": 1.9478238413089152e-06, + "loss": 16.2031, + "step": 13753 + }, + { + "epoch": 0.13019566266885016, + "grad_norm": 235.47731018066406, + "learning_rate": 1.947814067145834e-06, + "loss": 17.7344, + "step": 13754 + }, + { + "epoch": 0.13020512869056522, + "grad_norm": 870.3026733398438, + "learning_rate": 1.9478042920918697e-06, + "loss": 77.7656, + "step": 13755 + }, + { + "epoch": 0.13021459471228028, + "grad_norm": 431.19635009765625, + "learning_rate": 1.9477945161470303e-06, + "loss": 22.4844, + "step": 13756 + }, + { + "epoch": 0.1302240607339953, + "grad_norm": 384.0390625, + "learning_rate": 1.947784739311326e-06, + "loss": 27.3984, + "step": 13757 + }, + { + "epoch": 0.13023352675571037, + "grad_norm": 265.17242431640625, + "learning_rate": 1.9477749615847656e-06, + "loss": 19.3984, + "step": 13758 + }, + { + "epoch": 0.13024299277742543, + "grad_norm": 316.0339050292969, + "learning_rate": 1.9477651829673586e-06, + "loss": 36.6328, + "step": 13759 + }, + { + "epoch": 0.1302524587991405, + "grad_norm": 506.13958740234375, + "learning_rate": 1.947755403459114e-06, + "loss": 21.3906, + "step": 13760 + }, + { + "epoch": 0.13026192482085555, + "grad_norm": 715.7947387695312, + "learning_rate": 1.9477456230600407e-06, + "loss": 9.4766, + "step": 13761 + }, + { + "epoch": 0.13027139084257058, + "grad_norm": 230.68238830566406, + "learning_rate": 1.9477358417701485e-06, + "loss": 9.6836, + "step": 13762 + }, + { + "epoch": 0.13028085686428564, + "grad_norm": 457.3471984863281, + "learning_rate": 1.947726059589446e-06, + "loss": 46.1562, + "step": 13763 + }, + { + "epoch": 0.1302903228860007, + "grad_norm": 320.5682067871094, + "learning_rate": 1.9477162765179426e-06, + "loss": 16.0156, + "step": 13764 + }, + { + "epoch": 0.13029978890771576, + "grad_norm": 254.42092895507812, + "learning_rate": 1.947706492555648e-06, + "loss": 20.9297, + "step": 13765 + }, + { + "epoch": 0.1303092549294308, + "grad_norm": 434.3868103027344, + "learning_rate": 1.9476967077025702e-06, + "loss": 30.9141, + "step": 13766 + }, + { + "epoch": 0.13031872095114586, + "grad_norm": 230.12210083007812, + "learning_rate": 1.9476869219587196e-06, + "loss": 30.7812, + "step": 13767 + }, + { + "epoch": 0.13032818697286092, + "grad_norm": 176.3850860595703, + "learning_rate": 1.947677135324105e-06, + "loss": 17.793, + "step": 13768 + }, + { + "epoch": 0.13033765299457598, + "grad_norm": 3.206979274749756, + "learning_rate": 1.9476673477987354e-06, + "loss": 0.9517, + "step": 13769 + }, + { + "epoch": 0.13034711901629104, + "grad_norm": 192.40838623046875, + "learning_rate": 1.9476575593826203e-06, + "loss": 19.0, + "step": 13770 + }, + { + "epoch": 0.13035658503800607, + "grad_norm": 268.4371337890625, + "learning_rate": 1.947647770075769e-06, + "loss": 10.7734, + "step": 13771 + }, + { + "epoch": 0.13036605105972113, + "grad_norm": 618.6311645507812, + "learning_rate": 1.94763797987819e-06, + "loss": 19.1836, + "step": 13772 + }, + { + "epoch": 0.1303755170814362, + "grad_norm": 292.6095275878906, + "learning_rate": 1.947628188789893e-06, + "loss": 17.0352, + "step": 13773 + }, + { + "epoch": 0.13038498310315125, + "grad_norm": 149.7078399658203, + "learning_rate": 1.947618396810887e-06, + "loss": 18.1641, + "step": 13774 + }, + { + "epoch": 0.13039444912486628, + "grad_norm": 328.9084777832031, + "learning_rate": 1.9476086039411817e-06, + "loss": 17.4453, + "step": 13775 + }, + { + "epoch": 0.13040391514658134, + "grad_norm": 323.46832275390625, + "learning_rate": 1.9475988101807855e-06, + "loss": 30.1875, + "step": 13776 + }, + { + "epoch": 0.1304133811682964, + "grad_norm": 398.6884765625, + "learning_rate": 1.9475890155297084e-06, + "loss": 46.5156, + "step": 13777 + }, + { + "epoch": 0.13042284719001146, + "grad_norm": 486.7673645019531, + "learning_rate": 1.9475792199879596e-06, + "loss": 39.75, + "step": 13778 + }, + { + "epoch": 0.13043231321172652, + "grad_norm": 311.9058837890625, + "learning_rate": 1.9475694235555477e-06, + "loss": 16.3828, + "step": 13779 + }, + { + "epoch": 0.13044177923344155, + "grad_norm": 677.853759765625, + "learning_rate": 1.947559626232482e-06, + "loss": 31.0078, + "step": 13780 + }, + { + "epoch": 0.1304512452551566, + "grad_norm": 524.8007202148438, + "learning_rate": 1.947549828018772e-06, + "loss": 53.3125, + "step": 13781 + }, + { + "epoch": 0.13046071127687167, + "grad_norm": 214.2718048095703, + "learning_rate": 1.9475400289144266e-06, + "loss": 21.5, + "step": 13782 + }, + { + "epoch": 0.13047017729858673, + "grad_norm": 209.82102966308594, + "learning_rate": 1.9475302289194555e-06, + "loss": 7.957, + "step": 13783 + }, + { + "epoch": 0.13047964332030176, + "grad_norm": 3.1465718746185303, + "learning_rate": 1.9475204280338673e-06, + "loss": 0.894, + "step": 13784 + }, + { + "epoch": 0.13048910934201682, + "grad_norm": 568.29248046875, + "learning_rate": 1.947510626257672e-06, + "loss": 31.3203, + "step": 13785 + }, + { + "epoch": 0.13049857536373188, + "grad_norm": 267.66357421875, + "learning_rate": 1.947500823590878e-06, + "loss": 34.875, + "step": 13786 + }, + { + "epoch": 0.13050804138544694, + "grad_norm": 281.4431457519531, + "learning_rate": 1.947491020033495e-06, + "loss": 37.0938, + "step": 13787 + }, + { + "epoch": 0.130517507407162, + "grad_norm": 2.999124765396118, + "learning_rate": 1.9474812155855324e-06, + "loss": 0.8262, + "step": 13788 + }, + { + "epoch": 0.13052697342887704, + "grad_norm": 309.942626953125, + "learning_rate": 1.9474714102469984e-06, + "loss": 18.9609, + "step": 13789 + }, + { + "epoch": 0.1305364394505921, + "grad_norm": 960.1173706054688, + "learning_rate": 1.9474616040179034e-06, + "loss": 32.1094, + "step": 13790 + }, + { + "epoch": 0.13054590547230716, + "grad_norm": 193.498046875, + "learning_rate": 1.9474517968982556e-06, + "loss": 16.2109, + "step": 13791 + }, + { + "epoch": 0.13055537149402222, + "grad_norm": 3.676231861114502, + "learning_rate": 1.9474419888880654e-06, + "loss": 0.8496, + "step": 13792 + }, + { + "epoch": 0.13056483751573725, + "grad_norm": 258.36383056640625, + "learning_rate": 1.947432179987341e-06, + "loss": 40.0938, + "step": 13793 + }, + { + "epoch": 0.1305743035374523, + "grad_norm": 302.2572021484375, + "learning_rate": 1.947422370196092e-06, + "loss": 17.3086, + "step": 13794 + }, + { + "epoch": 0.13058376955916737, + "grad_norm": 852.1910400390625, + "learning_rate": 1.947412559514328e-06, + "loss": 71.7344, + "step": 13795 + }, + { + "epoch": 0.13059323558088243, + "grad_norm": 183.38291931152344, + "learning_rate": 1.9474027479420574e-06, + "loss": 20.7891, + "step": 13796 + }, + { + "epoch": 0.1306027016025975, + "grad_norm": 222.1390838623047, + "learning_rate": 1.9473929354792903e-06, + "loss": 18.7031, + "step": 13797 + }, + { + "epoch": 0.13061216762431252, + "grad_norm": 172.75775146484375, + "learning_rate": 1.947383122126035e-06, + "loss": 18.5703, + "step": 13798 + }, + { + "epoch": 0.13062163364602758, + "grad_norm": 561.1577758789062, + "learning_rate": 1.9473733078823013e-06, + "loss": 26.6172, + "step": 13799 + }, + { + "epoch": 0.13063109966774264, + "grad_norm": 379.38970947265625, + "learning_rate": 1.9473634927480985e-06, + "loss": 13.2695, + "step": 13800 + }, + { + "epoch": 0.1306405656894577, + "grad_norm": 322.15472412109375, + "learning_rate": 1.9473536767234355e-06, + "loss": 19.9062, + "step": 13801 + }, + { + "epoch": 0.13065003171117273, + "grad_norm": 352.7821350097656, + "learning_rate": 1.9473438598083218e-06, + "loss": 22.0312, + "step": 13802 + }, + { + "epoch": 0.1306594977328878, + "grad_norm": 389.6063537597656, + "learning_rate": 1.947334042002766e-06, + "loss": 46.7266, + "step": 13803 + }, + { + "epoch": 0.13066896375460285, + "grad_norm": 214.15542602539062, + "learning_rate": 1.9473242233067782e-06, + "loss": 14.2227, + "step": 13804 + }, + { + "epoch": 0.1306784297763179, + "grad_norm": 650.0516967773438, + "learning_rate": 1.9473144037203675e-06, + "loss": 84.2188, + "step": 13805 + }, + { + "epoch": 0.13068789579803297, + "grad_norm": 369.3839111328125, + "learning_rate": 1.9473045832435425e-06, + "loss": 18.7969, + "step": 13806 + }, + { + "epoch": 0.130697361819748, + "grad_norm": 740.2755126953125, + "learning_rate": 1.9472947618763135e-06, + "loss": 53.0, + "step": 13807 + }, + { + "epoch": 0.13070682784146306, + "grad_norm": 525.0451049804688, + "learning_rate": 1.947284939618688e-06, + "loss": 39.9141, + "step": 13808 + }, + { + "epoch": 0.13071629386317812, + "grad_norm": 441.5844421386719, + "learning_rate": 1.947275116470677e-06, + "loss": 39.8125, + "step": 13809 + }, + { + "epoch": 0.13072575988489318, + "grad_norm": 240.27952575683594, + "learning_rate": 1.9472652924322886e-06, + "loss": 16.7734, + "step": 13810 + }, + { + "epoch": 0.13073522590660824, + "grad_norm": 318.4177551269531, + "learning_rate": 1.947255467503533e-06, + "loss": 9.2539, + "step": 13811 + }, + { + "epoch": 0.13074469192832328, + "grad_norm": 433.6068115234375, + "learning_rate": 1.9472456416844183e-06, + "loss": 36.3281, + "step": 13812 + }, + { + "epoch": 0.13075415795003834, + "grad_norm": 700.8360595703125, + "learning_rate": 1.9472358149749547e-06, + "loss": 20.3242, + "step": 13813 + }, + { + "epoch": 0.1307636239717534, + "grad_norm": 228.82296752929688, + "learning_rate": 1.947225987375151e-06, + "loss": 21.7969, + "step": 13814 + }, + { + "epoch": 0.13077308999346846, + "grad_norm": 328.58575439453125, + "learning_rate": 1.9472161588850166e-06, + "loss": 20.4297, + "step": 13815 + }, + { + "epoch": 0.1307825560151835, + "grad_norm": 362.2098083496094, + "learning_rate": 1.9472063295045603e-06, + "loss": 43.0, + "step": 13816 + }, + { + "epoch": 0.13079202203689855, + "grad_norm": 606.9033203125, + "learning_rate": 1.947196499233792e-06, + "loss": 20.6133, + "step": 13817 + }, + { + "epoch": 0.1308014880586136, + "grad_norm": 442.9356384277344, + "learning_rate": 1.9471866680727204e-06, + "loss": 46.1406, + "step": 13818 + }, + { + "epoch": 0.13081095408032867, + "grad_norm": 422.4750671386719, + "learning_rate": 1.947176836021355e-06, + "loss": 18.7109, + "step": 13819 + }, + { + "epoch": 0.13082042010204373, + "grad_norm": 688.1630249023438, + "learning_rate": 1.947167003079705e-06, + "loss": 40.9453, + "step": 13820 + }, + { + "epoch": 0.13082988612375876, + "grad_norm": 251.02310180664062, + "learning_rate": 1.9471571692477797e-06, + "loss": 19.4883, + "step": 13821 + }, + { + "epoch": 0.13083935214547382, + "grad_norm": 390.55474853515625, + "learning_rate": 1.947147334525588e-06, + "loss": 42.2031, + "step": 13822 + }, + { + "epoch": 0.13084881816718888, + "grad_norm": 3.8216464519500732, + "learning_rate": 1.94713749891314e-06, + "loss": 0.936, + "step": 13823 + }, + { + "epoch": 0.13085828418890394, + "grad_norm": 2.6980273723602295, + "learning_rate": 1.947127662410444e-06, + "loss": 0.7969, + "step": 13824 + }, + { + "epoch": 0.13086775021061897, + "grad_norm": 3.501498222351074, + "learning_rate": 1.947117825017509e-06, + "loss": 1.0415, + "step": 13825 + }, + { + "epoch": 0.13087721623233403, + "grad_norm": 3.0993666648864746, + "learning_rate": 1.9471079867343457e-06, + "loss": 0.9392, + "step": 13826 + }, + { + "epoch": 0.1308866822540491, + "grad_norm": 478.1569519042969, + "learning_rate": 1.9470981475609625e-06, + "loss": 29.4375, + "step": 13827 + }, + { + "epoch": 0.13089614827576415, + "grad_norm": 547.1470947265625, + "learning_rate": 1.9470883074973684e-06, + "loss": 35.4453, + "step": 13828 + }, + { + "epoch": 0.1309056142974792, + "grad_norm": 208.38470458984375, + "learning_rate": 1.947078466543573e-06, + "loss": 21.4531, + "step": 13829 + }, + { + "epoch": 0.13091508031919424, + "grad_norm": 283.24127197265625, + "learning_rate": 1.947068624699585e-06, + "loss": 16.5352, + "step": 13830 + }, + { + "epoch": 0.1309245463409093, + "grad_norm": 694.0178833007812, + "learning_rate": 1.9470587819654145e-06, + "loss": 32.25, + "step": 13831 + }, + { + "epoch": 0.13093401236262436, + "grad_norm": 776.2769165039062, + "learning_rate": 1.9470489383410705e-06, + "loss": 48.9688, + "step": 13832 + }, + { + "epoch": 0.13094347838433942, + "grad_norm": 199.5000762939453, + "learning_rate": 1.947039093826562e-06, + "loss": 28.3398, + "step": 13833 + }, + { + "epoch": 0.13095294440605446, + "grad_norm": 518.2146606445312, + "learning_rate": 1.947029248421898e-06, + "loss": 54.6094, + "step": 13834 + }, + { + "epoch": 0.13096241042776952, + "grad_norm": 271.1068420410156, + "learning_rate": 1.9470194021270884e-06, + "loss": 20.2188, + "step": 13835 + }, + { + "epoch": 0.13097187644948458, + "grad_norm": 304.0566711425781, + "learning_rate": 1.9470095549421423e-06, + "loss": 24.3438, + "step": 13836 + }, + { + "epoch": 0.13098134247119964, + "grad_norm": 951.1771240234375, + "learning_rate": 1.946999706867069e-06, + "loss": 29.7969, + "step": 13837 + }, + { + "epoch": 0.1309908084929147, + "grad_norm": 262.57733154296875, + "learning_rate": 1.946989857901877e-06, + "loss": 20.2266, + "step": 13838 + }, + { + "epoch": 0.13100027451462973, + "grad_norm": 690.8037719726562, + "learning_rate": 1.9469800080465767e-06, + "loss": 50.7344, + "step": 13839 + }, + { + "epoch": 0.1310097405363448, + "grad_norm": 600.9609985351562, + "learning_rate": 1.9469701573011763e-06, + "loss": 17.6016, + "step": 13840 + }, + { + "epoch": 0.13101920655805985, + "grad_norm": 1386.604736328125, + "learning_rate": 1.946960305665686e-06, + "loss": 9.4297, + "step": 13841 + }, + { + "epoch": 0.1310286725797749, + "grad_norm": 299.7195129394531, + "learning_rate": 1.9469504531401144e-06, + "loss": 31.9609, + "step": 13842 + }, + { + "epoch": 0.13103813860148994, + "grad_norm": 425.6907958984375, + "learning_rate": 1.946940599724471e-06, + "loss": 29.5625, + "step": 13843 + }, + { + "epoch": 0.131047604623205, + "grad_norm": 546.6639404296875, + "learning_rate": 1.946930745418765e-06, + "loss": 53.0781, + "step": 13844 + }, + { + "epoch": 0.13105707064492006, + "grad_norm": 120.85346984863281, + "learning_rate": 1.9469208902230053e-06, + "loss": 16.6328, + "step": 13845 + }, + { + "epoch": 0.13106653666663512, + "grad_norm": 395.3578186035156, + "learning_rate": 1.9469110341372023e-06, + "loss": 50.4688, + "step": 13846 + }, + { + "epoch": 0.13107600268835018, + "grad_norm": 279.63812255859375, + "learning_rate": 1.9469011771613643e-06, + "loss": 17.6328, + "step": 13847 + }, + { + "epoch": 0.1310854687100652, + "grad_norm": 315.69476318359375, + "learning_rate": 1.9468913192955005e-06, + "loss": 17.5156, + "step": 13848 + }, + { + "epoch": 0.13109493473178027, + "grad_norm": 239.50277709960938, + "learning_rate": 1.946881460539621e-06, + "loss": 12.1914, + "step": 13849 + }, + { + "epoch": 0.13110440075349533, + "grad_norm": 235.89039611816406, + "learning_rate": 1.946871600893734e-06, + "loss": 19.6914, + "step": 13850 + }, + { + "epoch": 0.1311138667752104, + "grad_norm": 281.43389892578125, + "learning_rate": 1.9468617403578495e-06, + "loss": 32.0938, + "step": 13851 + }, + { + "epoch": 0.13112333279692542, + "grad_norm": 194.07205200195312, + "learning_rate": 1.9468518789319765e-06, + "loss": 16.6484, + "step": 13852 + }, + { + "epoch": 0.13113279881864048, + "grad_norm": 406.2918701171875, + "learning_rate": 1.9468420166161242e-06, + "loss": 45.4219, + "step": 13853 + }, + { + "epoch": 0.13114226484035554, + "grad_norm": 433.3636474609375, + "learning_rate": 1.946832153410302e-06, + "loss": 38.5312, + "step": 13854 + }, + { + "epoch": 0.1311517308620706, + "grad_norm": 273.7806396484375, + "learning_rate": 1.9468222893145194e-06, + "loss": 17.5547, + "step": 13855 + }, + { + "epoch": 0.13116119688378566, + "grad_norm": 133.23446655273438, + "learning_rate": 1.946812424328786e-06, + "loss": 12.4102, + "step": 13856 + }, + { + "epoch": 0.1311706629055007, + "grad_norm": 242.21112060546875, + "learning_rate": 1.9468025584531096e-06, + "loss": 21.7656, + "step": 13857 + }, + { + "epoch": 0.13118012892721576, + "grad_norm": 403.0055847167969, + "learning_rate": 1.9467926916875006e-06, + "loss": 28.8516, + "step": 13858 + }, + { + "epoch": 0.13118959494893082, + "grad_norm": 377.13922119140625, + "learning_rate": 1.946782824031968e-06, + "loss": 29.5547, + "step": 13859 + }, + { + "epoch": 0.13119906097064588, + "grad_norm": 501.4432067871094, + "learning_rate": 1.9467729554865217e-06, + "loss": 46.8672, + "step": 13860 + }, + { + "epoch": 0.1312085269923609, + "grad_norm": 266.2618103027344, + "learning_rate": 1.94676308605117e-06, + "loss": 14.0469, + "step": 13861 + }, + { + "epoch": 0.13121799301407597, + "grad_norm": 434.32379150390625, + "learning_rate": 1.9467532157259224e-06, + "loss": 33.6016, + "step": 13862 + }, + { + "epoch": 0.13122745903579103, + "grad_norm": 459.33984375, + "learning_rate": 1.9467433445107884e-06, + "loss": 19.1641, + "step": 13863 + }, + { + "epoch": 0.1312369250575061, + "grad_norm": 150.46087646484375, + "learning_rate": 1.9467334724057777e-06, + "loss": 18.3086, + "step": 13864 + }, + { + "epoch": 0.13124639107922115, + "grad_norm": 382.0835876464844, + "learning_rate": 1.9467235994108987e-06, + "loss": 19.5078, + "step": 13865 + }, + { + "epoch": 0.13125585710093618, + "grad_norm": 3.1390624046325684, + "learning_rate": 1.9467137255261613e-06, + "loss": 0.9512, + "step": 13866 + }, + { + "epoch": 0.13126532312265124, + "grad_norm": 278.6279296875, + "learning_rate": 1.9467038507515746e-06, + "loss": 16.3047, + "step": 13867 + }, + { + "epoch": 0.1312747891443663, + "grad_norm": 259.8035888671875, + "learning_rate": 1.9466939750871476e-06, + "loss": 18.4531, + "step": 13868 + }, + { + "epoch": 0.13128425516608136, + "grad_norm": 542.2779541015625, + "learning_rate": 1.94668409853289e-06, + "loss": 34.8438, + "step": 13869 + }, + { + "epoch": 0.1312937211877964, + "grad_norm": 532.5531005859375, + "learning_rate": 1.9466742210888113e-06, + "loss": 40.8281, + "step": 13870 + }, + { + "epoch": 0.13130318720951145, + "grad_norm": 213.27151489257812, + "learning_rate": 1.94666434275492e-06, + "loss": 22.0859, + "step": 13871 + }, + { + "epoch": 0.1313126532312265, + "grad_norm": 424.1556701660156, + "learning_rate": 1.946654463531226e-06, + "loss": 11.7266, + "step": 13872 + }, + { + "epoch": 0.13132211925294157, + "grad_norm": 355.64404296875, + "learning_rate": 1.946644583417738e-06, + "loss": 34.375, + "step": 13873 + }, + { + "epoch": 0.13133158527465663, + "grad_norm": 266.498046875, + "learning_rate": 1.946634702414466e-06, + "loss": 31.168, + "step": 13874 + }, + { + "epoch": 0.13134105129637166, + "grad_norm": 482.2874755859375, + "learning_rate": 1.946624820521419e-06, + "loss": 30.8125, + "step": 13875 + }, + { + "epoch": 0.13135051731808672, + "grad_norm": 311.1220703125, + "learning_rate": 1.9466149377386064e-06, + "loss": 21.4297, + "step": 13876 + }, + { + "epoch": 0.13135998333980178, + "grad_norm": 452.15447998046875, + "learning_rate": 1.946605054066037e-06, + "loss": 45.1719, + "step": 13877 + }, + { + "epoch": 0.13136944936151684, + "grad_norm": 602.1753540039062, + "learning_rate": 1.9465951695037206e-06, + "loss": 53.5156, + "step": 13878 + }, + { + "epoch": 0.13137891538323188, + "grad_norm": 223.1750946044922, + "learning_rate": 1.946585284051666e-06, + "loss": 21.4922, + "step": 13879 + }, + { + "epoch": 0.13138838140494694, + "grad_norm": 393.846923828125, + "learning_rate": 1.946575397709883e-06, + "loss": 19.3359, + "step": 13880 + }, + { + "epoch": 0.131397847426662, + "grad_norm": 395.7481384277344, + "learning_rate": 1.946565510478381e-06, + "loss": 20.9375, + "step": 13881 + }, + { + "epoch": 0.13140731344837706, + "grad_norm": 537.3142700195312, + "learning_rate": 1.9465556223571687e-06, + "loss": 49.875, + "step": 13882 + }, + { + "epoch": 0.13141677947009212, + "grad_norm": 355.25189208984375, + "learning_rate": 1.946545733346256e-06, + "loss": 24.6094, + "step": 13883 + }, + { + "epoch": 0.13142624549180715, + "grad_norm": 3.3584041595458984, + "learning_rate": 1.9465358434456516e-06, + "loss": 1.1641, + "step": 13884 + }, + { + "epoch": 0.1314357115135222, + "grad_norm": 291.0155944824219, + "learning_rate": 1.946525952655365e-06, + "loss": 24.8516, + "step": 13885 + }, + { + "epoch": 0.13144517753523727, + "grad_norm": 3.346717357635498, + "learning_rate": 1.946516060975406e-06, + "loss": 0.9624, + "step": 13886 + }, + { + "epoch": 0.13145464355695233, + "grad_norm": 884.9251098632812, + "learning_rate": 1.9465061684057833e-06, + "loss": 46.75, + "step": 13887 + }, + { + "epoch": 0.13146410957866736, + "grad_norm": 2.367349863052368, + "learning_rate": 1.946496274946506e-06, + "loss": 0.7676, + "step": 13888 + }, + { + "epoch": 0.13147357560038242, + "grad_norm": 719.207275390625, + "learning_rate": 1.9464863805975843e-06, + "loss": 60.5312, + "step": 13889 + }, + { + "epoch": 0.13148304162209748, + "grad_norm": 8.703301429748535, + "learning_rate": 1.9464764853590264e-06, + "loss": 0.8872, + "step": 13890 + }, + { + "epoch": 0.13149250764381254, + "grad_norm": 159.3852081298828, + "learning_rate": 1.946466589230843e-06, + "loss": 14.2422, + "step": 13891 + }, + { + "epoch": 0.1315019736655276, + "grad_norm": 174.36770629882812, + "learning_rate": 1.946456692213042e-06, + "loss": 16.4062, + "step": 13892 + }, + { + "epoch": 0.13151143968724263, + "grad_norm": 3.0013692378997803, + "learning_rate": 1.946446794305633e-06, + "loss": 0.9751, + "step": 13893 + }, + { + "epoch": 0.1315209057089577, + "grad_norm": 3.134794235229492, + "learning_rate": 1.946436895508626e-06, + "loss": 0.7861, + "step": 13894 + }, + { + "epoch": 0.13153037173067275, + "grad_norm": 3.127345085144043, + "learning_rate": 1.9464269958220297e-06, + "loss": 0.9312, + "step": 13895 + }, + { + "epoch": 0.1315398377523878, + "grad_norm": 224.1414337158203, + "learning_rate": 1.9464170952458543e-06, + "loss": 16.5156, + "step": 13896 + }, + { + "epoch": 0.13154930377410287, + "grad_norm": 590.51708984375, + "learning_rate": 1.9464071937801074e-06, + "loss": 46.4531, + "step": 13897 + }, + { + "epoch": 0.1315587697958179, + "grad_norm": 343.673095703125, + "learning_rate": 1.9463972914248e-06, + "loss": 42.0312, + "step": 13898 + }, + { + "epoch": 0.13156823581753296, + "grad_norm": 434.4111022949219, + "learning_rate": 1.9463873881799404e-06, + "loss": 37.7656, + "step": 13899 + }, + { + "epoch": 0.13157770183924802, + "grad_norm": 225.1030731201172, + "learning_rate": 1.9463774840455384e-06, + "loss": 10.1367, + "step": 13900 + }, + { + "epoch": 0.13158716786096308, + "grad_norm": 530.70654296875, + "learning_rate": 1.9463675790216027e-06, + "loss": 40.1133, + "step": 13901 + }, + { + "epoch": 0.13159663388267812, + "grad_norm": 945.7662353515625, + "learning_rate": 1.9463576731081434e-06, + "loss": 47.4141, + "step": 13902 + }, + { + "epoch": 0.13160609990439318, + "grad_norm": 166.4092559814453, + "learning_rate": 1.946347766305169e-06, + "loss": 20.8984, + "step": 13903 + }, + { + "epoch": 0.13161556592610824, + "grad_norm": 261.262939453125, + "learning_rate": 1.94633785861269e-06, + "loss": 19.9609, + "step": 13904 + }, + { + "epoch": 0.1316250319478233, + "grad_norm": 216.20059204101562, + "learning_rate": 1.9463279500307145e-06, + "loss": 18.3125, + "step": 13905 + }, + { + "epoch": 0.13163449796953836, + "grad_norm": 432.1065368652344, + "learning_rate": 1.9463180405592523e-06, + "loss": 48.0078, + "step": 13906 + }, + { + "epoch": 0.1316439639912534, + "grad_norm": 515.6941528320312, + "learning_rate": 1.9463081301983132e-06, + "loss": 21.8906, + "step": 13907 + }, + { + "epoch": 0.13165343001296845, + "grad_norm": 257.58551025390625, + "learning_rate": 1.9462982189479057e-06, + "loss": 17.6172, + "step": 13908 + }, + { + "epoch": 0.1316628960346835, + "grad_norm": 364.2214660644531, + "learning_rate": 1.9462883068080394e-06, + "loss": 21.7656, + "step": 13909 + }, + { + "epoch": 0.13167236205639857, + "grad_norm": 483.4699401855469, + "learning_rate": 1.9462783937787233e-06, + "loss": 45.5, + "step": 13910 + }, + { + "epoch": 0.1316818280781136, + "grad_norm": 223.99476623535156, + "learning_rate": 1.9462684798599676e-06, + "loss": 19.1484, + "step": 13911 + }, + { + "epoch": 0.13169129409982866, + "grad_norm": 2.883776903152466, + "learning_rate": 1.946258565051781e-06, + "loss": 0.9331, + "step": 13912 + }, + { + "epoch": 0.13170076012154372, + "grad_norm": 2.6931827068328857, + "learning_rate": 1.9462486493541727e-06, + "loss": 0.854, + "step": 13913 + }, + { + "epoch": 0.13171022614325878, + "grad_norm": 259.9792785644531, + "learning_rate": 1.9462387327671522e-06, + "loss": 19.1875, + "step": 13914 + }, + { + "epoch": 0.13171969216497384, + "grad_norm": 437.6722412109375, + "learning_rate": 1.946228815290729e-06, + "loss": 32.2734, + "step": 13915 + }, + { + "epoch": 0.13172915818668887, + "grad_norm": 1036.3748779296875, + "learning_rate": 1.946218896924912e-06, + "loss": 9.6602, + "step": 13916 + }, + { + "epoch": 0.13173862420840393, + "grad_norm": 1362.1121826171875, + "learning_rate": 1.946208977669711e-06, + "loss": 29.8125, + "step": 13917 + }, + { + "epoch": 0.131748090230119, + "grad_norm": 255.0959014892578, + "learning_rate": 1.9461990575251354e-06, + "loss": 39.3438, + "step": 13918 + }, + { + "epoch": 0.13175755625183405, + "grad_norm": 510.7918701171875, + "learning_rate": 1.946189136491194e-06, + "loss": 10.2227, + "step": 13919 + }, + { + "epoch": 0.13176702227354908, + "grad_norm": 390.7464599609375, + "learning_rate": 1.9461792145678965e-06, + "loss": 44.4219, + "step": 13920 + }, + { + "epoch": 0.13177648829526414, + "grad_norm": 381.519287109375, + "learning_rate": 1.946169291755252e-06, + "loss": 19.0625, + "step": 13921 + }, + { + "epoch": 0.1317859543169792, + "grad_norm": 201.6819610595703, + "learning_rate": 1.9461593680532696e-06, + "loss": 20.6055, + "step": 13922 + }, + { + "epoch": 0.13179542033869426, + "grad_norm": 197.1574249267578, + "learning_rate": 1.9461494434619593e-06, + "loss": 14.5859, + "step": 13923 + }, + { + "epoch": 0.13180488636040932, + "grad_norm": 894.3849487304688, + "learning_rate": 1.94613951798133e-06, + "loss": 61.9844, + "step": 13924 + }, + { + "epoch": 0.13181435238212436, + "grad_norm": 287.45428466796875, + "learning_rate": 1.946129591611391e-06, + "loss": 22.5, + "step": 13925 + }, + { + "epoch": 0.13182381840383942, + "grad_norm": 384.4999084472656, + "learning_rate": 1.946119664352152e-06, + "loss": 21.6172, + "step": 13926 + }, + { + "epoch": 0.13183328442555448, + "grad_norm": 328.90313720703125, + "learning_rate": 1.9461097362036217e-06, + "loss": 35.3906, + "step": 13927 + }, + { + "epoch": 0.13184275044726954, + "grad_norm": 239.62335205078125, + "learning_rate": 1.9460998071658102e-06, + "loss": 18.7891, + "step": 13928 + }, + { + "epoch": 0.13185221646898457, + "grad_norm": 1101.528564453125, + "learning_rate": 1.9460898772387264e-06, + "loss": 16.1914, + "step": 13929 + }, + { + "epoch": 0.13186168249069963, + "grad_norm": 249.6175537109375, + "learning_rate": 1.946079946422379e-06, + "loss": 19.0312, + "step": 13930 + }, + { + "epoch": 0.1318711485124147, + "grad_norm": 636.847412109375, + "learning_rate": 1.9460700147167785e-06, + "loss": 33.7031, + "step": 13931 + }, + { + "epoch": 0.13188061453412975, + "grad_norm": 738.2593383789062, + "learning_rate": 1.946060082121934e-06, + "loss": 62.1562, + "step": 13932 + }, + { + "epoch": 0.1318900805558448, + "grad_norm": 2.869182586669922, + "learning_rate": 1.9460501486378542e-06, + "loss": 0.9712, + "step": 13933 + }, + { + "epoch": 0.13189954657755984, + "grad_norm": 401.5758056640625, + "learning_rate": 1.9460402142645488e-06, + "loss": 42.0312, + "step": 13934 + }, + { + "epoch": 0.1319090125992749, + "grad_norm": 574.9983520507812, + "learning_rate": 1.9460302790020273e-06, + "loss": 66.2188, + "step": 13935 + }, + { + "epoch": 0.13191847862098996, + "grad_norm": 347.1741638183594, + "learning_rate": 1.9460203428502987e-06, + "loss": 22.2891, + "step": 13936 + }, + { + "epoch": 0.13192794464270502, + "grad_norm": 270.5704040527344, + "learning_rate": 1.9460104058093726e-06, + "loss": 19.4375, + "step": 13937 + }, + { + "epoch": 0.13193741066442005, + "grad_norm": 571.4815673828125, + "learning_rate": 1.9460004678792585e-06, + "loss": 61.625, + "step": 13938 + }, + { + "epoch": 0.1319468766861351, + "grad_norm": 474.87506103515625, + "learning_rate": 1.945990529059965e-06, + "loss": 42.7969, + "step": 13939 + }, + { + "epoch": 0.13195634270785017, + "grad_norm": 526.8367309570312, + "learning_rate": 1.9459805893515025e-06, + "loss": 46.3594, + "step": 13940 + }, + { + "epoch": 0.13196580872956523, + "grad_norm": 349.48089599609375, + "learning_rate": 1.9459706487538796e-06, + "loss": 24.9453, + "step": 13941 + }, + { + "epoch": 0.1319752747512803, + "grad_norm": 696.0598754882812, + "learning_rate": 1.9459607072671054e-06, + "loss": 29.4258, + "step": 13942 + }, + { + "epoch": 0.13198474077299532, + "grad_norm": 321.1485595703125, + "learning_rate": 1.9459507648911903e-06, + "loss": 29.6094, + "step": 13943 + }, + { + "epoch": 0.13199420679471038, + "grad_norm": 459.7760009765625, + "learning_rate": 1.9459408216261425e-06, + "loss": 23.625, + "step": 13944 + }, + { + "epoch": 0.13200367281642544, + "grad_norm": 424.45111083984375, + "learning_rate": 1.9459308774719722e-06, + "loss": 47.7344, + "step": 13945 + }, + { + "epoch": 0.1320131388381405, + "grad_norm": 728.9364013671875, + "learning_rate": 1.9459209324286884e-06, + "loss": 10.5703, + "step": 13946 + }, + { + "epoch": 0.13202260485985554, + "grad_norm": 3.3510043621063232, + "learning_rate": 1.9459109864963004e-06, + "loss": 1.0066, + "step": 13947 + }, + { + "epoch": 0.1320320708815706, + "grad_norm": 488.26507568359375, + "learning_rate": 1.9459010396748175e-06, + "loss": 39.5859, + "step": 13948 + }, + { + "epoch": 0.13204153690328566, + "grad_norm": 251.10491943359375, + "learning_rate": 1.9458910919642494e-06, + "loss": 20.9688, + "step": 13949 + }, + { + "epoch": 0.13205100292500072, + "grad_norm": 437.98095703125, + "learning_rate": 1.945881143364605e-06, + "loss": 57.7461, + "step": 13950 + }, + { + "epoch": 0.13206046894671578, + "grad_norm": 166.7730255126953, + "learning_rate": 1.9458711938758938e-06, + "loss": 24.9141, + "step": 13951 + }, + { + "epoch": 0.1320699349684308, + "grad_norm": 644.92822265625, + "learning_rate": 1.9458612434981252e-06, + "loss": 42.0469, + "step": 13952 + }, + { + "epoch": 0.13207940099014587, + "grad_norm": 311.0833740234375, + "learning_rate": 1.945851292231309e-06, + "loss": 10.1172, + "step": 13953 + }, + { + "epoch": 0.13208886701186093, + "grad_norm": 413.9958801269531, + "learning_rate": 1.945841340075454e-06, + "loss": 27.5625, + "step": 13954 + }, + { + "epoch": 0.132098333033576, + "grad_norm": 3320.8720703125, + "learning_rate": 1.945831387030569e-06, + "loss": 36.25, + "step": 13955 + }, + { + "epoch": 0.13210779905529102, + "grad_norm": 185.9911651611328, + "learning_rate": 1.945821433096665e-06, + "loss": 18.5859, + "step": 13956 + }, + { + "epoch": 0.13211726507700608, + "grad_norm": 317.9609680175781, + "learning_rate": 1.945811478273749e-06, + "loss": 29.6328, + "step": 13957 + }, + { + "epoch": 0.13212673109872114, + "grad_norm": 696.7667846679688, + "learning_rate": 1.945801522561833e-06, + "loss": 33.2812, + "step": 13958 + }, + { + "epoch": 0.1321361971204362, + "grad_norm": 211.266845703125, + "learning_rate": 1.945791565960925e-06, + "loss": 16.1641, + "step": 13959 + }, + { + "epoch": 0.13214566314215126, + "grad_norm": 233.521240234375, + "learning_rate": 1.945781608471034e-06, + "loss": 20.3125, + "step": 13960 + }, + { + "epoch": 0.1321551291638663, + "grad_norm": 414.9678955078125, + "learning_rate": 1.94577165009217e-06, + "loss": 40.7812, + "step": 13961 + }, + { + "epoch": 0.13216459518558135, + "grad_norm": 440.48492431640625, + "learning_rate": 1.945761690824342e-06, + "loss": 8.0391, + "step": 13962 + }, + { + "epoch": 0.1321740612072964, + "grad_norm": 2.8680191040039062, + "learning_rate": 1.94575173066756e-06, + "loss": 0.9751, + "step": 13963 + }, + { + "epoch": 0.13218352722901147, + "grad_norm": 532.6820068359375, + "learning_rate": 1.9457417696218324e-06, + "loss": 51.7656, + "step": 13964 + }, + { + "epoch": 0.1321929932507265, + "grad_norm": 572.3438110351562, + "learning_rate": 1.945731807687169e-06, + "loss": 36.6016, + "step": 13965 + }, + { + "epoch": 0.13220245927244156, + "grad_norm": 595.00341796875, + "learning_rate": 1.9457218448635796e-06, + "loss": 20.7891, + "step": 13966 + }, + { + "epoch": 0.13221192529415662, + "grad_norm": 144.9382781982422, + "learning_rate": 1.945711881151073e-06, + "loss": 18.875, + "step": 13967 + }, + { + "epoch": 0.13222139131587168, + "grad_norm": 265.1666564941406, + "learning_rate": 1.945701916549659e-06, + "loss": 21.1094, + "step": 13968 + }, + { + "epoch": 0.13223085733758674, + "grad_norm": 526.660400390625, + "learning_rate": 1.9456919510593465e-06, + "loss": 19.6602, + "step": 13969 + }, + { + "epoch": 0.13224032335930178, + "grad_norm": 128.69468688964844, + "learning_rate": 1.945681984680145e-06, + "loss": 19.7891, + "step": 13970 + }, + { + "epoch": 0.13224978938101684, + "grad_norm": 595.4284057617188, + "learning_rate": 1.945672017412064e-06, + "loss": 46.6797, + "step": 13971 + }, + { + "epoch": 0.1322592554027319, + "grad_norm": 396.3382263183594, + "learning_rate": 1.9456620492551133e-06, + "loss": 39.7188, + "step": 13972 + }, + { + "epoch": 0.13226872142444696, + "grad_norm": 182.8799285888672, + "learning_rate": 1.945652080209301e-06, + "loss": 22.6328, + "step": 13973 + }, + { + "epoch": 0.132278187446162, + "grad_norm": 485.30859375, + "learning_rate": 1.9456421102746383e-06, + "loss": 56.1094, + "step": 13974 + }, + { + "epoch": 0.13228765346787705, + "grad_norm": 193.2859344482422, + "learning_rate": 1.9456321394511326e-06, + "loss": 12.5117, + "step": 13975 + }, + { + "epoch": 0.1322971194895921, + "grad_norm": 224.33802795410156, + "learning_rate": 1.9456221677387945e-06, + "loss": 15.2109, + "step": 13976 + }, + { + "epoch": 0.13230658551130717, + "grad_norm": 587.7809448242188, + "learning_rate": 1.945612195137633e-06, + "loss": 22.5117, + "step": 13977 + }, + { + "epoch": 0.13231605153302223, + "grad_norm": 627.0035400390625, + "learning_rate": 1.945602221647658e-06, + "loss": 52.4219, + "step": 13978 + }, + { + "epoch": 0.13232551755473726, + "grad_norm": 241.34335327148438, + "learning_rate": 1.945592247268878e-06, + "loss": 18.3945, + "step": 13979 + }, + { + "epoch": 0.13233498357645232, + "grad_norm": 757.5731201171875, + "learning_rate": 1.945582272001303e-06, + "loss": 25.2891, + "step": 13980 + }, + { + "epoch": 0.13234444959816738, + "grad_norm": 240.85231018066406, + "learning_rate": 1.9455722958449417e-06, + "loss": 10.0234, + "step": 13981 + }, + { + "epoch": 0.13235391561988244, + "grad_norm": 445.7492370605469, + "learning_rate": 1.9455623187998045e-06, + "loss": 34.5312, + "step": 13982 + }, + { + "epoch": 0.1323633816415975, + "grad_norm": 375.5331726074219, + "learning_rate": 1.9455523408659e-06, + "loss": 27.5, + "step": 13983 + }, + { + "epoch": 0.13237284766331253, + "grad_norm": 435.4332275390625, + "learning_rate": 1.9455423620432376e-06, + "loss": 31.4766, + "step": 13984 + }, + { + "epoch": 0.1323823136850276, + "grad_norm": 312.35943603515625, + "learning_rate": 1.945532382331827e-06, + "loss": 17.3438, + "step": 13985 + }, + { + "epoch": 0.13239177970674265, + "grad_norm": 633.0208740234375, + "learning_rate": 1.945522401731678e-06, + "loss": 40.2969, + "step": 13986 + }, + { + "epoch": 0.1324012457284577, + "grad_norm": 626.302001953125, + "learning_rate": 1.945512420242799e-06, + "loss": 30.0156, + "step": 13987 + }, + { + "epoch": 0.13241071175017274, + "grad_norm": 318.468505859375, + "learning_rate": 1.9455024378652e-06, + "loss": 22.1562, + "step": 13988 + }, + { + "epoch": 0.1324201777718878, + "grad_norm": 1213.4654541015625, + "learning_rate": 1.94549245459889e-06, + "loss": 42.4688, + "step": 13989 + }, + { + "epoch": 0.13242964379360286, + "grad_norm": 440.8114318847656, + "learning_rate": 1.9454824704438787e-06, + "loss": 20.3203, + "step": 13990 + }, + { + "epoch": 0.13243910981531792, + "grad_norm": 317.60693359375, + "learning_rate": 1.9454724854001755e-06, + "loss": 22.8672, + "step": 13991 + }, + { + "epoch": 0.13244857583703298, + "grad_norm": 371.7138366699219, + "learning_rate": 1.9454624994677895e-06, + "loss": 33.2812, + "step": 13992 + }, + { + "epoch": 0.13245804185874802, + "grad_norm": 445.5029296875, + "learning_rate": 1.9454525126467304e-06, + "loss": 37.3633, + "step": 13993 + }, + { + "epoch": 0.13246750788046308, + "grad_norm": 403.0223693847656, + "learning_rate": 1.9454425249370075e-06, + "loss": 31.8906, + "step": 13994 + }, + { + "epoch": 0.13247697390217814, + "grad_norm": 3.151064872741699, + "learning_rate": 1.9454325363386297e-06, + "loss": 0.9731, + "step": 13995 + }, + { + "epoch": 0.1324864399238932, + "grad_norm": 188.68118286132812, + "learning_rate": 1.9454225468516072e-06, + "loss": 21.1641, + "step": 13996 + }, + { + "epoch": 0.13249590594560823, + "grad_norm": 315.9077453613281, + "learning_rate": 1.945412556475949e-06, + "loss": 26.1328, + "step": 13997 + }, + { + "epoch": 0.1325053719673233, + "grad_norm": 1034.1585693359375, + "learning_rate": 1.9454025652116646e-06, + "loss": 45.4219, + "step": 13998 + }, + { + "epoch": 0.13251483798903835, + "grad_norm": 296.4239501953125, + "learning_rate": 1.945392573058763e-06, + "loss": 27.1953, + "step": 13999 + }, + { + "epoch": 0.1325243040107534, + "grad_norm": 342.42462158203125, + "learning_rate": 1.945382580017254e-06, + "loss": 17.9688, + "step": 14000 + }, + { + "epoch": 0.13253377003246847, + "grad_norm": 501.4331359863281, + "learning_rate": 1.945372586087147e-06, + "loss": 49.0625, + "step": 14001 + }, + { + "epoch": 0.1325432360541835, + "grad_norm": 827.2095947265625, + "learning_rate": 1.945362591268451e-06, + "loss": 49.0273, + "step": 14002 + }, + { + "epoch": 0.13255270207589856, + "grad_norm": 624.2586059570312, + "learning_rate": 1.945352595561176e-06, + "loss": 22.9375, + "step": 14003 + }, + { + "epoch": 0.13256216809761362, + "grad_norm": 370.6922302246094, + "learning_rate": 1.945342598965331e-06, + "loss": 24.4844, + "step": 14004 + }, + { + "epoch": 0.13257163411932868, + "grad_norm": 274.0337219238281, + "learning_rate": 1.9453326014809252e-06, + "loss": 17.9727, + "step": 14005 + }, + { + "epoch": 0.1325811001410437, + "grad_norm": 470.4805603027344, + "learning_rate": 1.9453226031079685e-06, + "loss": 46.3828, + "step": 14006 + }, + { + "epoch": 0.13259056616275877, + "grad_norm": 814.704833984375, + "learning_rate": 1.9453126038464696e-06, + "loss": 61.9766, + "step": 14007 + }, + { + "epoch": 0.13260003218447383, + "grad_norm": 196.5371551513672, + "learning_rate": 1.945302603696439e-06, + "loss": 19.0547, + "step": 14008 + }, + { + "epoch": 0.1326094982061889, + "grad_norm": 2.998887300491333, + "learning_rate": 1.9452926026578853e-06, + "loss": 0.832, + "step": 14009 + }, + { + "epoch": 0.13261896422790395, + "grad_norm": 328.5835266113281, + "learning_rate": 1.9452826007308177e-06, + "loss": 10.7578, + "step": 14010 + }, + { + "epoch": 0.13262843024961898, + "grad_norm": 364.3477478027344, + "learning_rate": 1.945272597915246e-06, + "loss": 13.4414, + "step": 14011 + }, + { + "epoch": 0.13263789627133404, + "grad_norm": 319.021728515625, + "learning_rate": 1.9452625942111796e-06, + "loss": 23.2148, + "step": 14012 + }, + { + "epoch": 0.1326473622930491, + "grad_norm": 248.9507598876953, + "learning_rate": 1.945252589618628e-06, + "loss": 23.7031, + "step": 14013 + }, + { + "epoch": 0.13265682831476416, + "grad_norm": 693.1679077148438, + "learning_rate": 1.9452425841376004e-06, + "loss": 15.2422, + "step": 14014 + }, + { + "epoch": 0.1326662943364792, + "grad_norm": 448.82891845703125, + "learning_rate": 1.9452325777681064e-06, + "loss": 46.2578, + "step": 14015 + }, + { + "epoch": 0.13267576035819426, + "grad_norm": 328.3697509765625, + "learning_rate": 1.945222570510155e-06, + "loss": 8.7949, + "step": 14016 + }, + { + "epoch": 0.13268522637990932, + "grad_norm": 299.48699951171875, + "learning_rate": 1.9452125623637562e-06, + "loss": 24.6641, + "step": 14017 + }, + { + "epoch": 0.13269469240162438, + "grad_norm": 3.302295207977295, + "learning_rate": 1.945202553328919e-06, + "loss": 0.9116, + "step": 14018 + }, + { + "epoch": 0.13270415842333944, + "grad_norm": 440.435791015625, + "learning_rate": 1.9451925434056523e-06, + "loss": 23.9531, + "step": 14019 + }, + { + "epoch": 0.13271362444505447, + "grad_norm": 165.55784606933594, + "learning_rate": 1.945182532593967e-06, + "loss": 16.8477, + "step": 14020 + }, + { + "epoch": 0.13272309046676953, + "grad_norm": 430.63531494140625, + "learning_rate": 1.9451725208938708e-06, + "loss": 42.1719, + "step": 14021 + }, + { + "epoch": 0.1327325564884846, + "grad_norm": 3.082817792892456, + "learning_rate": 1.9451625083053744e-06, + "loss": 0.9644, + "step": 14022 + }, + { + "epoch": 0.13274202251019965, + "grad_norm": 3.186969041824341, + "learning_rate": 1.9451524948284866e-06, + "loss": 0.8296, + "step": 14023 + }, + { + "epoch": 0.13275148853191468, + "grad_norm": 527.2069702148438, + "learning_rate": 1.945142480463217e-06, + "loss": 17.0859, + "step": 14024 + }, + { + "epoch": 0.13276095455362974, + "grad_norm": 375.37225341796875, + "learning_rate": 1.9451324652095747e-06, + "loss": 27.6094, + "step": 14025 + }, + { + "epoch": 0.1327704205753448, + "grad_norm": 441.5431213378906, + "learning_rate": 1.9451224490675698e-06, + "loss": 35.0156, + "step": 14026 + }, + { + "epoch": 0.13277988659705986, + "grad_norm": 350.35675048828125, + "learning_rate": 1.945112432037211e-06, + "loss": 28.0, + "step": 14027 + }, + { + "epoch": 0.13278935261877492, + "grad_norm": 1471.958740234375, + "learning_rate": 1.9451024141185083e-06, + "loss": 55.7969, + "step": 14028 + }, + { + "epoch": 0.13279881864048995, + "grad_norm": 1316.1343994140625, + "learning_rate": 1.9450923953114704e-06, + "loss": 20.375, + "step": 14029 + }, + { + "epoch": 0.132808284662205, + "grad_norm": 168.56581115722656, + "learning_rate": 1.9450823756161076e-06, + "loss": 17.1172, + "step": 14030 + }, + { + "epoch": 0.13281775068392007, + "grad_norm": 782.4176635742188, + "learning_rate": 1.9450723550324282e-06, + "loss": 43.7969, + "step": 14031 + }, + { + "epoch": 0.13282721670563513, + "grad_norm": 278.43548583984375, + "learning_rate": 1.9450623335604426e-06, + "loss": 19.4609, + "step": 14032 + }, + { + "epoch": 0.13283668272735016, + "grad_norm": 372.5954284667969, + "learning_rate": 1.9450523112001604e-06, + "loss": 30.0625, + "step": 14033 + }, + { + "epoch": 0.13284614874906522, + "grad_norm": 597.493896484375, + "learning_rate": 1.9450422879515896e-06, + "loss": 55.75, + "step": 14034 + }, + { + "epoch": 0.13285561477078028, + "grad_norm": 417.73553466796875, + "learning_rate": 1.9450322638147413e-06, + "loss": 42.7969, + "step": 14035 + }, + { + "epoch": 0.13286508079249534, + "grad_norm": 600.1283569335938, + "learning_rate": 1.9450222387896236e-06, + "loss": 50.4258, + "step": 14036 + }, + { + "epoch": 0.1328745468142104, + "grad_norm": 617.43310546875, + "learning_rate": 1.9450122128762465e-06, + "loss": 59.3594, + "step": 14037 + }, + { + "epoch": 0.13288401283592544, + "grad_norm": 590.3587646484375, + "learning_rate": 1.94500218607462e-06, + "loss": 21.2344, + "step": 14038 + }, + { + "epoch": 0.1328934788576405, + "grad_norm": 3.4318699836730957, + "learning_rate": 1.9449921583847526e-06, + "loss": 0.9961, + "step": 14039 + }, + { + "epoch": 0.13290294487935556, + "grad_norm": 197.85964965820312, + "learning_rate": 1.944982129806654e-06, + "loss": 14.5234, + "step": 14040 + }, + { + "epoch": 0.13291241090107062, + "grad_norm": 425.5473937988281, + "learning_rate": 1.944972100340333e-06, + "loss": 27.7578, + "step": 14041 + }, + { + "epoch": 0.13292187692278565, + "grad_norm": 638.3748168945312, + "learning_rate": 1.9449620699858006e-06, + "loss": 10.0625, + "step": 14042 + }, + { + "epoch": 0.1329313429445007, + "grad_norm": 397.6438903808594, + "learning_rate": 1.944952038743065e-06, + "loss": 23.4727, + "step": 14043 + }, + { + "epoch": 0.13294080896621577, + "grad_norm": 221.83880615234375, + "learning_rate": 1.9449420066121362e-06, + "loss": 15.8281, + "step": 14044 + }, + { + "epoch": 0.13295027498793083, + "grad_norm": 590.9420166015625, + "learning_rate": 1.944931973593023e-06, + "loss": 9.918, + "step": 14045 + }, + { + "epoch": 0.1329597410096459, + "grad_norm": 696.0506591796875, + "learning_rate": 1.9449219396857353e-06, + "loss": 36.7422, + "step": 14046 + }, + { + "epoch": 0.13296920703136092, + "grad_norm": 313.4411315917969, + "learning_rate": 1.944911904890283e-06, + "loss": 18.6172, + "step": 14047 + }, + { + "epoch": 0.13297867305307598, + "grad_norm": 213.51771545410156, + "learning_rate": 1.9449018692066745e-06, + "loss": 23.1562, + "step": 14048 + }, + { + "epoch": 0.13298813907479104, + "grad_norm": 325.18402099609375, + "learning_rate": 1.9448918326349196e-06, + "loss": 18.5977, + "step": 14049 + }, + { + "epoch": 0.1329976050965061, + "grad_norm": 198.39801025390625, + "learning_rate": 1.944881795175028e-06, + "loss": 15.4219, + "step": 14050 + }, + { + "epoch": 0.13300707111822113, + "grad_norm": 173.6390838623047, + "learning_rate": 1.944871756827009e-06, + "loss": 21.3438, + "step": 14051 + }, + { + "epoch": 0.1330165371399362, + "grad_norm": 268.3299255371094, + "learning_rate": 1.944861717590872e-06, + "loss": 23.4297, + "step": 14052 + }, + { + "epoch": 0.13302600316165125, + "grad_norm": 537.9740600585938, + "learning_rate": 1.9448516774666264e-06, + "loss": 41.25, + "step": 14053 + }, + { + "epoch": 0.1330354691833663, + "grad_norm": 478.17059326171875, + "learning_rate": 1.944841636454282e-06, + "loss": 24.1562, + "step": 14054 + }, + { + "epoch": 0.13304493520508137, + "grad_norm": 690.7666015625, + "learning_rate": 1.9448315945538474e-06, + "loss": 49.3438, + "step": 14055 + }, + { + "epoch": 0.1330544012267964, + "grad_norm": 610.751220703125, + "learning_rate": 1.9448215517653327e-06, + "loss": 11.4805, + "step": 14056 + }, + { + "epoch": 0.13306386724851146, + "grad_norm": 424.6312255859375, + "learning_rate": 1.9448115080887477e-06, + "loss": 21.5, + "step": 14057 + }, + { + "epoch": 0.13307333327022652, + "grad_norm": 231.652099609375, + "learning_rate": 1.9448014635241007e-06, + "loss": 19.1094, + "step": 14058 + }, + { + "epoch": 0.13308279929194158, + "grad_norm": 423.5890808105469, + "learning_rate": 1.944791418071402e-06, + "loss": 13.5, + "step": 14059 + }, + { + "epoch": 0.13309226531365662, + "grad_norm": 242.61744689941406, + "learning_rate": 1.944781371730661e-06, + "loss": 21.0547, + "step": 14060 + }, + { + "epoch": 0.13310173133537168, + "grad_norm": 147.78231811523438, + "learning_rate": 1.9447713245018867e-06, + "loss": 16.1992, + "step": 14061 + }, + { + "epoch": 0.13311119735708674, + "grad_norm": 184.26898193359375, + "learning_rate": 1.9447612763850893e-06, + "loss": 19.0391, + "step": 14062 + }, + { + "epoch": 0.1331206633788018, + "grad_norm": 674.0974731445312, + "learning_rate": 1.9447512273802773e-06, + "loss": 38.3203, + "step": 14063 + }, + { + "epoch": 0.13313012940051686, + "grad_norm": 343.9236145019531, + "learning_rate": 1.9447411774874606e-06, + "loss": 22.5859, + "step": 14064 + }, + { + "epoch": 0.1331395954222319, + "grad_norm": 571.6119995117188, + "learning_rate": 1.944731126706649e-06, + "loss": 23.3438, + "step": 14065 + }, + { + "epoch": 0.13314906144394695, + "grad_norm": 492.3842468261719, + "learning_rate": 1.9447210750378515e-06, + "loss": 29.3828, + "step": 14066 + }, + { + "epoch": 0.133158527465662, + "grad_norm": 450.27471923828125, + "learning_rate": 1.9447110224810776e-06, + "loss": 36.8281, + "step": 14067 + }, + { + "epoch": 0.13316799348737707, + "grad_norm": 420.8173522949219, + "learning_rate": 1.9447009690363367e-06, + "loss": 57.6016, + "step": 14068 + }, + { + "epoch": 0.13317745950909213, + "grad_norm": 401.215087890625, + "learning_rate": 1.944690914703638e-06, + "loss": 37.1094, + "step": 14069 + }, + { + "epoch": 0.13318692553080716, + "grad_norm": 692.4955444335938, + "learning_rate": 1.944680859482992e-06, + "loss": 17.293, + "step": 14070 + }, + { + "epoch": 0.13319639155252222, + "grad_norm": 323.14605712890625, + "learning_rate": 1.944670803374407e-06, + "loss": 36.75, + "step": 14071 + }, + { + "epoch": 0.13320585757423728, + "grad_norm": 243.7960662841797, + "learning_rate": 1.944660746377893e-06, + "loss": 17.668, + "step": 14072 + }, + { + "epoch": 0.13321532359595234, + "grad_norm": 375.0509033203125, + "learning_rate": 1.94465068849346e-06, + "loss": 13.0625, + "step": 14073 + }, + { + "epoch": 0.13322478961766737, + "grad_norm": 349.4651794433594, + "learning_rate": 1.944640629721116e-06, + "loss": 33.6406, + "step": 14074 + }, + { + "epoch": 0.13323425563938243, + "grad_norm": 385.38372802734375, + "learning_rate": 1.9446305700608713e-06, + "loss": 37.8281, + "step": 14075 + }, + { + "epoch": 0.1332437216610975, + "grad_norm": 411.33740234375, + "learning_rate": 1.9446205095127355e-06, + "loss": 40.8281, + "step": 14076 + }, + { + "epoch": 0.13325318768281255, + "grad_norm": 374.66839599609375, + "learning_rate": 1.9446104480767177e-06, + "loss": 24.3672, + "step": 14077 + }, + { + "epoch": 0.1332626537045276, + "grad_norm": 264.6129150390625, + "learning_rate": 1.9446003857528276e-06, + "loss": 10.3867, + "step": 14078 + }, + { + "epoch": 0.13327211972624264, + "grad_norm": 302.4737854003906, + "learning_rate": 1.9445903225410747e-06, + "loss": 33.0156, + "step": 14079 + }, + { + "epoch": 0.1332815857479577, + "grad_norm": 399.12786865234375, + "learning_rate": 1.9445802584414682e-06, + "loss": 37.9375, + "step": 14080 + }, + { + "epoch": 0.13329105176967276, + "grad_norm": 383.6885986328125, + "learning_rate": 1.9445701934540183e-06, + "loss": 18.0273, + "step": 14081 + }, + { + "epoch": 0.13330051779138782, + "grad_norm": 295.3364562988281, + "learning_rate": 1.944560127578733e-06, + "loss": 18.0234, + "step": 14082 + }, + { + "epoch": 0.13330998381310286, + "grad_norm": 548.1799926757812, + "learning_rate": 1.944550060815623e-06, + "loss": 46.6484, + "step": 14083 + }, + { + "epoch": 0.13331944983481792, + "grad_norm": 315.5260925292969, + "learning_rate": 1.9445399931646974e-06, + "loss": 34.1562, + "step": 14084 + }, + { + "epoch": 0.13332891585653298, + "grad_norm": 344.89483642578125, + "learning_rate": 1.9445299246259657e-06, + "loss": 20.1719, + "step": 14085 + }, + { + "epoch": 0.13333838187824804, + "grad_norm": 616.0325927734375, + "learning_rate": 1.9445198551994376e-06, + "loss": 43.8047, + "step": 14086 + }, + { + "epoch": 0.1333478478999631, + "grad_norm": 448.6207580566406, + "learning_rate": 1.9445097848851216e-06, + "loss": 50.6094, + "step": 14087 + }, + { + "epoch": 0.13335731392167813, + "grad_norm": 270.03350830078125, + "learning_rate": 1.944499713683028e-06, + "loss": 20.4688, + "step": 14088 + }, + { + "epoch": 0.1333667799433932, + "grad_norm": 940.17919921875, + "learning_rate": 1.9444896415931668e-06, + "loss": 40.7891, + "step": 14089 + }, + { + "epoch": 0.13337624596510825, + "grad_norm": 567.7007446289062, + "learning_rate": 1.9444795686155462e-06, + "loss": 48.1094, + "step": 14090 + }, + { + "epoch": 0.1333857119868233, + "grad_norm": 275.9041442871094, + "learning_rate": 1.9444694947501763e-06, + "loss": 18.2812, + "step": 14091 + }, + { + "epoch": 0.13339517800853834, + "grad_norm": 524.6348266601562, + "learning_rate": 1.9444594199970667e-06, + "loss": 37.7969, + "step": 14092 + }, + { + "epoch": 0.1334046440302534, + "grad_norm": 664.5223388671875, + "learning_rate": 1.9444493443562263e-06, + "loss": 16.5391, + "step": 14093 + }, + { + "epoch": 0.13341411005196846, + "grad_norm": 693.8245239257812, + "learning_rate": 1.9444392678276653e-06, + "loss": 31.125, + "step": 14094 + }, + { + "epoch": 0.13342357607368352, + "grad_norm": 274.4820251464844, + "learning_rate": 1.9444291904113927e-06, + "loss": 27.1172, + "step": 14095 + }, + { + "epoch": 0.13343304209539858, + "grad_norm": 255.0679473876953, + "learning_rate": 1.9444191121074184e-06, + "loss": 20.625, + "step": 14096 + }, + { + "epoch": 0.1334425081171136, + "grad_norm": 389.8281555175781, + "learning_rate": 1.944409032915751e-06, + "loss": 17.9688, + "step": 14097 + }, + { + "epoch": 0.13345197413882867, + "grad_norm": 562.8201904296875, + "learning_rate": 1.944398952836401e-06, + "loss": 48.0469, + "step": 14098 + }, + { + "epoch": 0.13346144016054373, + "grad_norm": 161.4737548828125, + "learning_rate": 1.944388871869377e-06, + "loss": 21.5312, + "step": 14099 + }, + { + "epoch": 0.1334709061822588, + "grad_norm": 417.68060302734375, + "learning_rate": 1.9443787900146896e-06, + "loss": 42.4375, + "step": 14100 + }, + { + "epoch": 0.13348037220397382, + "grad_norm": 385.7509765625, + "learning_rate": 1.944368707272347e-06, + "loss": 27.7812, + "step": 14101 + }, + { + "epoch": 0.13348983822568888, + "grad_norm": 137.77792358398438, + "learning_rate": 1.944358623642359e-06, + "loss": 14.6523, + "step": 14102 + }, + { + "epoch": 0.13349930424740394, + "grad_norm": 372.3295593261719, + "learning_rate": 1.944348539124736e-06, + "loss": 20.9141, + "step": 14103 + }, + { + "epoch": 0.133508770269119, + "grad_norm": 279.2896728515625, + "learning_rate": 1.9443384537194865e-06, + "loss": 19.1484, + "step": 14104 + }, + { + "epoch": 0.13351823629083406, + "grad_norm": 276.90118408203125, + "learning_rate": 1.9443283674266203e-06, + "loss": 23.8594, + "step": 14105 + }, + { + "epoch": 0.1335277023125491, + "grad_norm": 3.2869646549224854, + "learning_rate": 1.944318280246147e-06, + "loss": 0.9873, + "step": 14106 + }, + { + "epoch": 0.13353716833426416, + "grad_norm": 313.6844787597656, + "learning_rate": 1.944308192178076e-06, + "loss": 47.3359, + "step": 14107 + }, + { + "epoch": 0.13354663435597922, + "grad_norm": 304.5935363769531, + "learning_rate": 1.944298103222416e-06, + "loss": 16.7461, + "step": 14108 + }, + { + "epoch": 0.13355610037769428, + "grad_norm": 249.31724548339844, + "learning_rate": 1.9442880133791783e-06, + "loss": 20.7734, + "step": 14109 + }, + { + "epoch": 0.1335655663994093, + "grad_norm": 251.36898803710938, + "learning_rate": 1.9442779226483706e-06, + "loss": 30.4688, + "step": 14110 + }, + { + "epoch": 0.13357503242112437, + "grad_norm": 206.7998504638672, + "learning_rate": 1.9442678310300032e-06, + "loss": 16.0312, + "step": 14111 + }, + { + "epoch": 0.13358449844283943, + "grad_norm": 278.50421142578125, + "learning_rate": 1.9442577385240855e-06, + "loss": 20.6484, + "step": 14112 + }, + { + "epoch": 0.1335939644645545, + "grad_norm": 528.382568359375, + "learning_rate": 1.944247645130627e-06, + "loss": 19.4453, + "step": 14113 + }, + { + "epoch": 0.13360343048626955, + "grad_norm": 353.4820861816406, + "learning_rate": 1.944237550849637e-06, + "loss": 19.7266, + "step": 14114 + }, + { + "epoch": 0.13361289650798458, + "grad_norm": 3.281451463699341, + "learning_rate": 1.944227455681125e-06, + "loss": 0.7817, + "step": 14115 + }, + { + "epoch": 0.13362236252969964, + "grad_norm": 470.3594665527344, + "learning_rate": 1.9442173596251013e-06, + "loss": 42.0469, + "step": 14116 + }, + { + "epoch": 0.1336318285514147, + "grad_norm": 388.6083984375, + "learning_rate": 1.944207262681574e-06, + "loss": 24.9219, + "step": 14117 + }, + { + "epoch": 0.13364129457312976, + "grad_norm": 294.1619567871094, + "learning_rate": 1.944197164850554e-06, + "loss": 30.6094, + "step": 14118 + }, + { + "epoch": 0.1336507605948448, + "grad_norm": 1353.3441162109375, + "learning_rate": 1.9441870661320493e-06, + "loss": 46.3438, + "step": 14119 + }, + { + "epoch": 0.13366022661655985, + "grad_norm": 348.78643798828125, + "learning_rate": 1.9441769665260706e-06, + "loss": 47.6172, + "step": 14120 + }, + { + "epoch": 0.1336696926382749, + "grad_norm": 666.6171264648438, + "learning_rate": 1.944166866032627e-06, + "loss": 14.8359, + "step": 14121 + }, + { + "epoch": 0.13367915865998997, + "grad_norm": 256.5204772949219, + "learning_rate": 1.944156764651728e-06, + "loss": 21.4219, + "step": 14122 + }, + { + "epoch": 0.13368862468170503, + "grad_norm": 459.7120666503906, + "learning_rate": 1.944146662383383e-06, + "loss": 26.1836, + "step": 14123 + }, + { + "epoch": 0.13369809070342006, + "grad_norm": 158.26951599121094, + "learning_rate": 1.9441365592276015e-06, + "loss": 23.375, + "step": 14124 + }, + { + "epoch": 0.13370755672513512, + "grad_norm": 456.4707336425781, + "learning_rate": 1.944126455184393e-06, + "loss": 25.7969, + "step": 14125 + }, + { + "epoch": 0.13371702274685018, + "grad_norm": 501.8450622558594, + "learning_rate": 1.9441163502537672e-06, + "loss": 38.0723, + "step": 14126 + }, + { + "epoch": 0.13372648876856524, + "grad_norm": 380.2008361816406, + "learning_rate": 1.9441062444357336e-06, + "loss": 40.6133, + "step": 14127 + }, + { + "epoch": 0.13373595479028028, + "grad_norm": 542.9956665039062, + "learning_rate": 1.944096137730301e-06, + "loss": 20.3203, + "step": 14128 + }, + { + "epoch": 0.13374542081199534, + "grad_norm": 439.3186340332031, + "learning_rate": 1.94408603013748e-06, + "loss": 51.7188, + "step": 14129 + }, + { + "epoch": 0.1337548868337104, + "grad_norm": 211.76760864257812, + "learning_rate": 1.944075921657279e-06, + "loss": 24.9766, + "step": 14130 + }, + { + "epoch": 0.13376435285542546, + "grad_norm": 400.4131774902344, + "learning_rate": 1.9440658122897084e-06, + "loss": 32.5, + "step": 14131 + }, + { + "epoch": 0.13377381887714052, + "grad_norm": 235.63385009765625, + "learning_rate": 1.9440557020347772e-06, + "loss": 26.918, + "step": 14132 + }, + { + "epoch": 0.13378328489885555, + "grad_norm": 216.56845092773438, + "learning_rate": 1.9440455908924953e-06, + "loss": 16.4531, + "step": 14133 + }, + { + "epoch": 0.1337927509205706, + "grad_norm": 749.4583129882812, + "learning_rate": 1.9440354788628717e-06, + "loss": 50.4375, + "step": 14134 + }, + { + "epoch": 0.13380221694228567, + "grad_norm": 1230.23291015625, + "learning_rate": 1.9440253659459164e-06, + "loss": 10.3203, + "step": 14135 + }, + { + "epoch": 0.13381168296400073, + "grad_norm": 1373.510498046875, + "learning_rate": 1.9440152521416385e-06, + "loss": 52.5195, + "step": 14136 + }, + { + "epoch": 0.13382114898571576, + "grad_norm": 3.376716136932373, + "learning_rate": 1.944005137450048e-06, + "loss": 0.9297, + "step": 14137 + }, + { + "epoch": 0.13383061500743082, + "grad_norm": 227.19464111328125, + "learning_rate": 1.9439950218711535e-06, + "loss": 20.3125, + "step": 14138 + }, + { + "epoch": 0.13384008102914588, + "grad_norm": 348.6930236816406, + "learning_rate": 1.9439849054049653e-06, + "loss": 25.6797, + "step": 14139 + }, + { + "epoch": 0.13384954705086094, + "grad_norm": 2.7287697792053223, + "learning_rate": 1.943974788051493e-06, + "loss": 0.8452, + "step": 14140 + }, + { + "epoch": 0.133859013072576, + "grad_norm": 454.28912353515625, + "learning_rate": 1.9439646698107456e-06, + "loss": 27.6797, + "step": 14141 + }, + { + "epoch": 0.13386847909429103, + "grad_norm": 2.954986572265625, + "learning_rate": 1.9439545506827327e-06, + "loss": 0.8867, + "step": 14142 + }, + { + "epoch": 0.1338779451160061, + "grad_norm": 454.2418518066406, + "learning_rate": 1.9439444306674642e-06, + "loss": 51.9844, + "step": 14143 + }, + { + "epoch": 0.13388741113772115, + "grad_norm": 404.5354309082031, + "learning_rate": 1.943934309764949e-06, + "loss": 49.4766, + "step": 14144 + }, + { + "epoch": 0.1338968771594362, + "grad_norm": 194.04605102539062, + "learning_rate": 1.9439241879751974e-06, + "loss": 20.9297, + "step": 14145 + }, + { + "epoch": 0.13390634318115124, + "grad_norm": 305.3949279785156, + "learning_rate": 1.943914065298218e-06, + "loss": 32.4219, + "step": 14146 + }, + { + "epoch": 0.1339158092028663, + "grad_norm": 230.68368530273438, + "learning_rate": 1.9439039417340213e-06, + "loss": 21.75, + "step": 14147 + }, + { + "epoch": 0.13392527522458136, + "grad_norm": 1219.0914306640625, + "learning_rate": 1.943893817282616e-06, + "loss": 25.1953, + "step": 14148 + }, + { + "epoch": 0.13393474124629642, + "grad_norm": 221.67417907714844, + "learning_rate": 1.943883691944012e-06, + "loss": 14.793, + "step": 14149 + }, + { + "epoch": 0.13394420726801148, + "grad_norm": 306.36102294921875, + "learning_rate": 1.9438735657182186e-06, + "loss": 25.6094, + "step": 14150 + }, + { + "epoch": 0.13395367328972652, + "grad_norm": 459.3589172363281, + "learning_rate": 1.9438634386052456e-06, + "loss": 44.8906, + "step": 14151 + }, + { + "epoch": 0.13396313931144158, + "grad_norm": 443.8653869628906, + "learning_rate": 1.943853310605102e-06, + "loss": 54.9688, + "step": 14152 + }, + { + "epoch": 0.13397260533315664, + "grad_norm": 355.4482116699219, + "learning_rate": 1.9438431817177983e-06, + "loss": 28.4609, + "step": 14153 + }, + { + "epoch": 0.1339820713548717, + "grad_norm": 830.1468505859375, + "learning_rate": 1.943833051943343e-06, + "loss": 41.1875, + "step": 14154 + }, + { + "epoch": 0.13399153737658676, + "grad_norm": 268.7648620605469, + "learning_rate": 1.943822921281746e-06, + "loss": 21.1641, + "step": 14155 + }, + { + "epoch": 0.1340010033983018, + "grad_norm": 302.2831726074219, + "learning_rate": 1.943812789733017e-06, + "loss": 17.9688, + "step": 14156 + }, + { + "epoch": 0.13401046942001685, + "grad_norm": 308.0558166503906, + "learning_rate": 1.943802657297165e-06, + "loss": 49.9688, + "step": 14157 + }, + { + "epoch": 0.1340199354417319, + "grad_norm": 267.1319274902344, + "learning_rate": 1.9437925239742006e-06, + "loss": 15.4688, + "step": 14158 + }, + { + "epoch": 0.13402940146344697, + "grad_norm": 3.4573042392730713, + "learning_rate": 1.943782389764132e-06, + "loss": 1.0981, + "step": 14159 + }, + { + "epoch": 0.134038867485162, + "grad_norm": 615.1035766601562, + "learning_rate": 1.9437722546669697e-06, + "loss": 19.9492, + "step": 14160 + }, + { + "epoch": 0.13404833350687706, + "grad_norm": 505.2385559082031, + "learning_rate": 1.9437621186827233e-06, + "loss": 37.8828, + "step": 14161 + }, + { + "epoch": 0.13405779952859212, + "grad_norm": 427.3658752441406, + "learning_rate": 1.9437519818114014e-06, + "loss": 16.1445, + "step": 14162 + }, + { + "epoch": 0.13406726555030718, + "grad_norm": 645.6940307617188, + "learning_rate": 1.943741844053014e-06, + "loss": 34.9688, + "step": 14163 + }, + { + "epoch": 0.13407673157202224, + "grad_norm": 361.7265625, + "learning_rate": 1.9437317054075705e-06, + "loss": 32.8281, + "step": 14164 + }, + { + "epoch": 0.13408619759373727, + "grad_norm": 242.26365661621094, + "learning_rate": 1.9437215658750806e-06, + "loss": 24.5547, + "step": 14165 + }, + { + "epoch": 0.13409566361545233, + "grad_norm": 610.8528442382812, + "learning_rate": 1.943711425455554e-06, + "loss": 27.25, + "step": 14166 + }, + { + "epoch": 0.1341051296371674, + "grad_norm": 633.407958984375, + "learning_rate": 1.943701284149e-06, + "loss": 53.6641, + "step": 14167 + }, + { + "epoch": 0.13411459565888245, + "grad_norm": 872.4664916992188, + "learning_rate": 1.9436911419554287e-06, + "loss": 48.4531, + "step": 14168 + }, + { + "epoch": 0.13412406168059748, + "grad_norm": 417.2672424316406, + "learning_rate": 1.9436809988748483e-06, + "loss": 21.4453, + "step": 14169 + }, + { + "epoch": 0.13413352770231254, + "grad_norm": 393.7019958496094, + "learning_rate": 1.9436708549072698e-06, + "loss": 27.0469, + "step": 14170 + }, + { + "epoch": 0.1341429937240276, + "grad_norm": 191.14700317382812, + "learning_rate": 1.9436607100527017e-06, + "loss": 13.9023, + "step": 14171 + }, + { + "epoch": 0.13415245974574266, + "grad_norm": 696.6983642578125, + "learning_rate": 1.943650564311154e-06, + "loss": 31.4375, + "step": 14172 + }, + { + "epoch": 0.13416192576745772, + "grad_norm": 722.6909790039062, + "learning_rate": 1.943640417682636e-06, + "loss": 44.0547, + "step": 14173 + }, + { + "epoch": 0.13417139178917276, + "grad_norm": 470.7260437011719, + "learning_rate": 1.9436302701671575e-06, + "loss": 31.2656, + "step": 14174 + }, + { + "epoch": 0.13418085781088782, + "grad_norm": 269.5786437988281, + "learning_rate": 1.943620121764728e-06, + "loss": 27.5156, + "step": 14175 + }, + { + "epoch": 0.13419032383260288, + "grad_norm": 502.9660949707031, + "learning_rate": 1.943609972475357e-06, + "loss": 21.3047, + "step": 14176 + }, + { + "epoch": 0.13419978985431794, + "grad_norm": 345.9015808105469, + "learning_rate": 1.943599822299054e-06, + "loss": 20.2891, + "step": 14177 + }, + { + "epoch": 0.13420925587603297, + "grad_norm": 388.90447998046875, + "learning_rate": 1.943589671235829e-06, + "loss": 19.375, + "step": 14178 + }, + { + "epoch": 0.13421872189774803, + "grad_norm": 301.009765625, + "learning_rate": 1.9435795192856905e-06, + "loss": 19.2578, + "step": 14179 + }, + { + "epoch": 0.1342281879194631, + "grad_norm": 649.9561767578125, + "learning_rate": 1.9435693664486487e-06, + "loss": 21.8359, + "step": 14180 + }, + { + "epoch": 0.13423765394117815, + "grad_norm": 156.1368865966797, + "learning_rate": 1.943559212724713e-06, + "loss": 16.6406, + "step": 14181 + }, + { + "epoch": 0.1342471199628932, + "grad_norm": 373.58624267578125, + "learning_rate": 1.9435490581138933e-06, + "loss": 23.1484, + "step": 14182 + }, + { + "epoch": 0.13425658598460824, + "grad_norm": 280.83135986328125, + "learning_rate": 1.9435389026161987e-06, + "loss": 18.5859, + "step": 14183 + }, + { + "epoch": 0.1342660520063233, + "grad_norm": 198.314697265625, + "learning_rate": 1.943528746231639e-06, + "loss": 24.6719, + "step": 14184 + }, + { + "epoch": 0.13427551802803836, + "grad_norm": 504.6814270019531, + "learning_rate": 1.943518588960224e-06, + "loss": 29.9766, + "step": 14185 + }, + { + "epoch": 0.13428498404975342, + "grad_norm": 285.0907287597656, + "learning_rate": 1.943508430801962e-06, + "loss": 14.8125, + "step": 14186 + }, + { + "epoch": 0.13429445007146845, + "grad_norm": 422.0186767578125, + "learning_rate": 1.943498271756864e-06, + "loss": 41.4844, + "step": 14187 + }, + { + "epoch": 0.1343039160931835, + "grad_norm": 728.0380249023438, + "learning_rate": 1.9434881118249392e-06, + "loss": 58.0, + "step": 14188 + }, + { + "epoch": 0.13431338211489857, + "grad_norm": 309.2087097167969, + "learning_rate": 1.9434779510061963e-06, + "loss": 35.6406, + "step": 14189 + }, + { + "epoch": 0.13432284813661363, + "grad_norm": 413.2934265136719, + "learning_rate": 1.9434677893006463e-06, + "loss": 39.9531, + "step": 14190 + }, + { + "epoch": 0.1343323141583287, + "grad_norm": 444.0975341796875, + "learning_rate": 1.9434576267082978e-06, + "loss": 37.9141, + "step": 14191 + }, + { + "epoch": 0.13434178018004372, + "grad_norm": 204.76400756835938, + "learning_rate": 1.94344746322916e-06, + "loss": 23.9219, + "step": 14192 + }, + { + "epoch": 0.13435124620175878, + "grad_norm": 383.9498596191406, + "learning_rate": 1.9434372988632433e-06, + "loss": 41.4531, + "step": 14193 + }, + { + "epoch": 0.13436071222347384, + "grad_norm": 408.2906188964844, + "learning_rate": 1.943427133610557e-06, + "loss": 53.125, + "step": 14194 + }, + { + "epoch": 0.1343701782451889, + "grad_norm": 496.3587341308594, + "learning_rate": 1.94341696747111e-06, + "loss": 68.3828, + "step": 14195 + }, + { + "epoch": 0.13437964426690394, + "grad_norm": 917.7031860351562, + "learning_rate": 1.943406800444913e-06, + "loss": 66.1562, + "step": 14196 + }, + { + "epoch": 0.134389110288619, + "grad_norm": 705.3610229492188, + "learning_rate": 1.943396632531975e-06, + "loss": 41.6172, + "step": 14197 + }, + { + "epoch": 0.13439857631033406, + "grad_norm": 4.417141437530518, + "learning_rate": 1.9433864637323053e-06, + "loss": 1.0679, + "step": 14198 + }, + { + "epoch": 0.13440804233204912, + "grad_norm": 223.31044006347656, + "learning_rate": 1.943376294045914e-06, + "loss": 20.7773, + "step": 14199 + }, + { + "epoch": 0.13441750835376418, + "grad_norm": 996.3882446289062, + "learning_rate": 1.94336612347281e-06, + "loss": 35.3359, + "step": 14200 + }, + { + "epoch": 0.1344269743754792, + "grad_norm": 321.0157775878906, + "learning_rate": 1.9433559520130034e-06, + "loss": 24.0234, + "step": 14201 + }, + { + "epoch": 0.13443644039719427, + "grad_norm": 367.0862121582031, + "learning_rate": 1.943345779666504e-06, + "loss": 24.4062, + "step": 14202 + }, + { + "epoch": 0.13444590641890933, + "grad_norm": 136.08985900878906, + "learning_rate": 1.94333560643332e-06, + "loss": 20.625, + "step": 14203 + }, + { + "epoch": 0.1344553724406244, + "grad_norm": 389.1661071777344, + "learning_rate": 1.9433254323134624e-06, + "loss": 42.5625, + "step": 14204 + }, + { + "epoch": 0.13446483846233942, + "grad_norm": 285.5508117675781, + "learning_rate": 1.94331525730694e-06, + "loss": 36.4922, + "step": 14205 + }, + { + "epoch": 0.13447430448405448, + "grad_norm": 339.95556640625, + "learning_rate": 1.9433050814137627e-06, + "loss": 10.043, + "step": 14206 + }, + { + "epoch": 0.13448377050576954, + "grad_norm": 309.1684875488281, + "learning_rate": 1.9432949046339405e-06, + "loss": 25.1719, + "step": 14207 + }, + { + "epoch": 0.1344932365274846, + "grad_norm": 146.3238067626953, + "learning_rate": 1.943284726967482e-06, + "loss": 19.2109, + "step": 14208 + }, + { + "epoch": 0.13450270254919966, + "grad_norm": 368.12451171875, + "learning_rate": 1.943274548414397e-06, + "loss": 28.6484, + "step": 14209 + }, + { + "epoch": 0.1345121685709147, + "grad_norm": 339.240478515625, + "learning_rate": 1.9432643689746956e-06, + "loss": 12.8027, + "step": 14210 + }, + { + "epoch": 0.13452163459262975, + "grad_norm": 606.95556640625, + "learning_rate": 1.943254188648387e-06, + "loss": 30.1094, + "step": 14211 + }, + { + "epoch": 0.1345311006143448, + "grad_norm": 213.56454467773438, + "learning_rate": 1.943244007435481e-06, + "loss": 21.5469, + "step": 14212 + }, + { + "epoch": 0.13454056663605987, + "grad_norm": 370.81524658203125, + "learning_rate": 1.943233825335987e-06, + "loss": 23.9844, + "step": 14213 + }, + { + "epoch": 0.1345500326577749, + "grad_norm": 311.1214294433594, + "learning_rate": 1.9432236423499143e-06, + "loss": 19.0977, + "step": 14214 + }, + { + "epoch": 0.13455949867948996, + "grad_norm": 316.57562255859375, + "learning_rate": 1.943213458477273e-06, + "loss": 39.375, + "step": 14215 + }, + { + "epoch": 0.13456896470120502, + "grad_norm": 1624.587646484375, + "learning_rate": 1.9432032737180724e-06, + "loss": 49.9375, + "step": 14216 + }, + { + "epoch": 0.13457843072292008, + "grad_norm": 710.3591918945312, + "learning_rate": 1.9431930880723216e-06, + "loss": 33.1484, + "step": 14217 + }, + { + "epoch": 0.13458789674463514, + "grad_norm": 717.5377197265625, + "learning_rate": 1.9431829015400314e-06, + "loss": 36.9688, + "step": 14218 + }, + { + "epoch": 0.13459736276635018, + "grad_norm": 1054.202392578125, + "learning_rate": 1.94317271412121e-06, + "loss": 55.7109, + "step": 14219 + }, + { + "epoch": 0.13460682878806524, + "grad_norm": 749.232666015625, + "learning_rate": 1.9431625258158683e-06, + "loss": 35.4258, + "step": 14220 + }, + { + "epoch": 0.1346162948097803, + "grad_norm": 370.68304443359375, + "learning_rate": 1.9431523366240144e-06, + "loss": 42.9688, + "step": 14221 + }, + { + "epoch": 0.13462576083149536, + "grad_norm": 272.285400390625, + "learning_rate": 1.943142146545659e-06, + "loss": 13.168, + "step": 14222 + }, + { + "epoch": 0.1346352268532104, + "grad_norm": 299.84320068359375, + "learning_rate": 1.9431319555808114e-06, + "loss": 28.8906, + "step": 14223 + }, + { + "epoch": 0.13464469287492545, + "grad_norm": 3.0009701251983643, + "learning_rate": 1.943121763729481e-06, + "loss": 1.0073, + "step": 14224 + }, + { + "epoch": 0.1346541588966405, + "grad_norm": 1224.3033447265625, + "learning_rate": 1.9431115709916773e-06, + "loss": 81.0781, + "step": 14225 + }, + { + "epoch": 0.13466362491835557, + "grad_norm": 245.18484497070312, + "learning_rate": 1.9431013773674106e-06, + "loss": 20.4062, + "step": 14226 + }, + { + "epoch": 0.13467309094007063, + "grad_norm": 739.587646484375, + "learning_rate": 1.9430911828566896e-06, + "loss": 51.8281, + "step": 14227 + }, + { + "epoch": 0.13468255696178566, + "grad_norm": 183.4781951904297, + "learning_rate": 1.9430809874595245e-06, + "loss": 15.0938, + "step": 14228 + }, + { + "epoch": 0.13469202298350072, + "grad_norm": 288.9739990234375, + "learning_rate": 1.943070791175924e-06, + "loss": 27.4609, + "step": 14229 + }, + { + "epoch": 0.13470148900521578, + "grad_norm": 499.42315673828125, + "learning_rate": 1.9430605940058992e-06, + "loss": 43.0156, + "step": 14230 + }, + { + "epoch": 0.13471095502693084, + "grad_norm": 450.610107421875, + "learning_rate": 1.9430503959494585e-06, + "loss": 49.4141, + "step": 14231 + }, + { + "epoch": 0.13472042104864587, + "grad_norm": 230.6866455078125, + "learning_rate": 1.9430401970066114e-06, + "loss": 24.8203, + "step": 14232 + }, + { + "epoch": 0.13472988707036093, + "grad_norm": 536.8527221679688, + "learning_rate": 1.9430299971773684e-06, + "loss": 47.4531, + "step": 14233 + }, + { + "epoch": 0.134739353092076, + "grad_norm": 544.7283325195312, + "learning_rate": 1.9430197964617377e-06, + "loss": 21.6562, + "step": 14234 + }, + { + "epoch": 0.13474881911379105, + "grad_norm": 444.9249267578125, + "learning_rate": 1.9430095948597305e-06, + "loss": 36.1094, + "step": 14235 + }, + { + "epoch": 0.1347582851355061, + "grad_norm": 308.171630859375, + "learning_rate": 1.9429993923713555e-06, + "loss": 36.2188, + "step": 14236 + }, + { + "epoch": 0.13476775115722114, + "grad_norm": 312.9499206542969, + "learning_rate": 1.942989188996622e-06, + "loss": 27.4492, + "step": 14237 + }, + { + "epoch": 0.1347772171789362, + "grad_norm": 385.99346923828125, + "learning_rate": 1.942978984735541e-06, + "loss": 35.5938, + "step": 14238 + }, + { + "epoch": 0.13478668320065126, + "grad_norm": 228.2179412841797, + "learning_rate": 1.94296877958812e-06, + "loss": 22.3906, + "step": 14239 + }, + { + "epoch": 0.13479614922236632, + "grad_norm": 595.6715698242188, + "learning_rate": 1.94295857355437e-06, + "loss": 21.375, + "step": 14240 + }, + { + "epoch": 0.13480561524408138, + "grad_norm": 210.91259765625, + "learning_rate": 1.9429483666343006e-06, + "loss": 13.1484, + "step": 14241 + }, + { + "epoch": 0.13481508126579642, + "grad_norm": 382.349853515625, + "learning_rate": 1.9429381588279207e-06, + "loss": 41.3516, + "step": 14242 + }, + { + "epoch": 0.13482454728751148, + "grad_norm": 221.2873992919922, + "learning_rate": 1.9429279501352407e-06, + "loss": 14.7891, + "step": 14243 + }, + { + "epoch": 0.13483401330922654, + "grad_norm": 769.3270874023438, + "learning_rate": 1.9429177405562695e-06, + "loss": 29.2031, + "step": 14244 + }, + { + "epoch": 0.1348434793309416, + "grad_norm": 580.925537109375, + "learning_rate": 1.942907530091017e-06, + "loss": 44.2656, + "step": 14245 + }, + { + "epoch": 0.13485294535265663, + "grad_norm": 712.106689453125, + "learning_rate": 1.9428973187394927e-06, + "loss": 21.8203, + "step": 14246 + }, + { + "epoch": 0.1348624113743717, + "grad_norm": 409.77777099609375, + "learning_rate": 1.942887106501706e-06, + "loss": 51.1562, + "step": 14247 + }, + { + "epoch": 0.13487187739608675, + "grad_norm": 548.4237060546875, + "learning_rate": 1.9428768933776674e-06, + "loss": 27.0938, + "step": 14248 + }, + { + "epoch": 0.1348813434178018, + "grad_norm": 578.2396240234375, + "learning_rate": 1.9428666793673853e-06, + "loss": 33.3203, + "step": 14249 + }, + { + "epoch": 0.13489080943951687, + "grad_norm": 290.0273742675781, + "learning_rate": 1.9428564644708704e-06, + "loss": 17.7344, + "step": 14250 + }, + { + "epoch": 0.1349002754612319, + "grad_norm": 549.3677978515625, + "learning_rate": 1.942846248688131e-06, + "loss": 48.8125, + "step": 14251 + }, + { + "epoch": 0.13490974148294696, + "grad_norm": 3.2586915493011475, + "learning_rate": 1.942836032019178e-06, + "loss": 1.1069, + "step": 14252 + }, + { + "epoch": 0.13491920750466202, + "grad_norm": 572.5894775390625, + "learning_rate": 1.9428258144640204e-06, + "loss": 49.7227, + "step": 14253 + }, + { + "epoch": 0.13492867352637708, + "grad_norm": 293.2240295410156, + "learning_rate": 1.942815596022668e-06, + "loss": 19.4922, + "step": 14254 + }, + { + "epoch": 0.1349381395480921, + "grad_norm": 630.0292358398438, + "learning_rate": 1.94280537669513e-06, + "loss": 55.2578, + "step": 14255 + }, + { + "epoch": 0.13494760556980717, + "grad_norm": 2.585941791534424, + "learning_rate": 1.9427951564814163e-06, + "loss": 0.8528, + "step": 14256 + }, + { + "epoch": 0.13495707159152223, + "grad_norm": 651.8449096679688, + "learning_rate": 1.942784935381537e-06, + "loss": 47.8984, + "step": 14257 + }, + { + "epoch": 0.1349665376132373, + "grad_norm": 648.421142578125, + "learning_rate": 1.9427747133955008e-06, + "loss": 60.1172, + "step": 14258 + }, + { + "epoch": 0.13497600363495235, + "grad_norm": 189.5576629638672, + "learning_rate": 1.9427644905233173e-06, + "loss": 19.8047, + "step": 14259 + }, + { + "epoch": 0.13498546965666738, + "grad_norm": 887.979736328125, + "learning_rate": 1.942754266764997e-06, + "loss": 41.4609, + "step": 14260 + }, + { + "epoch": 0.13499493567838244, + "grad_norm": 277.2003173828125, + "learning_rate": 1.942744042120549e-06, + "loss": 21.0547, + "step": 14261 + }, + { + "epoch": 0.1350044017000975, + "grad_norm": 4.32442569732666, + "learning_rate": 1.942733816589983e-06, + "loss": 0.9893, + "step": 14262 + }, + { + "epoch": 0.13501386772181256, + "grad_norm": 426.88409423828125, + "learning_rate": 1.9427235901733083e-06, + "loss": 32.3281, + "step": 14263 + }, + { + "epoch": 0.1350233337435276, + "grad_norm": 452.5902099609375, + "learning_rate": 1.9427133628705348e-06, + "loss": 37.6094, + "step": 14264 + }, + { + "epoch": 0.13503279976524266, + "grad_norm": 304.7467956542969, + "learning_rate": 1.9427031346816724e-06, + "loss": 15.0234, + "step": 14265 + }, + { + "epoch": 0.13504226578695772, + "grad_norm": 233.39991760253906, + "learning_rate": 1.9426929056067297e-06, + "loss": 18.4297, + "step": 14266 + }, + { + "epoch": 0.13505173180867278, + "grad_norm": 297.34393310546875, + "learning_rate": 1.9426826756457173e-06, + "loss": 28.8359, + "step": 14267 + }, + { + "epoch": 0.13506119783038784, + "grad_norm": 3.0538952350616455, + "learning_rate": 1.942672444798645e-06, + "loss": 0.9043, + "step": 14268 + }, + { + "epoch": 0.13507066385210287, + "grad_norm": 399.8939208984375, + "learning_rate": 1.942662213065521e-06, + "loss": 43.5625, + "step": 14269 + }, + { + "epoch": 0.13508012987381793, + "grad_norm": 467.2568359375, + "learning_rate": 1.9426519804463566e-06, + "loss": 25.4297, + "step": 14270 + }, + { + "epoch": 0.135089595895533, + "grad_norm": 132.637939453125, + "learning_rate": 1.9426417469411606e-06, + "loss": 18.5938, + "step": 14271 + }, + { + "epoch": 0.13509906191724805, + "grad_norm": 390.696044921875, + "learning_rate": 1.9426315125499423e-06, + "loss": 26.8906, + "step": 14272 + }, + { + "epoch": 0.13510852793896308, + "grad_norm": 315.2506408691406, + "learning_rate": 1.942621277272712e-06, + "loss": 27.0703, + "step": 14273 + }, + { + "epoch": 0.13511799396067814, + "grad_norm": 496.1028747558594, + "learning_rate": 1.9426110411094786e-06, + "loss": 18.5781, + "step": 14274 + }, + { + "epoch": 0.1351274599823932, + "grad_norm": 306.5714111328125, + "learning_rate": 1.9426008040602526e-06, + "loss": 16.7734, + "step": 14275 + }, + { + "epoch": 0.13513692600410826, + "grad_norm": 2.76682186126709, + "learning_rate": 1.9425905661250434e-06, + "loss": 1.0996, + "step": 14276 + }, + { + "epoch": 0.13514639202582332, + "grad_norm": 267.9477233886719, + "learning_rate": 1.94258032730386e-06, + "loss": 15.457, + "step": 14277 + }, + { + "epoch": 0.13515585804753835, + "grad_norm": 236.2344512939453, + "learning_rate": 1.942570087596712e-06, + "loss": 18.6797, + "step": 14278 + }, + { + "epoch": 0.1351653240692534, + "grad_norm": 318.80670166015625, + "learning_rate": 1.94255984700361e-06, + "loss": 20.8984, + "step": 14279 + }, + { + "epoch": 0.13517479009096847, + "grad_norm": 400.8988037109375, + "learning_rate": 1.9425496055245627e-06, + "loss": 20.4531, + "step": 14280 + }, + { + "epoch": 0.13518425611268353, + "grad_norm": 451.33221435546875, + "learning_rate": 1.9425393631595804e-06, + "loss": 15.0234, + "step": 14281 + }, + { + "epoch": 0.13519372213439856, + "grad_norm": 556.8779296875, + "learning_rate": 1.9425291199086727e-06, + "loss": 28.5391, + "step": 14282 + }, + { + "epoch": 0.13520318815611362, + "grad_norm": 324.721435546875, + "learning_rate": 1.9425188757718486e-06, + "loss": 26.4609, + "step": 14283 + }, + { + "epoch": 0.13521265417782868, + "grad_norm": 340.038818359375, + "learning_rate": 1.942508630749118e-06, + "loss": 20.7266, + "step": 14284 + }, + { + "epoch": 0.13522212019954374, + "grad_norm": 227.8000946044922, + "learning_rate": 1.9424983848404906e-06, + "loss": 26.4609, + "step": 14285 + }, + { + "epoch": 0.1352315862212588, + "grad_norm": 333.4505920410156, + "learning_rate": 1.942488138045976e-06, + "loss": 20.8906, + "step": 14286 + }, + { + "epoch": 0.13524105224297384, + "grad_norm": 346.12481689453125, + "learning_rate": 1.9424778903655837e-06, + "loss": 23.0078, + "step": 14287 + }, + { + "epoch": 0.1352505182646889, + "grad_norm": 312.9981689453125, + "learning_rate": 1.942467641799324e-06, + "loss": 32.5234, + "step": 14288 + }, + { + "epoch": 0.13525998428640396, + "grad_norm": 272.75189208984375, + "learning_rate": 1.9424573923472056e-06, + "loss": 22.2734, + "step": 14289 + }, + { + "epoch": 0.13526945030811902, + "grad_norm": 422.30047607421875, + "learning_rate": 1.942447142009239e-06, + "loss": 36.125, + "step": 14290 + }, + { + "epoch": 0.13527891632983405, + "grad_norm": 413.9452209472656, + "learning_rate": 1.942436890785433e-06, + "loss": 18.5859, + "step": 14291 + }, + { + "epoch": 0.1352883823515491, + "grad_norm": 415.3386535644531, + "learning_rate": 1.9424266386757977e-06, + "loss": 35.25, + "step": 14292 + }, + { + "epoch": 0.13529784837326417, + "grad_norm": 779.1536254882812, + "learning_rate": 1.9424163856803426e-06, + "loss": 36.2969, + "step": 14293 + }, + { + "epoch": 0.13530731439497923, + "grad_norm": 804.4453125, + "learning_rate": 1.9424061317990775e-06, + "loss": 50.0, + "step": 14294 + }, + { + "epoch": 0.1353167804166943, + "grad_norm": 450.2271728515625, + "learning_rate": 1.9423958770320123e-06, + "loss": 49.9688, + "step": 14295 + }, + { + "epoch": 0.13532624643840932, + "grad_norm": 310.309814453125, + "learning_rate": 1.9423856213791557e-06, + "loss": 20.4531, + "step": 14296 + }, + { + "epoch": 0.13533571246012438, + "grad_norm": 487.6849365234375, + "learning_rate": 1.9423753648405182e-06, + "loss": 38.6016, + "step": 14297 + }, + { + "epoch": 0.13534517848183944, + "grad_norm": 370.09765625, + "learning_rate": 1.942365107416109e-06, + "loss": 46.1406, + "step": 14298 + }, + { + "epoch": 0.1353546445035545, + "grad_norm": 537.191162109375, + "learning_rate": 1.9423548491059382e-06, + "loss": 35.9727, + "step": 14299 + }, + { + "epoch": 0.13536411052526953, + "grad_norm": 652.0018920898438, + "learning_rate": 1.942344589910015e-06, + "loss": 40.2188, + "step": 14300 + }, + { + "epoch": 0.1353735765469846, + "grad_norm": 475.2620849609375, + "learning_rate": 1.942334329828349e-06, + "loss": 45.7031, + "step": 14301 + }, + { + "epoch": 0.13538304256869965, + "grad_norm": 499.7846374511719, + "learning_rate": 1.94232406886095e-06, + "loss": 41.0781, + "step": 14302 + }, + { + "epoch": 0.1353925085904147, + "grad_norm": 759.3964233398438, + "learning_rate": 1.942313807007828e-06, + "loss": 37.1797, + "step": 14303 + }, + { + "epoch": 0.13540197461212977, + "grad_norm": 360.4717712402344, + "learning_rate": 1.9423035442689924e-06, + "loss": 39.7734, + "step": 14304 + }, + { + "epoch": 0.1354114406338448, + "grad_norm": 288.0164794921875, + "learning_rate": 1.942293280644453e-06, + "loss": 22.0781, + "step": 14305 + }, + { + "epoch": 0.13542090665555986, + "grad_norm": 342.9420471191406, + "learning_rate": 1.9422830161342185e-06, + "loss": 45.1875, + "step": 14306 + }, + { + "epoch": 0.13543037267727492, + "grad_norm": 449.69415283203125, + "learning_rate": 1.9422727507382994e-06, + "loss": 42.2578, + "step": 14307 + }, + { + "epoch": 0.13543983869898998, + "grad_norm": 289.931884765625, + "learning_rate": 1.9422624844567056e-06, + "loss": 17.3281, + "step": 14308 + }, + { + "epoch": 0.13544930472070502, + "grad_norm": 285.14739990234375, + "learning_rate": 1.9422522172894458e-06, + "loss": 12.1758, + "step": 14309 + }, + { + "epoch": 0.13545877074242008, + "grad_norm": 390.376953125, + "learning_rate": 1.942241949236531e-06, + "loss": 23.0156, + "step": 14310 + }, + { + "epoch": 0.13546823676413514, + "grad_norm": 182.66494750976562, + "learning_rate": 1.9422316802979696e-06, + "loss": 17.4062, + "step": 14311 + }, + { + "epoch": 0.1354777027858502, + "grad_norm": 185.91004943847656, + "learning_rate": 1.9422214104737715e-06, + "loss": 22.6719, + "step": 14312 + }, + { + "epoch": 0.13548716880756526, + "grad_norm": 249.09140014648438, + "learning_rate": 1.9422111397639467e-06, + "loss": 26.9531, + "step": 14313 + }, + { + "epoch": 0.1354966348292803, + "grad_norm": 721.0611572265625, + "learning_rate": 1.942200868168505e-06, + "loss": 44.8203, + "step": 14314 + }, + { + "epoch": 0.13550610085099535, + "grad_norm": 516.061279296875, + "learning_rate": 1.9421905956874554e-06, + "loss": 24.1016, + "step": 14315 + }, + { + "epoch": 0.1355155668727104, + "grad_norm": 526.4065551757812, + "learning_rate": 1.942180322320808e-06, + "loss": 22.8438, + "step": 14316 + }, + { + "epoch": 0.13552503289442547, + "grad_norm": 463.66571044921875, + "learning_rate": 1.942170048068573e-06, + "loss": 44.9219, + "step": 14317 + }, + { + "epoch": 0.1355344989161405, + "grad_norm": 242.68045043945312, + "learning_rate": 1.9421597729307588e-06, + "loss": 18.3477, + "step": 14318 + }, + { + "epoch": 0.13554396493785556, + "grad_norm": 180.79383850097656, + "learning_rate": 1.9421494969073756e-06, + "loss": 24.875, + "step": 14319 + }, + { + "epoch": 0.13555343095957062, + "grad_norm": 549.184326171875, + "learning_rate": 1.942139219998434e-06, + "loss": 43.5781, + "step": 14320 + }, + { + "epoch": 0.13556289698128568, + "grad_norm": 253.86744689941406, + "learning_rate": 1.942128942203942e-06, + "loss": 8.2617, + "step": 14321 + }, + { + "epoch": 0.13557236300300074, + "grad_norm": 262.84063720703125, + "learning_rate": 1.9421186635239103e-06, + "loss": 16.25, + "step": 14322 + }, + { + "epoch": 0.13558182902471577, + "grad_norm": 274.1982727050781, + "learning_rate": 1.9421083839583484e-06, + "loss": 16.625, + "step": 14323 + }, + { + "epoch": 0.13559129504643083, + "grad_norm": 150.78271484375, + "learning_rate": 1.942098103507266e-06, + "loss": 8.6172, + "step": 14324 + }, + { + "epoch": 0.1356007610681459, + "grad_norm": 282.309326171875, + "learning_rate": 1.9420878221706726e-06, + "loss": 34.0938, + "step": 14325 + }, + { + "epoch": 0.13561022708986095, + "grad_norm": 546.4632568359375, + "learning_rate": 1.9420775399485777e-06, + "loss": 43.8594, + "step": 14326 + }, + { + "epoch": 0.135619693111576, + "grad_norm": 3.3821778297424316, + "learning_rate": 1.9420672568409913e-06, + "loss": 0.9565, + "step": 14327 + }, + { + "epoch": 0.13562915913329104, + "grad_norm": 218.5395050048828, + "learning_rate": 1.9420569728479234e-06, + "loss": 16.2031, + "step": 14328 + }, + { + "epoch": 0.1356386251550061, + "grad_norm": 462.84063720703125, + "learning_rate": 1.9420466879693825e-06, + "loss": 21.1484, + "step": 14329 + }, + { + "epoch": 0.13564809117672116, + "grad_norm": 174.74119567871094, + "learning_rate": 1.9420364022053796e-06, + "loss": 19.8047, + "step": 14330 + }, + { + "epoch": 0.13565755719843622, + "grad_norm": 591.1915283203125, + "learning_rate": 1.9420261155559236e-06, + "loss": 40.0547, + "step": 14331 + }, + { + "epoch": 0.13566702322015126, + "grad_norm": 2.5343663692474365, + "learning_rate": 1.9420158280210242e-06, + "loss": 0.9028, + "step": 14332 + }, + { + "epoch": 0.13567648924186632, + "grad_norm": 453.20965576171875, + "learning_rate": 1.9420055396006913e-06, + "loss": 40.5938, + "step": 14333 + }, + { + "epoch": 0.13568595526358138, + "grad_norm": 528.5856323242188, + "learning_rate": 1.9419952502949345e-06, + "loss": 40.0469, + "step": 14334 + }, + { + "epoch": 0.13569542128529644, + "grad_norm": 422.9614562988281, + "learning_rate": 1.9419849601037635e-06, + "loss": 24.2578, + "step": 14335 + }, + { + "epoch": 0.1357048873070115, + "grad_norm": 2.8967514038085938, + "learning_rate": 1.9419746690271878e-06, + "loss": 1.0, + "step": 14336 + }, + { + "epoch": 0.13571435332872653, + "grad_norm": 321.081787109375, + "learning_rate": 1.9419643770652174e-06, + "loss": 26.4688, + "step": 14337 + }, + { + "epoch": 0.1357238193504416, + "grad_norm": 281.95123291015625, + "learning_rate": 1.9419540842178616e-06, + "loss": 28.0938, + "step": 14338 + }, + { + "epoch": 0.13573328537215665, + "grad_norm": 338.8901062011719, + "learning_rate": 1.94194379048513e-06, + "loss": 20.2656, + "step": 14339 + }, + { + "epoch": 0.1357427513938717, + "grad_norm": 282.5542297363281, + "learning_rate": 1.9419334958670327e-06, + "loss": 20.5312, + "step": 14340 + }, + { + "epoch": 0.13575221741558674, + "grad_norm": 2.748065233230591, + "learning_rate": 1.9419232003635794e-06, + "loss": 0.9019, + "step": 14341 + }, + { + "epoch": 0.1357616834373018, + "grad_norm": 216.1139678955078, + "learning_rate": 1.9419129039747794e-06, + "loss": 13.6016, + "step": 14342 + }, + { + "epoch": 0.13577114945901686, + "grad_norm": 216.45445251464844, + "learning_rate": 1.9419026067006427e-06, + "loss": 17.3516, + "step": 14343 + }, + { + "epoch": 0.13578061548073192, + "grad_norm": 391.2931213378906, + "learning_rate": 1.9418923085411783e-06, + "loss": 14.1133, + "step": 14344 + }, + { + "epoch": 0.13579008150244698, + "grad_norm": 162.72409057617188, + "learning_rate": 1.941882009496397e-06, + "loss": 15.5156, + "step": 14345 + }, + { + "epoch": 0.135799547524162, + "grad_norm": 3.540029287338257, + "learning_rate": 1.9418717095663078e-06, + "loss": 1.0161, + "step": 14346 + }, + { + "epoch": 0.13580901354587707, + "grad_norm": 350.5783996582031, + "learning_rate": 1.94186140875092e-06, + "loss": 19.9453, + "step": 14347 + }, + { + "epoch": 0.13581847956759213, + "grad_norm": 176.37191772460938, + "learning_rate": 1.9418511070502445e-06, + "loss": 6.2266, + "step": 14348 + }, + { + "epoch": 0.1358279455893072, + "grad_norm": 569.4752807617188, + "learning_rate": 1.9418408044642897e-06, + "loss": 15.5625, + "step": 14349 + }, + { + "epoch": 0.13583741161102222, + "grad_norm": 349.7993469238281, + "learning_rate": 1.941830500993066e-06, + "loss": 12.3398, + "step": 14350 + }, + { + "epoch": 0.13584687763273728, + "grad_norm": 300.9669189453125, + "learning_rate": 1.9418201966365827e-06, + "loss": 12.8438, + "step": 14351 + }, + { + "epoch": 0.13585634365445234, + "grad_norm": 436.9273681640625, + "learning_rate": 1.9418098913948497e-06, + "loss": 33.0703, + "step": 14352 + }, + { + "epoch": 0.1358658096761674, + "grad_norm": 246.7513885498047, + "learning_rate": 1.941799585267877e-06, + "loss": 24.1797, + "step": 14353 + }, + { + "epoch": 0.13587527569788246, + "grad_norm": 319.73773193359375, + "learning_rate": 1.9417892782556736e-06, + "loss": 43.1406, + "step": 14354 + }, + { + "epoch": 0.1358847417195975, + "grad_norm": 1370.270263671875, + "learning_rate": 1.9417789703582496e-06, + "loss": 58.3438, + "step": 14355 + }, + { + "epoch": 0.13589420774131256, + "grad_norm": 733.3107299804688, + "learning_rate": 1.9417686615756147e-06, + "loss": 39.1797, + "step": 14356 + }, + { + "epoch": 0.13590367376302762, + "grad_norm": 1589.033935546875, + "learning_rate": 1.9417583519077783e-06, + "loss": 34.4219, + "step": 14357 + }, + { + "epoch": 0.13591313978474268, + "grad_norm": 419.569580078125, + "learning_rate": 1.9417480413547506e-06, + "loss": 39.0469, + "step": 14358 + }, + { + "epoch": 0.1359226058064577, + "grad_norm": 3.1426379680633545, + "learning_rate": 1.9417377299165413e-06, + "loss": 1.1025, + "step": 14359 + }, + { + "epoch": 0.13593207182817277, + "grad_norm": 184.78567504882812, + "learning_rate": 1.941727417593159e-06, + "loss": 14.6797, + "step": 14360 + }, + { + "epoch": 0.13594153784988783, + "grad_norm": 368.5979919433594, + "learning_rate": 1.941717104384615e-06, + "loss": 45.1016, + "step": 14361 + }, + { + "epoch": 0.1359510038716029, + "grad_norm": 317.9703674316406, + "learning_rate": 1.941706790290918e-06, + "loss": 33.5469, + "step": 14362 + }, + { + "epoch": 0.13596046989331795, + "grad_norm": 247.11314392089844, + "learning_rate": 1.9416964753120774e-06, + "loss": 29.9375, + "step": 14363 + }, + { + "epoch": 0.13596993591503298, + "grad_norm": 393.7843017578125, + "learning_rate": 1.9416861594481036e-06, + "loss": 8.1211, + "step": 14364 + }, + { + "epoch": 0.13597940193674804, + "grad_norm": 195.6481170654297, + "learning_rate": 1.941675842699006e-06, + "loss": 12.6172, + "step": 14365 + }, + { + "epoch": 0.1359888679584631, + "grad_norm": 765.937255859375, + "learning_rate": 1.941665525064795e-06, + "loss": 45.5859, + "step": 14366 + }, + { + "epoch": 0.13599833398017816, + "grad_norm": 389.0563049316406, + "learning_rate": 1.9416552065454794e-06, + "loss": 39.7422, + "step": 14367 + }, + { + "epoch": 0.1360078000018932, + "grad_norm": 306.54901123046875, + "learning_rate": 1.9416448871410687e-06, + "loss": 41.8594, + "step": 14368 + }, + { + "epoch": 0.13601726602360825, + "grad_norm": 355.2373352050781, + "learning_rate": 1.941634566851573e-06, + "loss": 18.3516, + "step": 14369 + }, + { + "epoch": 0.1360267320453233, + "grad_norm": 639.3895874023438, + "learning_rate": 1.941624245677003e-06, + "loss": 54.8672, + "step": 14370 + }, + { + "epoch": 0.13603619806703837, + "grad_norm": 420.28448486328125, + "learning_rate": 1.9416139236173663e-06, + "loss": 21.6875, + "step": 14371 + }, + { + "epoch": 0.13604566408875343, + "grad_norm": 424.9850769042969, + "learning_rate": 1.9416036006726743e-06, + "loss": 43.7812, + "step": 14372 + }, + { + "epoch": 0.13605513011046846, + "grad_norm": 455.503662109375, + "learning_rate": 1.941593276842936e-06, + "loss": 34.1875, + "step": 14373 + }, + { + "epoch": 0.13606459613218352, + "grad_norm": 589.6475219726562, + "learning_rate": 1.9415829521281618e-06, + "loss": 51.1094, + "step": 14374 + }, + { + "epoch": 0.13607406215389858, + "grad_norm": 602.8186645507812, + "learning_rate": 1.9415726265283607e-06, + "loss": 48.9219, + "step": 14375 + }, + { + "epoch": 0.13608352817561364, + "grad_norm": 347.6571960449219, + "learning_rate": 1.9415623000435424e-06, + "loss": 36.375, + "step": 14376 + }, + { + "epoch": 0.13609299419732868, + "grad_norm": 377.8717346191406, + "learning_rate": 1.9415519726737165e-06, + "loss": 33.8594, + "step": 14377 + }, + { + "epoch": 0.13610246021904374, + "grad_norm": 175.28419494628906, + "learning_rate": 1.9415416444188936e-06, + "loss": 8.2695, + "step": 14378 + }, + { + "epoch": 0.1361119262407588, + "grad_norm": 328.6724853515625, + "learning_rate": 1.9415313152790823e-06, + "loss": 40.9766, + "step": 14379 + }, + { + "epoch": 0.13612139226247386, + "grad_norm": 2943.770263671875, + "learning_rate": 1.9415209852542934e-06, + "loss": 25.9531, + "step": 14380 + }, + { + "epoch": 0.13613085828418892, + "grad_norm": 382.8528747558594, + "learning_rate": 1.9415106543445356e-06, + "loss": 33.4531, + "step": 14381 + }, + { + "epoch": 0.13614032430590395, + "grad_norm": 321.88623046875, + "learning_rate": 1.941500322549819e-06, + "loss": 30.457, + "step": 14382 + }, + { + "epoch": 0.136149790327619, + "grad_norm": 201.5207061767578, + "learning_rate": 1.9414899898701535e-06, + "loss": 10.8047, + "step": 14383 + }, + { + "epoch": 0.13615925634933407, + "grad_norm": 459.3773193359375, + "learning_rate": 1.9414796563055488e-06, + "loss": 50.0234, + "step": 14384 + }, + { + "epoch": 0.13616872237104913, + "grad_norm": 222.8262939453125, + "learning_rate": 1.941469321856014e-06, + "loss": 25.4531, + "step": 14385 + }, + { + "epoch": 0.13617818839276416, + "grad_norm": 264.5660705566406, + "learning_rate": 1.9414589865215593e-06, + "loss": 18.7031, + "step": 14386 + }, + { + "epoch": 0.13618765441447922, + "grad_norm": 590.0133666992188, + "learning_rate": 1.941448650302195e-06, + "loss": 35.5469, + "step": 14387 + }, + { + "epoch": 0.13619712043619428, + "grad_norm": 332.54998779296875, + "learning_rate": 1.9414383131979297e-06, + "loss": 11.0586, + "step": 14388 + }, + { + "epoch": 0.13620658645790934, + "grad_norm": 740.3516845703125, + "learning_rate": 1.941427975208774e-06, + "loss": 43.8516, + "step": 14389 + }, + { + "epoch": 0.1362160524796244, + "grad_norm": 504.6856994628906, + "learning_rate": 1.9414176363347366e-06, + "loss": 44.4922, + "step": 14390 + }, + { + "epoch": 0.13622551850133943, + "grad_norm": 478.0530090332031, + "learning_rate": 1.9414072965758286e-06, + "loss": 53.2969, + "step": 14391 + }, + { + "epoch": 0.1362349845230545, + "grad_norm": 200.093505859375, + "learning_rate": 1.9413969559320588e-06, + "loss": 17.2344, + "step": 14392 + }, + { + "epoch": 0.13624445054476955, + "grad_norm": 555.2085571289062, + "learning_rate": 1.941386614403437e-06, + "loss": 54.2188, + "step": 14393 + }, + { + "epoch": 0.1362539165664846, + "grad_norm": 1164.4610595703125, + "learning_rate": 1.941376271989973e-06, + "loss": 46.4805, + "step": 14394 + }, + { + "epoch": 0.13626338258819964, + "grad_norm": 461.6676940917969, + "learning_rate": 1.9413659286916768e-06, + "loss": 27.2656, + "step": 14395 + }, + { + "epoch": 0.1362728486099147, + "grad_norm": 473.1082763671875, + "learning_rate": 1.9413555845085573e-06, + "loss": 29.7812, + "step": 14396 + }, + { + "epoch": 0.13628231463162976, + "grad_norm": 270.583740234375, + "learning_rate": 1.941345239440625e-06, + "loss": 18.9297, + "step": 14397 + }, + { + "epoch": 0.13629178065334482, + "grad_norm": 387.0010986328125, + "learning_rate": 1.94133489348789e-06, + "loss": 10.3125, + "step": 14398 + }, + { + "epoch": 0.13630124667505988, + "grad_norm": 603.7037963867188, + "learning_rate": 1.9413245466503606e-06, + "loss": 12.4727, + "step": 14399 + }, + { + "epoch": 0.13631071269677492, + "grad_norm": 366.6186828613281, + "learning_rate": 1.941314198928048e-06, + "loss": 15.2344, + "step": 14400 + }, + { + "epoch": 0.13632017871848998, + "grad_norm": 350.64312744140625, + "learning_rate": 1.941303850320961e-06, + "loss": 18.7109, + "step": 14401 + }, + { + "epoch": 0.13632964474020504, + "grad_norm": 428.6097412109375, + "learning_rate": 1.9412935008291094e-06, + "loss": 41.7656, + "step": 14402 + }, + { + "epoch": 0.1363391107619201, + "grad_norm": 578.7471923828125, + "learning_rate": 1.9412831504525032e-06, + "loss": 48.0, + "step": 14403 + }, + { + "epoch": 0.13634857678363513, + "grad_norm": 613.8093872070312, + "learning_rate": 1.9412727991911525e-06, + "loss": 51.9219, + "step": 14404 + }, + { + "epoch": 0.1363580428053502, + "grad_norm": 250.78173828125, + "learning_rate": 1.941262447045066e-06, + "loss": 29.0625, + "step": 14405 + }, + { + "epoch": 0.13636750882706525, + "grad_norm": 603.5148315429688, + "learning_rate": 1.9412520940142547e-06, + "loss": 39.2266, + "step": 14406 + }, + { + "epoch": 0.1363769748487803, + "grad_norm": 1130.936279296875, + "learning_rate": 1.941241740098727e-06, + "loss": 58.4219, + "step": 14407 + }, + { + "epoch": 0.13638644087049537, + "grad_norm": 284.7001953125, + "learning_rate": 1.941231385298494e-06, + "loss": 22.4219, + "step": 14408 + }, + { + "epoch": 0.1363959068922104, + "grad_norm": 703.0911865234375, + "learning_rate": 1.9412210296135645e-06, + "loss": 55.3438, + "step": 14409 + }, + { + "epoch": 0.13640537291392546, + "grad_norm": 287.46197509765625, + "learning_rate": 1.9412106730439485e-06, + "loss": 15.6719, + "step": 14410 + }, + { + "epoch": 0.13641483893564052, + "grad_norm": 372.4715270996094, + "learning_rate": 1.9412003155896553e-06, + "loss": 29.8984, + "step": 14411 + }, + { + "epoch": 0.13642430495735558, + "grad_norm": 338.6893310546875, + "learning_rate": 1.941189957250695e-06, + "loss": 16.3906, + "step": 14412 + }, + { + "epoch": 0.1364337709790706, + "grad_norm": 448.7368469238281, + "learning_rate": 1.941179598027078e-06, + "loss": 46.8438, + "step": 14413 + }, + { + "epoch": 0.13644323700078567, + "grad_norm": 282.96319580078125, + "learning_rate": 1.941169237918813e-06, + "loss": 24.9297, + "step": 14414 + }, + { + "epoch": 0.13645270302250073, + "grad_norm": 622.4090576171875, + "learning_rate": 1.94115887692591e-06, + "loss": 8.0, + "step": 14415 + }, + { + "epoch": 0.1364621690442158, + "grad_norm": 547.6738891601562, + "learning_rate": 1.941148515048379e-06, + "loss": 53.1719, + "step": 14416 + }, + { + "epoch": 0.13647163506593085, + "grad_norm": 340.5558166503906, + "learning_rate": 1.9411381522862296e-06, + "loss": 18.2227, + "step": 14417 + }, + { + "epoch": 0.13648110108764588, + "grad_norm": 279.1901550292969, + "learning_rate": 1.941127788639472e-06, + "loss": 25.625, + "step": 14418 + }, + { + "epoch": 0.13649056710936094, + "grad_norm": 435.7654113769531, + "learning_rate": 1.9411174241081153e-06, + "loss": 22.7344, + "step": 14419 + }, + { + "epoch": 0.136500033131076, + "grad_norm": 355.8713684082031, + "learning_rate": 1.941107058692169e-06, + "loss": 34.9531, + "step": 14420 + }, + { + "epoch": 0.13650949915279106, + "grad_norm": 768.5711059570312, + "learning_rate": 1.9410966923916437e-06, + "loss": 35.2617, + "step": 14421 + }, + { + "epoch": 0.13651896517450612, + "grad_norm": 447.43719482421875, + "learning_rate": 1.9410863252065488e-06, + "loss": 32.8594, + "step": 14422 + }, + { + "epoch": 0.13652843119622116, + "grad_norm": 346.3257141113281, + "learning_rate": 1.941075957136894e-06, + "loss": 22.3125, + "step": 14423 + }, + { + "epoch": 0.13653789721793622, + "grad_norm": 249.5777130126953, + "learning_rate": 1.9410655881826885e-06, + "loss": 22.9453, + "step": 14424 + }, + { + "epoch": 0.13654736323965128, + "grad_norm": 565.896728515625, + "learning_rate": 1.9410552183439427e-06, + "loss": 45.3359, + "step": 14425 + }, + { + "epoch": 0.13655682926136634, + "grad_norm": 1192.61279296875, + "learning_rate": 1.9410448476206667e-06, + "loss": 43.7969, + "step": 14426 + }, + { + "epoch": 0.13656629528308137, + "grad_norm": 252.94654846191406, + "learning_rate": 1.9410344760128692e-06, + "loss": 14.8516, + "step": 14427 + }, + { + "epoch": 0.13657576130479643, + "grad_norm": 364.39715576171875, + "learning_rate": 1.941024103520561e-06, + "loss": 32.7188, + "step": 14428 + }, + { + "epoch": 0.1365852273265115, + "grad_norm": 358.43499755859375, + "learning_rate": 1.941013730143751e-06, + "loss": 19.4922, + "step": 14429 + }, + { + "epoch": 0.13659469334822655, + "grad_norm": 423.6853942871094, + "learning_rate": 1.9410033558824496e-06, + "loss": 22.0312, + "step": 14430 + }, + { + "epoch": 0.1366041593699416, + "grad_norm": 673.2259521484375, + "learning_rate": 1.940992980736666e-06, + "loss": 49.5938, + "step": 14431 + }, + { + "epoch": 0.13661362539165664, + "grad_norm": 500.832275390625, + "learning_rate": 1.9409826047064104e-06, + "loss": 21.6211, + "step": 14432 + }, + { + "epoch": 0.1366230914133717, + "grad_norm": 428.1765441894531, + "learning_rate": 1.9409722277916923e-06, + "loss": 22.5, + "step": 14433 + }, + { + "epoch": 0.13663255743508676, + "grad_norm": 330.9739074707031, + "learning_rate": 1.940961849992521e-06, + "loss": 17.6562, + "step": 14434 + }, + { + "epoch": 0.13664202345680182, + "grad_norm": 314.803955078125, + "learning_rate": 1.940951471308908e-06, + "loss": 20.9609, + "step": 14435 + }, + { + "epoch": 0.13665148947851685, + "grad_norm": 510.18927001953125, + "learning_rate": 1.9409410917408608e-06, + "loss": 33.4531, + "step": 14436 + }, + { + "epoch": 0.1366609555002319, + "grad_norm": 607.7249755859375, + "learning_rate": 1.9409307112883906e-06, + "loss": 48.3672, + "step": 14437 + }, + { + "epoch": 0.13667042152194697, + "grad_norm": 441.3402099609375, + "learning_rate": 1.9409203299515067e-06, + "loss": 25.5078, + "step": 14438 + }, + { + "epoch": 0.13667988754366203, + "grad_norm": 451.9589538574219, + "learning_rate": 1.9409099477302187e-06, + "loss": 32.2031, + "step": 14439 + }, + { + "epoch": 0.1366893535653771, + "grad_norm": 1083.9979248046875, + "learning_rate": 1.940899564624537e-06, + "loss": 45.2578, + "step": 14440 + }, + { + "epoch": 0.13669881958709212, + "grad_norm": 470.5418395996094, + "learning_rate": 1.94088918063447e-06, + "loss": 45.0312, + "step": 14441 + }, + { + "epoch": 0.13670828560880718, + "grad_norm": 191.46092224121094, + "learning_rate": 1.9408787957600293e-06, + "loss": 19.5, + "step": 14442 + }, + { + "epoch": 0.13671775163052224, + "grad_norm": 217.44686889648438, + "learning_rate": 1.9408684100012234e-06, + "loss": 12.4805, + "step": 14443 + }, + { + "epoch": 0.1367272176522373, + "grad_norm": 185.24842834472656, + "learning_rate": 1.9408580233580626e-06, + "loss": 14.6094, + "step": 14444 + }, + { + "epoch": 0.13673668367395234, + "grad_norm": 351.1026306152344, + "learning_rate": 1.9408476358305563e-06, + "loss": 23.75, + "step": 14445 + }, + { + "epoch": 0.1367461496956674, + "grad_norm": 588.9005737304688, + "learning_rate": 1.9408372474187145e-06, + "loss": 33.9531, + "step": 14446 + }, + { + "epoch": 0.13675561571738246, + "grad_norm": 239.61715698242188, + "learning_rate": 1.9408268581225465e-06, + "loss": 20.9688, + "step": 14447 + }, + { + "epoch": 0.13676508173909752, + "grad_norm": 510.9012145996094, + "learning_rate": 1.940816467942063e-06, + "loss": 27.5312, + "step": 14448 + }, + { + "epoch": 0.13677454776081258, + "grad_norm": 357.74835205078125, + "learning_rate": 1.940806076877273e-06, + "loss": 37.875, + "step": 14449 + }, + { + "epoch": 0.1367840137825276, + "grad_norm": 834.3912963867188, + "learning_rate": 1.9407956849281865e-06, + "loss": 49.3125, + "step": 14450 + }, + { + "epoch": 0.13679347980424267, + "grad_norm": 820.1607055664062, + "learning_rate": 1.940785292094813e-06, + "loss": 43.0156, + "step": 14451 + }, + { + "epoch": 0.13680294582595773, + "grad_norm": 204.52906799316406, + "learning_rate": 1.940774898377163e-06, + "loss": 18.4297, + "step": 14452 + }, + { + "epoch": 0.1368124118476728, + "grad_norm": 465.4677734375, + "learning_rate": 1.940764503775246e-06, + "loss": 23.625, + "step": 14453 + }, + { + "epoch": 0.13682187786938782, + "grad_norm": 270.3006286621094, + "learning_rate": 1.940754108289071e-06, + "loss": 19.5234, + "step": 14454 + }, + { + "epoch": 0.13683134389110288, + "grad_norm": 496.26397705078125, + "learning_rate": 1.9407437119186485e-06, + "loss": 46.5781, + "step": 14455 + }, + { + "epoch": 0.13684080991281794, + "grad_norm": 621.424560546875, + "learning_rate": 1.940733314663988e-06, + "loss": 36.2812, + "step": 14456 + }, + { + "epoch": 0.136850275934533, + "grad_norm": 236.60400390625, + "learning_rate": 1.9407229165250995e-06, + "loss": 17.3789, + "step": 14457 + }, + { + "epoch": 0.13685974195624806, + "grad_norm": 349.22100830078125, + "learning_rate": 1.9407125175019927e-06, + "loss": 23.5703, + "step": 14458 + }, + { + "epoch": 0.1368692079779631, + "grad_norm": 449.05224609375, + "learning_rate": 1.9407021175946775e-06, + "loss": 39.2578, + "step": 14459 + }, + { + "epoch": 0.13687867399967815, + "grad_norm": 463.70794677734375, + "learning_rate": 1.9406917168031633e-06, + "loss": 27.0273, + "step": 14460 + }, + { + "epoch": 0.1368881400213932, + "grad_norm": 360.3856201171875, + "learning_rate": 1.94068131512746e-06, + "loss": 28.7109, + "step": 14461 + }, + { + "epoch": 0.13689760604310827, + "grad_norm": 3.029500961303711, + "learning_rate": 1.940670912567577e-06, + "loss": 1.0229, + "step": 14462 + }, + { + "epoch": 0.1369070720648233, + "grad_norm": 641.0867919921875, + "learning_rate": 1.9406605091235255e-06, + "loss": 20.9805, + "step": 14463 + }, + { + "epoch": 0.13691653808653836, + "grad_norm": 453.7593688964844, + "learning_rate": 1.9406501047953137e-06, + "loss": 24.7188, + "step": 14464 + }, + { + "epoch": 0.13692600410825342, + "grad_norm": 3.157745838165283, + "learning_rate": 1.9406396995829524e-06, + "loss": 0.8594, + "step": 14465 + }, + { + "epoch": 0.13693547012996848, + "grad_norm": 443.2610168457031, + "learning_rate": 1.9406292934864506e-06, + "loss": 68.9688, + "step": 14466 + }, + { + "epoch": 0.13694493615168354, + "grad_norm": 425.0729064941406, + "learning_rate": 1.9406188865058186e-06, + "loss": 18.5703, + "step": 14467 + }, + { + "epoch": 0.13695440217339858, + "grad_norm": 378.45648193359375, + "learning_rate": 1.940608478641066e-06, + "loss": 17.957, + "step": 14468 + }, + { + "epoch": 0.13696386819511364, + "grad_norm": 593.649658203125, + "learning_rate": 1.9405980698922024e-06, + "loss": 22.75, + "step": 14469 + }, + { + "epoch": 0.1369733342168287, + "grad_norm": 2.674243211746216, + "learning_rate": 1.9405876602592385e-06, + "loss": 0.8962, + "step": 14470 + }, + { + "epoch": 0.13698280023854376, + "grad_norm": 2.755335569381714, + "learning_rate": 1.940577249742183e-06, + "loss": 0.8269, + "step": 14471 + }, + { + "epoch": 0.1369922662602588, + "grad_norm": 496.1726379394531, + "learning_rate": 1.940566838341046e-06, + "loss": 20.4062, + "step": 14472 + }, + { + "epoch": 0.13700173228197385, + "grad_norm": 1147.906494140625, + "learning_rate": 1.940556426055837e-06, + "loss": 24.8906, + "step": 14473 + }, + { + "epoch": 0.1370111983036889, + "grad_norm": 756.1587524414062, + "learning_rate": 1.940546012886567e-06, + "loss": 41.8203, + "step": 14474 + }, + { + "epoch": 0.13702066432540397, + "grad_norm": 204.9695281982422, + "learning_rate": 1.940535598833244e-06, + "loss": 16.75, + "step": 14475 + }, + { + "epoch": 0.13703013034711903, + "grad_norm": 495.5716247558594, + "learning_rate": 1.9405251838958795e-06, + "loss": 45.7969, + "step": 14476 + }, + { + "epoch": 0.13703959636883406, + "grad_norm": 282.9782409667969, + "learning_rate": 1.940514768074482e-06, + "loss": 18.5781, + "step": 14477 + }, + { + "epoch": 0.13704906239054912, + "grad_norm": 454.7116394042969, + "learning_rate": 1.940504351369062e-06, + "loss": 10.668, + "step": 14478 + }, + { + "epoch": 0.13705852841226418, + "grad_norm": 433.0386657714844, + "learning_rate": 1.9404939337796292e-06, + "loss": 33.7188, + "step": 14479 + }, + { + "epoch": 0.13706799443397924, + "grad_norm": 3.1748204231262207, + "learning_rate": 1.940483515306193e-06, + "loss": 0.8975, + "step": 14480 + }, + { + "epoch": 0.13707746045569427, + "grad_norm": 314.2892761230469, + "learning_rate": 1.940473095948764e-06, + "loss": 20.2539, + "step": 14481 + }, + { + "epoch": 0.13708692647740933, + "grad_norm": 457.5575256347656, + "learning_rate": 1.940462675707351e-06, + "loss": 45.5156, + "step": 14482 + }, + { + "epoch": 0.1370963924991244, + "grad_norm": 3.34739089012146, + "learning_rate": 1.9404522545819643e-06, + "loss": 1.0029, + "step": 14483 + }, + { + "epoch": 0.13710585852083945, + "grad_norm": 146.04843139648438, + "learning_rate": 1.9404418325726135e-06, + "loss": 20.3281, + "step": 14484 + }, + { + "epoch": 0.1371153245425545, + "grad_norm": 354.4349060058594, + "learning_rate": 1.9404314096793094e-06, + "loss": 23.9453, + "step": 14485 + }, + { + "epoch": 0.13712479056426954, + "grad_norm": 222.0786590576172, + "learning_rate": 1.9404209859020603e-06, + "loss": 10.3594, + "step": 14486 + }, + { + "epoch": 0.1371342565859846, + "grad_norm": 382.436767578125, + "learning_rate": 1.9404105612408764e-06, + "loss": 19.0156, + "step": 14487 + }, + { + "epoch": 0.13714372260769966, + "grad_norm": 540.5431518554688, + "learning_rate": 1.9404001356957684e-06, + "loss": 50.4219, + "step": 14488 + }, + { + "epoch": 0.13715318862941472, + "grad_norm": 189.47349548339844, + "learning_rate": 1.940389709266745e-06, + "loss": 15.0781, + "step": 14489 + }, + { + "epoch": 0.13716265465112976, + "grad_norm": 511.69537353515625, + "learning_rate": 1.940379281953817e-06, + "loss": 24.2578, + "step": 14490 + }, + { + "epoch": 0.13717212067284482, + "grad_norm": 467.4491882324219, + "learning_rate": 1.940368853756993e-06, + "loss": 25.3672, + "step": 14491 + }, + { + "epoch": 0.13718158669455988, + "grad_norm": 541.051025390625, + "learning_rate": 1.9403584246762834e-06, + "loss": 44.2422, + "step": 14492 + }, + { + "epoch": 0.13719105271627494, + "grad_norm": 266.64813232421875, + "learning_rate": 1.9403479947116984e-06, + "loss": 20.125, + "step": 14493 + }, + { + "epoch": 0.13720051873799, + "grad_norm": 686.5930786132812, + "learning_rate": 1.9403375638632476e-06, + "loss": 52.2656, + "step": 14494 + }, + { + "epoch": 0.13720998475970503, + "grad_norm": 1744.3304443359375, + "learning_rate": 1.9403271321309403e-06, + "loss": 57.9531, + "step": 14495 + }, + { + "epoch": 0.1372194507814201, + "grad_norm": 295.7391052246094, + "learning_rate": 1.940316699514787e-06, + "loss": 28.4141, + "step": 14496 + }, + { + "epoch": 0.13722891680313515, + "grad_norm": 431.36322021484375, + "learning_rate": 1.9403062660147968e-06, + "loss": 11.9727, + "step": 14497 + }, + { + "epoch": 0.1372383828248502, + "grad_norm": 660.0068359375, + "learning_rate": 1.94029583163098e-06, + "loss": 21.5312, + "step": 14498 + }, + { + "epoch": 0.13724784884656524, + "grad_norm": 229.7149200439453, + "learning_rate": 1.9402853963633465e-06, + "loss": 19.7734, + "step": 14499 + }, + { + "epoch": 0.1372573148682803, + "grad_norm": 3.157325506210327, + "learning_rate": 1.9402749602119057e-06, + "loss": 0.939, + "step": 14500 + }, + { + "epoch": 0.13726678088999536, + "grad_norm": 420.3076171875, + "learning_rate": 1.940264523176668e-06, + "loss": 30.6641, + "step": 14501 + }, + { + "epoch": 0.13727624691171042, + "grad_norm": 219.36309814453125, + "learning_rate": 1.940254085257642e-06, + "loss": 17.2891, + "step": 14502 + }, + { + "epoch": 0.13728571293342548, + "grad_norm": 425.1888732910156, + "learning_rate": 1.940243646454839e-06, + "loss": 52.4531, + "step": 14503 + }, + { + "epoch": 0.1372951789551405, + "grad_norm": 708.7177124023438, + "learning_rate": 1.940233206768268e-06, + "loss": 19.5117, + "step": 14504 + }, + { + "epoch": 0.13730464497685557, + "grad_norm": 496.3577575683594, + "learning_rate": 1.9402227661979388e-06, + "loss": 49.3125, + "step": 14505 + }, + { + "epoch": 0.13731411099857063, + "grad_norm": 426.8575744628906, + "learning_rate": 1.9402123247438617e-06, + "loss": 19.1719, + "step": 14506 + }, + { + "epoch": 0.1373235770202857, + "grad_norm": 191.63951110839844, + "learning_rate": 1.940201882406046e-06, + "loss": 20.7578, + "step": 14507 + }, + { + "epoch": 0.13733304304200075, + "grad_norm": 522.3524780273438, + "learning_rate": 1.940191439184501e-06, + "loss": 34.3672, + "step": 14508 + }, + { + "epoch": 0.13734250906371578, + "grad_norm": 513.3342895507812, + "learning_rate": 1.940180995079238e-06, + "loss": 20.3672, + "step": 14509 + }, + { + "epoch": 0.13735197508543084, + "grad_norm": 400.19683837890625, + "learning_rate": 1.940170550090266e-06, + "loss": 40.4375, + "step": 14510 + }, + { + "epoch": 0.1373614411071459, + "grad_norm": 541.7872314453125, + "learning_rate": 1.940160104217595e-06, + "loss": 37.2812, + "step": 14511 + }, + { + "epoch": 0.13737090712886096, + "grad_norm": 575.3005981445312, + "learning_rate": 1.9401496574612343e-06, + "loss": 45.6094, + "step": 14512 + }, + { + "epoch": 0.137380373150576, + "grad_norm": 779.1284790039062, + "learning_rate": 1.9401392098211943e-06, + "loss": 25.0234, + "step": 14513 + }, + { + "epoch": 0.13738983917229106, + "grad_norm": 3.790364980697632, + "learning_rate": 1.9401287612974842e-06, + "loss": 1.0581, + "step": 14514 + }, + { + "epoch": 0.13739930519400612, + "grad_norm": 544.45703125, + "learning_rate": 1.9401183118901146e-06, + "loss": 61.1328, + "step": 14515 + }, + { + "epoch": 0.13740877121572118, + "grad_norm": 391.787353515625, + "learning_rate": 1.940107861599095e-06, + "loss": 48.0, + "step": 14516 + }, + { + "epoch": 0.13741823723743624, + "grad_norm": 554.7273559570312, + "learning_rate": 1.940097410424435e-06, + "loss": 41.0859, + "step": 14517 + }, + { + "epoch": 0.13742770325915127, + "grad_norm": 230.86343383789062, + "learning_rate": 1.9400869583661446e-06, + "loss": 8.9766, + "step": 14518 + }, + { + "epoch": 0.13743716928086633, + "grad_norm": 548.0765380859375, + "learning_rate": 1.9400765054242336e-06, + "loss": 34.9453, + "step": 14519 + }, + { + "epoch": 0.1374466353025814, + "grad_norm": 423.25982666015625, + "learning_rate": 1.9400660515987117e-06, + "loss": 23.2344, + "step": 14520 + }, + { + "epoch": 0.13745610132429645, + "grad_norm": 2.9143121242523193, + "learning_rate": 1.9400555968895893e-06, + "loss": 0.8862, + "step": 14521 + }, + { + "epoch": 0.13746556734601148, + "grad_norm": 434.7381286621094, + "learning_rate": 1.9400451412968755e-06, + "loss": 45.0781, + "step": 14522 + }, + { + "epoch": 0.13747503336772654, + "grad_norm": 391.5168762207031, + "learning_rate": 1.9400346848205806e-06, + "loss": 9.6094, + "step": 14523 + }, + { + "epoch": 0.1374844993894416, + "grad_norm": 2.711182117462158, + "learning_rate": 1.940024227460714e-06, + "loss": 0.9448, + "step": 14524 + }, + { + "epoch": 0.13749396541115666, + "grad_norm": 301.67889404296875, + "learning_rate": 1.940013769217286e-06, + "loss": 17.625, + "step": 14525 + }, + { + "epoch": 0.13750343143287172, + "grad_norm": 315.4129943847656, + "learning_rate": 1.9400033100903056e-06, + "loss": 30.9297, + "step": 14526 + }, + { + "epoch": 0.13751289745458675, + "grad_norm": 215.34165954589844, + "learning_rate": 1.939992850079784e-06, + "loss": 15.7578, + "step": 14527 + }, + { + "epoch": 0.1375223634763018, + "grad_norm": 293.5810546875, + "learning_rate": 1.9399823891857298e-06, + "loss": 32.7031, + "step": 14528 + }, + { + "epoch": 0.13753182949801687, + "grad_norm": 3.564134359359741, + "learning_rate": 1.939971927408154e-06, + "loss": 1.0027, + "step": 14529 + }, + { + "epoch": 0.13754129551973193, + "grad_norm": 341.1619873046875, + "learning_rate": 1.939961464747065e-06, + "loss": 21.5156, + "step": 14530 + }, + { + "epoch": 0.13755076154144696, + "grad_norm": 287.5250549316406, + "learning_rate": 1.939951001202473e-06, + "loss": 20.7109, + "step": 14531 + }, + { + "epoch": 0.13756022756316202, + "grad_norm": 194.63169860839844, + "learning_rate": 1.939940536774389e-06, + "loss": 15.5078, + "step": 14532 + }, + { + "epoch": 0.13756969358487708, + "grad_norm": 561.3964233398438, + "learning_rate": 1.9399300714628224e-06, + "loss": 39.8672, + "step": 14533 + }, + { + "epoch": 0.13757915960659214, + "grad_norm": 640.3809204101562, + "learning_rate": 1.9399196052677815e-06, + "loss": 39.3125, + "step": 14534 + }, + { + "epoch": 0.1375886256283072, + "grad_norm": 231.7581329345703, + "learning_rate": 1.939909138189278e-06, + "loss": 13.9688, + "step": 14535 + }, + { + "epoch": 0.13759809165002224, + "grad_norm": 826.9564819335938, + "learning_rate": 1.939898670227321e-06, + "loss": 62.7734, + "step": 14536 + }, + { + "epoch": 0.1376075576717373, + "grad_norm": 479.51385498046875, + "learning_rate": 1.9398882013819205e-06, + "loss": 23.6094, + "step": 14537 + }, + { + "epoch": 0.13761702369345236, + "grad_norm": 430.9534606933594, + "learning_rate": 1.939877731653086e-06, + "loss": 44.2188, + "step": 14538 + }, + { + "epoch": 0.13762648971516742, + "grad_norm": 289.9022216796875, + "learning_rate": 1.939867261040828e-06, + "loss": 17.3906, + "step": 14539 + }, + { + "epoch": 0.13763595573688245, + "grad_norm": 268.0016174316406, + "learning_rate": 1.9398567895451556e-06, + "loss": 17.3281, + "step": 14540 + }, + { + "epoch": 0.1376454217585975, + "grad_norm": 348.9818115234375, + "learning_rate": 1.939846317166079e-06, + "loss": 30.9688, + "step": 14541 + }, + { + "epoch": 0.13765488778031257, + "grad_norm": 248.12713623046875, + "learning_rate": 1.939835843903608e-06, + "loss": 24.9766, + "step": 14542 + }, + { + "epoch": 0.13766435380202763, + "grad_norm": 291.0238037109375, + "learning_rate": 1.9398253697577523e-06, + "loss": 22.8281, + "step": 14543 + }, + { + "epoch": 0.1376738198237427, + "grad_norm": 433.50347900390625, + "learning_rate": 1.939814894728522e-06, + "loss": 46.6172, + "step": 14544 + }, + { + "epoch": 0.13768328584545772, + "grad_norm": 306.7830810546875, + "learning_rate": 1.939804418815927e-06, + "loss": 9.793, + "step": 14545 + }, + { + "epoch": 0.13769275186717278, + "grad_norm": 591.6935424804688, + "learning_rate": 1.9397939420199767e-06, + "loss": 24.7344, + "step": 14546 + }, + { + "epoch": 0.13770221788888784, + "grad_norm": 427.2427978515625, + "learning_rate": 1.9397834643406815e-06, + "loss": 20.4141, + "step": 14547 + }, + { + "epoch": 0.1377116839106029, + "grad_norm": 221.27027893066406, + "learning_rate": 1.9397729857780508e-06, + "loss": 14.2266, + "step": 14548 + }, + { + "epoch": 0.13772114993231793, + "grad_norm": 831.1246948242188, + "learning_rate": 1.9397625063320947e-06, + "loss": 43.9688, + "step": 14549 + }, + { + "epoch": 0.137730615954033, + "grad_norm": 256.3222961425781, + "learning_rate": 1.939752026002823e-06, + "loss": 17.2109, + "step": 14550 + }, + { + "epoch": 0.13774008197574805, + "grad_norm": 520.4042358398438, + "learning_rate": 1.9397415447902455e-06, + "loss": 48.1953, + "step": 14551 + }, + { + "epoch": 0.1377495479974631, + "grad_norm": 486.5082092285156, + "learning_rate": 1.9397310626943723e-06, + "loss": 20.625, + "step": 14552 + }, + { + "epoch": 0.13775901401917817, + "grad_norm": 188.5352325439453, + "learning_rate": 1.9397205797152126e-06, + "loss": 17.5234, + "step": 14553 + }, + { + "epoch": 0.1377684800408932, + "grad_norm": 185.6084747314453, + "learning_rate": 1.939710095852777e-06, + "loss": 19.1289, + "step": 14554 + }, + { + "epoch": 0.13777794606260826, + "grad_norm": 2.90786075592041, + "learning_rate": 1.9396996111070753e-06, + "loss": 0.8301, + "step": 14555 + }, + { + "epoch": 0.13778741208432332, + "grad_norm": 386.15484619140625, + "learning_rate": 1.9396891254781163e-06, + "loss": 27.0625, + "step": 14556 + }, + { + "epoch": 0.13779687810603838, + "grad_norm": 473.3148498535156, + "learning_rate": 1.939678638965911e-06, + "loss": 17.8047, + "step": 14557 + }, + { + "epoch": 0.13780634412775342, + "grad_norm": 3.382570743560791, + "learning_rate": 1.939668151570469e-06, + "loss": 0.9507, + "step": 14558 + }, + { + "epoch": 0.13781581014946848, + "grad_norm": 429.8518371582031, + "learning_rate": 1.9396576632918004e-06, + "loss": 28.2812, + "step": 14559 + }, + { + "epoch": 0.13782527617118354, + "grad_norm": 198.17893981933594, + "learning_rate": 1.9396471741299144e-06, + "loss": 16.4453, + "step": 14560 + }, + { + "epoch": 0.1378347421928986, + "grad_norm": 285.2873229980469, + "learning_rate": 1.939636684084821e-06, + "loss": 17.5312, + "step": 14561 + }, + { + "epoch": 0.13784420821461366, + "grad_norm": 370.8929138183594, + "learning_rate": 1.9396261931565305e-06, + "loss": 55.6719, + "step": 14562 + }, + { + "epoch": 0.1378536742363287, + "grad_norm": 384.3174133300781, + "learning_rate": 1.9396157013450526e-06, + "loss": 20.5078, + "step": 14563 + }, + { + "epoch": 0.13786314025804375, + "grad_norm": 233.0722198486328, + "learning_rate": 1.9396052086503967e-06, + "loss": 34.1289, + "step": 14564 + }, + { + "epoch": 0.1378726062797588, + "grad_norm": 3.177105665206909, + "learning_rate": 1.9395947150725734e-06, + "loss": 0.8604, + "step": 14565 + }, + { + "epoch": 0.13788207230147387, + "grad_norm": 539.3713989257812, + "learning_rate": 1.939584220611592e-06, + "loss": 19.4297, + "step": 14566 + }, + { + "epoch": 0.1378915383231889, + "grad_norm": 432.9978942871094, + "learning_rate": 1.939573725267463e-06, + "loss": 12.8203, + "step": 14567 + }, + { + "epoch": 0.13790100434490396, + "grad_norm": 483.4571838378906, + "learning_rate": 1.939563229040195e-06, + "loss": 26.5352, + "step": 14568 + }, + { + "epoch": 0.13791047036661902, + "grad_norm": 416.947021484375, + "learning_rate": 1.9395527319297993e-06, + "loss": 11.9531, + "step": 14569 + }, + { + "epoch": 0.13791993638833408, + "grad_norm": 287.9900817871094, + "learning_rate": 1.939542233936285e-06, + "loss": 12.2266, + "step": 14570 + }, + { + "epoch": 0.13792940241004914, + "grad_norm": 694.2150268554688, + "learning_rate": 1.939531735059662e-06, + "loss": 44.0, + "step": 14571 + }, + { + "epoch": 0.13793886843176417, + "grad_norm": 330.19573974609375, + "learning_rate": 1.93952123529994e-06, + "loss": 15.0625, + "step": 14572 + }, + { + "epoch": 0.13794833445347923, + "grad_norm": 255.7476348876953, + "learning_rate": 1.9395107346571294e-06, + "loss": 22.8203, + "step": 14573 + }, + { + "epoch": 0.1379578004751943, + "grad_norm": 375.4814147949219, + "learning_rate": 1.9395002331312403e-06, + "loss": 25.5, + "step": 14574 + }, + { + "epoch": 0.13796726649690935, + "grad_norm": 568.2471923828125, + "learning_rate": 1.9394897307222815e-06, + "loss": 35.5078, + "step": 14575 + }, + { + "epoch": 0.13797673251862438, + "grad_norm": 215.6570281982422, + "learning_rate": 1.939479227430264e-06, + "loss": 19.4844, + "step": 14576 + }, + { + "epoch": 0.13798619854033944, + "grad_norm": 415.9028015136719, + "learning_rate": 1.9394687232551966e-06, + "loss": 14.5, + "step": 14577 + }, + { + "epoch": 0.1379956645620545, + "grad_norm": 2.813129186630249, + "learning_rate": 1.9394582181970902e-06, + "loss": 0.8262, + "step": 14578 + }, + { + "epoch": 0.13800513058376956, + "grad_norm": 3.7464773654937744, + "learning_rate": 1.9394477122559537e-06, + "loss": 0.9429, + "step": 14579 + }, + { + "epoch": 0.13801459660548462, + "grad_norm": 527.9346313476562, + "learning_rate": 1.939437205431798e-06, + "loss": 33.3594, + "step": 14580 + }, + { + "epoch": 0.13802406262719966, + "grad_norm": 874.8916625976562, + "learning_rate": 1.9394266977246322e-06, + "loss": 60.7031, + "step": 14581 + }, + { + "epoch": 0.13803352864891472, + "grad_norm": 3.212329626083374, + "learning_rate": 1.9394161891344662e-06, + "loss": 0.7817, + "step": 14582 + }, + { + "epoch": 0.13804299467062978, + "grad_norm": 242.2941131591797, + "learning_rate": 1.9394056796613104e-06, + "loss": 22.5469, + "step": 14583 + }, + { + "epoch": 0.13805246069234484, + "grad_norm": 1113.37353515625, + "learning_rate": 1.9393951693051743e-06, + "loss": 56.1875, + "step": 14584 + }, + { + "epoch": 0.13806192671405987, + "grad_norm": 2.8500778675079346, + "learning_rate": 1.9393846580660678e-06, + "loss": 0.7788, + "step": 14585 + }, + { + "epoch": 0.13807139273577493, + "grad_norm": 731.5200805664062, + "learning_rate": 1.9393741459440006e-06, + "loss": 14.4961, + "step": 14586 + }, + { + "epoch": 0.13808085875749, + "grad_norm": 734.0994262695312, + "learning_rate": 1.9393636329389833e-06, + "loss": 34.0781, + "step": 14587 + }, + { + "epoch": 0.13809032477920505, + "grad_norm": 306.208984375, + "learning_rate": 1.939353119051025e-06, + "loss": 21.2812, + "step": 14588 + }, + { + "epoch": 0.1380997908009201, + "grad_norm": 959.3610229492188, + "learning_rate": 1.939342604280136e-06, + "loss": 60.375, + "step": 14589 + }, + { + "epoch": 0.13810925682263514, + "grad_norm": 360.4083557128906, + "learning_rate": 1.939332088626326e-06, + "loss": 12.8906, + "step": 14590 + }, + { + "epoch": 0.1381187228443502, + "grad_norm": 223.05877685546875, + "learning_rate": 1.9393215720896048e-06, + "loss": 20.2031, + "step": 14591 + }, + { + "epoch": 0.13812818886606526, + "grad_norm": 778.7549438476562, + "learning_rate": 1.9393110546699826e-06, + "loss": 44.7031, + "step": 14592 + }, + { + "epoch": 0.13813765488778032, + "grad_norm": 250.13792419433594, + "learning_rate": 1.939300536367469e-06, + "loss": 12.3984, + "step": 14593 + }, + { + "epoch": 0.13814712090949538, + "grad_norm": 595.5359497070312, + "learning_rate": 1.9392900171820743e-06, + "loss": 51.5938, + "step": 14594 + }, + { + "epoch": 0.1381565869312104, + "grad_norm": 914.6290283203125, + "learning_rate": 1.9392794971138076e-06, + "loss": 61.75, + "step": 14595 + }, + { + "epoch": 0.13816605295292547, + "grad_norm": 726.3569946289062, + "learning_rate": 1.93926897616268e-06, + "loss": 47.9531, + "step": 14596 + }, + { + "epoch": 0.13817551897464053, + "grad_norm": 475.15155029296875, + "learning_rate": 1.9392584543286997e-06, + "loss": 37.2188, + "step": 14597 + }, + { + "epoch": 0.1381849849963556, + "grad_norm": 570.043212890625, + "learning_rate": 1.9392479316118783e-06, + "loss": 50.1875, + "step": 14598 + }, + { + "epoch": 0.13819445101807062, + "grad_norm": 379.0234069824219, + "learning_rate": 1.9392374080122246e-06, + "loss": 12.9219, + "step": 14599 + }, + { + "epoch": 0.13820391703978568, + "grad_norm": 684.9772338867188, + "learning_rate": 1.939226883529749e-06, + "loss": 69.2305, + "step": 14600 + }, + { + "epoch": 0.13821338306150074, + "grad_norm": 3.2398524284362793, + "learning_rate": 1.939216358164461e-06, + "loss": 0.9194, + "step": 14601 + }, + { + "epoch": 0.1382228490832158, + "grad_norm": 206.59017944335938, + "learning_rate": 1.939205831916371e-06, + "loss": 23.1875, + "step": 14602 + }, + { + "epoch": 0.13823231510493086, + "grad_norm": 388.6730041503906, + "learning_rate": 1.9391953047854887e-06, + "loss": 23.625, + "step": 14603 + }, + { + "epoch": 0.1382417811266459, + "grad_norm": 511.4549255371094, + "learning_rate": 1.939184776771824e-06, + "loss": 33.5, + "step": 14604 + }, + { + "epoch": 0.13825124714836096, + "grad_norm": 228.3059844970703, + "learning_rate": 1.939174247875386e-06, + "loss": 17.4688, + "step": 14605 + }, + { + "epoch": 0.13826071317007602, + "grad_norm": 1140.053466796875, + "learning_rate": 1.9391637180961856e-06, + "loss": 44.3047, + "step": 14606 + }, + { + "epoch": 0.13827017919179108, + "grad_norm": 293.0855407714844, + "learning_rate": 1.9391531874342324e-06, + "loss": 37.2656, + "step": 14607 + }, + { + "epoch": 0.1382796452135061, + "grad_norm": 283.7287292480469, + "learning_rate": 1.9391426558895367e-06, + "loss": 24.2344, + "step": 14608 + }, + { + "epoch": 0.13828911123522117, + "grad_norm": 567.024169921875, + "learning_rate": 1.9391321234621077e-06, + "loss": 22.8594, + "step": 14609 + }, + { + "epoch": 0.13829857725693623, + "grad_norm": 393.46478271484375, + "learning_rate": 1.9391215901519556e-06, + "loss": 23.875, + "step": 14610 + }, + { + "epoch": 0.1383080432786513, + "grad_norm": 319.70440673828125, + "learning_rate": 1.9391110559590905e-06, + "loss": 25.7031, + "step": 14611 + }, + { + "epoch": 0.13831750930036635, + "grad_norm": 561.1226196289062, + "learning_rate": 1.9391005208835223e-06, + "loss": 51.0938, + "step": 14612 + }, + { + "epoch": 0.13832697532208138, + "grad_norm": 219.4717559814453, + "learning_rate": 1.93908998492526e-06, + "loss": 12.7422, + "step": 14613 + }, + { + "epoch": 0.13833644134379644, + "grad_norm": 364.7059326171875, + "learning_rate": 1.9390794480843147e-06, + "loss": 17.0039, + "step": 14614 + }, + { + "epoch": 0.1383459073655115, + "grad_norm": 318.0524597167969, + "learning_rate": 1.9390689103606953e-06, + "loss": 30.0, + "step": 14615 + }, + { + "epoch": 0.13835537338722656, + "grad_norm": 183.27444458007812, + "learning_rate": 1.939058371754413e-06, + "loss": 17.9922, + "step": 14616 + }, + { + "epoch": 0.1383648394089416, + "grad_norm": 580.42822265625, + "learning_rate": 1.9390478322654765e-06, + "loss": 38.4844, + "step": 14617 + }, + { + "epoch": 0.13837430543065665, + "grad_norm": 640.0345458984375, + "learning_rate": 1.939037291893896e-06, + "loss": 44.5625, + "step": 14618 + }, + { + "epoch": 0.1383837714523717, + "grad_norm": 287.50872802734375, + "learning_rate": 1.9390267506396816e-06, + "loss": 15.8516, + "step": 14619 + }, + { + "epoch": 0.13839323747408677, + "grad_norm": 329.6753845214844, + "learning_rate": 1.939016208502843e-06, + "loss": 48.0625, + "step": 14620 + }, + { + "epoch": 0.13840270349580183, + "grad_norm": 2.5778005123138428, + "learning_rate": 1.939005665483391e-06, + "loss": 0.8147, + "step": 14621 + }, + { + "epoch": 0.13841216951751686, + "grad_norm": 410.013427734375, + "learning_rate": 1.938995121581334e-06, + "loss": 44.9062, + "step": 14622 + }, + { + "epoch": 0.13842163553923192, + "grad_norm": 817.0358276367188, + "learning_rate": 1.9389845767966827e-06, + "loss": 55.2344, + "step": 14623 + }, + { + "epoch": 0.13843110156094698, + "grad_norm": 291.4938049316406, + "learning_rate": 1.9389740311294473e-06, + "loss": 22.5, + "step": 14624 + }, + { + "epoch": 0.13844056758266204, + "grad_norm": 471.0341491699219, + "learning_rate": 1.938963484579637e-06, + "loss": 19.0781, + "step": 14625 + }, + { + "epoch": 0.13845003360437708, + "grad_norm": 478.44732666015625, + "learning_rate": 1.9389529371472624e-06, + "loss": 23.8906, + "step": 14626 + }, + { + "epoch": 0.13845949962609214, + "grad_norm": 433.5776062011719, + "learning_rate": 1.9389423888323333e-06, + "loss": 14.375, + "step": 14627 + }, + { + "epoch": 0.1384689656478072, + "grad_norm": 799.7786254882812, + "learning_rate": 1.938931839634859e-06, + "loss": 46.3672, + "step": 14628 + }, + { + "epoch": 0.13847843166952226, + "grad_norm": 373.0169982910156, + "learning_rate": 1.9389212895548504e-06, + "loss": 24.7148, + "step": 14629 + }, + { + "epoch": 0.13848789769123732, + "grad_norm": 388.2788391113281, + "learning_rate": 1.9389107385923166e-06, + "loss": 20.6484, + "step": 14630 + }, + { + "epoch": 0.13849736371295235, + "grad_norm": 300.3373107910156, + "learning_rate": 1.9389001867472675e-06, + "loss": 48.0391, + "step": 14631 + }, + { + "epoch": 0.1385068297346674, + "grad_norm": 1285.8031005859375, + "learning_rate": 1.9388896340197135e-06, + "loss": 48.6016, + "step": 14632 + }, + { + "epoch": 0.13851629575638247, + "grad_norm": 588.3613891601562, + "learning_rate": 1.9388790804096646e-06, + "loss": 20.7148, + "step": 14633 + }, + { + "epoch": 0.13852576177809753, + "grad_norm": 364.2294616699219, + "learning_rate": 1.93886852591713e-06, + "loss": 33.5469, + "step": 14634 + }, + { + "epoch": 0.13853522779981256, + "grad_norm": 497.79437255859375, + "learning_rate": 1.9388579705421205e-06, + "loss": 48.3125, + "step": 14635 + }, + { + "epoch": 0.13854469382152762, + "grad_norm": 805.8424682617188, + "learning_rate": 1.9388474142846452e-06, + "loss": 67.5469, + "step": 14636 + }, + { + "epoch": 0.13855415984324268, + "grad_norm": 924.9945068359375, + "learning_rate": 1.9388368571447145e-06, + "loss": 66.0078, + "step": 14637 + }, + { + "epoch": 0.13856362586495774, + "grad_norm": 357.7459411621094, + "learning_rate": 1.9388262991223384e-06, + "loss": 26.5195, + "step": 14638 + }, + { + "epoch": 0.1385730918866728, + "grad_norm": 561.8486328125, + "learning_rate": 1.938815740217527e-06, + "loss": 7.4766, + "step": 14639 + }, + { + "epoch": 0.13858255790838783, + "grad_norm": 554.224853515625, + "learning_rate": 1.938805180430289e-06, + "loss": 47.9375, + "step": 14640 + }, + { + "epoch": 0.1385920239301029, + "grad_norm": 211.40899658203125, + "learning_rate": 1.9387946197606356e-06, + "loss": 10.9746, + "step": 14641 + }, + { + "epoch": 0.13860148995181795, + "grad_norm": 181.70863342285156, + "learning_rate": 1.9387840582085763e-06, + "loss": 22.7188, + "step": 14642 + }, + { + "epoch": 0.138610955973533, + "grad_norm": 211.50318908691406, + "learning_rate": 1.9387734957741213e-06, + "loss": 14.8359, + "step": 14643 + }, + { + "epoch": 0.13862042199524804, + "grad_norm": 317.8036193847656, + "learning_rate": 1.93876293245728e-06, + "loss": 31.7656, + "step": 14644 + }, + { + "epoch": 0.1386298880169631, + "grad_norm": 247.92874145507812, + "learning_rate": 1.9387523682580625e-06, + "loss": 7.9336, + "step": 14645 + }, + { + "epoch": 0.13863935403867816, + "grad_norm": 260.0355529785156, + "learning_rate": 1.938741803176479e-06, + "loss": 19.5859, + "step": 14646 + }, + { + "epoch": 0.13864882006039322, + "grad_norm": 365.44549560546875, + "learning_rate": 1.9387312372125395e-06, + "loss": 37.0625, + "step": 14647 + }, + { + "epoch": 0.13865828608210828, + "grad_norm": 231.75460815429688, + "learning_rate": 1.9387206703662536e-06, + "loss": 18.3984, + "step": 14648 + }, + { + "epoch": 0.13866775210382332, + "grad_norm": 1112.9189453125, + "learning_rate": 1.938710102637631e-06, + "loss": 97.6562, + "step": 14649 + }, + { + "epoch": 0.13867721812553838, + "grad_norm": 567.42919921875, + "learning_rate": 1.9386995340266824e-06, + "loss": 64.0, + "step": 14650 + }, + { + "epoch": 0.13868668414725344, + "grad_norm": 3.5189602375030518, + "learning_rate": 1.9386889645334173e-06, + "loss": 1.0576, + "step": 14651 + }, + { + "epoch": 0.1386961501689685, + "grad_norm": 433.8910827636719, + "learning_rate": 1.9386783941578454e-06, + "loss": 22.0859, + "step": 14652 + }, + { + "epoch": 0.13870561619068353, + "grad_norm": 307.06951904296875, + "learning_rate": 1.9386678228999773e-06, + "loss": 17.7773, + "step": 14653 + }, + { + "epoch": 0.1387150822123986, + "grad_norm": 143.4996795654297, + "learning_rate": 1.938657250759822e-06, + "loss": 15.3516, + "step": 14654 + }, + { + "epoch": 0.13872454823411365, + "grad_norm": 2.637031316757202, + "learning_rate": 1.93864667773739e-06, + "loss": 0.8843, + "step": 14655 + }, + { + "epoch": 0.1387340142558287, + "grad_norm": 415.98773193359375, + "learning_rate": 1.938636103832691e-06, + "loss": 29.6875, + "step": 14656 + }, + { + "epoch": 0.13874348027754377, + "grad_norm": 785.0823974609375, + "learning_rate": 1.9386255290457357e-06, + "loss": 41.0703, + "step": 14657 + }, + { + "epoch": 0.1387529462992588, + "grad_norm": 465.4402770996094, + "learning_rate": 1.938614953376533e-06, + "loss": 32.1719, + "step": 14658 + }, + { + "epoch": 0.13876241232097386, + "grad_norm": 357.15045166015625, + "learning_rate": 1.9386043768250937e-06, + "loss": 20.2969, + "step": 14659 + }, + { + "epoch": 0.13877187834268892, + "grad_norm": 267.90667724609375, + "learning_rate": 1.938593799391427e-06, + "loss": 37.375, + "step": 14660 + }, + { + "epoch": 0.13878134436440398, + "grad_norm": 166.49069213867188, + "learning_rate": 1.9385832210755435e-06, + "loss": 14.3516, + "step": 14661 + }, + { + "epoch": 0.138790810386119, + "grad_norm": 319.9599609375, + "learning_rate": 1.9385726418774526e-06, + "loss": 23.8438, + "step": 14662 + }, + { + "epoch": 0.13880027640783407, + "grad_norm": 480.7591552734375, + "learning_rate": 1.9385620617971646e-06, + "loss": 37.0859, + "step": 14663 + }, + { + "epoch": 0.13880974242954913, + "grad_norm": 772.5506591796875, + "learning_rate": 1.9385514808346893e-06, + "loss": 49.6172, + "step": 14664 + }, + { + "epoch": 0.1388192084512642, + "grad_norm": 554.5753173828125, + "learning_rate": 1.9385408989900367e-06, + "loss": 12.0469, + "step": 14665 + }, + { + "epoch": 0.13882867447297925, + "grad_norm": 894.3859252929688, + "learning_rate": 1.9385303162632164e-06, + "loss": 48.3125, + "step": 14666 + }, + { + "epoch": 0.13883814049469428, + "grad_norm": 414.9085998535156, + "learning_rate": 1.938519732654239e-06, + "loss": 20.8047, + "step": 14667 + }, + { + "epoch": 0.13884760651640934, + "grad_norm": 262.47235107421875, + "learning_rate": 1.938509148163114e-06, + "loss": 24.5781, + "step": 14668 + }, + { + "epoch": 0.1388570725381244, + "grad_norm": 269.55267333984375, + "learning_rate": 1.9384985627898517e-06, + "loss": 16.6797, + "step": 14669 + }, + { + "epoch": 0.13886653855983946, + "grad_norm": 262.482177734375, + "learning_rate": 1.9384879765344614e-06, + "loss": 27.625, + "step": 14670 + }, + { + "epoch": 0.1388760045815545, + "grad_norm": 525.8825073242188, + "learning_rate": 1.9384773893969533e-06, + "loss": 23.2812, + "step": 14671 + }, + { + "epoch": 0.13888547060326956, + "grad_norm": 435.8257141113281, + "learning_rate": 1.9384668013773383e-06, + "loss": 26.9141, + "step": 14672 + }, + { + "epoch": 0.13889493662498462, + "grad_norm": 615.0692749023438, + "learning_rate": 1.938456212475625e-06, + "loss": 36.75, + "step": 14673 + }, + { + "epoch": 0.13890440264669968, + "grad_norm": 321.2135314941406, + "learning_rate": 1.938445622691824e-06, + "loss": 41.6797, + "step": 14674 + }, + { + "epoch": 0.13891386866841474, + "grad_norm": 507.8591003417969, + "learning_rate": 1.938435032025945e-06, + "loss": 45.9922, + "step": 14675 + }, + { + "epoch": 0.13892333469012977, + "grad_norm": 430.25799560546875, + "learning_rate": 1.9384244404779986e-06, + "loss": 45.6719, + "step": 14676 + }, + { + "epoch": 0.13893280071184483, + "grad_norm": 221.51779174804688, + "learning_rate": 1.9384138480479935e-06, + "loss": 9.125, + "step": 14677 + }, + { + "epoch": 0.1389422667335599, + "grad_norm": 240.30027770996094, + "learning_rate": 1.938403254735941e-06, + "loss": 29.2266, + "step": 14678 + }, + { + "epoch": 0.13895173275527495, + "grad_norm": 743.7542114257812, + "learning_rate": 1.9383926605418504e-06, + "loss": 13.3359, + "step": 14679 + }, + { + "epoch": 0.13896119877699, + "grad_norm": 788.7483520507812, + "learning_rate": 1.9383820654657317e-06, + "loss": 50.6562, + "step": 14680 + }, + { + "epoch": 0.13897066479870504, + "grad_norm": 615.8445434570312, + "learning_rate": 1.938371469507595e-06, + "loss": 47.2812, + "step": 14681 + }, + { + "epoch": 0.1389801308204201, + "grad_norm": 295.3322448730469, + "learning_rate": 1.93836087266745e-06, + "loss": 7.7129, + "step": 14682 + }, + { + "epoch": 0.13898959684213516, + "grad_norm": 3.525876760482788, + "learning_rate": 1.9383502749453067e-06, + "loss": 0.9104, + "step": 14683 + }, + { + "epoch": 0.13899906286385022, + "grad_norm": 367.0206604003906, + "learning_rate": 1.9383396763411756e-06, + "loss": 29.8906, + "step": 14684 + }, + { + "epoch": 0.13900852888556525, + "grad_norm": 617.1051635742188, + "learning_rate": 1.938329076855066e-06, + "loss": 62.9766, + "step": 14685 + }, + { + "epoch": 0.1390179949072803, + "grad_norm": 3.014101505279541, + "learning_rate": 1.9383184764869883e-06, + "loss": 0.9424, + "step": 14686 + }, + { + "epoch": 0.13902746092899537, + "grad_norm": 3.2322189807891846, + "learning_rate": 1.938307875236952e-06, + "loss": 0.9316, + "step": 14687 + }, + { + "epoch": 0.13903692695071043, + "grad_norm": 866.3662109375, + "learning_rate": 1.938297273104967e-06, + "loss": 35.8828, + "step": 14688 + }, + { + "epoch": 0.1390463929724255, + "grad_norm": 405.4069519042969, + "learning_rate": 1.9382866700910445e-06, + "loss": 9.8438, + "step": 14689 + }, + { + "epoch": 0.13905585899414052, + "grad_norm": 164.38766479492188, + "learning_rate": 1.938276066195193e-06, + "loss": 16.1953, + "step": 14690 + }, + { + "epoch": 0.13906532501585558, + "grad_norm": 517.9735107421875, + "learning_rate": 1.938265461417423e-06, + "loss": 27.8125, + "step": 14691 + }, + { + "epoch": 0.13907479103757064, + "grad_norm": 2.874856472015381, + "learning_rate": 1.938254855757745e-06, + "loss": 0.9194, + "step": 14692 + }, + { + "epoch": 0.1390842570592857, + "grad_norm": 819.78125, + "learning_rate": 1.938244249216168e-06, + "loss": 49.8672, + "step": 14693 + }, + { + "epoch": 0.13909372308100074, + "grad_norm": 231.9407501220703, + "learning_rate": 1.938233641792703e-06, + "loss": 21.5312, + "step": 14694 + }, + { + "epoch": 0.1391031891027158, + "grad_norm": 700.6179809570312, + "learning_rate": 1.938223033487359e-06, + "loss": 28.8359, + "step": 14695 + }, + { + "epoch": 0.13911265512443086, + "grad_norm": 493.6686096191406, + "learning_rate": 1.9382124243001462e-06, + "loss": 33.9141, + "step": 14696 + }, + { + "epoch": 0.13912212114614592, + "grad_norm": 365.427734375, + "learning_rate": 1.9382018142310748e-06, + "loss": 26.4531, + "step": 14697 + }, + { + "epoch": 0.13913158716786098, + "grad_norm": 323.0664978027344, + "learning_rate": 1.938191203280155e-06, + "loss": 22.6016, + "step": 14698 + }, + { + "epoch": 0.139141053189576, + "grad_norm": 213.05152893066406, + "learning_rate": 1.9381805914473964e-06, + "loss": 24.3906, + "step": 14699 + }, + { + "epoch": 0.13915051921129107, + "grad_norm": 428.69476318359375, + "learning_rate": 1.938169978732809e-06, + "loss": 17.6016, + "step": 14700 + }, + { + "epoch": 0.13915998523300613, + "grad_norm": 189.87913513183594, + "learning_rate": 1.938159365136403e-06, + "loss": 19.0781, + "step": 14701 + }, + { + "epoch": 0.1391694512547212, + "grad_norm": 287.02996826171875, + "learning_rate": 1.938148750658188e-06, + "loss": 20.9688, + "step": 14702 + }, + { + "epoch": 0.13917891727643622, + "grad_norm": 303.7251281738281, + "learning_rate": 1.9381381352981743e-06, + "loss": 39.5156, + "step": 14703 + }, + { + "epoch": 0.13918838329815128, + "grad_norm": 746.0884399414062, + "learning_rate": 1.938127519056372e-06, + "loss": 47.5, + "step": 14704 + }, + { + "epoch": 0.13919784931986634, + "grad_norm": 364.38238525390625, + "learning_rate": 1.9381169019327907e-06, + "loss": 35.2656, + "step": 14705 + }, + { + "epoch": 0.1392073153415814, + "grad_norm": 496.6123046875, + "learning_rate": 1.9381062839274407e-06, + "loss": 20.2031, + "step": 14706 + }, + { + "epoch": 0.13921678136329646, + "grad_norm": 317.5548400878906, + "learning_rate": 1.9380956650403314e-06, + "loss": 8.5703, + "step": 14707 + }, + { + "epoch": 0.1392262473850115, + "grad_norm": 287.4568786621094, + "learning_rate": 1.938085045271473e-06, + "loss": 16.2773, + "step": 14708 + }, + { + "epoch": 0.13923571340672655, + "grad_norm": 1182.2938232421875, + "learning_rate": 1.9380744246208765e-06, + "loss": 26.8281, + "step": 14709 + }, + { + "epoch": 0.1392451794284416, + "grad_norm": 2.8970727920532227, + "learning_rate": 1.9380638030885507e-06, + "loss": 0.8994, + "step": 14710 + }, + { + "epoch": 0.13925464545015667, + "grad_norm": 463.2901916503906, + "learning_rate": 1.938053180674506e-06, + "loss": 51.8281, + "step": 14711 + }, + { + "epoch": 0.1392641114718717, + "grad_norm": 295.49725341796875, + "learning_rate": 1.938042557378752e-06, + "loss": 22.0938, + "step": 14712 + }, + { + "epoch": 0.13927357749358676, + "grad_norm": 235.4300537109375, + "learning_rate": 1.9380319332012995e-06, + "loss": 12.9375, + "step": 14713 + }, + { + "epoch": 0.13928304351530182, + "grad_norm": 234.47129821777344, + "learning_rate": 1.938021308142158e-06, + "loss": 20.9062, + "step": 14714 + }, + { + "epoch": 0.13929250953701688, + "grad_norm": 313.0304260253906, + "learning_rate": 1.938010682201337e-06, + "loss": 32.6094, + "step": 14715 + }, + { + "epoch": 0.13930197555873194, + "grad_norm": 360.6496276855469, + "learning_rate": 1.9380000553788476e-06, + "loss": 22.2188, + "step": 14716 + }, + { + "epoch": 0.13931144158044698, + "grad_norm": 227.46578979492188, + "learning_rate": 1.937989427674699e-06, + "loss": 20.9883, + "step": 14717 + }, + { + "epoch": 0.13932090760216204, + "grad_norm": 242.79908752441406, + "learning_rate": 1.937978799088901e-06, + "loss": 20.5703, + "step": 14718 + }, + { + "epoch": 0.1393303736238771, + "grad_norm": 349.3076171875, + "learning_rate": 1.9379681696214642e-06, + "loss": 35.8281, + "step": 14719 + }, + { + "epoch": 0.13933983964559216, + "grad_norm": 342.6841125488281, + "learning_rate": 1.9379575392723983e-06, + "loss": 23.0312, + "step": 14720 + }, + { + "epoch": 0.1393493056673072, + "grad_norm": 885.8406372070312, + "learning_rate": 1.937946908041713e-06, + "loss": 31.7891, + "step": 14721 + }, + { + "epoch": 0.13935877168902225, + "grad_norm": 466.5111083984375, + "learning_rate": 1.937936275929419e-06, + "loss": 12.2734, + "step": 14722 + }, + { + "epoch": 0.1393682377107373, + "grad_norm": 272.4049377441406, + "learning_rate": 1.937925642935526e-06, + "loss": 17.6562, + "step": 14723 + }, + { + "epoch": 0.13937770373245237, + "grad_norm": 513.0679931640625, + "learning_rate": 1.9379150090600434e-06, + "loss": 16.6719, + "step": 14724 + }, + { + "epoch": 0.13938716975416743, + "grad_norm": 881.9497680664062, + "learning_rate": 1.9379043743029825e-06, + "loss": 57.4336, + "step": 14725 + }, + { + "epoch": 0.13939663577588246, + "grad_norm": 479.6487731933594, + "learning_rate": 1.9378937386643517e-06, + "loss": 46.2344, + "step": 14726 + }, + { + "epoch": 0.13940610179759752, + "grad_norm": 384.3031921386719, + "learning_rate": 1.9378831021441625e-06, + "loss": 56.3906, + "step": 14727 + }, + { + "epoch": 0.13941556781931258, + "grad_norm": 342.2161560058594, + "learning_rate": 1.9378724647424237e-06, + "loss": 39.7812, + "step": 14728 + }, + { + "epoch": 0.13942503384102764, + "grad_norm": 322.2275390625, + "learning_rate": 1.9378618264591464e-06, + "loss": 14.6758, + "step": 14729 + }, + { + "epoch": 0.13943449986274267, + "grad_norm": 421.5139465332031, + "learning_rate": 1.9378511872943395e-06, + "loss": 29.6016, + "step": 14730 + }, + { + "epoch": 0.13944396588445773, + "grad_norm": 207.29180908203125, + "learning_rate": 1.937840547248013e-06, + "loss": 17.9531, + "step": 14731 + }, + { + "epoch": 0.1394534319061728, + "grad_norm": 684.3483276367188, + "learning_rate": 1.937829906320178e-06, + "loss": 68.3828, + "step": 14732 + }, + { + "epoch": 0.13946289792788785, + "grad_norm": 311.2198791503906, + "learning_rate": 1.937819264510844e-06, + "loss": 9.918, + "step": 14733 + }, + { + "epoch": 0.1394723639496029, + "grad_norm": 427.2634582519531, + "learning_rate": 1.937808621820021e-06, + "loss": 46.4062, + "step": 14734 + }, + { + "epoch": 0.13948182997131794, + "grad_norm": 437.158203125, + "learning_rate": 1.9377979782477186e-06, + "loss": 34.4219, + "step": 14735 + }, + { + "epoch": 0.139491295993033, + "grad_norm": 305.1331481933594, + "learning_rate": 1.9377873337939473e-06, + "loss": 21.6406, + "step": 14736 + }, + { + "epoch": 0.13950076201474806, + "grad_norm": 590.8109130859375, + "learning_rate": 1.9377766884587167e-06, + "loss": 32.4297, + "step": 14737 + }, + { + "epoch": 0.13951022803646312, + "grad_norm": 780.8865356445312, + "learning_rate": 1.937766042242037e-06, + "loss": 38.2656, + "step": 14738 + }, + { + "epoch": 0.13951969405817816, + "grad_norm": 362.9554138183594, + "learning_rate": 1.9377553951439185e-06, + "loss": 24.1953, + "step": 14739 + }, + { + "epoch": 0.13952916007989322, + "grad_norm": 197.2570037841797, + "learning_rate": 1.9377447471643704e-06, + "loss": 17.2969, + "step": 14740 + }, + { + "epoch": 0.13953862610160828, + "grad_norm": 603.64599609375, + "learning_rate": 1.9377340983034037e-06, + "loss": 34.0938, + "step": 14741 + }, + { + "epoch": 0.13954809212332334, + "grad_norm": 1074.408203125, + "learning_rate": 1.937723448561028e-06, + "loss": 47.9688, + "step": 14742 + }, + { + "epoch": 0.1395575581450384, + "grad_norm": 468.36724853515625, + "learning_rate": 1.9377127979372532e-06, + "loss": 47.5156, + "step": 14743 + }, + { + "epoch": 0.13956702416675343, + "grad_norm": 265.2091064453125, + "learning_rate": 1.9377021464320893e-06, + "loss": 17.7109, + "step": 14744 + }, + { + "epoch": 0.1395764901884685, + "grad_norm": 331.79412841796875, + "learning_rate": 1.937691494045546e-06, + "loss": 35.2422, + "step": 14745 + }, + { + "epoch": 0.13958595621018355, + "grad_norm": 374.520751953125, + "learning_rate": 1.9376808407776343e-06, + "loss": 26.1641, + "step": 14746 + }, + { + "epoch": 0.1395954222318986, + "grad_norm": 1232.1910400390625, + "learning_rate": 1.9376701866283635e-06, + "loss": 18.4766, + "step": 14747 + }, + { + "epoch": 0.13960488825361364, + "grad_norm": 318.7411804199219, + "learning_rate": 1.9376595315977435e-06, + "loss": 20.3984, + "step": 14748 + }, + { + "epoch": 0.1396143542753287, + "grad_norm": 541.0393676757812, + "learning_rate": 1.937648875685785e-06, + "loss": 41.5312, + "step": 14749 + }, + { + "epoch": 0.13962382029704376, + "grad_norm": 440.8999938964844, + "learning_rate": 1.937638218892497e-06, + "loss": 25.2812, + "step": 14750 + }, + { + "epoch": 0.13963328631875882, + "grad_norm": 532.0723876953125, + "learning_rate": 1.93762756121789e-06, + "loss": 10.7539, + "step": 14751 + }, + { + "epoch": 0.13964275234047388, + "grad_norm": 242.78932189941406, + "learning_rate": 1.9376169026619745e-06, + "loss": 22.2812, + "step": 14752 + }, + { + "epoch": 0.1396522183621889, + "grad_norm": 302.1191101074219, + "learning_rate": 1.93760624322476e-06, + "loss": 27.3984, + "step": 14753 + }, + { + "epoch": 0.13966168438390397, + "grad_norm": 753.4466552734375, + "learning_rate": 1.937595582906257e-06, + "loss": 27.4688, + "step": 14754 + }, + { + "epoch": 0.13967115040561903, + "grad_norm": 186.31051635742188, + "learning_rate": 1.9375849217064743e-06, + "loss": 21.3906, + "step": 14755 + }, + { + "epoch": 0.1396806164273341, + "grad_norm": 326.7740173339844, + "learning_rate": 1.9375742596254235e-06, + "loss": 29.7422, + "step": 14756 + }, + { + "epoch": 0.13969008244904912, + "grad_norm": 639.6425170898438, + "learning_rate": 1.9375635966631133e-06, + "loss": 24.4922, + "step": 14757 + }, + { + "epoch": 0.13969954847076418, + "grad_norm": 690.0364990234375, + "learning_rate": 1.9375529328195546e-06, + "loss": 53.9375, + "step": 14758 + }, + { + "epoch": 0.13970901449247924, + "grad_norm": 471.1467590332031, + "learning_rate": 1.9375422680947573e-06, + "loss": 25.1172, + "step": 14759 + }, + { + "epoch": 0.1397184805141943, + "grad_norm": 659.37255859375, + "learning_rate": 1.937531602488731e-06, + "loss": 49.4219, + "step": 14760 + }, + { + "epoch": 0.13972794653590936, + "grad_norm": 192.2098846435547, + "learning_rate": 1.9375209360014863e-06, + "loss": 15.4453, + "step": 14761 + }, + { + "epoch": 0.1397374125576244, + "grad_norm": 296.0223388671875, + "learning_rate": 1.9375102686330327e-06, + "loss": 20.2617, + "step": 14762 + }, + { + "epoch": 0.13974687857933946, + "grad_norm": 191.3737030029297, + "learning_rate": 1.93749960038338e-06, + "loss": 17.625, + "step": 14763 + }, + { + "epoch": 0.13975634460105452, + "grad_norm": 396.9471740722656, + "learning_rate": 1.9374889312525393e-06, + "loss": 31.8828, + "step": 14764 + }, + { + "epoch": 0.13976581062276958, + "grad_norm": 460.6831970214844, + "learning_rate": 1.93747826124052e-06, + "loss": 25.1797, + "step": 14765 + }, + { + "epoch": 0.13977527664448464, + "grad_norm": 328.3064880371094, + "learning_rate": 1.937467590347332e-06, + "loss": 20.0234, + "step": 14766 + }, + { + "epoch": 0.13978474266619967, + "grad_norm": 495.137939453125, + "learning_rate": 1.937456918572985e-06, + "loss": 28.4141, + "step": 14767 + }, + { + "epoch": 0.13979420868791473, + "grad_norm": 231.49789428710938, + "learning_rate": 1.9374462459174902e-06, + "loss": 19.3516, + "step": 14768 + }, + { + "epoch": 0.1398036747096298, + "grad_norm": 534.6295166015625, + "learning_rate": 1.9374355723808562e-06, + "loss": 27.1016, + "step": 14769 + }, + { + "epoch": 0.13981314073134485, + "grad_norm": 313.2982482910156, + "learning_rate": 1.937424897963094e-06, + "loss": 16.9219, + "step": 14770 + }, + { + "epoch": 0.13982260675305988, + "grad_norm": 290.97271728515625, + "learning_rate": 1.937414222664214e-06, + "loss": 20.9844, + "step": 14771 + }, + { + "epoch": 0.13983207277477494, + "grad_norm": 2.9243922233581543, + "learning_rate": 1.937403546484225e-06, + "loss": 0.7751, + "step": 14772 + }, + { + "epoch": 0.13984153879649, + "grad_norm": 3.2363650798797607, + "learning_rate": 1.9373928694231376e-06, + "loss": 0.9321, + "step": 14773 + }, + { + "epoch": 0.13985100481820506, + "grad_norm": 595.9929809570312, + "learning_rate": 1.937382191480962e-06, + "loss": 53.75, + "step": 14774 + }, + { + "epoch": 0.13986047083992012, + "grad_norm": 606.3397827148438, + "learning_rate": 1.9373715126577082e-06, + "loss": 24.2031, + "step": 14775 + }, + { + "epoch": 0.13986993686163515, + "grad_norm": 258.60443115234375, + "learning_rate": 1.937360832953386e-06, + "loss": 21.4688, + "step": 14776 + }, + { + "epoch": 0.1398794028833502, + "grad_norm": 238.96742248535156, + "learning_rate": 1.9373501523680056e-06, + "loss": 19.3672, + "step": 14777 + }, + { + "epoch": 0.13988886890506527, + "grad_norm": 191.65463256835938, + "learning_rate": 1.9373394709015775e-06, + "loss": 27.8125, + "step": 14778 + }, + { + "epoch": 0.13989833492678033, + "grad_norm": 306.83099365234375, + "learning_rate": 1.9373287885541106e-06, + "loss": 38.5312, + "step": 14779 + }, + { + "epoch": 0.13990780094849536, + "grad_norm": 591.3590698242188, + "learning_rate": 1.937318105325616e-06, + "loss": 52.2031, + "step": 14780 + }, + { + "epoch": 0.13991726697021042, + "grad_norm": 266.3226013183594, + "learning_rate": 1.9373074212161033e-06, + "loss": 29.7266, + "step": 14781 + }, + { + "epoch": 0.13992673299192548, + "grad_norm": 3.179981231689453, + "learning_rate": 1.9372967362255827e-06, + "loss": 1.064, + "step": 14782 + }, + { + "epoch": 0.13993619901364054, + "grad_norm": 511.89898681640625, + "learning_rate": 1.937286050354064e-06, + "loss": 27.3281, + "step": 14783 + }, + { + "epoch": 0.1399456650353556, + "grad_norm": 257.7176818847656, + "learning_rate": 1.9372753636015575e-06, + "loss": 30.4336, + "step": 14784 + }, + { + "epoch": 0.13995513105707064, + "grad_norm": 388.4053649902344, + "learning_rate": 1.937264675968073e-06, + "loss": 42.5312, + "step": 14785 + }, + { + "epoch": 0.1399645970787857, + "grad_norm": 314.0517578125, + "learning_rate": 1.9372539874536206e-06, + "loss": 35.4766, + "step": 14786 + }, + { + "epoch": 0.13997406310050076, + "grad_norm": 713.219482421875, + "learning_rate": 1.9372432980582106e-06, + "loss": 53.9453, + "step": 14787 + }, + { + "epoch": 0.13998352912221582, + "grad_norm": 368.81512451171875, + "learning_rate": 1.937232607781853e-06, + "loss": 21.5859, + "step": 14788 + }, + { + "epoch": 0.13999299514393085, + "grad_norm": 3.103353261947632, + "learning_rate": 1.9372219166245578e-06, + "loss": 0.9585, + "step": 14789 + }, + { + "epoch": 0.1400024611656459, + "grad_norm": 383.419677734375, + "learning_rate": 1.9372112245863343e-06, + "loss": 26.6641, + "step": 14790 + }, + { + "epoch": 0.14001192718736097, + "grad_norm": 282.3758544921875, + "learning_rate": 1.9372005316671936e-06, + "loss": 12.2656, + "step": 14791 + }, + { + "epoch": 0.14002139320907603, + "grad_norm": 234.12095642089844, + "learning_rate": 1.9371898378671455e-06, + "loss": 25.3906, + "step": 14792 + }, + { + "epoch": 0.1400308592307911, + "grad_norm": 373.3013000488281, + "learning_rate": 1.9371791431862e-06, + "loss": 21.0859, + "step": 14793 + }, + { + "epoch": 0.14004032525250612, + "grad_norm": 257.5171203613281, + "learning_rate": 1.937168447624367e-06, + "loss": 15.8633, + "step": 14794 + }, + { + "epoch": 0.14004979127422118, + "grad_norm": 487.8263854980469, + "learning_rate": 1.9371577511816564e-06, + "loss": 55.4688, + "step": 14795 + }, + { + "epoch": 0.14005925729593624, + "grad_norm": 219.743408203125, + "learning_rate": 1.9371470538580786e-06, + "loss": 8.1406, + "step": 14796 + }, + { + "epoch": 0.1400687233176513, + "grad_norm": 716.4696655273438, + "learning_rate": 1.9371363556536434e-06, + "loss": 48.1484, + "step": 14797 + }, + { + "epoch": 0.14007818933936633, + "grad_norm": 305.5185241699219, + "learning_rate": 1.937125656568361e-06, + "loss": 25.957, + "step": 14798 + }, + { + "epoch": 0.1400876553610814, + "grad_norm": 421.0831604003906, + "learning_rate": 1.937114956602242e-06, + "loss": 34.4219, + "step": 14799 + }, + { + "epoch": 0.14009712138279645, + "grad_norm": 297.7588806152344, + "learning_rate": 1.9371042557552954e-06, + "loss": 17.875, + "step": 14800 + }, + { + "epoch": 0.1401065874045115, + "grad_norm": 202.34754943847656, + "learning_rate": 1.9370935540275317e-06, + "loss": 17.625, + "step": 14801 + }, + { + "epoch": 0.14011605342622657, + "grad_norm": 366.3656311035156, + "learning_rate": 1.9370828514189613e-06, + "loss": 22.8203, + "step": 14802 + }, + { + "epoch": 0.1401255194479416, + "grad_norm": 3.9386703968048096, + "learning_rate": 1.9370721479295936e-06, + "loss": 1.0179, + "step": 14803 + }, + { + "epoch": 0.14013498546965666, + "grad_norm": 380.4600524902344, + "learning_rate": 1.9370614435594395e-06, + "loss": 26.1953, + "step": 14804 + }, + { + "epoch": 0.14014445149137172, + "grad_norm": 250.21963500976562, + "learning_rate": 1.9370507383085083e-06, + "loss": 21.7109, + "step": 14805 + }, + { + "epoch": 0.14015391751308678, + "grad_norm": 602.987548828125, + "learning_rate": 1.9370400321768107e-06, + "loss": 33.2812, + "step": 14806 + }, + { + "epoch": 0.14016338353480182, + "grad_norm": 408.9290771484375, + "learning_rate": 1.937029325164356e-06, + "loss": 35.8594, + "step": 14807 + }, + { + "epoch": 0.14017284955651688, + "grad_norm": 259.8009948730469, + "learning_rate": 1.937018617271155e-06, + "loss": 15.7812, + "step": 14808 + }, + { + "epoch": 0.14018231557823194, + "grad_norm": 285.5356140136719, + "learning_rate": 1.9370079084972174e-06, + "loss": 17.5977, + "step": 14809 + }, + { + "epoch": 0.140191781599947, + "grad_norm": 306.8110046386719, + "learning_rate": 1.936997198842553e-06, + "loss": 19.7891, + "step": 14810 + }, + { + "epoch": 0.14020124762166206, + "grad_norm": 288.61083984375, + "learning_rate": 1.9369864883071726e-06, + "loss": 33.1211, + "step": 14811 + }, + { + "epoch": 0.1402107136433771, + "grad_norm": 168.44198608398438, + "learning_rate": 1.9369757768910855e-06, + "loss": 25.2891, + "step": 14812 + }, + { + "epoch": 0.14022017966509215, + "grad_norm": 517.4474487304688, + "learning_rate": 1.9369650645943026e-06, + "loss": 49.7344, + "step": 14813 + }, + { + "epoch": 0.1402296456868072, + "grad_norm": 499.2262878417969, + "learning_rate": 1.936954351416833e-06, + "loss": 53.8906, + "step": 14814 + }, + { + "epoch": 0.14023911170852227, + "grad_norm": 811.6121215820312, + "learning_rate": 1.9369436373586874e-06, + "loss": 61.1875, + "step": 14815 + }, + { + "epoch": 0.1402485777302373, + "grad_norm": 241.40069580078125, + "learning_rate": 1.936932922419876e-06, + "loss": 21.4531, + "step": 14816 + }, + { + "epoch": 0.14025804375195236, + "grad_norm": 550.1591186523438, + "learning_rate": 1.936922206600408e-06, + "loss": 30.7969, + "step": 14817 + }, + { + "epoch": 0.14026750977366742, + "grad_norm": 319.7801513671875, + "learning_rate": 1.936911489900295e-06, + "loss": 27.1172, + "step": 14818 + }, + { + "epoch": 0.14027697579538248, + "grad_norm": 435.9626159667969, + "learning_rate": 1.9369007723195456e-06, + "loss": 41.7188, + "step": 14819 + }, + { + "epoch": 0.14028644181709754, + "grad_norm": 419.075439453125, + "learning_rate": 1.9368900538581704e-06, + "loss": 36.1875, + "step": 14820 + }, + { + "epoch": 0.14029590783881257, + "grad_norm": 3.2273290157318115, + "learning_rate": 1.9368793345161796e-06, + "loss": 0.958, + "step": 14821 + }, + { + "epoch": 0.14030537386052763, + "grad_norm": 855.9342651367188, + "learning_rate": 1.9368686142935832e-06, + "loss": 50.1641, + "step": 14822 + }, + { + "epoch": 0.1403148398822427, + "grad_norm": 399.515380859375, + "learning_rate": 1.936857893190391e-06, + "loss": 34.0625, + "step": 14823 + }, + { + "epoch": 0.14032430590395775, + "grad_norm": 3.0411877632141113, + "learning_rate": 1.9368471712066133e-06, + "loss": 0.9434, + "step": 14824 + }, + { + "epoch": 0.14033377192567278, + "grad_norm": 467.5329895019531, + "learning_rate": 1.9368364483422605e-06, + "loss": 26.1953, + "step": 14825 + }, + { + "epoch": 0.14034323794738784, + "grad_norm": 1178.7945556640625, + "learning_rate": 1.9368257245973424e-06, + "loss": 16.2031, + "step": 14826 + }, + { + "epoch": 0.1403527039691029, + "grad_norm": 358.08551025390625, + "learning_rate": 1.936814999971869e-06, + "loss": 23.2812, + "step": 14827 + }, + { + "epoch": 0.14036216999081796, + "grad_norm": 452.68731689453125, + "learning_rate": 1.93680427446585e-06, + "loss": 37.0703, + "step": 14828 + }, + { + "epoch": 0.14037163601253302, + "grad_norm": 728.7552490234375, + "learning_rate": 1.936793548079297e-06, + "loss": 37.9375, + "step": 14829 + }, + { + "epoch": 0.14038110203424806, + "grad_norm": 195.11766052246094, + "learning_rate": 1.936782820812218e-06, + "loss": 19.1094, + "step": 14830 + }, + { + "epoch": 0.14039056805596312, + "grad_norm": 839.2469482421875, + "learning_rate": 1.9367720926646246e-06, + "loss": 51.2344, + "step": 14831 + }, + { + "epoch": 0.14040003407767818, + "grad_norm": 275.2636413574219, + "learning_rate": 1.936761363636526e-06, + "loss": 15.9766, + "step": 14832 + }, + { + "epoch": 0.14040950009939324, + "grad_norm": 426.24530029296875, + "learning_rate": 1.936750633727933e-06, + "loss": 23.5078, + "step": 14833 + }, + { + "epoch": 0.14041896612110827, + "grad_norm": 514.3822631835938, + "learning_rate": 1.9367399029388553e-06, + "loss": 17.793, + "step": 14834 + }, + { + "epoch": 0.14042843214282333, + "grad_norm": 383.7377624511719, + "learning_rate": 1.9367291712693028e-06, + "loss": 23.7266, + "step": 14835 + }, + { + "epoch": 0.1404378981645384, + "grad_norm": 354.6600036621094, + "learning_rate": 1.936718438719286e-06, + "loss": 64.3594, + "step": 14836 + }, + { + "epoch": 0.14044736418625345, + "grad_norm": 343.0989074707031, + "learning_rate": 1.936707705288815e-06, + "loss": 35.0625, + "step": 14837 + }, + { + "epoch": 0.1404568302079685, + "grad_norm": 567.6829833984375, + "learning_rate": 1.9366969709778994e-06, + "loss": 41.3516, + "step": 14838 + }, + { + "epoch": 0.14046629622968354, + "grad_norm": 286.7606201171875, + "learning_rate": 1.9366862357865495e-06, + "loss": 19.6875, + "step": 14839 + }, + { + "epoch": 0.1404757622513986, + "grad_norm": 160.978759765625, + "learning_rate": 1.9366754997147757e-06, + "loss": 16.3672, + "step": 14840 + }, + { + "epoch": 0.14048522827311366, + "grad_norm": 3.7228291034698486, + "learning_rate": 1.9366647627625875e-06, + "loss": 1.2271, + "step": 14841 + }, + { + "epoch": 0.14049469429482872, + "grad_norm": 572.9854125976562, + "learning_rate": 1.9366540249299957e-06, + "loss": 50.8906, + "step": 14842 + }, + { + "epoch": 0.14050416031654375, + "grad_norm": 884.3321533203125, + "learning_rate": 1.93664328621701e-06, + "loss": 37.5625, + "step": 14843 + }, + { + "epoch": 0.1405136263382588, + "grad_norm": 693.8450317382812, + "learning_rate": 1.9366325466236406e-06, + "loss": 50.4219, + "step": 14844 + }, + { + "epoch": 0.14052309235997387, + "grad_norm": 162.1572723388672, + "learning_rate": 1.9366218061498976e-06, + "loss": 17.8516, + "step": 14845 + }, + { + "epoch": 0.14053255838168893, + "grad_norm": 246.1961669921875, + "learning_rate": 1.936611064795791e-06, + "loss": 16.7656, + "step": 14846 + }, + { + "epoch": 0.140542024403404, + "grad_norm": 334.9472961425781, + "learning_rate": 1.9366003225613308e-06, + "loss": 19.5312, + "step": 14847 + }, + { + "epoch": 0.14055149042511902, + "grad_norm": 192.149169921875, + "learning_rate": 1.936589579446527e-06, + "loss": 7.1348, + "step": 14848 + }, + { + "epoch": 0.14056095644683408, + "grad_norm": 306.6083679199219, + "learning_rate": 1.9365788354513904e-06, + "loss": 13.0039, + "step": 14849 + }, + { + "epoch": 0.14057042246854914, + "grad_norm": 502.3069152832031, + "learning_rate": 1.9365680905759306e-06, + "loss": 54.25, + "step": 14850 + }, + { + "epoch": 0.1405798884902642, + "grad_norm": 529.5167236328125, + "learning_rate": 1.9365573448201575e-06, + "loss": 46.1406, + "step": 14851 + }, + { + "epoch": 0.14058935451197926, + "grad_norm": 225.75418090820312, + "learning_rate": 1.9365465981840813e-06, + "loss": 21.1328, + "step": 14852 + }, + { + "epoch": 0.1405988205336943, + "grad_norm": 264.4288024902344, + "learning_rate": 1.9365358506677125e-06, + "loss": 19.4062, + "step": 14853 + }, + { + "epoch": 0.14060828655540936, + "grad_norm": 1039.6920166015625, + "learning_rate": 1.9365251022710605e-06, + "loss": 64.1016, + "step": 14854 + }, + { + "epoch": 0.14061775257712442, + "grad_norm": 310.14556884765625, + "learning_rate": 1.9365143529941363e-06, + "loss": 24.7734, + "step": 14855 + }, + { + "epoch": 0.14062721859883948, + "grad_norm": 362.9111328125, + "learning_rate": 1.936503602836949e-06, + "loss": 37.0469, + "step": 14856 + }, + { + "epoch": 0.1406366846205545, + "grad_norm": 763.2771606445312, + "learning_rate": 1.9364928517995098e-06, + "loss": 10.6367, + "step": 14857 + }, + { + "epoch": 0.14064615064226957, + "grad_norm": 245.13009643554688, + "learning_rate": 1.936482099881828e-06, + "loss": 21.7344, + "step": 14858 + }, + { + "epoch": 0.14065561666398463, + "grad_norm": 3.131619930267334, + "learning_rate": 1.9364713470839142e-06, + "loss": 0.9819, + "step": 14859 + }, + { + "epoch": 0.1406650826856997, + "grad_norm": 360.0959777832031, + "learning_rate": 1.936460593405778e-06, + "loss": 28.0703, + "step": 14860 + }, + { + "epoch": 0.14067454870741475, + "grad_norm": 434.6404724121094, + "learning_rate": 1.9364498388474297e-06, + "loss": 26.9414, + "step": 14861 + }, + { + "epoch": 0.14068401472912978, + "grad_norm": 2.6577236652374268, + "learning_rate": 1.9364390834088795e-06, + "loss": 0.79, + "step": 14862 + }, + { + "epoch": 0.14069348075084484, + "grad_norm": 513.5977172851562, + "learning_rate": 1.936428327090138e-06, + "loss": 22.7969, + "step": 14863 + }, + { + "epoch": 0.1407029467725599, + "grad_norm": 355.4776611328125, + "learning_rate": 1.936417569891214e-06, + "loss": 15.7344, + "step": 14864 + }, + { + "epoch": 0.14071241279427496, + "grad_norm": 2.9007630348205566, + "learning_rate": 1.9364068118121187e-06, + "loss": 0.9897, + "step": 14865 + }, + { + "epoch": 0.14072187881599, + "grad_norm": 415.6474914550781, + "learning_rate": 1.936396052852862e-06, + "loss": 32.0, + "step": 14866 + }, + { + "epoch": 0.14073134483770505, + "grad_norm": 1756.56005859375, + "learning_rate": 1.936385293013454e-06, + "loss": 45.6719, + "step": 14867 + }, + { + "epoch": 0.1407408108594201, + "grad_norm": 190.78494262695312, + "learning_rate": 1.9363745322939045e-06, + "loss": 21.9609, + "step": 14868 + }, + { + "epoch": 0.14075027688113517, + "grad_norm": 276.0221862792969, + "learning_rate": 1.9363637706942243e-06, + "loss": 16.9297, + "step": 14869 + }, + { + "epoch": 0.14075974290285023, + "grad_norm": 184.41880798339844, + "learning_rate": 1.9363530082144226e-06, + "loss": 25.5312, + "step": 14870 + }, + { + "epoch": 0.14076920892456526, + "grad_norm": 275.74365234375, + "learning_rate": 1.93634224485451e-06, + "loss": 15.2344, + "step": 14871 + }, + { + "epoch": 0.14077867494628032, + "grad_norm": 415.7905578613281, + "learning_rate": 1.936331480614497e-06, + "loss": 34.6562, + "step": 14872 + }, + { + "epoch": 0.14078814096799538, + "grad_norm": 526.1575927734375, + "learning_rate": 1.936320715494393e-06, + "loss": 32.4375, + "step": 14873 + }, + { + "epoch": 0.14079760698971044, + "grad_norm": 603.8992309570312, + "learning_rate": 1.9363099494942086e-06, + "loss": 41.7188, + "step": 14874 + }, + { + "epoch": 0.14080707301142548, + "grad_norm": 849.7255859375, + "learning_rate": 1.936299182613954e-06, + "loss": 33.3672, + "step": 14875 + }, + { + "epoch": 0.14081653903314054, + "grad_norm": 638.0672607421875, + "learning_rate": 1.9362884148536382e-06, + "loss": 28.6328, + "step": 14876 + }, + { + "epoch": 0.1408260050548556, + "grad_norm": 348.4264831542969, + "learning_rate": 1.936277646213273e-06, + "loss": 26.0781, + "step": 14877 + }, + { + "epoch": 0.14083547107657066, + "grad_norm": 259.0904846191406, + "learning_rate": 1.9362668766928676e-06, + "loss": 18.7734, + "step": 14878 + }, + { + "epoch": 0.14084493709828572, + "grad_norm": 4.028494834899902, + "learning_rate": 1.936256106292432e-06, + "loss": 0.9824, + "step": 14879 + }, + { + "epoch": 0.14085440312000075, + "grad_norm": 274.38751220703125, + "learning_rate": 1.9362453350119766e-06, + "loss": 27.0859, + "step": 14880 + }, + { + "epoch": 0.1408638691417158, + "grad_norm": 567.98583984375, + "learning_rate": 1.9362345628515116e-06, + "loss": 21.7344, + "step": 14881 + }, + { + "epoch": 0.14087333516343087, + "grad_norm": 294.11212158203125, + "learning_rate": 1.9362237898110467e-06, + "loss": 18.6055, + "step": 14882 + }, + { + "epoch": 0.14088280118514593, + "grad_norm": 232.0441131591797, + "learning_rate": 1.9362130158905925e-06, + "loss": 19.0078, + "step": 14883 + }, + { + "epoch": 0.14089226720686096, + "grad_norm": 3.058352470397949, + "learning_rate": 1.9362022410901596e-06, + "loss": 0.9204, + "step": 14884 + }, + { + "epoch": 0.14090173322857602, + "grad_norm": 333.24786376953125, + "learning_rate": 1.936191465409757e-06, + "loss": 27.7969, + "step": 14885 + }, + { + "epoch": 0.14091119925029108, + "grad_norm": 371.3109436035156, + "learning_rate": 1.9361806888493953e-06, + "loss": 48.1562, + "step": 14886 + }, + { + "epoch": 0.14092066527200614, + "grad_norm": 359.437744140625, + "learning_rate": 1.9361699114090847e-06, + "loss": 17.3984, + "step": 14887 + }, + { + "epoch": 0.1409301312937212, + "grad_norm": 1254.5804443359375, + "learning_rate": 1.936159133088835e-06, + "loss": 40.75, + "step": 14888 + }, + { + "epoch": 0.14093959731543623, + "grad_norm": 190.00767517089844, + "learning_rate": 1.9361483538886574e-06, + "loss": 12.8398, + "step": 14889 + }, + { + "epoch": 0.1409490633371513, + "grad_norm": 266.86273193359375, + "learning_rate": 1.9361375738085605e-06, + "loss": 40.7344, + "step": 14890 + }, + { + "epoch": 0.14095852935886635, + "grad_norm": 424.8352355957031, + "learning_rate": 1.9361267928485552e-06, + "loss": 36.2812, + "step": 14891 + }, + { + "epoch": 0.1409679953805814, + "grad_norm": 383.71978759765625, + "learning_rate": 1.9361160110086517e-06, + "loss": 24.0391, + "step": 14892 + }, + { + "epoch": 0.14097746140229644, + "grad_norm": 2.852748394012451, + "learning_rate": 1.9361052282888606e-06, + "loss": 0.8389, + "step": 14893 + }, + { + "epoch": 0.1409869274240115, + "grad_norm": 421.240478515625, + "learning_rate": 1.936094444689191e-06, + "loss": 31.4141, + "step": 14894 + }, + { + "epoch": 0.14099639344572656, + "grad_norm": 3.516915798187256, + "learning_rate": 1.9360836602096534e-06, + "loss": 0.9912, + "step": 14895 + }, + { + "epoch": 0.14100585946744162, + "grad_norm": 489.42236328125, + "learning_rate": 1.936072874850258e-06, + "loss": 49.7344, + "step": 14896 + }, + { + "epoch": 0.14101532548915668, + "grad_norm": 226.53851318359375, + "learning_rate": 1.9360620886110154e-06, + "loss": 16.8594, + "step": 14897 + }, + { + "epoch": 0.14102479151087172, + "grad_norm": 538.2565307617188, + "learning_rate": 1.9360513014919354e-06, + "loss": 38.1875, + "step": 14898 + }, + { + "epoch": 0.14103425753258678, + "grad_norm": 543.2459106445312, + "learning_rate": 1.936040513493028e-06, + "loss": 30.6641, + "step": 14899 + }, + { + "epoch": 0.14104372355430184, + "grad_norm": 196.1897735595703, + "learning_rate": 1.936029724614303e-06, + "loss": 14.957, + "step": 14900 + }, + { + "epoch": 0.1410531895760169, + "grad_norm": 194.02976989746094, + "learning_rate": 1.936018934855771e-06, + "loss": 16.25, + "step": 14901 + }, + { + "epoch": 0.14106265559773193, + "grad_norm": 268.9617919921875, + "learning_rate": 1.9360081442174423e-06, + "loss": 17.9219, + "step": 14902 + }, + { + "epoch": 0.141072121619447, + "grad_norm": 174.6212158203125, + "learning_rate": 1.935997352699327e-06, + "loss": 24.2891, + "step": 14903 + }, + { + "epoch": 0.14108158764116205, + "grad_norm": 279.06591796875, + "learning_rate": 1.9359865603014346e-06, + "loss": 31.625, + "step": 14904 + }, + { + "epoch": 0.1410910536628771, + "grad_norm": 257.3699951171875, + "learning_rate": 1.9359757670237763e-06, + "loss": 20.5625, + "step": 14905 + }, + { + "epoch": 0.14110051968459217, + "grad_norm": 261.7530212402344, + "learning_rate": 1.9359649728663616e-06, + "loss": 20.6914, + "step": 14906 + }, + { + "epoch": 0.1411099857063072, + "grad_norm": 228.652587890625, + "learning_rate": 1.9359541778292002e-06, + "loss": 25.625, + "step": 14907 + }, + { + "epoch": 0.14111945172802226, + "grad_norm": 398.1429443359375, + "learning_rate": 1.935943381912303e-06, + "loss": 14.3203, + "step": 14908 + }, + { + "epoch": 0.14112891774973732, + "grad_norm": 590.08447265625, + "learning_rate": 1.93593258511568e-06, + "loss": 24.2109, + "step": 14909 + }, + { + "epoch": 0.14113838377145238, + "grad_norm": 555.4364013671875, + "learning_rate": 1.935921787439341e-06, + "loss": 41.1719, + "step": 14910 + }, + { + "epoch": 0.1411478497931674, + "grad_norm": 168.37217712402344, + "learning_rate": 1.9359109888832966e-06, + "loss": 20.25, + "step": 14911 + }, + { + "epoch": 0.14115731581488247, + "grad_norm": 450.8891906738281, + "learning_rate": 1.935900189447557e-06, + "loss": 29.8203, + "step": 14912 + }, + { + "epoch": 0.14116678183659753, + "grad_norm": 223.3078155517578, + "learning_rate": 1.935889389132132e-06, + "loss": 13.1719, + "step": 14913 + }, + { + "epoch": 0.1411762478583126, + "grad_norm": 3.176321268081665, + "learning_rate": 1.935878587937032e-06, + "loss": 0.9463, + "step": 14914 + }, + { + "epoch": 0.14118571388002765, + "grad_norm": 286.59185791015625, + "learning_rate": 1.9358677858622663e-06, + "loss": 27.2344, + "step": 14915 + }, + { + "epoch": 0.14119517990174268, + "grad_norm": 849.7738037109375, + "learning_rate": 1.9358569829078465e-06, + "loss": 55.7031, + "step": 14916 + }, + { + "epoch": 0.14120464592345774, + "grad_norm": 295.3871765136719, + "learning_rate": 1.9358461790737817e-06, + "loss": 16.9141, + "step": 14917 + }, + { + "epoch": 0.1412141119451728, + "grad_norm": 487.6592102050781, + "learning_rate": 1.9358353743600825e-06, + "loss": 39.5469, + "step": 14918 + }, + { + "epoch": 0.14122357796688786, + "grad_norm": 718.5209350585938, + "learning_rate": 1.9358245687667587e-06, + "loss": 38.0312, + "step": 14919 + }, + { + "epoch": 0.1412330439886029, + "grad_norm": 477.38824462890625, + "learning_rate": 1.935813762293821e-06, + "loss": 27.2969, + "step": 14920 + }, + { + "epoch": 0.14124251001031796, + "grad_norm": 488.3709716796875, + "learning_rate": 1.935802954941279e-06, + "loss": 37.5938, + "step": 14921 + }, + { + "epoch": 0.14125197603203302, + "grad_norm": 243.21595764160156, + "learning_rate": 1.935792146709143e-06, + "loss": 20.625, + "step": 14922 + }, + { + "epoch": 0.14126144205374808, + "grad_norm": 432.6713562011719, + "learning_rate": 1.9357813375974235e-06, + "loss": 38.9844, + "step": 14923 + }, + { + "epoch": 0.14127090807546314, + "grad_norm": 563.6336669921875, + "learning_rate": 1.9357705276061305e-06, + "loss": 26.8242, + "step": 14924 + }, + { + "epoch": 0.14128037409717817, + "grad_norm": 356.2821044921875, + "learning_rate": 1.935759716735274e-06, + "loss": 19.8281, + "step": 14925 + }, + { + "epoch": 0.14128984011889323, + "grad_norm": 3.5303356647491455, + "learning_rate": 1.935748904984864e-06, + "loss": 1.0215, + "step": 14926 + }, + { + "epoch": 0.1412993061406083, + "grad_norm": 275.6093444824219, + "learning_rate": 1.935738092354911e-06, + "loss": 23.2266, + "step": 14927 + }, + { + "epoch": 0.14130877216232335, + "grad_norm": 242.27023315429688, + "learning_rate": 1.9357272788454253e-06, + "loss": 30.3281, + "step": 14928 + }, + { + "epoch": 0.14131823818403838, + "grad_norm": 218.72962951660156, + "learning_rate": 1.9357164644564165e-06, + "loss": 23.4219, + "step": 14929 + }, + { + "epoch": 0.14132770420575344, + "grad_norm": 434.3189392089844, + "learning_rate": 1.935705649187895e-06, + "loss": 37.5391, + "step": 14930 + }, + { + "epoch": 0.1413371702274685, + "grad_norm": 749.3450317382812, + "learning_rate": 1.9356948330398717e-06, + "loss": 53.5469, + "step": 14931 + }, + { + "epoch": 0.14134663624918356, + "grad_norm": 375.61151123046875, + "learning_rate": 1.9356840160123557e-06, + "loss": 37.2422, + "step": 14932 + }, + { + "epoch": 0.14135610227089862, + "grad_norm": 333.8162841796875, + "learning_rate": 1.9356731981053577e-06, + "loss": 39.2969, + "step": 14933 + }, + { + "epoch": 0.14136556829261365, + "grad_norm": 605.7168579101562, + "learning_rate": 1.935662379318888e-06, + "loss": 45.5469, + "step": 14934 + }, + { + "epoch": 0.1413750343143287, + "grad_norm": 519.3638305664062, + "learning_rate": 1.9356515596529558e-06, + "loss": 46.4688, + "step": 14935 + }, + { + "epoch": 0.14138450033604377, + "grad_norm": 818.6119384765625, + "learning_rate": 1.9356407391075726e-06, + "loss": 20.2422, + "step": 14936 + }, + { + "epoch": 0.14139396635775883, + "grad_norm": 187.82460021972656, + "learning_rate": 1.935629917682748e-06, + "loss": 24.3359, + "step": 14937 + }, + { + "epoch": 0.1414034323794739, + "grad_norm": 876.9950561523438, + "learning_rate": 1.9356190953784917e-06, + "loss": 30.1055, + "step": 14938 + }, + { + "epoch": 0.14141289840118892, + "grad_norm": 337.6133117675781, + "learning_rate": 1.9356082721948147e-06, + "loss": 31.7578, + "step": 14939 + }, + { + "epoch": 0.14142236442290398, + "grad_norm": 209.1629638671875, + "learning_rate": 1.935597448131727e-06, + "loss": 27.625, + "step": 14940 + }, + { + "epoch": 0.14143183044461904, + "grad_norm": 254.03309631347656, + "learning_rate": 1.9355866231892378e-06, + "loss": 20.2031, + "step": 14941 + }, + { + "epoch": 0.1414412964663341, + "grad_norm": 214.27084350585938, + "learning_rate": 1.9355757973673585e-06, + "loss": 17.4453, + "step": 14942 + }, + { + "epoch": 0.14145076248804914, + "grad_norm": 3.5663514137268066, + "learning_rate": 1.935564970666099e-06, + "loss": 1.084, + "step": 14943 + }, + { + "epoch": 0.1414602285097642, + "grad_norm": 360.5165710449219, + "learning_rate": 1.935554143085469e-06, + "loss": 35.3281, + "step": 14944 + }, + { + "epoch": 0.14146969453147926, + "grad_norm": 179.7698211669922, + "learning_rate": 1.935543314625479e-06, + "loss": 17.2344, + "step": 14945 + }, + { + "epoch": 0.14147916055319432, + "grad_norm": 908.4193115234375, + "learning_rate": 1.935532485286139e-06, + "loss": 38.875, + "step": 14946 + }, + { + "epoch": 0.14148862657490938, + "grad_norm": 572.784912109375, + "learning_rate": 1.9355216550674596e-06, + "loss": 18.6016, + "step": 14947 + }, + { + "epoch": 0.1414980925966244, + "grad_norm": 749.9091796875, + "learning_rate": 1.9355108239694507e-06, + "loss": 17.2266, + "step": 14948 + }, + { + "epoch": 0.14150755861833947, + "grad_norm": 345.8460693359375, + "learning_rate": 1.9354999919921225e-06, + "loss": 19.3789, + "step": 14949 + }, + { + "epoch": 0.14151702464005453, + "grad_norm": 396.0506286621094, + "learning_rate": 1.935489159135485e-06, + "loss": 49.6719, + "step": 14950 + }, + { + "epoch": 0.1415264906617696, + "grad_norm": 186.4997100830078, + "learning_rate": 1.935478325399549e-06, + "loss": 22.0547, + "step": 14951 + }, + { + "epoch": 0.14153595668348462, + "grad_norm": 312.6936950683594, + "learning_rate": 1.9354674907843232e-06, + "loss": 16.3438, + "step": 14952 + }, + { + "epoch": 0.14154542270519968, + "grad_norm": 386.38531494140625, + "learning_rate": 1.93545665528982e-06, + "loss": 17.8438, + "step": 14953 + }, + { + "epoch": 0.14155488872691474, + "grad_norm": 360.10150146484375, + "learning_rate": 1.9354458189160476e-06, + "loss": 24.3828, + "step": 14954 + }, + { + "epoch": 0.1415643547486298, + "grad_norm": 296.5243835449219, + "learning_rate": 1.9354349816630175e-06, + "loss": 25.5312, + "step": 14955 + }, + { + "epoch": 0.14157382077034486, + "grad_norm": 523.4317016601562, + "learning_rate": 1.935424143530739e-06, + "loss": 45.9219, + "step": 14956 + }, + { + "epoch": 0.1415832867920599, + "grad_norm": 345.9701843261719, + "learning_rate": 1.935413304519223e-06, + "loss": 36.4531, + "step": 14957 + }, + { + "epoch": 0.14159275281377495, + "grad_norm": 501.1931457519531, + "learning_rate": 1.9354024646284787e-06, + "loss": 55.1406, + "step": 14958 + }, + { + "epoch": 0.14160221883549, + "grad_norm": 217.55792236328125, + "learning_rate": 1.9353916238585174e-06, + "loss": 19.2656, + "step": 14959 + }, + { + "epoch": 0.14161168485720507, + "grad_norm": 426.17791748046875, + "learning_rate": 1.935380782209349e-06, + "loss": 44.8125, + "step": 14960 + }, + { + "epoch": 0.1416211508789201, + "grad_norm": 372.80328369140625, + "learning_rate": 1.935369939680983e-06, + "loss": 45.5312, + "step": 14961 + }, + { + "epoch": 0.14163061690063516, + "grad_norm": 181.2518310546875, + "learning_rate": 1.9353590962734307e-06, + "loss": 20.7031, + "step": 14962 + }, + { + "epoch": 0.14164008292235022, + "grad_norm": 3.0146660804748535, + "learning_rate": 1.935348251986701e-06, + "loss": 0.8823, + "step": 14963 + }, + { + "epoch": 0.14164954894406528, + "grad_norm": 1221.7445068359375, + "learning_rate": 1.9353374068208053e-06, + "loss": 11.3672, + "step": 14964 + }, + { + "epoch": 0.14165901496578034, + "grad_norm": 165.58859252929688, + "learning_rate": 1.9353265607757536e-06, + "loss": 15.0938, + "step": 14965 + }, + { + "epoch": 0.14166848098749538, + "grad_norm": 161.23675537109375, + "learning_rate": 1.9353157138515553e-06, + "loss": 21.9844, + "step": 14966 + }, + { + "epoch": 0.14167794700921044, + "grad_norm": 467.98858642578125, + "learning_rate": 1.935304866048221e-06, + "loss": 44.9922, + "step": 14967 + }, + { + "epoch": 0.1416874130309255, + "grad_norm": 209.8342742919922, + "learning_rate": 1.9352940173657612e-06, + "loss": 22.7109, + "step": 14968 + }, + { + "epoch": 0.14169687905264056, + "grad_norm": 371.5445861816406, + "learning_rate": 1.9352831678041857e-06, + "loss": 27.1602, + "step": 14969 + }, + { + "epoch": 0.1417063450743556, + "grad_norm": 213.59861755371094, + "learning_rate": 1.9352723173635052e-06, + "loss": 21.2734, + "step": 14970 + }, + { + "epoch": 0.14171581109607065, + "grad_norm": 318.62664794921875, + "learning_rate": 1.935261466043729e-06, + "loss": 26.1562, + "step": 14971 + }, + { + "epoch": 0.1417252771177857, + "grad_norm": 200.87779235839844, + "learning_rate": 1.935250613844868e-06, + "loss": 7.6875, + "step": 14972 + }, + { + "epoch": 0.14173474313950077, + "grad_norm": 387.32568359375, + "learning_rate": 1.9352397607669328e-06, + "loss": 35.8125, + "step": 14973 + }, + { + "epoch": 0.14174420916121583, + "grad_norm": 274.792724609375, + "learning_rate": 1.9352289068099326e-06, + "loss": 29.6719, + "step": 14974 + }, + { + "epoch": 0.14175367518293086, + "grad_norm": 637.9340209960938, + "learning_rate": 1.9352180519738783e-06, + "loss": 29.293, + "step": 14975 + }, + { + "epoch": 0.14176314120464592, + "grad_norm": 2.849439859390259, + "learning_rate": 1.9352071962587795e-06, + "loss": 0.874, + "step": 14976 + }, + { + "epoch": 0.14177260722636098, + "grad_norm": 516.3411865234375, + "learning_rate": 1.935196339664647e-06, + "loss": 15.5508, + "step": 14977 + }, + { + "epoch": 0.14178207324807604, + "grad_norm": 606.6853637695312, + "learning_rate": 1.9351854821914906e-06, + "loss": 31.3281, + "step": 14978 + }, + { + "epoch": 0.14179153926979107, + "grad_norm": 836.5219116210938, + "learning_rate": 1.9351746238393212e-06, + "loss": 37.375, + "step": 14979 + }, + { + "epoch": 0.14180100529150613, + "grad_norm": 781.0455322265625, + "learning_rate": 1.935163764608148e-06, + "loss": 33.2422, + "step": 14980 + }, + { + "epoch": 0.1418104713132212, + "grad_norm": 369.2563171386719, + "learning_rate": 1.935152904497982e-06, + "loss": 30.125, + "step": 14981 + }, + { + "epoch": 0.14181993733493625, + "grad_norm": 333.2777099609375, + "learning_rate": 1.935142043508833e-06, + "loss": 10.2852, + "step": 14982 + }, + { + "epoch": 0.1418294033566513, + "grad_norm": 650.202392578125, + "learning_rate": 1.9351311816407112e-06, + "loss": 20.8672, + "step": 14983 + }, + { + "epoch": 0.14183886937836634, + "grad_norm": 407.6846923828125, + "learning_rate": 1.935120318893627e-06, + "loss": 21.7812, + "step": 14984 + }, + { + "epoch": 0.1418483354000814, + "grad_norm": 3.2504734992980957, + "learning_rate": 1.9351094552675904e-06, + "loss": 0.9248, + "step": 14985 + }, + { + "epoch": 0.14185780142179646, + "grad_norm": 360.27508544921875, + "learning_rate": 1.9350985907626122e-06, + "loss": 23.3555, + "step": 14986 + }, + { + "epoch": 0.14186726744351152, + "grad_norm": 219.43235778808594, + "learning_rate": 1.935087725378702e-06, + "loss": 27.5312, + "step": 14987 + }, + { + "epoch": 0.14187673346522656, + "grad_norm": 310.915283203125, + "learning_rate": 1.93507685911587e-06, + "loss": 34.3438, + "step": 14988 + }, + { + "epoch": 0.14188619948694162, + "grad_norm": 427.67108154296875, + "learning_rate": 1.9350659919741267e-06, + "loss": 29.6719, + "step": 14989 + }, + { + "epoch": 0.14189566550865668, + "grad_norm": 709.0814208984375, + "learning_rate": 1.935055123953482e-06, + "loss": 46.5781, + "step": 14990 + }, + { + "epoch": 0.14190513153037174, + "grad_norm": 717.1183471679688, + "learning_rate": 1.9350442550539465e-06, + "loss": 37.9414, + "step": 14991 + }, + { + "epoch": 0.1419145975520868, + "grad_norm": 210.3656005859375, + "learning_rate": 1.9350333852755296e-06, + "loss": 22.4609, + "step": 14992 + }, + { + "epoch": 0.14192406357380183, + "grad_norm": 240.66934204101562, + "learning_rate": 1.9350225146182432e-06, + "loss": 22.0898, + "step": 14993 + }, + { + "epoch": 0.1419335295955169, + "grad_norm": 197.9385223388672, + "learning_rate": 1.935011643082096e-06, + "loss": 19.9688, + "step": 14994 + }, + { + "epoch": 0.14194299561723195, + "grad_norm": 633.2142333984375, + "learning_rate": 1.9350007706670983e-06, + "loss": 43.6875, + "step": 14995 + }, + { + "epoch": 0.141952461638947, + "grad_norm": 638.7588500976562, + "learning_rate": 1.9349898973732614e-06, + "loss": 35.6484, + "step": 14996 + }, + { + "epoch": 0.14196192766066204, + "grad_norm": 205.34483337402344, + "learning_rate": 1.934979023200594e-06, + "loss": 18.3828, + "step": 14997 + }, + { + "epoch": 0.1419713936823771, + "grad_norm": 542.4378051757812, + "learning_rate": 1.9349681481491076e-06, + "loss": 52.7578, + "step": 14998 + }, + { + "epoch": 0.14198085970409216, + "grad_norm": 401.89617919921875, + "learning_rate": 1.934957272218812e-06, + "loss": 35.1641, + "step": 14999 + }, + { + "epoch": 0.14199032572580722, + "grad_norm": 431.6573181152344, + "learning_rate": 1.934946395409717e-06, + "loss": 21.1641, + "step": 15000 + }, + { + "epoch": 0.14199979174752228, + "grad_norm": 3.381633996963501, + "learning_rate": 1.934935517721834e-06, + "loss": 0.8657, + "step": 15001 + }, + { + "epoch": 0.1420092577692373, + "grad_norm": 583.3131713867188, + "learning_rate": 1.9349246391551718e-06, + "loss": 49.418, + "step": 15002 + }, + { + "epoch": 0.14201872379095237, + "grad_norm": 626.3351440429688, + "learning_rate": 1.9349137597097415e-06, + "loss": 33.4531, + "step": 15003 + }, + { + "epoch": 0.14202818981266743, + "grad_norm": 210.5750274658203, + "learning_rate": 1.934902879385553e-06, + "loss": 14.3125, + "step": 15004 + }, + { + "epoch": 0.1420376558343825, + "grad_norm": 234.41673278808594, + "learning_rate": 1.9348919981826168e-06, + "loss": 19.4062, + "step": 15005 + }, + { + "epoch": 0.14204712185609752, + "grad_norm": 513.8355712890625, + "learning_rate": 1.934881116100943e-06, + "loss": 58.8438, + "step": 15006 + }, + { + "epoch": 0.14205658787781258, + "grad_norm": 978.072265625, + "learning_rate": 1.9348702331405416e-06, + "loss": 33.2109, + "step": 15007 + }, + { + "epoch": 0.14206605389952764, + "grad_norm": 506.51751708984375, + "learning_rate": 1.9348593493014227e-06, + "loss": 29.8906, + "step": 15008 + }, + { + "epoch": 0.1420755199212427, + "grad_norm": 217.81178283691406, + "learning_rate": 1.9348484645835974e-06, + "loss": 21.3828, + "step": 15009 + }, + { + "epoch": 0.14208498594295776, + "grad_norm": 297.2958068847656, + "learning_rate": 1.934837578987075e-06, + "loss": 28.0312, + "step": 15010 + }, + { + "epoch": 0.1420944519646728, + "grad_norm": 408.3726806640625, + "learning_rate": 1.934826692511866e-06, + "loss": 28.2109, + "step": 15011 + }, + { + "epoch": 0.14210391798638786, + "grad_norm": 497.74847412109375, + "learning_rate": 1.934815805157981e-06, + "loss": 45.7305, + "step": 15012 + }, + { + "epoch": 0.14211338400810292, + "grad_norm": 3.1334245204925537, + "learning_rate": 1.93480491692543e-06, + "loss": 0.9268, + "step": 15013 + }, + { + "epoch": 0.14212285002981798, + "grad_norm": 420.79779052734375, + "learning_rate": 1.934794027814223e-06, + "loss": 45.2188, + "step": 15014 + }, + { + "epoch": 0.142132316051533, + "grad_norm": 1309.2716064453125, + "learning_rate": 1.9347831378243704e-06, + "loss": 28.7188, + "step": 15015 + }, + { + "epoch": 0.14214178207324807, + "grad_norm": 252.41717529296875, + "learning_rate": 1.934772246955882e-06, + "loss": 13.9648, + "step": 15016 + }, + { + "epoch": 0.14215124809496313, + "grad_norm": 171.01271057128906, + "learning_rate": 1.9347613552087693e-06, + "loss": 23.9766, + "step": 15017 + }, + { + "epoch": 0.1421607141166782, + "grad_norm": 177.41879272460938, + "learning_rate": 1.9347504625830413e-06, + "loss": 19.1328, + "step": 15018 + }, + { + "epoch": 0.14217018013839325, + "grad_norm": 209.6791534423828, + "learning_rate": 1.934739569078709e-06, + "loss": 6.4121, + "step": 15019 + }, + { + "epoch": 0.14217964616010828, + "grad_norm": 150.61643981933594, + "learning_rate": 1.934728674695782e-06, + "loss": 18.6484, + "step": 15020 + }, + { + "epoch": 0.14218911218182334, + "grad_norm": 252.52371215820312, + "learning_rate": 1.934717779434271e-06, + "loss": 20.1406, + "step": 15021 + }, + { + "epoch": 0.1421985782035384, + "grad_norm": 407.21893310546875, + "learning_rate": 1.934706883294186e-06, + "loss": 13.4375, + "step": 15022 + }, + { + "epoch": 0.14220804422525346, + "grad_norm": 833.4176025390625, + "learning_rate": 1.9346959862755376e-06, + "loss": 59.1719, + "step": 15023 + }, + { + "epoch": 0.14221751024696852, + "grad_norm": 250.90589904785156, + "learning_rate": 1.9346850883783357e-06, + "loss": 22.9023, + "step": 15024 + }, + { + "epoch": 0.14222697626868355, + "grad_norm": 409.11981201171875, + "learning_rate": 1.9346741896025904e-06, + "loss": 37.1094, + "step": 15025 + }, + { + "epoch": 0.1422364422903986, + "grad_norm": 257.3052062988281, + "learning_rate": 1.9346632899483123e-06, + "loss": 23.1562, + "step": 15026 + }, + { + "epoch": 0.14224590831211367, + "grad_norm": 347.3602294921875, + "learning_rate": 1.9346523894155114e-06, + "loss": 12.8008, + "step": 15027 + }, + { + "epoch": 0.14225537433382873, + "grad_norm": 959.337890625, + "learning_rate": 1.934641488004198e-06, + "loss": 58.7891, + "step": 15028 + }, + { + "epoch": 0.14226484035554376, + "grad_norm": 286.35760498046875, + "learning_rate": 1.934630585714383e-06, + "loss": 26.8047, + "step": 15029 + }, + { + "epoch": 0.14227430637725882, + "grad_norm": 248.2887420654297, + "learning_rate": 1.934619682546075e-06, + "loss": 8.1992, + "step": 15030 + }, + { + "epoch": 0.14228377239897388, + "grad_norm": 555.1014404296875, + "learning_rate": 1.934608778499286e-06, + "loss": 21.4219, + "step": 15031 + }, + { + "epoch": 0.14229323842068894, + "grad_norm": 339.97015380859375, + "learning_rate": 1.9345978735740256e-06, + "loss": 32.9375, + "step": 15032 + }, + { + "epoch": 0.142302704442404, + "grad_norm": 389.6776428222656, + "learning_rate": 1.934586967770304e-06, + "loss": 12.4121, + "step": 15033 + }, + { + "epoch": 0.14231217046411904, + "grad_norm": 821.6337890625, + "learning_rate": 1.934576061088131e-06, + "loss": 69.7031, + "step": 15034 + }, + { + "epoch": 0.1423216364858341, + "grad_norm": 585.4558715820312, + "learning_rate": 1.934565153527518e-06, + "loss": 18.4453, + "step": 15035 + }, + { + "epoch": 0.14233110250754916, + "grad_norm": 180.5910186767578, + "learning_rate": 1.934554245088474e-06, + "loss": 24.3516, + "step": 15036 + }, + { + "epoch": 0.14234056852926422, + "grad_norm": 508.0458984375, + "learning_rate": 1.93454333577101e-06, + "loss": 19.3281, + "step": 15037 + }, + { + "epoch": 0.14235003455097925, + "grad_norm": 283.2483215332031, + "learning_rate": 1.934532425575136e-06, + "loss": 16.7031, + "step": 15038 + }, + { + "epoch": 0.1423595005726943, + "grad_norm": 625.2389526367188, + "learning_rate": 1.9345215145008626e-06, + "loss": 25.7148, + "step": 15039 + }, + { + "epoch": 0.14236896659440937, + "grad_norm": 807.3861083984375, + "learning_rate": 1.9345106025481993e-06, + "loss": 44.2188, + "step": 15040 + }, + { + "epoch": 0.14237843261612443, + "grad_norm": 284.7561340332031, + "learning_rate": 1.934499689717157e-06, + "loss": 16.6797, + "step": 15041 + }, + { + "epoch": 0.1423878986378395, + "grad_norm": 345.09906005859375, + "learning_rate": 1.934488776007746e-06, + "loss": 30.8516, + "step": 15042 + }, + { + "epoch": 0.14239736465955452, + "grad_norm": 2.9000608921051025, + "learning_rate": 1.9344778614199758e-06, + "loss": 0.8916, + "step": 15043 + }, + { + "epoch": 0.14240683068126958, + "grad_norm": 4.282545566558838, + "learning_rate": 1.934466945953858e-06, + "loss": 1.0117, + "step": 15044 + }, + { + "epoch": 0.14241629670298464, + "grad_norm": 680.4677734375, + "learning_rate": 1.9344560296094016e-06, + "loss": 49.2344, + "step": 15045 + }, + { + "epoch": 0.1424257627246997, + "grad_norm": 325.0600280761719, + "learning_rate": 1.934445112386617e-06, + "loss": 26.5938, + "step": 15046 + }, + { + "epoch": 0.14243522874641473, + "grad_norm": 376.8807067871094, + "learning_rate": 1.934434194285515e-06, + "loss": 24.0547, + "step": 15047 + }, + { + "epoch": 0.1424446947681298, + "grad_norm": 3.2208545207977295, + "learning_rate": 1.934423275306106e-06, + "loss": 1.002, + "step": 15048 + }, + { + "epoch": 0.14245416078984485, + "grad_norm": 342.3756408691406, + "learning_rate": 1.9344123554483994e-06, + "loss": 20.0078, + "step": 15049 + }, + { + "epoch": 0.1424636268115599, + "grad_norm": 277.0859375, + "learning_rate": 1.9344014347124068e-06, + "loss": 13.4141, + "step": 15050 + }, + { + "epoch": 0.14247309283327497, + "grad_norm": 370.1090087890625, + "learning_rate": 1.9343905130981367e-06, + "loss": 41.2812, + "step": 15051 + }, + { + "epoch": 0.14248255885499, + "grad_norm": 649.1536865234375, + "learning_rate": 1.934379590605601e-06, + "loss": 27.2188, + "step": 15052 + }, + { + "epoch": 0.14249202487670506, + "grad_norm": 254.20254516601562, + "learning_rate": 1.9343686672348086e-06, + "loss": 20.3086, + "step": 15053 + }, + { + "epoch": 0.14250149089842012, + "grad_norm": 176.53903198242188, + "learning_rate": 1.934357742985771e-06, + "loss": 24.875, + "step": 15054 + }, + { + "epoch": 0.14251095692013518, + "grad_norm": 194.96331787109375, + "learning_rate": 1.9343468178584977e-06, + "loss": 22.0391, + "step": 15055 + }, + { + "epoch": 0.14252042294185022, + "grad_norm": 176.11834716796875, + "learning_rate": 1.934335891852999e-06, + "loss": 15.7656, + "step": 15056 + }, + { + "epoch": 0.14252988896356528, + "grad_norm": 417.9226989746094, + "learning_rate": 1.934324964969286e-06, + "loss": 43.0625, + "step": 15057 + }, + { + "epoch": 0.14253935498528034, + "grad_norm": 224.34561157226562, + "learning_rate": 1.9343140372073675e-06, + "loss": 23.6094, + "step": 15058 + }, + { + "epoch": 0.1425488210069954, + "grad_norm": 265.7191162109375, + "learning_rate": 1.934303108567255e-06, + "loss": 22.6328, + "step": 15059 + }, + { + "epoch": 0.14255828702871046, + "grad_norm": 401.9120788574219, + "learning_rate": 1.9342921790489585e-06, + "loss": 37.3984, + "step": 15060 + }, + { + "epoch": 0.1425677530504255, + "grad_norm": 480.4211730957031, + "learning_rate": 1.9342812486524877e-06, + "loss": 19.1875, + "step": 15061 + }, + { + "epoch": 0.14257721907214055, + "grad_norm": 155.780029296875, + "learning_rate": 1.9342703173778534e-06, + "loss": 18.6523, + "step": 15062 + }, + { + "epoch": 0.1425866850938556, + "grad_norm": 179.51998901367188, + "learning_rate": 1.934259385225066e-06, + "loss": 7.0273, + "step": 15063 + }, + { + "epoch": 0.14259615111557067, + "grad_norm": 412.17840576171875, + "learning_rate": 1.9342484521941358e-06, + "loss": 27.1094, + "step": 15064 + }, + { + "epoch": 0.1426056171372857, + "grad_norm": 3.1990880966186523, + "learning_rate": 1.9342375182850723e-06, + "loss": 0.9102, + "step": 15065 + }, + { + "epoch": 0.14261508315900076, + "grad_norm": 220.90377807617188, + "learning_rate": 1.9342265834978864e-06, + "loss": 21.7539, + "step": 15066 + }, + { + "epoch": 0.14262454918071582, + "grad_norm": 166.53692626953125, + "learning_rate": 1.9342156478325882e-06, + "loss": 20.4141, + "step": 15067 + }, + { + "epoch": 0.14263401520243088, + "grad_norm": 1002.5890502929688, + "learning_rate": 1.9342047112891888e-06, + "loss": 46.9375, + "step": 15068 + }, + { + "epoch": 0.14264348122414594, + "grad_norm": 233.7747802734375, + "learning_rate": 1.934193773867697e-06, + "loss": 18.4844, + "step": 15069 + }, + { + "epoch": 0.14265294724586097, + "grad_norm": 298.31866455078125, + "learning_rate": 1.934182835568124e-06, + "loss": 22.9062, + "step": 15070 + }, + { + "epoch": 0.14266241326757603, + "grad_norm": 1667.442626953125, + "learning_rate": 1.9341718963904802e-06, + "loss": 12.1992, + "step": 15071 + }, + { + "epoch": 0.1426718792892911, + "grad_norm": 499.3363952636719, + "learning_rate": 1.9341609563347752e-06, + "loss": 39.8906, + "step": 15072 + }, + { + "epoch": 0.14268134531100615, + "grad_norm": 248.62149047851562, + "learning_rate": 1.93415001540102e-06, + "loss": 15.1055, + "step": 15073 + }, + { + "epoch": 0.14269081133272118, + "grad_norm": 306.1246643066406, + "learning_rate": 1.934139073589224e-06, + "loss": 21.3438, + "step": 15074 + }, + { + "epoch": 0.14270027735443624, + "grad_norm": 566.353271484375, + "learning_rate": 1.9341281308993982e-06, + "loss": 40.9219, + "step": 15075 + }, + { + "epoch": 0.1427097433761513, + "grad_norm": 234.3957061767578, + "learning_rate": 1.9341171873315533e-06, + "loss": 15.5234, + "step": 15076 + }, + { + "epoch": 0.14271920939786636, + "grad_norm": 460.35845947265625, + "learning_rate": 1.9341062428856985e-06, + "loss": 53.1875, + "step": 15077 + }, + { + "epoch": 0.14272867541958142, + "grad_norm": 315.42718505859375, + "learning_rate": 1.9340952975618446e-06, + "loss": 22.9531, + "step": 15078 + }, + { + "epoch": 0.14273814144129646, + "grad_norm": 372.8793640136719, + "learning_rate": 1.934084351360002e-06, + "loss": 18.8438, + "step": 15079 + }, + { + "epoch": 0.14274760746301152, + "grad_norm": 405.15338134765625, + "learning_rate": 1.934073404280181e-06, + "loss": 7.875, + "step": 15080 + }, + { + "epoch": 0.14275707348472658, + "grad_norm": 3.618328094482422, + "learning_rate": 1.9340624563223918e-06, + "loss": 1.0308, + "step": 15081 + }, + { + "epoch": 0.14276653950644164, + "grad_norm": 612.0001831054688, + "learning_rate": 1.9340515074866444e-06, + "loss": 48.6562, + "step": 15082 + }, + { + "epoch": 0.14277600552815667, + "grad_norm": 368.4115295410156, + "learning_rate": 1.9340405577729494e-06, + "loss": 35.2344, + "step": 15083 + }, + { + "epoch": 0.14278547154987173, + "grad_norm": 385.4376220703125, + "learning_rate": 1.9340296071813175e-06, + "loss": 17.7266, + "step": 15084 + }, + { + "epoch": 0.1427949375715868, + "grad_norm": 115.67277526855469, + "learning_rate": 1.9340186557117576e-06, + "loss": 12.7969, + "step": 15085 + }, + { + "epoch": 0.14280440359330185, + "grad_norm": 318.2754821777344, + "learning_rate": 1.9340077033642815e-06, + "loss": 17.6641, + "step": 15086 + }, + { + "epoch": 0.1428138696150169, + "grad_norm": 478.5979919433594, + "learning_rate": 1.933996750138899e-06, + "loss": 60.5078, + "step": 15087 + }, + { + "epoch": 0.14282333563673194, + "grad_norm": 536.027587890625, + "learning_rate": 1.9339857960356205e-06, + "loss": 34.5234, + "step": 15088 + }, + { + "epoch": 0.142832801658447, + "grad_norm": 1008.610107421875, + "learning_rate": 1.9339748410544555e-06, + "loss": 59.125, + "step": 15089 + }, + { + "epoch": 0.14284226768016206, + "grad_norm": 345.2802734375, + "learning_rate": 1.9339638851954154e-06, + "loss": 12.5469, + "step": 15090 + }, + { + "epoch": 0.14285173370187712, + "grad_norm": 272.3673400878906, + "learning_rate": 1.9339529284585095e-06, + "loss": 17.7031, + "step": 15091 + }, + { + "epoch": 0.14286119972359215, + "grad_norm": 847.7354736328125, + "learning_rate": 1.9339419708437492e-06, + "loss": 54.1641, + "step": 15092 + }, + { + "epoch": 0.1428706657453072, + "grad_norm": 668.4669189453125, + "learning_rate": 1.933931012351144e-06, + "loss": 44.8438, + "step": 15093 + }, + { + "epoch": 0.14288013176702227, + "grad_norm": 198.38461303710938, + "learning_rate": 1.933920052980704e-06, + "loss": 15.8672, + "step": 15094 + }, + { + "epoch": 0.14288959778873733, + "grad_norm": 1019.2544555664062, + "learning_rate": 1.9339090927324406e-06, + "loss": 29.0117, + "step": 15095 + }, + { + "epoch": 0.1428990638104524, + "grad_norm": 273.0321960449219, + "learning_rate": 1.933898131606363e-06, + "loss": 18.3828, + "step": 15096 + }, + { + "epoch": 0.14290852983216742, + "grad_norm": 219.0131072998047, + "learning_rate": 1.933887169602482e-06, + "loss": 19.5703, + "step": 15097 + }, + { + "epoch": 0.14291799585388248, + "grad_norm": 381.2464904785156, + "learning_rate": 1.9338762067208078e-06, + "loss": 31.5469, + "step": 15098 + }, + { + "epoch": 0.14292746187559754, + "grad_norm": 445.5354309082031, + "learning_rate": 1.9338652429613504e-06, + "loss": 29.1406, + "step": 15099 + }, + { + "epoch": 0.1429369278973126, + "grad_norm": 304.3021545410156, + "learning_rate": 1.933854278324121e-06, + "loss": 10.2344, + "step": 15100 + }, + { + "epoch": 0.14294639391902764, + "grad_norm": 588.8605346679688, + "learning_rate": 1.933843312809129e-06, + "loss": 66.8906, + "step": 15101 + }, + { + "epoch": 0.1429558599407427, + "grad_norm": 320.7030334472656, + "learning_rate": 1.933832346416385e-06, + "loss": 26.0781, + "step": 15102 + }, + { + "epoch": 0.14296532596245776, + "grad_norm": 210.2862548828125, + "learning_rate": 1.9338213791458995e-06, + "loss": 18.7539, + "step": 15103 + }, + { + "epoch": 0.14297479198417282, + "grad_norm": 900.4187622070312, + "learning_rate": 1.9338104109976827e-06, + "loss": 22.1992, + "step": 15104 + }, + { + "epoch": 0.14298425800588788, + "grad_norm": 187.70144653320312, + "learning_rate": 1.9337994419717446e-06, + "loss": 17.7969, + "step": 15105 + }, + { + "epoch": 0.1429937240276029, + "grad_norm": 316.35308837890625, + "learning_rate": 1.933788472068096e-06, + "loss": 18.4141, + "step": 15106 + }, + { + "epoch": 0.14300319004931797, + "grad_norm": 243.87332153320312, + "learning_rate": 1.933777501286747e-06, + "loss": 16.1094, + "step": 15107 + }, + { + "epoch": 0.14301265607103303, + "grad_norm": 3.401803731918335, + "learning_rate": 1.9337665296277078e-06, + "loss": 0.8462, + "step": 15108 + }, + { + "epoch": 0.1430221220927481, + "grad_norm": 169.64222717285156, + "learning_rate": 1.9337555570909883e-06, + "loss": 18.7031, + "step": 15109 + }, + { + "epoch": 0.14303158811446312, + "grad_norm": 214.14158630371094, + "learning_rate": 1.9337445836766e-06, + "loss": 21.5234, + "step": 15110 + }, + { + "epoch": 0.14304105413617818, + "grad_norm": 447.16326904296875, + "learning_rate": 1.9337336093845526e-06, + "loss": 45.3594, + "step": 15111 + }, + { + "epoch": 0.14305052015789324, + "grad_norm": 607.8016357421875, + "learning_rate": 1.9337226342148562e-06, + "loss": 28.2969, + "step": 15112 + }, + { + "epoch": 0.1430599861796083, + "grad_norm": 302.921875, + "learning_rate": 1.933711658167521e-06, + "loss": 26.4141, + "step": 15113 + }, + { + "epoch": 0.14306945220132336, + "grad_norm": 364.46844482421875, + "learning_rate": 1.9337006812425577e-06, + "loss": 27.2891, + "step": 15114 + }, + { + "epoch": 0.1430789182230384, + "grad_norm": 286.7474365234375, + "learning_rate": 1.9336897034399767e-06, + "loss": 20.0859, + "step": 15115 + }, + { + "epoch": 0.14308838424475345, + "grad_norm": 265.363037109375, + "learning_rate": 1.933678724759788e-06, + "loss": 26.3984, + "step": 15116 + }, + { + "epoch": 0.1430978502664685, + "grad_norm": 274.6993713378906, + "learning_rate": 1.933667745202002e-06, + "loss": 8.3027, + "step": 15117 + }, + { + "epoch": 0.14310731628818357, + "grad_norm": 409.05084228515625, + "learning_rate": 1.9336567647666293e-06, + "loss": 42.7422, + "step": 15118 + }, + { + "epoch": 0.14311678230989863, + "grad_norm": 237.0898895263672, + "learning_rate": 1.9336457834536795e-06, + "loss": 18.375, + "step": 15119 + }, + { + "epoch": 0.14312624833161366, + "grad_norm": 462.790283203125, + "learning_rate": 1.9336348012631637e-06, + "loss": 14.1309, + "step": 15120 + }, + { + "epoch": 0.14313571435332872, + "grad_norm": 314.2847595214844, + "learning_rate": 1.933623818195092e-06, + "loss": 42.2266, + "step": 15121 + }, + { + "epoch": 0.14314518037504378, + "grad_norm": 506.0605163574219, + "learning_rate": 1.9336128342494743e-06, + "loss": 23.7656, + "step": 15122 + }, + { + "epoch": 0.14315464639675884, + "grad_norm": 768.1614379882812, + "learning_rate": 1.9336018494263217e-06, + "loss": 16.1484, + "step": 15123 + }, + { + "epoch": 0.14316411241847388, + "grad_norm": 304.1311950683594, + "learning_rate": 1.933590863725644e-06, + "loss": 35.8906, + "step": 15124 + }, + { + "epoch": 0.14317357844018894, + "grad_norm": 440.6781921386719, + "learning_rate": 1.9335798771474512e-06, + "loss": 27.0703, + "step": 15125 + }, + { + "epoch": 0.143183044461904, + "grad_norm": 329.81646728515625, + "learning_rate": 1.9335688896917546e-06, + "loss": 31.5547, + "step": 15126 + }, + { + "epoch": 0.14319251048361906, + "grad_norm": 238.33580017089844, + "learning_rate": 1.9335579013585635e-06, + "loss": 32.8594, + "step": 15127 + }, + { + "epoch": 0.14320197650533412, + "grad_norm": 266.1275329589844, + "learning_rate": 1.9335469121478887e-06, + "loss": 19.9766, + "step": 15128 + }, + { + "epoch": 0.14321144252704915, + "grad_norm": 406.73370361328125, + "learning_rate": 1.933535922059741e-06, + "loss": 18.6875, + "step": 15129 + }, + { + "epoch": 0.1432209085487642, + "grad_norm": 277.3866271972656, + "learning_rate": 1.93352493109413e-06, + "loss": 23.7422, + "step": 15130 + }, + { + "epoch": 0.14323037457047927, + "grad_norm": 234.54129028320312, + "learning_rate": 1.9335139392510665e-06, + "loss": 17.8867, + "step": 15131 + }, + { + "epoch": 0.14323984059219433, + "grad_norm": 417.92999267578125, + "learning_rate": 1.9335029465305604e-06, + "loss": 30.7969, + "step": 15132 + }, + { + "epoch": 0.14324930661390936, + "grad_norm": 391.29705810546875, + "learning_rate": 1.9334919529326225e-06, + "loss": 43.9062, + "step": 15133 + }, + { + "epoch": 0.14325877263562442, + "grad_norm": 276.12969970703125, + "learning_rate": 1.9334809584572626e-06, + "loss": 23.0547, + "step": 15134 + }, + { + "epoch": 0.14326823865733948, + "grad_norm": 456.70355224609375, + "learning_rate": 1.9334699631044915e-06, + "loss": 44.4297, + "step": 15135 + }, + { + "epoch": 0.14327770467905454, + "grad_norm": 1445.6588134765625, + "learning_rate": 1.933458966874319e-06, + "loss": 26.3438, + "step": 15136 + }, + { + "epoch": 0.1432871707007696, + "grad_norm": 518.8522338867188, + "learning_rate": 1.933447969766756e-06, + "loss": 21.8359, + "step": 15137 + }, + { + "epoch": 0.14329663672248463, + "grad_norm": 379.80047607421875, + "learning_rate": 1.933436971781813e-06, + "loss": 28.2188, + "step": 15138 + }, + { + "epoch": 0.1433061027441997, + "grad_norm": 373.9418640136719, + "learning_rate": 1.9334259729194998e-06, + "loss": 20.4609, + "step": 15139 + }, + { + "epoch": 0.14331556876591475, + "grad_norm": 597.2593383789062, + "learning_rate": 1.9334149731798268e-06, + "loss": 40.1406, + "step": 15140 + }, + { + "epoch": 0.1433250347876298, + "grad_norm": 616.8663940429688, + "learning_rate": 1.9334039725628043e-06, + "loss": 47.2188, + "step": 15141 + }, + { + "epoch": 0.14333450080934484, + "grad_norm": 260.93511962890625, + "learning_rate": 1.9333929710684432e-06, + "loss": 17.1641, + "step": 15142 + }, + { + "epoch": 0.1433439668310599, + "grad_norm": 271.7450866699219, + "learning_rate": 1.933381968696753e-06, + "loss": 16.9844, + "step": 15143 + }, + { + "epoch": 0.14335343285277496, + "grad_norm": 276.700439453125, + "learning_rate": 1.933370965447745e-06, + "loss": 20.4141, + "step": 15144 + }, + { + "epoch": 0.14336289887449002, + "grad_norm": 784.4716186523438, + "learning_rate": 1.9333599613214286e-06, + "loss": 50.2734, + "step": 15145 + }, + { + "epoch": 0.14337236489620508, + "grad_norm": 506.24591064453125, + "learning_rate": 1.9333489563178143e-06, + "loss": 29.2031, + "step": 15146 + }, + { + "epoch": 0.14338183091792012, + "grad_norm": 443.435546875, + "learning_rate": 1.933337950436913e-06, + "loss": 48.1094, + "step": 15147 + }, + { + "epoch": 0.14339129693963518, + "grad_norm": 210.49887084960938, + "learning_rate": 1.933326943678735e-06, + "loss": 9.0352, + "step": 15148 + }, + { + "epoch": 0.14340076296135024, + "grad_norm": 599.705810546875, + "learning_rate": 1.93331593604329e-06, + "loss": 20.6719, + "step": 15149 + }, + { + "epoch": 0.1434102289830653, + "grad_norm": 3.3060007095336914, + "learning_rate": 1.933304927530589e-06, + "loss": 0.9785, + "step": 15150 + }, + { + "epoch": 0.14341969500478033, + "grad_norm": 285.97113037109375, + "learning_rate": 1.933293918140642e-06, + "loss": 40.8906, + "step": 15151 + }, + { + "epoch": 0.1434291610264954, + "grad_norm": 847.18798828125, + "learning_rate": 1.9332829078734592e-06, + "loss": 35.2969, + "step": 15152 + }, + { + "epoch": 0.14343862704821045, + "grad_norm": 3.4034831523895264, + "learning_rate": 1.9332718967290513e-06, + "loss": 0.8813, + "step": 15153 + }, + { + "epoch": 0.1434480930699255, + "grad_norm": 305.17889404296875, + "learning_rate": 1.933260884707429e-06, + "loss": 43.4219, + "step": 15154 + }, + { + "epoch": 0.14345755909164057, + "grad_norm": 2.9368278980255127, + "learning_rate": 1.9332498718086015e-06, + "loss": 0.9097, + "step": 15155 + }, + { + "epoch": 0.1434670251133556, + "grad_norm": 3.9697649478912354, + "learning_rate": 1.9332388580325804e-06, + "loss": 1.0278, + "step": 15156 + }, + { + "epoch": 0.14347649113507066, + "grad_norm": 398.1662902832031, + "learning_rate": 1.933227843379375e-06, + "loss": 42.5781, + "step": 15157 + }, + { + "epoch": 0.14348595715678572, + "grad_norm": 173.739990234375, + "learning_rate": 1.933216827848996e-06, + "loss": 15.4297, + "step": 15158 + }, + { + "epoch": 0.14349542317850078, + "grad_norm": 3.239828586578369, + "learning_rate": 1.9332058114414545e-06, + "loss": 0.9136, + "step": 15159 + }, + { + "epoch": 0.1435048892002158, + "grad_norm": 181.7291717529297, + "learning_rate": 1.9331947941567603e-06, + "loss": 15.25, + "step": 15160 + }, + { + "epoch": 0.14351435522193087, + "grad_norm": 842.88037109375, + "learning_rate": 1.933183775994923e-06, + "loss": 41.1328, + "step": 15161 + }, + { + "epoch": 0.14352382124364593, + "grad_norm": 319.208251953125, + "learning_rate": 1.933172756955954e-06, + "loss": 24.4062, + "step": 15162 + }, + { + "epoch": 0.143533287265361, + "grad_norm": 353.47711181640625, + "learning_rate": 1.9331617370398635e-06, + "loss": 28.4609, + "step": 15163 + }, + { + "epoch": 0.14354275328707605, + "grad_norm": 1818.3917236328125, + "learning_rate": 1.9331507162466614e-06, + "loss": 28.707, + "step": 15164 + }, + { + "epoch": 0.14355221930879108, + "grad_norm": 1108.818359375, + "learning_rate": 1.933139694576359e-06, + "loss": 55.2734, + "step": 15165 + }, + { + "epoch": 0.14356168533050614, + "grad_norm": 349.72412109375, + "learning_rate": 1.933128672028965e-06, + "loss": 25.0, + "step": 15166 + }, + { + "epoch": 0.1435711513522212, + "grad_norm": 399.8388977050781, + "learning_rate": 1.9331176486044912e-06, + "loss": 21.4219, + "step": 15167 + }, + { + "epoch": 0.14358061737393626, + "grad_norm": 364.91693115234375, + "learning_rate": 1.9331066243029478e-06, + "loss": 17.6992, + "step": 15168 + }, + { + "epoch": 0.1435900833956513, + "grad_norm": 401.1291809082031, + "learning_rate": 1.9330955991243444e-06, + "loss": 49.5, + "step": 15169 + }, + { + "epoch": 0.14359954941736636, + "grad_norm": 1327.2767333984375, + "learning_rate": 1.933084573068692e-06, + "loss": 46.7031, + "step": 15170 + }, + { + "epoch": 0.14360901543908142, + "grad_norm": 497.6336364746094, + "learning_rate": 1.933073546136001e-06, + "loss": 32.4141, + "step": 15171 + }, + { + "epoch": 0.14361848146079648, + "grad_norm": 607.8160400390625, + "learning_rate": 1.9330625183262813e-06, + "loss": 45.7031, + "step": 15172 + }, + { + "epoch": 0.14362794748251154, + "grad_norm": 2369.32470703125, + "learning_rate": 1.9330514896395436e-06, + "loss": 21.0371, + "step": 15173 + }, + { + "epoch": 0.14363741350422657, + "grad_norm": 459.57073974609375, + "learning_rate": 1.933040460075798e-06, + "loss": 49.4961, + "step": 15174 + }, + { + "epoch": 0.14364687952594163, + "grad_norm": 300.58367919921875, + "learning_rate": 1.9330294296350556e-06, + "loss": 31.168, + "step": 15175 + }, + { + "epoch": 0.1436563455476567, + "grad_norm": 195.72265625, + "learning_rate": 1.9330183983173256e-06, + "loss": 24.4062, + "step": 15176 + }, + { + "epoch": 0.14366581156937175, + "grad_norm": 258.92645263671875, + "learning_rate": 1.9330073661226197e-06, + "loss": 21.6016, + "step": 15177 + }, + { + "epoch": 0.14367527759108678, + "grad_norm": 270.21636962890625, + "learning_rate": 1.9329963330509474e-06, + "loss": 16.1953, + "step": 15178 + }, + { + "epoch": 0.14368474361280184, + "grad_norm": 450.8699951171875, + "learning_rate": 1.9329852991023187e-06, + "loss": 23.1406, + "step": 15179 + }, + { + "epoch": 0.1436942096345169, + "grad_norm": 852.3424682617188, + "learning_rate": 1.932974264276745e-06, + "loss": 40.7422, + "step": 15180 + }, + { + "epoch": 0.14370367565623196, + "grad_norm": 454.4208679199219, + "learning_rate": 1.9329632285742362e-06, + "loss": 24.2578, + "step": 15181 + }, + { + "epoch": 0.14371314167794702, + "grad_norm": 1114.3001708984375, + "learning_rate": 1.9329521919948024e-06, + "loss": 70.6562, + "step": 15182 + }, + { + "epoch": 0.14372260769966205, + "grad_norm": 755.0794677734375, + "learning_rate": 1.9329411545384543e-06, + "loss": 48.5312, + "step": 15183 + }, + { + "epoch": 0.1437320737213771, + "grad_norm": 569.6973876953125, + "learning_rate": 1.9329301162052023e-06, + "loss": 22.25, + "step": 15184 + }, + { + "epoch": 0.14374153974309217, + "grad_norm": 458.7666015625, + "learning_rate": 1.9329190769950565e-06, + "loss": 32.7266, + "step": 15185 + }, + { + "epoch": 0.14375100576480723, + "grad_norm": 279.8185119628906, + "learning_rate": 1.932908036908028e-06, + "loss": 19.1719, + "step": 15186 + }, + { + "epoch": 0.14376047178652226, + "grad_norm": 209.25303649902344, + "learning_rate": 1.932896995944126e-06, + "loss": 24.6562, + "step": 15187 + }, + { + "epoch": 0.14376993780823732, + "grad_norm": 313.22979736328125, + "learning_rate": 1.9328859541033616e-06, + "loss": 20.9141, + "step": 15188 + }, + { + "epoch": 0.14377940382995238, + "grad_norm": 300.68292236328125, + "learning_rate": 1.9328749113857455e-06, + "loss": 21.7266, + "step": 15189 + }, + { + "epoch": 0.14378886985166744, + "grad_norm": 243.9975128173828, + "learning_rate": 1.9328638677912872e-06, + "loss": 14.625, + "step": 15190 + }, + { + "epoch": 0.1437983358733825, + "grad_norm": 341.22747802734375, + "learning_rate": 1.932852823319998e-06, + "loss": 34.9688, + "step": 15191 + }, + { + "epoch": 0.14380780189509754, + "grad_norm": 446.56787109375, + "learning_rate": 1.9328417779718875e-06, + "loss": 33.6953, + "step": 15192 + }, + { + "epoch": 0.1438172679168126, + "grad_norm": 344.5507507324219, + "learning_rate": 1.9328307317469667e-06, + "loss": 35.9844, + "step": 15193 + }, + { + "epoch": 0.14382673393852766, + "grad_norm": 429.9443359375, + "learning_rate": 1.9328196846452453e-06, + "loss": 33.8555, + "step": 15194 + }, + { + "epoch": 0.14383619996024272, + "grad_norm": 588.8190307617188, + "learning_rate": 1.9328086366667343e-06, + "loss": 27.6953, + "step": 15195 + }, + { + "epoch": 0.14384566598195775, + "grad_norm": 519.2130126953125, + "learning_rate": 1.9327975878114438e-06, + "loss": 13.0117, + "step": 15196 + }, + { + "epoch": 0.1438551320036728, + "grad_norm": 399.8504333496094, + "learning_rate": 1.9327865380793844e-06, + "loss": 39.625, + "step": 15197 + }, + { + "epoch": 0.14386459802538787, + "grad_norm": 764.2711181640625, + "learning_rate": 1.932775487470566e-06, + "loss": 34.6016, + "step": 15198 + }, + { + "epoch": 0.14387406404710293, + "grad_norm": 774.0384521484375, + "learning_rate": 1.9327644359849996e-06, + "loss": 38.8672, + "step": 15199 + }, + { + "epoch": 0.143883530068818, + "grad_norm": 184.6457061767578, + "learning_rate": 1.9327533836226954e-06, + "loss": 14.5, + "step": 15200 + }, + { + "epoch": 0.14389299609053302, + "grad_norm": 787.7421264648438, + "learning_rate": 1.9327423303836633e-06, + "loss": 54.4844, + "step": 15201 + }, + { + "epoch": 0.14390246211224808, + "grad_norm": 246.78732299804688, + "learning_rate": 1.9327312762679145e-06, + "loss": 27.8828, + "step": 15202 + }, + { + "epoch": 0.14391192813396314, + "grad_norm": 493.9295654296875, + "learning_rate": 1.9327202212754586e-06, + "loss": 46.0781, + "step": 15203 + }, + { + "epoch": 0.1439213941556782, + "grad_norm": 585.8896484375, + "learning_rate": 1.9327091654063067e-06, + "loss": 33.3594, + "step": 15204 + }, + { + "epoch": 0.14393086017739326, + "grad_norm": 192.9053497314453, + "learning_rate": 1.932698108660469e-06, + "loss": 19.4688, + "step": 15205 + }, + { + "epoch": 0.1439403261991083, + "grad_norm": 322.32177734375, + "learning_rate": 1.9326870510379554e-06, + "loss": 10.8984, + "step": 15206 + }, + { + "epoch": 0.14394979222082335, + "grad_norm": 235.56895446777344, + "learning_rate": 1.9326759925387768e-06, + "loss": 11.9844, + "step": 15207 + }, + { + "epoch": 0.1439592582425384, + "grad_norm": 281.1301574707031, + "learning_rate": 1.932664933162943e-06, + "loss": 17.5781, + "step": 15208 + }, + { + "epoch": 0.14396872426425347, + "grad_norm": 334.0773010253906, + "learning_rate": 1.9326538729104657e-06, + "loss": 20.9688, + "step": 15209 + }, + { + "epoch": 0.1439781902859685, + "grad_norm": 246.84591674804688, + "learning_rate": 1.932642811781354e-06, + "loss": 22.125, + "step": 15210 + }, + { + "epoch": 0.14398765630768356, + "grad_norm": 764.5955810546875, + "learning_rate": 1.9326317497756185e-06, + "loss": 45.8359, + "step": 15211 + }, + { + "epoch": 0.14399712232939862, + "grad_norm": 291.10601806640625, + "learning_rate": 1.93262068689327e-06, + "loss": 30.5938, + "step": 15212 + }, + { + "epoch": 0.14400658835111368, + "grad_norm": 225.79205322265625, + "learning_rate": 1.932609623134319e-06, + "loss": 19.1328, + "step": 15213 + }, + { + "epoch": 0.14401605437282874, + "grad_norm": 3.3662750720977783, + "learning_rate": 1.9325985584987753e-06, + "loss": 0.8857, + "step": 15214 + }, + { + "epoch": 0.14402552039454378, + "grad_norm": 360.12835693359375, + "learning_rate": 1.9325874929866493e-06, + "loss": 20.0, + "step": 15215 + }, + { + "epoch": 0.14403498641625884, + "grad_norm": 475.2279052734375, + "learning_rate": 1.9325764265979525e-06, + "loss": 38.918, + "step": 15216 + }, + { + "epoch": 0.1440444524379739, + "grad_norm": 429.67559814453125, + "learning_rate": 1.932565359332694e-06, + "loss": 37.3438, + "step": 15217 + }, + { + "epoch": 0.14405391845968896, + "grad_norm": 344.075927734375, + "learning_rate": 1.932554291190885e-06, + "loss": 15.3711, + "step": 15218 + }, + { + "epoch": 0.144063384481404, + "grad_norm": 443.9195556640625, + "learning_rate": 1.9325432221725355e-06, + "loss": 21.8516, + "step": 15219 + }, + { + "epoch": 0.14407285050311905, + "grad_norm": 288.36474609375, + "learning_rate": 1.932532152277656e-06, + "loss": 21.8125, + "step": 15220 + }, + { + "epoch": 0.1440823165248341, + "grad_norm": 264.3172912597656, + "learning_rate": 1.932521081506257e-06, + "loss": 23.3125, + "step": 15221 + }, + { + "epoch": 0.14409178254654917, + "grad_norm": 369.1775207519531, + "learning_rate": 1.932510009858349e-06, + "loss": 16.9062, + "step": 15222 + }, + { + "epoch": 0.14410124856826423, + "grad_norm": 391.6280212402344, + "learning_rate": 1.932498937333942e-06, + "loss": 35.4219, + "step": 15223 + }, + { + "epoch": 0.14411071458997926, + "grad_norm": 579.010009765625, + "learning_rate": 1.932487863933047e-06, + "loss": 44.3594, + "step": 15224 + }, + { + "epoch": 0.14412018061169432, + "grad_norm": 1068.5322265625, + "learning_rate": 1.9324767896556735e-06, + "loss": 36.9492, + "step": 15225 + }, + { + "epoch": 0.14412964663340938, + "grad_norm": 933.8759155273438, + "learning_rate": 1.9324657145018328e-06, + "loss": 67.0625, + "step": 15226 + }, + { + "epoch": 0.14413911265512444, + "grad_norm": 593.1398315429688, + "learning_rate": 1.932454638471535e-06, + "loss": 78.2031, + "step": 15227 + }, + { + "epoch": 0.14414857867683947, + "grad_norm": 197.66744995117188, + "learning_rate": 1.9324435615647904e-06, + "loss": 13.7109, + "step": 15228 + }, + { + "epoch": 0.14415804469855453, + "grad_norm": 340.63165283203125, + "learning_rate": 1.93243248378161e-06, + "loss": 21.25, + "step": 15229 + }, + { + "epoch": 0.1441675107202696, + "grad_norm": 246.25238037109375, + "learning_rate": 1.932421405122003e-06, + "loss": 18.2969, + "step": 15230 + }, + { + "epoch": 0.14417697674198465, + "grad_norm": 308.67901611328125, + "learning_rate": 1.9324103255859808e-06, + "loss": 17.6992, + "step": 15231 + }, + { + "epoch": 0.1441864427636997, + "grad_norm": 442.6431579589844, + "learning_rate": 1.9323992451735537e-06, + "loss": 18.0469, + "step": 15232 + }, + { + "epoch": 0.14419590878541474, + "grad_norm": 1405.720947265625, + "learning_rate": 1.932388163884732e-06, + "loss": 43.5234, + "step": 15233 + }, + { + "epoch": 0.1442053748071298, + "grad_norm": 150.04327392578125, + "learning_rate": 1.932377081719526e-06, + "loss": 22.6523, + "step": 15234 + }, + { + "epoch": 0.14421484082884486, + "grad_norm": 427.5689697265625, + "learning_rate": 1.932365998677946e-06, + "loss": 33.4219, + "step": 15235 + }, + { + "epoch": 0.14422430685055992, + "grad_norm": 745.6260986328125, + "learning_rate": 1.932354914760003e-06, + "loss": 64.875, + "step": 15236 + }, + { + "epoch": 0.14423377287227496, + "grad_norm": 328.2194519042969, + "learning_rate": 1.9323438299657067e-06, + "loss": 18.7812, + "step": 15237 + }, + { + "epoch": 0.14424323889399002, + "grad_norm": 241.6074676513672, + "learning_rate": 1.932332744295068e-06, + "loss": 22.6719, + "step": 15238 + }, + { + "epoch": 0.14425270491570508, + "grad_norm": 344.4136657714844, + "learning_rate": 1.932321657748097e-06, + "loss": 33.8906, + "step": 15239 + }, + { + "epoch": 0.14426217093742014, + "grad_norm": 691.70703125, + "learning_rate": 1.9323105703248047e-06, + "loss": 29.3203, + "step": 15240 + }, + { + "epoch": 0.1442716369591352, + "grad_norm": 354.4599914550781, + "learning_rate": 1.9322994820252006e-06, + "loss": 9.4141, + "step": 15241 + }, + { + "epoch": 0.14428110298085023, + "grad_norm": 260.04888916015625, + "learning_rate": 1.9322883928492957e-06, + "loss": 10.3398, + "step": 15242 + }, + { + "epoch": 0.1442905690025653, + "grad_norm": 509.20556640625, + "learning_rate": 1.932277302797101e-06, + "loss": 31.5781, + "step": 15243 + }, + { + "epoch": 0.14430003502428035, + "grad_norm": 2.838557004928589, + "learning_rate": 1.9322662118686256e-06, + "loss": 0.7949, + "step": 15244 + }, + { + "epoch": 0.1443095010459954, + "grad_norm": 555.5860595703125, + "learning_rate": 1.932255120063881e-06, + "loss": 40.1016, + "step": 15245 + }, + { + "epoch": 0.14431896706771044, + "grad_norm": 715.8921508789062, + "learning_rate": 1.932244027382877e-06, + "loss": 76.6562, + "step": 15246 + }, + { + "epoch": 0.1443284330894255, + "grad_norm": 404.9414978027344, + "learning_rate": 1.9322329338256244e-06, + "loss": 30.1719, + "step": 15247 + }, + { + "epoch": 0.14433789911114056, + "grad_norm": 597.4869384765625, + "learning_rate": 1.932221839392133e-06, + "loss": 45.1719, + "step": 15248 + }, + { + "epoch": 0.14434736513285562, + "grad_norm": 3.5332748889923096, + "learning_rate": 1.9322107440824145e-06, + "loss": 1.0039, + "step": 15249 + }, + { + "epoch": 0.14435683115457068, + "grad_norm": 306.0921325683594, + "learning_rate": 1.932199647896478e-06, + "loss": 37.5781, + "step": 15250 + }, + { + "epoch": 0.1443662971762857, + "grad_norm": 270.4045104980469, + "learning_rate": 1.932188550834335e-06, + "loss": 16.3125, + "step": 15251 + }, + { + "epoch": 0.14437576319800077, + "grad_norm": 477.49835205078125, + "learning_rate": 1.9321774528959947e-06, + "loss": 54.8594, + "step": 15252 + }, + { + "epoch": 0.14438522921971583, + "grad_norm": 328.686767578125, + "learning_rate": 1.932166354081469e-06, + "loss": 25.3125, + "step": 15253 + }, + { + "epoch": 0.1443946952414309, + "grad_norm": 666.1035766601562, + "learning_rate": 1.932155254390767e-06, + "loss": 45.6875, + "step": 15254 + }, + { + "epoch": 0.14440416126314592, + "grad_norm": 153.6597137451172, + "learning_rate": 1.9321441538238997e-06, + "loss": 9.8906, + "step": 15255 + }, + { + "epoch": 0.14441362728486098, + "grad_norm": 151.93350219726562, + "learning_rate": 1.932133052380878e-06, + "loss": 22.0234, + "step": 15256 + }, + { + "epoch": 0.14442309330657604, + "grad_norm": 507.10968017578125, + "learning_rate": 1.9321219500617113e-06, + "loss": 30.6094, + "step": 15257 + }, + { + "epoch": 0.1444325593282911, + "grad_norm": 2.7130303382873535, + "learning_rate": 1.932110846866411e-06, + "loss": 0.9194, + "step": 15258 + }, + { + "epoch": 0.14444202535000616, + "grad_norm": 302.67071533203125, + "learning_rate": 1.9320997427949872e-06, + "loss": 17.3438, + "step": 15259 + }, + { + "epoch": 0.1444514913717212, + "grad_norm": 160.24560546875, + "learning_rate": 1.93208863784745e-06, + "loss": 19.4297, + "step": 15260 + }, + { + "epoch": 0.14446095739343626, + "grad_norm": 208.5304718017578, + "learning_rate": 1.9320775320238104e-06, + "loss": 10.1543, + "step": 15261 + }, + { + "epoch": 0.14447042341515132, + "grad_norm": 220.9407196044922, + "learning_rate": 1.932066425324078e-06, + "loss": 11.0391, + "step": 15262 + }, + { + "epoch": 0.14447988943686638, + "grad_norm": 304.44000244140625, + "learning_rate": 1.9320553177482644e-06, + "loss": 28.8672, + "step": 15263 + }, + { + "epoch": 0.1444893554585814, + "grad_norm": 283.9429016113281, + "learning_rate": 1.932044209296379e-06, + "loss": 10.3789, + "step": 15264 + }, + { + "epoch": 0.14449882148029647, + "grad_norm": 613.2890625, + "learning_rate": 1.9320330999684333e-06, + "loss": 65.6562, + "step": 15265 + }, + { + "epoch": 0.14450828750201153, + "grad_norm": 611.1076049804688, + "learning_rate": 1.932021989764436e-06, + "loss": 18.3008, + "step": 15266 + }, + { + "epoch": 0.1445177535237266, + "grad_norm": 1202.2972412109375, + "learning_rate": 1.9320108786844e-06, + "loss": 32.1328, + "step": 15267 + }, + { + "epoch": 0.14452721954544165, + "grad_norm": 408.3705749511719, + "learning_rate": 1.9319997667283332e-06, + "loss": 33.4609, + "step": 15268 + }, + { + "epoch": 0.14453668556715668, + "grad_norm": 728.7333374023438, + "learning_rate": 1.9319886538962482e-06, + "loss": 37.0703, + "step": 15269 + }, + { + "epoch": 0.14454615158887174, + "grad_norm": 358.7647399902344, + "learning_rate": 1.931977540188154e-06, + "loss": 37.1875, + "step": 15270 + }, + { + "epoch": 0.1445556176105868, + "grad_norm": 268.5113830566406, + "learning_rate": 1.931966425604062e-06, + "loss": 22.1016, + "step": 15271 + }, + { + "epoch": 0.14456508363230186, + "grad_norm": 2.757086753845215, + "learning_rate": 1.931955310143982e-06, + "loss": 0.8408, + "step": 15272 + }, + { + "epoch": 0.1445745496540169, + "grad_norm": 252.75143432617188, + "learning_rate": 1.931944193807924e-06, + "loss": 17.9883, + "step": 15273 + }, + { + "epoch": 0.14458401567573195, + "grad_norm": 235.9954833984375, + "learning_rate": 1.9319330765958998e-06, + "loss": 16.5391, + "step": 15274 + }, + { + "epoch": 0.144593481697447, + "grad_norm": 206.99440002441406, + "learning_rate": 1.9319219585079186e-06, + "loss": 20.1641, + "step": 15275 + }, + { + "epoch": 0.14460294771916207, + "grad_norm": 536.6063232421875, + "learning_rate": 1.9319108395439918e-06, + "loss": 34.5078, + "step": 15276 + }, + { + "epoch": 0.14461241374087713, + "grad_norm": 1250.3209228515625, + "learning_rate": 1.931899719704129e-06, + "loss": 63.7578, + "step": 15277 + }, + { + "epoch": 0.14462187976259216, + "grad_norm": 217.1771697998047, + "learning_rate": 1.9318885989883418e-06, + "loss": 17.8594, + "step": 15278 + }, + { + "epoch": 0.14463134578430722, + "grad_norm": 191.97279357910156, + "learning_rate": 1.9318774773966397e-06, + "loss": 21.8594, + "step": 15279 + }, + { + "epoch": 0.14464081180602228, + "grad_norm": 404.46795654296875, + "learning_rate": 1.931866354929033e-06, + "loss": 56.7109, + "step": 15280 + }, + { + "epoch": 0.14465027782773734, + "grad_norm": 263.25494384765625, + "learning_rate": 1.931855231585533e-06, + "loss": 20.2031, + "step": 15281 + }, + { + "epoch": 0.14465974384945238, + "grad_norm": 989.7471923828125, + "learning_rate": 1.9318441073661494e-06, + "loss": 49.8789, + "step": 15282 + }, + { + "epoch": 0.14466920987116744, + "grad_norm": 386.1849060058594, + "learning_rate": 1.931832982270893e-06, + "loss": 23.7969, + "step": 15283 + }, + { + "epoch": 0.1446786758928825, + "grad_norm": 1037.1513671875, + "learning_rate": 1.9318218562997742e-06, + "loss": 41.0625, + "step": 15284 + }, + { + "epoch": 0.14468814191459756, + "grad_norm": 292.0083923339844, + "learning_rate": 1.9318107294528036e-06, + "loss": 16.2812, + "step": 15285 + }, + { + "epoch": 0.14469760793631262, + "grad_norm": 197.6056365966797, + "learning_rate": 1.9317996017299912e-06, + "loss": 12.5781, + "step": 15286 + }, + { + "epoch": 0.14470707395802765, + "grad_norm": 3.1623575687408447, + "learning_rate": 1.9317884731313484e-06, + "loss": 1.0049, + "step": 15287 + }, + { + "epoch": 0.1447165399797427, + "grad_norm": 288.075927734375, + "learning_rate": 1.9317773436568846e-06, + "loss": 20.9688, + "step": 15288 + }, + { + "epoch": 0.14472600600145777, + "grad_norm": 402.6111145019531, + "learning_rate": 1.931766213306611e-06, + "loss": 33.3047, + "step": 15289 + }, + { + "epoch": 0.14473547202317283, + "grad_norm": 2.9730167388916016, + "learning_rate": 1.9317550820805374e-06, + "loss": 0.7891, + "step": 15290 + }, + { + "epoch": 0.1447449380448879, + "grad_norm": 263.28118896484375, + "learning_rate": 1.931743949978675e-06, + "loss": 15.6133, + "step": 15291 + }, + { + "epoch": 0.14475440406660292, + "grad_norm": 2.9477014541625977, + "learning_rate": 1.931732817001034e-06, + "loss": 0.9399, + "step": 15292 + }, + { + "epoch": 0.14476387008831798, + "grad_norm": 1176.673583984375, + "learning_rate": 1.9317216831476243e-06, + "loss": 61.4062, + "step": 15293 + }, + { + "epoch": 0.14477333611003304, + "grad_norm": 350.12469482421875, + "learning_rate": 1.931710548418457e-06, + "loss": 22.3438, + "step": 15294 + }, + { + "epoch": 0.1447828021317481, + "grad_norm": 550.4080200195312, + "learning_rate": 1.9316994128135426e-06, + "loss": 18.7578, + "step": 15295 + }, + { + "epoch": 0.14479226815346313, + "grad_norm": 884.8142700195312, + "learning_rate": 1.931688276332891e-06, + "loss": 39.4688, + "step": 15296 + }, + { + "epoch": 0.1448017341751782, + "grad_norm": 800.2797241210938, + "learning_rate": 1.9316771389765133e-06, + "loss": 27.8398, + "step": 15297 + }, + { + "epoch": 0.14481120019689325, + "grad_norm": 404.05255126953125, + "learning_rate": 1.9316660007444195e-06, + "loss": 38.8867, + "step": 15298 + }, + { + "epoch": 0.1448206662186083, + "grad_norm": 339.6365966796875, + "learning_rate": 1.9316548616366208e-06, + "loss": 37.5234, + "step": 15299 + }, + { + "epoch": 0.14483013224032337, + "grad_norm": 1048.458984375, + "learning_rate": 1.9316437216531263e-06, + "loss": 61.7109, + "step": 15300 + }, + { + "epoch": 0.1448395982620384, + "grad_norm": 328.7762451171875, + "learning_rate": 1.931632580793948e-06, + "loss": 22.5703, + "step": 15301 + }, + { + "epoch": 0.14484906428375346, + "grad_norm": 255.71669006347656, + "learning_rate": 1.931621439059095e-06, + "loss": 17.8438, + "step": 15302 + }, + { + "epoch": 0.14485853030546852, + "grad_norm": 267.7637634277344, + "learning_rate": 1.9316102964485793e-06, + "loss": 27.6562, + "step": 15303 + }, + { + "epoch": 0.14486799632718358, + "grad_norm": 245.42233276367188, + "learning_rate": 1.93159915296241e-06, + "loss": 14.6016, + "step": 15304 + }, + { + "epoch": 0.14487746234889862, + "grad_norm": 2.9895009994506836, + "learning_rate": 1.9315880086005984e-06, + "loss": 0.9517, + "step": 15305 + }, + { + "epoch": 0.14488692837061368, + "grad_norm": 905.8873901367188, + "learning_rate": 1.9315768633631543e-06, + "loss": 52.4062, + "step": 15306 + }, + { + "epoch": 0.14489639439232874, + "grad_norm": 292.3561096191406, + "learning_rate": 1.9315657172500885e-06, + "loss": 30.6094, + "step": 15307 + }, + { + "epoch": 0.1449058604140438, + "grad_norm": 180.87374877929688, + "learning_rate": 1.931554570261412e-06, + "loss": 23.8906, + "step": 15308 + }, + { + "epoch": 0.14491532643575886, + "grad_norm": 284.0401916503906, + "learning_rate": 1.9315434223971346e-06, + "loss": 16.0508, + "step": 15309 + }, + { + "epoch": 0.1449247924574739, + "grad_norm": 400.8475036621094, + "learning_rate": 1.9315322736572668e-06, + "loss": 50.0469, + "step": 15310 + }, + { + "epoch": 0.14493425847918895, + "grad_norm": 3.252338171005249, + "learning_rate": 1.9315211240418193e-06, + "loss": 0.9819, + "step": 15311 + }, + { + "epoch": 0.144943724500904, + "grad_norm": 643.2327880859375, + "learning_rate": 1.9315099735508028e-06, + "loss": 30.6953, + "step": 15312 + }, + { + "epoch": 0.14495319052261907, + "grad_norm": 322.9156494140625, + "learning_rate": 1.9314988221842274e-06, + "loss": 25.6719, + "step": 15313 + }, + { + "epoch": 0.1449626565443341, + "grad_norm": 633.7265014648438, + "learning_rate": 1.9314876699421036e-06, + "loss": 38.8125, + "step": 15314 + }, + { + "epoch": 0.14497212256604916, + "grad_norm": 311.7604675292969, + "learning_rate": 1.931476516824442e-06, + "loss": 18.1875, + "step": 15315 + }, + { + "epoch": 0.14498158858776422, + "grad_norm": 334.6456298828125, + "learning_rate": 1.9314653628312536e-06, + "loss": 31.5703, + "step": 15316 + }, + { + "epoch": 0.14499105460947928, + "grad_norm": 475.5924072265625, + "learning_rate": 1.9314542079625476e-06, + "loss": 36.75, + "step": 15317 + }, + { + "epoch": 0.14500052063119434, + "grad_norm": 3.6958518028259277, + "learning_rate": 1.9314430522183356e-06, + "loss": 0.8772, + "step": 15318 + }, + { + "epoch": 0.14500998665290937, + "grad_norm": 296.15545654296875, + "learning_rate": 1.9314318955986274e-06, + "loss": 21.0391, + "step": 15319 + }, + { + "epoch": 0.14501945267462443, + "grad_norm": 274.33612060546875, + "learning_rate": 1.9314207381034345e-06, + "loss": 9.7383, + "step": 15320 + }, + { + "epoch": 0.1450289186963395, + "grad_norm": 172.7273712158203, + "learning_rate": 1.931409579732766e-06, + "loss": 20.8359, + "step": 15321 + }, + { + "epoch": 0.14503838471805455, + "grad_norm": 515.306884765625, + "learning_rate": 1.9313984204866336e-06, + "loss": 35.2656, + "step": 15322 + }, + { + "epoch": 0.14504785073976958, + "grad_norm": 345.9835205078125, + "learning_rate": 1.931387260365047e-06, + "loss": 43.0312, + "step": 15323 + }, + { + "epoch": 0.14505731676148464, + "grad_norm": 823.2337646484375, + "learning_rate": 1.9313760993680172e-06, + "loss": 18.5508, + "step": 15324 + }, + { + "epoch": 0.1450667827831997, + "grad_norm": 538.2637329101562, + "learning_rate": 1.9313649374955537e-06, + "loss": 51.9297, + "step": 15325 + }, + { + "epoch": 0.14507624880491476, + "grad_norm": 324.6404724121094, + "learning_rate": 1.931353774747669e-06, + "loss": 24.0859, + "step": 15326 + }, + { + "epoch": 0.14508571482662982, + "grad_norm": 1087.0177001953125, + "learning_rate": 1.9313426111243713e-06, + "loss": 19.8633, + "step": 15327 + }, + { + "epoch": 0.14509518084834486, + "grad_norm": 385.94769287109375, + "learning_rate": 1.9313314466256723e-06, + "loss": 14.5312, + "step": 15328 + }, + { + "epoch": 0.14510464687005992, + "grad_norm": 218.30252075195312, + "learning_rate": 1.9313202812515826e-06, + "loss": 25.75, + "step": 15329 + }, + { + "epoch": 0.14511411289177498, + "grad_norm": 281.4032287597656, + "learning_rate": 1.9313091150021123e-06, + "loss": 9.6641, + "step": 15330 + }, + { + "epoch": 0.14512357891349004, + "grad_norm": 728.920654296875, + "learning_rate": 1.9312979478772723e-06, + "loss": 14.4961, + "step": 15331 + }, + { + "epoch": 0.14513304493520507, + "grad_norm": 580.5492553710938, + "learning_rate": 1.9312867798770726e-06, + "loss": 9.0742, + "step": 15332 + }, + { + "epoch": 0.14514251095692013, + "grad_norm": 377.46063232421875, + "learning_rate": 1.931275611001524e-06, + "loss": 36.6797, + "step": 15333 + }, + { + "epoch": 0.1451519769786352, + "grad_norm": 641.3618774414062, + "learning_rate": 1.9312644412506363e-06, + "loss": 21.5117, + "step": 15334 + }, + { + "epoch": 0.14516144300035025, + "grad_norm": 268.2963562011719, + "learning_rate": 1.9312532706244214e-06, + "loss": 25.9219, + "step": 15335 + }, + { + "epoch": 0.1451709090220653, + "grad_norm": 615.53271484375, + "learning_rate": 1.9312420991228886e-06, + "loss": 35.7734, + "step": 15336 + }, + { + "epoch": 0.14518037504378034, + "grad_norm": 586.6865234375, + "learning_rate": 1.931230926746049e-06, + "loss": 33.7656, + "step": 15337 + }, + { + "epoch": 0.1451898410654954, + "grad_norm": 1269.653076171875, + "learning_rate": 1.9312197534939128e-06, + "loss": 28.4805, + "step": 15338 + }, + { + "epoch": 0.14519930708721046, + "grad_norm": 419.89361572265625, + "learning_rate": 1.9312085793664905e-06, + "loss": 39.4844, + "step": 15339 + }, + { + "epoch": 0.14520877310892552, + "grad_norm": 378.0401916503906, + "learning_rate": 1.9311974043637928e-06, + "loss": 25.6016, + "step": 15340 + }, + { + "epoch": 0.14521823913064055, + "grad_norm": 341.60369873046875, + "learning_rate": 1.9311862284858304e-06, + "loss": 42.125, + "step": 15341 + }, + { + "epoch": 0.1452277051523556, + "grad_norm": 772.61083984375, + "learning_rate": 1.9311750517326133e-06, + "loss": 52.3984, + "step": 15342 + }, + { + "epoch": 0.14523717117407067, + "grad_norm": 604.82421875, + "learning_rate": 1.931163874104152e-06, + "loss": 31.7188, + "step": 15343 + }, + { + "epoch": 0.14524663719578573, + "grad_norm": 467.3868103027344, + "learning_rate": 1.9311526956004577e-06, + "loss": 52.8438, + "step": 15344 + }, + { + "epoch": 0.1452561032175008, + "grad_norm": 153.61558532714844, + "learning_rate": 1.93114151622154e-06, + "loss": 10.9844, + "step": 15345 + }, + { + "epoch": 0.14526556923921582, + "grad_norm": 318.7857971191406, + "learning_rate": 1.93113033596741e-06, + "loss": 36.2812, + "step": 15346 + }, + { + "epoch": 0.14527503526093088, + "grad_norm": 3.182764768600464, + "learning_rate": 1.931119154838078e-06, + "loss": 0.8599, + "step": 15347 + }, + { + "epoch": 0.14528450128264594, + "grad_norm": 288.125, + "learning_rate": 1.9311079728335547e-06, + "loss": 20.1406, + "step": 15348 + }, + { + "epoch": 0.145293967304361, + "grad_norm": 625.4888305664062, + "learning_rate": 1.9310967899538503e-06, + "loss": 50.4844, + "step": 15349 + }, + { + "epoch": 0.14530343332607604, + "grad_norm": 3.9516122341156006, + "learning_rate": 1.9310856061989758e-06, + "loss": 1.0571, + "step": 15350 + }, + { + "epoch": 0.1453128993477911, + "grad_norm": 373.8796691894531, + "learning_rate": 1.9310744215689413e-06, + "loss": 20.625, + "step": 15351 + }, + { + "epoch": 0.14532236536950616, + "grad_norm": 364.927734375, + "learning_rate": 1.9310632360637575e-06, + "loss": 27.7344, + "step": 15352 + }, + { + "epoch": 0.14533183139122122, + "grad_norm": 1086.2034912109375, + "learning_rate": 1.9310520496834345e-06, + "loss": 27.9258, + "step": 15353 + }, + { + "epoch": 0.14534129741293628, + "grad_norm": 535.2662963867188, + "learning_rate": 1.9310408624279833e-06, + "loss": 40.5391, + "step": 15354 + }, + { + "epoch": 0.1453507634346513, + "grad_norm": 397.5802307128906, + "learning_rate": 1.9310296742974145e-06, + "loss": 34.0156, + "step": 15355 + }, + { + "epoch": 0.14536022945636637, + "grad_norm": 418.2440490722656, + "learning_rate": 1.931018485291738e-06, + "loss": 21.2266, + "step": 15356 + }, + { + "epoch": 0.14536969547808143, + "grad_norm": 399.5590515136719, + "learning_rate": 1.931007295410965e-06, + "loss": 36.0312, + "step": 15357 + }, + { + "epoch": 0.1453791614997965, + "grad_norm": 3.081341028213501, + "learning_rate": 1.9309961046551055e-06, + "loss": 1.0566, + "step": 15358 + }, + { + "epoch": 0.14538862752151152, + "grad_norm": 398.389404296875, + "learning_rate": 1.93098491302417e-06, + "loss": 48.6719, + "step": 15359 + }, + { + "epoch": 0.14539809354322658, + "grad_norm": 420.65216064453125, + "learning_rate": 1.9309737205181696e-06, + "loss": 51.9531, + "step": 15360 + }, + { + "epoch": 0.14540755956494164, + "grad_norm": 212.00119018554688, + "learning_rate": 1.9309625271371142e-06, + "loss": 16.6172, + "step": 15361 + }, + { + "epoch": 0.1454170255866567, + "grad_norm": 145.61851501464844, + "learning_rate": 1.930951332881015e-06, + "loss": 25.5391, + "step": 15362 + }, + { + "epoch": 0.14542649160837176, + "grad_norm": 668.5740966796875, + "learning_rate": 1.930940137749882e-06, + "loss": 43.4844, + "step": 15363 + }, + { + "epoch": 0.1454359576300868, + "grad_norm": 514.5537109375, + "learning_rate": 1.9309289417437257e-06, + "loss": 15.9688, + "step": 15364 + }, + { + "epoch": 0.14544542365180185, + "grad_norm": 398.42425537109375, + "learning_rate": 1.930917744862557e-06, + "loss": 26.2422, + "step": 15365 + }, + { + "epoch": 0.1454548896735169, + "grad_norm": 295.0373229980469, + "learning_rate": 1.930906547106386e-06, + "loss": 23.6875, + "step": 15366 + }, + { + "epoch": 0.14546435569523197, + "grad_norm": 491.82635498046875, + "learning_rate": 1.9308953484752233e-06, + "loss": 45.625, + "step": 15367 + }, + { + "epoch": 0.145473821716947, + "grad_norm": 446.44293212890625, + "learning_rate": 1.9308841489690795e-06, + "loss": 36.7188, + "step": 15368 + }, + { + "epoch": 0.14548328773866206, + "grad_norm": 309.190185546875, + "learning_rate": 1.930872948587965e-06, + "loss": 11.1211, + "step": 15369 + }, + { + "epoch": 0.14549275376037712, + "grad_norm": 336.8580627441406, + "learning_rate": 1.9308617473318906e-06, + "loss": 22.6641, + "step": 15370 + }, + { + "epoch": 0.14550221978209218, + "grad_norm": 295.9739990234375, + "learning_rate": 1.930850545200867e-06, + "loss": 17.1328, + "step": 15371 + }, + { + "epoch": 0.14551168580380724, + "grad_norm": 256.2739562988281, + "learning_rate": 1.9308393421949044e-06, + "loss": 19.8672, + "step": 15372 + }, + { + "epoch": 0.14552115182552228, + "grad_norm": 418.40032958984375, + "learning_rate": 1.9308281383140133e-06, + "loss": 48.5938, + "step": 15373 + }, + { + "epoch": 0.14553061784723734, + "grad_norm": 830.4214477539062, + "learning_rate": 1.930816933558204e-06, + "loss": 16.2148, + "step": 15374 + }, + { + "epoch": 0.1455400838689524, + "grad_norm": 304.38629150390625, + "learning_rate": 1.930805727927488e-06, + "loss": 16.125, + "step": 15375 + }, + { + "epoch": 0.14554954989066746, + "grad_norm": 542.42724609375, + "learning_rate": 1.9307945214218746e-06, + "loss": 41.8594, + "step": 15376 + }, + { + "epoch": 0.14555901591238252, + "grad_norm": 353.9496154785156, + "learning_rate": 1.930783314041375e-06, + "loss": 11.9414, + "step": 15377 + }, + { + "epoch": 0.14556848193409755, + "grad_norm": 289.4814453125, + "learning_rate": 1.9307721057859997e-06, + "loss": 23.1289, + "step": 15378 + }, + { + "epoch": 0.1455779479558126, + "grad_norm": 2317.79443359375, + "learning_rate": 1.930760896655759e-06, + "loss": 20.2891, + "step": 15379 + }, + { + "epoch": 0.14558741397752767, + "grad_norm": 520.6400756835938, + "learning_rate": 1.930749686650664e-06, + "loss": 50.9531, + "step": 15380 + }, + { + "epoch": 0.14559687999924273, + "grad_norm": 310.4266357421875, + "learning_rate": 1.930738475770725e-06, + "loss": 17.2266, + "step": 15381 + }, + { + "epoch": 0.14560634602095776, + "grad_norm": 138.8509063720703, + "learning_rate": 1.930727264015952e-06, + "loss": 13.9688, + "step": 15382 + }, + { + "epoch": 0.14561581204267282, + "grad_norm": 238.36622619628906, + "learning_rate": 1.9307160513863558e-06, + "loss": 19.7422, + "step": 15383 + }, + { + "epoch": 0.14562527806438788, + "grad_norm": 172.1270294189453, + "learning_rate": 1.930704837881947e-06, + "loss": 16.5625, + "step": 15384 + }, + { + "epoch": 0.14563474408610294, + "grad_norm": 174.24119567871094, + "learning_rate": 1.9306936235027363e-06, + "loss": 16.4766, + "step": 15385 + }, + { + "epoch": 0.145644210107818, + "grad_norm": 281.9092712402344, + "learning_rate": 1.9306824082487343e-06, + "loss": 16.6914, + "step": 15386 + }, + { + "epoch": 0.14565367612953303, + "grad_norm": 460.2672119140625, + "learning_rate": 1.9306711921199514e-06, + "loss": 17.0234, + "step": 15387 + }, + { + "epoch": 0.1456631421512481, + "grad_norm": 348.0470275878906, + "learning_rate": 1.930659975116398e-06, + "loss": 27.9297, + "step": 15388 + }, + { + "epoch": 0.14567260817296315, + "grad_norm": 313.9343566894531, + "learning_rate": 1.930648757238085e-06, + "loss": 14.9219, + "step": 15389 + }, + { + "epoch": 0.1456820741946782, + "grad_norm": 223.55345153808594, + "learning_rate": 1.9306375384850223e-06, + "loss": 20.0078, + "step": 15390 + }, + { + "epoch": 0.14569154021639324, + "grad_norm": 350.4989318847656, + "learning_rate": 1.9306263188572212e-06, + "loss": 44.5117, + "step": 15391 + }, + { + "epoch": 0.1457010062381083, + "grad_norm": 232.56321716308594, + "learning_rate": 1.9306150983546918e-06, + "loss": 12.8008, + "step": 15392 + }, + { + "epoch": 0.14571047225982336, + "grad_norm": 192.4589385986328, + "learning_rate": 1.9306038769774445e-06, + "loss": 17.8086, + "step": 15393 + }, + { + "epoch": 0.14571993828153842, + "grad_norm": 2.910679817199707, + "learning_rate": 1.9305926547254905e-06, + "loss": 0.9121, + "step": 15394 + }, + { + "epoch": 0.14572940430325348, + "grad_norm": 1373.914306640625, + "learning_rate": 1.93058143159884e-06, + "loss": 8.0703, + "step": 15395 + }, + { + "epoch": 0.14573887032496852, + "grad_norm": 258.8292236328125, + "learning_rate": 1.930570207597503e-06, + "loss": 20.8672, + "step": 15396 + }, + { + "epoch": 0.14574833634668358, + "grad_norm": 401.4604797363281, + "learning_rate": 1.9305589827214907e-06, + "loss": 36.875, + "step": 15397 + }, + { + "epoch": 0.14575780236839864, + "grad_norm": 487.74658203125, + "learning_rate": 1.9305477569708137e-06, + "loss": 44.3281, + "step": 15398 + }, + { + "epoch": 0.1457672683901137, + "grad_norm": 191.20655822753906, + "learning_rate": 1.930536530345482e-06, + "loss": 20.6484, + "step": 15399 + }, + { + "epoch": 0.14577673441182873, + "grad_norm": 312.29095458984375, + "learning_rate": 1.9305253028455067e-06, + "loss": 23.9844, + "step": 15400 + }, + { + "epoch": 0.1457862004335438, + "grad_norm": 406.6753845214844, + "learning_rate": 1.9305140744708982e-06, + "loss": 22.25, + "step": 15401 + }, + { + "epoch": 0.14579566645525885, + "grad_norm": 333.5839538574219, + "learning_rate": 1.9305028452216673e-06, + "loss": 18.1406, + "step": 15402 + }, + { + "epoch": 0.1458051324769739, + "grad_norm": 232.5741424560547, + "learning_rate": 1.9304916150978236e-06, + "loss": 23.4766, + "step": 15403 + }, + { + "epoch": 0.14581459849868897, + "grad_norm": 337.94671630859375, + "learning_rate": 1.930480384099379e-06, + "loss": 10.1836, + "step": 15404 + }, + { + "epoch": 0.145824064520404, + "grad_norm": 2.7893576622009277, + "learning_rate": 1.930469152226343e-06, + "loss": 0.832, + "step": 15405 + }, + { + "epoch": 0.14583353054211906, + "grad_norm": 440.8857116699219, + "learning_rate": 1.9304579194787264e-06, + "loss": 28.7969, + "step": 15406 + }, + { + "epoch": 0.14584299656383412, + "grad_norm": 587.2883911132812, + "learning_rate": 1.93044668585654e-06, + "loss": 43.3984, + "step": 15407 + }, + { + "epoch": 0.14585246258554918, + "grad_norm": 422.9544982910156, + "learning_rate": 1.9304354513597943e-06, + "loss": 28.2188, + "step": 15408 + }, + { + "epoch": 0.1458619286072642, + "grad_norm": 320.99261474609375, + "learning_rate": 1.9304242159884998e-06, + "loss": 24.75, + "step": 15409 + }, + { + "epoch": 0.14587139462897927, + "grad_norm": 289.92755126953125, + "learning_rate": 1.930412979742667e-06, + "loss": 19.8594, + "step": 15410 + }, + { + "epoch": 0.14588086065069433, + "grad_norm": 3.1352522373199463, + "learning_rate": 1.9304017426223066e-06, + "loss": 0.9209, + "step": 15411 + }, + { + "epoch": 0.1458903266724094, + "grad_norm": 307.08514404296875, + "learning_rate": 1.930390504627429e-06, + "loss": 33.5312, + "step": 15412 + }, + { + "epoch": 0.14589979269412445, + "grad_norm": 446.5391845703125, + "learning_rate": 1.930379265758045e-06, + "loss": 44.9688, + "step": 15413 + }, + { + "epoch": 0.14590925871583948, + "grad_norm": 166.76451110839844, + "learning_rate": 1.930368026014165e-06, + "loss": 14.3906, + "step": 15414 + }, + { + "epoch": 0.14591872473755454, + "grad_norm": 215.72486877441406, + "learning_rate": 1.9303567853957995e-06, + "loss": 22.0781, + "step": 15415 + }, + { + "epoch": 0.1459281907592696, + "grad_norm": 373.6656494140625, + "learning_rate": 1.9303455439029593e-06, + "loss": 22.1602, + "step": 15416 + }, + { + "epoch": 0.14593765678098466, + "grad_norm": 659.1682739257812, + "learning_rate": 1.9303343015356545e-06, + "loss": 27.3906, + "step": 15417 + }, + { + "epoch": 0.1459471228026997, + "grad_norm": 2.9057343006134033, + "learning_rate": 1.9303230582938963e-06, + "loss": 0.7339, + "step": 15418 + }, + { + "epoch": 0.14595658882441476, + "grad_norm": 492.9624938964844, + "learning_rate": 1.9303118141776946e-06, + "loss": 41.1406, + "step": 15419 + }, + { + "epoch": 0.14596605484612982, + "grad_norm": 463.8767395019531, + "learning_rate": 1.9303005691870607e-06, + "loss": 21.0938, + "step": 15420 + }, + { + "epoch": 0.14597552086784488, + "grad_norm": 355.8436279296875, + "learning_rate": 1.9302893233220045e-06, + "loss": 27.2656, + "step": 15421 + }, + { + "epoch": 0.14598498688955994, + "grad_norm": 520.2819213867188, + "learning_rate": 1.9302780765825372e-06, + "loss": 13.832, + "step": 15422 + }, + { + "epoch": 0.14599445291127497, + "grad_norm": 332.2615051269531, + "learning_rate": 1.9302668289686685e-06, + "loss": 26.5312, + "step": 15423 + }, + { + "epoch": 0.14600391893299003, + "grad_norm": 664.0385131835938, + "learning_rate": 1.9302555804804098e-06, + "loss": 52.0625, + "step": 15424 + }, + { + "epoch": 0.1460133849547051, + "grad_norm": 509.80145263671875, + "learning_rate": 1.9302443311177716e-06, + "loss": 48.7969, + "step": 15425 + }, + { + "epoch": 0.14602285097642015, + "grad_norm": 518.6346435546875, + "learning_rate": 1.930233080880764e-06, + "loss": 25.1328, + "step": 15426 + }, + { + "epoch": 0.14603231699813518, + "grad_norm": 254.66073608398438, + "learning_rate": 1.9302218297693973e-06, + "loss": 22.8516, + "step": 15427 + }, + { + "epoch": 0.14604178301985024, + "grad_norm": 369.2476806640625, + "learning_rate": 1.9302105777836832e-06, + "loss": 23.2734, + "step": 15428 + }, + { + "epoch": 0.1460512490415653, + "grad_norm": 554.47607421875, + "learning_rate": 1.930199324923632e-06, + "loss": 39.7422, + "step": 15429 + }, + { + "epoch": 0.14606071506328036, + "grad_norm": 295.1675720214844, + "learning_rate": 1.930188071189253e-06, + "loss": 24.3438, + "step": 15430 + }, + { + "epoch": 0.14607018108499542, + "grad_norm": 303.25250244140625, + "learning_rate": 1.9301768165805584e-06, + "loss": 18.3516, + "step": 15431 + }, + { + "epoch": 0.14607964710671045, + "grad_norm": 359.349609375, + "learning_rate": 1.9301655610975584e-06, + "loss": 22.6172, + "step": 15432 + }, + { + "epoch": 0.1460891131284255, + "grad_norm": 621.8168334960938, + "learning_rate": 1.9301543047402628e-06, + "loss": 36.168, + "step": 15433 + }, + { + "epoch": 0.14609857915014057, + "grad_norm": 683.5079956054688, + "learning_rate": 1.9301430475086826e-06, + "loss": 36.8516, + "step": 15434 + }, + { + "epoch": 0.14610804517185563, + "grad_norm": 336.6072082519531, + "learning_rate": 1.9301317894028285e-06, + "loss": 22.0234, + "step": 15435 + }, + { + "epoch": 0.14611751119357066, + "grad_norm": 179.83294677734375, + "learning_rate": 1.930120530422711e-06, + "loss": 16.3672, + "step": 15436 + }, + { + "epoch": 0.14612697721528572, + "grad_norm": 279.71673583984375, + "learning_rate": 1.930109270568341e-06, + "loss": 20.1367, + "step": 15437 + }, + { + "epoch": 0.14613644323700078, + "grad_norm": 222.47967529296875, + "learning_rate": 1.9300980098397284e-06, + "loss": 22.3359, + "step": 15438 + }, + { + "epoch": 0.14614590925871584, + "grad_norm": 179.10020446777344, + "learning_rate": 1.9300867482368844e-06, + "loss": 12.668, + "step": 15439 + }, + { + "epoch": 0.1461553752804309, + "grad_norm": 341.98583984375, + "learning_rate": 1.9300754857598193e-06, + "loss": 40.9297, + "step": 15440 + }, + { + "epoch": 0.14616484130214594, + "grad_norm": 2.586590528488159, + "learning_rate": 1.930064222408544e-06, + "loss": 0.8491, + "step": 15441 + }, + { + "epoch": 0.146174307323861, + "grad_norm": 639.0401611328125, + "learning_rate": 1.9300529581830686e-06, + "loss": 42.3438, + "step": 15442 + }, + { + "epoch": 0.14618377334557606, + "grad_norm": 287.8226318359375, + "learning_rate": 1.9300416930834036e-06, + "loss": 24.9141, + "step": 15443 + }, + { + "epoch": 0.14619323936729112, + "grad_norm": 1024.8544921875, + "learning_rate": 1.9300304271095604e-06, + "loss": 22.1641, + "step": 15444 + }, + { + "epoch": 0.14620270538900615, + "grad_norm": 275.9740905761719, + "learning_rate": 1.930019160261549e-06, + "loss": 40.2578, + "step": 15445 + }, + { + "epoch": 0.1462121714107212, + "grad_norm": 228.43661499023438, + "learning_rate": 1.93000789253938e-06, + "loss": 9.4648, + "step": 15446 + }, + { + "epoch": 0.14622163743243627, + "grad_norm": 169.8919219970703, + "learning_rate": 1.929996623943064e-06, + "loss": 19.8203, + "step": 15447 + }, + { + "epoch": 0.14623110345415133, + "grad_norm": 443.593994140625, + "learning_rate": 1.929985354472612e-06, + "loss": 21.3594, + "step": 15448 + }, + { + "epoch": 0.1462405694758664, + "grad_norm": 184.52334594726562, + "learning_rate": 1.929974084128034e-06, + "loss": 20.625, + "step": 15449 + }, + { + "epoch": 0.14625003549758142, + "grad_norm": 3.3386003971099854, + "learning_rate": 1.9299628129093406e-06, + "loss": 1.0498, + "step": 15450 + }, + { + "epoch": 0.14625950151929648, + "grad_norm": 609.5357055664062, + "learning_rate": 1.9299515408165433e-06, + "loss": 11.7891, + "step": 15451 + }, + { + "epoch": 0.14626896754101154, + "grad_norm": 659.7943725585938, + "learning_rate": 1.9299402678496515e-06, + "loss": 46.5938, + "step": 15452 + }, + { + "epoch": 0.1462784335627266, + "grad_norm": 632.5967407226562, + "learning_rate": 1.929928994008677e-06, + "loss": 25.4375, + "step": 15453 + }, + { + "epoch": 0.14628789958444163, + "grad_norm": 461.90093994140625, + "learning_rate": 1.929917719293629e-06, + "loss": 32.2812, + "step": 15454 + }, + { + "epoch": 0.1462973656061567, + "grad_norm": 404.2304992675781, + "learning_rate": 1.9299064437045194e-06, + "loss": 26.7656, + "step": 15455 + }, + { + "epoch": 0.14630683162787175, + "grad_norm": 210.68089294433594, + "learning_rate": 1.9298951672413577e-06, + "loss": 24.1406, + "step": 15456 + }, + { + "epoch": 0.1463162976495868, + "grad_norm": 328.6700134277344, + "learning_rate": 1.9298838899041554e-06, + "loss": 19.7578, + "step": 15457 + }, + { + "epoch": 0.14632576367130187, + "grad_norm": 262.3499450683594, + "learning_rate": 1.9298726116929227e-06, + "loss": 18.1797, + "step": 15458 + }, + { + "epoch": 0.1463352296930169, + "grad_norm": 338.2308044433594, + "learning_rate": 1.92986133260767e-06, + "loss": 18.6016, + "step": 15459 + }, + { + "epoch": 0.14634469571473196, + "grad_norm": 3.009578227996826, + "learning_rate": 1.9298500526484084e-06, + "loss": 0.7612, + "step": 15460 + }, + { + "epoch": 0.14635416173644702, + "grad_norm": 243.5596466064453, + "learning_rate": 1.9298387718151484e-06, + "loss": 10.9531, + "step": 15461 + }, + { + "epoch": 0.14636362775816208, + "grad_norm": 375.3368835449219, + "learning_rate": 1.9298274901079003e-06, + "loss": 44.0, + "step": 15462 + }, + { + "epoch": 0.14637309377987714, + "grad_norm": 189.6853485107422, + "learning_rate": 1.9298162075266747e-06, + "loss": 22.1562, + "step": 15463 + }, + { + "epoch": 0.14638255980159218, + "grad_norm": 555.4299926757812, + "learning_rate": 1.929804924071482e-06, + "loss": 29.0742, + "step": 15464 + }, + { + "epoch": 0.14639202582330724, + "grad_norm": 420.4630126953125, + "learning_rate": 1.9297936397423338e-06, + "loss": 35.3281, + "step": 15465 + }, + { + "epoch": 0.1464014918450223, + "grad_norm": 246.65846252441406, + "learning_rate": 1.92978235453924e-06, + "loss": 11.4668, + "step": 15466 + }, + { + "epoch": 0.14641095786673736, + "grad_norm": 376.1831970214844, + "learning_rate": 1.929771068462211e-06, + "loss": 21.6328, + "step": 15467 + }, + { + "epoch": 0.1464204238884524, + "grad_norm": 819.1104736328125, + "learning_rate": 1.929759781511258e-06, + "loss": 39.9531, + "step": 15468 + }, + { + "epoch": 0.14642988991016745, + "grad_norm": 306.78387451171875, + "learning_rate": 1.929748493686391e-06, + "loss": 17.2969, + "step": 15469 + }, + { + "epoch": 0.1464393559318825, + "grad_norm": 556.6681518554688, + "learning_rate": 1.9297372049876207e-06, + "loss": 50.0234, + "step": 15470 + }, + { + "epoch": 0.14644882195359757, + "grad_norm": 363.2102355957031, + "learning_rate": 1.9297259154149585e-06, + "loss": 21.5312, + "step": 15471 + }, + { + "epoch": 0.14645828797531263, + "grad_norm": 191.63482666015625, + "learning_rate": 1.929714624968414e-06, + "loss": 15.4219, + "step": 15472 + }, + { + "epoch": 0.14646775399702766, + "grad_norm": 389.99542236328125, + "learning_rate": 1.9297033336479985e-06, + "loss": 33.4844, + "step": 15473 + }, + { + "epoch": 0.14647722001874272, + "grad_norm": 222.23306274414062, + "learning_rate": 1.9296920414537223e-06, + "loss": 21.6797, + "step": 15474 + }, + { + "epoch": 0.14648668604045778, + "grad_norm": 272.63897705078125, + "learning_rate": 1.929680748385596e-06, + "loss": 29.1719, + "step": 15475 + }, + { + "epoch": 0.14649615206217284, + "grad_norm": 489.9247131347656, + "learning_rate": 1.92966945444363e-06, + "loss": 42.0312, + "step": 15476 + }, + { + "epoch": 0.14650561808388787, + "grad_norm": 149.4058380126953, + "learning_rate": 1.9296581596278355e-06, + "loss": 21.0781, + "step": 15477 + }, + { + "epoch": 0.14651508410560293, + "grad_norm": 349.3733825683594, + "learning_rate": 1.929646863938223e-06, + "loss": 48.7344, + "step": 15478 + }, + { + "epoch": 0.146524550127318, + "grad_norm": 494.1368713378906, + "learning_rate": 1.9296355673748024e-06, + "loss": 41.7812, + "step": 15479 + }, + { + "epoch": 0.14653401614903305, + "grad_norm": 3.13472580909729, + "learning_rate": 1.929624269937585e-06, + "loss": 0.9355, + "step": 15480 + }, + { + "epoch": 0.1465434821707481, + "grad_norm": 3.3371899127960205, + "learning_rate": 1.9296129716265815e-06, + "loss": 0.9385, + "step": 15481 + }, + { + "epoch": 0.14655294819246314, + "grad_norm": 509.9200134277344, + "learning_rate": 1.929601672441802e-06, + "loss": 40.3984, + "step": 15482 + }, + { + "epoch": 0.1465624142141782, + "grad_norm": 213.44924926757812, + "learning_rate": 1.9295903723832578e-06, + "loss": 25.5859, + "step": 15483 + }, + { + "epoch": 0.14657188023589326, + "grad_norm": 338.3318786621094, + "learning_rate": 1.9295790714509588e-06, + "loss": 19.7812, + "step": 15484 + }, + { + "epoch": 0.14658134625760832, + "grad_norm": 960.7919311523438, + "learning_rate": 1.929567769644916e-06, + "loss": 56.0, + "step": 15485 + }, + { + "epoch": 0.14659081227932336, + "grad_norm": 770.890380859375, + "learning_rate": 1.9295564669651402e-06, + "loss": 60.0938, + "step": 15486 + }, + { + "epoch": 0.14660027830103842, + "grad_norm": 373.2703552246094, + "learning_rate": 1.9295451634116414e-06, + "loss": 36.8125, + "step": 15487 + }, + { + "epoch": 0.14660974432275348, + "grad_norm": 2.5276918411254883, + "learning_rate": 1.9295338589844307e-06, + "loss": 0.8613, + "step": 15488 + }, + { + "epoch": 0.14661921034446854, + "grad_norm": 182.7940673828125, + "learning_rate": 1.929522553683519e-06, + "loss": 21.8281, + "step": 15489 + }, + { + "epoch": 0.1466286763661836, + "grad_norm": 1699.9224853515625, + "learning_rate": 1.929511247508916e-06, + "loss": 39.8672, + "step": 15490 + }, + { + "epoch": 0.14663814238789863, + "grad_norm": 473.5752868652344, + "learning_rate": 1.929499940460633e-06, + "loss": 21.2656, + "step": 15491 + }, + { + "epoch": 0.1466476084096137, + "grad_norm": 413.7255554199219, + "learning_rate": 1.929488632538681e-06, + "loss": 16.2031, + "step": 15492 + }, + { + "epoch": 0.14665707443132875, + "grad_norm": 412.9191589355469, + "learning_rate": 1.9294773237430697e-06, + "loss": 35.9688, + "step": 15493 + }, + { + "epoch": 0.1466665404530438, + "grad_norm": 161.6313018798828, + "learning_rate": 1.9294660140738105e-06, + "loss": 18.0391, + "step": 15494 + }, + { + "epoch": 0.14667600647475884, + "grad_norm": 223.23245239257812, + "learning_rate": 1.9294547035309135e-06, + "loss": 23.6719, + "step": 15495 + }, + { + "epoch": 0.1466854724964739, + "grad_norm": 1236.330810546875, + "learning_rate": 1.9294433921143896e-06, + "loss": 55.0312, + "step": 15496 + }, + { + "epoch": 0.14669493851818896, + "grad_norm": 922.1594848632812, + "learning_rate": 1.9294320798242496e-06, + "loss": 39.3281, + "step": 15497 + }, + { + "epoch": 0.14670440453990402, + "grad_norm": 327.15203857421875, + "learning_rate": 1.929420766660504e-06, + "loss": 29.0781, + "step": 15498 + }, + { + "epoch": 0.14671387056161908, + "grad_norm": 293.8498229980469, + "learning_rate": 1.929409452623163e-06, + "loss": 20.4375, + "step": 15499 + }, + { + "epoch": 0.1467233365833341, + "grad_norm": 249.34933471679688, + "learning_rate": 1.9293981377122374e-06, + "loss": 18.6484, + "step": 15500 + }, + { + "epoch": 0.14673280260504917, + "grad_norm": 616.675537109375, + "learning_rate": 1.9293868219277383e-06, + "loss": 41.7031, + "step": 15501 + }, + { + "epoch": 0.14674226862676423, + "grad_norm": 269.9367980957031, + "learning_rate": 1.929375505269676e-06, + "loss": 24.5625, + "step": 15502 + }, + { + "epoch": 0.1467517346484793, + "grad_norm": 517.479736328125, + "learning_rate": 1.9293641877380612e-06, + "loss": 24.2891, + "step": 15503 + }, + { + "epoch": 0.14676120067019433, + "grad_norm": 268.4432373046875, + "learning_rate": 1.929352869332905e-06, + "loss": 29.2656, + "step": 15504 + }, + { + "epoch": 0.14677066669190938, + "grad_norm": 3.2432141304016113, + "learning_rate": 1.9293415500542165e-06, + "loss": 1.0088, + "step": 15505 + }, + { + "epoch": 0.14678013271362444, + "grad_norm": 175.04969787597656, + "learning_rate": 1.929330229902008e-06, + "loss": 11.0508, + "step": 15506 + }, + { + "epoch": 0.1467895987353395, + "grad_norm": 416.4031982421875, + "learning_rate": 1.9293189088762898e-06, + "loss": 28.9062, + "step": 15507 + }, + { + "epoch": 0.14679906475705456, + "grad_norm": 3.2946012020111084, + "learning_rate": 1.9293075869770717e-06, + "loss": 1.0474, + "step": 15508 + }, + { + "epoch": 0.1468085307787696, + "grad_norm": 343.39630126953125, + "learning_rate": 1.9292962642043652e-06, + "loss": 19.9688, + "step": 15509 + }, + { + "epoch": 0.14681799680048466, + "grad_norm": 227.02401733398438, + "learning_rate": 1.9292849405581806e-06, + "loss": 21.5391, + "step": 15510 + }, + { + "epoch": 0.14682746282219972, + "grad_norm": 259.226318359375, + "learning_rate": 1.929273616038529e-06, + "loss": 10.6289, + "step": 15511 + }, + { + "epoch": 0.14683692884391478, + "grad_norm": 603.3012084960938, + "learning_rate": 1.92926229064542e-06, + "loss": 51.1875, + "step": 15512 + }, + { + "epoch": 0.1468463948656298, + "grad_norm": 176.41104125976562, + "learning_rate": 1.9292509643788654e-06, + "loss": 16.3125, + "step": 15513 + }, + { + "epoch": 0.14685586088734487, + "grad_norm": 246.45257568359375, + "learning_rate": 1.9292396372388754e-06, + "loss": 13.2656, + "step": 15514 + }, + { + "epoch": 0.14686532690905993, + "grad_norm": 265.08721923828125, + "learning_rate": 1.92922830922546e-06, + "loss": 23.6562, + "step": 15515 + }, + { + "epoch": 0.146874792930775, + "grad_norm": 223.20782470703125, + "learning_rate": 1.929216980338631e-06, + "loss": 21.9844, + "step": 15516 + }, + { + "epoch": 0.14688425895249005, + "grad_norm": 394.58245849609375, + "learning_rate": 1.929205650578398e-06, + "loss": 36.5938, + "step": 15517 + }, + { + "epoch": 0.14689372497420508, + "grad_norm": 3.25015926361084, + "learning_rate": 1.929194319944773e-06, + "loss": 0.8867, + "step": 15518 + }, + { + "epoch": 0.14690319099592014, + "grad_norm": 172.21092224121094, + "learning_rate": 1.929182988437765e-06, + "loss": 17.0352, + "step": 15519 + }, + { + "epoch": 0.1469126570176352, + "grad_norm": 796.4215698242188, + "learning_rate": 1.929171656057386e-06, + "loss": 31.8828, + "step": 15520 + }, + { + "epoch": 0.14692212303935026, + "grad_norm": 977.5111083984375, + "learning_rate": 1.9291603228036453e-06, + "loss": 82.3828, + "step": 15521 + }, + { + "epoch": 0.1469315890610653, + "grad_norm": 3.178448438644409, + "learning_rate": 1.929148988676555e-06, + "loss": 0.999, + "step": 15522 + }, + { + "epoch": 0.14694105508278035, + "grad_norm": 387.2662658691406, + "learning_rate": 1.929137653676125e-06, + "loss": 18.9297, + "step": 15523 + }, + { + "epoch": 0.1469505211044954, + "grad_norm": 251.8770751953125, + "learning_rate": 1.929126317802366e-06, + "loss": 20.3203, + "step": 15524 + }, + { + "epoch": 0.14695998712621047, + "grad_norm": 190.8336944580078, + "learning_rate": 1.9291149810552883e-06, + "loss": 16.0703, + "step": 15525 + }, + { + "epoch": 0.14696945314792553, + "grad_norm": 816.9220581054688, + "learning_rate": 1.9291036434349036e-06, + "loss": 26.6328, + "step": 15526 + }, + { + "epoch": 0.14697891916964057, + "grad_norm": 372.9020080566406, + "learning_rate": 1.9290923049412217e-06, + "loss": 23.6406, + "step": 15527 + }, + { + "epoch": 0.14698838519135562, + "grad_norm": 176.89552307128906, + "learning_rate": 1.9290809655742534e-06, + "loss": 19.0391, + "step": 15528 + }, + { + "epoch": 0.14699785121307068, + "grad_norm": 229.40541076660156, + "learning_rate": 1.9290696253340094e-06, + "loss": 17.9766, + "step": 15529 + }, + { + "epoch": 0.14700731723478574, + "grad_norm": 316.967041015625, + "learning_rate": 1.9290582842205e-06, + "loss": 19.1719, + "step": 15530 + }, + { + "epoch": 0.14701678325650078, + "grad_norm": 700.8687133789062, + "learning_rate": 1.929046942233737e-06, + "loss": 36.6406, + "step": 15531 + }, + { + "epoch": 0.14702624927821584, + "grad_norm": 217.62371826171875, + "learning_rate": 1.92903559937373e-06, + "loss": 16.3672, + "step": 15532 + }, + { + "epoch": 0.1470357152999309, + "grad_norm": 373.0626525878906, + "learning_rate": 1.92902425564049e-06, + "loss": 35.375, + "step": 15533 + }, + { + "epoch": 0.14704518132164596, + "grad_norm": 910.3222045898438, + "learning_rate": 1.9290129110340274e-06, + "loss": 40.2969, + "step": 15534 + }, + { + "epoch": 0.14705464734336102, + "grad_norm": 453.47540283203125, + "learning_rate": 1.9290015655543536e-06, + "loss": 37.8828, + "step": 15535 + }, + { + "epoch": 0.14706411336507605, + "grad_norm": 291.3238830566406, + "learning_rate": 1.9289902192014785e-06, + "loss": 28.5703, + "step": 15536 + }, + { + "epoch": 0.1470735793867911, + "grad_norm": 246.9541015625, + "learning_rate": 1.928978871975413e-06, + "loss": 22.875, + "step": 15537 + }, + { + "epoch": 0.14708304540850617, + "grad_norm": 330.1142272949219, + "learning_rate": 1.9289675238761676e-06, + "loss": 51.3125, + "step": 15538 + }, + { + "epoch": 0.14709251143022123, + "grad_norm": 329.915771484375, + "learning_rate": 1.9289561749037534e-06, + "loss": 23.3359, + "step": 15539 + }, + { + "epoch": 0.14710197745193626, + "grad_norm": 261.9580993652344, + "learning_rate": 1.9289448250581807e-06, + "loss": 21.7969, + "step": 15540 + }, + { + "epoch": 0.14711144347365132, + "grad_norm": 499.9205322265625, + "learning_rate": 1.9289334743394603e-06, + "loss": 11.4609, + "step": 15541 + }, + { + "epoch": 0.14712090949536638, + "grad_norm": 214.72122192382812, + "learning_rate": 1.928922122747603e-06, + "loss": 18.8594, + "step": 15542 + }, + { + "epoch": 0.14713037551708144, + "grad_norm": 388.1897277832031, + "learning_rate": 1.9289107702826195e-06, + "loss": 36.2578, + "step": 15543 + }, + { + "epoch": 0.1471398415387965, + "grad_norm": 347.3939208984375, + "learning_rate": 1.92889941694452e-06, + "loss": 29.4375, + "step": 15544 + }, + { + "epoch": 0.14714930756051153, + "grad_norm": 382.0763244628906, + "learning_rate": 1.9288880627333154e-06, + "loss": 24.625, + "step": 15545 + }, + { + "epoch": 0.1471587735822266, + "grad_norm": 293.2266540527344, + "learning_rate": 1.9288767076490167e-06, + "loss": 21.1719, + "step": 15546 + }, + { + "epoch": 0.14716823960394165, + "grad_norm": 554.9200439453125, + "learning_rate": 1.9288653516916342e-06, + "loss": 30.9531, + "step": 15547 + }, + { + "epoch": 0.1471777056256567, + "grad_norm": 275.975830078125, + "learning_rate": 1.928853994861179e-06, + "loss": 17.1367, + "step": 15548 + }, + { + "epoch": 0.14718717164737177, + "grad_norm": 730.9281616210938, + "learning_rate": 1.928842637157661e-06, + "loss": 60.0469, + "step": 15549 + }, + { + "epoch": 0.1471966376690868, + "grad_norm": 239.47274780273438, + "learning_rate": 1.9288312785810915e-06, + "loss": 18.3359, + "step": 15550 + }, + { + "epoch": 0.14720610369080186, + "grad_norm": 374.057861328125, + "learning_rate": 1.928819919131481e-06, + "loss": 39.7969, + "step": 15551 + }, + { + "epoch": 0.14721556971251692, + "grad_norm": 271.994873046875, + "learning_rate": 1.9288085588088402e-06, + "loss": 24.7734, + "step": 15552 + }, + { + "epoch": 0.14722503573423198, + "grad_norm": 442.10125732421875, + "learning_rate": 1.9287971976131802e-06, + "loss": 30.7891, + "step": 15553 + }, + { + "epoch": 0.14723450175594702, + "grad_norm": 534.0426025390625, + "learning_rate": 1.928785835544511e-06, + "loss": 49.9531, + "step": 15554 + }, + { + "epoch": 0.14724396777766208, + "grad_norm": 542.2315673828125, + "learning_rate": 1.9287744726028433e-06, + "loss": 30.5781, + "step": 15555 + }, + { + "epoch": 0.14725343379937714, + "grad_norm": 156.2601318359375, + "learning_rate": 1.928763108788188e-06, + "loss": 14.2227, + "step": 15556 + }, + { + "epoch": 0.1472628998210922, + "grad_norm": 358.9439697265625, + "learning_rate": 1.928751744100556e-06, + "loss": 24.5156, + "step": 15557 + }, + { + "epoch": 0.14727236584280726, + "grad_norm": 744.9380493164062, + "learning_rate": 1.9287403785399577e-06, + "loss": 65.4961, + "step": 15558 + }, + { + "epoch": 0.1472818318645223, + "grad_norm": 351.4787902832031, + "learning_rate": 1.928729012106404e-06, + "loss": 22.9766, + "step": 15559 + }, + { + "epoch": 0.14729129788623735, + "grad_norm": 448.8901672363281, + "learning_rate": 1.928717644799905e-06, + "loss": 50.1719, + "step": 15560 + }, + { + "epoch": 0.1473007639079524, + "grad_norm": 256.0363464355469, + "learning_rate": 1.928706276620472e-06, + "loss": 30.3828, + "step": 15561 + }, + { + "epoch": 0.14731022992966747, + "grad_norm": 299.56402587890625, + "learning_rate": 1.928694907568116e-06, + "loss": 29.1133, + "step": 15562 + }, + { + "epoch": 0.1473196959513825, + "grad_norm": 3.7016592025756836, + "learning_rate": 1.928683537642847e-06, + "loss": 1.0586, + "step": 15563 + }, + { + "epoch": 0.14732916197309756, + "grad_norm": 281.7708435058594, + "learning_rate": 1.928672166844676e-06, + "loss": 26.1719, + "step": 15564 + }, + { + "epoch": 0.14733862799481262, + "grad_norm": 291.1203918457031, + "learning_rate": 1.928660795173613e-06, + "loss": 7.0918, + "step": 15565 + }, + { + "epoch": 0.14734809401652768, + "grad_norm": 373.3625793457031, + "learning_rate": 1.92864942262967e-06, + "loss": 25.6953, + "step": 15566 + }, + { + "epoch": 0.14735756003824274, + "grad_norm": 137.21490478515625, + "learning_rate": 1.9286380492128563e-06, + "loss": 13.2969, + "step": 15567 + }, + { + "epoch": 0.14736702605995777, + "grad_norm": 262.0765075683594, + "learning_rate": 1.9286266749231834e-06, + "loss": 20.5703, + "step": 15568 + }, + { + "epoch": 0.14737649208167283, + "grad_norm": 320.77032470703125, + "learning_rate": 1.928615299760662e-06, + "loss": 25.875, + "step": 15569 + }, + { + "epoch": 0.1473859581033879, + "grad_norm": 392.58587646484375, + "learning_rate": 1.928603923725303e-06, + "loss": 34.2969, + "step": 15570 + }, + { + "epoch": 0.14739542412510295, + "grad_norm": 354.9620056152344, + "learning_rate": 1.928592546817116e-06, + "loss": 34.0312, + "step": 15571 + }, + { + "epoch": 0.14740489014681799, + "grad_norm": 317.7339172363281, + "learning_rate": 1.928581169036113e-06, + "loss": 19.7969, + "step": 15572 + }, + { + "epoch": 0.14741435616853305, + "grad_norm": 218.7632598876953, + "learning_rate": 1.9285697903823036e-06, + "loss": 13.0801, + "step": 15573 + }, + { + "epoch": 0.1474238221902481, + "grad_norm": 363.4873962402344, + "learning_rate": 1.9285584108556995e-06, + "loss": 23.3672, + "step": 15574 + }, + { + "epoch": 0.14743328821196316, + "grad_norm": 369.8494567871094, + "learning_rate": 1.92854703045631e-06, + "loss": 21.2109, + "step": 15575 + }, + { + "epoch": 0.14744275423367822, + "grad_norm": 267.354736328125, + "learning_rate": 1.928535649184148e-06, + "loss": 21.0781, + "step": 15576 + }, + { + "epoch": 0.14745222025539326, + "grad_norm": 463.1070251464844, + "learning_rate": 1.928524267039222e-06, + "loss": 39.6484, + "step": 15577 + }, + { + "epoch": 0.14746168627710832, + "grad_norm": 283.15216064453125, + "learning_rate": 1.928512884021544e-06, + "loss": 8.1289, + "step": 15578 + }, + { + "epoch": 0.14747115229882338, + "grad_norm": 492.9833679199219, + "learning_rate": 1.928501500131124e-06, + "loss": 13.2422, + "step": 15579 + }, + { + "epoch": 0.14748061832053844, + "grad_norm": 155.0847930908203, + "learning_rate": 1.928490115367973e-06, + "loss": 20.6797, + "step": 15580 + }, + { + "epoch": 0.14749008434225347, + "grad_norm": 217.97799682617188, + "learning_rate": 1.928478729732102e-06, + "loss": 10.4551, + "step": 15581 + }, + { + "epoch": 0.14749955036396853, + "grad_norm": 991.4630737304688, + "learning_rate": 1.928467343223521e-06, + "loss": 36.1797, + "step": 15582 + }, + { + "epoch": 0.1475090163856836, + "grad_norm": 557.9981079101562, + "learning_rate": 1.928455955842241e-06, + "loss": 51.4922, + "step": 15583 + }, + { + "epoch": 0.14751848240739865, + "grad_norm": 215.77947998046875, + "learning_rate": 1.9284445675882733e-06, + "loss": 25.5781, + "step": 15584 + }, + { + "epoch": 0.1475279484291137, + "grad_norm": 507.5409240722656, + "learning_rate": 1.928433178461628e-06, + "loss": 42.6406, + "step": 15585 + }, + { + "epoch": 0.14753741445082874, + "grad_norm": 733.7870483398438, + "learning_rate": 1.928421788462316e-06, + "loss": 50.7969, + "step": 15586 + }, + { + "epoch": 0.1475468804725438, + "grad_norm": 422.5614318847656, + "learning_rate": 1.9284103975903476e-06, + "loss": 22.9141, + "step": 15587 + }, + { + "epoch": 0.14755634649425886, + "grad_norm": 381.3815002441406, + "learning_rate": 1.9283990058457337e-06, + "loss": 33.875, + "step": 15588 + }, + { + "epoch": 0.14756581251597392, + "grad_norm": 469.9471435546875, + "learning_rate": 1.9283876132284853e-06, + "loss": 34.9688, + "step": 15589 + }, + { + "epoch": 0.14757527853768895, + "grad_norm": 700.4280395507812, + "learning_rate": 1.9283762197386134e-06, + "loss": 54.168, + "step": 15590 + }, + { + "epoch": 0.147584744559404, + "grad_norm": 601.268798828125, + "learning_rate": 1.928364825376128e-06, + "loss": 30.9844, + "step": 15591 + }, + { + "epoch": 0.14759421058111907, + "grad_norm": 264.1831359863281, + "learning_rate": 1.9283534301410398e-06, + "loss": 9.4668, + "step": 15592 + }, + { + "epoch": 0.14760367660283413, + "grad_norm": 472.2716979980469, + "learning_rate": 1.92834203403336e-06, + "loss": 29.0508, + "step": 15593 + }, + { + "epoch": 0.1476131426245492, + "grad_norm": 378.13470458984375, + "learning_rate": 1.928330637053099e-06, + "loss": 28.7695, + "step": 15594 + }, + { + "epoch": 0.14762260864626423, + "grad_norm": 363.9725646972656, + "learning_rate": 1.9283192392002676e-06, + "loss": 18.8438, + "step": 15595 + }, + { + "epoch": 0.14763207466797929, + "grad_norm": 508.1999206542969, + "learning_rate": 1.9283078404748766e-06, + "loss": 9.9883, + "step": 15596 + }, + { + "epoch": 0.14764154068969434, + "grad_norm": 434.307861328125, + "learning_rate": 1.928296440876936e-06, + "loss": 51.9844, + "step": 15597 + }, + { + "epoch": 0.1476510067114094, + "grad_norm": 992.5032348632812, + "learning_rate": 1.928285040406458e-06, + "loss": 37.8125, + "step": 15598 + }, + { + "epoch": 0.14766047273312444, + "grad_norm": 3.153144359588623, + "learning_rate": 1.928273639063452e-06, + "loss": 0.8877, + "step": 15599 + }, + { + "epoch": 0.1476699387548395, + "grad_norm": 780.2010498046875, + "learning_rate": 1.928262236847929e-06, + "loss": 46.8359, + "step": 15600 + }, + { + "epoch": 0.14767940477655456, + "grad_norm": 606.2879028320312, + "learning_rate": 1.9282508337599004e-06, + "loss": 33.9727, + "step": 15601 + }, + { + "epoch": 0.14768887079826962, + "grad_norm": 313.943115234375, + "learning_rate": 1.928239429799376e-06, + "loss": 47.2031, + "step": 15602 + }, + { + "epoch": 0.14769833681998468, + "grad_norm": 306.6687316894531, + "learning_rate": 1.9282280249663672e-06, + "loss": 18.5938, + "step": 15603 + }, + { + "epoch": 0.1477078028416997, + "grad_norm": 648.8347778320312, + "learning_rate": 1.9282166192608843e-06, + "loss": 36.4531, + "step": 15604 + }, + { + "epoch": 0.14771726886341477, + "grad_norm": 280.39544677734375, + "learning_rate": 1.928205212682938e-06, + "loss": 32.4062, + "step": 15605 + }, + { + "epoch": 0.14772673488512983, + "grad_norm": 613.4972534179688, + "learning_rate": 1.9281938052325397e-06, + "loss": 45.0469, + "step": 15606 + }, + { + "epoch": 0.1477362009068449, + "grad_norm": 347.4449768066406, + "learning_rate": 1.928182396909699e-06, + "loss": 32.8125, + "step": 15607 + }, + { + "epoch": 0.14774566692855992, + "grad_norm": 507.0713195800781, + "learning_rate": 1.9281709877144277e-06, + "loss": 10.7246, + "step": 15608 + }, + { + "epoch": 0.14775513295027498, + "grad_norm": 280.8185729980469, + "learning_rate": 1.9281595776467357e-06, + "loss": 18.3438, + "step": 15609 + }, + { + "epoch": 0.14776459897199004, + "grad_norm": 802.9524536132812, + "learning_rate": 1.9281481667066345e-06, + "loss": 20.0469, + "step": 15610 + }, + { + "epoch": 0.1477740649937051, + "grad_norm": 1142.871826171875, + "learning_rate": 1.928136754894134e-06, + "loss": 46.6797, + "step": 15611 + }, + { + "epoch": 0.14778353101542016, + "grad_norm": 625.1661376953125, + "learning_rate": 1.9281253422092456e-06, + "loss": 21.1602, + "step": 15612 + }, + { + "epoch": 0.1477929970371352, + "grad_norm": 2.60447096824646, + "learning_rate": 1.9281139286519794e-06, + "loss": 0.854, + "step": 15613 + }, + { + "epoch": 0.14780246305885025, + "grad_norm": 928.193603515625, + "learning_rate": 1.9281025142223464e-06, + "loss": 18.875, + "step": 15614 + }, + { + "epoch": 0.1478119290805653, + "grad_norm": 990.1611938476562, + "learning_rate": 1.928091098920358e-06, + "loss": 31.0234, + "step": 15615 + }, + { + "epoch": 0.14782139510228037, + "grad_norm": 3.0132157802581787, + "learning_rate": 1.928079682746024e-06, + "loss": 0.9497, + "step": 15616 + }, + { + "epoch": 0.1478308611239954, + "grad_norm": 220.1630859375, + "learning_rate": 1.9280682656993555e-06, + "loss": 18.6797, + "step": 15617 + }, + { + "epoch": 0.14784032714571047, + "grad_norm": 217.16139221191406, + "learning_rate": 1.9280568477803635e-06, + "loss": 9.8965, + "step": 15618 + }, + { + "epoch": 0.14784979316742553, + "grad_norm": 477.3331298828125, + "learning_rate": 1.928045428989058e-06, + "loss": 22.3047, + "step": 15619 + }, + { + "epoch": 0.14785925918914058, + "grad_norm": 240.98468017578125, + "learning_rate": 1.9280340093254504e-06, + "loss": 17.1797, + "step": 15620 + }, + { + "epoch": 0.14786872521085564, + "grad_norm": 952.88232421875, + "learning_rate": 1.928022588789551e-06, + "loss": 43.2734, + "step": 15621 + }, + { + "epoch": 0.14787819123257068, + "grad_norm": 206.64207458496094, + "learning_rate": 1.928011167381371e-06, + "loss": 16.2109, + "step": 15622 + }, + { + "epoch": 0.14788765725428574, + "grad_norm": 404.6994323730469, + "learning_rate": 1.927999745100921e-06, + "loss": 19.8359, + "step": 15623 + }, + { + "epoch": 0.1478971232760008, + "grad_norm": 1819.6339111328125, + "learning_rate": 1.9279883219482113e-06, + "loss": 19.25, + "step": 15624 + }, + { + "epoch": 0.14790658929771586, + "grad_norm": 334.6298828125, + "learning_rate": 1.9279768979232527e-06, + "loss": 22.3359, + "step": 15625 + }, + { + "epoch": 0.1479160553194309, + "grad_norm": 352.8998718261719, + "learning_rate": 1.9279654730260564e-06, + "loss": 22.5859, + "step": 15626 + }, + { + "epoch": 0.14792552134114595, + "grad_norm": 554.5792236328125, + "learning_rate": 1.927954047256633e-06, + "loss": 57.9688, + "step": 15627 + }, + { + "epoch": 0.147934987362861, + "grad_norm": 3.091740608215332, + "learning_rate": 1.927942620614993e-06, + "loss": 0.96, + "step": 15628 + }, + { + "epoch": 0.14794445338457607, + "grad_norm": 618.4608154296875, + "learning_rate": 1.9279311931011478e-06, + "loss": 32.8359, + "step": 15629 + }, + { + "epoch": 0.14795391940629113, + "grad_norm": 316.1160583496094, + "learning_rate": 1.9279197647151075e-06, + "loss": 7.5391, + "step": 15630 + }, + { + "epoch": 0.14796338542800616, + "grad_norm": 373.1421203613281, + "learning_rate": 1.9279083354568824e-06, + "loss": 12.5938, + "step": 15631 + }, + { + "epoch": 0.14797285144972122, + "grad_norm": 447.1734313964844, + "learning_rate": 1.9278969053264845e-06, + "loss": 45.1875, + "step": 15632 + }, + { + "epoch": 0.14798231747143628, + "grad_norm": 388.899169921875, + "learning_rate": 1.927885474323924e-06, + "loss": 35.1328, + "step": 15633 + }, + { + "epoch": 0.14799178349315134, + "grad_norm": 3.2035765647888184, + "learning_rate": 1.9278740424492107e-06, + "loss": 0.9385, + "step": 15634 + }, + { + "epoch": 0.1480012495148664, + "grad_norm": 425.2375183105469, + "learning_rate": 1.9278626097023567e-06, + "loss": 46.625, + "step": 15635 + }, + { + "epoch": 0.14801071553658143, + "grad_norm": 286.401611328125, + "learning_rate": 1.9278511760833723e-06, + "loss": 25.3242, + "step": 15636 + }, + { + "epoch": 0.1480201815582965, + "grad_norm": 281.6573486328125, + "learning_rate": 1.927839741592268e-06, + "loss": 56.1094, + "step": 15637 + }, + { + "epoch": 0.14802964758001155, + "grad_norm": 276.7017517089844, + "learning_rate": 1.927828306229054e-06, + "loss": 17.8438, + "step": 15638 + }, + { + "epoch": 0.1480391136017266, + "grad_norm": 626.1773681640625, + "learning_rate": 1.927816869993743e-06, + "loss": 65.3984, + "step": 15639 + }, + { + "epoch": 0.14804857962344165, + "grad_norm": 193.36138916015625, + "learning_rate": 1.927805432886344e-06, + "loss": 17.6719, + "step": 15640 + }, + { + "epoch": 0.1480580456451567, + "grad_norm": 634.9346923828125, + "learning_rate": 1.927793994906868e-06, + "loss": 46.5703, + "step": 15641 + }, + { + "epoch": 0.14806751166687177, + "grad_norm": 409.6676330566406, + "learning_rate": 1.9277825560553263e-06, + "loss": 18.5781, + "step": 15642 + }, + { + "epoch": 0.14807697768858682, + "grad_norm": 454.13916015625, + "learning_rate": 1.927771116331729e-06, + "loss": 23.1094, + "step": 15643 + }, + { + "epoch": 0.14808644371030188, + "grad_norm": 222.09632873535156, + "learning_rate": 1.9277596757360872e-06, + "loss": 22.6484, + "step": 15644 + }, + { + "epoch": 0.14809590973201692, + "grad_norm": 206.30885314941406, + "learning_rate": 1.927748234268412e-06, + "loss": 19.2148, + "step": 15645 + }, + { + "epoch": 0.14810537575373198, + "grad_norm": 3.4163424968719482, + "learning_rate": 1.927736791928714e-06, + "loss": 1.0234, + "step": 15646 + }, + { + "epoch": 0.14811484177544704, + "grad_norm": 3.13472056388855, + "learning_rate": 1.9277253487170035e-06, + "loss": 0.8164, + "step": 15647 + }, + { + "epoch": 0.1481243077971621, + "grad_norm": 485.8520812988281, + "learning_rate": 1.9277139046332916e-06, + "loss": 52.4375, + "step": 15648 + }, + { + "epoch": 0.14813377381887713, + "grad_norm": 188.07386779785156, + "learning_rate": 1.927702459677589e-06, + "loss": 11.4453, + "step": 15649 + }, + { + "epoch": 0.1481432398405922, + "grad_norm": 290.31549072265625, + "learning_rate": 1.9276910138499058e-06, + "loss": 38.4844, + "step": 15650 + }, + { + "epoch": 0.14815270586230725, + "grad_norm": 404.0642395019531, + "learning_rate": 1.927679567150254e-06, + "loss": 37.9141, + "step": 15651 + }, + { + "epoch": 0.1481621718840223, + "grad_norm": 665.4826049804688, + "learning_rate": 1.9276681195786436e-06, + "loss": 53.375, + "step": 15652 + }, + { + "epoch": 0.14817163790573737, + "grad_norm": 434.3675842285156, + "learning_rate": 1.927656671135086e-06, + "loss": 34.3281, + "step": 15653 + }, + { + "epoch": 0.1481811039274524, + "grad_norm": 541.2559204101562, + "learning_rate": 1.9276452218195908e-06, + "loss": 26.9531, + "step": 15654 + }, + { + "epoch": 0.14819056994916746, + "grad_norm": 528.1937866210938, + "learning_rate": 1.9276337716321697e-06, + "loss": 29.5938, + "step": 15655 + }, + { + "epoch": 0.14820003597088252, + "grad_norm": 394.1620178222656, + "learning_rate": 1.927622320572833e-06, + "loss": 37.2734, + "step": 15656 + }, + { + "epoch": 0.14820950199259758, + "grad_norm": 227.65679931640625, + "learning_rate": 1.927610868641592e-06, + "loss": 18.3477, + "step": 15657 + }, + { + "epoch": 0.1482189680143126, + "grad_norm": 343.5088806152344, + "learning_rate": 1.9275994158384572e-06, + "loss": 36.1953, + "step": 15658 + }, + { + "epoch": 0.14822843403602767, + "grad_norm": 227.80215454101562, + "learning_rate": 1.927587962163439e-06, + "loss": 19.0625, + "step": 15659 + }, + { + "epoch": 0.14823790005774273, + "grad_norm": 317.0726623535156, + "learning_rate": 1.9275765076165485e-06, + "loss": 45.4844, + "step": 15660 + }, + { + "epoch": 0.1482473660794578, + "grad_norm": 424.9021301269531, + "learning_rate": 1.9275650521977964e-06, + "loss": 43.0625, + "step": 15661 + }, + { + "epoch": 0.14825683210117285, + "grad_norm": 1661.7786865234375, + "learning_rate": 1.9275535959071938e-06, + "loss": 16.75, + "step": 15662 + }, + { + "epoch": 0.14826629812288789, + "grad_norm": 3.449712038040161, + "learning_rate": 1.9275421387447507e-06, + "loss": 1.0107, + "step": 15663 + }, + { + "epoch": 0.14827576414460295, + "grad_norm": 179.31814575195312, + "learning_rate": 1.9275306807104786e-06, + "loss": 23.9531, + "step": 15664 + }, + { + "epoch": 0.148285230166318, + "grad_norm": 195.76657104492188, + "learning_rate": 1.927519221804388e-06, + "loss": 19.9141, + "step": 15665 + }, + { + "epoch": 0.14829469618803306, + "grad_norm": 594.778564453125, + "learning_rate": 1.92750776202649e-06, + "loss": 38.0234, + "step": 15666 + }, + { + "epoch": 0.1483041622097481, + "grad_norm": 247.01730346679688, + "learning_rate": 1.9274963013767943e-06, + "loss": 30.6953, + "step": 15667 + }, + { + "epoch": 0.14831362823146316, + "grad_norm": 374.70166015625, + "learning_rate": 1.927484839855313e-06, + "loss": 34.4688, + "step": 15668 + }, + { + "epoch": 0.14832309425317822, + "grad_norm": 222.84719848632812, + "learning_rate": 1.927473377462056e-06, + "loss": 14.4062, + "step": 15669 + }, + { + "epoch": 0.14833256027489328, + "grad_norm": 440.94354248046875, + "learning_rate": 1.9274619141970346e-06, + "loss": 34.5938, + "step": 15670 + }, + { + "epoch": 0.14834202629660834, + "grad_norm": 277.5548400878906, + "learning_rate": 1.927450450060259e-06, + "loss": 23.0156, + "step": 15671 + }, + { + "epoch": 0.14835149231832337, + "grad_norm": 1092.1798095703125, + "learning_rate": 1.9274389850517404e-06, + "loss": 45.2852, + "step": 15672 + }, + { + "epoch": 0.14836095834003843, + "grad_norm": 561.7603759765625, + "learning_rate": 1.9274275191714894e-06, + "loss": 20.0547, + "step": 15673 + }, + { + "epoch": 0.1483704243617535, + "grad_norm": 473.4435119628906, + "learning_rate": 1.927416052419517e-06, + "loss": 38.2812, + "step": 15674 + }, + { + "epoch": 0.14837989038346855, + "grad_norm": 742.7647705078125, + "learning_rate": 1.927404584795834e-06, + "loss": 61.7188, + "step": 15675 + }, + { + "epoch": 0.14838935640518358, + "grad_norm": 243.6381378173828, + "learning_rate": 1.927393116300451e-06, + "loss": 22.0703, + "step": 15676 + }, + { + "epoch": 0.14839882242689864, + "grad_norm": 243.02879333496094, + "learning_rate": 1.9273816469333787e-06, + "loss": 26.6094, + "step": 15677 + }, + { + "epoch": 0.1484082884486137, + "grad_norm": 461.3034362792969, + "learning_rate": 1.9273701766946278e-06, + "loss": 51.8281, + "step": 15678 + }, + { + "epoch": 0.14841775447032876, + "grad_norm": 239.20848083496094, + "learning_rate": 1.9273587055842096e-06, + "loss": 18.4297, + "step": 15679 + }, + { + "epoch": 0.14842722049204382, + "grad_norm": 1007.4185180664062, + "learning_rate": 1.9273472336021344e-06, + "loss": 47.6406, + "step": 15680 + }, + { + "epoch": 0.14843668651375885, + "grad_norm": 457.3825988769531, + "learning_rate": 1.927335760748413e-06, + "loss": 10.4297, + "step": 15681 + }, + { + "epoch": 0.1484461525354739, + "grad_norm": 456.6561584472656, + "learning_rate": 1.9273242870230565e-06, + "loss": 29.0938, + "step": 15682 + }, + { + "epoch": 0.14845561855718897, + "grad_norm": 386.7440490722656, + "learning_rate": 1.9273128124260754e-06, + "loss": 25.4375, + "step": 15683 + }, + { + "epoch": 0.14846508457890403, + "grad_norm": 799.71630859375, + "learning_rate": 1.927301336957481e-06, + "loss": 87.5312, + "step": 15684 + }, + { + "epoch": 0.14847455060061907, + "grad_norm": 322.6341247558594, + "learning_rate": 1.927289860617283e-06, + "loss": 20.9453, + "step": 15685 + }, + { + "epoch": 0.14848401662233413, + "grad_norm": 2.724047899246216, + "learning_rate": 1.9272783834054935e-06, + "loss": 0.9248, + "step": 15686 + }, + { + "epoch": 0.14849348264404919, + "grad_norm": 209.15220642089844, + "learning_rate": 1.927266905322122e-06, + "loss": 22.375, + "step": 15687 + }, + { + "epoch": 0.14850294866576425, + "grad_norm": 433.30780029296875, + "learning_rate": 1.9272554263671804e-06, + "loss": 8.2734, + "step": 15688 + }, + { + "epoch": 0.1485124146874793, + "grad_norm": 2.940467357635498, + "learning_rate": 1.927243946540679e-06, + "loss": 1.002, + "step": 15689 + }, + { + "epoch": 0.14852188070919434, + "grad_norm": 308.1149597167969, + "learning_rate": 1.927232465842628e-06, + "loss": 8.3945, + "step": 15690 + }, + { + "epoch": 0.1485313467309094, + "grad_norm": 920.0921020507812, + "learning_rate": 1.927220984273039e-06, + "loss": 48.2344, + "step": 15691 + }, + { + "epoch": 0.14854081275262446, + "grad_norm": 386.56982421875, + "learning_rate": 1.927209501831923e-06, + "loss": 31.0, + "step": 15692 + }, + { + "epoch": 0.14855027877433952, + "grad_norm": 2.923671245574951, + "learning_rate": 1.9271980185192902e-06, + "loss": 0.8994, + "step": 15693 + }, + { + "epoch": 0.14855974479605455, + "grad_norm": 428.05804443359375, + "learning_rate": 1.927186534335152e-06, + "loss": 10.0195, + "step": 15694 + }, + { + "epoch": 0.1485692108177696, + "grad_norm": 893.1290283203125, + "learning_rate": 1.9271750492795183e-06, + "loss": 27.7656, + "step": 15695 + }, + { + "epoch": 0.14857867683948467, + "grad_norm": 205.18759155273438, + "learning_rate": 1.9271635633524004e-06, + "loss": 19.9062, + "step": 15696 + }, + { + "epoch": 0.14858814286119973, + "grad_norm": 385.5495300292969, + "learning_rate": 1.927152076553809e-06, + "loss": 19.8203, + "step": 15697 + }, + { + "epoch": 0.1485976088829148, + "grad_norm": 436.5403747558594, + "learning_rate": 1.9271405888837552e-06, + "loss": 43.5312, + "step": 15698 + }, + { + "epoch": 0.14860707490462982, + "grad_norm": 260.28863525390625, + "learning_rate": 1.9271291003422494e-06, + "loss": 15.8398, + "step": 15699 + }, + { + "epoch": 0.14861654092634488, + "grad_norm": 338.907958984375, + "learning_rate": 1.9271176109293026e-06, + "loss": 39.0938, + "step": 15700 + }, + { + "epoch": 0.14862600694805994, + "grad_norm": 908.3390502929688, + "learning_rate": 1.9271061206449257e-06, + "loss": 26.3281, + "step": 15701 + }, + { + "epoch": 0.148635472969775, + "grad_norm": 180.68116760253906, + "learning_rate": 1.927094629489129e-06, + "loss": 21.2422, + "step": 15702 + }, + { + "epoch": 0.14864493899149003, + "grad_norm": 218.04051208496094, + "learning_rate": 1.927083137461924e-06, + "loss": 24.9922, + "step": 15703 + }, + { + "epoch": 0.1486544050132051, + "grad_norm": 716.41162109375, + "learning_rate": 1.927071644563321e-06, + "loss": 35.0469, + "step": 15704 + }, + { + "epoch": 0.14866387103492015, + "grad_norm": 3470.238525390625, + "learning_rate": 1.927060150793331e-06, + "loss": 23.1875, + "step": 15705 + }, + { + "epoch": 0.1486733370566352, + "grad_norm": 3.616055488586426, + "learning_rate": 1.9270486561519647e-06, + "loss": 0.9697, + "step": 15706 + }, + { + "epoch": 0.14868280307835027, + "grad_norm": 273.7857971191406, + "learning_rate": 1.9270371606392334e-06, + "loss": 16.8555, + "step": 15707 + }, + { + "epoch": 0.1486922691000653, + "grad_norm": 459.5173645019531, + "learning_rate": 1.927025664255147e-06, + "loss": 47.7266, + "step": 15708 + }, + { + "epoch": 0.14870173512178037, + "grad_norm": 258.4297790527344, + "learning_rate": 1.927014166999717e-06, + "loss": 24.1094, + "step": 15709 + }, + { + "epoch": 0.14871120114349543, + "grad_norm": 291.7412414550781, + "learning_rate": 1.9270026688729537e-06, + "loss": 26.0547, + "step": 15710 + }, + { + "epoch": 0.14872066716521049, + "grad_norm": 469.84332275390625, + "learning_rate": 1.9269911698748685e-06, + "loss": 23.2109, + "step": 15711 + }, + { + "epoch": 0.14873013318692552, + "grad_norm": 408.1236267089844, + "learning_rate": 1.9269796700054715e-06, + "loss": 16.9141, + "step": 15712 + }, + { + "epoch": 0.14873959920864058, + "grad_norm": 352.71014404296875, + "learning_rate": 1.9269681692647744e-06, + "loss": 32.0469, + "step": 15713 + }, + { + "epoch": 0.14874906523035564, + "grad_norm": 562.0230102539062, + "learning_rate": 1.9269566676527875e-06, + "loss": 26.4062, + "step": 15714 + }, + { + "epoch": 0.1487585312520707, + "grad_norm": 566.742431640625, + "learning_rate": 1.9269451651695213e-06, + "loss": 48.25, + "step": 15715 + }, + { + "epoch": 0.14876799727378576, + "grad_norm": 681.0260009765625, + "learning_rate": 1.9269336618149873e-06, + "loss": 45.3359, + "step": 15716 + }, + { + "epoch": 0.1487774632955008, + "grad_norm": 439.9293212890625, + "learning_rate": 1.9269221575891956e-06, + "loss": 49.3516, + "step": 15717 + }, + { + "epoch": 0.14878692931721585, + "grad_norm": 584.7760009765625, + "learning_rate": 1.9269106524921577e-06, + "loss": 16.8984, + "step": 15718 + }, + { + "epoch": 0.1487963953389309, + "grad_norm": 325.67449951171875, + "learning_rate": 1.9268991465238837e-06, + "loss": 29.3828, + "step": 15719 + }, + { + "epoch": 0.14880586136064597, + "grad_norm": 400.5621032714844, + "learning_rate": 1.926887639684385e-06, + "loss": 26.3867, + "step": 15720 + }, + { + "epoch": 0.14881532738236103, + "grad_norm": 967.1906127929688, + "learning_rate": 1.9268761319736725e-06, + "loss": 56.1953, + "step": 15721 + }, + { + "epoch": 0.14882479340407606, + "grad_norm": 184.33966064453125, + "learning_rate": 1.9268646233917564e-06, + "loss": 21.5156, + "step": 15722 + }, + { + "epoch": 0.14883425942579112, + "grad_norm": 369.4918518066406, + "learning_rate": 1.9268531139386482e-06, + "loss": 45.3906, + "step": 15723 + }, + { + "epoch": 0.14884372544750618, + "grad_norm": 213.08914184570312, + "learning_rate": 1.926841603614358e-06, + "loss": 24.9688, + "step": 15724 + }, + { + "epoch": 0.14885319146922124, + "grad_norm": 749.3993530273438, + "learning_rate": 1.926830092418897e-06, + "loss": 18.0117, + "step": 15725 + }, + { + "epoch": 0.14886265749093627, + "grad_norm": 463.123291015625, + "learning_rate": 1.926818580352276e-06, + "loss": 16.6797, + "step": 15726 + }, + { + "epoch": 0.14887212351265133, + "grad_norm": 433.26605224609375, + "learning_rate": 1.926807067414506e-06, + "loss": 34.7031, + "step": 15727 + }, + { + "epoch": 0.1488815895343664, + "grad_norm": 402.1501159667969, + "learning_rate": 1.9267955536055974e-06, + "loss": 44.4062, + "step": 15728 + }, + { + "epoch": 0.14889105555608145, + "grad_norm": 227.80194091796875, + "learning_rate": 1.9267840389255616e-06, + "loss": 15.2812, + "step": 15729 + }, + { + "epoch": 0.1489005215777965, + "grad_norm": 567.0005493164062, + "learning_rate": 1.926772523374409e-06, + "loss": 53.1641, + "step": 15730 + }, + { + "epoch": 0.14890998759951155, + "grad_norm": 563.1014404296875, + "learning_rate": 1.9267610069521503e-06, + "loss": 34.4688, + "step": 15731 + }, + { + "epoch": 0.1489194536212266, + "grad_norm": 354.1593933105469, + "learning_rate": 1.926749489658797e-06, + "loss": 42.75, + "step": 15732 + }, + { + "epoch": 0.14892891964294167, + "grad_norm": 622.53076171875, + "learning_rate": 1.926737971494359e-06, + "loss": 45.9688, + "step": 15733 + }, + { + "epoch": 0.14893838566465673, + "grad_norm": 834.5885009765625, + "learning_rate": 1.926726452458848e-06, + "loss": 38.0859, + "step": 15734 + }, + { + "epoch": 0.14894785168637176, + "grad_norm": 534.9523315429688, + "learning_rate": 1.926714932552274e-06, + "loss": 54.3594, + "step": 15735 + }, + { + "epoch": 0.14895731770808682, + "grad_norm": 621.267822265625, + "learning_rate": 1.9267034117746484e-06, + "loss": 57.5586, + "step": 15736 + }, + { + "epoch": 0.14896678372980188, + "grad_norm": 717.8208618164062, + "learning_rate": 1.9266918901259823e-06, + "loss": 36.25, + "step": 15737 + }, + { + "epoch": 0.14897624975151694, + "grad_norm": 252.95150756835938, + "learning_rate": 1.9266803676062856e-06, + "loss": 25.2969, + "step": 15738 + }, + { + "epoch": 0.148985715773232, + "grad_norm": 522.353515625, + "learning_rate": 1.9266688442155696e-06, + "loss": 40.1719, + "step": 15739 + }, + { + "epoch": 0.14899518179494703, + "grad_norm": 509.0024108886719, + "learning_rate": 1.9266573199538455e-06, + "loss": 37.4375, + "step": 15740 + }, + { + "epoch": 0.1490046478166621, + "grad_norm": 348.7193603515625, + "learning_rate": 1.9266457948211233e-06, + "loss": 45.2578, + "step": 15741 + }, + { + "epoch": 0.14901411383837715, + "grad_norm": 2.797466278076172, + "learning_rate": 1.926634268817415e-06, + "loss": 0.8765, + "step": 15742 + }, + { + "epoch": 0.1490235798600922, + "grad_norm": 539.4781494140625, + "learning_rate": 1.9266227419427304e-06, + "loss": 22.7266, + "step": 15743 + }, + { + "epoch": 0.14903304588180724, + "grad_norm": 233.60662841796875, + "learning_rate": 1.9266112141970806e-06, + "loss": 10.1992, + "step": 15744 + }, + { + "epoch": 0.1490425119035223, + "grad_norm": 383.2189025878906, + "learning_rate": 1.9265996855804767e-06, + "loss": 25.6797, + "step": 15745 + }, + { + "epoch": 0.14905197792523736, + "grad_norm": 226.02166748046875, + "learning_rate": 1.9265881560929294e-06, + "loss": 15.8359, + "step": 15746 + }, + { + "epoch": 0.14906144394695242, + "grad_norm": 483.8922119140625, + "learning_rate": 1.9265766257344493e-06, + "loss": 17.6797, + "step": 15747 + }, + { + "epoch": 0.14907090996866748, + "grad_norm": 504.23272705078125, + "learning_rate": 1.9265650945050478e-06, + "loss": 32.1094, + "step": 15748 + }, + { + "epoch": 0.1490803759903825, + "grad_norm": 439.8804931640625, + "learning_rate": 1.9265535624047354e-06, + "loss": 26.9062, + "step": 15749 + }, + { + "epoch": 0.14908984201209757, + "grad_norm": 707.2577514648438, + "learning_rate": 1.9265420294335225e-06, + "loss": 43.7969, + "step": 15750 + }, + { + "epoch": 0.14909930803381263, + "grad_norm": 361.3864440917969, + "learning_rate": 1.9265304955914207e-06, + "loss": 52.9453, + "step": 15751 + }, + { + "epoch": 0.1491087740555277, + "grad_norm": 440.5207214355469, + "learning_rate": 1.9265189608784404e-06, + "loss": 36.6406, + "step": 15752 + }, + { + "epoch": 0.14911824007724273, + "grad_norm": 420.8001403808594, + "learning_rate": 1.9265074252945924e-06, + "loss": 42.0781, + "step": 15753 + }, + { + "epoch": 0.14912770609895779, + "grad_norm": 317.609619140625, + "learning_rate": 1.926495888839888e-06, + "loss": 19.4531, + "step": 15754 + }, + { + "epoch": 0.14913717212067285, + "grad_norm": 323.0169677734375, + "learning_rate": 1.9264843515143377e-06, + "loss": 23.2969, + "step": 15755 + }, + { + "epoch": 0.1491466381423879, + "grad_norm": 479.4408874511719, + "learning_rate": 1.926472813317952e-06, + "loss": 48.3281, + "step": 15756 + }, + { + "epoch": 0.14915610416410297, + "grad_norm": 1234.7120361328125, + "learning_rate": 1.9264612742507425e-06, + "loss": 16.9453, + "step": 15757 + }, + { + "epoch": 0.149165570185818, + "grad_norm": 871.9913330078125, + "learning_rate": 1.9264497343127196e-06, + "loss": 60.9062, + "step": 15758 + }, + { + "epoch": 0.14917503620753306, + "grad_norm": 483.66180419921875, + "learning_rate": 1.9264381935038942e-06, + "loss": 37.1016, + "step": 15759 + }, + { + "epoch": 0.14918450222924812, + "grad_norm": 385.2890625, + "learning_rate": 1.9264266518242775e-06, + "loss": 8.0703, + "step": 15760 + }, + { + "epoch": 0.14919396825096318, + "grad_norm": 423.07977294921875, + "learning_rate": 1.9264151092738796e-06, + "loss": 29.7578, + "step": 15761 + }, + { + "epoch": 0.1492034342726782, + "grad_norm": 315.2047119140625, + "learning_rate": 1.926403565852712e-06, + "loss": 25.6641, + "step": 15762 + }, + { + "epoch": 0.14921290029439327, + "grad_norm": 295.0118713378906, + "learning_rate": 1.926392021560785e-06, + "loss": 25.6719, + "step": 15763 + }, + { + "epoch": 0.14922236631610833, + "grad_norm": 1949.8148193359375, + "learning_rate": 1.92638047639811e-06, + "loss": 8.332, + "step": 15764 + }, + { + "epoch": 0.1492318323378234, + "grad_norm": 1088.4554443359375, + "learning_rate": 1.9263689303646973e-06, + "loss": 36.0859, + "step": 15765 + }, + { + "epoch": 0.14924129835953845, + "grad_norm": 396.4223327636719, + "learning_rate": 1.926357383460558e-06, + "loss": 21.3281, + "step": 15766 + }, + { + "epoch": 0.14925076438125348, + "grad_norm": 396.4221496582031, + "learning_rate": 1.9263458356857037e-06, + "loss": 16.1133, + "step": 15767 + }, + { + "epoch": 0.14926023040296854, + "grad_norm": 349.9913024902344, + "learning_rate": 1.9263342870401443e-06, + "loss": 31.0781, + "step": 15768 + }, + { + "epoch": 0.1492696964246836, + "grad_norm": 247.81800842285156, + "learning_rate": 1.9263227375238906e-06, + "loss": 16.0312, + "step": 15769 + }, + { + "epoch": 0.14927916244639866, + "grad_norm": 3.2522964477539062, + "learning_rate": 1.9263111871369543e-06, + "loss": 0.8857, + "step": 15770 + }, + { + "epoch": 0.1492886284681137, + "grad_norm": 498.37335205078125, + "learning_rate": 1.9262996358793453e-06, + "loss": 24.6914, + "step": 15771 + }, + { + "epoch": 0.14929809448982875, + "grad_norm": 561.4940795898438, + "learning_rate": 1.9262880837510753e-06, + "loss": 22.3203, + "step": 15772 + }, + { + "epoch": 0.1493075605115438, + "grad_norm": 426.8276062011719, + "learning_rate": 1.9262765307521545e-06, + "loss": 36.5312, + "step": 15773 + }, + { + "epoch": 0.14931702653325887, + "grad_norm": 1703.7567138671875, + "learning_rate": 1.926264976882594e-06, + "loss": 28.8594, + "step": 15774 + }, + { + "epoch": 0.14932649255497393, + "grad_norm": 236.64950561523438, + "learning_rate": 1.9262534221424048e-06, + "loss": 20.7109, + "step": 15775 + }, + { + "epoch": 0.14933595857668897, + "grad_norm": 220.9412384033203, + "learning_rate": 1.9262418665315978e-06, + "loss": 7.3945, + "step": 15776 + }, + { + "epoch": 0.14934542459840403, + "grad_norm": 764.82470703125, + "learning_rate": 1.9262303100501832e-06, + "loss": 22.8281, + "step": 15777 + }, + { + "epoch": 0.14935489062011909, + "grad_norm": 170.78890991210938, + "learning_rate": 1.9262187526981725e-06, + "loss": 18.3438, + "step": 15778 + }, + { + "epoch": 0.14936435664183415, + "grad_norm": 390.1445617675781, + "learning_rate": 1.9262071944755767e-06, + "loss": 36.2344, + "step": 15779 + }, + { + "epoch": 0.14937382266354918, + "grad_norm": 973.1336669921875, + "learning_rate": 1.9261956353824064e-06, + "loss": 22.2422, + "step": 15780 + }, + { + "epoch": 0.14938328868526424, + "grad_norm": 344.7187194824219, + "learning_rate": 1.926184075418672e-06, + "loss": 21.9531, + "step": 15781 + }, + { + "epoch": 0.1493927547069793, + "grad_norm": 614.89697265625, + "learning_rate": 1.9261725145843853e-06, + "loss": 11.1719, + "step": 15782 + }, + { + "epoch": 0.14940222072869436, + "grad_norm": 207.30282592773438, + "learning_rate": 1.9261609528795566e-06, + "loss": 19.9688, + "step": 15783 + }, + { + "epoch": 0.14941168675040942, + "grad_norm": 1096.509521484375, + "learning_rate": 1.926149390304197e-06, + "loss": 45.6484, + "step": 15784 + }, + { + "epoch": 0.14942115277212445, + "grad_norm": 415.6158752441406, + "learning_rate": 1.9261378268583166e-06, + "loss": 34.2344, + "step": 15785 + }, + { + "epoch": 0.1494306187938395, + "grad_norm": 439.1882019042969, + "learning_rate": 1.9261262625419273e-06, + "loss": 29.1172, + "step": 15786 + }, + { + "epoch": 0.14944008481555457, + "grad_norm": 482.66015625, + "learning_rate": 1.9261146973550397e-06, + "loss": 34.2344, + "step": 15787 + }, + { + "epoch": 0.14944955083726963, + "grad_norm": 390.1897277832031, + "learning_rate": 1.9261031312976644e-06, + "loss": 19.4844, + "step": 15788 + }, + { + "epoch": 0.14945901685898466, + "grad_norm": 551.6903686523438, + "learning_rate": 1.9260915643698124e-06, + "loss": 36.9688, + "step": 15789 + }, + { + "epoch": 0.14946848288069972, + "grad_norm": 523.8162841796875, + "learning_rate": 1.9260799965714948e-06, + "loss": 42.4688, + "step": 15790 + }, + { + "epoch": 0.14947794890241478, + "grad_norm": 450.2486572265625, + "learning_rate": 1.9260684279027216e-06, + "loss": 12.2344, + "step": 15791 + }, + { + "epoch": 0.14948741492412984, + "grad_norm": 248.080322265625, + "learning_rate": 1.926056858363505e-06, + "loss": 26.1328, + "step": 15792 + }, + { + "epoch": 0.1494968809458449, + "grad_norm": 289.0199890136719, + "learning_rate": 1.9260452879538547e-06, + "loss": 14.8047, + "step": 15793 + }, + { + "epoch": 0.14950634696755993, + "grad_norm": 985.5465698242188, + "learning_rate": 1.9260337166737823e-06, + "loss": 41.3164, + "step": 15794 + }, + { + "epoch": 0.149515812989275, + "grad_norm": 324.4724426269531, + "learning_rate": 1.9260221445232988e-06, + "loss": 14.9336, + "step": 15795 + }, + { + "epoch": 0.14952527901099005, + "grad_norm": 210.2920379638672, + "learning_rate": 1.9260105715024144e-06, + "loss": 7.1875, + "step": 15796 + }, + { + "epoch": 0.1495347450327051, + "grad_norm": 538.3289794921875, + "learning_rate": 1.92599899761114e-06, + "loss": 16.1719, + "step": 15797 + }, + { + "epoch": 0.14954421105442015, + "grad_norm": 251.3007354736328, + "learning_rate": 1.9259874228494874e-06, + "loss": 17.8047, + "step": 15798 + }, + { + "epoch": 0.1495536770761352, + "grad_norm": 750.9677734375, + "learning_rate": 1.9259758472174664e-06, + "loss": 35.2812, + "step": 15799 + }, + { + "epoch": 0.14956314309785027, + "grad_norm": 620.5933227539062, + "learning_rate": 1.9259642707150886e-06, + "loss": 38.8906, + "step": 15800 + }, + { + "epoch": 0.14957260911956533, + "grad_norm": 630.4015502929688, + "learning_rate": 1.9259526933423645e-06, + "loss": 34.3125, + "step": 15801 + }, + { + "epoch": 0.14958207514128039, + "grad_norm": 225.30072021484375, + "learning_rate": 1.925941115099305e-06, + "loss": 22.3281, + "step": 15802 + }, + { + "epoch": 0.14959154116299542, + "grad_norm": 468.62457275390625, + "learning_rate": 1.925929535985921e-06, + "loss": 60.0156, + "step": 15803 + }, + { + "epoch": 0.14960100718471048, + "grad_norm": 444.92633056640625, + "learning_rate": 1.925917956002224e-06, + "loss": 51.7031, + "step": 15804 + }, + { + "epoch": 0.14961047320642554, + "grad_norm": 438.3891906738281, + "learning_rate": 1.925906375148224e-06, + "loss": 42.9688, + "step": 15805 + }, + { + "epoch": 0.1496199392281406, + "grad_norm": 4.241981506347656, + "learning_rate": 1.925894793423932e-06, + "loss": 1.0259, + "step": 15806 + }, + { + "epoch": 0.14962940524985566, + "grad_norm": 262.8886413574219, + "learning_rate": 1.9258832108293595e-06, + "loss": 22.6797, + "step": 15807 + }, + { + "epoch": 0.1496388712715707, + "grad_norm": 647.5609741210938, + "learning_rate": 1.925871627364517e-06, + "loss": 35.125, + "step": 15808 + }, + { + "epoch": 0.14964833729328575, + "grad_norm": 521.3070068359375, + "learning_rate": 1.925860043029415e-06, + "loss": 55.3125, + "step": 15809 + }, + { + "epoch": 0.1496578033150008, + "grad_norm": 285.0725402832031, + "learning_rate": 1.9258484578240654e-06, + "loss": 50.6562, + "step": 15810 + }, + { + "epoch": 0.14966726933671587, + "grad_norm": 192.86045837402344, + "learning_rate": 1.925836871748478e-06, + "loss": 24.0781, + "step": 15811 + }, + { + "epoch": 0.1496767353584309, + "grad_norm": 661.7197265625, + "learning_rate": 1.925825284802664e-06, + "loss": 16.9609, + "step": 15812 + }, + { + "epoch": 0.14968620138014596, + "grad_norm": 371.3602600097656, + "learning_rate": 1.9258136969866354e-06, + "loss": 38.6094, + "step": 15813 + }, + { + "epoch": 0.14969566740186102, + "grad_norm": 445.8435363769531, + "learning_rate": 1.9258021083004014e-06, + "loss": 20.2266, + "step": 15814 + }, + { + "epoch": 0.14970513342357608, + "grad_norm": 184.66587829589844, + "learning_rate": 1.925790518743974e-06, + "loss": 19.3672, + "step": 15815 + }, + { + "epoch": 0.14971459944529114, + "grad_norm": 353.6419982910156, + "learning_rate": 1.9257789283173634e-06, + "loss": 20.3359, + "step": 15816 + }, + { + "epoch": 0.14972406546700617, + "grad_norm": 285.8191223144531, + "learning_rate": 1.925767337020581e-06, + "loss": 35.0625, + "step": 15817 + }, + { + "epoch": 0.14973353148872123, + "grad_norm": 378.7111511230469, + "learning_rate": 1.9257557448536375e-06, + "loss": 23.6719, + "step": 15818 + }, + { + "epoch": 0.1497429975104363, + "grad_norm": 303.0896301269531, + "learning_rate": 1.925744151816544e-06, + "loss": 19.4766, + "step": 15819 + }, + { + "epoch": 0.14975246353215135, + "grad_norm": 352.67694091796875, + "learning_rate": 1.925732557909311e-06, + "loss": 16.4375, + "step": 15820 + }, + { + "epoch": 0.14976192955386639, + "grad_norm": 359.3232421875, + "learning_rate": 1.9257209631319495e-06, + "loss": 39.4688, + "step": 15821 + }, + { + "epoch": 0.14977139557558145, + "grad_norm": 225.13934326171875, + "learning_rate": 1.9257093674844705e-06, + "loss": 17.1875, + "step": 15822 + }, + { + "epoch": 0.1497808615972965, + "grad_norm": 3.3164539337158203, + "learning_rate": 1.9256977709668854e-06, + "loss": 0.916, + "step": 15823 + }, + { + "epoch": 0.14979032761901157, + "grad_norm": 209.1502227783203, + "learning_rate": 1.9256861735792043e-06, + "loss": 20.7656, + "step": 15824 + }, + { + "epoch": 0.14979979364072663, + "grad_norm": 205.46340942382812, + "learning_rate": 1.9256745753214382e-06, + "loss": 19.5234, + "step": 15825 + }, + { + "epoch": 0.14980925966244166, + "grad_norm": 588.6609497070312, + "learning_rate": 1.9256629761935987e-06, + "loss": 8.3203, + "step": 15826 + }, + { + "epoch": 0.14981872568415672, + "grad_norm": 302.0202331542969, + "learning_rate": 1.9256513761956962e-06, + "loss": 35.3594, + "step": 15827 + }, + { + "epoch": 0.14982819170587178, + "grad_norm": 3.3157570362091064, + "learning_rate": 1.925639775327741e-06, + "loss": 1.0293, + "step": 15828 + }, + { + "epoch": 0.14983765772758684, + "grad_norm": 419.89495849609375, + "learning_rate": 1.9256281735897453e-06, + "loss": 38.4219, + "step": 15829 + }, + { + "epoch": 0.14984712374930187, + "grad_norm": 651.988525390625, + "learning_rate": 1.925616570981719e-06, + "loss": 57.4375, + "step": 15830 + }, + { + "epoch": 0.14985658977101693, + "grad_norm": 402.02642822265625, + "learning_rate": 1.9256049675036735e-06, + "loss": 30.2188, + "step": 15831 + }, + { + "epoch": 0.149866055792732, + "grad_norm": 203.0720672607422, + "learning_rate": 1.9255933631556194e-06, + "loss": 23.3281, + "step": 15832 + }, + { + "epoch": 0.14987552181444705, + "grad_norm": 4.478382587432861, + "learning_rate": 1.9255817579375677e-06, + "loss": 0.9048, + "step": 15833 + }, + { + "epoch": 0.1498849878361621, + "grad_norm": 609.2659301757812, + "learning_rate": 1.9255701518495292e-06, + "loss": 28.9688, + "step": 15834 + }, + { + "epoch": 0.14989445385787714, + "grad_norm": 672.2642822265625, + "learning_rate": 1.9255585448915155e-06, + "loss": 49.0938, + "step": 15835 + }, + { + "epoch": 0.1499039198795922, + "grad_norm": 622.2691040039062, + "learning_rate": 1.9255469370635367e-06, + "loss": 29.25, + "step": 15836 + }, + { + "epoch": 0.14991338590130726, + "grad_norm": 328.4287414550781, + "learning_rate": 1.925535328365604e-06, + "loss": 21.7891, + "step": 15837 + }, + { + "epoch": 0.14992285192302232, + "grad_norm": 306.4164123535156, + "learning_rate": 1.9255237187977283e-06, + "loss": 23.3477, + "step": 15838 + }, + { + "epoch": 0.14993231794473735, + "grad_norm": 756.99267578125, + "learning_rate": 1.9255121083599203e-06, + "loss": 25.8359, + "step": 15839 + }, + { + "epoch": 0.1499417839664524, + "grad_norm": 601.7548828125, + "learning_rate": 1.9255004970521917e-06, + "loss": 30.2578, + "step": 15840 + }, + { + "epoch": 0.14995124998816747, + "grad_norm": 385.9080505371094, + "learning_rate": 1.925488884874552e-06, + "loss": 28.2734, + "step": 15841 + }, + { + "epoch": 0.14996071600988253, + "grad_norm": 361.01812744140625, + "learning_rate": 1.9254772718270136e-06, + "loss": 20.3984, + "step": 15842 + }, + { + "epoch": 0.1499701820315976, + "grad_norm": 455.7162170410156, + "learning_rate": 1.9254656579095866e-06, + "loss": 24.2266, + "step": 15843 + }, + { + "epoch": 0.14997964805331263, + "grad_norm": 350.27362060546875, + "learning_rate": 1.925454043122282e-06, + "loss": 20.1719, + "step": 15844 + }, + { + "epoch": 0.14998911407502769, + "grad_norm": 304.2891540527344, + "learning_rate": 1.925442427465111e-06, + "loss": 30.6875, + "step": 15845 + }, + { + "epoch": 0.14999858009674275, + "grad_norm": 179.34400939941406, + "learning_rate": 1.9254308109380843e-06, + "loss": 19.4141, + "step": 15846 + }, + { + "epoch": 0.1500080461184578, + "grad_norm": 913.3694458007812, + "learning_rate": 1.925419193541213e-06, + "loss": 37.75, + "step": 15847 + }, + { + "epoch": 0.15001751214017284, + "grad_norm": 137.0207977294922, + "learning_rate": 1.9254075752745076e-06, + "loss": 15.4531, + "step": 15848 + }, + { + "epoch": 0.1500269781618879, + "grad_norm": 283.70111083984375, + "learning_rate": 1.9253959561379795e-06, + "loss": 12.5938, + "step": 15849 + }, + { + "epoch": 0.15003644418360296, + "grad_norm": 342.1932678222656, + "learning_rate": 1.925384336131639e-06, + "loss": 20.5703, + "step": 15850 + }, + { + "epoch": 0.15004591020531802, + "grad_norm": 272.4956359863281, + "learning_rate": 1.9253727152554974e-06, + "loss": 20.2188, + "step": 15851 + }, + { + "epoch": 0.15005537622703308, + "grad_norm": 604.15966796875, + "learning_rate": 1.9253610935095657e-06, + "loss": 51.5977, + "step": 15852 + }, + { + "epoch": 0.1500648422487481, + "grad_norm": 179.6300811767578, + "learning_rate": 1.9253494708938553e-06, + "loss": 8.3828, + "step": 15853 + }, + { + "epoch": 0.15007430827046317, + "grad_norm": 520.267578125, + "learning_rate": 1.925337847408376e-06, + "loss": 45.3281, + "step": 15854 + }, + { + "epoch": 0.15008377429217823, + "grad_norm": 582.6358642578125, + "learning_rate": 1.92532622305314e-06, + "loss": 48.7812, + "step": 15855 + }, + { + "epoch": 0.1500932403138933, + "grad_norm": 471.0153503417969, + "learning_rate": 1.9253145978281567e-06, + "loss": 21.9609, + "step": 15856 + }, + { + "epoch": 0.15010270633560832, + "grad_norm": 239.2001953125, + "learning_rate": 1.9253029717334385e-06, + "loss": 21.8047, + "step": 15857 + }, + { + "epoch": 0.15011217235732338, + "grad_norm": 264.435302734375, + "learning_rate": 1.9252913447689958e-06, + "loss": 21.125, + "step": 15858 + }, + { + "epoch": 0.15012163837903844, + "grad_norm": 346.58489990234375, + "learning_rate": 1.9252797169348386e-06, + "loss": 25.4531, + "step": 15859 + }, + { + "epoch": 0.1501311044007535, + "grad_norm": 2.855743646621704, + "learning_rate": 1.9252680882309794e-06, + "loss": 0.9292, + "step": 15860 + }, + { + "epoch": 0.15014057042246856, + "grad_norm": 783.0327758789062, + "learning_rate": 1.925256458657428e-06, + "loss": 60.9062, + "step": 15861 + }, + { + "epoch": 0.1501500364441836, + "grad_norm": 204.0355987548828, + "learning_rate": 1.9252448282141958e-06, + "loss": 21.9688, + "step": 15862 + }, + { + "epoch": 0.15015950246589865, + "grad_norm": 265.5856628417969, + "learning_rate": 1.925233196901294e-06, + "loss": 14.8906, + "step": 15863 + }, + { + "epoch": 0.1501689684876137, + "grad_norm": 546.90087890625, + "learning_rate": 1.925221564718733e-06, + "loss": 22.1172, + "step": 15864 + }, + { + "epoch": 0.15017843450932877, + "grad_norm": 505.1416931152344, + "learning_rate": 1.9252099316665234e-06, + "loss": 20.6562, + "step": 15865 + }, + { + "epoch": 0.1501879005310438, + "grad_norm": 451.2748718261719, + "learning_rate": 1.9251982977446774e-06, + "loss": 52.6484, + "step": 15866 + }, + { + "epoch": 0.15019736655275887, + "grad_norm": 168.306640625, + "learning_rate": 1.9251866629532046e-06, + "loss": 22.4688, + "step": 15867 + }, + { + "epoch": 0.15020683257447393, + "grad_norm": 3.0663516521453857, + "learning_rate": 1.925175027292117e-06, + "loss": 0.9976, + "step": 15868 + }, + { + "epoch": 0.15021629859618899, + "grad_norm": 564.333984375, + "learning_rate": 1.9251633907614248e-06, + "loss": 17.3555, + "step": 15869 + }, + { + "epoch": 0.15022576461790405, + "grad_norm": 250.57960510253906, + "learning_rate": 1.9251517533611393e-06, + "loss": 19.2031, + "step": 15870 + }, + { + "epoch": 0.15023523063961908, + "grad_norm": 512.9970092773438, + "learning_rate": 1.9251401150912715e-06, + "loss": 40.7031, + "step": 15871 + }, + { + "epoch": 0.15024469666133414, + "grad_norm": 209.144287109375, + "learning_rate": 1.925128475951832e-06, + "loss": 17.7109, + "step": 15872 + }, + { + "epoch": 0.1502541626830492, + "grad_norm": 1180.8203125, + "learning_rate": 1.9251168359428317e-06, + "loss": 45.6875, + "step": 15873 + }, + { + "epoch": 0.15026362870476426, + "grad_norm": 299.4532775878906, + "learning_rate": 1.925105195064282e-06, + "loss": 36.2656, + "step": 15874 + }, + { + "epoch": 0.1502730947264793, + "grad_norm": 346.56121826171875, + "learning_rate": 1.925093553316194e-06, + "loss": 31.8047, + "step": 15875 + }, + { + "epoch": 0.15028256074819435, + "grad_norm": 596.4038696289062, + "learning_rate": 1.9250819106985775e-06, + "loss": 32.2188, + "step": 15876 + }, + { + "epoch": 0.1502920267699094, + "grad_norm": 334.33587646484375, + "learning_rate": 1.9250702672114442e-06, + "loss": 19.7031, + "step": 15877 + }, + { + "epoch": 0.15030149279162447, + "grad_norm": 486.9529724121094, + "learning_rate": 1.9250586228548058e-06, + "loss": 64.0625, + "step": 15878 + }, + { + "epoch": 0.15031095881333953, + "grad_norm": 658.6571044921875, + "learning_rate": 1.925046977628672e-06, + "loss": 39.0586, + "step": 15879 + }, + { + "epoch": 0.15032042483505456, + "grad_norm": 339.92755126953125, + "learning_rate": 1.925035331533054e-06, + "loss": 42.4688, + "step": 15880 + }, + { + "epoch": 0.15032989085676962, + "grad_norm": 902.2074584960938, + "learning_rate": 1.925023684567963e-06, + "loss": 43.3281, + "step": 15881 + }, + { + "epoch": 0.15033935687848468, + "grad_norm": 505.3840637207031, + "learning_rate": 1.92501203673341e-06, + "loss": 22.4688, + "step": 15882 + }, + { + "epoch": 0.15034882290019974, + "grad_norm": 345.2973327636719, + "learning_rate": 1.925000388029406e-06, + "loss": 26.6641, + "step": 15883 + }, + { + "epoch": 0.15035828892191477, + "grad_norm": 716.7442626953125, + "learning_rate": 1.924988738455962e-06, + "loss": 47.3438, + "step": 15884 + }, + { + "epoch": 0.15036775494362983, + "grad_norm": 580.680419921875, + "learning_rate": 1.9249770880130885e-06, + "loss": 25.0781, + "step": 15885 + }, + { + "epoch": 0.1503772209653449, + "grad_norm": 1043.8282470703125, + "learning_rate": 1.9249654367007967e-06, + "loss": 38.8906, + "step": 15886 + }, + { + "epoch": 0.15038668698705995, + "grad_norm": 556.4962158203125, + "learning_rate": 1.9249537845190976e-06, + "loss": 30.8125, + "step": 15887 + }, + { + "epoch": 0.150396153008775, + "grad_norm": 815.1170043945312, + "learning_rate": 1.924942131468002e-06, + "loss": 20.3438, + "step": 15888 + }, + { + "epoch": 0.15040561903049005, + "grad_norm": 529.4838256835938, + "learning_rate": 1.924930477547521e-06, + "loss": 50.9688, + "step": 15889 + }, + { + "epoch": 0.1504150850522051, + "grad_norm": 3.266963005065918, + "learning_rate": 1.9249188227576658e-06, + "loss": 0.9951, + "step": 15890 + }, + { + "epoch": 0.15042455107392017, + "grad_norm": 363.1669006347656, + "learning_rate": 1.9249071670984467e-06, + "loss": 45.625, + "step": 15891 + }, + { + "epoch": 0.15043401709563523, + "grad_norm": 598.1331787109375, + "learning_rate": 1.9248955105698753e-06, + "loss": 33.5469, + "step": 15892 + }, + { + "epoch": 0.15044348311735026, + "grad_norm": 1090.7269287109375, + "learning_rate": 1.9248838531719623e-06, + "loss": 16.1016, + "step": 15893 + }, + { + "epoch": 0.15045294913906532, + "grad_norm": 182.5230712890625, + "learning_rate": 1.924872194904718e-06, + "loss": 11.3008, + "step": 15894 + }, + { + "epoch": 0.15046241516078038, + "grad_norm": 749.8870239257812, + "learning_rate": 1.9248605357681548e-06, + "loss": 12.25, + "step": 15895 + }, + { + "epoch": 0.15047188118249544, + "grad_norm": 683.7249145507812, + "learning_rate": 1.9248488757622828e-06, + "loss": 26.9531, + "step": 15896 + }, + { + "epoch": 0.1504813472042105, + "grad_norm": 539.680908203125, + "learning_rate": 1.9248372148871127e-06, + "loss": 32.6719, + "step": 15897 + }, + { + "epoch": 0.15049081322592553, + "grad_norm": 341.26513671875, + "learning_rate": 1.9248255531426556e-06, + "loss": 24.1094, + "step": 15898 + }, + { + "epoch": 0.1505002792476406, + "grad_norm": 340.134521484375, + "learning_rate": 1.9248138905289233e-06, + "loss": 21.0, + "step": 15899 + }, + { + "epoch": 0.15050974526935565, + "grad_norm": 180.4652862548828, + "learning_rate": 1.9248022270459256e-06, + "loss": 12.7188, + "step": 15900 + }, + { + "epoch": 0.1505192112910707, + "grad_norm": 3.4405643939971924, + "learning_rate": 1.9247905626936743e-06, + "loss": 0.8613, + "step": 15901 + }, + { + "epoch": 0.15052867731278577, + "grad_norm": 244.41018676757812, + "learning_rate": 1.92477889747218e-06, + "loss": 19.7969, + "step": 15902 + }, + { + "epoch": 0.1505381433345008, + "grad_norm": 186.23745727539062, + "learning_rate": 1.9247672313814533e-06, + "loss": 16.2656, + "step": 15903 + }, + { + "epoch": 0.15054760935621586, + "grad_norm": 385.86920166015625, + "learning_rate": 1.924755564421506e-06, + "loss": 42.0703, + "step": 15904 + }, + { + "epoch": 0.15055707537793092, + "grad_norm": 613.3121948242188, + "learning_rate": 1.9247438965923483e-06, + "loss": 12.3594, + "step": 15905 + }, + { + "epoch": 0.15056654139964598, + "grad_norm": 364.0429382324219, + "learning_rate": 1.9247322278939917e-06, + "loss": 22.8984, + "step": 15906 + }, + { + "epoch": 0.150576007421361, + "grad_norm": 165.5546112060547, + "learning_rate": 1.924720558326447e-06, + "loss": 17.1172, + "step": 15907 + }, + { + "epoch": 0.15058547344307607, + "grad_norm": 653.9844360351562, + "learning_rate": 1.9247088878897252e-06, + "loss": 22.2344, + "step": 15908 + }, + { + "epoch": 0.15059493946479113, + "grad_norm": 313.48382568359375, + "learning_rate": 1.9246972165838366e-06, + "loss": 29.4648, + "step": 15909 + }, + { + "epoch": 0.1506044054865062, + "grad_norm": 516.813720703125, + "learning_rate": 1.9246855444087934e-06, + "loss": 20.5391, + "step": 15910 + }, + { + "epoch": 0.15061387150822125, + "grad_norm": 234.4116668701172, + "learning_rate": 1.9246738713646055e-06, + "loss": 21.6328, + "step": 15911 + }, + { + "epoch": 0.15062333752993629, + "grad_norm": 585.2155151367188, + "learning_rate": 1.9246621974512847e-06, + "loss": 22.9609, + "step": 15912 + }, + { + "epoch": 0.15063280355165135, + "grad_norm": 358.904052734375, + "learning_rate": 1.9246505226688416e-06, + "loss": 43.7031, + "step": 15913 + }, + { + "epoch": 0.1506422695733664, + "grad_norm": 352.25506591796875, + "learning_rate": 1.924638847017287e-06, + "loss": 30.5, + "step": 15914 + }, + { + "epoch": 0.15065173559508147, + "grad_norm": 378.2925109863281, + "learning_rate": 1.9246271704966318e-06, + "loss": 36.1719, + "step": 15915 + }, + { + "epoch": 0.1506612016167965, + "grad_norm": 309.8816223144531, + "learning_rate": 1.9246154931068875e-06, + "loss": 26.0625, + "step": 15916 + }, + { + "epoch": 0.15067066763851156, + "grad_norm": 439.50531005859375, + "learning_rate": 1.924603814848065e-06, + "loss": 44.4297, + "step": 15917 + }, + { + "epoch": 0.15068013366022662, + "grad_norm": 314.3935852050781, + "learning_rate": 1.9245921357201746e-06, + "loss": 19.1094, + "step": 15918 + }, + { + "epoch": 0.15068959968194168, + "grad_norm": 607.9560546875, + "learning_rate": 1.924580455723228e-06, + "loss": 57.0312, + "step": 15919 + }, + { + "epoch": 0.15069906570365674, + "grad_norm": 414.8005676269531, + "learning_rate": 1.924568774857236e-06, + "loss": 33.3828, + "step": 15920 + }, + { + "epoch": 0.15070853172537177, + "grad_norm": 186.07579040527344, + "learning_rate": 1.9245570931222093e-06, + "loss": 17.3125, + "step": 15921 + }, + { + "epoch": 0.15071799774708683, + "grad_norm": 759.4568481445312, + "learning_rate": 1.924545410518159e-06, + "loss": 33.7812, + "step": 15922 + }, + { + "epoch": 0.1507274637688019, + "grad_norm": 264.6676940917969, + "learning_rate": 1.9245337270450965e-06, + "loss": 27.5469, + "step": 15923 + }, + { + "epoch": 0.15073692979051695, + "grad_norm": 251.0618896484375, + "learning_rate": 1.9245220427030323e-06, + "loss": 19.0469, + "step": 15924 + }, + { + "epoch": 0.15074639581223198, + "grad_norm": 167.65887451171875, + "learning_rate": 1.9245103574919776e-06, + "loss": 20.3125, + "step": 15925 + }, + { + "epoch": 0.15075586183394704, + "grad_norm": 591.0492553710938, + "learning_rate": 1.924498671411943e-06, + "loss": 51.0625, + "step": 15926 + }, + { + "epoch": 0.1507653278556621, + "grad_norm": 579.18115234375, + "learning_rate": 1.9244869844629397e-06, + "loss": 44.9375, + "step": 15927 + }, + { + "epoch": 0.15077479387737716, + "grad_norm": 382.61492919921875, + "learning_rate": 1.9244752966449793e-06, + "loss": 25.6016, + "step": 15928 + }, + { + "epoch": 0.15078425989909222, + "grad_norm": 598.7092895507812, + "learning_rate": 1.924463607958072e-06, + "loss": 46.4688, + "step": 15929 + }, + { + "epoch": 0.15079372592080725, + "grad_norm": 266.32391357421875, + "learning_rate": 1.924451918402229e-06, + "loss": 25.0078, + "step": 15930 + }, + { + "epoch": 0.1508031919425223, + "grad_norm": 520.501953125, + "learning_rate": 1.9244402279774613e-06, + "loss": 38.3906, + "step": 15931 + }, + { + "epoch": 0.15081265796423737, + "grad_norm": 1033.274169921875, + "learning_rate": 1.9244285366837804e-06, + "loss": 39.1797, + "step": 15932 + }, + { + "epoch": 0.15082212398595243, + "grad_norm": 161.0829315185547, + "learning_rate": 1.924416844521196e-06, + "loss": 8.5195, + "step": 15933 + }, + { + "epoch": 0.15083159000766747, + "grad_norm": 275.91595458984375, + "learning_rate": 1.924405151489721e-06, + "loss": 26.6875, + "step": 15934 + }, + { + "epoch": 0.15084105602938253, + "grad_norm": 325.6798400878906, + "learning_rate": 1.9243934575893643e-06, + "loss": 25.9453, + "step": 15935 + }, + { + "epoch": 0.15085052205109759, + "grad_norm": 562.7462768554688, + "learning_rate": 1.9243817628201382e-06, + "loss": 28.5391, + "step": 15936 + }, + { + "epoch": 0.15085998807281265, + "grad_norm": 292.0237121582031, + "learning_rate": 1.9243700671820533e-06, + "loss": 29.7031, + "step": 15937 + }, + { + "epoch": 0.1508694540945277, + "grad_norm": 380.80743408203125, + "learning_rate": 1.924358370675121e-06, + "loss": 26.8281, + "step": 15938 + }, + { + "epoch": 0.15087892011624274, + "grad_norm": 356.9715881347656, + "learning_rate": 1.9243466732993518e-06, + "loss": 32.7266, + "step": 15939 + }, + { + "epoch": 0.1508883861379578, + "grad_norm": 747.6587524414062, + "learning_rate": 1.9243349750547567e-06, + "loss": 48.0312, + "step": 15940 + }, + { + "epoch": 0.15089785215967286, + "grad_norm": 3.2325010299682617, + "learning_rate": 1.924323275941347e-06, + "loss": 1.0132, + "step": 15941 + }, + { + "epoch": 0.15090731818138792, + "grad_norm": 1070.19091796875, + "learning_rate": 1.9243115759591336e-06, + "loss": 31.9062, + "step": 15942 + }, + { + "epoch": 0.15091678420310295, + "grad_norm": 538.5552978515625, + "learning_rate": 1.924299875108127e-06, + "loss": 50.1445, + "step": 15943 + }, + { + "epoch": 0.150926250224818, + "grad_norm": 1286.1748046875, + "learning_rate": 1.9242881733883393e-06, + "loss": 30.9375, + "step": 15944 + }, + { + "epoch": 0.15093571624653307, + "grad_norm": 228.91351318359375, + "learning_rate": 1.9242764707997803e-06, + "loss": 19.0938, + "step": 15945 + }, + { + "epoch": 0.15094518226824813, + "grad_norm": 228.2642822265625, + "learning_rate": 1.924264767342462e-06, + "loss": 26.2891, + "step": 15946 + }, + { + "epoch": 0.1509546482899632, + "grad_norm": 384.0343017578125, + "learning_rate": 1.9242530630163944e-06, + "loss": 33.1406, + "step": 15947 + }, + { + "epoch": 0.15096411431167822, + "grad_norm": 650.8197021484375, + "learning_rate": 1.9242413578215895e-06, + "loss": 61.5312, + "step": 15948 + }, + { + "epoch": 0.15097358033339328, + "grad_norm": 315.7159118652344, + "learning_rate": 1.924229651758058e-06, + "loss": 38.1406, + "step": 15949 + }, + { + "epoch": 0.15098304635510834, + "grad_norm": 3.356628656387329, + "learning_rate": 1.9242179448258106e-06, + "loss": 0.9814, + "step": 15950 + }, + { + "epoch": 0.1509925123768234, + "grad_norm": 500.4942626953125, + "learning_rate": 1.924206237024858e-06, + "loss": 42.1562, + "step": 15951 + }, + { + "epoch": 0.15100197839853843, + "grad_norm": 653.0972290039062, + "learning_rate": 1.924194528355212e-06, + "loss": 36.4531, + "step": 15952 + }, + { + "epoch": 0.1510114444202535, + "grad_norm": 3.8596463203430176, + "learning_rate": 1.9241828188168835e-06, + "loss": 1.0127, + "step": 15953 + }, + { + "epoch": 0.15102091044196855, + "grad_norm": 887.1087646484375, + "learning_rate": 1.924171108409883e-06, + "loss": 28.6484, + "step": 15954 + }, + { + "epoch": 0.1510303764636836, + "grad_norm": 370.3211364746094, + "learning_rate": 1.924159397134222e-06, + "loss": 44.7344, + "step": 15955 + }, + { + "epoch": 0.15103984248539867, + "grad_norm": 332.69976806640625, + "learning_rate": 1.9241476849899106e-06, + "loss": 19.4297, + "step": 15956 + }, + { + "epoch": 0.1510493085071137, + "grad_norm": 181.0024871826172, + "learning_rate": 1.924135971976961e-06, + "loss": 17.75, + "step": 15957 + }, + { + "epoch": 0.15105877452882877, + "grad_norm": 2012.956787109375, + "learning_rate": 1.924124258095384e-06, + "loss": 66.2188, + "step": 15958 + }, + { + "epoch": 0.15106824055054383, + "grad_norm": 527.802490234375, + "learning_rate": 1.9241125433451903e-06, + "loss": 24.8359, + "step": 15959 + }, + { + "epoch": 0.15107770657225889, + "grad_norm": 428.0569763183594, + "learning_rate": 1.9241008277263905e-06, + "loss": 30.1875, + "step": 15960 + }, + { + "epoch": 0.15108717259397392, + "grad_norm": 592.8944091796875, + "learning_rate": 1.924089111238996e-06, + "loss": 45.5156, + "step": 15961 + }, + { + "epoch": 0.15109663861568898, + "grad_norm": 997.6264038085938, + "learning_rate": 1.9240773938830183e-06, + "loss": 40.1328, + "step": 15962 + }, + { + "epoch": 0.15110610463740404, + "grad_norm": 277.46209716796875, + "learning_rate": 1.9240656756584674e-06, + "loss": 22.9297, + "step": 15963 + }, + { + "epoch": 0.1511155706591191, + "grad_norm": 290.4744567871094, + "learning_rate": 1.9240539565653553e-06, + "loss": 16.3828, + "step": 15964 + }, + { + "epoch": 0.15112503668083416, + "grad_norm": 221.81922912597656, + "learning_rate": 1.9240422366036927e-06, + "loss": 19.7812, + "step": 15965 + }, + { + "epoch": 0.1511345027025492, + "grad_norm": 197.1556396484375, + "learning_rate": 1.92403051577349e-06, + "loss": 17.3203, + "step": 15966 + }, + { + "epoch": 0.15114396872426425, + "grad_norm": 3.9623067378997803, + "learning_rate": 1.924018794074759e-06, + "loss": 0.8921, + "step": 15967 + }, + { + "epoch": 0.1511534347459793, + "grad_norm": 1010.3749389648438, + "learning_rate": 1.9240070715075104e-06, + "loss": 40.1641, + "step": 15968 + }, + { + "epoch": 0.15116290076769437, + "grad_norm": 1194.725341796875, + "learning_rate": 1.9239953480717553e-06, + "loss": 35.9453, + "step": 15969 + }, + { + "epoch": 0.1511723667894094, + "grad_norm": 708.308837890625, + "learning_rate": 1.9239836237675047e-06, + "loss": 53.4453, + "step": 15970 + }, + { + "epoch": 0.15118183281112446, + "grad_norm": 1056.927001953125, + "learning_rate": 1.9239718985947696e-06, + "loss": 30.9375, + "step": 15971 + }, + { + "epoch": 0.15119129883283952, + "grad_norm": 235.1652374267578, + "learning_rate": 1.923960172553561e-06, + "loss": 21.875, + "step": 15972 + }, + { + "epoch": 0.15120076485455458, + "grad_norm": 514.7023315429688, + "learning_rate": 1.92394844564389e-06, + "loss": 23.0625, + "step": 15973 + }, + { + "epoch": 0.15121023087626964, + "grad_norm": 4.495181560516357, + "learning_rate": 1.9239367178657676e-06, + "loss": 0.895, + "step": 15974 + }, + { + "epoch": 0.15121969689798467, + "grad_norm": 936.2716674804688, + "learning_rate": 1.9239249892192047e-06, + "loss": 29.6953, + "step": 15975 + }, + { + "epoch": 0.15122916291969973, + "grad_norm": 167.79531860351562, + "learning_rate": 1.9239132597042124e-06, + "loss": 14.9766, + "step": 15976 + }, + { + "epoch": 0.1512386289414148, + "grad_norm": 514.2603149414062, + "learning_rate": 1.923901529320802e-06, + "loss": 53.7812, + "step": 15977 + }, + { + "epoch": 0.15124809496312985, + "grad_norm": 179.00308227539062, + "learning_rate": 1.923889798068984e-06, + "loss": 20.2266, + "step": 15978 + }, + { + "epoch": 0.15125756098484489, + "grad_norm": 509.60382080078125, + "learning_rate": 1.92387806594877e-06, + "loss": 49.0703, + "step": 15979 + }, + { + "epoch": 0.15126702700655995, + "grad_norm": 446.52850341796875, + "learning_rate": 1.9238663329601705e-06, + "loss": 39.3906, + "step": 15980 + }, + { + "epoch": 0.151276493028275, + "grad_norm": 496.1611633300781, + "learning_rate": 1.9238545991031966e-06, + "loss": 17.6406, + "step": 15981 + }, + { + "epoch": 0.15128595904999007, + "grad_norm": 315.08013916015625, + "learning_rate": 1.9238428643778595e-06, + "loss": 8.9238, + "step": 15982 + }, + { + "epoch": 0.15129542507170513, + "grad_norm": 170.89340209960938, + "learning_rate": 1.9238311287841703e-06, + "loss": 15.707, + "step": 15983 + }, + { + "epoch": 0.15130489109342016, + "grad_norm": 897.1996459960938, + "learning_rate": 1.92381939232214e-06, + "loss": 90.125, + "step": 15984 + }, + { + "epoch": 0.15131435711513522, + "grad_norm": 590.6878662109375, + "learning_rate": 1.9238076549917796e-06, + "loss": 44.2031, + "step": 15985 + }, + { + "epoch": 0.15132382313685028, + "grad_norm": 310.3890686035156, + "learning_rate": 1.9237959167931e-06, + "loss": 16.1133, + "step": 15986 + }, + { + "epoch": 0.15133328915856534, + "grad_norm": 160.8904266357422, + "learning_rate": 1.9237841777261124e-06, + "loss": 17.4141, + "step": 15987 + }, + { + "epoch": 0.1513427551802804, + "grad_norm": 303.73681640625, + "learning_rate": 1.9237724377908276e-06, + "loss": 12.1914, + "step": 15988 + }, + { + "epoch": 0.15135222120199543, + "grad_norm": 199.0948028564453, + "learning_rate": 1.923760696987257e-06, + "loss": 19.9844, + "step": 15989 + }, + { + "epoch": 0.1513616872237105, + "grad_norm": 570.3359375, + "learning_rate": 1.9237489553154115e-06, + "loss": 42.1719, + "step": 15990 + }, + { + "epoch": 0.15137115324542555, + "grad_norm": 548.3731079101562, + "learning_rate": 1.9237372127753017e-06, + "loss": 43.75, + "step": 15991 + }, + { + "epoch": 0.1513806192671406, + "grad_norm": 721.8799438476562, + "learning_rate": 1.923725469366939e-06, + "loss": 36.3633, + "step": 15992 + }, + { + "epoch": 0.15139008528885564, + "grad_norm": 191.91952514648438, + "learning_rate": 1.923713725090335e-06, + "loss": 20.3984, + "step": 15993 + }, + { + "epoch": 0.1513995513105707, + "grad_norm": 203.49057006835938, + "learning_rate": 1.9237019799455e-06, + "loss": 21.2773, + "step": 15994 + }, + { + "epoch": 0.15140901733228576, + "grad_norm": 326.1747741699219, + "learning_rate": 1.923690233932445e-06, + "loss": 27.6875, + "step": 15995 + }, + { + "epoch": 0.15141848335400082, + "grad_norm": 334.3539123535156, + "learning_rate": 1.9236784870511814e-06, + "loss": 44.8047, + "step": 15996 + }, + { + "epoch": 0.15142794937571588, + "grad_norm": 728.9762573242188, + "learning_rate": 1.92366673930172e-06, + "loss": 27.4375, + "step": 15997 + }, + { + "epoch": 0.1514374153974309, + "grad_norm": 218.830810546875, + "learning_rate": 1.923654990684072e-06, + "loss": 15.4297, + "step": 15998 + }, + { + "epoch": 0.15144688141914597, + "grad_norm": 405.2086181640625, + "learning_rate": 1.9236432411982486e-06, + "loss": 34.8125, + "step": 15999 + }, + { + "epoch": 0.15145634744086103, + "grad_norm": 364.3819580078125, + "learning_rate": 1.9236314908442604e-06, + "loss": 19.2148, + "step": 16000 + }, + { + "epoch": 0.1514658134625761, + "grad_norm": 215.13108825683594, + "learning_rate": 1.923619739622119e-06, + "loss": 9.0039, + "step": 16001 + }, + { + "epoch": 0.15147527948429113, + "grad_norm": 272.7998962402344, + "learning_rate": 1.923607987531835e-06, + "loss": 16.6328, + "step": 16002 + }, + { + "epoch": 0.15148474550600619, + "grad_norm": 687.15966796875, + "learning_rate": 1.9235962345734195e-06, + "loss": 18.3359, + "step": 16003 + }, + { + "epoch": 0.15149421152772125, + "grad_norm": 516.9276733398438, + "learning_rate": 1.9235844807468837e-06, + "loss": 38.4297, + "step": 16004 + }, + { + "epoch": 0.1515036775494363, + "grad_norm": 324.189697265625, + "learning_rate": 1.9235727260522383e-06, + "loss": 37.0312, + "step": 16005 + }, + { + "epoch": 0.15151314357115137, + "grad_norm": 505.9603271484375, + "learning_rate": 1.923560970489495e-06, + "loss": 41.7734, + "step": 16006 + }, + { + "epoch": 0.1515226095928664, + "grad_norm": 319.7236022949219, + "learning_rate": 1.923549214058664e-06, + "loss": 28.4297, + "step": 16007 + }, + { + "epoch": 0.15153207561458146, + "grad_norm": 603.7570190429688, + "learning_rate": 1.923537456759757e-06, + "loss": 27.832, + "step": 16008 + }, + { + "epoch": 0.15154154163629652, + "grad_norm": 667.630615234375, + "learning_rate": 1.923525698592785e-06, + "loss": 32.5234, + "step": 16009 + }, + { + "epoch": 0.15155100765801158, + "grad_norm": 193.77467346191406, + "learning_rate": 1.923513939557759e-06, + "loss": 23.0156, + "step": 16010 + }, + { + "epoch": 0.1515604736797266, + "grad_norm": 580.5731811523438, + "learning_rate": 1.92350217965469e-06, + "loss": 33.7734, + "step": 16011 + }, + { + "epoch": 0.15156993970144167, + "grad_norm": 330.9787292480469, + "learning_rate": 1.923490418883589e-06, + "loss": 20.6875, + "step": 16012 + }, + { + "epoch": 0.15157940572315673, + "grad_norm": 349.6717834472656, + "learning_rate": 1.9234786572444667e-06, + "loss": 43.1406, + "step": 16013 + }, + { + "epoch": 0.1515888717448718, + "grad_norm": 433.4192810058594, + "learning_rate": 1.923466894737335e-06, + "loss": 36.75, + "step": 16014 + }, + { + "epoch": 0.15159833776658685, + "grad_norm": 250.643310546875, + "learning_rate": 1.9234551313622043e-06, + "loss": 16.2656, + "step": 16015 + }, + { + "epoch": 0.15160780378830188, + "grad_norm": 447.3929138183594, + "learning_rate": 1.9234433671190856e-06, + "loss": 49.5703, + "step": 16016 + }, + { + "epoch": 0.15161726981001694, + "grad_norm": 213.32827758789062, + "learning_rate": 1.9234316020079906e-06, + "loss": 16.1016, + "step": 16017 + }, + { + "epoch": 0.151626735831732, + "grad_norm": 261.9588928222656, + "learning_rate": 1.92341983602893e-06, + "loss": 18.8828, + "step": 16018 + }, + { + "epoch": 0.15163620185344706, + "grad_norm": 846.6092529296875, + "learning_rate": 1.9234080691819148e-06, + "loss": 49.4375, + "step": 16019 + }, + { + "epoch": 0.1516456678751621, + "grad_norm": 2.830075740814209, + "learning_rate": 1.923396301466956e-06, + "loss": 0.8174, + "step": 16020 + }, + { + "epoch": 0.15165513389687715, + "grad_norm": 469.00860595703125, + "learning_rate": 1.9233845328840643e-06, + "loss": 26.4844, + "step": 16021 + }, + { + "epoch": 0.1516645999185922, + "grad_norm": 202.45687866210938, + "learning_rate": 1.923372763433252e-06, + "loss": 21.0938, + "step": 16022 + }, + { + "epoch": 0.15167406594030727, + "grad_norm": 953.3057250976562, + "learning_rate": 1.9233609931145285e-06, + "loss": 44.6562, + "step": 16023 + }, + { + "epoch": 0.15168353196202233, + "grad_norm": 512.30517578125, + "learning_rate": 1.9233492219279064e-06, + "loss": 24.5234, + "step": 16024 + }, + { + "epoch": 0.15169299798373737, + "grad_norm": 524.45556640625, + "learning_rate": 1.9233374498733957e-06, + "loss": 27.5859, + "step": 16025 + }, + { + "epoch": 0.15170246400545243, + "grad_norm": 715.0831298828125, + "learning_rate": 1.923325676951008e-06, + "loss": 40.3359, + "step": 16026 + }, + { + "epoch": 0.15171193002716749, + "grad_norm": 685.8060302734375, + "learning_rate": 1.9233139031607546e-06, + "loss": 42.875, + "step": 16027 + }, + { + "epoch": 0.15172139604888255, + "grad_norm": 302.92828369140625, + "learning_rate": 1.923302128502646e-06, + "loss": 20.0664, + "step": 16028 + }, + { + "epoch": 0.15173086207059758, + "grad_norm": 623.8862915039062, + "learning_rate": 1.9232903529766934e-06, + "loss": 17.9375, + "step": 16029 + }, + { + "epoch": 0.15174032809231264, + "grad_norm": 735.4859619140625, + "learning_rate": 1.923278576582908e-06, + "loss": 50.5, + "step": 16030 + }, + { + "epoch": 0.1517497941140277, + "grad_norm": 2.821641445159912, + "learning_rate": 1.9232667993213006e-06, + "loss": 0.9407, + "step": 16031 + }, + { + "epoch": 0.15175926013574276, + "grad_norm": 304.4107666015625, + "learning_rate": 1.9232550211918827e-06, + "loss": 20.0781, + "step": 16032 + }, + { + "epoch": 0.15176872615745782, + "grad_norm": 181.06155395507812, + "learning_rate": 1.9232432421946647e-06, + "loss": 15.9531, + "step": 16033 + }, + { + "epoch": 0.15177819217917285, + "grad_norm": 786.2135620117188, + "learning_rate": 1.9232314623296585e-06, + "loss": 15.0938, + "step": 16034 + }, + { + "epoch": 0.1517876582008879, + "grad_norm": 217.8589630126953, + "learning_rate": 1.9232196815968747e-06, + "loss": 15.1719, + "step": 16035 + }, + { + "epoch": 0.15179712422260297, + "grad_norm": 630.968505859375, + "learning_rate": 1.9232078999963242e-06, + "loss": 37.5547, + "step": 16036 + }, + { + "epoch": 0.15180659024431803, + "grad_norm": 209.85504150390625, + "learning_rate": 1.9231961175280186e-06, + "loss": 20.332, + "step": 16037 + }, + { + "epoch": 0.15181605626603306, + "grad_norm": 162.24441528320312, + "learning_rate": 1.9231843341919688e-06, + "loss": 17.6797, + "step": 16038 + }, + { + "epoch": 0.15182552228774812, + "grad_norm": 321.9690856933594, + "learning_rate": 1.923172549988186e-06, + "loss": 10.4648, + "step": 16039 + }, + { + "epoch": 0.15183498830946318, + "grad_norm": 313.15557861328125, + "learning_rate": 1.9231607649166807e-06, + "loss": 20.2422, + "step": 16040 + }, + { + "epoch": 0.15184445433117824, + "grad_norm": 223.81187438964844, + "learning_rate": 1.923148978977464e-06, + "loss": 15.4023, + "step": 16041 + }, + { + "epoch": 0.1518539203528933, + "grad_norm": 111.59276580810547, + "learning_rate": 1.923137192170548e-06, + "loss": 13.5, + "step": 16042 + }, + { + "epoch": 0.15186338637460833, + "grad_norm": 208.4658203125, + "learning_rate": 1.9231254044959425e-06, + "loss": 16.5742, + "step": 16043 + }, + { + "epoch": 0.1518728523963234, + "grad_norm": 426.67547607421875, + "learning_rate": 1.9231136159536598e-06, + "loss": 15.9219, + "step": 16044 + }, + { + "epoch": 0.15188231841803845, + "grad_norm": 358.7205505371094, + "learning_rate": 1.92310182654371e-06, + "loss": 25.4258, + "step": 16045 + }, + { + "epoch": 0.1518917844397535, + "grad_norm": 424.1452941894531, + "learning_rate": 1.9230900362661044e-06, + "loss": 30.6328, + "step": 16046 + }, + { + "epoch": 0.15190125046146855, + "grad_norm": 435.9208679199219, + "learning_rate": 1.9230782451208547e-06, + "loss": 20.6406, + "step": 16047 + }, + { + "epoch": 0.1519107164831836, + "grad_norm": 459.1526794433594, + "learning_rate": 1.923066453107971e-06, + "loss": 19.4766, + "step": 16048 + }, + { + "epoch": 0.15192018250489867, + "grad_norm": 467.2306823730469, + "learning_rate": 1.923054660227465e-06, + "loss": 42.7344, + "step": 16049 + }, + { + "epoch": 0.15192964852661373, + "grad_norm": 168.11265563964844, + "learning_rate": 1.9230428664793477e-06, + "loss": 19.125, + "step": 16050 + }, + { + "epoch": 0.15193911454832879, + "grad_norm": 856.0687866210938, + "learning_rate": 1.9230310718636303e-06, + "loss": 63.4688, + "step": 16051 + }, + { + "epoch": 0.15194858057004382, + "grad_norm": 448.9898681640625, + "learning_rate": 1.9230192763803234e-06, + "loss": 21.5703, + "step": 16052 + }, + { + "epoch": 0.15195804659175888, + "grad_norm": 391.4226989746094, + "learning_rate": 1.9230074800294387e-06, + "loss": 47.5469, + "step": 16053 + }, + { + "epoch": 0.15196751261347394, + "grad_norm": 364.10992431640625, + "learning_rate": 1.922995682810987e-06, + "loss": 35.8203, + "step": 16054 + }, + { + "epoch": 0.151976978635189, + "grad_norm": 184.00637817382812, + "learning_rate": 1.9229838847249793e-06, + "loss": 9.3086, + "step": 16055 + }, + { + "epoch": 0.15198644465690403, + "grad_norm": 336.3006591796875, + "learning_rate": 1.9229720857714267e-06, + "loss": 18.0664, + "step": 16056 + }, + { + "epoch": 0.1519959106786191, + "grad_norm": 550.2630615234375, + "learning_rate": 1.9229602859503408e-06, + "loss": 36.0, + "step": 16057 + }, + { + "epoch": 0.15200537670033415, + "grad_norm": 451.956298828125, + "learning_rate": 1.9229484852617318e-06, + "loss": 31.5938, + "step": 16058 + }, + { + "epoch": 0.1520148427220492, + "grad_norm": 652.0521240234375, + "learning_rate": 1.9229366837056115e-06, + "loss": 32.0781, + "step": 16059 + }, + { + "epoch": 0.15202430874376427, + "grad_norm": 209.29946899414062, + "learning_rate": 1.922924881281991e-06, + "loss": 10.3281, + "step": 16060 + }, + { + "epoch": 0.1520337747654793, + "grad_norm": 439.3474426269531, + "learning_rate": 1.922913077990881e-06, + "loss": 57.7422, + "step": 16061 + }, + { + "epoch": 0.15204324078719436, + "grad_norm": 302.68524169921875, + "learning_rate": 1.9229012738322926e-06, + "loss": 17.2812, + "step": 16062 + }, + { + "epoch": 0.15205270680890942, + "grad_norm": 167.45062255859375, + "learning_rate": 1.922889468806237e-06, + "loss": 15.8477, + "step": 16063 + }, + { + "epoch": 0.15206217283062448, + "grad_norm": 170.8251953125, + "learning_rate": 1.9228776629127253e-06, + "loss": 14.6016, + "step": 16064 + }, + { + "epoch": 0.1520716388523395, + "grad_norm": 292.1324157714844, + "learning_rate": 1.922865856151769e-06, + "loss": 27.3906, + "step": 16065 + }, + { + "epoch": 0.15208110487405457, + "grad_norm": 164.26805114746094, + "learning_rate": 1.9228540485233783e-06, + "loss": 14.0742, + "step": 16066 + }, + { + "epoch": 0.15209057089576963, + "grad_norm": 178.5732879638672, + "learning_rate": 1.9228422400275654e-06, + "loss": 19.3516, + "step": 16067 + }, + { + "epoch": 0.1521000369174847, + "grad_norm": 267.71063232421875, + "learning_rate": 1.9228304306643403e-06, + "loss": 25.1406, + "step": 16068 + }, + { + "epoch": 0.15210950293919975, + "grad_norm": 336.5518493652344, + "learning_rate": 1.922818620433715e-06, + "loss": 29.2344, + "step": 16069 + }, + { + "epoch": 0.15211896896091479, + "grad_norm": 836.1954956054688, + "learning_rate": 1.9228068093357e-06, + "loss": 57.0, + "step": 16070 + }, + { + "epoch": 0.15212843498262985, + "grad_norm": 218.77040100097656, + "learning_rate": 1.9227949973703067e-06, + "loss": 26.8594, + "step": 16071 + }, + { + "epoch": 0.1521379010043449, + "grad_norm": 298.8296813964844, + "learning_rate": 1.922783184537546e-06, + "loss": 20.4844, + "step": 16072 + }, + { + "epoch": 0.15214736702605997, + "grad_norm": 353.08892822265625, + "learning_rate": 1.9227713708374294e-06, + "loss": 28.1172, + "step": 16073 + }, + { + "epoch": 0.15215683304777503, + "grad_norm": 387.21868896484375, + "learning_rate": 1.922759556269968e-06, + "loss": 19.2461, + "step": 16074 + }, + { + "epoch": 0.15216629906949006, + "grad_norm": 545.0728149414062, + "learning_rate": 1.922747740835172e-06, + "loss": 43.9219, + "step": 16075 + }, + { + "epoch": 0.15217576509120512, + "grad_norm": 176.4123077392578, + "learning_rate": 1.9227359245330536e-06, + "loss": 17.7891, + "step": 16076 + }, + { + "epoch": 0.15218523111292018, + "grad_norm": 205.753662109375, + "learning_rate": 1.9227241073636233e-06, + "loss": 15.4805, + "step": 16077 + }, + { + "epoch": 0.15219469713463524, + "grad_norm": 1785.8316650390625, + "learning_rate": 1.9227122893268923e-06, + "loss": 15.6875, + "step": 16078 + }, + { + "epoch": 0.15220416315635027, + "grad_norm": 291.2571105957031, + "learning_rate": 1.922700470422872e-06, + "loss": 15.9297, + "step": 16079 + }, + { + "epoch": 0.15221362917806533, + "grad_norm": 276.522705078125, + "learning_rate": 1.922688650651573e-06, + "loss": 25.6719, + "step": 16080 + }, + { + "epoch": 0.1522230951997804, + "grad_norm": 509.0711364746094, + "learning_rate": 1.922676830013007e-06, + "loss": 41.4062, + "step": 16081 + }, + { + "epoch": 0.15223256122149545, + "grad_norm": 342.739501953125, + "learning_rate": 1.9226650085071846e-06, + "loss": 23.1367, + "step": 16082 + }, + { + "epoch": 0.1522420272432105, + "grad_norm": 390.4176940917969, + "learning_rate": 1.9226531861341167e-06, + "loss": 20.3438, + "step": 16083 + }, + { + "epoch": 0.15225149326492554, + "grad_norm": 246.78805541992188, + "learning_rate": 1.9226413628938154e-06, + "loss": 19.6172, + "step": 16084 + }, + { + "epoch": 0.1522609592866406, + "grad_norm": 205.09378051757812, + "learning_rate": 1.922629538786291e-06, + "loss": 18.125, + "step": 16085 + }, + { + "epoch": 0.15227042530835566, + "grad_norm": 332.6913146972656, + "learning_rate": 1.922617713811555e-06, + "loss": 17.5469, + "step": 16086 + }, + { + "epoch": 0.15227989133007072, + "grad_norm": 215.2665557861328, + "learning_rate": 1.9226058879696184e-06, + "loss": 20.4531, + "step": 16087 + }, + { + "epoch": 0.15228935735178575, + "grad_norm": 195.63018798828125, + "learning_rate": 1.9225940612604923e-06, + "loss": 17.2344, + "step": 16088 + }, + { + "epoch": 0.1522988233735008, + "grad_norm": 317.62255859375, + "learning_rate": 1.9225822336841877e-06, + "loss": 24.9531, + "step": 16089 + }, + { + "epoch": 0.15230828939521587, + "grad_norm": 3.6642847061157227, + "learning_rate": 1.922570405240716e-06, + "loss": 1.0938, + "step": 16090 + }, + { + "epoch": 0.15231775541693093, + "grad_norm": 367.3928527832031, + "learning_rate": 1.922558575930088e-06, + "loss": 22.8672, + "step": 16091 + }, + { + "epoch": 0.152327221438646, + "grad_norm": 383.02264404296875, + "learning_rate": 1.9225467457523146e-06, + "loss": 16.7422, + "step": 16092 + }, + { + "epoch": 0.15233668746036103, + "grad_norm": 480.639892578125, + "learning_rate": 1.9225349147074076e-06, + "loss": 38.4375, + "step": 16093 + }, + { + "epoch": 0.15234615348207609, + "grad_norm": 614.7803955078125, + "learning_rate": 1.922523082795378e-06, + "loss": 70.3125, + "step": 16094 + }, + { + "epoch": 0.15235561950379115, + "grad_norm": 306.5180358886719, + "learning_rate": 1.9225112500162365e-06, + "loss": 33.8594, + "step": 16095 + }, + { + "epoch": 0.1523650855255062, + "grad_norm": 178.33486938476562, + "learning_rate": 1.9224994163699944e-06, + "loss": 13.4492, + "step": 16096 + }, + { + "epoch": 0.15237455154722124, + "grad_norm": 337.2597351074219, + "learning_rate": 1.9224875818566633e-06, + "loss": 46.3828, + "step": 16097 + }, + { + "epoch": 0.1523840175689363, + "grad_norm": 637.857177734375, + "learning_rate": 1.9224757464762533e-06, + "loss": 39.1016, + "step": 16098 + }, + { + "epoch": 0.15239348359065136, + "grad_norm": 545.854736328125, + "learning_rate": 1.922463910228776e-06, + "loss": 51.8242, + "step": 16099 + }, + { + "epoch": 0.15240294961236642, + "grad_norm": 387.1495666503906, + "learning_rate": 1.922452073114243e-06, + "loss": 8.3652, + "step": 16100 + }, + { + "epoch": 0.15241241563408148, + "grad_norm": 581.1470336914062, + "learning_rate": 1.9224402351326652e-06, + "loss": 18.3906, + "step": 16101 + }, + { + "epoch": 0.1524218816557965, + "grad_norm": 235.78826904296875, + "learning_rate": 1.9224283962840534e-06, + "loss": 27.1016, + "step": 16102 + }, + { + "epoch": 0.15243134767751157, + "grad_norm": 2.889896869659424, + "learning_rate": 1.9224165565684187e-06, + "loss": 0.8589, + "step": 16103 + }, + { + "epoch": 0.15244081369922663, + "grad_norm": 3.652980327606201, + "learning_rate": 1.9224047159857726e-06, + "loss": 1.022, + "step": 16104 + }, + { + "epoch": 0.1524502797209417, + "grad_norm": 361.1519470214844, + "learning_rate": 1.9223928745361263e-06, + "loss": 17.9141, + "step": 16105 + }, + { + "epoch": 0.15245974574265672, + "grad_norm": 153.5265655517578, + "learning_rate": 1.9223810322194906e-06, + "loss": 18.625, + "step": 16106 + }, + { + "epoch": 0.15246921176437178, + "grad_norm": 697.6231689453125, + "learning_rate": 1.9223691890358764e-06, + "loss": 32.4688, + "step": 16107 + }, + { + "epoch": 0.15247867778608684, + "grad_norm": 725.5969848632812, + "learning_rate": 1.9223573449852957e-06, + "loss": 20.1797, + "step": 16108 + }, + { + "epoch": 0.1524881438078019, + "grad_norm": 504.4724426269531, + "learning_rate": 1.9223455000677585e-06, + "loss": 21.4141, + "step": 16109 + }, + { + "epoch": 0.15249760982951696, + "grad_norm": 222.5875244140625, + "learning_rate": 1.9223336542832767e-06, + "loss": 10.9766, + "step": 16110 + }, + { + "epoch": 0.152507075851232, + "grad_norm": 492.6831359863281, + "learning_rate": 1.922321807631862e-06, + "loss": 22.1641, + "step": 16111 + }, + { + "epoch": 0.15251654187294705, + "grad_norm": 337.513671875, + "learning_rate": 1.922309960113524e-06, + "loss": 17.9688, + "step": 16112 + }, + { + "epoch": 0.1525260078946621, + "grad_norm": 271.1631774902344, + "learning_rate": 1.922298111728275e-06, + "loss": 19.6328, + "step": 16113 + }, + { + "epoch": 0.15253547391637717, + "grad_norm": 429.91986083984375, + "learning_rate": 1.9222862624761254e-06, + "loss": 52.4688, + "step": 16114 + }, + { + "epoch": 0.1525449399380922, + "grad_norm": 237.381591796875, + "learning_rate": 1.9222744123570868e-06, + "loss": 22.3438, + "step": 16115 + }, + { + "epoch": 0.15255440595980727, + "grad_norm": 217.96609497070312, + "learning_rate": 1.9222625613711705e-06, + "loss": 17.1562, + "step": 16116 + }, + { + "epoch": 0.15256387198152233, + "grad_norm": 242.33111572265625, + "learning_rate": 1.9222507095183876e-06, + "loss": 18.8828, + "step": 16117 + }, + { + "epoch": 0.15257333800323739, + "grad_norm": 180.04541015625, + "learning_rate": 1.9222388567987486e-06, + "loss": 14.0938, + "step": 16118 + }, + { + "epoch": 0.15258280402495245, + "grad_norm": 2.954066038131714, + "learning_rate": 1.922227003212265e-06, + "loss": 0.8506, + "step": 16119 + }, + { + "epoch": 0.15259227004666748, + "grad_norm": 215.73353576660156, + "learning_rate": 1.922215148758948e-06, + "loss": 23.6875, + "step": 16120 + }, + { + "epoch": 0.15260173606838254, + "grad_norm": 3.434169292449951, + "learning_rate": 1.922203293438809e-06, + "loss": 0.9253, + "step": 16121 + }, + { + "epoch": 0.1526112020900976, + "grad_norm": 428.19354248046875, + "learning_rate": 1.9221914372518595e-06, + "loss": 8.6875, + "step": 16122 + }, + { + "epoch": 0.15262066811181266, + "grad_norm": 520.5916748046875, + "learning_rate": 1.922179580198109e-06, + "loss": 45.0625, + "step": 16123 + }, + { + "epoch": 0.1526301341335277, + "grad_norm": 234.01443481445312, + "learning_rate": 1.92216772227757e-06, + "loss": 19.4922, + "step": 16124 + }, + { + "epoch": 0.15263960015524275, + "grad_norm": 174.78030395507812, + "learning_rate": 1.9221558634902534e-06, + "loss": 20.3867, + "step": 16125 + }, + { + "epoch": 0.1526490661769578, + "grad_norm": 421.09710693359375, + "learning_rate": 1.9221440038361704e-06, + "loss": 13.8633, + "step": 16126 + }, + { + "epoch": 0.15265853219867287, + "grad_norm": 442.8919372558594, + "learning_rate": 1.9221321433153317e-06, + "loss": 43.9375, + "step": 16127 + }, + { + "epoch": 0.15266799822038793, + "grad_norm": 379.0863342285156, + "learning_rate": 1.9221202819277492e-06, + "loss": 22.8594, + "step": 16128 + }, + { + "epoch": 0.15267746424210296, + "grad_norm": 533.1328735351562, + "learning_rate": 1.9221084196734335e-06, + "loss": 18.7578, + "step": 16129 + }, + { + "epoch": 0.15268693026381802, + "grad_norm": 217.66603088378906, + "learning_rate": 1.922096556552396e-06, + "loss": 18.5625, + "step": 16130 + }, + { + "epoch": 0.15269639628553308, + "grad_norm": 368.08917236328125, + "learning_rate": 1.922084692564647e-06, + "loss": 30.75, + "step": 16131 + }, + { + "epoch": 0.15270586230724814, + "grad_norm": 292.1047668457031, + "learning_rate": 1.922072827710199e-06, + "loss": 19.2969, + "step": 16132 + }, + { + "epoch": 0.15271532832896317, + "grad_norm": 3.2242071628570557, + "learning_rate": 1.9220609619890625e-06, + "loss": 0.9419, + "step": 16133 + }, + { + "epoch": 0.15272479435067823, + "grad_norm": 463.46075439453125, + "learning_rate": 1.9220490954012483e-06, + "loss": 51.2344, + "step": 16134 + }, + { + "epoch": 0.1527342603723933, + "grad_norm": 311.667236328125, + "learning_rate": 1.9220372279467686e-06, + "loss": 26.3594, + "step": 16135 + }, + { + "epoch": 0.15274372639410835, + "grad_norm": 337.0829162597656, + "learning_rate": 1.9220253596256335e-06, + "loss": 19.7891, + "step": 16136 + }, + { + "epoch": 0.1527531924158234, + "grad_norm": 3.1618921756744385, + "learning_rate": 1.9220134904378546e-06, + "loss": 0.9922, + "step": 16137 + }, + { + "epoch": 0.15276265843753845, + "grad_norm": 785.076171875, + "learning_rate": 1.922001620383443e-06, + "loss": 41.6406, + "step": 16138 + }, + { + "epoch": 0.1527721244592535, + "grad_norm": 591.9149169921875, + "learning_rate": 1.9219897494624096e-06, + "loss": 30.9297, + "step": 16139 + }, + { + "epoch": 0.15278159048096857, + "grad_norm": 396.8208312988281, + "learning_rate": 1.921977877674766e-06, + "loss": 46.5312, + "step": 16140 + }, + { + "epoch": 0.15279105650268363, + "grad_norm": 490.5712585449219, + "learning_rate": 1.9219660050205234e-06, + "loss": 27.3477, + "step": 16141 + }, + { + "epoch": 0.15280052252439866, + "grad_norm": 569.3952026367188, + "learning_rate": 1.9219541314996924e-06, + "loss": 34.9219, + "step": 16142 + }, + { + "epoch": 0.15280998854611372, + "grad_norm": 470.43212890625, + "learning_rate": 1.921942257112285e-06, + "loss": 15.8594, + "step": 16143 + }, + { + "epoch": 0.15281945456782878, + "grad_norm": 242.21405029296875, + "learning_rate": 1.9219303818583114e-06, + "loss": 23.0586, + "step": 16144 + }, + { + "epoch": 0.15282892058954384, + "grad_norm": 144.14479064941406, + "learning_rate": 1.921918505737783e-06, + "loss": 19.2617, + "step": 16145 + }, + { + "epoch": 0.1528383866112589, + "grad_norm": 456.53961181640625, + "learning_rate": 1.921906628750712e-06, + "loss": 30.8359, + "step": 16146 + }, + { + "epoch": 0.15284785263297393, + "grad_norm": 659.1858520507812, + "learning_rate": 1.921894750897108e-06, + "loss": 36.3828, + "step": 16147 + }, + { + "epoch": 0.152857318654689, + "grad_norm": 363.5350646972656, + "learning_rate": 1.921882872176983e-06, + "loss": 16.625, + "step": 16148 + }, + { + "epoch": 0.15286678467640405, + "grad_norm": 581.6115112304688, + "learning_rate": 1.921870992590348e-06, + "loss": 27.5469, + "step": 16149 + }, + { + "epoch": 0.1528762506981191, + "grad_norm": 463.8111267089844, + "learning_rate": 1.9218591121372143e-06, + "loss": 37.9688, + "step": 16150 + }, + { + "epoch": 0.15288571671983414, + "grad_norm": 401.6366271972656, + "learning_rate": 1.9218472308175934e-06, + "loss": 40.1719, + "step": 16151 + }, + { + "epoch": 0.1528951827415492, + "grad_norm": 195.3497772216797, + "learning_rate": 1.921835348631496e-06, + "loss": 18.3555, + "step": 16152 + }, + { + "epoch": 0.15290464876326426, + "grad_norm": 156.3960418701172, + "learning_rate": 1.921823465578933e-06, + "loss": 22.875, + "step": 16153 + }, + { + "epoch": 0.15291411478497932, + "grad_norm": 572.1671752929688, + "learning_rate": 1.921811581659916e-06, + "loss": 74.6562, + "step": 16154 + }, + { + "epoch": 0.15292358080669438, + "grad_norm": 222.97964477539062, + "learning_rate": 1.921799696874456e-06, + "loss": 17.3359, + "step": 16155 + }, + { + "epoch": 0.1529330468284094, + "grad_norm": 902.150634765625, + "learning_rate": 1.9217878112225646e-06, + "loss": 18.1406, + "step": 16156 + }, + { + "epoch": 0.15294251285012447, + "grad_norm": 190.6318817138672, + "learning_rate": 1.9217759247042523e-06, + "loss": 16.3203, + "step": 16157 + }, + { + "epoch": 0.15295197887183953, + "grad_norm": 600.290771484375, + "learning_rate": 1.921764037319531e-06, + "loss": 8.4707, + "step": 16158 + }, + { + "epoch": 0.1529614448935546, + "grad_norm": 775.888671875, + "learning_rate": 1.921752149068411e-06, + "loss": 27.5703, + "step": 16159 + }, + { + "epoch": 0.15297091091526965, + "grad_norm": 394.05804443359375, + "learning_rate": 1.921740259950904e-06, + "loss": 9.5, + "step": 16160 + }, + { + "epoch": 0.15298037693698469, + "grad_norm": 193.1033935546875, + "learning_rate": 1.9217283699670216e-06, + "loss": 16.2578, + "step": 16161 + }, + { + "epoch": 0.15298984295869975, + "grad_norm": 3.0666539669036865, + "learning_rate": 1.921716479116774e-06, + "loss": 0.9268, + "step": 16162 + }, + { + "epoch": 0.1529993089804148, + "grad_norm": 3.2971949577331543, + "learning_rate": 1.921704587400173e-06, + "loss": 0.8623, + "step": 16163 + }, + { + "epoch": 0.15300877500212987, + "grad_norm": 269.1973571777344, + "learning_rate": 1.92169269481723e-06, + "loss": 23.3594, + "step": 16164 + }, + { + "epoch": 0.1530182410238449, + "grad_norm": 2.9364142417907715, + "learning_rate": 1.9216808013679552e-06, + "loss": 0.8398, + "step": 16165 + }, + { + "epoch": 0.15302770704555996, + "grad_norm": 2290.124267578125, + "learning_rate": 1.9216689070523608e-06, + "loss": 15.0156, + "step": 16166 + }, + { + "epoch": 0.15303717306727502, + "grad_norm": 942.3045043945312, + "learning_rate": 1.9216570118704574e-06, + "loss": 67.8281, + "step": 16167 + }, + { + "epoch": 0.15304663908899008, + "grad_norm": 241.48245239257812, + "learning_rate": 1.9216451158222566e-06, + "loss": 22.2617, + "step": 16168 + }, + { + "epoch": 0.15305610511070514, + "grad_norm": 402.2320556640625, + "learning_rate": 1.921633218907769e-06, + "loss": 46.9922, + "step": 16169 + }, + { + "epoch": 0.15306557113242017, + "grad_norm": 854.984130859375, + "learning_rate": 1.9216213211270063e-06, + "loss": 59.4844, + "step": 16170 + }, + { + "epoch": 0.15307503715413523, + "grad_norm": 536.5677490234375, + "learning_rate": 1.9216094224799797e-06, + "loss": 7.293, + "step": 16171 + }, + { + "epoch": 0.1530845031758503, + "grad_norm": 4.172063827514648, + "learning_rate": 1.9215975229667e-06, + "loss": 1.0195, + "step": 16172 + }, + { + "epoch": 0.15309396919756535, + "grad_norm": 855.91845703125, + "learning_rate": 1.9215856225871786e-06, + "loss": 65.7344, + "step": 16173 + }, + { + "epoch": 0.15310343521928038, + "grad_norm": 284.8402404785156, + "learning_rate": 1.921573721341427e-06, + "loss": 9.0781, + "step": 16174 + }, + { + "epoch": 0.15311290124099544, + "grad_norm": 312.882568359375, + "learning_rate": 1.9215618192294553e-06, + "loss": 24.25, + "step": 16175 + }, + { + "epoch": 0.1531223672627105, + "grad_norm": 194.31137084960938, + "learning_rate": 1.921549916251276e-06, + "loss": 7.3008, + "step": 16176 + }, + { + "epoch": 0.15313183328442556, + "grad_norm": 638.7959594726562, + "learning_rate": 1.9215380124068998e-06, + "loss": 22.5938, + "step": 16177 + }, + { + "epoch": 0.15314129930614062, + "grad_norm": 287.4122009277344, + "learning_rate": 1.9215261076963374e-06, + "loss": 10.418, + "step": 16178 + }, + { + "epoch": 0.15315076532785565, + "grad_norm": 606.76904296875, + "learning_rate": 1.921514202119601e-06, + "loss": 11.3398, + "step": 16179 + }, + { + "epoch": 0.1531602313495707, + "grad_norm": 338.3473815917969, + "learning_rate": 1.9215022956767007e-06, + "loss": 20.9297, + "step": 16180 + }, + { + "epoch": 0.15316969737128577, + "grad_norm": 3.516780376434326, + "learning_rate": 1.9214903883676483e-06, + "loss": 0.9868, + "step": 16181 + }, + { + "epoch": 0.15317916339300083, + "grad_norm": 260.16229248046875, + "learning_rate": 1.9214784801924552e-06, + "loss": 17.7559, + "step": 16182 + }, + { + "epoch": 0.15318862941471587, + "grad_norm": 760.2420043945312, + "learning_rate": 1.9214665711511316e-06, + "loss": 23.9453, + "step": 16183 + }, + { + "epoch": 0.15319809543643093, + "grad_norm": 240.76004028320312, + "learning_rate": 1.92145466124369e-06, + "loss": 20.6523, + "step": 16184 + }, + { + "epoch": 0.15320756145814599, + "grad_norm": 363.9151611328125, + "learning_rate": 1.9214427504701403e-06, + "loss": 20.4336, + "step": 16185 + }, + { + "epoch": 0.15321702747986105, + "grad_norm": 241.7029571533203, + "learning_rate": 1.9214308388304953e-06, + "loss": 22.0625, + "step": 16186 + }, + { + "epoch": 0.1532264935015761, + "grad_norm": 165.103271484375, + "learning_rate": 1.9214189263247645e-06, + "loss": 16.6719, + "step": 16187 + }, + { + "epoch": 0.15323595952329114, + "grad_norm": 3.057293653488159, + "learning_rate": 1.9214070129529603e-06, + "loss": 0.9565, + "step": 16188 + }, + { + "epoch": 0.1532454255450062, + "grad_norm": 366.55706787109375, + "learning_rate": 1.921395098715093e-06, + "loss": 46.8281, + "step": 16189 + }, + { + "epoch": 0.15325489156672126, + "grad_norm": 463.0946960449219, + "learning_rate": 1.9213831836111745e-06, + "loss": 35.3516, + "step": 16190 + }, + { + "epoch": 0.15326435758843632, + "grad_norm": 757.4456787109375, + "learning_rate": 1.921371267641216e-06, + "loss": 42.5547, + "step": 16191 + }, + { + "epoch": 0.15327382361015135, + "grad_norm": 275.6822814941406, + "learning_rate": 1.921359350805228e-06, + "loss": 19.6953, + "step": 16192 + }, + { + "epoch": 0.1532832896318664, + "grad_norm": 3.023874521255493, + "learning_rate": 1.9213474331032225e-06, + "loss": 1.0605, + "step": 16193 + }, + { + "epoch": 0.15329275565358147, + "grad_norm": 375.99749755859375, + "learning_rate": 1.92133551453521e-06, + "loss": 46.6211, + "step": 16194 + }, + { + "epoch": 0.15330222167529653, + "grad_norm": 3.2069520950317383, + "learning_rate": 1.921323595101202e-06, + "loss": 0.8772, + "step": 16195 + }, + { + "epoch": 0.1533116876970116, + "grad_norm": 195.20980834960938, + "learning_rate": 1.92131167480121e-06, + "loss": 22.25, + "step": 16196 + }, + { + "epoch": 0.15332115371872662, + "grad_norm": 207.5913543701172, + "learning_rate": 1.921299753635245e-06, + "loss": 16.6016, + "step": 16197 + }, + { + "epoch": 0.15333061974044168, + "grad_norm": 974.5494384765625, + "learning_rate": 1.9212878316033184e-06, + "loss": 48.6758, + "step": 16198 + }, + { + "epoch": 0.15334008576215674, + "grad_norm": 392.8874816894531, + "learning_rate": 1.921275908705441e-06, + "loss": 34.5469, + "step": 16199 + }, + { + "epoch": 0.1533495517838718, + "grad_norm": 3.514676570892334, + "learning_rate": 1.921263984941624e-06, + "loss": 0.9365, + "step": 16200 + }, + { + "epoch": 0.15335901780558683, + "grad_norm": 328.82684326171875, + "learning_rate": 1.9212520603118787e-06, + "loss": 56.4219, + "step": 16201 + }, + { + "epoch": 0.1533684838273019, + "grad_norm": 474.8507995605469, + "learning_rate": 1.9212401348162166e-06, + "loss": 48.8281, + "step": 16202 + }, + { + "epoch": 0.15337794984901695, + "grad_norm": 260.08056640625, + "learning_rate": 1.921228208454649e-06, + "loss": 24.3984, + "step": 16203 + }, + { + "epoch": 0.153387415870732, + "grad_norm": 691.4534301757812, + "learning_rate": 1.921216281227186e-06, + "loss": 54.7891, + "step": 16204 + }, + { + "epoch": 0.15339688189244707, + "grad_norm": 646.875, + "learning_rate": 1.92120435313384e-06, + "loss": 53.7188, + "step": 16205 + }, + { + "epoch": 0.1534063479141621, + "grad_norm": 172.63705444335938, + "learning_rate": 1.921192424174622e-06, + "loss": 16.5625, + "step": 16206 + }, + { + "epoch": 0.15341581393587717, + "grad_norm": 2.845731496810913, + "learning_rate": 1.921180494349543e-06, + "loss": 0.8745, + "step": 16207 + }, + { + "epoch": 0.15342527995759223, + "grad_norm": 427.71417236328125, + "learning_rate": 1.921168563658614e-06, + "loss": 21.1953, + "step": 16208 + }, + { + "epoch": 0.15343474597930729, + "grad_norm": 276.7975158691406, + "learning_rate": 1.921156632101847e-06, + "loss": 20.7656, + "step": 16209 + }, + { + "epoch": 0.15344421200102232, + "grad_norm": 842.2060546875, + "learning_rate": 1.9211446996792524e-06, + "loss": 56.0, + "step": 16210 + }, + { + "epoch": 0.15345367802273738, + "grad_norm": 237.14230346679688, + "learning_rate": 1.921132766390842e-06, + "loss": 20.3828, + "step": 16211 + }, + { + "epoch": 0.15346314404445244, + "grad_norm": 315.85504150390625, + "learning_rate": 1.921120832236626e-06, + "loss": 24.9375, + "step": 16212 + }, + { + "epoch": 0.1534726100661675, + "grad_norm": 595.228759765625, + "learning_rate": 1.921108897216617e-06, + "loss": 24.8047, + "step": 16213 + }, + { + "epoch": 0.15348207608788256, + "grad_norm": 499.7102355957031, + "learning_rate": 1.9210969613308252e-06, + "loss": 18.7305, + "step": 16214 + }, + { + "epoch": 0.1534915421095976, + "grad_norm": 541.4906616210938, + "learning_rate": 1.9210850245792625e-06, + "loss": 21.3984, + "step": 16215 + }, + { + "epoch": 0.15350100813131265, + "grad_norm": 403.3624572753906, + "learning_rate": 1.9210730869619394e-06, + "loss": 42.8281, + "step": 16216 + }, + { + "epoch": 0.1535104741530277, + "grad_norm": 838.8551025390625, + "learning_rate": 1.9210611484788678e-06, + "loss": 43.6406, + "step": 16217 + }, + { + "epoch": 0.15351994017474277, + "grad_norm": 588.9100341796875, + "learning_rate": 1.9210492091300585e-06, + "loss": 59.0938, + "step": 16218 + }, + { + "epoch": 0.1535294061964578, + "grad_norm": 252.0417022705078, + "learning_rate": 1.921037268915523e-06, + "loss": 16.3203, + "step": 16219 + }, + { + "epoch": 0.15353887221817286, + "grad_norm": 462.49920654296875, + "learning_rate": 1.9210253278352723e-06, + "loss": 16.1172, + "step": 16220 + }, + { + "epoch": 0.15354833823988792, + "grad_norm": 349.31475830078125, + "learning_rate": 1.9210133858893177e-06, + "loss": 15.8359, + "step": 16221 + }, + { + "epoch": 0.15355780426160298, + "grad_norm": 286.87310791015625, + "learning_rate": 1.9210014430776705e-06, + "loss": 16.9688, + "step": 16222 + }, + { + "epoch": 0.15356727028331804, + "grad_norm": 375.7095947265625, + "learning_rate": 1.920989499400342e-06, + "loss": 36.5039, + "step": 16223 + }, + { + "epoch": 0.15357673630503307, + "grad_norm": 366.8863830566406, + "learning_rate": 1.920977554857343e-06, + "loss": 17.4844, + "step": 16224 + }, + { + "epoch": 0.15358620232674813, + "grad_norm": 235.6692352294922, + "learning_rate": 1.920965609448685e-06, + "loss": 9.4141, + "step": 16225 + }, + { + "epoch": 0.1535956683484632, + "grad_norm": 2.9796533584594727, + "learning_rate": 1.9209536631743796e-06, + "loss": 0.8682, + "step": 16226 + }, + { + "epoch": 0.15360513437017825, + "grad_norm": 264.8631896972656, + "learning_rate": 1.9209417160344376e-06, + "loss": 18.3906, + "step": 16227 + }, + { + "epoch": 0.15361460039189329, + "grad_norm": 562.6543579101562, + "learning_rate": 1.92092976802887e-06, + "loss": 50.0781, + "step": 16228 + }, + { + "epoch": 0.15362406641360835, + "grad_norm": 415.6836242675781, + "learning_rate": 1.9209178191576884e-06, + "loss": 18.3711, + "step": 16229 + }, + { + "epoch": 0.1536335324353234, + "grad_norm": 177.12525939941406, + "learning_rate": 1.920905869420904e-06, + "loss": 16.2578, + "step": 16230 + }, + { + "epoch": 0.15364299845703847, + "grad_norm": 349.83013916015625, + "learning_rate": 1.9208939188185276e-06, + "loss": 30.7969, + "step": 16231 + }, + { + "epoch": 0.15365246447875353, + "grad_norm": 162.96603393554688, + "learning_rate": 1.9208819673505717e-06, + "loss": 21.7227, + "step": 16232 + }, + { + "epoch": 0.15366193050046856, + "grad_norm": 223.2879180908203, + "learning_rate": 1.920870015017046e-06, + "loss": 19.5312, + "step": 16233 + }, + { + "epoch": 0.15367139652218362, + "grad_norm": 495.13311767578125, + "learning_rate": 1.9208580618179627e-06, + "loss": 31.9688, + "step": 16234 + }, + { + "epoch": 0.15368086254389868, + "grad_norm": 248.5853729248047, + "learning_rate": 1.9208461077533324e-06, + "loss": 19.8867, + "step": 16235 + }, + { + "epoch": 0.15369032856561374, + "grad_norm": 524.27099609375, + "learning_rate": 1.920834152823167e-06, + "loss": 15.8359, + "step": 16236 + }, + { + "epoch": 0.15369979458732877, + "grad_norm": 370.98968505859375, + "learning_rate": 1.920822197027477e-06, + "loss": 25.6016, + "step": 16237 + }, + { + "epoch": 0.15370926060904383, + "grad_norm": 709.5507202148438, + "learning_rate": 1.9208102403662746e-06, + "loss": 36.9531, + "step": 16238 + }, + { + "epoch": 0.1537187266307589, + "grad_norm": 791.4130859375, + "learning_rate": 1.92079828283957e-06, + "loss": 50.7969, + "step": 16239 + }, + { + "epoch": 0.15372819265247395, + "grad_norm": 485.9677429199219, + "learning_rate": 1.920786324447375e-06, + "loss": 37.4531, + "step": 16240 + }, + { + "epoch": 0.153737658674189, + "grad_norm": 386.917724609375, + "learning_rate": 1.920774365189701e-06, + "loss": 27.6719, + "step": 16241 + }, + { + "epoch": 0.15374712469590404, + "grad_norm": 3.443755626678467, + "learning_rate": 1.9207624050665588e-06, + "loss": 1.0396, + "step": 16242 + }, + { + "epoch": 0.1537565907176191, + "grad_norm": 204.73666381835938, + "learning_rate": 1.92075044407796e-06, + "loss": 20.4062, + "step": 16243 + }, + { + "epoch": 0.15376605673933416, + "grad_norm": 294.8785705566406, + "learning_rate": 1.9207384822239154e-06, + "loss": 25.2031, + "step": 16244 + }, + { + "epoch": 0.15377552276104922, + "grad_norm": 438.7674255371094, + "learning_rate": 1.9207265195044368e-06, + "loss": 42.5234, + "step": 16245 + }, + { + "epoch": 0.15378498878276428, + "grad_norm": 441.4047546386719, + "learning_rate": 1.9207145559195352e-06, + "loss": 24.3906, + "step": 16246 + }, + { + "epoch": 0.1537944548044793, + "grad_norm": 215.48118591308594, + "learning_rate": 1.920702591469222e-06, + "loss": 20.1016, + "step": 16247 + }, + { + "epoch": 0.15380392082619437, + "grad_norm": 203.98814392089844, + "learning_rate": 1.9206906261535075e-06, + "loss": 26.6953, + "step": 16248 + }, + { + "epoch": 0.15381338684790943, + "grad_norm": 286.5159912109375, + "learning_rate": 1.9206786599724043e-06, + "loss": 23.0156, + "step": 16249 + }, + { + "epoch": 0.1538228528696245, + "grad_norm": 674.1764526367188, + "learning_rate": 1.9206666929259226e-06, + "loss": 30.6172, + "step": 16250 + }, + { + "epoch": 0.15383231889133953, + "grad_norm": 316.13922119140625, + "learning_rate": 1.9206547250140744e-06, + "loss": 17.7734, + "step": 16251 + }, + { + "epoch": 0.15384178491305459, + "grad_norm": 277.71868896484375, + "learning_rate": 1.9206427562368706e-06, + "loss": 31.7656, + "step": 16252 + }, + { + "epoch": 0.15385125093476965, + "grad_norm": 1113.766357421875, + "learning_rate": 1.9206307865943227e-06, + "loss": 36.9062, + "step": 16253 + }, + { + "epoch": 0.1538607169564847, + "grad_norm": 407.31500244140625, + "learning_rate": 1.9206188160864418e-06, + "loss": 31.293, + "step": 16254 + }, + { + "epoch": 0.15387018297819977, + "grad_norm": 241.64736938476562, + "learning_rate": 1.9206068447132387e-06, + "loss": 24.5234, + "step": 16255 + }, + { + "epoch": 0.1538796489999148, + "grad_norm": 392.5383605957031, + "learning_rate": 1.9205948724747255e-06, + "loss": 25.0898, + "step": 16256 + }, + { + "epoch": 0.15388911502162986, + "grad_norm": 475.2239074707031, + "learning_rate": 1.920582899370913e-06, + "loss": 32.7969, + "step": 16257 + }, + { + "epoch": 0.15389858104334492, + "grad_norm": 567.0758666992188, + "learning_rate": 1.920570925401812e-06, + "loss": 29.7188, + "step": 16258 + }, + { + "epoch": 0.15390804706505998, + "grad_norm": 343.9632873535156, + "learning_rate": 1.9205589505674344e-06, + "loss": 42.6094, + "step": 16259 + }, + { + "epoch": 0.153917513086775, + "grad_norm": 340.0948486328125, + "learning_rate": 1.920546974867791e-06, + "loss": 24.5, + "step": 16260 + }, + { + "epoch": 0.15392697910849007, + "grad_norm": 476.4005432128906, + "learning_rate": 1.920534998302894e-06, + "loss": 34.6953, + "step": 16261 + }, + { + "epoch": 0.15393644513020513, + "grad_norm": 1711.4124755859375, + "learning_rate": 1.9205230208727533e-06, + "loss": 26.6953, + "step": 16262 + }, + { + "epoch": 0.1539459111519202, + "grad_norm": 487.4019775390625, + "learning_rate": 1.9205110425773814e-06, + "loss": 41.8125, + "step": 16263 + }, + { + "epoch": 0.15395537717363525, + "grad_norm": 299.5681457519531, + "learning_rate": 1.9204990634167887e-06, + "loss": 31.0156, + "step": 16264 + }, + { + "epoch": 0.15396484319535028, + "grad_norm": 455.4039611816406, + "learning_rate": 1.920487083390987e-06, + "loss": 58.3125, + "step": 16265 + }, + { + "epoch": 0.15397430921706534, + "grad_norm": 572.2139892578125, + "learning_rate": 1.9204751024999872e-06, + "loss": 32.25, + "step": 16266 + }, + { + "epoch": 0.1539837752387804, + "grad_norm": 487.3323059082031, + "learning_rate": 1.9204631207438005e-06, + "loss": 21.0781, + "step": 16267 + }, + { + "epoch": 0.15399324126049546, + "grad_norm": 1247.009033203125, + "learning_rate": 1.9204511381224387e-06, + "loss": 17.4531, + "step": 16268 + }, + { + "epoch": 0.1540027072822105, + "grad_norm": 189.97430419921875, + "learning_rate": 1.9204391546359123e-06, + "loss": 7.7344, + "step": 16269 + }, + { + "epoch": 0.15401217330392555, + "grad_norm": 298.3226013183594, + "learning_rate": 1.9204271702842333e-06, + "loss": 31.3828, + "step": 16270 + }, + { + "epoch": 0.1540216393256406, + "grad_norm": 3.246371030807495, + "learning_rate": 1.9204151850674126e-06, + "loss": 0.8481, + "step": 16271 + }, + { + "epoch": 0.15403110534735567, + "grad_norm": 306.8942565917969, + "learning_rate": 1.9204031989854612e-06, + "loss": 15.8906, + "step": 16272 + }, + { + "epoch": 0.15404057136907073, + "grad_norm": 806.4746704101562, + "learning_rate": 1.920391212038391e-06, + "loss": 36.4297, + "step": 16273 + }, + { + "epoch": 0.15405003739078577, + "grad_norm": 214.61260986328125, + "learning_rate": 1.9203792242262127e-06, + "loss": 13.0469, + "step": 16274 + }, + { + "epoch": 0.15405950341250083, + "grad_norm": 253.20541381835938, + "learning_rate": 1.920367235548938e-06, + "loss": 17.1562, + "step": 16275 + }, + { + "epoch": 0.15406896943421589, + "grad_norm": 478.59991455078125, + "learning_rate": 1.920355246006578e-06, + "loss": 18.1641, + "step": 16276 + }, + { + "epoch": 0.15407843545593095, + "grad_norm": 218.8177032470703, + "learning_rate": 1.920343255599144e-06, + "loss": 31.5781, + "step": 16277 + }, + { + "epoch": 0.15408790147764598, + "grad_norm": 182.7889862060547, + "learning_rate": 1.920331264326647e-06, + "loss": 13.3984, + "step": 16278 + }, + { + "epoch": 0.15409736749936104, + "grad_norm": 638.2752685546875, + "learning_rate": 1.9203192721890985e-06, + "loss": 63.6719, + "step": 16279 + }, + { + "epoch": 0.1541068335210761, + "grad_norm": 620.19921875, + "learning_rate": 1.9203072791865098e-06, + "loss": 41.0312, + "step": 16280 + }, + { + "epoch": 0.15411629954279116, + "grad_norm": 1515.0430908203125, + "learning_rate": 1.920295285318892e-06, + "loss": 62.4766, + "step": 16281 + }, + { + "epoch": 0.15412576556450622, + "grad_norm": 693.3588256835938, + "learning_rate": 1.9202832905862567e-06, + "loss": 42.9297, + "step": 16282 + }, + { + "epoch": 0.15413523158622125, + "grad_norm": 322.6294860839844, + "learning_rate": 1.920271294988615e-06, + "loss": 38.4219, + "step": 16283 + }, + { + "epoch": 0.1541446976079363, + "grad_norm": 284.6968078613281, + "learning_rate": 1.920259298525978e-06, + "loss": 17.4219, + "step": 16284 + }, + { + "epoch": 0.15415416362965137, + "grad_norm": 370.7577209472656, + "learning_rate": 1.920247301198357e-06, + "loss": 31.0469, + "step": 16285 + }, + { + "epoch": 0.15416362965136643, + "grad_norm": 748.88134765625, + "learning_rate": 1.9202353030057637e-06, + "loss": 38.9375, + "step": 16286 + }, + { + "epoch": 0.15417309567308146, + "grad_norm": 217.0111846923828, + "learning_rate": 1.920223303948209e-06, + "loss": 17.6953, + "step": 16287 + }, + { + "epoch": 0.15418256169479652, + "grad_norm": 873.84423828125, + "learning_rate": 1.9202113040257043e-06, + "loss": 27.8594, + "step": 16288 + }, + { + "epoch": 0.15419202771651158, + "grad_norm": 290.7265625, + "learning_rate": 1.920199303238261e-06, + "loss": 26.9219, + "step": 16289 + }, + { + "epoch": 0.15420149373822664, + "grad_norm": 475.5423278808594, + "learning_rate": 1.92018730158589e-06, + "loss": 21.7969, + "step": 16290 + }, + { + "epoch": 0.1542109597599417, + "grad_norm": 910.0374145507812, + "learning_rate": 1.9201752990686026e-06, + "loss": 32.3906, + "step": 16291 + }, + { + "epoch": 0.15422042578165673, + "grad_norm": 3.294243097305298, + "learning_rate": 1.9201632956864103e-06, + "loss": 0.9409, + "step": 16292 + }, + { + "epoch": 0.1542298918033718, + "grad_norm": 433.8780212402344, + "learning_rate": 1.9201512914393245e-06, + "loss": 7.6855, + "step": 16293 + }, + { + "epoch": 0.15423935782508685, + "grad_norm": 236.78524780273438, + "learning_rate": 1.9201392863273563e-06, + "loss": 18.5703, + "step": 16294 + }, + { + "epoch": 0.1542488238468019, + "grad_norm": 630.2100830078125, + "learning_rate": 1.9201272803505174e-06, + "loss": 34.3359, + "step": 16295 + }, + { + "epoch": 0.15425828986851695, + "grad_norm": 397.4283752441406, + "learning_rate": 1.920115273508818e-06, + "loss": 28.4609, + "step": 16296 + }, + { + "epoch": 0.154267755890232, + "grad_norm": 590.8140869140625, + "learning_rate": 1.920103265802271e-06, + "loss": 58.1562, + "step": 16297 + }, + { + "epoch": 0.15427722191194707, + "grad_norm": 254.15878295898438, + "learning_rate": 1.920091257230886e-06, + "loss": 18.8242, + "step": 16298 + }, + { + "epoch": 0.15428668793366213, + "grad_norm": 282.3572082519531, + "learning_rate": 1.9200792477946756e-06, + "loss": 20.3281, + "step": 16299 + }, + { + "epoch": 0.15429615395537719, + "grad_norm": 1193.404296875, + "learning_rate": 1.92006723749365e-06, + "loss": 66.8203, + "step": 16300 + }, + { + "epoch": 0.15430561997709222, + "grad_norm": 526.6163940429688, + "learning_rate": 1.9200552263278217e-06, + "loss": 23.4062, + "step": 16301 + }, + { + "epoch": 0.15431508599880728, + "grad_norm": 338.5609130859375, + "learning_rate": 1.920043214297201e-06, + "loss": 43.5781, + "step": 16302 + }, + { + "epoch": 0.15432455202052234, + "grad_norm": 203.55650329589844, + "learning_rate": 1.9200312014017992e-06, + "loss": 22.5938, + "step": 16303 + }, + { + "epoch": 0.1543340180422374, + "grad_norm": 237.24232482910156, + "learning_rate": 1.9200191876416284e-06, + "loss": 16.2031, + "step": 16304 + }, + { + "epoch": 0.15434348406395243, + "grad_norm": 459.88873291015625, + "learning_rate": 1.920007173016699e-06, + "loss": 18.2344, + "step": 16305 + }, + { + "epoch": 0.1543529500856675, + "grad_norm": 278.4903564453125, + "learning_rate": 1.9199951575270234e-06, + "loss": 18.4141, + "step": 16306 + }, + { + "epoch": 0.15436241610738255, + "grad_norm": 403.04034423828125, + "learning_rate": 1.919983141172612e-06, + "loss": 34.9922, + "step": 16307 + }, + { + "epoch": 0.1543718821290976, + "grad_norm": 156.9527130126953, + "learning_rate": 1.919971123953476e-06, + "loss": 5.9395, + "step": 16308 + }, + { + "epoch": 0.15438134815081267, + "grad_norm": 182.5918731689453, + "learning_rate": 1.919959105869627e-06, + "loss": 20.2812, + "step": 16309 + }, + { + "epoch": 0.1543908141725277, + "grad_norm": 337.80682373046875, + "learning_rate": 1.9199470869210763e-06, + "loss": 18.4453, + "step": 16310 + }, + { + "epoch": 0.15440028019424276, + "grad_norm": 2.7976958751678467, + "learning_rate": 1.919935067107835e-06, + "loss": 0.9351, + "step": 16311 + }, + { + "epoch": 0.15440974621595782, + "grad_norm": 233.53773498535156, + "learning_rate": 1.9199230464299154e-06, + "loss": 17.4609, + "step": 16312 + }, + { + "epoch": 0.15441921223767288, + "grad_norm": 1171.8572998046875, + "learning_rate": 1.919911024887327e-06, + "loss": 51.7734, + "step": 16313 + }, + { + "epoch": 0.1544286782593879, + "grad_norm": 213.5642852783203, + "learning_rate": 1.9198990024800826e-06, + "loss": 17.1875, + "step": 16314 + }, + { + "epoch": 0.15443814428110297, + "grad_norm": 304.3841247558594, + "learning_rate": 1.919886979208193e-06, + "loss": 23.4453, + "step": 16315 + }, + { + "epoch": 0.15444761030281803, + "grad_norm": 379.419921875, + "learning_rate": 1.9198749550716695e-06, + "loss": 47.7188, + "step": 16316 + }, + { + "epoch": 0.1544570763245331, + "grad_norm": 230.04103088378906, + "learning_rate": 1.919862930070523e-06, + "loss": 22.6406, + "step": 16317 + }, + { + "epoch": 0.15446654234624815, + "grad_norm": 710.1038818359375, + "learning_rate": 1.9198509042047654e-06, + "loss": 48.9922, + "step": 16318 + }, + { + "epoch": 0.15447600836796319, + "grad_norm": 527.7965698242188, + "learning_rate": 1.919838877474408e-06, + "loss": 44.5625, + "step": 16319 + }, + { + "epoch": 0.15448547438967825, + "grad_norm": 1157.0361328125, + "learning_rate": 1.9198268498794617e-06, + "loss": 26.8203, + "step": 16320 + }, + { + "epoch": 0.1544949404113933, + "grad_norm": 535.78173828125, + "learning_rate": 1.9198148214199383e-06, + "loss": 15.0391, + "step": 16321 + }, + { + "epoch": 0.15450440643310837, + "grad_norm": 450.09906005859375, + "learning_rate": 1.9198027920958483e-06, + "loss": 36.0625, + "step": 16322 + }, + { + "epoch": 0.1545138724548234, + "grad_norm": 372.124267578125, + "learning_rate": 1.9197907619072037e-06, + "loss": 8.5469, + "step": 16323 + }, + { + "epoch": 0.15452333847653846, + "grad_norm": 480.92022705078125, + "learning_rate": 1.919778730854016e-06, + "loss": 53.6406, + "step": 16324 + }, + { + "epoch": 0.15453280449825352, + "grad_norm": 312.5768737792969, + "learning_rate": 1.9197666989362953e-06, + "loss": 24.0391, + "step": 16325 + }, + { + "epoch": 0.15454227051996858, + "grad_norm": 383.5633239746094, + "learning_rate": 1.9197546661540544e-06, + "loss": 37.375, + "step": 16326 + }, + { + "epoch": 0.15455173654168364, + "grad_norm": 619.4132080078125, + "learning_rate": 1.9197426325073038e-06, + "loss": 8.0273, + "step": 16327 + }, + { + "epoch": 0.15456120256339867, + "grad_norm": 230.07249450683594, + "learning_rate": 1.9197305979960548e-06, + "loss": 16.2891, + "step": 16328 + }, + { + "epoch": 0.15457066858511373, + "grad_norm": 172.0858154296875, + "learning_rate": 1.9197185626203193e-06, + "loss": 17.3594, + "step": 16329 + }, + { + "epoch": 0.1545801346068288, + "grad_norm": 493.27178955078125, + "learning_rate": 1.919706526380108e-06, + "loss": 39.5781, + "step": 16330 + }, + { + "epoch": 0.15458960062854385, + "grad_norm": 225.18907165527344, + "learning_rate": 1.919694489275432e-06, + "loss": 20.5859, + "step": 16331 + }, + { + "epoch": 0.1545990666502589, + "grad_norm": 181.7891387939453, + "learning_rate": 1.919682451306303e-06, + "loss": 18.8047, + "step": 16332 + }, + { + "epoch": 0.15460853267197394, + "grad_norm": 472.5028076171875, + "learning_rate": 1.919670412472733e-06, + "loss": 44.6328, + "step": 16333 + }, + { + "epoch": 0.154617998693689, + "grad_norm": 1273.8048095703125, + "learning_rate": 1.9196583727747325e-06, + "loss": 21.4375, + "step": 16334 + }, + { + "epoch": 0.15462746471540406, + "grad_norm": 322.2464294433594, + "learning_rate": 1.9196463322123124e-06, + "loss": 26.4062, + "step": 16335 + }, + { + "epoch": 0.15463693073711912, + "grad_norm": 451.5979309082031, + "learning_rate": 1.919634290785485e-06, + "loss": 22.6562, + "step": 16336 + }, + { + "epoch": 0.15464639675883415, + "grad_norm": 246.59523010253906, + "learning_rate": 1.919622248494261e-06, + "loss": 20.1016, + "step": 16337 + }, + { + "epoch": 0.1546558627805492, + "grad_norm": 154.71571350097656, + "learning_rate": 1.919610205338652e-06, + "loss": 17.4805, + "step": 16338 + }, + { + "epoch": 0.15466532880226427, + "grad_norm": 334.3149108886719, + "learning_rate": 1.9195981613186693e-06, + "loss": 45.0938, + "step": 16339 + }, + { + "epoch": 0.15467479482397933, + "grad_norm": 343.0191345214844, + "learning_rate": 1.919586116434324e-06, + "loss": 23.2266, + "step": 16340 + }, + { + "epoch": 0.1546842608456944, + "grad_norm": 345.5526123046875, + "learning_rate": 1.919574070685628e-06, + "loss": 21.8438, + "step": 16341 + }, + { + "epoch": 0.15469372686740943, + "grad_norm": 465.7305603027344, + "learning_rate": 1.9195620240725917e-06, + "loss": 37.5469, + "step": 16342 + }, + { + "epoch": 0.15470319288912449, + "grad_norm": 778.9481811523438, + "learning_rate": 1.919549976595227e-06, + "loss": 43.8516, + "step": 16343 + }, + { + "epoch": 0.15471265891083955, + "grad_norm": 472.4157409667969, + "learning_rate": 1.9195379282535453e-06, + "loss": 38.0391, + "step": 16344 + }, + { + "epoch": 0.1547221249325546, + "grad_norm": 545.0217895507812, + "learning_rate": 1.9195258790475575e-06, + "loss": 16.9922, + "step": 16345 + }, + { + "epoch": 0.15473159095426964, + "grad_norm": 192.59535217285156, + "learning_rate": 1.9195138289772755e-06, + "loss": 17.0859, + "step": 16346 + }, + { + "epoch": 0.1547410569759847, + "grad_norm": 241.14108276367188, + "learning_rate": 1.9195017780427103e-06, + "loss": 30.4922, + "step": 16347 + }, + { + "epoch": 0.15475052299769976, + "grad_norm": 191.71517944335938, + "learning_rate": 1.919489726243873e-06, + "loss": 18.7578, + "step": 16348 + }, + { + "epoch": 0.15475998901941482, + "grad_norm": 447.7306823730469, + "learning_rate": 1.9194776735807756e-06, + "loss": 23.2266, + "step": 16349 + }, + { + "epoch": 0.15476945504112988, + "grad_norm": 167.48573303222656, + "learning_rate": 1.9194656200534285e-06, + "loss": 9.6914, + "step": 16350 + }, + { + "epoch": 0.1547789210628449, + "grad_norm": 390.9641418457031, + "learning_rate": 1.9194535656618436e-06, + "loss": 51.0312, + "step": 16351 + }, + { + "epoch": 0.15478838708455997, + "grad_norm": 187.15635681152344, + "learning_rate": 1.9194415104060322e-06, + "loss": 24.1641, + "step": 16352 + }, + { + "epoch": 0.15479785310627503, + "grad_norm": 3.0754077434539795, + "learning_rate": 1.9194294542860063e-06, + "loss": 0.8623, + "step": 16353 + }, + { + "epoch": 0.1548073191279901, + "grad_norm": 187.8343505859375, + "learning_rate": 1.9194173973017755e-06, + "loss": 17.5938, + "step": 16354 + }, + { + "epoch": 0.15481678514970512, + "grad_norm": 518.291748046875, + "learning_rate": 1.9194053394533526e-06, + "loss": 36.4453, + "step": 16355 + }, + { + "epoch": 0.15482625117142018, + "grad_norm": 602.2943115234375, + "learning_rate": 1.9193932807407485e-06, + "loss": 54.8906, + "step": 16356 + }, + { + "epoch": 0.15483571719313524, + "grad_norm": 343.6151428222656, + "learning_rate": 1.9193812211639747e-06, + "loss": 8.2715, + "step": 16357 + }, + { + "epoch": 0.1548451832148503, + "grad_norm": 319.1268005371094, + "learning_rate": 1.919369160723042e-06, + "loss": 6.582, + "step": 16358 + }, + { + "epoch": 0.15485464923656536, + "grad_norm": 187.23049926757812, + "learning_rate": 1.9193570994179626e-06, + "loss": 22.3281, + "step": 16359 + }, + { + "epoch": 0.1548641152582804, + "grad_norm": 3.2234842777252197, + "learning_rate": 1.9193450372487467e-06, + "loss": 0.8984, + "step": 16360 + }, + { + "epoch": 0.15487358127999545, + "grad_norm": 520.8546752929688, + "learning_rate": 1.919332974215407e-06, + "loss": 17.8359, + "step": 16361 + }, + { + "epoch": 0.1548830473017105, + "grad_norm": 382.053955078125, + "learning_rate": 1.919320910317953e-06, + "loss": 42.8438, + "step": 16362 + }, + { + "epoch": 0.15489251332342557, + "grad_norm": 716.7582397460938, + "learning_rate": 1.9193088455563984e-06, + "loss": 44.0078, + "step": 16363 + }, + { + "epoch": 0.1549019793451406, + "grad_norm": 506.0771484375, + "learning_rate": 1.9192967799307523e-06, + "loss": 28.875, + "step": 16364 + }, + { + "epoch": 0.15491144536685567, + "grad_norm": 452.3298645019531, + "learning_rate": 1.9192847134410275e-06, + "loss": 31.1406, + "step": 16365 + }, + { + "epoch": 0.15492091138857073, + "grad_norm": 350.7535095214844, + "learning_rate": 1.919272646087235e-06, + "loss": 31.5078, + "step": 16366 + }, + { + "epoch": 0.15493037741028579, + "grad_norm": 187.03021240234375, + "learning_rate": 1.9192605778693857e-06, + "loss": 21.1641, + "step": 16367 + }, + { + "epoch": 0.15493984343200085, + "grad_norm": 182.61865234375, + "learning_rate": 1.9192485087874916e-06, + "loss": 26.2266, + "step": 16368 + }, + { + "epoch": 0.15494930945371588, + "grad_norm": 493.9795837402344, + "learning_rate": 1.9192364388415633e-06, + "loss": 39.3906, + "step": 16369 + }, + { + "epoch": 0.15495877547543094, + "grad_norm": 273.8565368652344, + "learning_rate": 1.919224368031613e-06, + "loss": 21.3438, + "step": 16370 + }, + { + "epoch": 0.154968241497146, + "grad_norm": 190.79254150390625, + "learning_rate": 1.919212296357651e-06, + "loss": 8.6719, + "step": 16371 + }, + { + "epoch": 0.15497770751886106, + "grad_norm": 375.5628662109375, + "learning_rate": 1.9192002238196897e-06, + "loss": 47.5156, + "step": 16372 + }, + { + "epoch": 0.1549871735405761, + "grad_norm": 539.9116821289062, + "learning_rate": 1.91918815041774e-06, + "loss": 11.8203, + "step": 16373 + }, + { + "epoch": 0.15499663956229115, + "grad_norm": 3.299675226211548, + "learning_rate": 1.919176076151813e-06, + "loss": 0.9844, + "step": 16374 + }, + { + "epoch": 0.1550061055840062, + "grad_norm": 248.8745574951172, + "learning_rate": 1.9191640010219207e-06, + "loss": 15.7578, + "step": 16375 + }, + { + "epoch": 0.15501557160572127, + "grad_norm": 439.4442138671875, + "learning_rate": 1.9191519250280734e-06, + "loss": 23.4453, + "step": 16376 + }, + { + "epoch": 0.15502503762743633, + "grad_norm": 440.1876525878906, + "learning_rate": 1.919139848170284e-06, + "loss": 44.3906, + "step": 16377 + }, + { + "epoch": 0.15503450364915136, + "grad_norm": 841.2374877929688, + "learning_rate": 1.919127770448562e-06, + "loss": 47.25, + "step": 16378 + }, + { + "epoch": 0.15504396967086642, + "grad_norm": 932.9439697265625, + "learning_rate": 1.9191156918629197e-06, + "loss": 45.8438, + "step": 16379 + }, + { + "epoch": 0.15505343569258148, + "grad_norm": 378.23980712890625, + "learning_rate": 1.919103612413369e-06, + "loss": 19.6875, + "step": 16380 + }, + { + "epoch": 0.15506290171429654, + "grad_norm": 611.8768920898438, + "learning_rate": 1.9190915320999204e-06, + "loss": 21.9922, + "step": 16381 + }, + { + "epoch": 0.15507236773601157, + "grad_norm": 879.3230590820312, + "learning_rate": 1.919079450922586e-06, + "loss": 67.457, + "step": 16382 + }, + { + "epoch": 0.15508183375772663, + "grad_norm": 330.8801574707031, + "learning_rate": 1.919067368881376e-06, + "loss": 41.6875, + "step": 16383 + }, + { + "epoch": 0.1550912997794417, + "grad_norm": 207.39588928222656, + "learning_rate": 1.9190552859763028e-06, + "loss": 8.9961, + "step": 16384 + }, + { + "epoch": 0.15510076580115675, + "grad_norm": 261.1646423339844, + "learning_rate": 1.9190432022073774e-06, + "loss": 16.8633, + "step": 16385 + }, + { + "epoch": 0.1551102318228718, + "grad_norm": 430.41510009765625, + "learning_rate": 1.9190311175746114e-06, + "loss": 28.9062, + "step": 16386 + }, + { + "epoch": 0.15511969784458685, + "grad_norm": 647.6255493164062, + "learning_rate": 1.9190190320780157e-06, + "loss": 34.1719, + "step": 16387 + }, + { + "epoch": 0.1551291638663019, + "grad_norm": 364.625, + "learning_rate": 1.919006945717602e-06, + "loss": 21.6953, + "step": 16388 + }, + { + "epoch": 0.15513862988801697, + "grad_norm": 308.61419677734375, + "learning_rate": 1.9189948584933814e-06, + "loss": 22.5938, + "step": 16389 + }, + { + "epoch": 0.15514809590973203, + "grad_norm": 531.9016723632812, + "learning_rate": 1.9189827704053655e-06, + "loss": 25.6953, + "step": 16390 + }, + { + "epoch": 0.15515756193144706, + "grad_norm": 815.558837890625, + "learning_rate": 1.918970681453566e-06, + "loss": 50.7188, + "step": 16391 + }, + { + "epoch": 0.15516702795316212, + "grad_norm": 353.7265319824219, + "learning_rate": 1.9189585916379933e-06, + "loss": 16.1797, + "step": 16392 + }, + { + "epoch": 0.15517649397487718, + "grad_norm": 302.4866943359375, + "learning_rate": 1.9189465009586593e-06, + "loss": 23.1562, + "step": 16393 + }, + { + "epoch": 0.15518595999659224, + "grad_norm": 420.8631286621094, + "learning_rate": 1.9189344094155753e-06, + "loss": 53.7188, + "step": 16394 + }, + { + "epoch": 0.1551954260183073, + "grad_norm": 488.3992614746094, + "learning_rate": 1.918922317008753e-06, + "loss": 45.6562, + "step": 16395 + }, + { + "epoch": 0.15520489204002233, + "grad_norm": 165.83763122558594, + "learning_rate": 1.9189102237382035e-06, + "loss": 15.0469, + "step": 16396 + }, + { + "epoch": 0.1552143580617374, + "grad_norm": 462.24609375, + "learning_rate": 1.918898129603938e-06, + "loss": 48.9531, + "step": 16397 + }, + { + "epoch": 0.15522382408345245, + "grad_norm": 382.1523742675781, + "learning_rate": 1.918886034605968e-06, + "loss": 48.1875, + "step": 16398 + }, + { + "epoch": 0.1552332901051675, + "grad_norm": 579.11572265625, + "learning_rate": 1.9188739387443053e-06, + "loss": 27.7734, + "step": 16399 + }, + { + "epoch": 0.15524275612688254, + "grad_norm": 272.75494384765625, + "learning_rate": 1.9188618420189607e-06, + "loss": 15.3125, + "step": 16400 + }, + { + "epoch": 0.1552522221485976, + "grad_norm": 220.265869140625, + "learning_rate": 1.9188497444299456e-06, + "loss": 14.9102, + "step": 16401 + }, + { + "epoch": 0.15526168817031266, + "grad_norm": 877.8680419921875, + "learning_rate": 1.9188376459772713e-06, + "loss": 30.4062, + "step": 16402 + }, + { + "epoch": 0.15527115419202772, + "grad_norm": 469.42327880859375, + "learning_rate": 1.91882554666095e-06, + "loss": 46.9062, + "step": 16403 + }, + { + "epoch": 0.15528062021374278, + "grad_norm": 442.28564453125, + "learning_rate": 1.9188134464809923e-06, + "loss": 28.7109, + "step": 16404 + }, + { + "epoch": 0.1552900862354578, + "grad_norm": 183.0595245361328, + "learning_rate": 1.9188013454374094e-06, + "loss": 24.9531, + "step": 16405 + }, + { + "epoch": 0.15529955225717287, + "grad_norm": 381.1760559082031, + "learning_rate": 1.918789243530213e-06, + "loss": 31.8828, + "step": 16406 + }, + { + "epoch": 0.15530901827888793, + "grad_norm": 298.19024658203125, + "learning_rate": 1.918777140759415e-06, + "loss": 21.0859, + "step": 16407 + }, + { + "epoch": 0.155318484300603, + "grad_norm": 851.986328125, + "learning_rate": 1.9187650371250257e-06, + "loss": 23.6641, + "step": 16408 + }, + { + "epoch": 0.15532795032231803, + "grad_norm": 474.7716979980469, + "learning_rate": 1.9187529326270573e-06, + "loss": 44.0625, + "step": 16409 + }, + { + "epoch": 0.15533741634403309, + "grad_norm": 239.886474609375, + "learning_rate": 1.9187408272655206e-06, + "loss": 24.1016, + "step": 16410 + }, + { + "epoch": 0.15534688236574815, + "grad_norm": 314.10858154296875, + "learning_rate": 1.918728721040428e-06, + "loss": 52.625, + "step": 16411 + }, + { + "epoch": 0.1553563483874632, + "grad_norm": 363.9648132324219, + "learning_rate": 1.9187166139517893e-06, + "loss": 34.125, + "step": 16412 + }, + { + "epoch": 0.15536581440917827, + "grad_norm": 537.5307006835938, + "learning_rate": 1.918704505999617e-06, + "loss": 43.9844, + "step": 16413 + }, + { + "epoch": 0.1553752804308933, + "grad_norm": 770.3675537109375, + "learning_rate": 1.918692397183923e-06, + "loss": 55.9688, + "step": 16414 + }, + { + "epoch": 0.15538474645260836, + "grad_norm": 228.67697143554688, + "learning_rate": 1.918680287504717e-06, + "loss": 17.9141, + "step": 16415 + }, + { + "epoch": 0.15539421247432342, + "grad_norm": 513.72509765625, + "learning_rate": 1.918668176962012e-06, + "loss": 25.2031, + "step": 16416 + }, + { + "epoch": 0.15540367849603848, + "grad_norm": 1324.215087890625, + "learning_rate": 1.918656065555818e-06, + "loss": 48.6406, + "step": 16417 + }, + { + "epoch": 0.15541314451775354, + "grad_norm": 379.3255615234375, + "learning_rate": 1.918643953286147e-06, + "loss": 36.5781, + "step": 16418 + }, + { + "epoch": 0.15542261053946857, + "grad_norm": 281.9704895019531, + "learning_rate": 1.918631840153011e-06, + "loss": 17.7188, + "step": 16419 + }, + { + "epoch": 0.15543207656118363, + "grad_norm": 273.12835693359375, + "learning_rate": 1.9186197261564206e-06, + "loss": 15.2305, + "step": 16420 + }, + { + "epoch": 0.1554415425828987, + "grad_norm": 376.79083251953125, + "learning_rate": 1.9186076112963877e-06, + "loss": 46.2656, + "step": 16421 + }, + { + "epoch": 0.15545100860461375, + "grad_norm": 909.6356811523438, + "learning_rate": 1.918595495572923e-06, + "loss": 59.3281, + "step": 16422 + }, + { + "epoch": 0.15546047462632878, + "grad_norm": 841.6072387695312, + "learning_rate": 1.9185833789860385e-06, + "loss": 46.4375, + "step": 16423 + }, + { + "epoch": 0.15546994064804384, + "grad_norm": 487.0523986816406, + "learning_rate": 1.9185712615357453e-06, + "loss": 10.3242, + "step": 16424 + }, + { + "epoch": 0.1554794066697589, + "grad_norm": 206.18630981445312, + "learning_rate": 1.9185591432220546e-06, + "loss": 15.7266, + "step": 16425 + }, + { + "epoch": 0.15548887269147396, + "grad_norm": 260.0870361328125, + "learning_rate": 1.9185470240449785e-06, + "loss": 28.1719, + "step": 16426 + }, + { + "epoch": 0.15549833871318902, + "grad_norm": 338.20843505859375, + "learning_rate": 1.9185349040045277e-06, + "loss": 20.7656, + "step": 16427 + }, + { + "epoch": 0.15550780473490405, + "grad_norm": 377.23675537109375, + "learning_rate": 1.9185227831007144e-06, + "loss": 52.0469, + "step": 16428 + }, + { + "epoch": 0.1555172707566191, + "grad_norm": 477.5021667480469, + "learning_rate": 1.918510661333549e-06, + "loss": 32.3438, + "step": 16429 + }, + { + "epoch": 0.15552673677833417, + "grad_norm": 437.10614013671875, + "learning_rate": 1.918498538703043e-06, + "loss": 41.1094, + "step": 16430 + }, + { + "epoch": 0.15553620280004923, + "grad_norm": 396.8966369628906, + "learning_rate": 1.9184864152092085e-06, + "loss": 25.0703, + "step": 16431 + }, + { + "epoch": 0.15554566882176427, + "grad_norm": 268.1622314453125, + "learning_rate": 1.9184742908520563e-06, + "loss": 19.7031, + "step": 16432 + }, + { + "epoch": 0.15555513484347933, + "grad_norm": 3.605226755142212, + "learning_rate": 1.9184621656315984e-06, + "loss": 0.9263, + "step": 16433 + }, + { + "epoch": 0.15556460086519439, + "grad_norm": 389.4281921386719, + "learning_rate": 1.9184500395478457e-06, + "loss": 30.3281, + "step": 16434 + }, + { + "epoch": 0.15557406688690945, + "grad_norm": 246.17724609375, + "learning_rate": 1.9184379126008094e-06, + "loss": 21.1094, + "step": 16435 + }, + { + "epoch": 0.1555835329086245, + "grad_norm": 2.813278913497925, + "learning_rate": 1.9184257847905016e-06, + "loss": 0.8062, + "step": 16436 + }, + { + "epoch": 0.15559299893033954, + "grad_norm": 1023.4330444335938, + "learning_rate": 1.918413656116933e-06, + "loss": 11.3008, + "step": 16437 + }, + { + "epoch": 0.1556024649520546, + "grad_norm": 358.2649841308594, + "learning_rate": 1.9184015265801155e-06, + "loss": 24.5938, + "step": 16438 + }, + { + "epoch": 0.15561193097376966, + "grad_norm": 680.5565795898438, + "learning_rate": 1.91838939618006e-06, + "loss": 49.1953, + "step": 16439 + }, + { + "epoch": 0.15562139699548472, + "grad_norm": 355.8365478515625, + "learning_rate": 1.9183772649167787e-06, + "loss": 18.5234, + "step": 16440 + }, + { + "epoch": 0.15563086301719975, + "grad_norm": 3.1078226566314697, + "learning_rate": 1.9183651327902824e-06, + "loss": 0.8799, + "step": 16441 + }, + { + "epoch": 0.1556403290389148, + "grad_norm": 692.1497802734375, + "learning_rate": 1.9183529998005825e-06, + "loss": 53.9062, + "step": 16442 + }, + { + "epoch": 0.15564979506062987, + "grad_norm": 452.36187744140625, + "learning_rate": 1.9183408659476903e-06, + "loss": 26.4453, + "step": 16443 + }, + { + "epoch": 0.15565926108234493, + "grad_norm": 627.5320434570312, + "learning_rate": 1.918328731231618e-06, + "loss": 20.0625, + "step": 16444 + }, + { + "epoch": 0.15566872710406, + "grad_norm": 178.78173828125, + "learning_rate": 1.9183165956523755e-06, + "loss": 19.2969, + "step": 16445 + }, + { + "epoch": 0.15567819312577502, + "grad_norm": 640.6980590820312, + "learning_rate": 1.9183044592099758e-06, + "loss": 45.5625, + "step": 16446 + }, + { + "epoch": 0.15568765914749008, + "grad_norm": 620.7662353515625, + "learning_rate": 1.91829232190443e-06, + "loss": 59.3125, + "step": 16447 + }, + { + "epoch": 0.15569712516920514, + "grad_norm": 405.415283203125, + "learning_rate": 1.9182801837357486e-06, + "loss": 62.7656, + "step": 16448 + }, + { + "epoch": 0.1557065911909202, + "grad_norm": 413.3269348144531, + "learning_rate": 1.9182680447039433e-06, + "loss": 16.3672, + "step": 16449 + }, + { + "epoch": 0.15571605721263523, + "grad_norm": 425.98724365234375, + "learning_rate": 1.9182559048090266e-06, + "loss": 20.5469, + "step": 16450 + }, + { + "epoch": 0.1557255232343503, + "grad_norm": 289.1714782714844, + "learning_rate": 1.9182437640510084e-06, + "loss": 21.8906, + "step": 16451 + }, + { + "epoch": 0.15573498925606535, + "grad_norm": 304.91912841796875, + "learning_rate": 1.9182316224299012e-06, + "loss": 26.0703, + "step": 16452 + }, + { + "epoch": 0.1557444552777804, + "grad_norm": 3.5759549140930176, + "learning_rate": 1.918219479945716e-06, + "loss": 1.0854, + "step": 16453 + }, + { + "epoch": 0.15575392129949547, + "grad_norm": 349.3283996582031, + "learning_rate": 1.918207336598464e-06, + "loss": 47.0, + "step": 16454 + }, + { + "epoch": 0.1557633873212105, + "grad_norm": 397.14849853515625, + "learning_rate": 1.9181951923881567e-06, + "loss": 23.3672, + "step": 16455 + }, + { + "epoch": 0.15577285334292557, + "grad_norm": 264.03912353515625, + "learning_rate": 1.918183047314806e-06, + "loss": 15.5547, + "step": 16456 + }, + { + "epoch": 0.15578231936464063, + "grad_norm": 670.0082397460938, + "learning_rate": 1.918170901378423e-06, + "loss": 31.1641, + "step": 16457 + }, + { + "epoch": 0.15579178538635569, + "grad_norm": 367.660888671875, + "learning_rate": 1.918158754579019e-06, + "loss": 24.4688, + "step": 16458 + }, + { + "epoch": 0.15580125140807072, + "grad_norm": 333.1833190917969, + "learning_rate": 1.918146606916605e-06, + "loss": 25.5469, + "step": 16459 + }, + { + "epoch": 0.15581071742978578, + "grad_norm": 922.9368896484375, + "learning_rate": 1.9181344583911934e-06, + "loss": 36.9688, + "step": 16460 + }, + { + "epoch": 0.15582018345150084, + "grad_norm": 527.8322143554688, + "learning_rate": 1.918122309002795e-06, + "loss": 45.125, + "step": 16461 + }, + { + "epoch": 0.1558296494732159, + "grad_norm": 920.4498291015625, + "learning_rate": 1.9181101587514217e-06, + "loss": 63.5625, + "step": 16462 + }, + { + "epoch": 0.15583911549493096, + "grad_norm": 154.07032775878906, + "learning_rate": 1.9180980076370844e-06, + "loss": 14.6172, + "step": 16463 + }, + { + "epoch": 0.155848581516646, + "grad_norm": 746.3416137695312, + "learning_rate": 1.9180858556597946e-06, + "loss": 28.4531, + "step": 16464 + }, + { + "epoch": 0.15585804753836105, + "grad_norm": 275.1768798828125, + "learning_rate": 1.9180737028195637e-06, + "loss": 24.8359, + "step": 16465 + }, + { + "epoch": 0.1558675135600761, + "grad_norm": 970.7776489257812, + "learning_rate": 1.9180615491164036e-06, + "loss": 38.4219, + "step": 16466 + }, + { + "epoch": 0.15587697958179117, + "grad_norm": 209.15170288085938, + "learning_rate": 1.918049394550325e-06, + "loss": 24.0859, + "step": 16467 + }, + { + "epoch": 0.1558864456035062, + "grad_norm": 847.5222778320312, + "learning_rate": 1.9180372391213398e-06, + "loss": 56.4844, + "step": 16468 + }, + { + "epoch": 0.15589591162522126, + "grad_norm": 3.550759792327881, + "learning_rate": 1.9180250828294594e-06, + "loss": 0.9102, + "step": 16469 + }, + { + "epoch": 0.15590537764693632, + "grad_norm": 548.6339721679688, + "learning_rate": 1.918012925674695e-06, + "loss": 20.2188, + "step": 16470 + }, + { + "epoch": 0.15591484366865138, + "grad_norm": 283.82269287109375, + "learning_rate": 1.9180007676570583e-06, + "loss": 18.2891, + "step": 16471 + }, + { + "epoch": 0.15592430969036644, + "grad_norm": 857.8822631835938, + "learning_rate": 1.9179886087765605e-06, + "loss": 37.0078, + "step": 16472 + }, + { + "epoch": 0.15593377571208147, + "grad_norm": 3.346806764602661, + "learning_rate": 1.917976449033213e-06, + "loss": 0.9316, + "step": 16473 + }, + { + "epoch": 0.15594324173379653, + "grad_norm": 316.6571350097656, + "learning_rate": 1.917964288427028e-06, + "loss": 30.0781, + "step": 16474 + }, + { + "epoch": 0.1559527077555116, + "grad_norm": 369.745361328125, + "learning_rate": 1.9179521269580157e-06, + "loss": 26.8125, + "step": 16475 + }, + { + "epoch": 0.15596217377722665, + "grad_norm": 496.2659606933594, + "learning_rate": 1.917939964626188e-06, + "loss": 31.5859, + "step": 16476 + }, + { + "epoch": 0.15597163979894169, + "grad_norm": 3283.56298828125, + "learning_rate": 1.9179278014315568e-06, + "loss": 33.5625, + "step": 16477 + }, + { + "epoch": 0.15598110582065675, + "grad_norm": 438.6822509765625, + "learning_rate": 1.9179156373741334e-06, + "loss": 39.4609, + "step": 16478 + }, + { + "epoch": 0.1559905718423718, + "grad_norm": 522.319091796875, + "learning_rate": 1.9179034724539285e-06, + "loss": 45.7969, + "step": 16479 + }, + { + "epoch": 0.15600003786408687, + "grad_norm": 357.8663024902344, + "learning_rate": 1.917891306670954e-06, + "loss": 22.2422, + "step": 16480 + }, + { + "epoch": 0.15600950388580193, + "grad_norm": 206.53590393066406, + "learning_rate": 1.9178791400252217e-06, + "loss": 22.207, + "step": 16481 + }, + { + "epoch": 0.15601896990751696, + "grad_norm": 496.60321044921875, + "learning_rate": 1.917866972516743e-06, + "loss": 32.2188, + "step": 16482 + }, + { + "epoch": 0.15602843592923202, + "grad_norm": 321.3220520019531, + "learning_rate": 1.9178548041455286e-06, + "loss": 16.5234, + "step": 16483 + }, + { + "epoch": 0.15603790195094708, + "grad_norm": 940.5941162109375, + "learning_rate": 1.9178426349115908e-06, + "loss": 48.5156, + "step": 16484 + }, + { + "epoch": 0.15604736797266214, + "grad_norm": 2.6129918098449707, + "learning_rate": 1.91783046481494e-06, + "loss": 0.8887, + "step": 16485 + }, + { + "epoch": 0.15605683399437717, + "grad_norm": 298.32244873046875, + "learning_rate": 1.9178182938555887e-06, + "loss": 26.7188, + "step": 16486 + }, + { + "epoch": 0.15606630001609223, + "grad_norm": 751.0360107421875, + "learning_rate": 1.9178061220335476e-06, + "loss": 11.7812, + "step": 16487 + }, + { + "epoch": 0.1560757660378073, + "grad_norm": 286.6654052734375, + "learning_rate": 1.917793949348829e-06, + "loss": 21.4141, + "step": 16488 + }, + { + "epoch": 0.15608523205952235, + "grad_norm": 476.4429016113281, + "learning_rate": 1.9177817758014432e-06, + "loss": 36.832, + "step": 16489 + }, + { + "epoch": 0.1560946980812374, + "grad_norm": 227.92446899414062, + "learning_rate": 1.9177696013914027e-06, + "loss": 16.8438, + "step": 16490 + }, + { + "epoch": 0.15610416410295244, + "grad_norm": 1441.44091796875, + "learning_rate": 1.9177574261187185e-06, + "loss": 30.7031, + "step": 16491 + }, + { + "epoch": 0.1561136301246675, + "grad_norm": 472.5524597167969, + "learning_rate": 1.9177452499834014e-06, + "loss": 46.7188, + "step": 16492 + }, + { + "epoch": 0.15612309614638256, + "grad_norm": 363.026611328125, + "learning_rate": 1.917733072985464e-06, + "loss": 37.8906, + "step": 16493 + }, + { + "epoch": 0.15613256216809762, + "grad_norm": 333.2865905761719, + "learning_rate": 1.917720895124917e-06, + "loss": 34.4844, + "step": 16494 + }, + { + "epoch": 0.15614202818981265, + "grad_norm": 3.355358839035034, + "learning_rate": 1.9177087164017724e-06, + "loss": 0.8564, + "step": 16495 + }, + { + "epoch": 0.1561514942115277, + "grad_norm": 586.096435546875, + "learning_rate": 1.917696536816041e-06, + "loss": 40.875, + "step": 16496 + }, + { + "epoch": 0.15616096023324277, + "grad_norm": 196.24667358398438, + "learning_rate": 1.9176843563677346e-06, + "loss": 20.7734, + "step": 16497 + }, + { + "epoch": 0.15617042625495783, + "grad_norm": 343.7352294921875, + "learning_rate": 1.9176721750568645e-06, + "loss": 23.7891, + "step": 16498 + }, + { + "epoch": 0.1561798922766729, + "grad_norm": 718.52685546875, + "learning_rate": 1.9176599928834427e-06, + "loss": 59.8867, + "step": 16499 + }, + { + "epoch": 0.15618935829838793, + "grad_norm": 302.9837646484375, + "learning_rate": 1.91764780984748e-06, + "loss": 24.4297, + "step": 16500 + }, + { + "epoch": 0.15619882432010299, + "grad_norm": 787.551025390625, + "learning_rate": 1.9176356259489874e-06, + "loss": 66.2031, + "step": 16501 + }, + { + "epoch": 0.15620829034181805, + "grad_norm": 650.3306274414062, + "learning_rate": 1.9176234411879777e-06, + "loss": 36.293, + "step": 16502 + }, + { + "epoch": 0.1562177563635331, + "grad_norm": 549.24072265625, + "learning_rate": 1.9176112555644614e-06, + "loss": 49.0781, + "step": 16503 + }, + { + "epoch": 0.15622722238524817, + "grad_norm": 372.9659423828125, + "learning_rate": 1.9175990690784505e-06, + "loss": 61.0469, + "step": 16504 + }, + { + "epoch": 0.1562366884069632, + "grad_norm": 430.0223693847656, + "learning_rate": 1.917586881729956e-06, + "loss": 39.2188, + "step": 16505 + }, + { + "epoch": 0.15624615442867826, + "grad_norm": 719.133544921875, + "learning_rate": 1.9175746935189894e-06, + "loss": 40.5547, + "step": 16506 + }, + { + "epoch": 0.15625562045039332, + "grad_norm": 232.16043090820312, + "learning_rate": 1.917562504445562e-06, + "loss": 21.2812, + "step": 16507 + }, + { + "epoch": 0.15626508647210838, + "grad_norm": 382.67681884765625, + "learning_rate": 1.9175503145096864e-06, + "loss": 28.5938, + "step": 16508 + }, + { + "epoch": 0.1562745524938234, + "grad_norm": 219.78182983398438, + "learning_rate": 1.9175381237113728e-06, + "loss": 15.9961, + "step": 16509 + }, + { + "epoch": 0.15628401851553847, + "grad_norm": 325.71856689453125, + "learning_rate": 1.917525932050633e-06, + "loss": 36.4531, + "step": 16510 + }, + { + "epoch": 0.15629348453725353, + "grad_norm": 331.7279357910156, + "learning_rate": 1.917513739527478e-06, + "loss": 21.3359, + "step": 16511 + }, + { + "epoch": 0.1563029505589686, + "grad_norm": 495.0187072753906, + "learning_rate": 1.9175015461419205e-06, + "loss": 22.2422, + "step": 16512 + }, + { + "epoch": 0.15631241658068365, + "grad_norm": 277.2162170410156, + "learning_rate": 1.917489351893971e-06, + "loss": 16.3594, + "step": 16513 + }, + { + "epoch": 0.15632188260239868, + "grad_norm": 375.4522399902344, + "learning_rate": 1.9174771567836414e-06, + "loss": 32.4961, + "step": 16514 + }, + { + "epoch": 0.15633134862411374, + "grad_norm": 270.26275634765625, + "learning_rate": 1.9174649608109424e-06, + "loss": 18.6328, + "step": 16515 + }, + { + "epoch": 0.1563408146458288, + "grad_norm": 1607.1103515625, + "learning_rate": 1.9174527639758867e-06, + "loss": 30.7969, + "step": 16516 + }, + { + "epoch": 0.15635028066754386, + "grad_norm": 289.5047912597656, + "learning_rate": 1.9174405662784846e-06, + "loss": 26.2266, + "step": 16517 + }, + { + "epoch": 0.1563597466892589, + "grad_norm": 413.7156982421875, + "learning_rate": 1.9174283677187483e-06, + "loss": 45.4219, + "step": 16518 + }, + { + "epoch": 0.15636921271097395, + "grad_norm": 542.1063232421875, + "learning_rate": 1.917416168296689e-06, + "loss": 24.2852, + "step": 16519 + }, + { + "epoch": 0.156378678732689, + "grad_norm": 280.4878234863281, + "learning_rate": 1.917403968012318e-06, + "loss": 18.0859, + "step": 16520 + }, + { + "epoch": 0.15638814475440407, + "grad_norm": 382.4866638183594, + "learning_rate": 1.9173917668656474e-06, + "loss": 24.0312, + "step": 16521 + }, + { + "epoch": 0.15639761077611913, + "grad_norm": 515.8401489257812, + "learning_rate": 1.9173795648566875e-06, + "loss": 35.2812, + "step": 16522 + }, + { + "epoch": 0.15640707679783417, + "grad_norm": 497.1317443847656, + "learning_rate": 1.917367361985451e-06, + "loss": 22.3359, + "step": 16523 + }, + { + "epoch": 0.15641654281954923, + "grad_norm": 2.8423643112182617, + "learning_rate": 1.917355158251949e-06, + "loss": 0.9443, + "step": 16524 + }, + { + "epoch": 0.15642600884126429, + "grad_norm": 390.5072021484375, + "learning_rate": 1.9173429536561923e-06, + "loss": 38.9219, + "step": 16525 + }, + { + "epoch": 0.15643547486297935, + "grad_norm": 214.38699340820312, + "learning_rate": 1.917330748198193e-06, + "loss": 12.9062, + "step": 16526 + }, + { + "epoch": 0.15644494088469438, + "grad_norm": 549.4683837890625, + "learning_rate": 1.917318541877963e-06, + "loss": 33.0781, + "step": 16527 + }, + { + "epoch": 0.15645440690640944, + "grad_norm": 359.5817565917969, + "learning_rate": 1.9173063346955125e-06, + "loss": 16.1367, + "step": 16528 + }, + { + "epoch": 0.1564638729281245, + "grad_norm": 325.8199462890625, + "learning_rate": 1.917294126650854e-06, + "loss": 18.6562, + "step": 16529 + }, + { + "epoch": 0.15647333894983956, + "grad_norm": 514.154541015625, + "learning_rate": 1.917281917743999e-06, + "loss": 22.2188, + "step": 16530 + }, + { + "epoch": 0.15648280497155462, + "grad_norm": 290.4158630371094, + "learning_rate": 1.9172697079749584e-06, + "loss": 25.0625, + "step": 16531 + }, + { + "epoch": 0.15649227099326965, + "grad_norm": 233.24105834960938, + "learning_rate": 1.917257497343744e-06, + "loss": 22.1719, + "step": 16532 + }, + { + "epoch": 0.1565017370149847, + "grad_norm": 561.7127685546875, + "learning_rate": 1.917245285850367e-06, + "loss": 28.0547, + "step": 16533 + }, + { + "epoch": 0.15651120303669977, + "grad_norm": 249.157958984375, + "learning_rate": 1.9172330734948397e-06, + "loss": 21.6016, + "step": 16534 + }, + { + "epoch": 0.15652066905841483, + "grad_norm": 176.96229553222656, + "learning_rate": 1.9172208602771724e-06, + "loss": 18.7188, + "step": 16535 + }, + { + "epoch": 0.15653013508012986, + "grad_norm": 347.1831970214844, + "learning_rate": 1.9172086461973776e-06, + "loss": 23.9297, + "step": 16536 + }, + { + "epoch": 0.15653960110184492, + "grad_norm": 3.0802738666534424, + "learning_rate": 1.917196431255466e-06, + "loss": 1.002, + "step": 16537 + }, + { + "epoch": 0.15654906712355998, + "grad_norm": 540.3087158203125, + "learning_rate": 1.9171842154514496e-06, + "loss": 40.2344, + "step": 16538 + }, + { + "epoch": 0.15655853314527504, + "grad_norm": 698.4066162109375, + "learning_rate": 1.9171719987853396e-06, + "loss": 42.3516, + "step": 16539 + }, + { + "epoch": 0.1565679991669901, + "grad_norm": 402.79595947265625, + "learning_rate": 1.9171597812571475e-06, + "loss": 36.3594, + "step": 16540 + }, + { + "epoch": 0.15657746518870513, + "grad_norm": 226.0202178955078, + "learning_rate": 1.917147562866885e-06, + "loss": 17.6992, + "step": 16541 + }, + { + "epoch": 0.1565869312104202, + "grad_norm": 291.8571472167969, + "learning_rate": 1.9171353436145637e-06, + "loss": 14.9727, + "step": 16542 + }, + { + "epoch": 0.15659639723213525, + "grad_norm": 475.0023498535156, + "learning_rate": 1.9171231235001945e-06, + "loss": 39.2969, + "step": 16543 + }, + { + "epoch": 0.1566058632538503, + "grad_norm": 580.7921142578125, + "learning_rate": 1.9171109025237893e-06, + "loss": 30.5, + "step": 16544 + }, + { + "epoch": 0.15661532927556535, + "grad_norm": 434.2157897949219, + "learning_rate": 1.91709868068536e-06, + "loss": 38.9062, + "step": 16545 + }, + { + "epoch": 0.1566247952972804, + "grad_norm": 548.2258911132812, + "learning_rate": 1.917086457984917e-06, + "loss": 44.25, + "step": 16546 + }, + { + "epoch": 0.15663426131899547, + "grad_norm": 249.34210205078125, + "learning_rate": 1.9170742344224722e-06, + "loss": 17.9844, + "step": 16547 + }, + { + "epoch": 0.15664372734071053, + "grad_norm": 375.05352783203125, + "learning_rate": 1.9170620099980377e-06, + "loss": 45.5, + "step": 16548 + }, + { + "epoch": 0.15665319336242559, + "grad_norm": 310.9997253417969, + "learning_rate": 1.9170497847116243e-06, + "loss": 32.9375, + "step": 16549 + }, + { + "epoch": 0.15666265938414062, + "grad_norm": 471.3978576660156, + "learning_rate": 1.917037558563244e-06, + "loss": 57.0, + "step": 16550 + }, + { + "epoch": 0.15667212540585568, + "grad_norm": 501.3362731933594, + "learning_rate": 1.917025331552908e-06, + "loss": 43.5312, + "step": 16551 + }, + { + "epoch": 0.15668159142757074, + "grad_norm": 335.9559020996094, + "learning_rate": 1.917013103680628e-06, + "loss": 7.3125, + "step": 16552 + }, + { + "epoch": 0.1566910574492858, + "grad_norm": 225.4190673828125, + "learning_rate": 1.917000874946415e-06, + "loss": 15.4453, + "step": 16553 + }, + { + "epoch": 0.15670052347100083, + "grad_norm": 186.1197052001953, + "learning_rate": 1.916988645350281e-06, + "loss": 21.3906, + "step": 16554 + }, + { + "epoch": 0.1567099894927159, + "grad_norm": 3.2301716804504395, + "learning_rate": 1.916976414892237e-06, + "loss": 0.9128, + "step": 16555 + }, + { + "epoch": 0.15671945551443095, + "grad_norm": 719.0043334960938, + "learning_rate": 1.9169641835722955e-06, + "loss": 34.8516, + "step": 16556 + }, + { + "epoch": 0.156728921536146, + "grad_norm": 283.2489929199219, + "learning_rate": 1.9169519513904673e-06, + "loss": 15.6328, + "step": 16557 + }, + { + "epoch": 0.15673838755786107, + "grad_norm": 557.70654296875, + "learning_rate": 1.9169397183467635e-06, + "loss": 31.5156, + "step": 16558 + }, + { + "epoch": 0.1567478535795761, + "grad_norm": 793.2528686523438, + "learning_rate": 1.916927484441196e-06, + "loss": 42.9453, + "step": 16559 + }, + { + "epoch": 0.15675731960129116, + "grad_norm": 3.5094308853149414, + "learning_rate": 1.9169152496737766e-06, + "loss": 0.9731, + "step": 16560 + }, + { + "epoch": 0.15676678562300622, + "grad_norm": 948.0128173828125, + "learning_rate": 1.916903014044516e-06, + "loss": 20.0781, + "step": 16561 + }, + { + "epoch": 0.15677625164472128, + "grad_norm": 529.05322265625, + "learning_rate": 1.916890777553427e-06, + "loss": 36.2734, + "step": 16562 + }, + { + "epoch": 0.15678571766643631, + "grad_norm": 308.5867614746094, + "learning_rate": 1.9168785402005197e-06, + "loss": 18.3594, + "step": 16563 + }, + { + "epoch": 0.15679518368815137, + "grad_norm": 289.1612854003906, + "learning_rate": 1.9168663019858064e-06, + "loss": 28.6641, + "step": 16564 + }, + { + "epoch": 0.15680464970986643, + "grad_norm": 592.051025390625, + "learning_rate": 1.9168540629092986e-06, + "loss": 20.8242, + "step": 16565 + }, + { + "epoch": 0.1568141157315815, + "grad_norm": 476.79241943359375, + "learning_rate": 1.916841822971007e-06, + "loss": 27.3438, + "step": 16566 + }, + { + "epoch": 0.15682358175329655, + "grad_norm": 2.976701498031616, + "learning_rate": 1.9168295821709443e-06, + "loss": 0.8879, + "step": 16567 + }, + { + "epoch": 0.15683304777501159, + "grad_norm": 351.5396728515625, + "learning_rate": 1.9168173405091212e-06, + "loss": 49.3359, + "step": 16568 + }, + { + "epoch": 0.15684251379672665, + "grad_norm": 537.328125, + "learning_rate": 1.9168050979855497e-06, + "loss": 58.3906, + "step": 16569 + }, + { + "epoch": 0.1568519798184417, + "grad_norm": 335.12884521484375, + "learning_rate": 1.9167928546002407e-06, + "loss": 30.9844, + "step": 16570 + }, + { + "epoch": 0.15686144584015677, + "grad_norm": 2165.3369140625, + "learning_rate": 1.9167806103532066e-06, + "loss": 22.2461, + "step": 16571 + }, + { + "epoch": 0.1568709118618718, + "grad_norm": 288.483154296875, + "learning_rate": 1.916768365244458e-06, + "loss": 17.7695, + "step": 16572 + }, + { + "epoch": 0.15688037788358686, + "grad_norm": 343.6012268066406, + "learning_rate": 1.916756119274007e-06, + "loss": 19.4766, + "step": 16573 + }, + { + "epoch": 0.15688984390530192, + "grad_norm": 4.689981460571289, + "learning_rate": 1.916743872441864e-06, + "loss": 0.981, + "step": 16574 + }, + { + "epoch": 0.15689930992701698, + "grad_norm": 535.2788696289062, + "learning_rate": 1.9167316247480425e-06, + "loss": 46.9141, + "step": 16575 + }, + { + "epoch": 0.15690877594873204, + "grad_norm": 302.0567932128906, + "learning_rate": 1.916719376192553e-06, + "loss": 8.5781, + "step": 16576 + }, + { + "epoch": 0.15691824197044707, + "grad_norm": 296.0642395019531, + "learning_rate": 1.9167071267754057e-06, + "loss": 7.9336, + "step": 16577 + }, + { + "epoch": 0.15692770799216213, + "grad_norm": 210.5123748779297, + "learning_rate": 1.9166948764966144e-06, + "loss": 18.9453, + "step": 16578 + }, + { + "epoch": 0.1569371740138772, + "grad_norm": 753.578125, + "learning_rate": 1.916682625356189e-06, + "loss": 31.6211, + "step": 16579 + }, + { + "epoch": 0.15694664003559225, + "grad_norm": 480.4267272949219, + "learning_rate": 1.9166703733541417e-06, + "loss": 33.8125, + "step": 16580 + }, + { + "epoch": 0.15695610605730728, + "grad_norm": 363.88690185546875, + "learning_rate": 1.916658120490484e-06, + "loss": 24.5625, + "step": 16581 + }, + { + "epoch": 0.15696557207902234, + "grad_norm": 359.9840087890625, + "learning_rate": 1.9166458667652275e-06, + "loss": 54.3906, + "step": 16582 + }, + { + "epoch": 0.1569750381007374, + "grad_norm": 637.7329711914062, + "learning_rate": 1.916633612178383e-06, + "loss": 49.1562, + "step": 16583 + }, + { + "epoch": 0.15698450412245246, + "grad_norm": 238.236328125, + "learning_rate": 1.916621356729963e-06, + "loss": 26.5078, + "step": 16584 + }, + { + "epoch": 0.15699397014416752, + "grad_norm": 525.8947143554688, + "learning_rate": 1.9166091004199784e-06, + "loss": 58.5938, + "step": 16585 + }, + { + "epoch": 0.15700343616588255, + "grad_norm": 3.1564717292785645, + "learning_rate": 1.9165968432484408e-06, + "loss": 1.0493, + "step": 16586 + }, + { + "epoch": 0.1570129021875976, + "grad_norm": 287.0762939453125, + "learning_rate": 1.916584585215362e-06, + "loss": 17.875, + "step": 16587 + }, + { + "epoch": 0.15702236820931267, + "grad_norm": 277.54901123046875, + "learning_rate": 1.916572326320753e-06, + "loss": 9.0898, + "step": 16588 + }, + { + "epoch": 0.15703183423102773, + "grad_norm": 895.5343627929688, + "learning_rate": 1.9165600665646256e-06, + "loss": 6.7734, + "step": 16589 + }, + { + "epoch": 0.15704130025274277, + "grad_norm": 182.8898162841797, + "learning_rate": 1.9165478059469916e-06, + "loss": 21.6016, + "step": 16590 + }, + { + "epoch": 0.15705076627445783, + "grad_norm": 3.121965169906616, + "learning_rate": 1.9165355444678624e-06, + "loss": 0.8115, + "step": 16591 + }, + { + "epoch": 0.15706023229617289, + "grad_norm": 1630.626708984375, + "learning_rate": 1.916523282127249e-06, + "loss": 43.4648, + "step": 16592 + }, + { + "epoch": 0.15706969831788795, + "grad_norm": 427.5636291503906, + "learning_rate": 1.916511018925164e-06, + "loss": 27.1797, + "step": 16593 + }, + { + "epoch": 0.157079164339603, + "grad_norm": 614.0263061523438, + "learning_rate": 1.9164987548616177e-06, + "loss": 52.0703, + "step": 16594 + }, + { + "epoch": 0.15708863036131804, + "grad_norm": 532.046630859375, + "learning_rate": 1.916486489936622e-06, + "loss": 37.875, + "step": 16595 + }, + { + "epoch": 0.1570980963830331, + "grad_norm": 400.97015380859375, + "learning_rate": 1.9164742241501894e-06, + "loss": 49.0781, + "step": 16596 + }, + { + "epoch": 0.15710756240474816, + "grad_norm": 209.12342834472656, + "learning_rate": 1.91646195750233e-06, + "loss": 8.6914, + "step": 16597 + }, + { + "epoch": 0.15711702842646322, + "grad_norm": 306.1680908203125, + "learning_rate": 1.9164496899930565e-06, + "loss": 40.9531, + "step": 16598 + }, + { + "epoch": 0.15712649444817828, + "grad_norm": 3.032686233520508, + "learning_rate": 1.916437421622379e-06, + "loss": 0.8828, + "step": 16599 + }, + { + "epoch": 0.1571359604698933, + "grad_norm": 468.6991271972656, + "learning_rate": 1.916425152390311e-06, + "loss": 24.4375, + "step": 16600 + }, + { + "epoch": 0.15714542649160837, + "grad_norm": 502.57818603515625, + "learning_rate": 1.9164128822968625e-06, + "loss": 54.4844, + "step": 16601 + }, + { + "epoch": 0.15715489251332343, + "grad_norm": 218.59664916992188, + "learning_rate": 1.9164006113420456e-06, + "loss": 19.1406, + "step": 16602 + }, + { + "epoch": 0.1571643585350385, + "grad_norm": 886.2828979492188, + "learning_rate": 1.9163883395258717e-06, + "loss": 45.0625, + "step": 16603 + }, + { + "epoch": 0.15717382455675352, + "grad_norm": 242.91552734375, + "learning_rate": 1.916376066848352e-06, + "loss": 18.8047, + "step": 16604 + }, + { + "epoch": 0.15718329057846858, + "grad_norm": 229.9532470703125, + "learning_rate": 1.916363793309499e-06, + "loss": 23.3984, + "step": 16605 + }, + { + "epoch": 0.15719275660018364, + "grad_norm": 154.60174560546875, + "learning_rate": 1.9163515189093234e-06, + "loss": 18.3984, + "step": 16606 + }, + { + "epoch": 0.1572022226218987, + "grad_norm": 1134.1595458984375, + "learning_rate": 1.916339243647837e-06, + "loss": 23.6562, + "step": 16607 + }, + { + "epoch": 0.15721168864361376, + "grad_norm": 321.1361083984375, + "learning_rate": 1.9163269675250514e-06, + "loss": 19.2656, + "step": 16608 + }, + { + "epoch": 0.1572211546653288, + "grad_norm": 329.7748718261719, + "learning_rate": 1.916314690540978e-06, + "loss": 31.5781, + "step": 16609 + }, + { + "epoch": 0.15723062068704385, + "grad_norm": 677.8848266601562, + "learning_rate": 1.9163024126956284e-06, + "loss": 33.5312, + "step": 16610 + }, + { + "epoch": 0.1572400867087589, + "grad_norm": 350.4939270019531, + "learning_rate": 1.916290133989014e-06, + "loss": 36.875, + "step": 16611 + }, + { + "epoch": 0.15724955273047397, + "grad_norm": 375.0602111816406, + "learning_rate": 1.9162778544211466e-06, + "loss": 35.0078, + "step": 16612 + }, + { + "epoch": 0.157259018752189, + "grad_norm": 685.34130859375, + "learning_rate": 1.9162655739920375e-06, + "loss": 31.9062, + "step": 16613 + }, + { + "epoch": 0.15726848477390407, + "grad_norm": 198.237548828125, + "learning_rate": 1.9162532927016983e-06, + "loss": 20.1719, + "step": 16614 + }, + { + "epoch": 0.15727795079561913, + "grad_norm": 539.9365234375, + "learning_rate": 1.9162410105501414e-06, + "loss": 21.5547, + "step": 16615 + }, + { + "epoch": 0.15728741681733419, + "grad_norm": 609.3953247070312, + "learning_rate": 1.916228727537377e-06, + "loss": 46.6875, + "step": 16616 + }, + { + "epoch": 0.15729688283904925, + "grad_norm": 228.87779235839844, + "learning_rate": 1.916216443663417e-06, + "loss": 19.6758, + "step": 16617 + }, + { + "epoch": 0.15730634886076428, + "grad_norm": 392.754150390625, + "learning_rate": 1.916204158928273e-06, + "loss": 46.3047, + "step": 16618 + }, + { + "epoch": 0.15731581488247934, + "grad_norm": 206.27879333496094, + "learning_rate": 1.9161918733319573e-06, + "loss": 16.6875, + "step": 16619 + }, + { + "epoch": 0.1573252809041944, + "grad_norm": 483.047607421875, + "learning_rate": 1.9161795868744804e-06, + "loss": 18.0469, + "step": 16620 + }, + { + "epoch": 0.15733474692590946, + "grad_norm": 3.042423963546753, + "learning_rate": 1.9161672995558544e-06, + "loss": 0.9089, + "step": 16621 + }, + { + "epoch": 0.1573442129476245, + "grad_norm": 686.5133666992188, + "learning_rate": 1.916155011376091e-06, + "loss": 53.8438, + "step": 16622 + }, + { + "epoch": 0.15735367896933955, + "grad_norm": 403.2264099121094, + "learning_rate": 1.916142722335201e-06, + "loss": 39.0234, + "step": 16623 + }, + { + "epoch": 0.1573631449910546, + "grad_norm": 173.27500915527344, + "learning_rate": 1.9161304324331972e-06, + "loss": 20.2031, + "step": 16624 + }, + { + "epoch": 0.15737261101276967, + "grad_norm": 637.0682983398438, + "learning_rate": 1.9161181416700897e-06, + "loss": 50.3438, + "step": 16625 + }, + { + "epoch": 0.15738207703448473, + "grad_norm": 436.74090576171875, + "learning_rate": 1.916105850045891e-06, + "loss": 16.7031, + "step": 16626 + }, + { + "epoch": 0.15739154305619976, + "grad_norm": 3.2130258083343506, + "learning_rate": 1.916093557560612e-06, + "loss": 0.843, + "step": 16627 + }, + { + "epoch": 0.15740100907791482, + "grad_norm": 457.4765930175781, + "learning_rate": 1.916081264214265e-06, + "loss": 35.4922, + "step": 16628 + }, + { + "epoch": 0.15741047509962988, + "grad_norm": 733.1608276367188, + "learning_rate": 1.9160689700068614e-06, + "loss": 56.3125, + "step": 16629 + }, + { + "epoch": 0.15741994112134494, + "grad_norm": 838.9122314453125, + "learning_rate": 1.9160566749384125e-06, + "loss": 50.8008, + "step": 16630 + }, + { + "epoch": 0.15742940714305997, + "grad_norm": 507.49365234375, + "learning_rate": 1.9160443790089295e-06, + "loss": 19.2578, + "step": 16631 + }, + { + "epoch": 0.15743887316477503, + "grad_norm": 495.65997314453125, + "learning_rate": 1.916032082218425e-06, + "loss": 31.2031, + "step": 16632 + }, + { + "epoch": 0.1574483391864901, + "grad_norm": 560.3295288085938, + "learning_rate": 1.9160197845669096e-06, + "loss": 56.5, + "step": 16633 + }, + { + "epoch": 0.15745780520820515, + "grad_norm": 436.8662109375, + "learning_rate": 1.916007486054395e-06, + "loss": 51.6875, + "step": 16634 + }, + { + "epoch": 0.1574672712299202, + "grad_norm": 303.48095703125, + "learning_rate": 1.915995186680893e-06, + "loss": 32.5391, + "step": 16635 + }, + { + "epoch": 0.15747673725163525, + "grad_norm": 335.89422607421875, + "learning_rate": 1.9159828864464155e-06, + "loss": 22.5156, + "step": 16636 + }, + { + "epoch": 0.1574862032733503, + "grad_norm": 473.04534912109375, + "learning_rate": 1.915970585350973e-06, + "loss": 36.5938, + "step": 16637 + }, + { + "epoch": 0.15749566929506537, + "grad_norm": 273.86309814453125, + "learning_rate": 1.9159582833945784e-06, + "loss": 19.8203, + "step": 16638 + }, + { + "epoch": 0.15750513531678043, + "grad_norm": 263.9110107421875, + "learning_rate": 1.9159459805772424e-06, + "loss": 19.4141, + "step": 16639 + }, + { + "epoch": 0.15751460133849546, + "grad_norm": 350.29425048828125, + "learning_rate": 1.9159336768989767e-06, + "loss": 17.0469, + "step": 16640 + }, + { + "epoch": 0.15752406736021052, + "grad_norm": 735.8990478515625, + "learning_rate": 1.915921372359793e-06, + "loss": 70.7695, + "step": 16641 + }, + { + "epoch": 0.15753353338192558, + "grad_norm": 427.37255859375, + "learning_rate": 1.9159090669597025e-06, + "loss": 33.0156, + "step": 16642 + }, + { + "epoch": 0.15754299940364064, + "grad_norm": 2.9002742767333984, + "learning_rate": 1.9158967606987177e-06, + "loss": 0.9109, + "step": 16643 + }, + { + "epoch": 0.1575524654253557, + "grad_norm": 189.94271850585938, + "learning_rate": 1.915884453576849e-06, + "loss": 19.0312, + "step": 16644 + }, + { + "epoch": 0.15756193144707073, + "grad_norm": 378.9605407714844, + "learning_rate": 1.9158721455941083e-06, + "loss": 22.5234, + "step": 16645 + }, + { + "epoch": 0.1575713974687858, + "grad_norm": 311.0993347167969, + "learning_rate": 1.915859836750508e-06, + "loss": 16.3867, + "step": 16646 + }, + { + "epoch": 0.15758086349050085, + "grad_norm": 535.3472290039062, + "learning_rate": 1.9158475270460588e-06, + "loss": 17.6484, + "step": 16647 + }, + { + "epoch": 0.1575903295122159, + "grad_norm": 236.158447265625, + "learning_rate": 1.9158352164807723e-06, + "loss": 21.2188, + "step": 16648 + }, + { + "epoch": 0.15759979553393094, + "grad_norm": 565.2423095703125, + "learning_rate": 1.9158229050546602e-06, + "loss": 17.0312, + "step": 16649 + }, + { + "epoch": 0.157609261555646, + "grad_norm": 367.9898376464844, + "learning_rate": 1.915810592767735e-06, + "loss": 35.4844, + "step": 16650 + }, + { + "epoch": 0.15761872757736106, + "grad_norm": 1287.79443359375, + "learning_rate": 1.9157982796200067e-06, + "loss": 72.8203, + "step": 16651 + }, + { + "epoch": 0.15762819359907612, + "grad_norm": 313.0833740234375, + "learning_rate": 1.9157859656114872e-06, + "loss": 19.7266, + "step": 16652 + }, + { + "epoch": 0.15763765962079118, + "grad_norm": 398.5746154785156, + "learning_rate": 1.915773650742189e-06, + "loss": 32.9375, + "step": 16653 + }, + { + "epoch": 0.15764712564250621, + "grad_norm": 240.6494140625, + "learning_rate": 1.915761335012123e-06, + "loss": 23.3359, + "step": 16654 + }, + { + "epoch": 0.15765659166422127, + "grad_norm": 206.2313690185547, + "learning_rate": 1.9157490184213013e-06, + "loss": 24.1406, + "step": 16655 + }, + { + "epoch": 0.15766605768593633, + "grad_norm": 563.893798828125, + "learning_rate": 1.915736700969735e-06, + "loss": 14.9766, + "step": 16656 + }, + { + "epoch": 0.1576755237076514, + "grad_norm": 315.50897216796875, + "learning_rate": 1.9157243826574355e-06, + "loss": 25.2031, + "step": 16657 + }, + { + "epoch": 0.15768498972936643, + "grad_norm": 738.5973510742188, + "learning_rate": 1.9157120634844147e-06, + "loss": 45.75, + "step": 16658 + }, + { + "epoch": 0.15769445575108149, + "grad_norm": 343.2291259765625, + "learning_rate": 1.915699743450684e-06, + "loss": 23.0703, + "step": 16659 + }, + { + "epoch": 0.15770392177279655, + "grad_norm": 319.7482604980469, + "learning_rate": 1.9156874225562555e-06, + "loss": 17.3945, + "step": 16660 + }, + { + "epoch": 0.1577133877945116, + "grad_norm": 171.5980987548828, + "learning_rate": 1.91567510080114e-06, + "loss": 14.9258, + "step": 16661 + }, + { + "epoch": 0.15772285381622667, + "grad_norm": 198.6282501220703, + "learning_rate": 1.9156627781853495e-06, + "loss": 17.4922, + "step": 16662 + }, + { + "epoch": 0.1577323198379417, + "grad_norm": 214.5541534423828, + "learning_rate": 1.915650454708896e-06, + "loss": 14.7812, + "step": 16663 + }, + { + "epoch": 0.15774178585965676, + "grad_norm": 276.412353515625, + "learning_rate": 1.9156381303717902e-06, + "loss": 18.5156, + "step": 16664 + }, + { + "epoch": 0.15775125188137182, + "grad_norm": 212.73748779296875, + "learning_rate": 1.9156258051740443e-06, + "loss": 15.1875, + "step": 16665 + }, + { + "epoch": 0.15776071790308688, + "grad_norm": 153.9167938232422, + "learning_rate": 1.9156134791156698e-06, + "loss": 17.4688, + "step": 16666 + }, + { + "epoch": 0.1577701839248019, + "grad_norm": 582.4440307617188, + "learning_rate": 1.915601152196678e-06, + "loss": 27.8125, + "step": 16667 + }, + { + "epoch": 0.15777964994651697, + "grad_norm": 318.6811828613281, + "learning_rate": 1.9155888244170808e-06, + "loss": 19.4062, + "step": 16668 + }, + { + "epoch": 0.15778911596823203, + "grad_norm": 484.5511474609375, + "learning_rate": 1.915576495776889e-06, + "loss": 72.9062, + "step": 16669 + }, + { + "epoch": 0.1577985819899471, + "grad_norm": 795.3704223632812, + "learning_rate": 1.915564166276116e-06, + "loss": 45.2344, + "step": 16670 + }, + { + "epoch": 0.15780804801166215, + "grad_norm": 503.2070617675781, + "learning_rate": 1.9155518359147717e-06, + "loss": 22.1719, + "step": 16671 + }, + { + "epoch": 0.15781751403337718, + "grad_norm": 371.6351623535156, + "learning_rate": 1.915539504692868e-06, + "loss": 53.9219, + "step": 16672 + }, + { + "epoch": 0.15782698005509224, + "grad_norm": 325.371337890625, + "learning_rate": 1.9155271726104173e-06, + "loss": 23.25, + "step": 16673 + }, + { + "epoch": 0.1578364460768073, + "grad_norm": 409.01165771484375, + "learning_rate": 1.91551483966743e-06, + "loss": 37.0156, + "step": 16674 + }, + { + "epoch": 0.15784591209852236, + "grad_norm": 3.349351167678833, + "learning_rate": 1.9155025058639188e-06, + "loss": 0.9541, + "step": 16675 + }, + { + "epoch": 0.1578553781202374, + "grad_norm": 269.1447448730469, + "learning_rate": 1.9154901711998944e-06, + "loss": 27.1562, + "step": 16676 + }, + { + "epoch": 0.15786484414195245, + "grad_norm": 450.0188293457031, + "learning_rate": 1.9154778356753693e-06, + "loss": 28.1445, + "step": 16677 + }, + { + "epoch": 0.15787431016366751, + "grad_norm": 585.004150390625, + "learning_rate": 1.915465499290354e-06, + "loss": 63.75, + "step": 16678 + }, + { + "epoch": 0.15788377618538257, + "grad_norm": 409.2629699707031, + "learning_rate": 1.9154531620448612e-06, + "loss": 16.8828, + "step": 16679 + }, + { + "epoch": 0.15789324220709763, + "grad_norm": 261.5265808105469, + "learning_rate": 1.9154408239389016e-06, + "loss": 18.8438, + "step": 16680 + }, + { + "epoch": 0.15790270822881267, + "grad_norm": 386.06103515625, + "learning_rate": 1.9154284849724873e-06, + "loss": 27.5469, + "step": 16681 + }, + { + "epoch": 0.15791217425052773, + "grad_norm": 3.2375829219818115, + "learning_rate": 1.91541614514563e-06, + "loss": 0.8945, + "step": 16682 + }, + { + "epoch": 0.15792164027224279, + "grad_norm": 545.2178344726562, + "learning_rate": 1.9154038044583407e-06, + "loss": 21.2422, + "step": 16683 + }, + { + "epoch": 0.15793110629395785, + "grad_norm": 385.0981140136719, + "learning_rate": 1.9153914629106316e-06, + "loss": 28.2031, + "step": 16684 + }, + { + "epoch": 0.1579405723156729, + "grad_norm": 407.74755859375, + "learning_rate": 1.915379120502514e-06, + "loss": 31.4766, + "step": 16685 + }, + { + "epoch": 0.15795003833738794, + "grad_norm": 2.93766713142395, + "learning_rate": 1.9153667772339996e-06, + "loss": 0.9463, + "step": 16686 + }, + { + "epoch": 0.157959504359103, + "grad_norm": 259.8419189453125, + "learning_rate": 1.9153544331051e-06, + "loss": 23.8281, + "step": 16687 + }, + { + "epoch": 0.15796897038081806, + "grad_norm": 314.89801025390625, + "learning_rate": 1.915342088115827e-06, + "loss": 41.0938, + "step": 16688 + }, + { + "epoch": 0.15797843640253312, + "grad_norm": 608.0072021484375, + "learning_rate": 1.9153297422661918e-06, + "loss": 37.7109, + "step": 16689 + }, + { + "epoch": 0.15798790242424815, + "grad_norm": 418.8529357910156, + "learning_rate": 1.9153173955562057e-06, + "loss": 28.6797, + "step": 16690 + }, + { + "epoch": 0.1579973684459632, + "grad_norm": 520.9064331054688, + "learning_rate": 1.9153050479858814e-06, + "loss": 37.5312, + "step": 16691 + }, + { + "epoch": 0.15800683446767827, + "grad_norm": 461.8236389160156, + "learning_rate": 1.9152926995552297e-06, + "loss": 36.0312, + "step": 16692 + }, + { + "epoch": 0.15801630048939333, + "grad_norm": 364.763671875, + "learning_rate": 1.9152803502642626e-06, + "loss": 9.332, + "step": 16693 + }, + { + "epoch": 0.1580257665111084, + "grad_norm": 297.52447509765625, + "learning_rate": 1.9152680001129913e-06, + "loss": 17.543, + "step": 16694 + }, + { + "epoch": 0.15803523253282342, + "grad_norm": 277.56390380859375, + "learning_rate": 1.9152556491014276e-06, + "loss": 26.4531, + "step": 16695 + }, + { + "epoch": 0.15804469855453848, + "grad_norm": 360.039306640625, + "learning_rate": 1.9152432972295833e-06, + "loss": 29.0547, + "step": 16696 + }, + { + "epoch": 0.15805416457625354, + "grad_norm": 622.3440551757812, + "learning_rate": 1.9152309444974696e-06, + "loss": 65.625, + "step": 16697 + }, + { + "epoch": 0.1580636305979686, + "grad_norm": 261.2226867675781, + "learning_rate": 1.9152185909050985e-06, + "loss": 14.7852, + "step": 16698 + }, + { + "epoch": 0.15807309661968363, + "grad_norm": 968.8908081054688, + "learning_rate": 1.9152062364524817e-06, + "loss": 44.2461, + "step": 16699 + }, + { + "epoch": 0.1580825626413987, + "grad_norm": 878.2884521484375, + "learning_rate": 1.91519388113963e-06, + "loss": 26.0156, + "step": 16700 + }, + { + "epoch": 0.15809202866311375, + "grad_norm": 228.66180419921875, + "learning_rate": 1.9151815249665563e-06, + "loss": 17.4414, + "step": 16701 + }, + { + "epoch": 0.1581014946848288, + "grad_norm": 829.703125, + "learning_rate": 1.915169167933271e-06, + "loss": 56.3047, + "step": 16702 + }, + { + "epoch": 0.15811096070654387, + "grad_norm": 557.052490234375, + "learning_rate": 1.9151568100397865e-06, + "loss": 20.5312, + "step": 16703 + }, + { + "epoch": 0.1581204267282589, + "grad_norm": 3.4059550762176514, + "learning_rate": 1.915144451286114e-06, + "loss": 0.9795, + "step": 16704 + }, + { + "epoch": 0.15812989274997397, + "grad_norm": 1066.88037109375, + "learning_rate": 1.915132091672265e-06, + "loss": 48.3984, + "step": 16705 + }, + { + "epoch": 0.15813935877168903, + "grad_norm": 272.6195983886719, + "learning_rate": 1.9151197311982517e-06, + "loss": 15.5195, + "step": 16706 + }, + { + "epoch": 0.15814882479340409, + "grad_norm": 515.0051879882812, + "learning_rate": 1.9151073698640855e-06, + "loss": 28.2812, + "step": 16707 + }, + { + "epoch": 0.15815829081511912, + "grad_norm": 396.6145324707031, + "learning_rate": 1.9150950076697776e-06, + "loss": 8.7812, + "step": 16708 + }, + { + "epoch": 0.15816775683683418, + "grad_norm": 2.904782772064209, + "learning_rate": 1.91508264461534e-06, + "loss": 0.8579, + "step": 16709 + }, + { + "epoch": 0.15817722285854924, + "grad_norm": 1289.9766845703125, + "learning_rate": 1.915070280700784e-06, + "loss": 40.8359, + "step": 16710 + }, + { + "epoch": 0.1581866888802643, + "grad_norm": 587.4703979492188, + "learning_rate": 1.915057915926122e-06, + "loss": 40.875, + "step": 16711 + }, + { + "epoch": 0.15819615490197936, + "grad_norm": 197.47503662109375, + "learning_rate": 1.9150455502913647e-06, + "loss": 20.5859, + "step": 16712 + }, + { + "epoch": 0.1582056209236944, + "grad_norm": 325.01605224609375, + "learning_rate": 1.9150331837965244e-06, + "loss": 24.6719, + "step": 16713 + }, + { + "epoch": 0.15821508694540945, + "grad_norm": 358.17620849609375, + "learning_rate": 1.915020816441612e-06, + "loss": 26.7578, + "step": 16714 + }, + { + "epoch": 0.1582245529671245, + "grad_norm": 266.7516174316406, + "learning_rate": 1.9150084482266398e-06, + "loss": 33.9531, + "step": 16715 + }, + { + "epoch": 0.15823401898883957, + "grad_norm": 199.58592224121094, + "learning_rate": 1.9149960791516195e-06, + "loss": 17.6797, + "step": 16716 + }, + { + "epoch": 0.1582434850105546, + "grad_norm": 245.19932556152344, + "learning_rate": 1.9149837092165617e-06, + "loss": 25.2031, + "step": 16717 + }, + { + "epoch": 0.15825295103226966, + "grad_norm": 450.3280029296875, + "learning_rate": 1.9149713384214792e-06, + "loss": 64.0352, + "step": 16718 + }, + { + "epoch": 0.15826241705398472, + "grad_norm": 3.350313901901245, + "learning_rate": 1.9149589667663834e-06, + "loss": 0.8521, + "step": 16719 + }, + { + "epoch": 0.15827188307569978, + "grad_norm": 438.7167663574219, + "learning_rate": 1.9149465942512854e-06, + "loss": 46.9062, + "step": 16720 + }, + { + "epoch": 0.15828134909741484, + "grad_norm": 371.8592224121094, + "learning_rate": 1.914934220876197e-06, + "loss": 26.7109, + "step": 16721 + }, + { + "epoch": 0.15829081511912987, + "grad_norm": 381.557861328125, + "learning_rate": 1.91492184664113e-06, + "loss": 9.2461, + "step": 16722 + }, + { + "epoch": 0.15830028114084493, + "grad_norm": 456.6440124511719, + "learning_rate": 1.914909471546096e-06, + "loss": 30.2031, + "step": 16723 + }, + { + "epoch": 0.15830974716256, + "grad_norm": 267.07965087890625, + "learning_rate": 1.914897095591107e-06, + "loss": 10.6055, + "step": 16724 + }, + { + "epoch": 0.15831921318427505, + "grad_norm": 526.7637939453125, + "learning_rate": 1.9148847187761735e-06, + "loss": 9.9688, + "step": 16725 + }, + { + "epoch": 0.15832867920599009, + "grad_norm": 234.38644409179688, + "learning_rate": 1.9148723411013085e-06, + "loss": 27.9922, + "step": 16726 + }, + { + "epoch": 0.15833814522770515, + "grad_norm": 292.95458984375, + "learning_rate": 1.9148599625665226e-06, + "loss": 22.5469, + "step": 16727 + }, + { + "epoch": 0.1583476112494202, + "grad_norm": 824.0736694335938, + "learning_rate": 1.9148475831718285e-06, + "loss": 38.1641, + "step": 16728 + }, + { + "epoch": 0.15835707727113527, + "grad_norm": 327.6979064941406, + "learning_rate": 1.9148352029172366e-06, + "loss": 20.1484, + "step": 16729 + }, + { + "epoch": 0.15836654329285033, + "grad_norm": 145.2967529296875, + "learning_rate": 1.914822821802759e-06, + "loss": 15.0547, + "step": 16730 + }, + { + "epoch": 0.15837600931456536, + "grad_norm": 539.2099609375, + "learning_rate": 1.914810439828408e-06, + "loss": 57.2109, + "step": 16731 + }, + { + "epoch": 0.15838547533628042, + "grad_norm": 3.2426750659942627, + "learning_rate": 1.9147980569941943e-06, + "loss": 0.9316, + "step": 16732 + }, + { + "epoch": 0.15839494135799548, + "grad_norm": 3.0679337978363037, + "learning_rate": 1.9147856733001303e-06, + "loss": 0.792, + "step": 16733 + }, + { + "epoch": 0.15840440737971054, + "grad_norm": 355.65570068359375, + "learning_rate": 1.914773288746227e-06, + "loss": 20.5156, + "step": 16734 + }, + { + "epoch": 0.15841387340142557, + "grad_norm": 439.00390625, + "learning_rate": 1.9147609033324965e-06, + "loss": 48.875, + "step": 16735 + }, + { + "epoch": 0.15842333942314063, + "grad_norm": 203.95919799804688, + "learning_rate": 1.9147485170589503e-06, + "loss": 17.9297, + "step": 16736 + }, + { + "epoch": 0.1584328054448557, + "grad_norm": 150.6885223388672, + "learning_rate": 1.9147361299255996e-06, + "loss": 17.5625, + "step": 16737 + }, + { + "epoch": 0.15844227146657075, + "grad_norm": 832.0249633789062, + "learning_rate": 1.9147237419324567e-06, + "loss": 57.6562, + "step": 16738 + }, + { + "epoch": 0.1584517374882858, + "grad_norm": 294.4139709472656, + "learning_rate": 1.914711353079533e-06, + "loss": 28.7031, + "step": 16739 + }, + { + "epoch": 0.15846120351000084, + "grad_norm": 292.7087097167969, + "learning_rate": 1.91469896336684e-06, + "loss": 20.5312, + "step": 16740 + }, + { + "epoch": 0.1584706695317159, + "grad_norm": 3.4786717891693115, + "learning_rate": 1.91468657279439e-06, + "loss": 0.9482, + "step": 16741 + }, + { + "epoch": 0.15848013555343096, + "grad_norm": 288.1067199707031, + "learning_rate": 1.9146741813621933e-06, + "loss": 38.9531, + "step": 16742 + }, + { + "epoch": 0.15848960157514602, + "grad_norm": 275.5558166503906, + "learning_rate": 1.914661789070263e-06, + "loss": 44.5781, + "step": 16743 + }, + { + "epoch": 0.15849906759686105, + "grad_norm": 871.09716796875, + "learning_rate": 1.9146493959186098e-06, + "loss": 45.5234, + "step": 16744 + }, + { + "epoch": 0.15850853361857611, + "grad_norm": 217.4517059326172, + "learning_rate": 1.9146370019072457e-06, + "loss": 18.2188, + "step": 16745 + }, + { + "epoch": 0.15851799964029117, + "grad_norm": 583.5205688476562, + "learning_rate": 1.9146246070361826e-06, + "loss": 48.6016, + "step": 16746 + }, + { + "epoch": 0.15852746566200623, + "grad_norm": 320.3880615234375, + "learning_rate": 1.9146122113054314e-06, + "loss": 26.8359, + "step": 16747 + }, + { + "epoch": 0.1585369316837213, + "grad_norm": 306.0249938964844, + "learning_rate": 1.9145998147150045e-06, + "loss": 29.2734, + "step": 16748 + }, + { + "epoch": 0.15854639770543633, + "grad_norm": 349.4996337890625, + "learning_rate": 1.914587417264913e-06, + "loss": 16.4766, + "step": 16749 + }, + { + "epoch": 0.15855586372715139, + "grad_norm": 327.65399169921875, + "learning_rate": 1.9145750189551695e-06, + "loss": 41.8594, + "step": 16750 + }, + { + "epoch": 0.15856532974886645, + "grad_norm": 269.5568542480469, + "learning_rate": 1.9145626197857843e-06, + "loss": 26.3125, + "step": 16751 + }, + { + "epoch": 0.1585747957705815, + "grad_norm": 530.5887451171875, + "learning_rate": 1.9145502197567704e-06, + "loss": 26.8281, + "step": 16752 + }, + { + "epoch": 0.15858426179229654, + "grad_norm": 587.6936645507812, + "learning_rate": 1.9145378188681383e-06, + "loss": 15.4102, + "step": 16753 + }, + { + "epoch": 0.1585937278140116, + "grad_norm": 126.08260345458984, + "learning_rate": 1.9145254171199003e-06, + "loss": 21.4375, + "step": 16754 + }, + { + "epoch": 0.15860319383572666, + "grad_norm": 1099.384521484375, + "learning_rate": 1.9145130145120675e-06, + "loss": 21.7305, + "step": 16755 + }, + { + "epoch": 0.15861265985744172, + "grad_norm": 449.0958557128906, + "learning_rate": 1.9145006110446524e-06, + "loss": 9.0586, + "step": 16756 + }, + { + "epoch": 0.15862212587915678, + "grad_norm": 493.82916259765625, + "learning_rate": 1.914488206717666e-06, + "loss": 36.2422, + "step": 16757 + }, + { + "epoch": 0.1586315919008718, + "grad_norm": 308.0479736328125, + "learning_rate": 1.9144758015311204e-06, + "loss": 27.4922, + "step": 16758 + }, + { + "epoch": 0.15864105792258687, + "grad_norm": 314.78985595703125, + "learning_rate": 1.914463395485027e-06, + "loss": 37.8125, + "step": 16759 + }, + { + "epoch": 0.15865052394430193, + "grad_norm": 687.5770874023438, + "learning_rate": 1.9144509885793974e-06, + "loss": 45.25, + "step": 16760 + }, + { + "epoch": 0.158659989966017, + "grad_norm": 302.0760803222656, + "learning_rate": 1.9144385808142435e-06, + "loss": 41.6641, + "step": 16761 + }, + { + "epoch": 0.15866945598773202, + "grad_norm": 277.14703369140625, + "learning_rate": 1.9144261721895765e-06, + "loss": 30.8984, + "step": 16762 + }, + { + "epoch": 0.15867892200944708, + "grad_norm": 214.63186645507812, + "learning_rate": 1.9144137627054086e-06, + "loss": 21.2266, + "step": 16763 + }, + { + "epoch": 0.15868838803116214, + "grad_norm": 162.1470947265625, + "learning_rate": 1.9144013523617513e-06, + "loss": 16.1641, + "step": 16764 + }, + { + "epoch": 0.1586978540528772, + "grad_norm": 192.6674346923828, + "learning_rate": 1.914388941158616e-06, + "loss": 14.1953, + "step": 16765 + }, + { + "epoch": 0.15870732007459226, + "grad_norm": 206.794921875, + "learning_rate": 1.9143765290960147e-06, + "loss": 18.0625, + "step": 16766 + }, + { + "epoch": 0.1587167860963073, + "grad_norm": 332.32342529296875, + "learning_rate": 1.914364116173959e-06, + "loss": 25.7578, + "step": 16767 + }, + { + "epoch": 0.15872625211802235, + "grad_norm": 615.2371215820312, + "learning_rate": 1.9143517023924606e-06, + "loss": 20.5156, + "step": 16768 + }, + { + "epoch": 0.15873571813973741, + "grad_norm": 323.914794921875, + "learning_rate": 1.914339287751531e-06, + "loss": 38.7031, + "step": 16769 + }, + { + "epoch": 0.15874518416145247, + "grad_norm": 247.9381561279297, + "learning_rate": 1.914326872251182e-06, + "loss": 24.25, + "step": 16770 + }, + { + "epoch": 0.15875465018316753, + "grad_norm": 259.513427734375, + "learning_rate": 1.914314455891425e-06, + "loss": 34.1562, + "step": 16771 + }, + { + "epoch": 0.15876411620488257, + "grad_norm": 1694.21240234375, + "learning_rate": 1.9143020386722723e-06, + "loss": 22.4766, + "step": 16772 + }, + { + "epoch": 0.15877358222659763, + "grad_norm": 263.5105895996094, + "learning_rate": 1.9142896205937346e-06, + "loss": 31.8281, + "step": 16773 + }, + { + "epoch": 0.15878304824831269, + "grad_norm": 427.7369079589844, + "learning_rate": 1.914277201655825e-06, + "loss": 36.5625, + "step": 16774 + }, + { + "epoch": 0.15879251427002775, + "grad_norm": 278.5095520019531, + "learning_rate": 1.9142647818585535e-06, + "loss": 18.3281, + "step": 16775 + }, + { + "epoch": 0.15880198029174278, + "grad_norm": 119.75418853759766, + "learning_rate": 1.9142523612019326e-06, + "loss": 11.7812, + "step": 16776 + }, + { + "epoch": 0.15881144631345784, + "grad_norm": 752.1878662109375, + "learning_rate": 1.9142399396859742e-06, + "loss": 45.0312, + "step": 16777 + }, + { + "epoch": 0.1588209123351729, + "grad_norm": 403.5720520019531, + "learning_rate": 1.9142275173106896e-06, + "loss": 19.2656, + "step": 16778 + }, + { + "epoch": 0.15883037835688796, + "grad_norm": 370.3413391113281, + "learning_rate": 1.914215094076091e-06, + "loss": 17.6797, + "step": 16779 + }, + { + "epoch": 0.15883984437860302, + "grad_norm": 248.7978515625, + "learning_rate": 1.914202669982189e-06, + "loss": 19.4141, + "step": 16780 + }, + { + "epoch": 0.15884931040031805, + "grad_norm": 3.473383903503418, + "learning_rate": 1.914190245028997e-06, + "loss": 1.0474, + "step": 16781 + }, + { + "epoch": 0.1588587764220331, + "grad_norm": 460.45263671875, + "learning_rate": 1.914177819216525e-06, + "loss": 23.4375, + "step": 16782 + }, + { + "epoch": 0.15886824244374817, + "grad_norm": 497.06591796875, + "learning_rate": 1.914165392544785e-06, + "loss": 36.125, + "step": 16783 + }, + { + "epoch": 0.15887770846546323, + "grad_norm": 466.79791259765625, + "learning_rate": 1.9141529650137895e-06, + "loss": 49.7031, + "step": 16784 + }, + { + "epoch": 0.15888717448717826, + "grad_norm": 490.60211181640625, + "learning_rate": 1.9141405366235493e-06, + "loss": 11.5312, + "step": 16785 + }, + { + "epoch": 0.15889664050889332, + "grad_norm": 274.3878479003906, + "learning_rate": 1.914128107374077e-06, + "loss": 10.9766, + "step": 16786 + }, + { + "epoch": 0.15890610653060838, + "grad_norm": 3.437119483947754, + "learning_rate": 1.914115677265383e-06, + "loss": 0.8721, + "step": 16787 + }, + { + "epoch": 0.15891557255232344, + "grad_norm": 416.5845642089844, + "learning_rate": 1.9141032462974802e-06, + "loss": 27.3203, + "step": 16788 + }, + { + "epoch": 0.1589250385740385, + "grad_norm": 433.0208435058594, + "learning_rate": 1.91409081447038e-06, + "loss": 14.5234, + "step": 16789 + }, + { + "epoch": 0.15893450459575353, + "grad_norm": 438.4230651855469, + "learning_rate": 1.9140783817840936e-06, + "loss": 61.4375, + "step": 16790 + }, + { + "epoch": 0.1589439706174686, + "grad_norm": 458.2090759277344, + "learning_rate": 1.914065948238633e-06, + "loss": 19.2148, + "step": 16791 + }, + { + "epoch": 0.15895343663918365, + "grad_norm": 501.81927490234375, + "learning_rate": 1.91405351383401e-06, + "loss": 40.9688, + "step": 16792 + }, + { + "epoch": 0.15896290266089871, + "grad_norm": 282.976806640625, + "learning_rate": 1.914041078570236e-06, + "loss": 36.5938, + "step": 16793 + }, + { + "epoch": 0.15897236868261375, + "grad_norm": 1047.6583251953125, + "learning_rate": 1.914028642447323e-06, + "loss": 42.4688, + "step": 16794 + }, + { + "epoch": 0.1589818347043288, + "grad_norm": 2.8649582862854004, + "learning_rate": 1.9140162054652824e-06, + "loss": 0.9424, + "step": 16795 + }, + { + "epoch": 0.15899130072604387, + "grad_norm": 560.88134765625, + "learning_rate": 1.9140037676241265e-06, + "loss": 38.5312, + "step": 16796 + }, + { + "epoch": 0.15900076674775893, + "grad_norm": 3.7380740642547607, + "learning_rate": 1.913991328923866e-06, + "loss": 1.0503, + "step": 16797 + }, + { + "epoch": 0.15901023276947399, + "grad_norm": 438.25543212890625, + "learning_rate": 1.913978889364513e-06, + "loss": 41.1719, + "step": 16798 + }, + { + "epoch": 0.15901969879118902, + "grad_norm": 405.7065734863281, + "learning_rate": 1.9139664489460795e-06, + "loss": 40.3125, + "step": 16799 + }, + { + "epoch": 0.15902916481290408, + "grad_norm": 297.6296081542969, + "learning_rate": 1.913954007668577e-06, + "loss": 10.5469, + "step": 16800 + }, + { + "epoch": 0.15903863083461914, + "grad_norm": 829.7300415039062, + "learning_rate": 1.9139415655320175e-06, + "loss": 8.4609, + "step": 16801 + }, + { + "epoch": 0.1590480968563342, + "grad_norm": 467.1604919433594, + "learning_rate": 1.913929122536412e-06, + "loss": 41.8906, + "step": 16802 + }, + { + "epoch": 0.15905756287804923, + "grad_norm": 421.3735046386719, + "learning_rate": 1.9139166786817727e-06, + "loss": 38.1172, + "step": 16803 + }, + { + "epoch": 0.1590670288997643, + "grad_norm": 557.0076293945312, + "learning_rate": 1.9139042339681112e-06, + "loss": 37.1055, + "step": 16804 + }, + { + "epoch": 0.15907649492147935, + "grad_norm": 166.70140075683594, + "learning_rate": 1.913891788395439e-06, + "loss": 18.543, + "step": 16805 + }, + { + "epoch": 0.1590859609431944, + "grad_norm": 414.6969299316406, + "learning_rate": 1.9138793419637684e-06, + "loss": 44.6406, + "step": 16806 + }, + { + "epoch": 0.15909542696490947, + "grad_norm": 3.4006152153015137, + "learning_rate": 1.9138668946731104e-06, + "loss": 0.856, + "step": 16807 + }, + { + "epoch": 0.1591048929866245, + "grad_norm": 340.63897705078125, + "learning_rate": 1.913854446523477e-06, + "loss": 26.3945, + "step": 16808 + }, + { + "epoch": 0.15911435900833956, + "grad_norm": 226.16517639160156, + "learning_rate": 1.9138419975148795e-06, + "loss": 18.6211, + "step": 16809 + }, + { + "epoch": 0.15912382503005462, + "grad_norm": 174.06768798828125, + "learning_rate": 1.9138295476473305e-06, + "loss": 25.1562, + "step": 16810 + }, + { + "epoch": 0.15913329105176968, + "grad_norm": 3.0575830936431885, + "learning_rate": 1.913817096920841e-06, + "loss": 0.9136, + "step": 16811 + }, + { + "epoch": 0.15914275707348471, + "grad_norm": 392.5357360839844, + "learning_rate": 1.9138046453354225e-06, + "loss": 41.1875, + "step": 16812 + }, + { + "epoch": 0.15915222309519977, + "grad_norm": 517.9003295898438, + "learning_rate": 1.913792192891088e-06, + "loss": 46.2188, + "step": 16813 + }, + { + "epoch": 0.15916168911691483, + "grad_norm": 783.1094970703125, + "learning_rate": 1.9137797395878473e-06, + "loss": 57.8438, + "step": 16814 + }, + { + "epoch": 0.1591711551386299, + "grad_norm": 398.61834716796875, + "learning_rate": 1.9137672854257137e-06, + "loss": 56.9375, + "step": 16815 + }, + { + "epoch": 0.15918062116034495, + "grad_norm": 300.68438720703125, + "learning_rate": 1.913754830404698e-06, + "loss": 31.0547, + "step": 16816 + }, + { + "epoch": 0.15919008718205999, + "grad_norm": 2904.936279296875, + "learning_rate": 1.913742374524812e-06, + "loss": 8.8164, + "step": 16817 + }, + { + "epoch": 0.15919955320377505, + "grad_norm": 785.5325317382812, + "learning_rate": 1.913729917786068e-06, + "loss": 9.6719, + "step": 16818 + }, + { + "epoch": 0.1592090192254901, + "grad_norm": 682.7310791015625, + "learning_rate": 1.9137174601884778e-06, + "loss": 22.0078, + "step": 16819 + }, + { + "epoch": 0.15921848524720517, + "grad_norm": 1154.6016845703125, + "learning_rate": 1.9137050017320514e-06, + "loss": 25.5352, + "step": 16820 + }, + { + "epoch": 0.1592279512689202, + "grad_norm": 223.41993713378906, + "learning_rate": 1.9136925424168027e-06, + "loss": 24.5625, + "step": 16821 + }, + { + "epoch": 0.15923741729063526, + "grad_norm": 714.4197387695312, + "learning_rate": 1.9136800822427422e-06, + "loss": 33.7891, + "step": 16822 + }, + { + "epoch": 0.15924688331235032, + "grad_norm": 243.70887756347656, + "learning_rate": 1.913667621209882e-06, + "loss": 25.2578, + "step": 16823 + }, + { + "epoch": 0.15925634933406538, + "grad_norm": 373.1144714355469, + "learning_rate": 1.913655159318233e-06, + "loss": 26.875, + "step": 16824 + }, + { + "epoch": 0.15926581535578044, + "grad_norm": 160.59503173828125, + "learning_rate": 1.913642696567808e-06, + "loss": 20.5938, + "step": 16825 + }, + { + "epoch": 0.15927528137749547, + "grad_norm": 470.7149353027344, + "learning_rate": 1.9136302329586187e-06, + "loss": 42.9453, + "step": 16826 + }, + { + "epoch": 0.15928474739921053, + "grad_norm": 813.3823852539062, + "learning_rate": 1.913617768490676e-06, + "loss": 51.25, + "step": 16827 + }, + { + "epoch": 0.1592942134209256, + "grad_norm": 775.14794921875, + "learning_rate": 1.913605303163992e-06, + "loss": 50.0781, + "step": 16828 + }, + { + "epoch": 0.15930367944264065, + "grad_norm": 377.03057861328125, + "learning_rate": 1.913592836978579e-06, + "loss": 51.5312, + "step": 16829 + }, + { + "epoch": 0.15931314546435568, + "grad_norm": 319.7200927734375, + "learning_rate": 1.9135803699344476e-06, + "loss": 14.3906, + "step": 16830 + }, + { + "epoch": 0.15932261148607074, + "grad_norm": 202.57498168945312, + "learning_rate": 1.9135679020316104e-06, + "loss": 27.3125, + "step": 16831 + }, + { + "epoch": 0.1593320775077858, + "grad_norm": 250.65184020996094, + "learning_rate": 1.9135554332700784e-06, + "loss": 14.5625, + "step": 16832 + }, + { + "epoch": 0.15934154352950086, + "grad_norm": 239.19822692871094, + "learning_rate": 1.913542963649864e-06, + "loss": 15.8125, + "step": 16833 + }, + { + "epoch": 0.15935100955121592, + "grad_norm": 368.4674987792969, + "learning_rate": 1.913530493170978e-06, + "loss": 18.6289, + "step": 16834 + }, + { + "epoch": 0.15936047557293095, + "grad_norm": 267.605712890625, + "learning_rate": 1.913518021833434e-06, + "loss": 16.8203, + "step": 16835 + }, + { + "epoch": 0.15936994159464601, + "grad_norm": 550.9542846679688, + "learning_rate": 1.9135055496372416e-06, + "loss": 49.7109, + "step": 16836 + }, + { + "epoch": 0.15937940761636107, + "grad_norm": 235.47161865234375, + "learning_rate": 1.9134930765824138e-06, + "loss": 18.0703, + "step": 16837 + }, + { + "epoch": 0.15938887363807613, + "grad_norm": 250.7138671875, + "learning_rate": 1.9134806026689618e-06, + "loss": 20.6094, + "step": 16838 + }, + { + "epoch": 0.15939833965979117, + "grad_norm": 2.096381664276123, + "learning_rate": 1.9134681278968974e-06, + "loss": 0.6552, + "step": 16839 + }, + { + "epoch": 0.15940780568150623, + "grad_norm": 360.9614562988281, + "learning_rate": 1.9134556522662322e-06, + "loss": 22.6797, + "step": 16840 + }, + { + "epoch": 0.15941727170322129, + "grad_norm": 1087.54638671875, + "learning_rate": 1.913443175776979e-06, + "loss": 42.3203, + "step": 16841 + }, + { + "epoch": 0.15942673772493635, + "grad_norm": 292.4931945800781, + "learning_rate": 1.9134306984291475e-06, + "loss": 19.1797, + "step": 16842 + }, + { + "epoch": 0.1594362037466514, + "grad_norm": 373.0649108886719, + "learning_rate": 1.9134182202227512e-06, + "loss": 42.6719, + "step": 16843 + }, + { + "epoch": 0.15944566976836644, + "grad_norm": 1444.93505859375, + "learning_rate": 1.9134057411578007e-06, + "loss": 14.5234, + "step": 16844 + }, + { + "epoch": 0.1594551357900815, + "grad_norm": 413.7614440917969, + "learning_rate": 1.913393261234309e-06, + "loss": 37.9688, + "step": 16845 + }, + { + "epoch": 0.15946460181179656, + "grad_norm": 404.218017578125, + "learning_rate": 1.9133807804522863e-06, + "loss": 19.4844, + "step": 16846 + }, + { + "epoch": 0.15947406783351162, + "grad_norm": 1287.6866455078125, + "learning_rate": 1.9133682988117457e-06, + "loss": 58.3594, + "step": 16847 + }, + { + "epoch": 0.15948353385522665, + "grad_norm": 482.8599548339844, + "learning_rate": 1.913355816312698e-06, + "loss": 32.2344, + "step": 16848 + }, + { + "epoch": 0.1594929998769417, + "grad_norm": 352.47015380859375, + "learning_rate": 1.913343332955155e-06, + "loss": 20.1094, + "step": 16849 + }, + { + "epoch": 0.15950246589865677, + "grad_norm": 3.843177080154419, + "learning_rate": 1.913330848739129e-06, + "loss": 1.0, + "step": 16850 + }, + { + "epoch": 0.15951193192037183, + "grad_norm": 461.7691955566406, + "learning_rate": 1.9133183636646314e-06, + "loss": 37.4297, + "step": 16851 + }, + { + "epoch": 0.1595213979420869, + "grad_norm": 571.4805908203125, + "learning_rate": 1.9133058777316743e-06, + "loss": 46.0938, + "step": 16852 + }, + { + "epoch": 0.15953086396380192, + "grad_norm": 645.2913818359375, + "learning_rate": 1.9132933909402683e-06, + "loss": 66.125, + "step": 16853 + }, + { + "epoch": 0.15954032998551698, + "grad_norm": 654.1763916015625, + "learning_rate": 1.9132809032904267e-06, + "loss": 38.4531, + "step": 16854 + }, + { + "epoch": 0.15954979600723204, + "grad_norm": 445.5452880859375, + "learning_rate": 1.91326841478216e-06, + "loss": 36.7969, + "step": 16855 + }, + { + "epoch": 0.1595592620289471, + "grad_norm": 402.40789794921875, + "learning_rate": 1.9132559254154807e-06, + "loss": 16.1328, + "step": 16856 + }, + { + "epoch": 0.15956872805066216, + "grad_norm": 494.7954406738281, + "learning_rate": 1.9132434351903997e-06, + "loss": 21.1641, + "step": 16857 + }, + { + "epoch": 0.1595781940723772, + "grad_norm": 230.38267517089844, + "learning_rate": 1.91323094410693e-06, + "loss": 24.4453, + "step": 16858 + }, + { + "epoch": 0.15958766009409225, + "grad_norm": 276.19317626953125, + "learning_rate": 1.913218452165082e-06, + "loss": 20.1953, + "step": 16859 + }, + { + "epoch": 0.15959712611580731, + "grad_norm": 250.1018524169922, + "learning_rate": 1.913205959364868e-06, + "loss": 25.6172, + "step": 16860 + }, + { + "epoch": 0.15960659213752237, + "grad_norm": 976.7036743164062, + "learning_rate": 1.9131934657063006e-06, + "loss": 32.7422, + "step": 16861 + }, + { + "epoch": 0.1596160581592374, + "grad_norm": 516.8192749023438, + "learning_rate": 1.91318097118939e-06, + "loss": 37.0, + "step": 16862 + }, + { + "epoch": 0.15962552418095247, + "grad_norm": 789.1393432617188, + "learning_rate": 1.9131684758141493e-06, + "loss": 19.2109, + "step": 16863 + }, + { + "epoch": 0.15963499020266753, + "grad_norm": 196.55267333984375, + "learning_rate": 1.9131559795805892e-06, + "loss": 17.4375, + "step": 16864 + }, + { + "epoch": 0.15964445622438259, + "grad_norm": 426.2040710449219, + "learning_rate": 1.913143482488722e-06, + "loss": 16.2344, + "step": 16865 + }, + { + "epoch": 0.15965392224609765, + "grad_norm": 384.6396789550781, + "learning_rate": 1.9131309845385593e-06, + "loss": 25.4688, + "step": 16866 + }, + { + "epoch": 0.15966338826781268, + "grad_norm": 247.65512084960938, + "learning_rate": 1.9131184857301127e-06, + "loss": 12.7461, + "step": 16867 + }, + { + "epoch": 0.15967285428952774, + "grad_norm": 244.25123596191406, + "learning_rate": 1.9131059860633943e-06, + "loss": 25.4297, + "step": 16868 + }, + { + "epoch": 0.1596823203112428, + "grad_norm": 257.3616943359375, + "learning_rate": 1.913093485538416e-06, + "loss": 22.2734, + "step": 16869 + }, + { + "epoch": 0.15969178633295786, + "grad_norm": 2.965759038925171, + "learning_rate": 1.9130809841551887e-06, + "loss": 0.9893, + "step": 16870 + }, + { + "epoch": 0.1597012523546729, + "grad_norm": 341.5444641113281, + "learning_rate": 1.9130684819137248e-06, + "loss": 24.0938, + "step": 16871 + }, + { + "epoch": 0.15971071837638795, + "grad_norm": 534.2518920898438, + "learning_rate": 1.9130559788140365e-06, + "loss": 55.2188, + "step": 16872 + }, + { + "epoch": 0.159720184398103, + "grad_norm": 333.8830261230469, + "learning_rate": 1.9130434748561344e-06, + "loss": 36.5, + "step": 16873 + }, + { + "epoch": 0.15972965041981807, + "grad_norm": 2.5867865085601807, + "learning_rate": 1.913030970040031e-06, + "loss": 0.8126, + "step": 16874 + }, + { + "epoch": 0.15973911644153313, + "grad_norm": 787.484619140625, + "learning_rate": 1.9130184643657376e-06, + "loss": 30.3906, + "step": 16875 + }, + { + "epoch": 0.15974858246324816, + "grad_norm": 477.4033508300781, + "learning_rate": 1.9130059578332666e-06, + "loss": 42.6562, + "step": 16876 + }, + { + "epoch": 0.15975804848496322, + "grad_norm": 207.07969665527344, + "learning_rate": 1.9129934504426292e-06, + "loss": 20.9609, + "step": 16877 + }, + { + "epoch": 0.15976751450667828, + "grad_norm": 400.5614013671875, + "learning_rate": 1.9129809421938374e-06, + "loss": 23.1406, + "step": 16878 + }, + { + "epoch": 0.15977698052839334, + "grad_norm": 252.4215087890625, + "learning_rate": 1.912968433086903e-06, + "loss": 25.9219, + "step": 16879 + }, + { + "epoch": 0.15978644655010837, + "grad_norm": 481.4036865234375, + "learning_rate": 1.9129559231218373e-06, + "loss": 20.9336, + "step": 16880 + }, + { + "epoch": 0.15979591257182343, + "grad_norm": 593.870361328125, + "learning_rate": 1.9129434122986527e-06, + "loss": 49.5312, + "step": 16881 + }, + { + "epoch": 0.1598053785935385, + "grad_norm": 336.33331298828125, + "learning_rate": 1.912930900617361e-06, + "loss": 40.3594, + "step": 16882 + }, + { + "epoch": 0.15981484461525355, + "grad_norm": 3.0921714305877686, + "learning_rate": 1.912918388077973e-06, + "loss": 0.8838, + "step": 16883 + }, + { + "epoch": 0.15982431063696861, + "grad_norm": 553.4419555664062, + "learning_rate": 1.9129058746805015e-06, + "loss": 48.6172, + "step": 16884 + }, + { + "epoch": 0.15983377665868365, + "grad_norm": 285.0718078613281, + "learning_rate": 1.9128933604249577e-06, + "loss": 29.3125, + "step": 16885 + }, + { + "epoch": 0.1598432426803987, + "grad_norm": 501.3463439941406, + "learning_rate": 1.9128808453113534e-06, + "loss": 38.1094, + "step": 16886 + }, + { + "epoch": 0.15985270870211377, + "grad_norm": 2.746562957763672, + "learning_rate": 1.912868329339701e-06, + "loss": 0.7812, + "step": 16887 + }, + { + "epoch": 0.15986217472382883, + "grad_norm": 3.340176582336426, + "learning_rate": 1.9128558125100114e-06, + "loss": 0.8965, + "step": 16888 + }, + { + "epoch": 0.15987164074554386, + "grad_norm": 239.31446838378906, + "learning_rate": 1.912843294822297e-06, + "loss": 25.9453, + "step": 16889 + }, + { + "epoch": 0.15988110676725892, + "grad_norm": 187.79762268066406, + "learning_rate": 1.9128307762765694e-06, + "loss": 20.8125, + "step": 16890 + }, + { + "epoch": 0.15989057278897398, + "grad_norm": 403.58453369140625, + "learning_rate": 1.9128182568728394e-06, + "loss": 19.4297, + "step": 16891 + }, + { + "epoch": 0.15990003881068904, + "grad_norm": 283.1717224121094, + "learning_rate": 1.9128057366111205e-06, + "loss": 21.8203, + "step": 16892 + }, + { + "epoch": 0.1599095048324041, + "grad_norm": 581.7300415039062, + "learning_rate": 1.912793215491423e-06, + "loss": 37.1719, + "step": 16893 + }, + { + "epoch": 0.15991897085411913, + "grad_norm": 3.4857993125915527, + "learning_rate": 1.9127806935137597e-06, + "loss": 0.853, + "step": 16894 + }, + { + "epoch": 0.1599284368758342, + "grad_norm": 628.8172607421875, + "learning_rate": 1.9127681706781414e-06, + "loss": 55.5, + "step": 16895 + }, + { + "epoch": 0.15993790289754925, + "grad_norm": 395.4135437011719, + "learning_rate": 1.9127556469845813e-06, + "loss": 18.4453, + "step": 16896 + }, + { + "epoch": 0.1599473689192643, + "grad_norm": 3.3371591567993164, + "learning_rate": 1.9127431224330895e-06, + "loss": 0.9829, + "step": 16897 + }, + { + "epoch": 0.15995683494097934, + "grad_norm": 297.6080322265625, + "learning_rate": 1.9127305970236786e-06, + "loss": 18.7266, + "step": 16898 + }, + { + "epoch": 0.1599663009626944, + "grad_norm": 314.972412109375, + "learning_rate": 1.9127180707563606e-06, + "loss": 18.0234, + "step": 16899 + }, + { + "epoch": 0.15997576698440946, + "grad_norm": 462.6767883300781, + "learning_rate": 1.912705543631147e-06, + "loss": 20.0938, + "step": 16900 + }, + { + "epoch": 0.15998523300612452, + "grad_norm": 342.5396728515625, + "learning_rate": 1.9126930156480493e-06, + "loss": 25.2734, + "step": 16901 + }, + { + "epoch": 0.15999469902783958, + "grad_norm": 449.50421142578125, + "learning_rate": 1.9126804868070797e-06, + "loss": 44.0, + "step": 16902 + }, + { + "epoch": 0.16000416504955461, + "grad_norm": 849.6061401367188, + "learning_rate": 1.91266795710825e-06, + "loss": 20.8242, + "step": 16903 + }, + { + "epoch": 0.16001363107126967, + "grad_norm": 704.3296508789062, + "learning_rate": 1.912655426551571e-06, + "loss": 73.2812, + "step": 16904 + }, + { + "epoch": 0.16002309709298473, + "grad_norm": 1293.2872314453125, + "learning_rate": 1.912642895137056e-06, + "loss": 40.4375, + "step": 16905 + }, + { + "epoch": 0.1600325631146998, + "grad_norm": 448.6490173339844, + "learning_rate": 1.912630362864716e-06, + "loss": 18.8906, + "step": 16906 + }, + { + "epoch": 0.16004202913641483, + "grad_norm": 470.2567138671875, + "learning_rate": 1.912617829734563e-06, + "loss": 34.0469, + "step": 16907 + }, + { + "epoch": 0.1600514951581299, + "grad_norm": 206.54473876953125, + "learning_rate": 1.9126052957466082e-06, + "loss": 13.2383, + "step": 16908 + }, + { + "epoch": 0.16006096117984495, + "grad_norm": 496.5941162109375, + "learning_rate": 1.9125927609008637e-06, + "loss": 33.6719, + "step": 16909 + }, + { + "epoch": 0.16007042720156, + "grad_norm": 273.2015075683594, + "learning_rate": 1.9125802251973416e-06, + "loss": 16.0039, + "step": 16910 + }, + { + "epoch": 0.16007989322327507, + "grad_norm": 360.2948303222656, + "learning_rate": 1.9125676886360534e-06, + "loss": 29.1406, + "step": 16911 + }, + { + "epoch": 0.1600893592449901, + "grad_norm": 2.364194393157959, + "learning_rate": 1.912555151217011e-06, + "loss": 0.7612, + "step": 16912 + }, + { + "epoch": 0.16009882526670516, + "grad_norm": 370.0992431640625, + "learning_rate": 1.9125426129402264e-06, + "loss": 18.7617, + "step": 16913 + }, + { + "epoch": 0.16010829128842022, + "grad_norm": 435.2680358886719, + "learning_rate": 1.9125300738057105e-06, + "loss": 25.5938, + "step": 16914 + }, + { + "epoch": 0.16011775731013528, + "grad_norm": 261.8534240722656, + "learning_rate": 1.9125175338134763e-06, + "loss": 21.2734, + "step": 16915 + }, + { + "epoch": 0.1601272233318503, + "grad_norm": 433.4310302734375, + "learning_rate": 1.9125049929635345e-06, + "loss": 36.3594, + "step": 16916 + }, + { + "epoch": 0.16013668935356537, + "grad_norm": 342.3426208496094, + "learning_rate": 1.9124924512558977e-06, + "loss": 21.7734, + "step": 16917 + }, + { + "epoch": 0.16014615537528043, + "grad_norm": 266.1431579589844, + "learning_rate": 1.9124799086905774e-06, + "loss": 22.5312, + "step": 16918 + }, + { + "epoch": 0.1601556213969955, + "grad_norm": 230.7234344482422, + "learning_rate": 1.912467365267585e-06, + "loss": 8.1719, + "step": 16919 + }, + { + "epoch": 0.16016508741871055, + "grad_norm": 272.4124450683594, + "learning_rate": 1.9124548209869326e-06, + "loss": 17.7656, + "step": 16920 + }, + { + "epoch": 0.16017455344042558, + "grad_norm": 604.1654052734375, + "learning_rate": 1.9124422758486324e-06, + "loss": 31.4609, + "step": 16921 + }, + { + "epoch": 0.16018401946214064, + "grad_norm": 478.78961181640625, + "learning_rate": 1.9124297298526956e-06, + "loss": 24.7109, + "step": 16922 + }, + { + "epoch": 0.1601934854838557, + "grad_norm": 365.4930725097656, + "learning_rate": 1.9124171829991346e-06, + "loss": 27.8945, + "step": 16923 + }, + { + "epoch": 0.16020295150557076, + "grad_norm": 524.9852905273438, + "learning_rate": 1.91240463528796e-06, + "loss": 43.375, + "step": 16924 + }, + { + "epoch": 0.1602124175272858, + "grad_norm": 334.66845703125, + "learning_rate": 1.9123920867191847e-06, + "loss": 20.3906, + "step": 16925 + }, + { + "epoch": 0.16022188354900085, + "grad_norm": 248.23683166503906, + "learning_rate": 1.9123795372928204e-06, + "loss": 21.0391, + "step": 16926 + }, + { + "epoch": 0.16023134957071591, + "grad_norm": 598.56005859375, + "learning_rate": 1.912366987008879e-06, + "loss": 28.0, + "step": 16927 + }, + { + "epoch": 0.16024081559243097, + "grad_norm": 504.17669677734375, + "learning_rate": 1.9123544358673716e-06, + "loss": 18.6367, + "step": 16928 + }, + { + "epoch": 0.16025028161414603, + "grad_norm": 338.59442138671875, + "learning_rate": 1.9123418838683107e-06, + "loss": 29.6094, + "step": 16929 + }, + { + "epoch": 0.16025974763586107, + "grad_norm": 205.1815185546875, + "learning_rate": 1.912329331011707e-06, + "loss": 19.7109, + "step": 16930 + }, + { + "epoch": 0.16026921365757613, + "grad_norm": 349.1948547363281, + "learning_rate": 1.9123167772975738e-06, + "loss": 33.5938, + "step": 16931 + }, + { + "epoch": 0.16027867967929119, + "grad_norm": 260.69061279296875, + "learning_rate": 1.912304222725922e-06, + "loss": 28.2578, + "step": 16932 + }, + { + "epoch": 0.16028814570100625, + "grad_norm": 218.4563751220703, + "learning_rate": 1.9122916672967633e-06, + "loss": 19.3516, + "step": 16933 + }, + { + "epoch": 0.16029761172272128, + "grad_norm": 515.3067016601562, + "learning_rate": 1.91227911101011e-06, + "loss": 30.2344, + "step": 16934 + }, + { + "epoch": 0.16030707774443634, + "grad_norm": 349.8551330566406, + "learning_rate": 1.912266553865974e-06, + "loss": 30.2812, + "step": 16935 + }, + { + "epoch": 0.1603165437661514, + "grad_norm": 518.0768432617188, + "learning_rate": 1.9122539958643666e-06, + "loss": 35.5859, + "step": 16936 + }, + { + "epoch": 0.16032600978786646, + "grad_norm": 384.69476318359375, + "learning_rate": 1.9122414370052994e-06, + "loss": 21.6797, + "step": 16937 + }, + { + "epoch": 0.16033547580958152, + "grad_norm": 4059.861083984375, + "learning_rate": 1.912228877288785e-06, + "loss": 21.1016, + "step": 16938 + }, + { + "epoch": 0.16034494183129655, + "grad_norm": 390.7470397949219, + "learning_rate": 1.912216316714835e-06, + "loss": 39.7422, + "step": 16939 + }, + { + "epoch": 0.1603544078530116, + "grad_norm": 477.29754638671875, + "learning_rate": 1.9122037552834603e-06, + "loss": 37.7031, + "step": 16940 + }, + { + "epoch": 0.16036387387472667, + "grad_norm": 565.3533325195312, + "learning_rate": 1.912191192994674e-06, + "loss": 21.6172, + "step": 16941 + }, + { + "epoch": 0.16037333989644173, + "grad_norm": 2.887160301208496, + "learning_rate": 1.912178629848487e-06, + "loss": 0.9512, + "step": 16942 + }, + { + "epoch": 0.1603828059181568, + "grad_norm": 3.325629949569702, + "learning_rate": 1.9121660658449113e-06, + "loss": 0.8584, + "step": 16943 + }, + { + "epoch": 0.16039227193987182, + "grad_norm": 409.7071228027344, + "learning_rate": 1.912153500983959e-06, + "loss": 17.9688, + "step": 16944 + }, + { + "epoch": 0.16040173796158688, + "grad_norm": 252.21726989746094, + "learning_rate": 1.912140935265642e-06, + "loss": 23.0, + "step": 16945 + }, + { + "epoch": 0.16041120398330194, + "grad_norm": 398.6767883300781, + "learning_rate": 1.9121283686899717e-06, + "loss": 19.9043, + "step": 16946 + }, + { + "epoch": 0.160420670005017, + "grad_norm": 375.1625061035156, + "learning_rate": 1.91211580125696e-06, + "loss": 34.0625, + "step": 16947 + }, + { + "epoch": 0.16043013602673203, + "grad_norm": 291.8757629394531, + "learning_rate": 1.912103232966619e-06, + "loss": 34.9141, + "step": 16948 + }, + { + "epoch": 0.1604396020484471, + "grad_norm": 3.3349342346191406, + "learning_rate": 1.91209066381896e-06, + "loss": 0.9844, + "step": 16949 + }, + { + "epoch": 0.16044906807016215, + "grad_norm": 410.27099609375, + "learning_rate": 1.912078093813995e-06, + "loss": 23.0156, + "step": 16950 + }, + { + "epoch": 0.16045853409187721, + "grad_norm": 332.38470458984375, + "learning_rate": 1.9120655229517368e-06, + "loss": 22.7266, + "step": 16951 + }, + { + "epoch": 0.16046800011359227, + "grad_norm": 220.82229614257812, + "learning_rate": 1.9120529512321954e-06, + "loss": 23.3125, + "step": 16952 + }, + { + "epoch": 0.1604774661353073, + "grad_norm": 235.512939453125, + "learning_rate": 1.912040378655384e-06, + "loss": 16.2031, + "step": 16953 + }, + { + "epoch": 0.16048693215702237, + "grad_norm": 295.41046142578125, + "learning_rate": 1.9120278052213136e-06, + "loss": 23.9141, + "step": 16954 + }, + { + "epoch": 0.16049639817873743, + "grad_norm": 635.16552734375, + "learning_rate": 1.912015230929997e-06, + "loss": 35.7969, + "step": 16955 + }, + { + "epoch": 0.16050586420045249, + "grad_norm": 694.1016235351562, + "learning_rate": 1.9120026557814447e-06, + "loss": 45.2969, + "step": 16956 + }, + { + "epoch": 0.16051533022216752, + "grad_norm": 210.99777221679688, + "learning_rate": 1.9119900797756698e-06, + "loss": 21.6562, + "step": 16957 + }, + { + "epoch": 0.16052479624388258, + "grad_norm": 1657.641357421875, + "learning_rate": 1.9119775029126835e-06, + "loss": 42.0938, + "step": 16958 + }, + { + "epoch": 0.16053426226559764, + "grad_norm": 178.64962768554688, + "learning_rate": 1.9119649251924973e-06, + "loss": 20.0938, + "step": 16959 + }, + { + "epoch": 0.1605437282873127, + "grad_norm": 300.8656311035156, + "learning_rate": 1.9119523466151236e-06, + "loss": 14.3203, + "step": 16960 + }, + { + "epoch": 0.16055319430902776, + "grad_norm": 463.6260986328125, + "learning_rate": 1.9119397671805745e-06, + "loss": 27.0547, + "step": 16961 + }, + { + "epoch": 0.1605626603307428, + "grad_norm": 407.12335205078125, + "learning_rate": 1.9119271868888607e-06, + "loss": 42.9375, + "step": 16962 + }, + { + "epoch": 0.16057212635245785, + "grad_norm": 372.8177185058594, + "learning_rate": 1.911914605739995e-06, + "loss": 26.4766, + "step": 16963 + }, + { + "epoch": 0.1605815923741729, + "grad_norm": 185.95860290527344, + "learning_rate": 1.9119020237339885e-06, + "loss": 15.7188, + "step": 16964 + }, + { + "epoch": 0.16059105839588797, + "grad_norm": 273.1447448730469, + "learning_rate": 1.9118894408708537e-06, + "loss": 17.0234, + "step": 16965 + }, + { + "epoch": 0.160600524417603, + "grad_norm": 337.231201171875, + "learning_rate": 1.911876857150602e-06, + "loss": 22.4531, + "step": 16966 + }, + { + "epoch": 0.16060999043931806, + "grad_norm": 505.4077453613281, + "learning_rate": 1.9118642725732455e-06, + "loss": 30.7969, + "step": 16967 + }, + { + "epoch": 0.16061945646103312, + "grad_norm": 199.98495483398438, + "learning_rate": 1.911851687138796e-06, + "loss": 19.5156, + "step": 16968 + }, + { + "epoch": 0.16062892248274818, + "grad_norm": 273.60955810546875, + "learning_rate": 1.911839100847265e-06, + "loss": 21.6016, + "step": 16969 + }, + { + "epoch": 0.16063838850446324, + "grad_norm": 358.0626220703125, + "learning_rate": 1.911826513698665e-06, + "loss": 50.457, + "step": 16970 + }, + { + "epoch": 0.16064785452617827, + "grad_norm": 193.36729431152344, + "learning_rate": 1.911813925693007e-06, + "loss": 19.8633, + "step": 16971 + }, + { + "epoch": 0.16065732054789333, + "grad_norm": 200.96035766601562, + "learning_rate": 1.911801336830303e-06, + "loss": 15.8516, + "step": 16972 + }, + { + "epoch": 0.1606667865696084, + "grad_norm": 408.78057861328125, + "learning_rate": 1.9117887471105658e-06, + "loss": 20.3047, + "step": 16973 + }, + { + "epoch": 0.16067625259132345, + "grad_norm": 186.53636169433594, + "learning_rate": 1.911776156533806e-06, + "loss": 21.3594, + "step": 16974 + }, + { + "epoch": 0.1606857186130385, + "grad_norm": 261.13433837890625, + "learning_rate": 1.911763565100036e-06, + "loss": 16.9453, + "step": 16975 + }, + { + "epoch": 0.16069518463475355, + "grad_norm": 266.4335021972656, + "learning_rate": 1.9117509728092676e-06, + "loss": 16.4688, + "step": 16976 + }, + { + "epoch": 0.1607046506564686, + "grad_norm": 396.63690185546875, + "learning_rate": 1.9117383796615127e-06, + "loss": 42.2188, + "step": 16977 + }, + { + "epoch": 0.16071411667818367, + "grad_norm": 2.821974277496338, + "learning_rate": 1.9117257856567827e-06, + "loss": 1.0352, + "step": 16978 + }, + { + "epoch": 0.16072358269989873, + "grad_norm": 560.1165771484375, + "learning_rate": 1.91171319079509e-06, + "loss": 59.7969, + "step": 16979 + }, + { + "epoch": 0.16073304872161376, + "grad_norm": 609.7498779296875, + "learning_rate": 1.9117005950764464e-06, + "loss": 50.6094, + "step": 16980 + }, + { + "epoch": 0.16074251474332882, + "grad_norm": 656.9005126953125, + "learning_rate": 1.911687998500863e-06, + "loss": 29.3125, + "step": 16981 + }, + { + "epoch": 0.16075198076504388, + "grad_norm": 324.0623779296875, + "learning_rate": 1.911675401068353e-06, + "loss": 16.6875, + "step": 16982 + }, + { + "epoch": 0.16076144678675894, + "grad_norm": 327.7328796386719, + "learning_rate": 1.9116628027789266e-06, + "loss": 20.8242, + "step": 16983 + }, + { + "epoch": 0.16077091280847397, + "grad_norm": 999.0296630859375, + "learning_rate": 1.911650203632597e-06, + "loss": 67.7734, + "step": 16984 + }, + { + "epoch": 0.16078037883018903, + "grad_norm": 402.6531066894531, + "learning_rate": 1.911637603629375e-06, + "loss": 50.0156, + "step": 16985 + }, + { + "epoch": 0.1607898448519041, + "grad_norm": 2.9536476135253906, + "learning_rate": 1.9116250027692735e-06, + "loss": 0.8442, + "step": 16986 + }, + { + "epoch": 0.16079931087361915, + "grad_norm": 367.35345458984375, + "learning_rate": 1.9116124010523035e-06, + "loss": 16.125, + "step": 16987 + }, + { + "epoch": 0.1608087768953342, + "grad_norm": 182.04039001464844, + "learning_rate": 1.9115997984784774e-06, + "loss": 16.4414, + "step": 16988 + }, + { + "epoch": 0.16081824291704924, + "grad_norm": 372.857421875, + "learning_rate": 1.911587195047807e-06, + "loss": 16.6016, + "step": 16989 + }, + { + "epoch": 0.1608277089387643, + "grad_norm": 766.78271484375, + "learning_rate": 1.911574590760303e-06, + "loss": 49.2188, + "step": 16990 + }, + { + "epoch": 0.16083717496047936, + "grad_norm": 849.5000610351562, + "learning_rate": 1.911561985615979e-06, + "loss": 31.0625, + "step": 16991 + }, + { + "epoch": 0.16084664098219442, + "grad_norm": 268.73284912109375, + "learning_rate": 1.9115493796148455e-06, + "loss": 19.4297, + "step": 16992 + }, + { + "epoch": 0.16085610700390945, + "grad_norm": 419.7916564941406, + "learning_rate": 1.9115367727569156e-06, + "loss": 20.1289, + "step": 16993 + }, + { + "epoch": 0.16086557302562451, + "grad_norm": 701.0419921875, + "learning_rate": 1.9115241650421997e-06, + "loss": 29.875, + "step": 16994 + }, + { + "epoch": 0.16087503904733957, + "grad_norm": 851.1591796875, + "learning_rate": 1.911511556470711e-06, + "loss": 38.4062, + "step": 16995 + }, + { + "epoch": 0.16088450506905463, + "grad_norm": 319.7021789550781, + "learning_rate": 1.9114989470424604e-06, + "loss": 33.1562, + "step": 16996 + }, + { + "epoch": 0.1608939710907697, + "grad_norm": 653.3712768554688, + "learning_rate": 1.91148633675746e-06, + "loss": 39.0312, + "step": 16997 + }, + { + "epoch": 0.16090343711248473, + "grad_norm": 411.3580322265625, + "learning_rate": 1.911473725615722e-06, + "loss": 43.3906, + "step": 16998 + }, + { + "epoch": 0.1609129031341998, + "grad_norm": 1791.1370849609375, + "learning_rate": 1.911461113617258e-06, + "loss": 34.25, + "step": 16999 + }, + { + "epoch": 0.16092236915591485, + "grad_norm": 565.5704956054688, + "learning_rate": 1.9114485007620796e-06, + "loss": 46.7422, + "step": 17000 + }, + { + "epoch": 0.1609318351776299, + "grad_norm": 453.46075439453125, + "learning_rate": 1.911435887050199e-06, + "loss": 18.6758, + "step": 17001 + }, + { + "epoch": 0.16094130119934494, + "grad_norm": 284.1117248535156, + "learning_rate": 1.9114232724816278e-06, + "loss": 27.4219, + "step": 17002 + }, + { + "epoch": 0.16095076722106, + "grad_norm": 887.9302368164062, + "learning_rate": 1.9114106570563786e-06, + "loss": 46.3203, + "step": 17003 + }, + { + "epoch": 0.16096023324277506, + "grad_norm": 470.27362060546875, + "learning_rate": 1.9113980407744622e-06, + "loss": 36.7969, + "step": 17004 + }, + { + "epoch": 0.16096969926449012, + "grad_norm": 293.1057434082031, + "learning_rate": 1.9113854236358907e-06, + "loss": 24.1172, + "step": 17005 + }, + { + "epoch": 0.16097916528620518, + "grad_norm": 425.1702880859375, + "learning_rate": 1.9113728056406766e-06, + "loss": 37.9453, + "step": 17006 + }, + { + "epoch": 0.1609886313079202, + "grad_norm": 924.8650512695312, + "learning_rate": 1.911360186788831e-06, + "loss": 22.0312, + "step": 17007 + }, + { + "epoch": 0.16099809732963527, + "grad_norm": 1134.1319580078125, + "learning_rate": 1.9113475670803666e-06, + "loss": 37.6367, + "step": 17008 + }, + { + "epoch": 0.16100756335135033, + "grad_norm": 557.943603515625, + "learning_rate": 1.9113349465152948e-06, + "loss": 27.0703, + "step": 17009 + }, + { + "epoch": 0.1610170293730654, + "grad_norm": 460.73162841796875, + "learning_rate": 1.9113223250936266e-06, + "loss": 35.6875, + "step": 17010 + }, + { + "epoch": 0.16102649539478042, + "grad_norm": 715.5545654296875, + "learning_rate": 1.9113097028153756e-06, + "loss": 71.7734, + "step": 17011 + }, + { + "epoch": 0.16103596141649548, + "grad_norm": 735.2754516601562, + "learning_rate": 1.9112970796805523e-06, + "loss": 34.3438, + "step": 17012 + }, + { + "epoch": 0.16104542743821054, + "grad_norm": 438.83642578125, + "learning_rate": 1.911284455689169e-06, + "loss": 21.6133, + "step": 17013 + }, + { + "epoch": 0.1610548934599256, + "grad_norm": 244.44195556640625, + "learning_rate": 1.9112718308412383e-06, + "loss": 17.8906, + "step": 17014 + }, + { + "epoch": 0.16106435948164066, + "grad_norm": 297.4529113769531, + "learning_rate": 1.9112592051367704e-06, + "loss": 25.6016, + "step": 17015 + }, + { + "epoch": 0.1610738255033557, + "grad_norm": 456.4322814941406, + "learning_rate": 1.9112465785757785e-06, + "loss": 32.5312, + "step": 17016 + }, + { + "epoch": 0.16108329152507075, + "grad_norm": 407.0337829589844, + "learning_rate": 1.911233951158274e-06, + "loss": 22.6875, + "step": 17017 + }, + { + "epoch": 0.16109275754678581, + "grad_norm": 218.86134338378906, + "learning_rate": 1.9112213228842694e-06, + "loss": 21.5938, + "step": 17018 + }, + { + "epoch": 0.16110222356850087, + "grad_norm": 395.30316162109375, + "learning_rate": 1.9112086937537756e-06, + "loss": 21.6406, + "step": 17019 + }, + { + "epoch": 0.1611116895902159, + "grad_norm": 353.6055908203125, + "learning_rate": 1.911196063766805e-06, + "loss": 27.7734, + "step": 17020 + }, + { + "epoch": 0.16112115561193097, + "grad_norm": 379.74713134765625, + "learning_rate": 1.9111834329233693e-06, + "loss": 25.7578, + "step": 17021 + }, + { + "epoch": 0.16113062163364603, + "grad_norm": 3.2509233951568604, + "learning_rate": 1.91117080122348e-06, + "loss": 0.9849, + "step": 17022 + }, + { + "epoch": 0.1611400876553611, + "grad_norm": 999.8805541992188, + "learning_rate": 1.91115816866715e-06, + "loss": 67.8281, + "step": 17023 + }, + { + "epoch": 0.16114955367707615, + "grad_norm": 282.7792053222656, + "learning_rate": 1.911145535254391e-06, + "loss": 16.5625, + "step": 17024 + }, + { + "epoch": 0.16115901969879118, + "grad_norm": 3.4331343173980713, + "learning_rate": 1.911132900985214e-06, + "loss": 0.9604, + "step": 17025 + }, + { + "epoch": 0.16116848572050624, + "grad_norm": 254.82723999023438, + "learning_rate": 1.9111202658596312e-06, + "loss": 22.2656, + "step": 17026 + }, + { + "epoch": 0.1611779517422213, + "grad_norm": 346.1422119140625, + "learning_rate": 1.9111076298776543e-06, + "loss": 25.6172, + "step": 17027 + }, + { + "epoch": 0.16118741776393636, + "grad_norm": 163.77120971679688, + "learning_rate": 1.911094993039296e-06, + "loss": 15.6172, + "step": 17028 + }, + { + "epoch": 0.16119688378565142, + "grad_norm": 3.4059479236602783, + "learning_rate": 1.9110823553445678e-06, + "loss": 0.9683, + "step": 17029 + }, + { + "epoch": 0.16120634980736645, + "grad_norm": 668.4746704101562, + "learning_rate": 1.9110697167934815e-06, + "loss": 28.6016, + "step": 17030 + }, + { + "epoch": 0.1612158158290815, + "grad_norm": 3.196014642715454, + "learning_rate": 1.9110570773860482e-06, + "loss": 0.853, + "step": 17031 + }, + { + "epoch": 0.16122528185079657, + "grad_norm": 410.2226867675781, + "learning_rate": 1.9110444371222812e-06, + "loss": 23.3906, + "step": 17032 + }, + { + "epoch": 0.16123474787251163, + "grad_norm": 191.06529235839844, + "learning_rate": 1.9110317960021914e-06, + "loss": 19.6875, + "step": 17033 + }, + { + "epoch": 0.16124421389422666, + "grad_norm": 378.4808654785156, + "learning_rate": 1.911019154025791e-06, + "loss": 27.2344, + "step": 17034 + }, + { + "epoch": 0.16125367991594172, + "grad_norm": 589.58837890625, + "learning_rate": 1.911006511193092e-06, + "loss": 19.125, + "step": 17035 + }, + { + "epoch": 0.16126314593765678, + "grad_norm": 322.2188720703125, + "learning_rate": 1.910993867504106e-06, + "loss": 36.9531, + "step": 17036 + }, + { + "epoch": 0.16127261195937184, + "grad_norm": 387.8877258300781, + "learning_rate": 1.9109812229588454e-06, + "loss": 23.7422, + "step": 17037 + }, + { + "epoch": 0.1612820779810869, + "grad_norm": 3.699342727661133, + "learning_rate": 1.9109685775573213e-06, + "loss": 0.9492, + "step": 17038 + }, + { + "epoch": 0.16129154400280193, + "grad_norm": 512.0419921875, + "learning_rate": 1.9109559312995463e-06, + "loss": 44.0156, + "step": 17039 + }, + { + "epoch": 0.161301010024517, + "grad_norm": 410.6933288574219, + "learning_rate": 1.9109432841855316e-06, + "loss": 8.3359, + "step": 17040 + }, + { + "epoch": 0.16131047604623205, + "grad_norm": 211.17330932617188, + "learning_rate": 1.91093063621529e-06, + "loss": 20.5, + "step": 17041 + }, + { + "epoch": 0.16131994206794711, + "grad_norm": 206.46800231933594, + "learning_rate": 1.9109179873888325e-06, + "loss": 12.7773, + "step": 17042 + }, + { + "epoch": 0.16132940808966215, + "grad_norm": 316.2215576171875, + "learning_rate": 1.9109053377061713e-06, + "loss": 26.2578, + "step": 17043 + }, + { + "epoch": 0.1613388741113772, + "grad_norm": 589.4833374023438, + "learning_rate": 1.9108926871673187e-06, + "loss": 54.6797, + "step": 17044 + }, + { + "epoch": 0.16134834013309227, + "grad_norm": 311.3746643066406, + "learning_rate": 1.9108800357722856e-06, + "loss": 14.4609, + "step": 17045 + }, + { + "epoch": 0.16135780615480733, + "grad_norm": 214.340576171875, + "learning_rate": 1.910867383521085e-06, + "loss": 20.2148, + "step": 17046 + }, + { + "epoch": 0.16136727217652239, + "grad_norm": 375.3583984375, + "learning_rate": 1.9108547304137283e-06, + "loss": 23.2109, + "step": 17047 + }, + { + "epoch": 0.16137673819823742, + "grad_norm": 124.23546600341797, + "learning_rate": 1.9108420764502274e-06, + "loss": 19.25, + "step": 17048 + }, + { + "epoch": 0.16138620421995248, + "grad_norm": 349.0888366699219, + "learning_rate": 1.910829421630594e-06, + "loss": 30.8438, + "step": 17049 + }, + { + "epoch": 0.16139567024166754, + "grad_norm": 224.28025817871094, + "learning_rate": 1.9108167659548405e-06, + "loss": 20.1484, + "step": 17050 + }, + { + "epoch": 0.1614051362633826, + "grad_norm": 274.1299133300781, + "learning_rate": 1.910804109422978e-06, + "loss": 10.8281, + "step": 17051 + }, + { + "epoch": 0.16141460228509763, + "grad_norm": 382.4142761230469, + "learning_rate": 1.910791452035019e-06, + "loss": 15.1406, + "step": 17052 + }, + { + "epoch": 0.1614240683068127, + "grad_norm": 495.7969055175781, + "learning_rate": 1.910778793790976e-06, + "loss": 49.9062, + "step": 17053 + }, + { + "epoch": 0.16143353432852775, + "grad_norm": 356.5565490722656, + "learning_rate": 1.9107661346908593e-06, + "loss": 34.8672, + "step": 17054 + }, + { + "epoch": 0.1614430003502428, + "grad_norm": 659.2477416992188, + "learning_rate": 1.9107534747346825e-06, + "loss": 25.2891, + "step": 17055 + }, + { + "epoch": 0.16145246637195787, + "grad_norm": 320.6579895019531, + "learning_rate": 1.910740813922456e-06, + "loss": 19.9141, + "step": 17056 + }, + { + "epoch": 0.1614619323936729, + "grad_norm": 377.04083251953125, + "learning_rate": 1.9107281522541927e-06, + "loss": 40.1562, + "step": 17057 + }, + { + "epoch": 0.16147139841538796, + "grad_norm": 190.27761840820312, + "learning_rate": 1.910715489729904e-06, + "loss": 15.5312, + "step": 17058 + }, + { + "epoch": 0.16148086443710302, + "grad_norm": 348.1722412109375, + "learning_rate": 1.9107028263496023e-06, + "loss": 22.9219, + "step": 17059 + }, + { + "epoch": 0.16149033045881808, + "grad_norm": 231.05259704589844, + "learning_rate": 1.910690162113299e-06, + "loss": 20.1445, + "step": 17060 + }, + { + "epoch": 0.16149979648053311, + "grad_norm": 385.2059631347656, + "learning_rate": 1.910677497021006e-06, + "loss": 39.5312, + "step": 17061 + }, + { + "epoch": 0.16150926250224817, + "grad_norm": 549.7839965820312, + "learning_rate": 1.9106648310727358e-06, + "loss": 19.6016, + "step": 17062 + }, + { + "epoch": 0.16151872852396323, + "grad_norm": 1272.0810546875, + "learning_rate": 1.9106521642684996e-06, + "loss": 20.4688, + "step": 17063 + }, + { + "epoch": 0.1615281945456783, + "grad_norm": 366.4230041503906, + "learning_rate": 1.9106394966083095e-06, + "loss": 56.5938, + "step": 17064 + }, + { + "epoch": 0.16153766056739335, + "grad_norm": 610.529541015625, + "learning_rate": 1.9106268280921774e-06, + "loss": 41.9062, + "step": 17065 + }, + { + "epoch": 0.1615471265891084, + "grad_norm": 524.8357543945312, + "learning_rate": 1.910614158720116e-06, + "loss": 41.5703, + "step": 17066 + }, + { + "epoch": 0.16155659261082345, + "grad_norm": 379.4273986816406, + "learning_rate": 1.910601488492136e-06, + "loss": 44.125, + "step": 17067 + }, + { + "epoch": 0.1615660586325385, + "grad_norm": 3.61087703704834, + "learning_rate": 1.91058881740825e-06, + "loss": 1.1064, + "step": 17068 + }, + { + "epoch": 0.16157552465425357, + "grad_norm": 458.9352722167969, + "learning_rate": 1.91057614546847e-06, + "loss": 41.8594, + "step": 17069 + }, + { + "epoch": 0.1615849906759686, + "grad_norm": 501.1684265136719, + "learning_rate": 1.910563472672807e-06, + "loss": 33.3906, + "step": 17070 + }, + { + "epoch": 0.16159445669768366, + "grad_norm": 3.0331943035125732, + "learning_rate": 1.910550799021274e-06, + "loss": 0.8342, + "step": 17071 + }, + { + "epoch": 0.16160392271939872, + "grad_norm": 393.4090270996094, + "learning_rate": 1.9105381245138826e-06, + "loss": 49.0938, + "step": 17072 + }, + { + "epoch": 0.16161338874111378, + "grad_norm": 292.204833984375, + "learning_rate": 1.9105254491506444e-06, + "loss": 31.2969, + "step": 17073 + }, + { + "epoch": 0.16162285476282884, + "grad_norm": 3.132530927658081, + "learning_rate": 1.910512772931572e-06, + "loss": 0.9712, + "step": 17074 + }, + { + "epoch": 0.16163232078454387, + "grad_norm": 378.35345458984375, + "learning_rate": 1.9105000958566758e-06, + "loss": 35.7422, + "step": 17075 + }, + { + "epoch": 0.16164178680625893, + "grad_norm": 336.9638977050781, + "learning_rate": 1.9104874179259694e-06, + "loss": 25.6641, + "step": 17076 + }, + { + "epoch": 0.161651252827974, + "grad_norm": 373.9454040527344, + "learning_rate": 1.910474739139464e-06, + "loss": 47.8828, + "step": 17077 + }, + { + "epoch": 0.16166071884968905, + "grad_norm": 208.96713256835938, + "learning_rate": 1.9104620594971718e-06, + "loss": 18.4766, + "step": 17078 + }, + { + "epoch": 0.16167018487140408, + "grad_norm": 2.5472519397735596, + "learning_rate": 1.910449378999104e-06, + "loss": 0.853, + "step": 17079 + }, + { + "epoch": 0.16167965089311914, + "grad_norm": 2.880476474761963, + "learning_rate": 1.910436697645273e-06, + "loss": 0.9434, + "step": 17080 + }, + { + "epoch": 0.1616891169148342, + "grad_norm": 200.97308349609375, + "learning_rate": 1.9104240154356912e-06, + "loss": 20.8398, + "step": 17081 + }, + { + "epoch": 0.16169858293654926, + "grad_norm": 3.1474947929382324, + "learning_rate": 1.9104113323703702e-06, + "loss": 0.9336, + "step": 17082 + }, + { + "epoch": 0.16170804895826432, + "grad_norm": 545.2364501953125, + "learning_rate": 1.9103986484493215e-06, + "loss": 49.1406, + "step": 17083 + }, + { + "epoch": 0.16171751497997935, + "grad_norm": 546.783447265625, + "learning_rate": 1.910385963672557e-06, + "loss": 44.0312, + "step": 17084 + }, + { + "epoch": 0.16172698100169441, + "grad_norm": 3.403522491455078, + "learning_rate": 1.910373278040089e-06, + "loss": 0.9551, + "step": 17085 + }, + { + "epoch": 0.16173644702340947, + "grad_norm": 3.383004665374756, + "learning_rate": 1.9103605915519295e-06, + "loss": 0.8813, + "step": 17086 + }, + { + "epoch": 0.16174591304512453, + "grad_norm": 353.47833251953125, + "learning_rate": 1.9103479042080904e-06, + "loss": 20.0, + "step": 17087 + }, + { + "epoch": 0.16175537906683957, + "grad_norm": 285.5270080566406, + "learning_rate": 1.9103352160085834e-06, + "loss": 29.4375, + "step": 17088 + }, + { + "epoch": 0.16176484508855463, + "grad_norm": 478.9138488769531, + "learning_rate": 1.91032252695342e-06, + "loss": 39.9375, + "step": 17089 + }, + { + "epoch": 0.1617743111102697, + "grad_norm": 551.6159057617188, + "learning_rate": 1.9103098370426135e-06, + "loss": 26.2344, + "step": 17090 + }, + { + "epoch": 0.16178377713198475, + "grad_norm": 810.7564697265625, + "learning_rate": 1.9102971462761744e-06, + "loss": 36.8047, + "step": 17091 + }, + { + "epoch": 0.1617932431536998, + "grad_norm": 329.4029541015625, + "learning_rate": 1.9102844546541156e-06, + "loss": 22.7969, + "step": 17092 + }, + { + "epoch": 0.16180270917541484, + "grad_norm": 710.7526245117188, + "learning_rate": 1.9102717621764484e-06, + "loss": 41.8633, + "step": 17093 + }, + { + "epoch": 0.1618121751971299, + "grad_norm": 411.8090515136719, + "learning_rate": 1.9102590688431848e-06, + "loss": 33.3438, + "step": 17094 + }, + { + "epoch": 0.16182164121884496, + "grad_norm": 319.9405822753906, + "learning_rate": 1.910246374654337e-06, + "loss": 22.2578, + "step": 17095 + }, + { + "epoch": 0.16183110724056002, + "grad_norm": 258.8850402832031, + "learning_rate": 1.910233679609917e-06, + "loss": 6.3574, + "step": 17096 + }, + { + "epoch": 0.16184057326227505, + "grad_norm": 419.0711975097656, + "learning_rate": 1.9102209837099364e-06, + "loss": 38.6719, + "step": 17097 + }, + { + "epoch": 0.1618500392839901, + "grad_norm": 330.5131530761719, + "learning_rate": 1.9102082869544072e-06, + "loss": 39.4219, + "step": 17098 + }, + { + "epoch": 0.16185950530570517, + "grad_norm": 418.7228088378906, + "learning_rate": 1.9101955893433418e-06, + "loss": 19.0078, + "step": 17099 + }, + { + "epoch": 0.16186897132742023, + "grad_norm": 332.8175964355469, + "learning_rate": 1.9101828908767514e-06, + "loss": 9.0625, + "step": 17100 + }, + { + "epoch": 0.1618784373491353, + "grad_norm": 587.3089599609375, + "learning_rate": 1.9101701915546485e-06, + "loss": 19.3672, + "step": 17101 + }, + { + "epoch": 0.16188790337085032, + "grad_norm": 210.52789306640625, + "learning_rate": 1.9101574913770444e-06, + "loss": 8.5, + "step": 17102 + }, + { + "epoch": 0.16189736939256538, + "grad_norm": 234.1007843017578, + "learning_rate": 1.9101447903439523e-06, + "loss": 17.2109, + "step": 17103 + }, + { + "epoch": 0.16190683541428044, + "grad_norm": 239.49139404296875, + "learning_rate": 1.9101320884553824e-06, + "loss": 21.0938, + "step": 17104 + }, + { + "epoch": 0.1619163014359955, + "grad_norm": 542.4743041992188, + "learning_rate": 1.910119385711348e-06, + "loss": 46.6719, + "step": 17105 + }, + { + "epoch": 0.16192576745771053, + "grad_norm": 558.11572265625, + "learning_rate": 1.9101066821118605e-06, + "loss": 49.0469, + "step": 17106 + }, + { + "epoch": 0.1619352334794256, + "grad_norm": 464.9468078613281, + "learning_rate": 1.910093977656932e-06, + "loss": 47.0156, + "step": 17107 + }, + { + "epoch": 0.16194469950114065, + "grad_norm": 304.21148681640625, + "learning_rate": 1.9100812723465745e-06, + "loss": 17.2031, + "step": 17108 + }, + { + "epoch": 0.16195416552285571, + "grad_norm": 727.4903564453125, + "learning_rate": 1.9100685661807994e-06, + "loss": 32.5352, + "step": 17109 + }, + { + "epoch": 0.16196363154457077, + "grad_norm": 581.537841796875, + "learning_rate": 1.9100558591596198e-06, + "loss": 26.0703, + "step": 17110 + }, + { + "epoch": 0.1619730975662858, + "grad_norm": 380.3512268066406, + "learning_rate": 1.910043151283046e-06, + "loss": 28.457, + "step": 17111 + }, + { + "epoch": 0.16198256358800087, + "grad_norm": 359.89471435546875, + "learning_rate": 1.9100304425510915e-06, + "loss": 22.5703, + "step": 17112 + }, + { + "epoch": 0.16199202960971593, + "grad_norm": 300.28375244140625, + "learning_rate": 1.910017732963767e-06, + "loss": 27.3906, + "step": 17113 + }, + { + "epoch": 0.162001495631431, + "grad_norm": 706.1578369140625, + "learning_rate": 1.9100050225210855e-06, + "loss": 30.4219, + "step": 17114 + }, + { + "epoch": 0.16201096165314605, + "grad_norm": 971.186279296875, + "learning_rate": 1.909992311223058e-06, + "loss": 27.9219, + "step": 17115 + }, + { + "epoch": 0.16202042767486108, + "grad_norm": 251.74732971191406, + "learning_rate": 1.909979599069698e-06, + "loss": 18.0859, + "step": 17116 + }, + { + "epoch": 0.16202989369657614, + "grad_norm": 385.5003356933594, + "learning_rate": 1.909966886061015e-06, + "loss": 38.8906, + "step": 17117 + }, + { + "epoch": 0.1620393597182912, + "grad_norm": 413.82977294921875, + "learning_rate": 1.909954172197023e-06, + "loss": 49.9375, + "step": 17118 + }, + { + "epoch": 0.16204882574000626, + "grad_norm": 273.7763366699219, + "learning_rate": 1.9099414574777334e-06, + "loss": 17.4062, + "step": 17119 + }, + { + "epoch": 0.1620582917617213, + "grad_norm": 3.4670865535736084, + "learning_rate": 1.909928741903158e-06, + "loss": 0.9517, + "step": 17120 + }, + { + "epoch": 0.16206775778343635, + "grad_norm": 357.6307373046875, + "learning_rate": 1.9099160254733086e-06, + "loss": 32.1562, + "step": 17121 + }, + { + "epoch": 0.1620772238051514, + "grad_norm": 415.7175598144531, + "learning_rate": 1.909903308188197e-06, + "loss": 29.5156, + "step": 17122 + }, + { + "epoch": 0.16208668982686647, + "grad_norm": 406.2748107910156, + "learning_rate": 1.9098905900478363e-06, + "loss": 32.8125, + "step": 17123 + }, + { + "epoch": 0.16209615584858153, + "grad_norm": 406.7070617675781, + "learning_rate": 1.909877871052237e-06, + "loss": 36.2188, + "step": 17124 + }, + { + "epoch": 0.16210562187029656, + "grad_norm": 560.647216796875, + "learning_rate": 1.9098651512014118e-06, + "loss": 40.3633, + "step": 17125 + }, + { + "epoch": 0.16211508789201162, + "grad_norm": 456.6175537109375, + "learning_rate": 1.9098524304953725e-06, + "loss": 66.1875, + "step": 17126 + }, + { + "epoch": 0.16212455391372668, + "grad_norm": 365.978271484375, + "learning_rate": 1.909839708934131e-06, + "loss": 11.2305, + "step": 17127 + }, + { + "epoch": 0.16213401993544174, + "grad_norm": 690.6747436523438, + "learning_rate": 1.9098269865177e-06, + "loss": 42.918, + "step": 17128 + }, + { + "epoch": 0.16214348595715677, + "grad_norm": 200.08790588378906, + "learning_rate": 1.9098142632460903e-06, + "loss": 14.375, + "step": 17129 + }, + { + "epoch": 0.16215295197887183, + "grad_norm": 3.420147657394409, + "learning_rate": 1.9098015391193144e-06, + "loss": 0.9441, + "step": 17130 + }, + { + "epoch": 0.1621624180005869, + "grad_norm": 457.46514892578125, + "learning_rate": 1.9097888141373843e-06, + "loss": 43.7812, + "step": 17131 + }, + { + "epoch": 0.16217188402230195, + "grad_norm": 174.50579833984375, + "learning_rate": 1.909776088300312e-06, + "loss": 20.0234, + "step": 17132 + }, + { + "epoch": 0.16218135004401701, + "grad_norm": 236.48043823242188, + "learning_rate": 1.9097633616081094e-06, + "loss": 19.0, + "step": 17133 + }, + { + "epoch": 0.16219081606573205, + "grad_norm": 301.0186462402344, + "learning_rate": 1.909750634060788e-06, + "loss": 17.6484, + "step": 17134 + }, + { + "epoch": 0.1622002820874471, + "grad_norm": 325.7938232421875, + "learning_rate": 1.909737905658361e-06, + "loss": 30.5938, + "step": 17135 + }, + { + "epoch": 0.16220974810916217, + "grad_norm": 584.9515380859375, + "learning_rate": 1.909725176400839e-06, + "loss": 41.5391, + "step": 17136 + }, + { + "epoch": 0.16221921413087723, + "grad_norm": 362.670654296875, + "learning_rate": 1.909712446288234e-06, + "loss": 38.8281, + "step": 17137 + }, + { + "epoch": 0.16222868015259226, + "grad_norm": 193.35032653808594, + "learning_rate": 1.9096997153205592e-06, + "loss": 14.6484, + "step": 17138 + }, + { + "epoch": 0.16223814617430732, + "grad_norm": 347.5096435546875, + "learning_rate": 1.909686983497826e-06, + "loss": 27.2891, + "step": 17139 + }, + { + "epoch": 0.16224761219602238, + "grad_norm": 312.19268798828125, + "learning_rate": 1.9096742508200457e-06, + "loss": 36.0312, + "step": 17140 + }, + { + "epoch": 0.16225707821773744, + "grad_norm": 794.9446411132812, + "learning_rate": 1.909661517287231e-06, + "loss": 47.4453, + "step": 17141 + }, + { + "epoch": 0.1622665442394525, + "grad_norm": 194.83502197265625, + "learning_rate": 1.9096487828993936e-06, + "loss": 18.3672, + "step": 17142 + }, + { + "epoch": 0.16227601026116753, + "grad_norm": 364.40667724609375, + "learning_rate": 1.9096360476565457e-06, + "loss": 18.5859, + "step": 17143 + }, + { + "epoch": 0.1622854762828826, + "grad_norm": 259.6159973144531, + "learning_rate": 1.909623311558699e-06, + "loss": 19.2031, + "step": 17144 + }, + { + "epoch": 0.16229494230459765, + "grad_norm": 277.787841796875, + "learning_rate": 1.9096105746058652e-06, + "loss": 10.9961, + "step": 17145 + }, + { + "epoch": 0.1623044083263127, + "grad_norm": 252.2308807373047, + "learning_rate": 1.909597836798057e-06, + "loss": 18.8047, + "step": 17146 + }, + { + "epoch": 0.16231387434802774, + "grad_norm": 227.10679626464844, + "learning_rate": 1.9095850981352862e-06, + "loss": 13.8008, + "step": 17147 + }, + { + "epoch": 0.1623233403697428, + "grad_norm": 302.960693359375, + "learning_rate": 1.9095723586175643e-06, + "loss": 13.9688, + "step": 17148 + }, + { + "epoch": 0.16233280639145786, + "grad_norm": 272.6726379394531, + "learning_rate": 1.9095596182449032e-06, + "loss": 21.3203, + "step": 17149 + }, + { + "epoch": 0.16234227241317292, + "grad_norm": 611.236083984375, + "learning_rate": 1.909546877017316e-06, + "loss": 40.25, + "step": 17150 + }, + { + "epoch": 0.16235173843488798, + "grad_norm": 513.290283203125, + "learning_rate": 1.9095341349348136e-06, + "loss": 28.9688, + "step": 17151 + }, + { + "epoch": 0.16236120445660301, + "grad_norm": 3.0989832878112793, + "learning_rate": 1.909521391997408e-06, + "loss": 1.0117, + "step": 17152 + }, + { + "epoch": 0.16237067047831807, + "grad_norm": 468.90093994140625, + "learning_rate": 1.909508648205112e-06, + "loss": 26.7031, + "step": 17153 + }, + { + "epoch": 0.16238013650003313, + "grad_norm": 606.1409912109375, + "learning_rate": 1.9094959035579365e-06, + "loss": 16.1016, + "step": 17154 + }, + { + "epoch": 0.1623896025217482, + "grad_norm": 329.4105224609375, + "learning_rate": 1.9094831580558942e-06, + "loss": 21.1797, + "step": 17155 + }, + { + "epoch": 0.16239906854346323, + "grad_norm": 3.0354511737823486, + "learning_rate": 1.909470411698997e-06, + "loss": 0.7454, + "step": 17156 + }, + { + "epoch": 0.1624085345651783, + "grad_norm": 380.9478454589844, + "learning_rate": 1.909457664487257e-06, + "loss": 19.1406, + "step": 17157 + }, + { + "epoch": 0.16241800058689335, + "grad_norm": 145.47605895996094, + "learning_rate": 1.909444916420685e-06, + "loss": 12.0547, + "step": 17158 + }, + { + "epoch": 0.1624274666086084, + "grad_norm": 391.83172607421875, + "learning_rate": 1.909432167499295e-06, + "loss": 13.9141, + "step": 17159 + }, + { + "epoch": 0.16243693263032347, + "grad_norm": 740.0308837890625, + "learning_rate": 1.9094194177230975e-06, + "loss": 19.3789, + "step": 17160 + }, + { + "epoch": 0.1624463986520385, + "grad_norm": 523.7193603515625, + "learning_rate": 1.9094066670921047e-06, + "loss": 34.125, + "step": 17161 + }, + { + "epoch": 0.16245586467375356, + "grad_norm": 244.2097930908203, + "learning_rate": 1.909393915606329e-06, + "loss": 15.0312, + "step": 17162 + }, + { + "epoch": 0.16246533069546862, + "grad_norm": 218.5679168701172, + "learning_rate": 1.9093811632657823e-06, + "loss": 18.418, + "step": 17163 + }, + { + "epoch": 0.16247479671718368, + "grad_norm": 226.27503967285156, + "learning_rate": 1.9093684100704764e-06, + "loss": 24.9688, + "step": 17164 + }, + { + "epoch": 0.1624842627388987, + "grad_norm": 838.9443969726562, + "learning_rate": 1.909355656020423e-06, + "loss": 48.0156, + "step": 17165 + }, + { + "epoch": 0.16249372876061377, + "grad_norm": 438.9240417480469, + "learning_rate": 1.909342901115635e-06, + "loss": 23.0781, + "step": 17166 + }, + { + "epoch": 0.16250319478232883, + "grad_norm": 351.3302307128906, + "learning_rate": 1.9093301453561236e-06, + "loss": 19.8906, + "step": 17167 + }, + { + "epoch": 0.1625126608040439, + "grad_norm": 199.6851806640625, + "learning_rate": 1.9093173887419014e-06, + "loss": 15.8516, + "step": 17168 + }, + { + "epoch": 0.16252212682575895, + "grad_norm": 2.6602160930633545, + "learning_rate": 1.9093046312729793e-06, + "loss": 0.7588, + "step": 17169 + }, + { + "epoch": 0.16253159284747398, + "grad_norm": 819.4470825195312, + "learning_rate": 1.9092918729493704e-06, + "loss": 28.4609, + "step": 17170 + }, + { + "epoch": 0.16254105886918904, + "grad_norm": 236.28158569335938, + "learning_rate": 1.9092791137710863e-06, + "loss": 11.3594, + "step": 17171 + }, + { + "epoch": 0.1625505248909041, + "grad_norm": 322.6043701171875, + "learning_rate": 1.9092663537381387e-06, + "loss": 24.6641, + "step": 17172 + }, + { + "epoch": 0.16255999091261916, + "grad_norm": 577.3320922851562, + "learning_rate": 1.9092535928505404e-06, + "loss": 11.1816, + "step": 17173 + }, + { + "epoch": 0.1625694569343342, + "grad_norm": 671.29638671875, + "learning_rate": 1.9092408311083023e-06, + "loss": 41.6562, + "step": 17174 + }, + { + "epoch": 0.16257892295604925, + "grad_norm": 242.12611389160156, + "learning_rate": 1.9092280685114373e-06, + "loss": 19.625, + "step": 17175 + }, + { + "epoch": 0.16258838897776431, + "grad_norm": 1076.1988525390625, + "learning_rate": 1.909215305059957e-06, + "loss": 64.9688, + "step": 17176 + }, + { + "epoch": 0.16259785499947937, + "grad_norm": 197.93309020996094, + "learning_rate": 1.9092025407538735e-06, + "loss": 14.1484, + "step": 17177 + }, + { + "epoch": 0.16260732102119443, + "grad_norm": 441.7080078125, + "learning_rate": 1.909189775593199e-06, + "loss": 44.5156, + "step": 17178 + }, + { + "epoch": 0.16261678704290947, + "grad_norm": 458.0302734375, + "learning_rate": 1.9091770095779448e-06, + "loss": 35.9219, + "step": 17179 + }, + { + "epoch": 0.16262625306462453, + "grad_norm": 3.109907627105713, + "learning_rate": 1.9091642427081233e-06, + "loss": 0.7944, + "step": 17180 + }, + { + "epoch": 0.1626357190863396, + "grad_norm": 606.73828125, + "learning_rate": 1.909151474983747e-06, + "loss": 18.5, + "step": 17181 + }, + { + "epoch": 0.16264518510805465, + "grad_norm": 1083.8465576171875, + "learning_rate": 1.909138706404827e-06, + "loss": 63.2031, + "step": 17182 + }, + { + "epoch": 0.16265465112976968, + "grad_norm": 293.37738037109375, + "learning_rate": 1.9091259369713762e-06, + "loss": 25.9375, + "step": 17183 + }, + { + "epoch": 0.16266411715148474, + "grad_norm": 213.6715087890625, + "learning_rate": 1.909113166683406e-06, + "loss": 17.4219, + "step": 17184 + }, + { + "epoch": 0.1626735831731998, + "grad_norm": 178.16970825195312, + "learning_rate": 1.9091003955409283e-06, + "loss": 20.375, + "step": 17185 + }, + { + "epoch": 0.16268304919491486, + "grad_norm": 467.533935546875, + "learning_rate": 1.909087623543956e-06, + "loss": 18.2266, + "step": 17186 + }, + { + "epoch": 0.16269251521662992, + "grad_norm": 580.991943359375, + "learning_rate": 1.9090748506925e-06, + "loss": 38.4531, + "step": 17187 + }, + { + "epoch": 0.16270198123834495, + "grad_norm": 470.53033447265625, + "learning_rate": 1.9090620769865725e-06, + "loss": 48.125, + "step": 17188 + }, + { + "epoch": 0.16271144726006, + "grad_norm": 3.287400960922241, + "learning_rate": 1.909049302426186e-06, + "loss": 0.9988, + "step": 17189 + }, + { + "epoch": 0.16272091328177507, + "grad_norm": 806.9395751953125, + "learning_rate": 1.9090365270113527e-06, + "loss": 31.1328, + "step": 17190 + }, + { + "epoch": 0.16273037930349013, + "grad_norm": 2.913935899734497, + "learning_rate": 1.9090237507420837e-06, + "loss": 0.9429, + "step": 17191 + }, + { + "epoch": 0.16273984532520516, + "grad_norm": 335.96502685546875, + "learning_rate": 1.909010973618392e-06, + "loss": 16.707, + "step": 17192 + }, + { + "epoch": 0.16274931134692022, + "grad_norm": 1045.2623291015625, + "learning_rate": 1.908998195640289e-06, + "loss": 21.4844, + "step": 17193 + }, + { + "epoch": 0.16275877736863528, + "grad_norm": 301.88983154296875, + "learning_rate": 1.9089854168077864e-06, + "loss": 14.5, + "step": 17194 + }, + { + "epoch": 0.16276824339035034, + "grad_norm": 453.4959411621094, + "learning_rate": 1.908972637120897e-06, + "loss": 37.8906, + "step": 17195 + }, + { + "epoch": 0.1627777094120654, + "grad_norm": 572.2178344726562, + "learning_rate": 1.908959856579632e-06, + "loss": 66.0312, + "step": 17196 + }, + { + "epoch": 0.16278717543378043, + "grad_norm": 702.9304809570312, + "learning_rate": 1.9089470751840046e-06, + "loss": 52.1797, + "step": 17197 + }, + { + "epoch": 0.1627966414554955, + "grad_norm": 577.6237182617188, + "learning_rate": 1.9089342929340257e-06, + "loss": 25.3281, + "step": 17198 + }, + { + "epoch": 0.16280610747721055, + "grad_norm": 374.94049072265625, + "learning_rate": 1.9089215098297078e-06, + "loss": 20.3672, + "step": 17199 + }, + { + "epoch": 0.16281557349892561, + "grad_norm": 219.3008575439453, + "learning_rate": 1.9089087258710627e-06, + "loss": 20.125, + "step": 17200 + }, + { + "epoch": 0.16282503952064067, + "grad_norm": 498.6373596191406, + "learning_rate": 1.9088959410581027e-06, + "loss": 26.0, + "step": 17201 + }, + { + "epoch": 0.1628345055423557, + "grad_norm": 158.0800323486328, + "learning_rate": 1.9088831553908397e-06, + "loss": 8.9609, + "step": 17202 + }, + { + "epoch": 0.16284397156407077, + "grad_norm": 756.7716674804688, + "learning_rate": 1.908870368869285e-06, + "loss": 10.0547, + "step": 17203 + }, + { + "epoch": 0.16285343758578583, + "grad_norm": 367.3434753417969, + "learning_rate": 1.908857581493452e-06, + "loss": 9.5039, + "step": 17204 + }, + { + "epoch": 0.1628629036075009, + "grad_norm": 1055.233642578125, + "learning_rate": 1.9088447932633514e-06, + "loss": 26.3867, + "step": 17205 + }, + { + "epoch": 0.16287236962921592, + "grad_norm": 681.2559204101562, + "learning_rate": 1.9088320041789966e-06, + "loss": 51.0625, + "step": 17206 + }, + { + "epoch": 0.16288183565093098, + "grad_norm": 3.624427080154419, + "learning_rate": 1.9088192142403984e-06, + "loss": 0.9932, + "step": 17207 + }, + { + "epoch": 0.16289130167264604, + "grad_norm": 295.9122619628906, + "learning_rate": 1.908806423447569e-06, + "loss": 34.3438, + "step": 17208 + }, + { + "epoch": 0.1629007676943611, + "grad_norm": 285.0680236816406, + "learning_rate": 1.9087936318005212e-06, + "loss": 23.7344, + "step": 17209 + }, + { + "epoch": 0.16291023371607616, + "grad_norm": 765.9578857421875, + "learning_rate": 1.908780839299266e-06, + "loss": 9.0957, + "step": 17210 + }, + { + "epoch": 0.1629196997377912, + "grad_norm": 369.7676696777344, + "learning_rate": 1.9087680459438165e-06, + "loss": 21.1328, + "step": 17211 + }, + { + "epoch": 0.16292916575950625, + "grad_norm": 547.5623168945312, + "learning_rate": 1.9087552517341834e-06, + "loss": 47.3281, + "step": 17212 + }, + { + "epoch": 0.1629386317812213, + "grad_norm": 448.5507507324219, + "learning_rate": 1.9087424566703803e-06, + "loss": 19.6133, + "step": 17213 + }, + { + "epoch": 0.16294809780293637, + "grad_norm": 274.9565734863281, + "learning_rate": 1.9087296607524177e-06, + "loss": 31.5, + "step": 17214 + }, + { + "epoch": 0.1629575638246514, + "grad_norm": 565.0906372070312, + "learning_rate": 1.908716863980309e-06, + "loss": 39.6641, + "step": 17215 + }, + { + "epoch": 0.16296702984636646, + "grad_norm": 310.8688659667969, + "learning_rate": 1.908704066354065e-06, + "loss": 15.1875, + "step": 17216 + }, + { + "epoch": 0.16297649586808152, + "grad_norm": 417.39556884765625, + "learning_rate": 1.9086912678736986e-06, + "loss": 34.4844, + "step": 17217 + }, + { + "epoch": 0.16298596188979658, + "grad_norm": 516.5235595703125, + "learning_rate": 1.9086784685392215e-06, + "loss": 30.7266, + "step": 17218 + }, + { + "epoch": 0.16299542791151164, + "grad_norm": 244.13966369628906, + "learning_rate": 1.908665668350646e-06, + "loss": 15.5234, + "step": 17219 + }, + { + "epoch": 0.16300489393322667, + "grad_norm": 425.6676025390625, + "learning_rate": 1.908652867307983e-06, + "loss": 24.0469, + "step": 17220 + }, + { + "epoch": 0.16301435995494173, + "grad_norm": 1046.1126708984375, + "learning_rate": 1.908640065411246e-06, + "loss": 8.8594, + "step": 17221 + }, + { + "epoch": 0.1630238259766568, + "grad_norm": 648.1852416992188, + "learning_rate": 1.9086272626604467e-06, + "loss": 26.3359, + "step": 17222 + }, + { + "epoch": 0.16303329199837185, + "grad_norm": 235.3729705810547, + "learning_rate": 1.9086144590555966e-06, + "loss": 18.7812, + "step": 17223 + }, + { + "epoch": 0.1630427580200869, + "grad_norm": 390.0371398925781, + "learning_rate": 1.908601654596708e-06, + "loss": 35.9688, + "step": 17224 + }, + { + "epoch": 0.16305222404180195, + "grad_norm": 356.9551696777344, + "learning_rate": 1.908588849283793e-06, + "loss": 21.2734, + "step": 17225 + }, + { + "epoch": 0.163061690063517, + "grad_norm": 283.96258544921875, + "learning_rate": 1.9085760431168637e-06, + "loss": 19.4219, + "step": 17226 + }, + { + "epoch": 0.16307115608523207, + "grad_norm": 455.4337158203125, + "learning_rate": 1.908563236095932e-06, + "loss": 34.3984, + "step": 17227 + }, + { + "epoch": 0.16308062210694713, + "grad_norm": 309.80169677734375, + "learning_rate": 1.90855042822101e-06, + "loss": 20.0312, + "step": 17228 + }, + { + "epoch": 0.16309008812866216, + "grad_norm": 555.0149536132812, + "learning_rate": 1.9085376194921094e-06, + "loss": 54.8906, + "step": 17229 + }, + { + "epoch": 0.16309955415037722, + "grad_norm": 427.07611083984375, + "learning_rate": 1.9085248099092426e-06, + "loss": 39.2266, + "step": 17230 + }, + { + "epoch": 0.16310902017209228, + "grad_norm": 733.1690063476562, + "learning_rate": 1.908511999472422e-06, + "loss": 41.5, + "step": 17231 + }, + { + "epoch": 0.16311848619380734, + "grad_norm": 262.6268310546875, + "learning_rate": 1.908499188181659e-06, + "loss": 21.9375, + "step": 17232 + }, + { + "epoch": 0.16312795221552237, + "grad_norm": 498.63299560546875, + "learning_rate": 1.9084863760369655e-06, + "loss": 32.9375, + "step": 17233 + }, + { + "epoch": 0.16313741823723743, + "grad_norm": 402.1280212402344, + "learning_rate": 1.9084735630383544e-06, + "loss": 21.1641, + "step": 17234 + }, + { + "epoch": 0.1631468842589525, + "grad_norm": 467.3144836425781, + "learning_rate": 1.9084607491858367e-06, + "loss": 49.5312, + "step": 17235 + }, + { + "epoch": 0.16315635028066755, + "grad_norm": 733.6912841796875, + "learning_rate": 1.9084479344794256e-06, + "loss": 34.2266, + "step": 17236 + }, + { + "epoch": 0.1631658163023826, + "grad_norm": 539.5352172851562, + "learning_rate": 1.9084351189191323e-06, + "loss": 42.5469, + "step": 17237 + }, + { + "epoch": 0.16317528232409764, + "grad_norm": 493.86199951171875, + "learning_rate": 1.908422302504969e-06, + "loss": 17.375, + "step": 17238 + }, + { + "epoch": 0.1631847483458127, + "grad_norm": 306.2831726074219, + "learning_rate": 1.908409485236948e-06, + "loss": 7.7539, + "step": 17239 + }, + { + "epoch": 0.16319421436752776, + "grad_norm": 497.1737365722656, + "learning_rate": 1.908396667115081e-06, + "loss": 54.625, + "step": 17240 + }, + { + "epoch": 0.16320368038924282, + "grad_norm": 1190.3018798828125, + "learning_rate": 1.90838384813938e-06, + "loss": 57.3398, + "step": 17241 + }, + { + "epoch": 0.16321314641095785, + "grad_norm": 2.7576329708099365, + "learning_rate": 1.908371028309858e-06, + "loss": 0.8093, + "step": 17242 + }, + { + "epoch": 0.16322261243267291, + "grad_norm": 268.7993469238281, + "learning_rate": 1.908358207626526e-06, + "loss": 38.4531, + "step": 17243 + }, + { + "epoch": 0.16323207845438797, + "grad_norm": 1502.006103515625, + "learning_rate": 1.908345386089396e-06, + "loss": 48.3281, + "step": 17244 + }, + { + "epoch": 0.16324154447610303, + "grad_norm": 584.24658203125, + "learning_rate": 1.908332563698481e-06, + "loss": 41.0938, + "step": 17245 + }, + { + "epoch": 0.1632510104978181, + "grad_norm": 619.9765625, + "learning_rate": 1.908319740453792e-06, + "loss": 45.3242, + "step": 17246 + }, + { + "epoch": 0.16326047651953313, + "grad_norm": 220.34048461914062, + "learning_rate": 1.908306916355342e-06, + "loss": 30.0938, + "step": 17247 + }, + { + "epoch": 0.1632699425412482, + "grad_norm": 456.5369567871094, + "learning_rate": 1.9082940914031422e-06, + "loss": 14.125, + "step": 17248 + }, + { + "epoch": 0.16327940856296325, + "grad_norm": 228.76438903808594, + "learning_rate": 1.908281265597205e-06, + "loss": 29.4375, + "step": 17249 + }, + { + "epoch": 0.1632888745846783, + "grad_norm": 634.3157958984375, + "learning_rate": 1.9082684389375427e-06, + "loss": 45.2031, + "step": 17250 + }, + { + "epoch": 0.16329834060639334, + "grad_norm": 511.55804443359375, + "learning_rate": 1.9082556114241674e-06, + "loss": 39.2031, + "step": 17251 + }, + { + "epoch": 0.1633078066281084, + "grad_norm": 464.3226013183594, + "learning_rate": 1.9082427830570905e-06, + "loss": 59.1406, + "step": 17252 + }, + { + "epoch": 0.16331727264982346, + "grad_norm": 1442.189697265625, + "learning_rate": 1.9082299538363247e-06, + "loss": 46.8359, + "step": 17253 + }, + { + "epoch": 0.16332673867153852, + "grad_norm": 723.2037353515625, + "learning_rate": 1.9082171237618817e-06, + "loss": 53.0078, + "step": 17254 + }, + { + "epoch": 0.16333620469325358, + "grad_norm": 461.0644226074219, + "learning_rate": 1.9082042928337736e-06, + "loss": 17.7969, + "step": 17255 + }, + { + "epoch": 0.1633456707149686, + "grad_norm": 472.1877746582031, + "learning_rate": 1.908191461052013e-06, + "loss": 50.9688, + "step": 17256 + }, + { + "epoch": 0.16335513673668367, + "grad_norm": 381.3504638671875, + "learning_rate": 1.9081786284166112e-06, + "loss": 29.2891, + "step": 17257 + }, + { + "epoch": 0.16336460275839873, + "grad_norm": 308.43524169921875, + "learning_rate": 1.9081657949275803e-06, + "loss": 21.3984, + "step": 17258 + }, + { + "epoch": 0.1633740687801138, + "grad_norm": 619.8270874023438, + "learning_rate": 1.908152960584933e-06, + "loss": 41.5, + "step": 17259 + }, + { + "epoch": 0.16338353480182882, + "grad_norm": 334.5119323730469, + "learning_rate": 1.9081401253886807e-06, + "loss": 19.0273, + "step": 17260 + }, + { + "epoch": 0.16339300082354388, + "grad_norm": 3.2657604217529297, + "learning_rate": 1.908127289338836e-06, + "loss": 1.0107, + "step": 17261 + }, + { + "epoch": 0.16340246684525894, + "grad_norm": 343.782470703125, + "learning_rate": 1.908114452435411e-06, + "loss": 22.0703, + "step": 17262 + }, + { + "epoch": 0.163411932866974, + "grad_norm": 418.0935974121094, + "learning_rate": 1.9081016146784168e-06, + "loss": 32.7266, + "step": 17263 + }, + { + "epoch": 0.16342139888868906, + "grad_norm": 530.8961181640625, + "learning_rate": 1.9080887760678668e-06, + "loss": 46.9062, + "step": 17264 + }, + { + "epoch": 0.1634308649104041, + "grad_norm": 930.7993774414062, + "learning_rate": 1.908075936603772e-06, + "loss": 8.498, + "step": 17265 + }, + { + "epoch": 0.16344033093211915, + "grad_norm": 460.0788879394531, + "learning_rate": 1.908063096286145e-06, + "loss": 34.6133, + "step": 17266 + }, + { + "epoch": 0.16344979695383421, + "grad_norm": 186.2071533203125, + "learning_rate": 1.908050255114998e-06, + "loss": 20.5664, + "step": 17267 + }, + { + "epoch": 0.16345926297554927, + "grad_norm": 227.62094116210938, + "learning_rate": 1.9080374130903423e-06, + "loss": 42.7656, + "step": 17268 + }, + { + "epoch": 0.1634687289972643, + "grad_norm": 346.6227722167969, + "learning_rate": 1.908024570212191e-06, + "loss": 9.1992, + "step": 17269 + }, + { + "epoch": 0.16347819501897937, + "grad_norm": 267.4680480957031, + "learning_rate": 1.9080117264805554e-06, + "loss": 19.7188, + "step": 17270 + }, + { + "epoch": 0.16348766104069443, + "grad_norm": 284.62701416015625, + "learning_rate": 1.907998881895448e-06, + "loss": 27.7734, + "step": 17271 + }, + { + "epoch": 0.1634971270624095, + "grad_norm": 455.7958984375, + "learning_rate": 1.9079860364568806e-06, + "loss": 40.2812, + "step": 17272 + }, + { + "epoch": 0.16350659308412455, + "grad_norm": 274.2398986816406, + "learning_rate": 1.9079731901648654e-06, + "loss": 17.2148, + "step": 17273 + }, + { + "epoch": 0.16351605910583958, + "grad_norm": 229.88185119628906, + "learning_rate": 1.907960343019415e-06, + "loss": 19.168, + "step": 17274 + }, + { + "epoch": 0.16352552512755464, + "grad_norm": 2.779298782348633, + "learning_rate": 1.90794749502054e-06, + "loss": 0.833, + "step": 17275 + }, + { + "epoch": 0.1635349911492697, + "grad_norm": 905.8943481445312, + "learning_rate": 1.907934646168254e-06, + "loss": 31.9375, + "step": 17276 + }, + { + "epoch": 0.16354445717098476, + "grad_norm": 303.1787109375, + "learning_rate": 1.9079217964625683e-06, + "loss": 22.8438, + "step": 17277 + }, + { + "epoch": 0.1635539231926998, + "grad_norm": 246.9368438720703, + "learning_rate": 1.9079089459034954e-06, + "loss": 12.8984, + "step": 17278 + }, + { + "epoch": 0.16356338921441485, + "grad_norm": 355.64166259765625, + "learning_rate": 1.907896094491047e-06, + "loss": 40.9219, + "step": 17279 + }, + { + "epoch": 0.1635728552361299, + "grad_norm": 784.2803344726562, + "learning_rate": 1.9078832422252353e-06, + "loss": 38.4609, + "step": 17280 + }, + { + "epoch": 0.16358232125784497, + "grad_norm": 257.0386962890625, + "learning_rate": 1.9078703891060727e-06, + "loss": 9.9375, + "step": 17281 + }, + { + "epoch": 0.16359178727956003, + "grad_norm": 305.16387939453125, + "learning_rate": 1.9078575351335705e-06, + "loss": 17.8594, + "step": 17282 + }, + { + "epoch": 0.16360125330127506, + "grad_norm": 426.4391784667969, + "learning_rate": 1.9078446803077415e-06, + "loss": 33.7539, + "step": 17283 + }, + { + "epoch": 0.16361071932299012, + "grad_norm": 426.05194091796875, + "learning_rate": 1.9078318246285976e-06, + "loss": 44.875, + "step": 17284 + }, + { + "epoch": 0.16362018534470518, + "grad_norm": 284.321044921875, + "learning_rate": 1.907818968096151e-06, + "loss": 33.8984, + "step": 17285 + }, + { + "epoch": 0.16362965136642024, + "grad_norm": 315.1356201171875, + "learning_rate": 1.9078061107104134e-06, + "loss": 18.7188, + "step": 17286 + }, + { + "epoch": 0.16363911738813527, + "grad_norm": 204.96136474609375, + "learning_rate": 1.9077932524713974e-06, + "loss": 17.7734, + "step": 17287 + }, + { + "epoch": 0.16364858340985033, + "grad_norm": 539.9306030273438, + "learning_rate": 1.9077803933791143e-06, + "loss": 25.3867, + "step": 17288 + }, + { + "epoch": 0.1636580494315654, + "grad_norm": 325.587646484375, + "learning_rate": 1.907767533433577e-06, + "loss": 20.8438, + "step": 17289 + }, + { + "epoch": 0.16366751545328045, + "grad_norm": 729.2837524414062, + "learning_rate": 1.9077546726347975e-06, + "loss": 55.5312, + "step": 17290 + }, + { + "epoch": 0.16367698147499551, + "grad_norm": 150.3682403564453, + "learning_rate": 1.907741810982787e-06, + "loss": 18.6719, + "step": 17291 + }, + { + "epoch": 0.16368644749671055, + "grad_norm": 278.2935791015625, + "learning_rate": 1.907728948477559e-06, + "loss": 19.1094, + "step": 17292 + }, + { + "epoch": 0.1636959135184256, + "grad_norm": 671.7658081054688, + "learning_rate": 1.9077160851191244e-06, + "loss": 10.5039, + "step": 17293 + }, + { + "epoch": 0.16370537954014067, + "grad_norm": 379.17999267578125, + "learning_rate": 1.907703220907496e-06, + "loss": 47.2031, + "step": 17294 + }, + { + "epoch": 0.16371484556185573, + "grad_norm": 282.73321533203125, + "learning_rate": 1.9076903558426854e-06, + "loss": 16.5156, + "step": 17295 + }, + { + "epoch": 0.1637243115835708, + "grad_norm": 2127.893798828125, + "learning_rate": 1.907677489924705e-06, + "loss": 42.9297, + "step": 17296 + }, + { + "epoch": 0.16373377760528582, + "grad_norm": 3.3144431114196777, + "learning_rate": 1.9076646231535667e-06, + "loss": 1.0825, + "step": 17297 + }, + { + "epoch": 0.16374324362700088, + "grad_norm": 576.4899291992188, + "learning_rate": 1.907651755529283e-06, + "loss": 27.4375, + "step": 17298 + }, + { + "epoch": 0.16375270964871594, + "grad_norm": 161.1691131591797, + "learning_rate": 1.907638887051865e-06, + "loss": 20.8828, + "step": 17299 + }, + { + "epoch": 0.163762175670431, + "grad_norm": 2.8430819511413574, + "learning_rate": 1.907626017721326e-06, + "loss": 0.8096, + "step": 17300 + }, + { + "epoch": 0.16377164169214603, + "grad_norm": 335.9741516113281, + "learning_rate": 1.9076131475376776e-06, + "loss": 22.9102, + "step": 17301 + }, + { + "epoch": 0.1637811077138611, + "grad_norm": 392.5373229980469, + "learning_rate": 1.9076002765009322e-06, + "loss": 30.9531, + "step": 17302 + }, + { + "epoch": 0.16379057373557615, + "grad_norm": 522.0945434570312, + "learning_rate": 1.907587404611101e-06, + "loss": 44.0312, + "step": 17303 + }, + { + "epoch": 0.1638000397572912, + "grad_norm": 208.47308349609375, + "learning_rate": 1.9075745318681967e-06, + "loss": 17.5898, + "step": 17304 + }, + { + "epoch": 0.16380950577900627, + "grad_norm": 454.6498718261719, + "learning_rate": 1.907561658272232e-06, + "loss": 23.2109, + "step": 17305 + }, + { + "epoch": 0.1638189718007213, + "grad_norm": 738.67138671875, + "learning_rate": 1.9075487838232178e-06, + "loss": 42.0781, + "step": 17306 + }, + { + "epoch": 0.16382843782243636, + "grad_norm": 3.1086196899414062, + "learning_rate": 1.907535908521167e-06, + "loss": 1.0962, + "step": 17307 + }, + { + "epoch": 0.16383790384415142, + "grad_norm": 653.8179321289062, + "learning_rate": 1.9075230323660915e-06, + "loss": 22.9844, + "step": 17308 + }, + { + "epoch": 0.16384736986586648, + "grad_norm": 724.3300170898438, + "learning_rate": 1.907510155358003e-06, + "loss": 46.2969, + "step": 17309 + }, + { + "epoch": 0.16385683588758151, + "grad_norm": 477.46240234375, + "learning_rate": 1.9074972774969145e-06, + "loss": 33.7031, + "step": 17310 + }, + { + "epoch": 0.16386630190929657, + "grad_norm": 1505.8099365234375, + "learning_rate": 1.9074843987828375e-06, + "loss": 43.7188, + "step": 17311 + }, + { + "epoch": 0.16387576793101163, + "grad_norm": 189.4551544189453, + "learning_rate": 1.907471519215784e-06, + "loss": 19.4375, + "step": 17312 + }, + { + "epoch": 0.1638852339527267, + "grad_norm": 3.7274160385131836, + "learning_rate": 1.9074586387957663e-06, + "loss": 0.9453, + "step": 17313 + }, + { + "epoch": 0.16389469997444175, + "grad_norm": 207.0487823486328, + "learning_rate": 1.907445757522797e-06, + "loss": 17.1484, + "step": 17314 + }, + { + "epoch": 0.1639041659961568, + "grad_norm": 770.6456909179688, + "learning_rate": 1.907432875396887e-06, + "loss": 43.9062, + "step": 17315 + }, + { + "epoch": 0.16391363201787185, + "grad_norm": 135.98394775390625, + "learning_rate": 1.9074199924180496e-06, + "loss": 21.1562, + "step": 17316 + }, + { + "epoch": 0.1639230980395869, + "grad_norm": 183.80084228515625, + "learning_rate": 1.9074071085862964e-06, + "loss": 16.6172, + "step": 17317 + }, + { + "epoch": 0.16393256406130197, + "grad_norm": 213.6418914794922, + "learning_rate": 1.9073942239016395e-06, + "loss": 21.918, + "step": 17318 + }, + { + "epoch": 0.163942030083017, + "grad_norm": 653.19287109375, + "learning_rate": 1.9073813383640908e-06, + "loss": 37.1094, + "step": 17319 + }, + { + "epoch": 0.16395149610473206, + "grad_norm": 172.4814453125, + "learning_rate": 1.907368451973663e-06, + "loss": 7.8789, + "step": 17320 + }, + { + "epoch": 0.16396096212644712, + "grad_norm": 355.8941345214844, + "learning_rate": 1.907355564730368e-06, + "loss": 17.3906, + "step": 17321 + }, + { + "epoch": 0.16397042814816218, + "grad_norm": 264.2322082519531, + "learning_rate": 1.9073426766342173e-06, + "loss": 26.4844, + "step": 17322 + }, + { + "epoch": 0.16397989416987724, + "grad_norm": 3.1622257232666016, + "learning_rate": 1.907329787685224e-06, + "loss": 0.9995, + "step": 17323 + }, + { + "epoch": 0.16398936019159227, + "grad_norm": 217.45404052734375, + "learning_rate": 1.9073168978833994e-06, + "loss": 11.166, + "step": 17324 + }, + { + "epoch": 0.16399882621330733, + "grad_norm": 3.4124906063079834, + "learning_rate": 1.907304007228756e-06, + "loss": 0.9761, + "step": 17325 + }, + { + "epoch": 0.1640082922350224, + "grad_norm": 240.8367156982422, + "learning_rate": 1.9072911157213061e-06, + "loss": 16.4297, + "step": 17326 + }, + { + "epoch": 0.16401775825673745, + "grad_norm": 372.6999206542969, + "learning_rate": 1.9072782233610614e-06, + "loss": 14.5039, + "step": 17327 + }, + { + "epoch": 0.16402722427845248, + "grad_norm": 514.5304565429688, + "learning_rate": 1.9072653301480342e-06, + "loss": 15.5488, + "step": 17328 + }, + { + "epoch": 0.16403669030016754, + "grad_norm": 300.5985107421875, + "learning_rate": 1.9072524360822367e-06, + "loss": 18.5156, + "step": 17329 + }, + { + "epoch": 0.1640461563218826, + "grad_norm": 236.85272216796875, + "learning_rate": 1.9072395411636805e-06, + "loss": 20.3398, + "step": 17330 + }, + { + "epoch": 0.16405562234359766, + "grad_norm": 254.76138305664062, + "learning_rate": 1.9072266453923786e-06, + "loss": 25.3984, + "step": 17331 + }, + { + "epoch": 0.16406508836531272, + "grad_norm": 563.8883666992188, + "learning_rate": 1.9072137487683423e-06, + "loss": 33.0781, + "step": 17332 + }, + { + "epoch": 0.16407455438702775, + "grad_norm": 1606.2283935546875, + "learning_rate": 1.9072008512915847e-06, + "loss": 36.5703, + "step": 17333 + }, + { + "epoch": 0.16408402040874281, + "grad_norm": 202.05392456054688, + "learning_rate": 1.9071879529621168e-06, + "loss": 23.0156, + "step": 17334 + }, + { + "epoch": 0.16409348643045787, + "grad_norm": 708.4649047851562, + "learning_rate": 1.9071750537799515e-06, + "loss": 38.7188, + "step": 17335 + }, + { + "epoch": 0.16410295245217293, + "grad_norm": 191.0648956298828, + "learning_rate": 1.9071621537451005e-06, + "loss": 22.6797, + "step": 17336 + }, + { + "epoch": 0.16411241847388797, + "grad_norm": 391.1949157714844, + "learning_rate": 1.907149252857576e-06, + "loss": 43.7188, + "step": 17337 + }, + { + "epoch": 0.16412188449560303, + "grad_norm": 569.3390502929688, + "learning_rate": 1.9071363511173905e-06, + "loss": 24.3359, + "step": 17338 + }, + { + "epoch": 0.1641313505173181, + "grad_norm": 156.3509979248047, + "learning_rate": 1.9071234485245557e-06, + "loss": 17.2266, + "step": 17339 + }, + { + "epoch": 0.16414081653903315, + "grad_norm": 479.2027282714844, + "learning_rate": 1.9071105450790838e-06, + "loss": 26.9453, + "step": 17340 + }, + { + "epoch": 0.1641502825607482, + "grad_norm": 615.8892822265625, + "learning_rate": 1.907097640780987e-06, + "loss": 49.2812, + "step": 17341 + }, + { + "epoch": 0.16415974858246324, + "grad_norm": 3.2099802494049072, + "learning_rate": 1.9070847356302778e-06, + "loss": 0.9307, + "step": 17342 + }, + { + "epoch": 0.1641692146041783, + "grad_norm": 377.12689208984375, + "learning_rate": 1.9070718296269678e-06, + "loss": 53.3594, + "step": 17343 + }, + { + "epoch": 0.16417868062589336, + "grad_norm": 205.2611846923828, + "learning_rate": 1.907058922771069e-06, + "loss": 16.6641, + "step": 17344 + }, + { + "epoch": 0.16418814664760842, + "grad_norm": 3.501997947692871, + "learning_rate": 1.907046015062594e-06, + "loss": 0.8491, + "step": 17345 + }, + { + "epoch": 0.16419761266932345, + "grad_norm": 664.63232421875, + "learning_rate": 1.9070331065015548e-06, + "loss": 28.5781, + "step": 17346 + }, + { + "epoch": 0.1642070786910385, + "grad_norm": 622.3645629882812, + "learning_rate": 1.9070201970879633e-06, + "loss": 26.375, + "step": 17347 + }, + { + "epoch": 0.16421654471275357, + "grad_norm": 348.4555969238281, + "learning_rate": 1.907007286821832e-06, + "loss": 32.4844, + "step": 17348 + }, + { + "epoch": 0.16422601073446863, + "grad_norm": 399.59759521484375, + "learning_rate": 1.9069943757031728e-06, + "loss": 10.6992, + "step": 17349 + }, + { + "epoch": 0.1642354767561837, + "grad_norm": 565.074951171875, + "learning_rate": 1.906981463731998e-06, + "loss": 35.6875, + "step": 17350 + }, + { + "epoch": 0.16424494277789872, + "grad_norm": 193.03909301757812, + "learning_rate": 1.9069685509083193e-06, + "loss": 20.3477, + "step": 17351 + }, + { + "epoch": 0.16425440879961378, + "grad_norm": 417.5644836425781, + "learning_rate": 1.9069556372321495e-06, + "loss": 21.7891, + "step": 17352 + }, + { + "epoch": 0.16426387482132884, + "grad_norm": 3.0022530555725098, + "learning_rate": 1.9069427227035001e-06, + "loss": 0.9678, + "step": 17353 + }, + { + "epoch": 0.1642733408430439, + "grad_norm": 338.07080078125, + "learning_rate": 1.9069298073223838e-06, + "loss": 19.6484, + "step": 17354 + }, + { + "epoch": 0.16428280686475893, + "grad_norm": 414.6466979980469, + "learning_rate": 1.9069168910888123e-06, + "loss": 8.9141, + "step": 17355 + }, + { + "epoch": 0.164292272886474, + "grad_norm": 453.0166931152344, + "learning_rate": 1.906903974002798e-06, + "loss": 41.3438, + "step": 17356 + }, + { + "epoch": 0.16430173890818905, + "grad_norm": 497.8065490722656, + "learning_rate": 1.9068910560643532e-06, + "loss": 32.5391, + "step": 17357 + }, + { + "epoch": 0.16431120492990411, + "grad_norm": 347.42791748046875, + "learning_rate": 1.9068781372734897e-06, + "loss": 46.8125, + "step": 17358 + }, + { + "epoch": 0.16432067095161917, + "grad_norm": 194.52601623535156, + "learning_rate": 1.9068652176302194e-06, + "loss": 20.6484, + "step": 17359 + }, + { + "epoch": 0.1643301369733342, + "grad_norm": 282.4821472167969, + "learning_rate": 1.9068522971345552e-06, + "loss": 20.8438, + "step": 17360 + }, + { + "epoch": 0.16433960299504927, + "grad_norm": 778.1165771484375, + "learning_rate": 1.9068393757865087e-06, + "loss": 10.9336, + "step": 17361 + }, + { + "epoch": 0.16434906901676433, + "grad_norm": 780.0211791992188, + "learning_rate": 1.906826453586092e-06, + "loss": 36.3047, + "step": 17362 + }, + { + "epoch": 0.1643585350384794, + "grad_norm": 3.098611354827881, + "learning_rate": 1.9068135305333176e-06, + "loss": 0.9795, + "step": 17363 + }, + { + "epoch": 0.16436800106019442, + "grad_norm": 539.475341796875, + "learning_rate": 1.9068006066281975e-06, + "loss": 52.4688, + "step": 17364 + }, + { + "epoch": 0.16437746708190948, + "grad_norm": 203.83750915527344, + "learning_rate": 1.9067876818707437e-06, + "loss": 8.0547, + "step": 17365 + }, + { + "epoch": 0.16438693310362454, + "grad_norm": 406.5920715332031, + "learning_rate": 1.9067747562609683e-06, + "loss": 28.7266, + "step": 17366 + }, + { + "epoch": 0.1643963991253396, + "grad_norm": 236.96009826660156, + "learning_rate": 1.906761829798884e-06, + "loss": 19.4609, + "step": 17367 + }, + { + "epoch": 0.16440586514705466, + "grad_norm": 192.86683654785156, + "learning_rate": 1.9067489024845025e-06, + "loss": 20.8477, + "step": 17368 + }, + { + "epoch": 0.1644153311687697, + "grad_norm": 517.0897216796875, + "learning_rate": 1.906735974317836e-06, + "loss": 50.3906, + "step": 17369 + }, + { + "epoch": 0.16442479719048475, + "grad_norm": 2.8282814025878906, + "learning_rate": 1.9067230452988965e-06, + "loss": 0.9146, + "step": 17370 + }, + { + "epoch": 0.1644342632121998, + "grad_norm": 260.5337829589844, + "learning_rate": 1.9067101154276966e-06, + "loss": 30.4766, + "step": 17371 + }, + { + "epoch": 0.16444372923391487, + "grad_norm": 730.047607421875, + "learning_rate": 1.9066971847042478e-06, + "loss": 24.8281, + "step": 17372 + }, + { + "epoch": 0.1644531952556299, + "grad_norm": 288.7608947753906, + "learning_rate": 1.906684253128563e-06, + "loss": 25.1055, + "step": 17373 + }, + { + "epoch": 0.16446266127734496, + "grad_norm": 600.3643188476562, + "learning_rate": 1.9066713207006538e-06, + "loss": 45.1094, + "step": 17374 + }, + { + "epoch": 0.16447212729906002, + "grad_norm": 475.83868408203125, + "learning_rate": 1.9066583874205326e-06, + "loss": 58.4062, + "step": 17375 + }, + { + "epoch": 0.16448159332077508, + "grad_norm": 540.615478515625, + "learning_rate": 1.9066454532882115e-06, + "loss": 20.4453, + "step": 17376 + }, + { + "epoch": 0.16449105934249014, + "grad_norm": 559.2560424804688, + "learning_rate": 1.9066325183037027e-06, + "loss": 56.2891, + "step": 17377 + }, + { + "epoch": 0.16450052536420517, + "grad_norm": 2.8869452476501465, + "learning_rate": 1.9066195824670182e-06, + "loss": 0.8149, + "step": 17378 + }, + { + "epoch": 0.16450999138592023, + "grad_norm": 244.72402954101562, + "learning_rate": 1.9066066457781702e-06, + "loss": 15.7969, + "step": 17379 + }, + { + "epoch": 0.1645194574076353, + "grad_norm": 465.4638366699219, + "learning_rate": 1.906593708237171e-06, + "loss": 29.9609, + "step": 17380 + }, + { + "epoch": 0.16452892342935035, + "grad_norm": 534.8636474609375, + "learning_rate": 1.9065807698440329e-06, + "loss": 19.1406, + "step": 17381 + }, + { + "epoch": 0.16453838945106541, + "grad_norm": 312.7527770996094, + "learning_rate": 1.9065678305987677e-06, + "loss": 18.6875, + "step": 17382 + }, + { + "epoch": 0.16454785547278045, + "grad_norm": 713.9063110351562, + "learning_rate": 1.9065548905013875e-06, + "loss": 28.6406, + "step": 17383 + }, + { + "epoch": 0.1645573214944955, + "grad_norm": 254.88821411132812, + "learning_rate": 1.9065419495519048e-06, + "loss": 24.3281, + "step": 17384 + }, + { + "epoch": 0.16456678751621057, + "grad_norm": 470.61175537109375, + "learning_rate": 1.9065290077503318e-06, + "loss": 8.3789, + "step": 17385 + }, + { + "epoch": 0.16457625353792563, + "grad_norm": 374.0289611816406, + "learning_rate": 1.9065160650966808e-06, + "loss": 21.7266, + "step": 17386 + }, + { + "epoch": 0.16458571955964066, + "grad_norm": 476.5569152832031, + "learning_rate": 1.9065031215909631e-06, + "loss": 20.5781, + "step": 17387 + }, + { + "epoch": 0.16459518558135572, + "grad_norm": 429.81427001953125, + "learning_rate": 1.9064901772331917e-06, + "loss": 33.25, + "step": 17388 + }, + { + "epoch": 0.16460465160307078, + "grad_norm": 170.4853973388672, + "learning_rate": 1.9064772320233784e-06, + "loss": 15.668, + "step": 17389 + }, + { + "epoch": 0.16461411762478584, + "grad_norm": 3.144179582595825, + "learning_rate": 1.9064642859615355e-06, + "loss": 0.9189, + "step": 17390 + }, + { + "epoch": 0.1646235836465009, + "grad_norm": 390.4148254394531, + "learning_rate": 1.906451339047675e-06, + "loss": 29.3047, + "step": 17391 + }, + { + "epoch": 0.16463304966821593, + "grad_norm": 277.05999755859375, + "learning_rate": 1.9064383912818097e-06, + "loss": 18.1016, + "step": 17392 + }, + { + "epoch": 0.164642515689931, + "grad_norm": 283.0719909667969, + "learning_rate": 1.9064254426639508e-06, + "loss": 25.3516, + "step": 17393 + }, + { + "epoch": 0.16465198171164605, + "grad_norm": 536.8934936523438, + "learning_rate": 1.906412493194111e-06, + "loss": 35.1484, + "step": 17394 + }, + { + "epoch": 0.1646614477333611, + "grad_norm": 465.54437255859375, + "learning_rate": 1.9063995428723026e-06, + "loss": 35.25, + "step": 17395 + }, + { + "epoch": 0.16467091375507614, + "grad_norm": 192.22854614257812, + "learning_rate": 1.9063865916985375e-06, + "loss": 15.8906, + "step": 17396 + }, + { + "epoch": 0.1646803797767912, + "grad_norm": 238.4422149658203, + "learning_rate": 1.9063736396728278e-06, + "loss": 24.4531, + "step": 17397 + }, + { + "epoch": 0.16468984579850626, + "grad_norm": 245.88458251953125, + "learning_rate": 1.9063606867951863e-06, + "loss": 19.375, + "step": 17398 + }, + { + "epoch": 0.16469931182022132, + "grad_norm": 522.15771484375, + "learning_rate": 1.9063477330656244e-06, + "loss": 22.9414, + "step": 17399 + }, + { + "epoch": 0.16470877784193638, + "grad_norm": 409.5943603515625, + "learning_rate": 1.9063347784841545e-06, + "loss": 40.375, + "step": 17400 + }, + { + "epoch": 0.16471824386365141, + "grad_norm": 187.69285583496094, + "learning_rate": 1.9063218230507889e-06, + "loss": 17.8828, + "step": 17401 + }, + { + "epoch": 0.16472770988536647, + "grad_norm": 672.754638671875, + "learning_rate": 1.90630886676554e-06, + "loss": 37.8672, + "step": 17402 + }, + { + "epoch": 0.16473717590708153, + "grad_norm": 438.0321960449219, + "learning_rate": 1.9062959096284196e-06, + "loss": 45.5469, + "step": 17403 + }, + { + "epoch": 0.1647466419287966, + "grad_norm": 357.0373840332031, + "learning_rate": 1.9062829516394402e-06, + "loss": 7.1641, + "step": 17404 + }, + { + "epoch": 0.16475610795051163, + "grad_norm": 794.4812622070312, + "learning_rate": 1.9062699927986134e-06, + "loss": 42.8203, + "step": 17405 + }, + { + "epoch": 0.1647655739722267, + "grad_norm": 440.9702453613281, + "learning_rate": 1.9062570331059517e-06, + "loss": 37.7188, + "step": 17406 + }, + { + "epoch": 0.16477503999394175, + "grad_norm": 484.17095947265625, + "learning_rate": 1.9062440725614677e-06, + "loss": 38.3594, + "step": 17407 + }, + { + "epoch": 0.1647845060156568, + "grad_norm": 386.2882080078125, + "learning_rate": 1.906231111165173e-06, + "loss": 36.8594, + "step": 17408 + }, + { + "epoch": 0.16479397203737187, + "grad_norm": 690.023681640625, + "learning_rate": 1.9062181489170803e-06, + "loss": 36.1562, + "step": 17409 + }, + { + "epoch": 0.1648034380590869, + "grad_norm": 206.55955505371094, + "learning_rate": 1.9062051858172012e-06, + "loss": 18.4453, + "step": 17410 + }, + { + "epoch": 0.16481290408080196, + "grad_norm": 335.5829772949219, + "learning_rate": 1.9061922218655484e-06, + "loss": 40.5938, + "step": 17411 + }, + { + "epoch": 0.16482237010251702, + "grad_norm": 459.7207336425781, + "learning_rate": 1.9061792570621336e-06, + "loss": 37.3594, + "step": 17412 + }, + { + "epoch": 0.16483183612423208, + "grad_norm": 454.43389892578125, + "learning_rate": 1.9061662914069693e-06, + "loss": 19.2891, + "step": 17413 + }, + { + "epoch": 0.1648413021459471, + "grad_norm": 176.02877807617188, + "learning_rate": 1.906153324900068e-06, + "loss": 20.3516, + "step": 17414 + }, + { + "epoch": 0.16485076816766217, + "grad_norm": 220.8588409423828, + "learning_rate": 1.906140357541441e-06, + "loss": 20.0391, + "step": 17415 + }, + { + "epoch": 0.16486023418937723, + "grad_norm": 892.9717407226562, + "learning_rate": 1.9061273893311014e-06, + "loss": 32.7656, + "step": 17416 + }, + { + "epoch": 0.1648697002110923, + "grad_norm": 200.55337524414062, + "learning_rate": 1.906114420269061e-06, + "loss": 20.5312, + "step": 17417 + }, + { + "epoch": 0.16487916623280735, + "grad_norm": 527.3665771484375, + "learning_rate": 1.9061014503553316e-06, + "loss": 32.9922, + "step": 17418 + }, + { + "epoch": 0.16488863225452238, + "grad_norm": 336.8902893066406, + "learning_rate": 1.9060884795899261e-06, + "loss": 16.9688, + "step": 17419 + }, + { + "epoch": 0.16489809827623744, + "grad_norm": 349.04949951171875, + "learning_rate": 1.9060755079728563e-06, + "loss": 28.6719, + "step": 17420 + }, + { + "epoch": 0.1649075642979525, + "grad_norm": 301.9931945800781, + "learning_rate": 1.9060625355041347e-06, + "loss": 31.7109, + "step": 17421 + }, + { + "epoch": 0.16491703031966756, + "grad_norm": 310.0751037597656, + "learning_rate": 1.906049562183773e-06, + "loss": 15.2266, + "step": 17422 + }, + { + "epoch": 0.1649264963413826, + "grad_norm": 229.2127227783203, + "learning_rate": 1.9060365880117835e-06, + "loss": 14.2266, + "step": 17423 + }, + { + "epoch": 0.16493596236309765, + "grad_norm": 578.4375, + "learning_rate": 1.9060236129881789e-06, + "loss": 38.4844, + "step": 17424 + }, + { + "epoch": 0.16494542838481271, + "grad_norm": 725.2532348632812, + "learning_rate": 1.9060106371129707e-06, + "loss": 20.9219, + "step": 17425 + }, + { + "epoch": 0.16495489440652777, + "grad_norm": 635.6505737304688, + "learning_rate": 1.9059976603861717e-06, + "loss": 22.4102, + "step": 17426 + }, + { + "epoch": 0.16496436042824283, + "grad_norm": 491.5859069824219, + "learning_rate": 1.905984682807794e-06, + "loss": 24.0859, + "step": 17427 + }, + { + "epoch": 0.16497382644995787, + "grad_norm": 261.7463073730469, + "learning_rate": 1.9059717043778492e-06, + "loss": 23.4453, + "step": 17428 + }, + { + "epoch": 0.16498329247167293, + "grad_norm": 3.218956708908081, + "learning_rate": 1.90595872509635e-06, + "loss": 0.9678, + "step": 17429 + }, + { + "epoch": 0.164992758493388, + "grad_norm": 367.6520080566406, + "learning_rate": 1.9059457449633087e-06, + "loss": 18.6406, + "step": 17430 + }, + { + "epoch": 0.16500222451510305, + "grad_norm": 202.3921356201172, + "learning_rate": 1.9059327639787374e-06, + "loss": 23.7656, + "step": 17431 + }, + { + "epoch": 0.16501169053681808, + "grad_norm": 294.8594970703125, + "learning_rate": 1.9059197821426482e-06, + "loss": 18.6055, + "step": 17432 + }, + { + "epoch": 0.16502115655853314, + "grad_norm": 2.6194286346435547, + "learning_rate": 1.9059067994550532e-06, + "loss": 0.8018, + "step": 17433 + }, + { + "epoch": 0.1650306225802482, + "grad_norm": 1078.1204833984375, + "learning_rate": 1.9058938159159649e-06, + "loss": 45.5156, + "step": 17434 + }, + { + "epoch": 0.16504008860196326, + "grad_norm": 684.2207641601562, + "learning_rate": 1.9058808315253953e-06, + "loss": 40.3438, + "step": 17435 + }, + { + "epoch": 0.16504955462367832, + "grad_norm": 2.8628129959106445, + "learning_rate": 1.9058678462833563e-06, + "loss": 0.8311, + "step": 17436 + }, + { + "epoch": 0.16505902064539335, + "grad_norm": 431.0395812988281, + "learning_rate": 1.905854860189861e-06, + "loss": 24.7812, + "step": 17437 + }, + { + "epoch": 0.1650684866671084, + "grad_norm": 504.46917724609375, + "learning_rate": 1.9058418732449208e-06, + "loss": 48.75, + "step": 17438 + }, + { + "epoch": 0.16507795268882347, + "grad_norm": 928.3423461914062, + "learning_rate": 1.9058288854485483e-06, + "loss": 66.8281, + "step": 17439 + }, + { + "epoch": 0.16508741871053853, + "grad_norm": 189.87057495117188, + "learning_rate": 1.9058158968007554e-06, + "loss": 21.3477, + "step": 17440 + }, + { + "epoch": 0.16509688473225356, + "grad_norm": 406.949462890625, + "learning_rate": 1.9058029073015546e-06, + "loss": 20.6016, + "step": 17441 + }, + { + "epoch": 0.16510635075396862, + "grad_norm": 579.3365478515625, + "learning_rate": 1.9057899169509578e-06, + "loss": 47.0469, + "step": 17442 + }, + { + "epoch": 0.16511581677568368, + "grad_norm": 241.85130310058594, + "learning_rate": 1.9057769257489777e-06, + "loss": 17.2734, + "step": 17443 + }, + { + "epoch": 0.16512528279739874, + "grad_norm": 300.6641540527344, + "learning_rate": 1.905763933695626e-06, + "loss": 22.3984, + "step": 17444 + }, + { + "epoch": 0.1651347488191138, + "grad_norm": 364.77325439453125, + "learning_rate": 1.905750940790915e-06, + "loss": 9.8848, + "step": 17445 + }, + { + "epoch": 0.16514421484082883, + "grad_norm": 145.34217834472656, + "learning_rate": 1.9057379470348577e-06, + "loss": 19.7188, + "step": 17446 + }, + { + "epoch": 0.1651536808625439, + "grad_norm": 260.9282531738281, + "learning_rate": 1.905724952427465e-06, + "loss": 21.8555, + "step": 17447 + }, + { + "epoch": 0.16516314688425895, + "grad_norm": 475.71258544921875, + "learning_rate": 1.90571195696875e-06, + "loss": 39.3906, + "step": 17448 + }, + { + "epoch": 0.16517261290597401, + "grad_norm": 364.8771057128906, + "learning_rate": 1.9056989606587247e-06, + "loss": 50.5938, + "step": 17449 + }, + { + "epoch": 0.16518207892768905, + "grad_norm": 382.34381103515625, + "learning_rate": 1.9056859634974013e-06, + "loss": 28.5234, + "step": 17450 + }, + { + "epoch": 0.1651915449494041, + "grad_norm": 598.556884765625, + "learning_rate": 1.9056729654847918e-06, + "loss": 44.918, + "step": 17451 + }, + { + "epoch": 0.16520101097111917, + "grad_norm": 533.8781127929688, + "learning_rate": 1.9056599666209087e-06, + "loss": 52.2188, + "step": 17452 + }, + { + "epoch": 0.16521047699283423, + "grad_norm": 206.83224487304688, + "learning_rate": 1.9056469669057642e-06, + "loss": 9.6406, + "step": 17453 + }, + { + "epoch": 0.1652199430145493, + "grad_norm": 658.0169677734375, + "learning_rate": 1.9056339663393705e-06, + "loss": 45.4375, + "step": 17454 + }, + { + "epoch": 0.16522940903626432, + "grad_norm": 475.63531494140625, + "learning_rate": 1.9056209649217398e-06, + "loss": 16.6406, + "step": 17455 + }, + { + "epoch": 0.16523887505797938, + "grad_norm": 361.7275085449219, + "learning_rate": 1.9056079626528842e-06, + "loss": 31.4688, + "step": 17456 + }, + { + "epoch": 0.16524834107969444, + "grad_norm": 426.32855224609375, + "learning_rate": 1.9055949595328164e-06, + "loss": 19.9023, + "step": 17457 + }, + { + "epoch": 0.1652578071014095, + "grad_norm": 554.1663208007812, + "learning_rate": 1.9055819555615478e-06, + "loss": 36.2656, + "step": 17458 + }, + { + "epoch": 0.16526727312312453, + "grad_norm": 352.1636047363281, + "learning_rate": 1.9055689507390913e-06, + "loss": 24.0391, + "step": 17459 + }, + { + "epoch": 0.1652767391448396, + "grad_norm": 2457.297119140625, + "learning_rate": 1.905555945065459e-06, + "loss": 40.0625, + "step": 17460 + }, + { + "epoch": 0.16528620516655465, + "grad_norm": 172.90989685058594, + "learning_rate": 1.9055429385406627e-06, + "loss": 17.5742, + "step": 17461 + }, + { + "epoch": 0.1652956711882697, + "grad_norm": 284.2088623046875, + "learning_rate": 1.9055299311647151e-06, + "loss": 19.5547, + "step": 17462 + }, + { + "epoch": 0.16530513720998477, + "grad_norm": 691.6528930664062, + "learning_rate": 1.9055169229376284e-06, + "loss": 54.3906, + "step": 17463 + }, + { + "epoch": 0.1653146032316998, + "grad_norm": 281.00018310546875, + "learning_rate": 1.9055039138594146e-06, + "loss": 17.6562, + "step": 17464 + }, + { + "epoch": 0.16532406925341486, + "grad_norm": 225.76121520996094, + "learning_rate": 1.905490903930086e-06, + "loss": 21.6875, + "step": 17465 + }, + { + "epoch": 0.16533353527512992, + "grad_norm": 437.7471008300781, + "learning_rate": 1.9054778931496548e-06, + "loss": 19.2891, + "step": 17466 + }, + { + "epoch": 0.16534300129684498, + "grad_norm": 300.2461242675781, + "learning_rate": 1.9054648815181334e-06, + "loss": 19.7031, + "step": 17467 + }, + { + "epoch": 0.16535246731856004, + "grad_norm": 683.804931640625, + "learning_rate": 1.9054518690355339e-06, + "loss": 41.8047, + "step": 17468 + }, + { + "epoch": 0.16536193334027507, + "grad_norm": 455.3758544921875, + "learning_rate": 1.9054388557018687e-06, + "loss": 45.7812, + "step": 17469 + }, + { + "epoch": 0.16537139936199013, + "grad_norm": 216.4862518310547, + "learning_rate": 1.9054258415171494e-06, + "loss": 24.9609, + "step": 17470 + }, + { + "epoch": 0.1653808653837052, + "grad_norm": 261.44964599609375, + "learning_rate": 1.905412826481389e-06, + "loss": 30.0, + "step": 17471 + }, + { + "epoch": 0.16539033140542025, + "grad_norm": 379.0142517089844, + "learning_rate": 1.9053998105945995e-06, + "loss": 23.6172, + "step": 17472 + }, + { + "epoch": 0.1653997974271353, + "grad_norm": 981.3734130859375, + "learning_rate": 1.9053867938567932e-06, + "loss": 27.3203, + "step": 17473 + }, + { + "epoch": 0.16540926344885035, + "grad_norm": 384.907958984375, + "learning_rate": 1.9053737762679817e-06, + "loss": 19.3047, + "step": 17474 + }, + { + "epoch": 0.1654187294705654, + "grad_norm": 2.8409032821655273, + "learning_rate": 1.9053607578281783e-06, + "loss": 0.9224, + "step": 17475 + }, + { + "epoch": 0.16542819549228047, + "grad_norm": 216.22903442382812, + "learning_rate": 1.9053477385373945e-06, + "loss": 20.3203, + "step": 17476 + }, + { + "epoch": 0.16543766151399553, + "grad_norm": 175.42303466796875, + "learning_rate": 1.9053347183956427e-06, + "loss": 14.1797, + "step": 17477 + }, + { + "epoch": 0.16544712753571056, + "grad_norm": 249.78160095214844, + "learning_rate": 1.905321697402935e-06, + "loss": 24.0312, + "step": 17478 + }, + { + "epoch": 0.16545659355742562, + "grad_norm": 483.0263366699219, + "learning_rate": 1.905308675559284e-06, + "loss": 37.7266, + "step": 17479 + }, + { + "epoch": 0.16546605957914068, + "grad_norm": 361.2465515136719, + "learning_rate": 1.9052956528647017e-06, + "loss": 29.8125, + "step": 17480 + }, + { + "epoch": 0.16547552560085574, + "grad_norm": 254.82867431640625, + "learning_rate": 1.9052826293192003e-06, + "loss": 18.4062, + "step": 17481 + }, + { + "epoch": 0.16548499162257077, + "grad_norm": 308.0985412597656, + "learning_rate": 1.9052696049227923e-06, + "loss": 26.5938, + "step": 17482 + }, + { + "epoch": 0.16549445764428583, + "grad_norm": 567.3588256835938, + "learning_rate": 1.9052565796754899e-06, + "loss": 52.8281, + "step": 17483 + }, + { + "epoch": 0.1655039236660009, + "grad_norm": 1368.41015625, + "learning_rate": 1.9052435535773047e-06, + "loss": 54.3711, + "step": 17484 + }, + { + "epoch": 0.16551338968771595, + "grad_norm": 441.4949645996094, + "learning_rate": 1.9052305266282498e-06, + "loss": 40.1953, + "step": 17485 + }, + { + "epoch": 0.165522855709431, + "grad_norm": 166.63502502441406, + "learning_rate": 1.905217498828337e-06, + "loss": 13.1602, + "step": 17486 + }, + { + "epoch": 0.16553232173114604, + "grad_norm": 269.75360107421875, + "learning_rate": 1.9052044701775784e-06, + "loss": 24.8438, + "step": 17487 + }, + { + "epoch": 0.1655417877528611, + "grad_norm": 1941.5811767578125, + "learning_rate": 1.905191440675987e-06, + "loss": 51.9297, + "step": 17488 + }, + { + "epoch": 0.16555125377457616, + "grad_norm": 260.94146728515625, + "learning_rate": 1.9051784103235742e-06, + "loss": 17.918, + "step": 17489 + }, + { + "epoch": 0.16556071979629122, + "grad_norm": 422.0663146972656, + "learning_rate": 1.9051653791203527e-06, + "loss": 29.8203, + "step": 17490 + }, + { + "epoch": 0.16557018581800625, + "grad_norm": 223.78103637695312, + "learning_rate": 1.9051523470663344e-06, + "loss": 8.5352, + "step": 17491 + }, + { + "epoch": 0.16557965183972131, + "grad_norm": 552.6943359375, + "learning_rate": 1.905139314161532e-06, + "loss": 43.668, + "step": 17492 + }, + { + "epoch": 0.16558911786143637, + "grad_norm": 332.2611999511719, + "learning_rate": 1.9051262804059575e-06, + "loss": 30.4219, + "step": 17493 + }, + { + "epoch": 0.16559858388315143, + "grad_norm": 189.31246948242188, + "learning_rate": 1.9051132457996233e-06, + "loss": 21.6328, + "step": 17494 + }, + { + "epoch": 0.1656080499048665, + "grad_norm": 361.1471252441406, + "learning_rate": 1.9051002103425413e-06, + "loss": 26.7969, + "step": 17495 + }, + { + "epoch": 0.16561751592658153, + "grad_norm": 967.703125, + "learning_rate": 1.9050871740347242e-06, + "loss": 46.375, + "step": 17496 + }, + { + "epoch": 0.1656269819482966, + "grad_norm": 398.5499267578125, + "learning_rate": 1.9050741368761838e-06, + "loss": 43.5859, + "step": 17497 + }, + { + "epoch": 0.16563644797001165, + "grad_norm": 139.1147918701172, + "learning_rate": 1.9050610988669326e-06, + "loss": 13.5742, + "step": 17498 + }, + { + "epoch": 0.1656459139917267, + "grad_norm": 478.48516845703125, + "learning_rate": 1.9050480600069832e-06, + "loss": 45.5938, + "step": 17499 + }, + { + "epoch": 0.16565538001344174, + "grad_norm": 1298.537109375, + "learning_rate": 1.905035020296347e-06, + "loss": 63.25, + "step": 17500 + }, + { + "epoch": 0.1656648460351568, + "grad_norm": 408.89422607421875, + "learning_rate": 1.9050219797350372e-06, + "loss": 43.2969, + "step": 17501 + }, + { + "epoch": 0.16567431205687186, + "grad_norm": 208.45872497558594, + "learning_rate": 1.9050089383230653e-06, + "loss": 22.4688, + "step": 17502 + }, + { + "epoch": 0.16568377807858692, + "grad_norm": 736.5292358398438, + "learning_rate": 1.904995896060444e-06, + "loss": 38.6094, + "step": 17503 + }, + { + "epoch": 0.16569324410030198, + "grad_norm": 3.2901976108551025, + "learning_rate": 1.9049828529471855e-06, + "loss": 0.9219, + "step": 17504 + }, + { + "epoch": 0.165702710122017, + "grad_norm": 489.8011169433594, + "learning_rate": 1.904969808983302e-06, + "loss": 18.3672, + "step": 17505 + }, + { + "epoch": 0.16571217614373207, + "grad_norm": 560.5367431640625, + "learning_rate": 1.9049567641688055e-06, + "loss": 42.4062, + "step": 17506 + }, + { + "epoch": 0.16572164216544713, + "grad_norm": 551.5521850585938, + "learning_rate": 1.9049437185037086e-06, + "loss": 35.9844, + "step": 17507 + }, + { + "epoch": 0.1657311081871622, + "grad_norm": 430.78753662109375, + "learning_rate": 1.9049306719880236e-06, + "loss": 17.0938, + "step": 17508 + }, + { + "epoch": 0.16574057420887722, + "grad_norm": 369.551513671875, + "learning_rate": 1.9049176246217626e-06, + "loss": 15.9531, + "step": 17509 + }, + { + "epoch": 0.16575004023059228, + "grad_norm": 430.7459716796875, + "learning_rate": 1.9049045764049381e-06, + "loss": 18.3789, + "step": 17510 + }, + { + "epoch": 0.16575950625230734, + "grad_norm": 785.5707397460938, + "learning_rate": 1.9048915273375617e-06, + "loss": 52.7031, + "step": 17511 + }, + { + "epoch": 0.1657689722740224, + "grad_norm": 632.776123046875, + "learning_rate": 1.9048784774196465e-06, + "loss": 37.2031, + "step": 17512 + }, + { + "epoch": 0.16577843829573746, + "grad_norm": 375.1039123535156, + "learning_rate": 1.9048654266512043e-06, + "loss": 12.3984, + "step": 17513 + }, + { + "epoch": 0.1657879043174525, + "grad_norm": 524.6016235351562, + "learning_rate": 1.9048523750322473e-06, + "loss": 37.5938, + "step": 17514 + }, + { + "epoch": 0.16579737033916755, + "grad_norm": 260.9999084472656, + "learning_rate": 1.9048393225627883e-06, + "loss": 15.7695, + "step": 17515 + }, + { + "epoch": 0.16580683636088261, + "grad_norm": 1023.9266357421875, + "learning_rate": 1.904826269242839e-06, + "loss": 65.2344, + "step": 17516 + }, + { + "epoch": 0.16581630238259767, + "grad_norm": 2.9306912422180176, + "learning_rate": 1.9048132150724117e-06, + "loss": 0.7778, + "step": 17517 + }, + { + "epoch": 0.1658257684043127, + "grad_norm": 3.321288824081421, + "learning_rate": 1.904800160051519e-06, + "loss": 0.8086, + "step": 17518 + }, + { + "epoch": 0.16583523442602777, + "grad_norm": 646.6973266601562, + "learning_rate": 1.904787104180173e-06, + "loss": 54.8281, + "step": 17519 + }, + { + "epoch": 0.16584470044774283, + "grad_norm": 669.599609375, + "learning_rate": 1.904774047458386e-06, + "loss": 31.9062, + "step": 17520 + }, + { + "epoch": 0.1658541664694579, + "grad_norm": 230.40538024902344, + "learning_rate": 1.9047609898861703e-06, + "loss": 30.4297, + "step": 17521 + }, + { + "epoch": 0.16586363249117295, + "grad_norm": 455.5083923339844, + "learning_rate": 1.9047479314635382e-06, + "loss": 36.2969, + "step": 17522 + }, + { + "epoch": 0.16587309851288798, + "grad_norm": 301.80804443359375, + "learning_rate": 1.9047348721905018e-06, + "loss": 22.8906, + "step": 17523 + }, + { + "epoch": 0.16588256453460304, + "grad_norm": 413.2418212890625, + "learning_rate": 1.9047218120670734e-06, + "loss": 33.0859, + "step": 17524 + }, + { + "epoch": 0.1658920305563181, + "grad_norm": 331.57855224609375, + "learning_rate": 1.9047087510932651e-06, + "loss": 16.7266, + "step": 17525 + }, + { + "epoch": 0.16590149657803316, + "grad_norm": 459.75909423828125, + "learning_rate": 1.90469568926909e-06, + "loss": 21.2422, + "step": 17526 + }, + { + "epoch": 0.1659109625997482, + "grad_norm": 309.9540100097656, + "learning_rate": 1.9046826265945596e-06, + "loss": 18.2109, + "step": 17527 + }, + { + "epoch": 0.16592042862146325, + "grad_norm": 210.1006317138672, + "learning_rate": 1.904669563069686e-06, + "loss": 19.8047, + "step": 17528 + }, + { + "epoch": 0.1659298946431783, + "grad_norm": 331.5171813964844, + "learning_rate": 1.9046564986944822e-06, + "loss": 25.9375, + "step": 17529 + }, + { + "epoch": 0.16593936066489337, + "grad_norm": 693.3828125, + "learning_rate": 1.9046434334689603e-06, + "loss": 59.9375, + "step": 17530 + }, + { + "epoch": 0.16594882668660843, + "grad_norm": 331.1889953613281, + "learning_rate": 1.9046303673931322e-06, + "loss": 18.8359, + "step": 17531 + }, + { + "epoch": 0.16595829270832346, + "grad_norm": 472.9093322753906, + "learning_rate": 1.9046173004670106e-06, + "loss": 47.9766, + "step": 17532 + }, + { + "epoch": 0.16596775873003852, + "grad_norm": 3.3124990463256836, + "learning_rate": 1.9046042326906074e-06, + "loss": 0.7698, + "step": 17533 + }, + { + "epoch": 0.16597722475175358, + "grad_norm": 280.6656494140625, + "learning_rate": 1.9045911640639352e-06, + "loss": 16.7266, + "step": 17534 + }, + { + "epoch": 0.16598669077346864, + "grad_norm": 348.37982177734375, + "learning_rate": 1.904578094587006e-06, + "loss": 49.5781, + "step": 17535 + }, + { + "epoch": 0.16599615679518367, + "grad_norm": 386.5525817871094, + "learning_rate": 1.9045650242598321e-06, + "loss": 49.2188, + "step": 17536 + }, + { + "epoch": 0.16600562281689873, + "grad_norm": 402.7718505859375, + "learning_rate": 1.9045519530824263e-06, + "loss": 43.625, + "step": 17537 + }, + { + "epoch": 0.1660150888386138, + "grad_norm": 951.482421875, + "learning_rate": 1.9045388810548001e-06, + "loss": 61.8594, + "step": 17538 + }, + { + "epoch": 0.16602455486032885, + "grad_norm": 340.7397155761719, + "learning_rate": 1.9045258081769665e-06, + "loss": 23.7031, + "step": 17539 + }, + { + "epoch": 0.16603402088204391, + "grad_norm": 873.0079345703125, + "learning_rate": 1.9045127344489376e-06, + "loss": 71.7344, + "step": 17540 + }, + { + "epoch": 0.16604348690375895, + "grad_norm": 295.83575439453125, + "learning_rate": 1.9044996598707254e-06, + "loss": 18.6875, + "step": 17541 + }, + { + "epoch": 0.166052952925474, + "grad_norm": 204.02024841308594, + "learning_rate": 1.9044865844423424e-06, + "loss": 8.6797, + "step": 17542 + }, + { + "epoch": 0.16606241894718907, + "grad_norm": 941.30029296875, + "learning_rate": 1.9044735081638007e-06, + "loss": 48.3086, + "step": 17543 + }, + { + "epoch": 0.16607188496890413, + "grad_norm": 1187.2080078125, + "learning_rate": 1.904460431035113e-06, + "loss": 7.082, + "step": 17544 + }, + { + "epoch": 0.16608135099061916, + "grad_norm": 315.45037841796875, + "learning_rate": 1.904447353056291e-06, + "loss": 25.0, + "step": 17545 + }, + { + "epoch": 0.16609081701233422, + "grad_norm": 221.50184631347656, + "learning_rate": 1.9044342742273476e-06, + "loss": 25.4375, + "step": 17546 + }, + { + "epoch": 0.16610028303404928, + "grad_norm": 705.7461547851562, + "learning_rate": 1.9044211945482947e-06, + "loss": 45.5469, + "step": 17547 + }, + { + "epoch": 0.16610974905576434, + "grad_norm": 581.6356811523438, + "learning_rate": 1.9044081140191448e-06, + "loss": 52.0547, + "step": 17548 + }, + { + "epoch": 0.1661192150774794, + "grad_norm": 232.07843017578125, + "learning_rate": 1.9043950326399098e-06, + "loss": 19.6406, + "step": 17549 + }, + { + "epoch": 0.16612868109919443, + "grad_norm": 570.5678100585938, + "learning_rate": 1.9043819504106027e-06, + "loss": 29.7812, + "step": 17550 + }, + { + "epoch": 0.1661381471209095, + "grad_norm": 357.05511474609375, + "learning_rate": 1.9043688673312352e-06, + "loss": 26.5938, + "step": 17551 + }, + { + "epoch": 0.16614761314262455, + "grad_norm": 272.0744323730469, + "learning_rate": 1.90435578340182e-06, + "loss": 16.7969, + "step": 17552 + }, + { + "epoch": 0.1661570791643396, + "grad_norm": 348.88653564453125, + "learning_rate": 1.904342698622369e-06, + "loss": 16.957, + "step": 17553 + }, + { + "epoch": 0.16616654518605467, + "grad_norm": 260.9897155761719, + "learning_rate": 1.9043296129928947e-06, + "loss": 23.0469, + "step": 17554 + }, + { + "epoch": 0.1661760112077697, + "grad_norm": 567.7647094726562, + "learning_rate": 1.9043165265134093e-06, + "loss": 32.7656, + "step": 17555 + }, + { + "epoch": 0.16618547722948476, + "grad_norm": 213.37747192382812, + "learning_rate": 1.9043034391839254e-06, + "loss": 21.75, + "step": 17556 + }, + { + "epoch": 0.16619494325119982, + "grad_norm": 4.090524673461914, + "learning_rate": 1.904290351004455e-06, + "loss": 0.9546, + "step": 17557 + }, + { + "epoch": 0.16620440927291488, + "grad_norm": 1687.2283935546875, + "learning_rate": 1.9042772619750104e-06, + "loss": 19.4453, + "step": 17558 + }, + { + "epoch": 0.16621387529462991, + "grad_norm": 685.069091796875, + "learning_rate": 1.9042641720956044e-06, + "loss": 30.6016, + "step": 17559 + }, + { + "epoch": 0.16622334131634497, + "grad_norm": 441.974365234375, + "learning_rate": 1.9042510813662484e-06, + "loss": 40.2188, + "step": 17560 + }, + { + "epoch": 0.16623280733806003, + "grad_norm": 353.1178283691406, + "learning_rate": 1.9042379897869556e-06, + "loss": 19.8594, + "step": 17561 + }, + { + "epoch": 0.1662422733597751, + "grad_norm": 226.99598693847656, + "learning_rate": 1.9042248973577377e-06, + "loss": 14.9219, + "step": 17562 + }, + { + "epoch": 0.16625173938149015, + "grad_norm": 1174.181396484375, + "learning_rate": 1.9042118040786074e-06, + "loss": 32.4375, + "step": 17563 + }, + { + "epoch": 0.1662612054032052, + "grad_norm": 204.27928161621094, + "learning_rate": 1.9041987099495764e-06, + "loss": 13.8984, + "step": 17564 + }, + { + "epoch": 0.16627067142492025, + "grad_norm": 253.1001434326172, + "learning_rate": 1.9041856149706579e-06, + "loss": 22.5703, + "step": 17565 + }, + { + "epoch": 0.1662801374466353, + "grad_norm": 285.1921081542969, + "learning_rate": 1.9041725191418637e-06, + "loss": 34.3203, + "step": 17566 + }, + { + "epoch": 0.16628960346835037, + "grad_norm": 432.8686828613281, + "learning_rate": 1.904159422463206e-06, + "loss": 45.2422, + "step": 17567 + }, + { + "epoch": 0.1662990694900654, + "grad_norm": 372.8061828613281, + "learning_rate": 1.9041463249346973e-06, + "loss": 17.9961, + "step": 17568 + }, + { + "epoch": 0.16630853551178046, + "grad_norm": 960.317138671875, + "learning_rate": 1.9041332265563498e-06, + "loss": 52.8984, + "step": 17569 + }, + { + "epoch": 0.16631800153349552, + "grad_norm": 427.927001953125, + "learning_rate": 1.9041201273281759e-06, + "loss": 53.2812, + "step": 17570 + }, + { + "epoch": 0.16632746755521058, + "grad_norm": 260.4722900390625, + "learning_rate": 1.904107027250188e-06, + "loss": 19.8594, + "step": 17571 + }, + { + "epoch": 0.16633693357692564, + "grad_norm": 274.7586669921875, + "learning_rate": 1.9040939263223982e-06, + "loss": 17.7031, + "step": 17572 + }, + { + "epoch": 0.16634639959864067, + "grad_norm": 516.5198974609375, + "learning_rate": 1.904080824544819e-06, + "loss": 43.2969, + "step": 17573 + }, + { + "epoch": 0.16635586562035573, + "grad_norm": 817.337890625, + "learning_rate": 1.9040677219174625e-06, + "loss": 43.1562, + "step": 17574 + }, + { + "epoch": 0.1663653316420708, + "grad_norm": 213.5312042236328, + "learning_rate": 1.9040546184403413e-06, + "loss": 16.4922, + "step": 17575 + }, + { + "epoch": 0.16637479766378585, + "grad_norm": 379.9842529296875, + "learning_rate": 1.9040415141134676e-06, + "loss": 28.2109, + "step": 17576 + }, + { + "epoch": 0.16638426368550088, + "grad_norm": 312.6467590332031, + "learning_rate": 1.9040284089368536e-06, + "loss": 44.4375, + "step": 17577 + }, + { + "epoch": 0.16639372970721594, + "grad_norm": 368.03009033203125, + "learning_rate": 1.9040153029105118e-06, + "loss": 22.3828, + "step": 17578 + }, + { + "epoch": 0.166403195728931, + "grad_norm": 331.3036193847656, + "learning_rate": 1.9040021960344546e-06, + "loss": 29.3672, + "step": 17579 + }, + { + "epoch": 0.16641266175064606, + "grad_norm": 868.1847534179688, + "learning_rate": 1.9039890883086937e-06, + "loss": 45.1602, + "step": 17580 + }, + { + "epoch": 0.16642212777236112, + "grad_norm": 658.0927734375, + "learning_rate": 1.903975979733242e-06, + "loss": 49.5156, + "step": 17581 + }, + { + "epoch": 0.16643159379407615, + "grad_norm": 321.1667785644531, + "learning_rate": 1.903962870308112e-06, + "loss": 16.7695, + "step": 17582 + }, + { + "epoch": 0.16644105981579121, + "grad_norm": 458.8502197265625, + "learning_rate": 1.9039497600333153e-06, + "loss": 25.1016, + "step": 17583 + }, + { + "epoch": 0.16645052583750627, + "grad_norm": 402.8515625, + "learning_rate": 1.903936648908865e-06, + "loss": 29.0234, + "step": 17584 + }, + { + "epoch": 0.16645999185922133, + "grad_norm": 290.2109680175781, + "learning_rate": 1.9039235369347728e-06, + "loss": 16.375, + "step": 17585 + }, + { + "epoch": 0.16646945788093637, + "grad_norm": 758.2498168945312, + "learning_rate": 1.9039104241110513e-06, + "loss": 39.543, + "step": 17586 + }, + { + "epoch": 0.16647892390265143, + "grad_norm": 232.66412353515625, + "learning_rate": 1.9038973104377128e-06, + "loss": 17.1797, + "step": 17587 + }, + { + "epoch": 0.1664883899243665, + "grad_norm": 549.873291015625, + "learning_rate": 1.9038841959147698e-06, + "loss": 45.375, + "step": 17588 + }, + { + "epoch": 0.16649785594608155, + "grad_norm": 459.9300231933594, + "learning_rate": 1.9038710805422343e-06, + "loss": 27.5625, + "step": 17589 + }, + { + "epoch": 0.1665073219677966, + "grad_norm": 702.8592529296875, + "learning_rate": 1.9038579643201186e-06, + "loss": 31.0938, + "step": 17590 + }, + { + "epoch": 0.16651678798951164, + "grad_norm": 242.56007385253906, + "learning_rate": 1.9038448472484357e-06, + "loss": 27.9414, + "step": 17591 + }, + { + "epoch": 0.1665262540112267, + "grad_norm": 248.9016571044922, + "learning_rate": 1.903831729327197e-06, + "loss": 28.2344, + "step": 17592 + }, + { + "epoch": 0.16653572003294176, + "grad_norm": 874.3299560546875, + "learning_rate": 1.9038186105564154e-06, + "loss": 69.8906, + "step": 17593 + }, + { + "epoch": 0.16654518605465682, + "grad_norm": 592.4867553710938, + "learning_rate": 1.9038054909361032e-06, + "loss": 21.7578, + "step": 17594 + }, + { + "epoch": 0.16655465207637185, + "grad_norm": 527.7444458007812, + "learning_rate": 1.9037923704662725e-06, + "loss": 17.2344, + "step": 17595 + }, + { + "epoch": 0.1665641180980869, + "grad_norm": 863.6167602539062, + "learning_rate": 1.903779249146936e-06, + "loss": 44.5078, + "step": 17596 + }, + { + "epoch": 0.16657358411980197, + "grad_norm": 272.0924377441406, + "learning_rate": 1.9037661269781056e-06, + "loss": 22.7969, + "step": 17597 + }, + { + "epoch": 0.16658305014151703, + "grad_norm": 446.99853515625, + "learning_rate": 1.9037530039597938e-06, + "loss": 39.8594, + "step": 17598 + }, + { + "epoch": 0.1665925161632321, + "grad_norm": 584.3058471679688, + "learning_rate": 1.9037398800920133e-06, + "loss": 23.9141, + "step": 17599 + }, + { + "epoch": 0.16660198218494712, + "grad_norm": 481.2335205078125, + "learning_rate": 1.9037267553747757e-06, + "loss": 39.5781, + "step": 17600 + }, + { + "epoch": 0.16661144820666218, + "grad_norm": 265.9800720214844, + "learning_rate": 1.9037136298080939e-06, + "loss": 14.6484, + "step": 17601 + }, + { + "epoch": 0.16662091422837724, + "grad_norm": 362.1441650390625, + "learning_rate": 1.9037005033919803e-06, + "loss": 21.6289, + "step": 17602 + }, + { + "epoch": 0.1666303802500923, + "grad_norm": 337.5423278808594, + "learning_rate": 1.9036873761264464e-06, + "loss": 40.75, + "step": 17603 + }, + { + "epoch": 0.16663984627180733, + "grad_norm": 342.0367431640625, + "learning_rate": 1.9036742480115055e-06, + "loss": 20.8008, + "step": 17604 + }, + { + "epoch": 0.1666493122935224, + "grad_norm": 320.5480041503906, + "learning_rate": 1.90366111904717e-06, + "loss": 20.7656, + "step": 17605 + }, + { + "epoch": 0.16665877831523745, + "grad_norm": 402.43646240234375, + "learning_rate": 1.9036479892334514e-06, + "loss": 14.6719, + "step": 17606 + }, + { + "epoch": 0.16666824433695251, + "grad_norm": 325.80535888671875, + "learning_rate": 1.9036348585703624e-06, + "loss": 30.8906, + "step": 17607 + }, + { + "epoch": 0.16667771035866757, + "grad_norm": 340.5157470703125, + "learning_rate": 1.9036217270579159e-06, + "loss": 29.2969, + "step": 17608 + }, + { + "epoch": 0.1666871763803826, + "grad_norm": 635.1835327148438, + "learning_rate": 1.9036085946961231e-06, + "loss": 42.9219, + "step": 17609 + }, + { + "epoch": 0.16669664240209767, + "grad_norm": 388.62225341796875, + "learning_rate": 1.9035954614849973e-06, + "loss": 15.4453, + "step": 17610 + }, + { + "epoch": 0.16670610842381273, + "grad_norm": 765.403076171875, + "learning_rate": 1.9035823274245505e-06, + "loss": 55.375, + "step": 17611 + }, + { + "epoch": 0.1667155744455278, + "grad_norm": 343.6528015136719, + "learning_rate": 1.9035691925147954e-06, + "loss": 30.0938, + "step": 17612 + }, + { + "epoch": 0.16672504046724282, + "grad_norm": 300.2288818359375, + "learning_rate": 1.9035560567557439e-06, + "loss": 37.5, + "step": 17613 + }, + { + "epoch": 0.16673450648895788, + "grad_norm": 376.2392883300781, + "learning_rate": 1.9035429201474082e-06, + "loss": 15.9023, + "step": 17614 + }, + { + "epoch": 0.16674397251067294, + "grad_norm": 4.302179336547852, + "learning_rate": 1.903529782689801e-06, + "loss": 1.0547, + "step": 17615 + }, + { + "epoch": 0.166753438532388, + "grad_norm": 262.98284912109375, + "learning_rate": 1.9035166443829352e-06, + "loss": 20.6641, + "step": 17616 + }, + { + "epoch": 0.16676290455410306, + "grad_norm": 166.8349609375, + "learning_rate": 1.9035035052268218e-06, + "loss": 24.2969, + "step": 17617 + }, + { + "epoch": 0.1667723705758181, + "grad_norm": 388.0373229980469, + "learning_rate": 1.9034903652214743e-06, + "loss": 20.3594, + "step": 17618 + }, + { + "epoch": 0.16678183659753315, + "grad_norm": 320.00628662109375, + "learning_rate": 1.9034772243669044e-06, + "loss": 22.5312, + "step": 17619 + }, + { + "epoch": 0.1667913026192482, + "grad_norm": 559.9071044921875, + "learning_rate": 1.9034640826631247e-06, + "loss": 40.6875, + "step": 17620 + }, + { + "epoch": 0.16680076864096327, + "grad_norm": 680.3812255859375, + "learning_rate": 1.9034509401101477e-06, + "loss": 6.1992, + "step": 17621 + }, + { + "epoch": 0.1668102346626783, + "grad_norm": 303.6968688964844, + "learning_rate": 1.9034377967079854e-06, + "loss": 28.7188, + "step": 17622 + }, + { + "epoch": 0.16681970068439336, + "grad_norm": 324.863037109375, + "learning_rate": 1.9034246524566506e-06, + "loss": 45.1719, + "step": 17623 + }, + { + "epoch": 0.16682916670610842, + "grad_norm": 341.242431640625, + "learning_rate": 1.9034115073561549e-06, + "loss": 32.1484, + "step": 17624 + }, + { + "epoch": 0.16683863272782348, + "grad_norm": 527.30859375, + "learning_rate": 1.9033983614065116e-06, + "loss": 42.6328, + "step": 17625 + }, + { + "epoch": 0.16684809874953854, + "grad_norm": 898.049072265625, + "learning_rate": 1.9033852146077326e-06, + "loss": 65.0, + "step": 17626 + }, + { + "epoch": 0.16685756477125357, + "grad_norm": 362.9295349121094, + "learning_rate": 1.9033720669598302e-06, + "loss": 36.3438, + "step": 17627 + }, + { + "epoch": 0.16686703079296863, + "grad_norm": 683.8591918945312, + "learning_rate": 1.9033589184628168e-06, + "loss": 28.2266, + "step": 17628 + }, + { + "epoch": 0.1668764968146837, + "grad_norm": 226.20999145507812, + "learning_rate": 1.9033457691167048e-06, + "loss": 17.8633, + "step": 17629 + }, + { + "epoch": 0.16688596283639875, + "grad_norm": 343.6250305175781, + "learning_rate": 1.9033326189215064e-06, + "loss": 17.4453, + "step": 17630 + }, + { + "epoch": 0.1668954288581138, + "grad_norm": 250.3340606689453, + "learning_rate": 1.9033194678772342e-06, + "loss": 30.3906, + "step": 17631 + }, + { + "epoch": 0.16690489487982885, + "grad_norm": 318.9510498046875, + "learning_rate": 1.9033063159839005e-06, + "loss": 44.0547, + "step": 17632 + }, + { + "epoch": 0.1669143609015439, + "grad_norm": 284.7974548339844, + "learning_rate": 1.9032931632415176e-06, + "loss": 31.3477, + "step": 17633 + }, + { + "epoch": 0.16692382692325897, + "grad_norm": 499.61993408203125, + "learning_rate": 1.903280009650098e-06, + "loss": 33.2812, + "step": 17634 + }, + { + "epoch": 0.16693329294497403, + "grad_norm": 404.04034423828125, + "learning_rate": 1.9032668552096536e-06, + "loss": 36.6406, + "step": 17635 + }, + { + "epoch": 0.16694275896668906, + "grad_norm": 241.6040802001953, + "learning_rate": 1.9032536999201977e-06, + "loss": 18.5234, + "step": 17636 + }, + { + "epoch": 0.16695222498840412, + "grad_norm": 234.95359802246094, + "learning_rate": 1.9032405437817416e-06, + "loss": 18.2656, + "step": 17637 + }, + { + "epoch": 0.16696169101011918, + "grad_norm": 213.71636962890625, + "learning_rate": 1.9032273867942982e-06, + "loss": 29.3828, + "step": 17638 + }, + { + "epoch": 0.16697115703183424, + "grad_norm": 3.263805866241455, + "learning_rate": 1.90321422895788e-06, + "loss": 0.7021, + "step": 17639 + }, + { + "epoch": 0.1669806230535493, + "grad_norm": 260.0309753417969, + "learning_rate": 1.903201070272499e-06, + "loss": 20.4453, + "step": 17640 + }, + { + "epoch": 0.16699008907526433, + "grad_norm": 293.8359069824219, + "learning_rate": 1.9031879107381679e-06, + "loss": 24.6758, + "step": 17641 + }, + { + "epoch": 0.1669995550969794, + "grad_norm": 555.5476684570312, + "learning_rate": 1.903174750354899e-06, + "loss": 53.0117, + "step": 17642 + }, + { + "epoch": 0.16700902111869445, + "grad_norm": 558.2454833984375, + "learning_rate": 1.9031615891227043e-06, + "loss": 24.7109, + "step": 17643 + }, + { + "epoch": 0.1670184871404095, + "grad_norm": 268.9899597167969, + "learning_rate": 1.9031484270415968e-06, + "loss": 15.9219, + "step": 17644 + }, + { + "epoch": 0.16702795316212454, + "grad_norm": 878.5142822265625, + "learning_rate": 1.9031352641115884e-06, + "loss": 36.0547, + "step": 17645 + }, + { + "epoch": 0.1670374191838396, + "grad_norm": 235.3226776123047, + "learning_rate": 1.9031221003326914e-06, + "loss": 15.9453, + "step": 17646 + }, + { + "epoch": 0.16704688520555466, + "grad_norm": 269.2769775390625, + "learning_rate": 1.9031089357049183e-06, + "loss": 16.8125, + "step": 17647 + }, + { + "epoch": 0.16705635122726972, + "grad_norm": 172.7972869873047, + "learning_rate": 1.9030957702282819e-06, + "loss": 14.6602, + "step": 17648 + }, + { + "epoch": 0.16706581724898478, + "grad_norm": 378.7185974121094, + "learning_rate": 1.9030826039027943e-06, + "loss": 20.2734, + "step": 17649 + }, + { + "epoch": 0.16707528327069981, + "grad_norm": 195.5813751220703, + "learning_rate": 1.9030694367284675e-06, + "loss": 17.793, + "step": 17650 + }, + { + "epoch": 0.16708474929241487, + "grad_norm": 374.4558410644531, + "learning_rate": 1.9030562687053142e-06, + "loss": 28.3906, + "step": 17651 + }, + { + "epoch": 0.16709421531412993, + "grad_norm": 470.4617004394531, + "learning_rate": 1.903043099833347e-06, + "loss": 46.4688, + "step": 17652 + }, + { + "epoch": 0.167103681335845, + "grad_norm": 482.7084655761719, + "learning_rate": 1.9030299301125779e-06, + "loss": 48.0469, + "step": 17653 + }, + { + "epoch": 0.16711314735756003, + "grad_norm": 329.18658447265625, + "learning_rate": 1.9030167595430194e-06, + "loss": 19.7266, + "step": 17654 + }, + { + "epoch": 0.1671226133792751, + "grad_norm": 347.3111267089844, + "learning_rate": 1.903003588124684e-06, + "loss": 21.1562, + "step": 17655 + }, + { + "epoch": 0.16713207940099015, + "grad_norm": 309.45745849609375, + "learning_rate": 1.902990415857584e-06, + "loss": 25.1172, + "step": 17656 + }, + { + "epoch": 0.1671415454227052, + "grad_norm": 579.4442749023438, + "learning_rate": 1.9029772427417314e-06, + "loss": 48.875, + "step": 17657 + }, + { + "epoch": 0.16715101144442027, + "grad_norm": 515.3912963867188, + "learning_rate": 1.9029640687771389e-06, + "loss": 48.5625, + "step": 17658 + }, + { + "epoch": 0.1671604774661353, + "grad_norm": 391.3013000488281, + "learning_rate": 1.9029508939638192e-06, + "loss": 24.3281, + "step": 17659 + }, + { + "epoch": 0.16716994348785036, + "grad_norm": 788.8831787109375, + "learning_rate": 1.9029377183017843e-06, + "loss": 35.0977, + "step": 17660 + }, + { + "epoch": 0.16717940950956542, + "grad_norm": 239.90005493164062, + "learning_rate": 1.9029245417910468e-06, + "loss": 19.875, + "step": 17661 + }, + { + "epoch": 0.16718887553128048, + "grad_norm": 239.92262268066406, + "learning_rate": 1.902911364431619e-06, + "loss": 21.2422, + "step": 17662 + }, + { + "epoch": 0.1671983415529955, + "grad_norm": 903.8651733398438, + "learning_rate": 1.9028981862235132e-06, + "loss": 41.25, + "step": 17663 + }, + { + "epoch": 0.16720780757471057, + "grad_norm": 674.9968872070312, + "learning_rate": 1.9028850071667418e-06, + "loss": 37.625, + "step": 17664 + }, + { + "epoch": 0.16721727359642563, + "grad_norm": 173.25674438476562, + "learning_rate": 1.9028718272613173e-06, + "loss": 17.4609, + "step": 17665 + }, + { + "epoch": 0.1672267396181407, + "grad_norm": 129.79005432128906, + "learning_rate": 1.9028586465072518e-06, + "loss": 10.0234, + "step": 17666 + }, + { + "epoch": 0.16723620563985575, + "grad_norm": 3.0381925106048584, + "learning_rate": 1.902845464904558e-06, + "loss": 0.8896, + "step": 17667 + }, + { + "epoch": 0.16724567166157078, + "grad_norm": 267.30517578125, + "learning_rate": 1.9028322824532484e-06, + "loss": 21.9375, + "step": 17668 + }, + { + "epoch": 0.16725513768328584, + "grad_norm": 464.1353759765625, + "learning_rate": 1.9028190991533351e-06, + "loss": 49.0547, + "step": 17669 + }, + { + "epoch": 0.1672646037050009, + "grad_norm": 341.5248107910156, + "learning_rate": 1.9028059150048304e-06, + "loss": 36.125, + "step": 17670 + }, + { + "epoch": 0.16727406972671596, + "grad_norm": 455.4871826171875, + "learning_rate": 1.902792730007747e-06, + "loss": 27.5469, + "step": 17671 + }, + { + "epoch": 0.167283535748431, + "grad_norm": 670.85595703125, + "learning_rate": 1.9027795441620973e-06, + "loss": 12.2578, + "step": 17672 + }, + { + "epoch": 0.16729300177014605, + "grad_norm": 208.12249755859375, + "learning_rate": 1.9027663574678934e-06, + "loss": 20.1875, + "step": 17673 + }, + { + "epoch": 0.16730246779186111, + "grad_norm": 608.0801391601562, + "learning_rate": 1.9027531699251479e-06, + "loss": 39.0859, + "step": 17674 + }, + { + "epoch": 0.16731193381357617, + "grad_norm": 859.7506103515625, + "learning_rate": 1.902739981533873e-06, + "loss": 54.4297, + "step": 17675 + }, + { + "epoch": 0.16732139983529123, + "grad_norm": 360.2976989746094, + "learning_rate": 1.9027267922940816e-06, + "loss": 22.6211, + "step": 17676 + }, + { + "epoch": 0.16733086585700627, + "grad_norm": 1163.53662109375, + "learning_rate": 1.9027136022057854e-06, + "loss": 73.7344, + "step": 17677 + }, + { + "epoch": 0.16734033187872133, + "grad_norm": 376.98590087890625, + "learning_rate": 1.9027004112689972e-06, + "loss": 22.0156, + "step": 17678 + }, + { + "epoch": 0.1673497979004364, + "grad_norm": 810.4246826171875, + "learning_rate": 1.9026872194837296e-06, + "loss": 50.7266, + "step": 17679 + }, + { + "epoch": 0.16735926392215145, + "grad_norm": 254.180908203125, + "learning_rate": 1.9026740268499944e-06, + "loss": 28.7891, + "step": 17680 + }, + { + "epoch": 0.16736872994386648, + "grad_norm": 670.5555419921875, + "learning_rate": 1.9026608333678045e-06, + "loss": 35.6172, + "step": 17681 + }, + { + "epoch": 0.16737819596558154, + "grad_norm": 417.5400390625, + "learning_rate": 1.902647639037172e-06, + "loss": 21.4688, + "step": 17682 + }, + { + "epoch": 0.1673876619872966, + "grad_norm": 372.73944091796875, + "learning_rate": 1.9026344438581097e-06, + "loss": 34.0781, + "step": 17683 + }, + { + "epoch": 0.16739712800901166, + "grad_norm": 368.216064453125, + "learning_rate": 1.9026212478306294e-06, + "loss": 18.6094, + "step": 17684 + }, + { + "epoch": 0.16740659403072672, + "grad_norm": 483.0911560058594, + "learning_rate": 1.9026080509547442e-06, + "loss": 21.6484, + "step": 17685 + }, + { + "epoch": 0.16741606005244175, + "grad_norm": 222.82125854492188, + "learning_rate": 1.9025948532304659e-06, + "loss": 17.7344, + "step": 17686 + }, + { + "epoch": 0.1674255260741568, + "grad_norm": 249.2345733642578, + "learning_rate": 1.902581654657807e-06, + "loss": 24.8047, + "step": 17687 + }, + { + "epoch": 0.16743499209587187, + "grad_norm": 537.444580078125, + "learning_rate": 1.9025684552367805e-06, + "loss": 49.5781, + "step": 17688 + }, + { + "epoch": 0.16744445811758693, + "grad_norm": 509.4107971191406, + "learning_rate": 1.9025552549673981e-06, + "loss": 56.4688, + "step": 17689 + }, + { + "epoch": 0.16745392413930196, + "grad_norm": 241.20567321777344, + "learning_rate": 1.9025420538496725e-06, + "loss": 24.0312, + "step": 17690 + }, + { + "epoch": 0.16746339016101702, + "grad_norm": 458.91009521484375, + "learning_rate": 1.9025288518836162e-06, + "loss": 39.4531, + "step": 17691 + }, + { + "epoch": 0.16747285618273208, + "grad_norm": 322.17254638671875, + "learning_rate": 1.9025156490692415e-06, + "loss": 20.7969, + "step": 17692 + }, + { + "epoch": 0.16748232220444714, + "grad_norm": 236.8173370361328, + "learning_rate": 1.9025024454065606e-06, + "loss": 8.1504, + "step": 17693 + }, + { + "epoch": 0.1674917882261622, + "grad_norm": 296.8450622558594, + "learning_rate": 1.902489240895586e-06, + "loss": 18.7031, + "step": 17694 + }, + { + "epoch": 0.16750125424787723, + "grad_norm": 284.1463623046875, + "learning_rate": 1.9024760355363307e-06, + "loss": 22.9688, + "step": 17695 + }, + { + "epoch": 0.1675107202695923, + "grad_norm": 3.383134365081787, + "learning_rate": 1.9024628293288063e-06, + "loss": 0.936, + "step": 17696 + }, + { + "epoch": 0.16752018629130735, + "grad_norm": 1269.6239013671875, + "learning_rate": 1.9024496222730258e-06, + "loss": 50.0391, + "step": 17697 + }, + { + "epoch": 0.16752965231302241, + "grad_norm": 640.1484985351562, + "learning_rate": 1.9024364143690012e-06, + "loss": 36.8438, + "step": 17698 + }, + { + "epoch": 0.16753911833473745, + "grad_norm": 268.4124755859375, + "learning_rate": 1.902423205616745e-06, + "loss": 18.5078, + "step": 17699 + }, + { + "epoch": 0.1675485843564525, + "grad_norm": 295.1808166503906, + "learning_rate": 1.9024099960162698e-06, + "loss": 16.2969, + "step": 17700 + }, + { + "epoch": 0.16755805037816757, + "grad_norm": 343.1858215332031, + "learning_rate": 1.902396785567588e-06, + "loss": 29.25, + "step": 17701 + }, + { + "epoch": 0.16756751639988263, + "grad_norm": 593.0007934570312, + "learning_rate": 1.9023835742707116e-06, + "loss": 25.3984, + "step": 17702 + }, + { + "epoch": 0.1675769824215977, + "grad_norm": 451.3909606933594, + "learning_rate": 1.9023703621256538e-06, + "loss": 30.8203, + "step": 17703 + }, + { + "epoch": 0.16758644844331272, + "grad_norm": 462.23577880859375, + "learning_rate": 1.9023571491324263e-06, + "loss": 39.2656, + "step": 17704 + }, + { + "epoch": 0.16759591446502778, + "grad_norm": 785.9237060546875, + "learning_rate": 1.9023439352910418e-06, + "loss": 50.875, + "step": 17705 + }, + { + "epoch": 0.16760538048674284, + "grad_norm": 562.3674926757812, + "learning_rate": 1.9023307206015126e-06, + "loss": 50.0625, + "step": 17706 + }, + { + "epoch": 0.1676148465084579, + "grad_norm": 442.06982421875, + "learning_rate": 1.9023175050638513e-06, + "loss": 19.3047, + "step": 17707 + }, + { + "epoch": 0.16762431253017293, + "grad_norm": 524.97216796875, + "learning_rate": 1.9023042886780703e-06, + "loss": 33.1172, + "step": 17708 + }, + { + "epoch": 0.167633778551888, + "grad_norm": 332.382568359375, + "learning_rate": 1.902291071444182e-06, + "loss": 18.6094, + "step": 17709 + }, + { + "epoch": 0.16764324457360305, + "grad_norm": 456.53076171875, + "learning_rate": 1.902277853362199e-06, + "loss": 23.0781, + "step": 17710 + }, + { + "epoch": 0.1676527105953181, + "grad_norm": 180.86087036132812, + "learning_rate": 1.9022646344321331e-06, + "loss": 22.9453, + "step": 17711 + }, + { + "epoch": 0.16766217661703317, + "grad_norm": 516.3060302734375, + "learning_rate": 1.9022514146539972e-06, + "loss": 23.6016, + "step": 17712 + }, + { + "epoch": 0.1676716426387482, + "grad_norm": 409.3343505859375, + "learning_rate": 1.9022381940278037e-06, + "loss": 31.2188, + "step": 17713 + }, + { + "epoch": 0.16768110866046326, + "grad_norm": 499.1239318847656, + "learning_rate": 1.902224972553565e-06, + "loss": 37.9219, + "step": 17714 + }, + { + "epoch": 0.16769057468217832, + "grad_norm": 335.83111572265625, + "learning_rate": 1.9022117502312938e-06, + "loss": 18.7109, + "step": 17715 + }, + { + "epoch": 0.16770004070389338, + "grad_norm": 4.254152297973633, + "learning_rate": 1.9021985270610016e-06, + "loss": 1.0688, + "step": 17716 + }, + { + "epoch": 0.16770950672560841, + "grad_norm": 930.213623046875, + "learning_rate": 1.9021853030427022e-06, + "loss": 40.8594, + "step": 17717 + }, + { + "epoch": 0.16771897274732347, + "grad_norm": 599.6341552734375, + "learning_rate": 1.9021720781764068e-06, + "loss": 28.0703, + "step": 17718 + }, + { + "epoch": 0.16772843876903853, + "grad_norm": 313.07562255859375, + "learning_rate": 1.9021588524621289e-06, + "loss": 19.9336, + "step": 17719 + }, + { + "epoch": 0.1677379047907536, + "grad_norm": 561.8310546875, + "learning_rate": 1.9021456258998797e-06, + "loss": 55.8594, + "step": 17720 + }, + { + "epoch": 0.16774737081246865, + "grad_norm": 909.892578125, + "learning_rate": 1.9021323984896727e-06, + "loss": 49.7188, + "step": 17721 + }, + { + "epoch": 0.1677568368341837, + "grad_norm": 598.6032104492188, + "learning_rate": 1.9021191702315198e-06, + "loss": 26.1875, + "step": 17722 + }, + { + "epoch": 0.16776630285589875, + "grad_norm": 716.4925537109375, + "learning_rate": 1.9021059411254337e-06, + "loss": 49.6719, + "step": 17723 + }, + { + "epoch": 0.1677757688776138, + "grad_norm": 1362.068603515625, + "learning_rate": 1.9020927111714264e-06, + "loss": 51.793, + "step": 17724 + }, + { + "epoch": 0.16778523489932887, + "grad_norm": 693.08154296875, + "learning_rate": 1.9020794803695108e-06, + "loss": 60.3438, + "step": 17725 + }, + { + "epoch": 0.16779470092104393, + "grad_norm": 785.204833984375, + "learning_rate": 1.9020662487196995e-06, + "loss": 45.2031, + "step": 17726 + }, + { + "epoch": 0.16780416694275896, + "grad_norm": 338.90087890625, + "learning_rate": 1.9020530162220042e-06, + "loss": 21.1016, + "step": 17727 + }, + { + "epoch": 0.16781363296447402, + "grad_norm": 891.704833984375, + "learning_rate": 1.9020397828764378e-06, + "loss": 20.1387, + "step": 17728 + }, + { + "epoch": 0.16782309898618908, + "grad_norm": 619.4381713867188, + "learning_rate": 1.902026548683013e-06, + "loss": 14.1562, + "step": 17729 + }, + { + "epoch": 0.16783256500790414, + "grad_norm": 1385.9609375, + "learning_rate": 1.9020133136417415e-06, + "loss": 25.6992, + "step": 17730 + }, + { + "epoch": 0.16784203102961917, + "grad_norm": 265.01177978515625, + "learning_rate": 1.9020000777526363e-06, + "loss": 23.9609, + "step": 17731 + }, + { + "epoch": 0.16785149705133423, + "grad_norm": 392.3757019042969, + "learning_rate": 1.90198684101571e-06, + "loss": 27.3203, + "step": 17732 + }, + { + "epoch": 0.1678609630730493, + "grad_norm": 328.0782470703125, + "learning_rate": 1.9019736034309744e-06, + "loss": 24.3984, + "step": 17733 + }, + { + "epoch": 0.16787042909476435, + "grad_norm": 1046.01123046875, + "learning_rate": 1.9019603649984424e-06, + "loss": 50.9062, + "step": 17734 + }, + { + "epoch": 0.1678798951164794, + "grad_norm": 3.637129783630371, + "learning_rate": 1.9019471257181263e-06, + "loss": 1.1055, + "step": 17735 + }, + { + "epoch": 0.16788936113819444, + "grad_norm": 1050.9591064453125, + "learning_rate": 1.9019338855900384e-06, + "loss": 81.7734, + "step": 17736 + }, + { + "epoch": 0.1678988271599095, + "grad_norm": 315.4830017089844, + "learning_rate": 1.9019206446141915e-06, + "loss": 17.7188, + "step": 17737 + }, + { + "epoch": 0.16790829318162456, + "grad_norm": 623.521484375, + "learning_rate": 1.9019074027905982e-06, + "loss": 24.8203, + "step": 17738 + }, + { + "epoch": 0.16791775920333962, + "grad_norm": 271.83984375, + "learning_rate": 1.9018941601192702e-06, + "loss": 26.3828, + "step": 17739 + }, + { + "epoch": 0.16792722522505465, + "grad_norm": 712.6432495117188, + "learning_rate": 1.9018809166002202e-06, + "loss": 20.1719, + "step": 17740 + }, + { + "epoch": 0.16793669124676971, + "grad_norm": 596.4508666992188, + "learning_rate": 1.9018676722334613e-06, + "loss": 28.0469, + "step": 17741 + }, + { + "epoch": 0.16794615726848477, + "grad_norm": 707.4386596679688, + "learning_rate": 1.9018544270190052e-06, + "loss": 21.7734, + "step": 17742 + }, + { + "epoch": 0.16795562329019983, + "grad_norm": 244.18191528320312, + "learning_rate": 1.9018411809568646e-06, + "loss": 8.9023, + "step": 17743 + }, + { + "epoch": 0.1679650893119149, + "grad_norm": 514.0537109375, + "learning_rate": 1.901827934047052e-06, + "loss": 22.1406, + "step": 17744 + }, + { + "epoch": 0.16797455533362993, + "grad_norm": 528.3394775390625, + "learning_rate": 1.9018146862895797e-06, + "loss": 36.1641, + "step": 17745 + }, + { + "epoch": 0.167984021355345, + "grad_norm": 849.8679809570312, + "learning_rate": 1.9018014376844604e-06, + "loss": 21.2266, + "step": 17746 + }, + { + "epoch": 0.16799348737706005, + "grad_norm": 265.3690185546875, + "learning_rate": 1.9017881882317064e-06, + "loss": 20.6875, + "step": 17747 + }, + { + "epoch": 0.1680029533987751, + "grad_norm": 305.3695983886719, + "learning_rate": 1.90177493793133e-06, + "loss": 20.2344, + "step": 17748 + }, + { + "epoch": 0.16801241942049014, + "grad_norm": 1229.5718994140625, + "learning_rate": 1.9017616867833442e-06, + "loss": 24.2422, + "step": 17749 + }, + { + "epoch": 0.1680218854422052, + "grad_norm": 189.28660583496094, + "learning_rate": 1.9017484347877608e-06, + "loss": 14.6875, + "step": 17750 + }, + { + "epoch": 0.16803135146392026, + "grad_norm": 246.96121215820312, + "learning_rate": 1.9017351819445927e-06, + "loss": 25.3828, + "step": 17751 + }, + { + "epoch": 0.16804081748563532, + "grad_norm": 406.8390197753906, + "learning_rate": 1.9017219282538518e-06, + "loss": 40.8906, + "step": 17752 + }, + { + "epoch": 0.16805028350735038, + "grad_norm": 642.6065673828125, + "learning_rate": 1.9017086737155514e-06, + "loss": 19.1445, + "step": 17753 + }, + { + "epoch": 0.1680597495290654, + "grad_norm": 292.57330322265625, + "learning_rate": 1.9016954183297034e-06, + "loss": 42.0469, + "step": 17754 + }, + { + "epoch": 0.16806921555078047, + "grad_norm": 452.9516906738281, + "learning_rate": 1.9016821620963204e-06, + "loss": 41.0469, + "step": 17755 + }, + { + "epoch": 0.16807868157249553, + "grad_norm": 289.8093566894531, + "learning_rate": 1.9016689050154144e-06, + "loss": 17.9297, + "step": 17756 + }, + { + "epoch": 0.1680881475942106, + "grad_norm": 251.84051513671875, + "learning_rate": 1.9016556470869988e-06, + "loss": 19.3398, + "step": 17757 + }, + { + "epoch": 0.16809761361592562, + "grad_norm": 294.21588134765625, + "learning_rate": 1.9016423883110853e-06, + "loss": 25.375, + "step": 17758 + }, + { + "epoch": 0.16810707963764068, + "grad_norm": 544.9436645507812, + "learning_rate": 1.9016291286876867e-06, + "loss": 47.9922, + "step": 17759 + }, + { + "epoch": 0.16811654565935574, + "grad_norm": 2.4725825786590576, + "learning_rate": 1.9016158682168153e-06, + "loss": 0.7637, + "step": 17760 + }, + { + "epoch": 0.1681260116810708, + "grad_norm": 446.65264892578125, + "learning_rate": 1.901602606898484e-06, + "loss": 49.4688, + "step": 17761 + }, + { + "epoch": 0.16813547770278586, + "grad_norm": 198.07745361328125, + "learning_rate": 1.9015893447327043e-06, + "loss": 12.4805, + "step": 17762 + }, + { + "epoch": 0.1681449437245009, + "grad_norm": 261.5894775390625, + "learning_rate": 1.9015760817194898e-06, + "loss": 18.9531, + "step": 17763 + }, + { + "epoch": 0.16815440974621595, + "grad_norm": 174.38919067382812, + "learning_rate": 1.9015628178588519e-06, + "loss": 15.9062, + "step": 17764 + }, + { + "epoch": 0.16816387576793101, + "grad_norm": 396.42681884765625, + "learning_rate": 1.9015495531508038e-06, + "loss": 38.9688, + "step": 17765 + }, + { + "epoch": 0.16817334178964607, + "grad_norm": 405.6079406738281, + "learning_rate": 1.901536287595358e-06, + "loss": 17.9688, + "step": 17766 + }, + { + "epoch": 0.1681828078113611, + "grad_norm": 392.3843994140625, + "learning_rate": 1.9015230211925267e-06, + "loss": 31.0, + "step": 17767 + }, + { + "epoch": 0.16819227383307617, + "grad_norm": 2.960378885269165, + "learning_rate": 1.901509753942322e-06, + "loss": 1.0112, + "step": 17768 + }, + { + "epoch": 0.16820173985479123, + "grad_norm": 242.2855682373047, + "learning_rate": 1.9014964858447574e-06, + "loss": 33.7969, + "step": 17769 + }, + { + "epoch": 0.1682112058765063, + "grad_norm": 351.17388916015625, + "learning_rate": 1.9014832168998444e-06, + "loss": 20.1953, + "step": 17770 + }, + { + "epoch": 0.16822067189822135, + "grad_norm": 1036.538330078125, + "learning_rate": 1.901469947107596e-06, + "loss": 56.7031, + "step": 17771 + }, + { + "epoch": 0.16823013791993638, + "grad_norm": 277.4615173339844, + "learning_rate": 1.9014566764680241e-06, + "loss": 15.8828, + "step": 17772 + }, + { + "epoch": 0.16823960394165144, + "grad_norm": 477.1905822753906, + "learning_rate": 1.9014434049811418e-06, + "loss": 19.5703, + "step": 17773 + }, + { + "epoch": 0.1682490699633665, + "grad_norm": 3.002081871032715, + "learning_rate": 1.9014301326469614e-06, + "loss": 1.0576, + "step": 17774 + }, + { + "epoch": 0.16825853598508156, + "grad_norm": 208.9560546875, + "learning_rate": 1.9014168594654954e-06, + "loss": 19.6016, + "step": 17775 + }, + { + "epoch": 0.1682680020067966, + "grad_norm": 330.0254211425781, + "learning_rate": 1.9014035854367561e-06, + "loss": 30.0, + "step": 17776 + }, + { + "epoch": 0.16827746802851165, + "grad_norm": 875.5594482421875, + "learning_rate": 1.9013903105607558e-06, + "loss": 67.5156, + "step": 17777 + }, + { + "epoch": 0.1682869340502267, + "grad_norm": 227.6334686279297, + "learning_rate": 1.9013770348375075e-06, + "loss": 17.5703, + "step": 17778 + }, + { + "epoch": 0.16829640007194177, + "grad_norm": 728.2520751953125, + "learning_rate": 1.9013637582670235e-06, + "loss": 87.375, + "step": 17779 + }, + { + "epoch": 0.16830586609365683, + "grad_norm": 299.20086669921875, + "learning_rate": 1.9013504808493162e-06, + "loss": 22.7188, + "step": 17780 + }, + { + "epoch": 0.16831533211537186, + "grad_norm": 338.49383544921875, + "learning_rate": 1.9013372025843979e-06, + "loss": 24.1172, + "step": 17781 + }, + { + "epoch": 0.16832479813708692, + "grad_norm": 213.378662109375, + "learning_rate": 1.9013239234722814e-06, + "loss": 19.1562, + "step": 17782 + }, + { + "epoch": 0.16833426415880198, + "grad_norm": 3.0868895053863525, + "learning_rate": 1.901310643512979e-06, + "loss": 0.9185, + "step": 17783 + }, + { + "epoch": 0.16834373018051704, + "grad_norm": 356.1834716796875, + "learning_rate": 1.9012973627065033e-06, + "loss": 39.1562, + "step": 17784 + }, + { + "epoch": 0.16835319620223207, + "grad_norm": 638.5443725585938, + "learning_rate": 1.9012840810528666e-06, + "loss": 43.6562, + "step": 17785 + }, + { + "epoch": 0.16836266222394713, + "grad_norm": 254.2024688720703, + "learning_rate": 1.9012707985520815e-06, + "loss": 14.1836, + "step": 17786 + }, + { + "epoch": 0.1683721282456622, + "grad_norm": 957.7716064453125, + "learning_rate": 1.9012575152041606e-06, + "loss": 62.3945, + "step": 17787 + }, + { + "epoch": 0.16838159426737725, + "grad_norm": 387.06524658203125, + "learning_rate": 1.9012442310091161e-06, + "loss": 29.2812, + "step": 17788 + }, + { + "epoch": 0.16839106028909231, + "grad_norm": 153.4307403564453, + "learning_rate": 1.9012309459669606e-06, + "loss": 18.0, + "step": 17789 + }, + { + "epoch": 0.16840052631080735, + "grad_norm": 358.12939453125, + "learning_rate": 1.901217660077707e-06, + "loss": 49.0625, + "step": 17790 + }, + { + "epoch": 0.1684099923325224, + "grad_norm": 3.2531816959381104, + "learning_rate": 1.9012043733413672e-06, + "loss": 1.0107, + "step": 17791 + }, + { + "epoch": 0.16841945835423747, + "grad_norm": 360.8478088378906, + "learning_rate": 1.901191085757954e-06, + "loss": 10.1094, + "step": 17792 + }, + { + "epoch": 0.16842892437595253, + "grad_norm": 241.09178161621094, + "learning_rate": 1.9011777973274796e-06, + "loss": 14.6328, + "step": 17793 + }, + { + "epoch": 0.16843839039766756, + "grad_norm": 289.9788513183594, + "learning_rate": 1.9011645080499568e-06, + "loss": 21.3828, + "step": 17794 + }, + { + "epoch": 0.16844785641938262, + "grad_norm": 3.3087213039398193, + "learning_rate": 1.9011512179253982e-06, + "loss": 1.0049, + "step": 17795 + }, + { + "epoch": 0.16845732244109768, + "grad_norm": 1057.461669921875, + "learning_rate": 1.9011379269538159e-06, + "loss": 34.8594, + "step": 17796 + }, + { + "epoch": 0.16846678846281274, + "grad_norm": 1168.5474853515625, + "learning_rate": 1.9011246351352225e-06, + "loss": 44.7891, + "step": 17797 + }, + { + "epoch": 0.1684762544845278, + "grad_norm": 656.6654663085938, + "learning_rate": 1.9011113424696306e-06, + "loss": 49.3906, + "step": 17798 + }, + { + "epoch": 0.16848572050624283, + "grad_norm": 270.8592224121094, + "learning_rate": 1.9010980489570528e-06, + "loss": 18.625, + "step": 17799 + }, + { + "epoch": 0.1684951865279579, + "grad_norm": 395.4127502441406, + "learning_rate": 1.9010847545975013e-06, + "loss": 41.1875, + "step": 17800 + }, + { + "epoch": 0.16850465254967295, + "grad_norm": 210.2036895751953, + "learning_rate": 1.9010714593909887e-06, + "loss": 20.7656, + "step": 17801 + }, + { + "epoch": 0.168514118571388, + "grad_norm": 482.3031311035156, + "learning_rate": 1.9010581633375279e-06, + "loss": 37.7344, + "step": 17802 + }, + { + "epoch": 0.16852358459310304, + "grad_norm": 625.5570678710938, + "learning_rate": 1.9010448664371306e-06, + "loss": 31.9609, + "step": 17803 + }, + { + "epoch": 0.1685330506148181, + "grad_norm": 339.39141845703125, + "learning_rate": 1.90103156868981e-06, + "loss": 20.9883, + "step": 17804 + }, + { + "epoch": 0.16854251663653316, + "grad_norm": 283.72021484375, + "learning_rate": 1.9010182700955784e-06, + "loss": 15.1797, + "step": 17805 + }, + { + "epoch": 0.16855198265824822, + "grad_norm": 345.8155822753906, + "learning_rate": 1.901004970654448e-06, + "loss": 20.7344, + "step": 17806 + }, + { + "epoch": 0.16856144867996328, + "grad_norm": 495.6812744140625, + "learning_rate": 1.9009916703664317e-06, + "loss": 46.2969, + "step": 17807 + }, + { + "epoch": 0.16857091470167831, + "grad_norm": 283.1066589355469, + "learning_rate": 1.9009783692315418e-06, + "loss": 7.6367, + "step": 17808 + }, + { + "epoch": 0.16858038072339337, + "grad_norm": 315.3823547363281, + "learning_rate": 1.9009650672497908e-06, + "loss": 35.375, + "step": 17809 + }, + { + "epoch": 0.16858984674510843, + "grad_norm": 3.126279592514038, + "learning_rate": 1.9009517644211912e-06, + "loss": 0.7871, + "step": 17810 + }, + { + "epoch": 0.1685993127668235, + "grad_norm": 275.097412109375, + "learning_rate": 1.9009384607457556e-06, + "loss": 20.5391, + "step": 17811 + }, + { + "epoch": 0.16860877878853855, + "grad_norm": 2.684793472290039, + "learning_rate": 1.9009251562234967e-06, + "loss": 0.8457, + "step": 17812 + }, + { + "epoch": 0.1686182448102536, + "grad_norm": 636.2796020507812, + "learning_rate": 1.9009118508544266e-06, + "loss": 40.1406, + "step": 17813 + }, + { + "epoch": 0.16862771083196865, + "grad_norm": 484.5663757324219, + "learning_rate": 1.900898544638558e-06, + "loss": 40.3281, + "step": 17814 + }, + { + "epoch": 0.1686371768536837, + "grad_norm": 296.0337219238281, + "learning_rate": 1.9008852375759032e-06, + "loss": 20.5469, + "step": 17815 + }, + { + "epoch": 0.16864664287539877, + "grad_norm": 426.0196838378906, + "learning_rate": 1.900871929666475e-06, + "loss": 21.8828, + "step": 17816 + }, + { + "epoch": 0.1686561088971138, + "grad_norm": 439.8944091796875, + "learning_rate": 1.9008586209102857e-06, + "loss": 47.6562, + "step": 17817 + }, + { + "epoch": 0.16866557491882886, + "grad_norm": 208.7421417236328, + "learning_rate": 1.9008453113073482e-06, + "loss": 17.375, + "step": 17818 + }, + { + "epoch": 0.16867504094054392, + "grad_norm": 830.2057495117188, + "learning_rate": 1.9008320008576743e-06, + "loss": 27.4375, + "step": 17819 + }, + { + "epoch": 0.16868450696225898, + "grad_norm": 410.4068908691406, + "learning_rate": 1.9008186895612773e-06, + "loss": 31.625, + "step": 17820 + }, + { + "epoch": 0.16869397298397404, + "grad_norm": 554.7896118164062, + "learning_rate": 1.9008053774181692e-06, + "loss": 34.0781, + "step": 17821 + }, + { + "epoch": 0.16870343900568907, + "grad_norm": 3.076225996017456, + "learning_rate": 1.9007920644283627e-06, + "loss": 0.9551, + "step": 17822 + }, + { + "epoch": 0.16871290502740413, + "grad_norm": 273.26416015625, + "learning_rate": 1.90077875059187e-06, + "loss": 26.1523, + "step": 17823 + }, + { + "epoch": 0.1687223710491192, + "grad_norm": 437.48968505859375, + "learning_rate": 1.9007654359087039e-06, + "loss": 7.8242, + "step": 17824 + }, + { + "epoch": 0.16873183707083425, + "grad_norm": 295.3888244628906, + "learning_rate": 1.9007521203788772e-06, + "loss": 20.9609, + "step": 17825 + }, + { + "epoch": 0.16874130309254928, + "grad_norm": 381.0657043457031, + "learning_rate": 1.900738804002402e-06, + "loss": 32.8828, + "step": 17826 + }, + { + "epoch": 0.16875076911426434, + "grad_norm": 373.1103515625, + "learning_rate": 1.9007254867792906e-06, + "loss": 40.3594, + "step": 17827 + }, + { + "epoch": 0.1687602351359794, + "grad_norm": 444.50048828125, + "learning_rate": 1.9007121687095563e-06, + "loss": 26.8008, + "step": 17828 + }, + { + "epoch": 0.16876970115769446, + "grad_norm": 2.819035291671753, + "learning_rate": 1.9006988497932108e-06, + "loss": 0.7615, + "step": 17829 + }, + { + "epoch": 0.16877916717940952, + "grad_norm": 499.438232421875, + "learning_rate": 1.9006855300302673e-06, + "loss": 47.5469, + "step": 17830 + }, + { + "epoch": 0.16878863320112455, + "grad_norm": 919.3040771484375, + "learning_rate": 1.9006722094207377e-06, + "loss": 47.4961, + "step": 17831 + }, + { + "epoch": 0.16879809922283961, + "grad_norm": 408.87396240234375, + "learning_rate": 1.9006588879646351e-06, + "loss": 31.0, + "step": 17832 + }, + { + "epoch": 0.16880756524455467, + "grad_norm": 242.04811096191406, + "learning_rate": 1.9006455656619714e-06, + "loss": 17.3438, + "step": 17833 + }, + { + "epoch": 0.16881703126626973, + "grad_norm": 217.59149169921875, + "learning_rate": 1.9006322425127594e-06, + "loss": 22.7188, + "step": 17834 + }, + { + "epoch": 0.16882649728798477, + "grad_norm": 235.82156372070312, + "learning_rate": 1.9006189185170123e-06, + "loss": 22.9102, + "step": 17835 + }, + { + "epoch": 0.16883596330969983, + "grad_norm": 381.9217834472656, + "learning_rate": 1.9006055936747413e-06, + "loss": 17.6641, + "step": 17836 + }, + { + "epoch": 0.1688454293314149, + "grad_norm": 426.2496643066406, + "learning_rate": 1.90059226798596e-06, + "loss": 15.3828, + "step": 17837 + }, + { + "epoch": 0.16885489535312995, + "grad_norm": 561.7144775390625, + "learning_rate": 1.9005789414506808e-06, + "loss": 30.7422, + "step": 17838 + }, + { + "epoch": 0.168864361374845, + "grad_norm": 437.0665588378906, + "learning_rate": 1.9005656140689155e-06, + "loss": 44.0938, + "step": 17839 + }, + { + "epoch": 0.16887382739656004, + "grad_norm": 345.266357421875, + "learning_rate": 1.9005522858406773e-06, + "loss": 24.8594, + "step": 17840 + }, + { + "epoch": 0.1688832934182751, + "grad_norm": 489.565673828125, + "learning_rate": 1.9005389567659784e-06, + "loss": 35.375, + "step": 17841 + }, + { + "epoch": 0.16889275943999016, + "grad_norm": 1063.00439453125, + "learning_rate": 1.9005256268448316e-06, + "loss": 36.0586, + "step": 17842 + }, + { + "epoch": 0.16890222546170522, + "grad_norm": 274.7663269042969, + "learning_rate": 1.9005122960772492e-06, + "loss": 29.0781, + "step": 17843 + }, + { + "epoch": 0.16891169148342025, + "grad_norm": 568.6911010742188, + "learning_rate": 1.9004989644632441e-06, + "loss": 31.2188, + "step": 17844 + }, + { + "epoch": 0.1689211575051353, + "grad_norm": 3.3106443881988525, + "learning_rate": 1.9004856320028285e-06, + "loss": 0.8394, + "step": 17845 + }, + { + "epoch": 0.16893062352685037, + "grad_norm": 253.1864013671875, + "learning_rate": 1.9004722986960147e-06, + "loss": 17.5625, + "step": 17846 + }, + { + "epoch": 0.16894008954856543, + "grad_norm": 540.8236694335938, + "learning_rate": 1.9004589645428158e-06, + "loss": 54.5625, + "step": 17847 + }, + { + "epoch": 0.1689495555702805, + "grad_norm": 241.75242614746094, + "learning_rate": 1.9004456295432438e-06, + "loss": 15.9141, + "step": 17848 + }, + { + "epoch": 0.16895902159199552, + "grad_norm": 3.887639045715332, + "learning_rate": 1.9004322936973116e-06, + "loss": 0.7759, + "step": 17849 + }, + { + "epoch": 0.16896848761371058, + "grad_norm": 553.3565673828125, + "learning_rate": 1.9004189570050317e-06, + "loss": 23.9844, + "step": 17850 + }, + { + "epoch": 0.16897795363542564, + "grad_norm": 227.6580352783203, + "learning_rate": 1.9004056194664164e-06, + "loss": 19.0156, + "step": 17851 + }, + { + "epoch": 0.1689874196571407, + "grad_norm": 246.80880737304688, + "learning_rate": 1.9003922810814785e-06, + "loss": 19.793, + "step": 17852 + }, + { + "epoch": 0.16899688567885573, + "grad_norm": 521.127197265625, + "learning_rate": 1.9003789418502305e-06, + "loss": 46.9531, + "step": 17853 + }, + { + "epoch": 0.1690063517005708, + "grad_norm": 271.4349060058594, + "learning_rate": 1.9003656017726847e-06, + "loss": 44.9375, + "step": 17854 + }, + { + "epoch": 0.16901581772228585, + "grad_norm": 502.8125305175781, + "learning_rate": 1.9003522608488537e-06, + "loss": 42.5625, + "step": 17855 + }, + { + "epoch": 0.16902528374400091, + "grad_norm": 312.7447814941406, + "learning_rate": 1.9003389190787504e-06, + "loss": 24.2188, + "step": 17856 + }, + { + "epoch": 0.16903474976571597, + "grad_norm": 642.8194580078125, + "learning_rate": 1.900325576462387e-06, + "loss": 53.6016, + "step": 17857 + }, + { + "epoch": 0.169044215787431, + "grad_norm": 552.0877685546875, + "learning_rate": 1.9003122329997762e-06, + "loss": 30.0703, + "step": 17858 + }, + { + "epoch": 0.16905368180914607, + "grad_norm": 13145.43359375, + "learning_rate": 1.9002988886909303e-06, + "loss": 62.6328, + "step": 17859 + }, + { + "epoch": 0.16906314783086113, + "grad_norm": 380.9663391113281, + "learning_rate": 1.9002855435358623e-06, + "loss": 20.8828, + "step": 17860 + }, + { + "epoch": 0.1690726138525762, + "grad_norm": 349.7706604003906, + "learning_rate": 1.9002721975345844e-06, + "loss": 12.0898, + "step": 17861 + }, + { + "epoch": 0.16908207987429122, + "grad_norm": 496.1686096191406, + "learning_rate": 1.9002588506871092e-06, + "loss": 27.8672, + "step": 17862 + }, + { + "epoch": 0.16909154589600628, + "grad_norm": 249.62318420410156, + "learning_rate": 1.900245502993449e-06, + "loss": 22.1797, + "step": 17863 + }, + { + "epoch": 0.16910101191772134, + "grad_norm": 375.92486572265625, + "learning_rate": 1.9002321544536168e-06, + "loss": 34.1562, + "step": 17864 + }, + { + "epoch": 0.1691104779394364, + "grad_norm": 411.9693298339844, + "learning_rate": 1.9002188050676247e-06, + "loss": 44.4062, + "step": 17865 + }, + { + "epoch": 0.16911994396115146, + "grad_norm": 389.6769714355469, + "learning_rate": 1.9002054548354857e-06, + "loss": 19.043, + "step": 17866 + }, + { + "epoch": 0.1691294099828665, + "grad_norm": 1070.240966796875, + "learning_rate": 1.9001921037572122e-06, + "loss": 61.668, + "step": 17867 + }, + { + "epoch": 0.16913887600458155, + "grad_norm": 411.69232177734375, + "learning_rate": 1.9001787518328163e-06, + "loss": 24.6172, + "step": 17868 + }, + { + "epoch": 0.1691483420262966, + "grad_norm": 233.09951782226562, + "learning_rate": 1.9001653990623113e-06, + "loss": 17.5469, + "step": 17869 + }, + { + "epoch": 0.16915780804801167, + "grad_norm": 379.72900390625, + "learning_rate": 1.9001520454457094e-06, + "loss": 25.4453, + "step": 17870 + }, + { + "epoch": 0.1691672740697267, + "grad_norm": 1627.5369873046875, + "learning_rate": 1.9001386909830228e-06, + "loss": 14.4277, + "step": 17871 + }, + { + "epoch": 0.16917674009144176, + "grad_norm": 578.2105102539062, + "learning_rate": 1.9001253356742648e-06, + "loss": 39.3281, + "step": 17872 + }, + { + "epoch": 0.16918620611315682, + "grad_norm": 375.775146484375, + "learning_rate": 1.9001119795194474e-06, + "loss": 19.6641, + "step": 17873 + }, + { + "epoch": 0.16919567213487188, + "grad_norm": 326.5600280761719, + "learning_rate": 1.9000986225185831e-06, + "loss": 28.6875, + "step": 17874 + }, + { + "epoch": 0.16920513815658694, + "grad_norm": 175.64923095703125, + "learning_rate": 1.9000852646716848e-06, + "loss": 20.2188, + "step": 17875 + }, + { + "epoch": 0.16921460417830197, + "grad_norm": 346.28515625, + "learning_rate": 1.9000719059787649e-06, + "loss": 16.5078, + "step": 17876 + }, + { + "epoch": 0.16922407020001703, + "grad_norm": 307.8047180175781, + "learning_rate": 1.900058546439836e-06, + "loss": 10.5547, + "step": 17877 + }, + { + "epoch": 0.1692335362217321, + "grad_norm": 537.9241333007812, + "learning_rate": 1.9000451860549104e-06, + "loss": 27.6562, + "step": 17878 + }, + { + "epoch": 0.16924300224344715, + "grad_norm": 258.8063659667969, + "learning_rate": 1.9000318248240013e-06, + "loss": 16.3984, + "step": 17879 + }, + { + "epoch": 0.1692524682651622, + "grad_norm": 282.595458984375, + "learning_rate": 1.9000184627471208e-06, + "loss": 34.9844, + "step": 17880 + }, + { + "epoch": 0.16926193428687725, + "grad_norm": 935.4859619140625, + "learning_rate": 1.9000050998242812e-06, + "loss": 37.4375, + "step": 17881 + }, + { + "epoch": 0.1692714003085923, + "grad_norm": 521.9757690429688, + "learning_rate": 1.8999917360554954e-06, + "loss": 40.7188, + "step": 17882 + }, + { + "epoch": 0.16928086633030737, + "grad_norm": 453.9342041015625, + "learning_rate": 1.8999783714407758e-06, + "loss": 33.0312, + "step": 17883 + }, + { + "epoch": 0.16929033235202243, + "grad_norm": 232.6048126220703, + "learning_rate": 1.8999650059801353e-06, + "loss": 20.6094, + "step": 17884 + }, + { + "epoch": 0.16929979837373746, + "grad_norm": 598.7670288085938, + "learning_rate": 1.8999516396735863e-06, + "loss": 35.9219, + "step": 17885 + }, + { + "epoch": 0.16930926439545252, + "grad_norm": 716.68017578125, + "learning_rate": 1.8999382725211412e-06, + "loss": 43.3281, + "step": 17886 + }, + { + "epoch": 0.16931873041716758, + "grad_norm": 540.1483764648438, + "learning_rate": 1.8999249045228126e-06, + "loss": 14.9219, + "step": 17887 + }, + { + "epoch": 0.16932819643888264, + "grad_norm": 343.8977966308594, + "learning_rate": 1.8999115356786132e-06, + "loss": 34.9922, + "step": 17888 + }, + { + "epoch": 0.16933766246059767, + "grad_norm": 1175.350830078125, + "learning_rate": 1.8998981659885557e-06, + "loss": 65.2969, + "step": 17889 + }, + { + "epoch": 0.16934712848231273, + "grad_norm": 391.184326171875, + "learning_rate": 1.899884795452652e-06, + "loss": 26.5312, + "step": 17890 + }, + { + "epoch": 0.1693565945040278, + "grad_norm": 374.490966796875, + "learning_rate": 1.8998714240709152e-06, + "loss": 17.5703, + "step": 17891 + }, + { + "epoch": 0.16936606052574285, + "grad_norm": 510.29022216796875, + "learning_rate": 1.8998580518433583e-06, + "loss": 55.1562, + "step": 17892 + }, + { + "epoch": 0.1693755265474579, + "grad_norm": 307.5956115722656, + "learning_rate": 1.8998446787699928e-06, + "loss": 16.1328, + "step": 17893 + }, + { + "epoch": 0.16938499256917294, + "grad_norm": 364.675537109375, + "learning_rate": 1.8998313048508324e-06, + "loss": 21.0391, + "step": 17894 + }, + { + "epoch": 0.169394458590888, + "grad_norm": 933.7001953125, + "learning_rate": 1.8998179300858888e-06, + "loss": 66.1914, + "step": 17895 + }, + { + "epoch": 0.16940392461260306, + "grad_norm": 526.345947265625, + "learning_rate": 1.899804554475175e-06, + "loss": 35.4844, + "step": 17896 + }, + { + "epoch": 0.16941339063431812, + "grad_norm": 2.7978134155273438, + "learning_rate": 1.8997911780187033e-06, + "loss": 0.8433, + "step": 17897 + }, + { + "epoch": 0.16942285665603318, + "grad_norm": 275.4418029785156, + "learning_rate": 1.8997778007164865e-06, + "loss": 21.5859, + "step": 17898 + }, + { + "epoch": 0.16943232267774821, + "grad_norm": 3.2872204780578613, + "learning_rate": 1.8997644225685369e-06, + "loss": 0.9783, + "step": 17899 + }, + { + "epoch": 0.16944178869946327, + "grad_norm": 467.91070556640625, + "learning_rate": 1.8997510435748675e-06, + "loss": 31.5156, + "step": 17900 + }, + { + "epoch": 0.16945125472117833, + "grad_norm": 314.1378479003906, + "learning_rate": 1.8997376637354905e-06, + "loss": 28.0508, + "step": 17901 + }, + { + "epoch": 0.1694607207428934, + "grad_norm": 413.9714660644531, + "learning_rate": 1.8997242830504189e-06, + "loss": 26.7109, + "step": 17902 + }, + { + "epoch": 0.16947018676460843, + "grad_norm": 454.08355712890625, + "learning_rate": 1.899710901519665e-06, + "loss": 31.0703, + "step": 17903 + }, + { + "epoch": 0.1694796527863235, + "grad_norm": 447.45196533203125, + "learning_rate": 1.8996975191432409e-06, + "loss": 13.8203, + "step": 17904 + }, + { + "epoch": 0.16948911880803855, + "grad_norm": 306.5827941894531, + "learning_rate": 1.8996841359211601e-06, + "loss": 17.8828, + "step": 17905 + }, + { + "epoch": 0.1694985848297536, + "grad_norm": 482.00811767578125, + "learning_rate": 1.8996707518534345e-06, + "loss": 17.125, + "step": 17906 + }, + { + "epoch": 0.16950805085146867, + "grad_norm": 325.26019287109375, + "learning_rate": 1.8996573669400773e-06, + "loss": 33.1875, + "step": 17907 + }, + { + "epoch": 0.1695175168731837, + "grad_norm": 2.614637613296509, + "learning_rate": 1.8996439811811005e-06, + "loss": 0.9194, + "step": 17908 + }, + { + "epoch": 0.16952698289489876, + "grad_norm": 185.49945068359375, + "learning_rate": 1.8996305945765165e-06, + "loss": 16.6836, + "step": 17909 + }, + { + "epoch": 0.16953644891661382, + "grad_norm": 238.22021484375, + "learning_rate": 1.8996172071263388e-06, + "loss": 26.9219, + "step": 17910 + }, + { + "epoch": 0.16954591493832888, + "grad_norm": 343.1516418457031, + "learning_rate": 1.8996038188305792e-06, + "loss": 31.625, + "step": 17911 + }, + { + "epoch": 0.1695553809600439, + "grad_norm": 278.3705749511719, + "learning_rate": 1.8995904296892507e-06, + "loss": 21.25, + "step": 17912 + }, + { + "epoch": 0.16956484698175897, + "grad_norm": 563.8444213867188, + "learning_rate": 1.8995770397023657e-06, + "loss": 46.9062, + "step": 17913 + }, + { + "epoch": 0.16957431300347403, + "grad_norm": 659.7127685546875, + "learning_rate": 1.8995636488699366e-06, + "loss": 23.5781, + "step": 17914 + }, + { + "epoch": 0.1695837790251891, + "grad_norm": 301.1825866699219, + "learning_rate": 1.899550257191976e-06, + "loss": 23.2422, + "step": 17915 + }, + { + "epoch": 0.16959324504690415, + "grad_norm": 3.4236085414886475, + "learning_rate": 1.8995368646684971e-06, + "loss": 0.9077, + "step": 17916 + }, + { + "epoch": 0.16960271106861918, + "grad_norm": 766.3114013671875, + "learning_rate": 1.899523471299512e-06, + "loss": 53.9844, + "step": 17917 + }, + { + "epoch": 0.16961217709033424, + "grad_norm": 209.3527374267578, + "learning_rate": 1.8995100770850332e-06, + "loss": 7.3125, + "step": 17918 + }, + { + "epoch": 0.1696216431120493, + "grad_norm": 372.0699768066406, + "learning_rate": 1.8994966820250737e-06, + "loss": 36.75, + "step": 17919 + }, + { + "epoch": 0.16963110913376436, + "grad_norm": 466.7545471191406, + "learning_rate": 1.8994832861196456e-06, + "loss": 31.875, + "step": 17920 + }, + { + "epoch": 0.1696405751554794, + "grad_norm": 3.3364529609680176, + "learning_rate": 1.899469889368762e-06, + "loss": 0.9126, + "step": 17921 + }, + { + "epoch": 0.16965004117719445, + "grad_norm": 794.6751098632812, + "learning_rate": 1.899456491772435e-06, + "loss": 31.4062, + "step": 17922 + }, + { + "epoch": 0.16965950719890951, + "grad_norm": 734.7487182617188, + "learning_rate": 1.8994430933306773e-06, + "loss": 46.0469, + "step": 17923 + }, + { + "epoch": 0.16966897322062457, + "grad_norm": 316.2149658203125, + "learning_rate": 1.8994296940435018e-06, + "loss": 25.7812, + "step": 17924 + }, + { + "epoch": 0.16967843924233963, + "grad_norm": 227.4922332763672, + "learning_rate": 1.899416293910921e-06, + "loss": 23.1094, + "step": 17925 + }, + { + "epoch": 0.16968790526405467, + "grad_norm": 539.3881225585938, + "learning_rate": 1.899402892932947e-06, + "loss": 30.5391, + "step": 17926 + }, + { + "epoch": 0.16969737128576973, + "grad_norm": 443.78509521484375, + "learning_rate": 1.8993894911095932e-06, + "loss": 19.0234, + "step": 17927 + }, + { + "epoch": 0.1697068373074848, + "grad_norm": 244.57034301757812, + "learning_rate": 1.8993760884408716e-06, + "loss": 15.8359, + "step": 17928 + }, + { + "epoch": 0.16971630332919985, + "grad_norm": 632.91650390625, + "learning_rate": 1.8993626849267948e-06, + "loss": 40.6875, + "step": 17929 + }, + { + "epoch": 0.16972576935091488, + "grad_norm": 937.2901611328125, + "learning_rate": 1.899349280567376e-06, + "loss": 44.4141, + "step": 17930 + }, + { + "epoch": 0.16973523537262994, + "grad_norm": 336.186767578125, + "learning_rate": 1.899335875362627e-06, + "loss": 24.9141, + "step": 17931 + }, + { + "epoch": 0.169744701394345, + "grad_norm": 777.3245239257812, + "learning_rate": 1.899322469312561e-06, + "loss": 42.4141, + "step": 17932 + }, + { + "epoch": 0.16975416741606006, + "grad_norm": 543.038330078125, + "learning_rate": 1.8993090624171902e-06, + "loss": 47.75, + "step": 17933 + }, + { + "epoch": 0.16976363343777512, + "grad_norm": 447.0935363769531, + "learning_rate": 1.8992956546765276e-06, + "loss": 18.8984, + "step": 17934 + }, + { + "epoch": 0.16977309945949015, + "grad_norm": 357.9447937011719, + "learning_rate": 1.8992822460905855e-06, + "loss": 26.8828, + "step": 17935 + }, + { + "epoch": 0.1697825654812052, + "grad_norm": 1158.2283935546875, + "learning_rate": 1.8992688366593766e-06, + "loss": 54.3086, + "step": 17936 + }, + { + "epoch": 0.16979203150292027, + "grad_norm": 952.6947021484375, + "learning_rate": 1.8992554263829134e-06, + "loss": 11.0703, + "step": 17937 + }, + { + "epoch": 0.16980149752463533, + "grad_norm": 490.2541809082031, + "learning_rate": 1.8992420152612087e-06, + "loss": 21.9766, + "step": 17938 + }, + { + "epoch": 0.16981096354635036, + "grad_norm": 653.3577270507812, + "learning_rate": 1.899228603294275e-06, + "loss": 48.7188, + "step": 17939 + }, + { + "epoch": 0.16982042956806542, + "grad_norm": 392.95208740234375, + "learning_rate": 1.899215190482125e-06, + "loss": 47.9219, + "step": 17940 + }, + { + "epoch": 0.16982989558978048, + "grad_norm": 454.2657470703125, + "learning_rate": 1.899201776824771e-06, + "loss": 52.6719, + "step": 17941 + }, + { + "epoch": 0.16983936161149554, + "grad_norm": 530.7131958007812, + "learning_rate": 1.899188362322226e-06, + "loss": 24.9766, + "step": 17942 + }, + { + "epoch": 0.1698488276332106, + "grad_norm": 632.1306762695312, + "learning_rate": 1.8991749469745022e-06, + "loss": 37.6875, + "step": 17943 + }, + { + "epoch": 0.16985829365492564, + "grad_norm": 583.5343017578125, + "learning_rate": 1.8991615307816127e-06, + "loss": 20.1289, + "step": 17944 + }, + { + "epoch": 0.1698677596766407, + "grad_norm": 637.77197265625, + "learning_rate": 1.8991481137435699e-06, + "loss": 51.7578, + "step": 17945 + }, + { + "epoch": 0.16987722569835575, + "grad_norm": 465.25079345703125, + "learning_rate": 1.8991346958603861e-06, + "loss": 30.7344, + "step": 17946 + }, + { + "epoch": 0.16988669172007081, + "grad_norm": 846.3147583007812, + "learning_rate": 1.8991212771320744e-06, + "loss": 16.4453, + "step": 17947 + }, + { + "epoch": 0.16989615774178585, + "grad_norm": 489.2130126953125, + "learning_rate": 1.8991078575586472e-06, + "loss": 18.6562, + "step": 17948 + }, + { + "epoch": 0.1699056237635009, + "grad_norm": 246.97079467773438, + "learning_rate": 1.899094437140117e-06, + "loss": 17.7578, + "step": 17949 + }, + { + "epoch": 0.16991508978521597, + "grad_norm": 348.6344909667969, + "learning_rate": 1.8990810158764966e-06, + "loss": 23.75, + "step": 17950 + }, + { + "epoch": 0.16992455580693103, + "grad_norm": 561.7269287109375, + "learning_rate": 1.8990675937677985e-06, + "loss": 43.5508, + "step": 17951 + }, + { + "epoch": 0.1699340218286461, + "grad_norm": 375.33868408203125, + "learning_rate": 1.8990541708140352e-06, + "loss": 37.875, + "step": 17952 + }, + { + "epoch": 0.16994348785036112, + "grad_norm": 262.9178161621094, + "learning_rate": 1.8990407470152198e-06, + "loss": 11.0586, + "step": 17953 + }, + { + "epoch": 0.16995295387207618, + "grad_norm": 1521.83740234375, + "learning_rate": 1.8990273223713642e-06, + "loss": 11.6484, + "step": 17954 + }, + { + "epoch": 0.16996241989379124, + "grad_norm": 354.79327392578125, + "learning_rate": 1.8990138968824817e-06, + "loss": 24.1172, + "step": 17955 + }, + { + "epoch": 0.1699718859155063, + "grad_norm": 252.3321075439453, + "learning_rate": 1.8990004705485844e-06, + "loss": 18.0312, + "step": 17956 + }, + { + "epoch": 0.16998135193722133, + "grad_norm": 354.511474609375, + "learning_rate": 1.898987043369685e-06, + "loss": 22.1094, + "step": 17957 + }, + { + "epoch": 0.1699908179589364, + "grad_norm": 213.6039581298828, + "learning_rate": 1.8989736153457966e-06, + "loss": 20.1641, + "step": 17958 + }, + { + "epoch": 0.17000028398065145, + "grad_norm": 226.76197814941406, + "learning_rate": 1.8989601864769316e-06, + "loss": 17.3359, + "step": 17959 + }, + { + "epoch": 0.1700097500023665, + "grad_norm": 589.5108642578125, + "learning_rate": 1.8989467567631023e-06, + "loss": 56.7383, + "step": 17960 + }, + { + "epoch": 0.17001921602408157, + "grad_norm": 721.3299560546875, + "learning_rate": 1.8989333262043214e-06, + "loss": 35.3945, + "step": 17961 + }, + { + "epoch": 0.1700286820457966, + "grad_norm": 388.6555480957031, + "learning_rate": 1.8989198948006017e-06, + "loss": 25.125, + "step": 17962 + }, + { + "epoch": 0.17003814806751166, + "grad_norm": 279.7769470214844, + "learning_rate": 1.898906462551956e-06, + "loss": 14.7383, + "step": 17963 + }, + { + "epoch": 0.17004761408922672, + "grad_norm": 275.8846130371094, + "learning_rate": 1.8988930294583966e-06, + "loss": 14.3828, + "step": 17964 + }, + { + "epoch": 0.17005708011094178, + "grad_norm": 549.38427734375, + "learning_rate": 1.898879595519936e-06, + "loss": 24.2812, + "step": 17965 + }, + { + "epoch": 0.17006654613265682, + "grad_norm": 408.0734558105469, + "learning_rate": 1.8988661607365875e-06, + "loss": 19.2812, + "step": 17966 + }, + { + "epoch": 0.17007601215437188, + "grad_norm": 1309.485595703125, + "learning_rate": 1.898852725108363e-06, + "loss": 37.7891, + "step": 17967 + }, + { + "epoch": 0.17008547817608693, + "grad_norm": 815.8139038085938, + "learning_rate": 1.8988392886352753e-06, + "loss": 45.9375, + "step": 17968 + }, + { + "epoch": 0.170094944197802, + "grad_norm": 596.4310913085938, + "learning_rate": 1.8988258513173375e-06, + "loss": 36.4688, + "step": 17969 + }, + { + "epoch": 0.17010441021951705, + "grad_norm": 361.7583923339844, + "learning_rate": 1.8988124131545614e-06, + "loss": 9.7773, + "step": 17970 + }, + { + "epoch": 0.1701138762412321, + "grad_norm": 661.9918823242188, + "learning_rate": 1.8987989741469604e-06, + "loss": 30.5938, + "step": 17971 + }, + { + "epoch": 0.17012334226294715, + "grad_norm": 447.5648498535156, + "learning_rate": 1.898785534294547e-06, + "loss": 43.1719, + "step": 17972 + }, + { + "epoch": 0.1701328082846622, + "grad_norm": 471.4880676269531, + "learning_rate": 1.8987720935973332e-06, + "loss": 22.6328, + "step": 17973 + }, + { + "epoch": 0.17014227430637727, + "grad_norm": 1382.9100341796875, + "learning_rate": 1.8987586520553325e-06, + "loss": 29.0078, + "step": 17974 + }, + { + "epoch": 0.1701517403280923, + "grad_norm": 240.3826141357422, + "learning_rate": 1.898745209668557e-06, + "loss": 18.2188, + "step": 17975 + }, + { + "epoch": 0.17016120634980736, + "grad_norm": 2.8444056510925293, + "learning_rate": 1.8987317664370192e-06, + "loss": 0.9561, + "step": 17976 + }, + { + "epoch": 0.17017067237152242, + "grad_norm": 361.05291748046875, + "learning_rate": 1.8987183223607324e-06, + "loss": 29.8984, + "step": 17977 + }, + { + "epoch": 0.17018013839323748, + "grad_norm": 270.07672119140625, + "learning_rate": 1.8987048774397087e-06, + "loss": 19.3516, + "step": 17978 + }, + { + "epoch": 0.17018960441495254, + "grad_norm": 336.9197692871094, + "learning_rate": 1.898691431673961e-06, + "loss": 40.3672, + "step": 17979 + }, + { + "epoch": 0.17019907043666757, + "grad_norm": 2.9408602714538574, + "learning_rate": 1.8986779850635016e-06, + "loss": 0.9312, + "step": 17980 + }, + { + "epoch": 0.17020853645838263, + "grad_norm": 243.16546630859375, + "learning_rate": 1.8986645376083436e-06, + "loss": 18.2461, + "step": 17981 + }, + { + "epoch": 0.1702180024800977, + "grad_norm": 321.6264343261719, + "learning_rate": 1.8986510893084992e-06, + "loss": 48.9219, + "step": 17982 + }, + { + "epoch": 0.17022746850181275, + "grad_norm": 286.67401123046875, + "learning_rate": 1.8986376401639814e-06, + "loss": 27.25, + "step": 17983 + }, + { + "epoch": 0.17023693452352778, + "grad_norm": 289.6798400878906, + "learning_rate": 1.8986241901748026e-06, + "loss": 30.0938, + "step": 17984 + }, + { + "epoch": 0.17024640054524284, + "grad_norm": 248.4419403076172, + "learning_rate": 1.8986107393409757e-06, + "loss": 17.4141, + "step": 17985 + }, + { + "epoch": 0.1702558665669579, + "grad_norm": 3.0520870685577393, + "learning_rate": 1.8985972876625127e-06, + "loss": 0.9746, + "step": 17986 + }, + { + "epoch": 0.17026533258867296, + "grad_norm": 256.4686584472656, + "learning_rate": 1.898583835139427e-06, + "loss": 23.4062, + "step": 17987 + }, + { + "epoch": 0.17027479861038802, + "grad_norm": 285.01141357421875, + "learning_rate": 1.8985703817717308e-06, + "loss": 29.3047, + "step": 17988 + }, + { + "epoch": 0.17028426463210306, + "grad_norm": 376.4917907714844, + "learning_rate": 1.8985569275594374e-06, + "loss": 36.4219, + "step": 17989 + }, + { + "epoch": 0.17029373065381812, + "grad_norm": 650.9038696289062, + "learning_rate": 1.8985434725025585e-06, + "loss": 42.7656, + "step": 17990 + }, + { + "epoch": 0.17030319667553317, + "grad_norm": 522.6336059570312, + "learning_rate": 1.898530016601107e-06, + "loss": 18.1719, + "step": 17991 + }, + { + "epoch": 0.17031266269724823, + "grad_norm": 784.3931274414062, + "learning_rate": 1.898516559855096e-06, + "loss": 35.4453, + "step": 17992 + }, + { + "epoch": 0.1703221287189633, + "grad_norm": 605.96142578125, + "learning_rate": 1.8985031022645379e-06, + "loss": 42.0469, + "step": 17993 + }, + { + "epoch": 0.17033159474067833, + "grad_norm": 266.9859313964844, + "learning_rate": 1.8984896438294453e-06, + "loss": 22.1328, + "step": 17994 + }, + { + "epoch": 0.1703410607623934, + "grad_norm": 589.5961303710938, + "learning_rate": 1.898476184549831e-06, + "loss": 33.8594, + "step": 17995 + }, + { + "epoch": 0.17035052678410845, + "grad_norm": 364.2583923339844, + "learning_rate": 1.8984627244257073e-06, + "loss": 40.8906, + "step": 17996 + }, + { + "epoch": 0.1703599928058235, + "grad_norm": 452.3497009277344, + "learning_rate": 1.8984492634570875e-06, + "loss": 29.7109, + "step": 17997 + }, + { + "epoch": 0.17036945882753854, + "grad_norm": 3.4138476848602295, + "learning_rate": 1.8984358016439833e-06, + "loss": 0.9194, + "step": 17998 + }, + { + "epoch": 0.1703789248492536, + "grad_norm": 1333.2449951171875, + "learning_rate": 1.8984223389864082e-06, + "loss": 28.0859, + "step": 17999 + }, + { + "epoch": 0.17038839087096866, + "grad_norm": 329.2818603515625, + "learning_rate": 1.8984088754843745e-06, + "loss": 16.4648, + "step": 18000 + } + ], + "logging_steps": 1.0, + "max_steps": 105641, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": true, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.355322423621976e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}