{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9902091677792613, "eval_steps": 500, "global_step": 4450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00022251891410769915, "grad_norm": 15.136965569712375, "learning_rate": 0.0, "loss": 1.4091, "step": 1 }, { "epoch": 0.0004450378282153983, "grad_norm": 14.99264837846328, "learning_rate": 7.407407407407409e-08, "loss": 1.3935, "step": 2 }, { "epoch": 0.0006675567423230974, "grad_norm": 15.86403660263137, "learning_rate": 1.4814814814814817e-07, "loss": 1.4596, "step": 3 }, { "epoch": 0.0008900756564307966, "grad_norm": 15.41667307138427, "learning_rate": 2.2222222222222224e-07, "loss": 1.4311, "step": 4 }, { "epoch": 0.0011125945705384957, "grad_norm": 15.513667109820823, "learning_rate": 2.9629629629629634e-07, "loss": 1.435, "step": 5 }, { "epoch": 0.0013351134846461949, "grad_norm": 15.257978504306351, "learning_rate": 3.7037037037037036e-07, "loss": 1.4517, "step": 6 }, { "epoch": 0.001557632398753894, "grad_norm": 15.505152702603766, "learning_rate": 4.444444444444445e-07, "loss": 1.4363, "step": 7 }, { "epoch": 0.0017801513128615932, "grad_norm": 15.271668498878215, "learning_rate": 5.185185185185186e-07, "loss": 1.4137, "step": 8 }, { "epoch": 0.0020026702269692926, "grad_norm": 14.273110501089988, "learning_rate": 5.925925925925927e-07, "loss": 1.3807, "step": 9 }, { "epoch": 0.0022251891410769915, "grad_norm": 14.434315182164806, "learning_rate": 6.666666666666667e-07, "loss": 1.4018, "step": 10 }, { "epoch": 0.002447708055184691, "grad_norm": 14.444720072998669, "learning_rate": 7.407407407407407e-07, "loss": 1.3991, "step": 11 }, { "epoch": 0.0026702269692923898, "grad_norm": 12.680456499963933, "learning_rate": 8.14814814814815e-07, "loss": 1.3314, "step": 12 }, { "epoch": 0.002892745883400089, "grad_norm": 12.474876858779478, "learning_rate": 8.88888888888889e-07, "loss": 1.3222, "step": 13 }, { "epoch": 0.003115264797507788, "grad_norm": 12.013861135408924, "learning_rate": 9.62962962962963e-07, "loss": 1.3096, "step": 14 }, { "epoch": 0.0033377837116154874, "grad_norm": 11.89177158759944, "learning_rate": 1.0370370370370371e-06, "loss": 1.3217, "step": 15 }, { "epoch": 0.0035603026257231864, "grad_norm": 10.360106626406147, "learning_rate": 1.111111111111111e-06, "loss": 1.2222, "step": 16 }, { "epoch": 0.0037828215398308857, "grad_norm": 8.290419945668347, "learning_rate": 1.1851851851851854e-06, "loss": 1.1778, "step": 17 }, { "epoch": 0.004005340453938585, "grad_norm": 7.838161803388742, "learning_rate": 1.2592592592592593e-06, "loss": 1.1606, "step": 18 }, { "epoch": 0.004227859368046284, "grad_norm": 7.741292555452369, "learning_rate": 1.3333333333333334e-06, "loss": 1.133, "step": 19 }, { "epoch": 0.004450378282153983, "grad_norm": 7.525872599327424, "learning_rate": 1.4074074074074075e-06, "loss": 1.1189, "step": 20 }, { "epoch": 0.004672897196261682, "grad_norm": 7.246426726997428, "learning_rate": 1.4814814814814815e-06, "loss": 1.0875, "step": 21 }, { "epoch": 0.004895416110369382, "grad_norm": 6.975150593984728, "learning_rate": 1.5555555555555558e-06, "loss": 1.0181, "step": 22 }, { "epoch": 0.005117935024477081, "grad_norm": 4.475670586974346, "learning_rate": 1.62962962962963e-06, "loss": 0.8941, "step": 23 }, { "epoch": 0.0053404539385847796, "grad_norm": 4.281437882093947, "learning_rate": 1.7037037037037038e-06, "loss": 0.8847, "step": 24 }, { "epoch": 0.0055629728526924785, "grad_norm": 4.357528635448292, "learning_rate": 1.777777777777778e-06, "loss": 0.8674, "step": 25 }, { "epoch": 0.005785491766800178, "grad_norm": 4.0153126139744115, "learning_rate": 1.8518518518518519e-06, "loss": 0.876, "step": 26 }, { "epoch": 0.006008010680907877, "grad_norm": 3.953555838648583, "learning_rate": 1.925925925925926e-06, "loss": 0.8406, "step": 27 }, { "epoch": 0.006230529595015576, "grad_norm": 3.665997772696866, "learning_rate": 2.0000000000000003e-06, "loss": 0.8217, "step": 28 }, { "epoch": 0.006453048509123275, "grad_norm": 3.2898204418775943, "learning_rate": 2.0740740740740742e-06, "loss": 0.8178, "step": 29 }, { "epoch": 0.006675567423230975, "grad_norm": 2.975968537054257, "learning_rate": 2.148148148148148e-06, "loss": 0.8257, "step": 30 }, { "epoch": 0.006898086337338674, "grad_norm": 3.5057614109123967, "learning_rate": 2.222222222222222e-06, "loss": 0.8144, "step": 31 }, { "epoch": 0.007120605251446373, "grad_norm": 3.592993452960381, "learning_rate": 2.2962962962962964e-06, "loss": 0.7886, "step": 32 }, { "epoch": 0.007343124165554072, "grad_norm": 3.32386629123371, "learning_rate": 2.3703703703703707e-06, "loss": 0.7594, "step": 33 }, { "epoch": 0.0075656430796617715, "grad_norm": 2.853329800957974, "learning_rate": 2.4444444444444447e-06, "loss": 0.735, "step": 34 }, { "epoch": 0.00778816199376947, "grad_norm": 2.3211394113859987, "learning_rate": 2.5185185185185186e-06, "loss": 0.7487, "step": 35 }, { "epoch": 0.00801068090787717, "grad_norm": 2.2857079363880315, "learning_rate": 2.5925925925925925e-06, "loss": 0.7451, "step": 36 }, { "epoch": 0.008233199821984869, "grad_norm": 2.0153177456249574, "learning_rate": 2.666666666666667e-06, "loss": 0.7294, "step": 37 }, { "epoch": 0.008455718736092568, "grad_norm": 1.8080888614948172, "learning_rate": 2.740740740740741e-06, "loss": 0.7096, "step": 38 }, { "epoch": 0.008678237650200267, "grad_norm": 1.7657932194425667, "learning_rate": 2.814814814814815e-06, "loss": 0.7149, "step": 39 }, { "epoch": 0.008900756564307966, "grad_norm": 1.755373823212782, "learning_rate": 2.888888888888889e-06, "loss": 0.7083, "step": 40 }, { "epoch": 0.009123275478415665, "grad_norm": 1.580351320741117, "learning_rate": 2.962962962962963e-06, "loss": 0.6995, "step": 41 }, { "epoch": 0.009345794392523364, "grad_norm": 1.6176954046512564, "learning_rate": 3.0370370370370372e-06, "loss": 0.7114, "step": 42 }, { "epoch": 0.009568313306631064, "grad_norm": 1.6807271832070516, "learning_rate": 3.1111111111111116e-06, "loss": 0.6837, "step": 43 }, { "epoch": 0.009790832220738763, "grad_norm": 1.603878666301084, "learning_rate": 3.1851851851851855e-06, "loss": 0.6966, "step": 44 }, { "epoch": 0.010013351134846462, "grad_norm": 1.56248779024511, "learning_rate": 3.25925925925926e-06, "loss": 0.7058, "step": 45 }, { "epoch": 0.010235870048954161, "grad_norm": 1.5657221752212807, "learning_rate": 3.3333333333333333e-06, "loss": 0.6975, "step": 46 }, { "epoch": 0.01045838896306186, "grad_norm": 1.5784310089498428, "learning_rate": 3.4074074074074077e-06, "loss": 0.6938, "step": 47 }, { "epoch": 0.010680907877169559, "grad_norm": 1.405095724770737, "learning_rate": 3.481481481481482e-06, "loss": 0.6706, "step": 48 }, { "epoch": 0.010903426791277258, "grad_norm": 1.4290527992668824, "learning_rate": 3.555555555555556e-06, "loss": 0.6726, "step": 49 }, { "epoch": 0.011125945705384957, "grad_norm": 1.4190411683992645, "learning_rate": 3.6296296296296302e-06, "loss": 0.6791, "step": 50 }, { "epoch": 0.011348464619492658, "grad_norm": 1.5149043010890113, "learning_rate": 3.7037037037037037e-06, "loss": 0.6658, "step": 51 }, { "epoch": 0.011570983533600357, "grad_norm": 1.4007262235097018, "learning_rate": 3.777777777777778e-06, "loss": 0.6594, "step": 52 }, { "epoch": 0.011793502447708056, "grad_norm": 1.3940033492266457, "learning_rate": 3.851851851851852e-06, "loss": 0.6783, "step": 53 }, { "epoch": 0.012016021361815754, "grad_norm": 1.4166534556568036, "learning_rate": 3.925925925925926e-06, "loss": 0.6553, "step": 54 }, { "epoch": 0.012238540275923453, "grad_norm": 1.3260302265138735, "learning_rate": 4.000000000000001e-06, "loss": 0.6683, "step": 55 }, { "epoch": 0.012461059190031152, "grad_norm": 1.3941402505451106, "learning_rate": 4.074074074074074e-06, "loss": 0.6577, "step": 56 }, { "epoch": 0.012683578104138851, "grad_norm": 1.3742600425442553, "learning_rate": 4.1481481481481485e-06, "loss": 0.6497, "step": 57 }, { "epoch": 0.01290609701824655, "grad_norm": 1.3000606940322739, "learning_rate": 4.222222222222223e-06, "loss": 0.6535, "step": 58 }, { "epoch": 0.01312861593235425, "grad_norm": 1.3484838709021303, "learning_rate": 4.296296296296296e-06, "loss": 0.6521, "step": 59 }, { "epoch": 0.01335113484646195, "grad_norm": 1.3934457677287053, "learning_rate": 4.370370370370371e-06, "loss": 0.6667, "step": 60 }, { "epoch": 0.013573653760569649, "grad_norm": 1.2964446755696049, "learning_rate": 4.444444444444444e-06, "loss": 0.6418, "step": 61 }, { "epoch": 0.013796172674677348, "grad_norm": 1.2017690258961675, "learning_rate": 4.5185185185185185e-06, "loss": 0.6213, "step": 62 }, { "epoch": 0.014018691588785047, "grad_norm": 1.298790447680272, "learning_rate": 4.592592592592593e-06, "loss": 0.625, "step": 63 }, { "epoch": 0.014241210502892745, "grad_norm": 1.2914175415258702, "learning_rate": 4.666666666666667e-06, "loss": 0.6479, "step": 64 }, { "epoch": 0.014463729417000444, "grad_norm": 1.2980605028298682, "learning_rate": 4.7407407407407415e-06, "loss": 0.6459, "step": 65 }, { "epoch": 0.014686248331108143, "grad_norm": 1.2891804158055262, "learning_rate": 4.814814814814815e-06, "loss": 0.6373, "step": 66 }, { "epoch": 0.014908767245215844, "grad_norm": 1.2928348548411472, "learning_rate": 4.888888888888889e-06, "loss": 0.6102, "step": 67 }, { "epoch": 0.015131286159323543, "grad_norm": 1.3116030238018295, "learning_rate": 4.962962962962964e-06, "loss": 0.6502, "step": 68 }, { "epoch": 0.015353805073431242, "grad_norm": 1.3281750024359382, "learning_rate": 5.037037037037037e-06, "loss": 0.6285, "step": 69 }, { "epoch": 0.01557632398753894, "grad_norm": 1.2426066610176045, "learning_rate": 5.1111111111111115e-06, "loss": 0.6545, "step": 70 }, { "epoch": 0.01579884290164664, "grad_norm": 1.1943882231710237, "learning_rate": 5.185185185185185e-06, "loss": 0.6136, "step": 71 }, { "epoch": 0.01602136181575434, "grad_norm": 1.2492592088940555, "learning_rate": 5.259259259259259e-06, "loss": 0.6282, "step": 72 }, { "epoch": 0.016243880729862038, "grad_norm": 1.2145768557798735, "learning_rate": 5.333333333333334e-06, "loss": 0.6293, "step": 73 }, { "epoch": 0.016466399643969738, "grad_norm": 1.1832078724606963, "learning_rate": 5.407407407407408e-06, "loss": 0.6309, "step": 74 }, { "epoch": 0.016688918558077435, "grad_norm": 1.17400043233164, "learning_rate": 5.481481481481482e-06, "loss": 0.61, "step": 75 }, { "epoch": 0.016911437472185136, "grad_norm": 1.3057622637618462, "learning_rate": 5.555555555555557e-06, "loss": 0.6509, "step": 76 }, { "epoch": 0.017133956386292833, "grad_norm": 1.2140370305548498, "learning_rate": 5.62962962962963e-06, "loss": 0.6305, "step": 77 }, { "epoch": 0.017356475300400534, "grad_norm": 1.301717890816853, "learning_rate": 5.7037037037037045e-06, "loss": 0.6208, "step": 78 }, { "epoch": 0.017578994214508235, "grad_norm": 1.2704654115132414, "learning_rate": 5.777777777777778e-06, "loss": 0.6196, "step": 79 }, { "epoch": 0.017801513128615932, "grad_norm": 1.2847338808639248, "learning_rate": 5.8518518518518515e-06, "loss": 0.6305, "step": 80 }, { "epoch": 0.018024032042723633, "grad_norm": 1.2099178872812009, "learning_rate": 5.925925925925926e-06, "loss": 0.6265, "step": 81 }, { "epoch": 0.01824655095683133, "grad_norm": 1.176895124054048, "learning_rate": 6e-06, "loss": 0.6285, "step": 82 }, { "epoch": 0.01846906987093903, "grad_norm": 1.2115996231042403, "learning_rate": 6.0740740740740745e-06, "loss": 0.6272, "step": 83 }, { "epoch": 0.018691588785046728, "grad_norm": 1.2425730744306682, "learning_rate": 6.148148148148149e-06, "loss": 0.6235, "step": 84 }, { "epoch": 0.018914107699154428, "grad_norm": 1.224948765007962, "learning_rate": 6.222222222222223e-06, "loss": 0.617, "step": 85 }, { "epoch": 0.01913662661326213, "grad_norm": 1.1924022510337955, "learning_rate": 6.296296296296297e-06, "loss": 0.5991, "step": 86 }, { "epoch": 0.019359145527369826, "grad_norm": 1.28436813822436, "learning_rate": 6.370370370370371e-06, "loss": 0.633, "step": 87 }, { "epoch": 0.019581664441477527, "grad_norm": 1.225396362564893, "learning_rate": 6.444444444444445e-06, "loss": 0.6101, "step": 88 }, { "epoch": 0.019804183355585224, "grad_norm": 1.161623889336138, "learning_rate": 6.51851851851852e-06, "loss": 0.5933, "step": 89 }, { "epoch": 0.020026702269692925, "grad_norm": 1.2385378363555748, "learning_rate": 6.592592592592592e-06, "loss": 0.6016, "step": 90 }, { "epoch": 0.020249221183800622, "grad_norm": 1.264578286984502, "learning_rate": 6.666666666666667e-06, "loss": 0.6131, "step": 91 }, { "epoch": 0.020471740097908322, "grad_norm": 1.2521107696471214, "learning_rate": 6.740740740740741e-06, "loss": 0.6307, "step": 92 }, { "epoch": 0.02069425901201602, "grad_norm": 1.2672958550465332, "learning_rate": 6.814814814814815e-06, "loss": 0.6154, "step": 93 }, { "epoch": 0.02091677792612372, "grad_norm": 1.2972018784782147, "learning_rate": 6.88888888888889e-06, "loss": 0.6231, "step": 94 }, { "epoch": 0.02113929684023142, "grad_norm": 1.2799054744166314, "learning_rate": 6.962962962962964e-06, "loss": 0.6176, "step": 95 }, { "epoch": 0.021361815754339118, "grad_norm": 1.3156882104521799, "learning_rate": 7.0370370370370375e-06, "loss": 0.6227, "step": 96 }, { "epoch": 0.02158433466844682, "grad_norm": 1.3686621082562649, "learning_rate": 7.111111111111112e-06, "loss": 0.6215, "step": 97 }, { "epoch": 0.021806853582554516, "grad_norm": 1.2295484614849914, "learning_rate": 7.185185185185186e-06, "loss": 0.6052, "step": 98 }, { "epoch": 0.022029372496662217, "grad_norm": 1.2238378916576818, "learning_rate": 7.2592592592592605e-06, "loss": 0.6055, "step": 99 }, { "epoch": 0.022251891410769914, "grad_norm": 1.2473185428067006, "learning_rate": 7.333333333333333e-06, "loss": 0.6059, "step": 100 }, { "epoch": 0.022474410324877615, "grad_norm": 1.2051353180375, "learning_rate": 7.4074074074074075e-06, "loss": 0.6037, "step": 101 }, { "epoch": 0.022696929238985315, "grad_norm": 1.2636248600708369, "learning_rate": 7.481481481481482e-06, "loss": 0.6119, "step": 102 }, { "epoch": 0.022919448153093012, "grad_norm": 1.2806892509299552, "learning_rate": 7.555555555555556e-06, "loss": 0.5966, "step": 103 }, { "epoch": 0.023141967067200713, "grad_norm": 1.186669018204583, "learning_rate": 7.62962962962963e-06, "loss": 0.6051, "step": 104 }, { "epoch": 0.02336448598130841, "grad_norm": 1.2333292777221105, "learning_rate": 7.703703703703704e-06, "loss": 0.5975, "step": 105 }, { "epoch": 0.02358700489541611, "grad_norm": 1.174169791467086, "learning_rate": 7.77777777777778e-06, "loss": 0.5945, "step": 106 }, { "epoch": 0.023809523809523808, "grad_norm": 1.2492290959063366, "learning_rate": 7.851851851851853e-06, "loss": 0.599, "step": 107 }, { "epoch": 0.02403204272363151, "grad_norm": 1.2986831682282247, "learning_rate": 7.925925925925926e-06, "loss": 0.6058, "step": 108 }, { "epoch": 0.02425456163773921, "grad_norm": 1.2172824337494788, "learning_rate": 8.000000000000001e-06, "loss": 0.6312, "step": 109 }, { "epoch": 0.024477080551846907, "grad_norm": 1.25528679235197, "learning_rate": 8.074074074074075e-06, "loss": 0.6143, "step": 110 }, { "epoch": 0.024699599465954607, "grad_norm": 1.2805230479440322, "learning_rate": 8.148148148148148e-06, "loss": 0.6033, "step": 111 }, { "epoch": 0.024922118380062305, "grad_norm": 1.2512616637869776, "learning_rate": 8.222222222222222e-06, "loss": 0.6136, "step": 112 }, { "epoch": 0.025144637294170005, "grad_norm": 1.277466237431846, "learning_rate": 8.296296296296297e-06, "loss": 0.601, "step": 113 }, { "epoch": 0.025367156208277702, "grad_norm": 1.2797520165702911, "learning_rate": 8.37037037037037e-06, "loss": 0.5989, "step": 114 }, { "epoch": 0.025589675122385403, "grad_norm": 1.271343989068734, "learning_rate": 8.444444444444446e-06, "loss": 0.6213, "step": 115 }, { "epoch": 0.0258121940364931, "grad_norm": 1.2109214790067664, "learning_rate": 8.518518518518519e-06, "loss": 0.6057, "step": 116 }, { "epoch": 0.0260347129506008, "grad_norm": 1.288332895102178, "learning_rate": 8.592592592592593e-06, "loss": 0.5928, "step": 117 }, { "epoch": 0.0262572318647085, "grad_norm": 1.280972254133085, "learning_rate": 8.666666666666668e-06, "loss": 0.6123, "step": 118 }, { "epoch": 0.0264797507788162, "grad_norm": 1.2427292548001607, "learning_rate": 8.740740740740741e-06, "loss": 0.6103, "step": 119 }, { "epoch": 0.0267022696929239, "grad_norm": 1.3101768987222444, "learning_rate": 8.814814814814817e-06, "loss": 0.6058, "step": 120 }, { "epoch": 0.026924788607031597, "grad_norm": 1.382880833350148, "learning_rate": 8.888888888888888e-06, "loss": 0.5963, "step": 121 }, { "epoch": 0.027147307521139297, "grad_norm": 1.4286032339626367, "learning_rate": 8.962962962962963e-06, "loss": 0.6013, "step": 122 }, { "epoch": 0.027369826435246995, "grad_norm": 1.2981955679497434, "learning_rate": 9.037037037037037e-06, "loss": 0.6034, "step": 123 }, { "epoch": 0.027592345349354695, "grad_norm": 1.3278639110954835, "learning_rate": 9.111111111111112e-06, "loss": 0.6081, "step": 124 }, { "epoch": 0.027814864263462396, "grad_norm": 1.2957676800685884, "learning_rate": 9.185185185185186e-06, "loss": 0.6004, "step": 125 }, { "epoch": 0.028037383177570093, "grad_norm": 1.3340318920893854, "learning_rate": 9.25925925925926e-06, "loss": 0.5998, "step": 126 }, { "epoch": 0.028259902091677794, "grad_norm": 1.2580643273687153, "learning_rate": 9.333333333333334e-06, "loss": 0.5975, "step": 127 }, { "epoch": 0.02848242100578549, "grad_norm": 1.5111720373114075, "learning_rate": 9.407407407407408e-06, "loss": 0.6027, "step": 128 }, { "epoch": 0.02870493991989319, "grad_norm": 1.372953015702573, "learning_rate": 9.481481481481483e-06, "loss": 0.6013, "step": 129 }, { "epoch": 0.02892745883400089, "grad_norm": 1.4608964739796195, "learning_rate": 9.555555555555556e-06, "loss": 0.6039, "step": 130 }, { "epoch": 0.02914997774810859, "grad_norm": 1.2750592632293312, "learning_rate": 9.62962962962963e-06, "loss": 0.5936, "step": 131 }, { "epoch": 0.029372496662216287, "grad_norm": 1.265393643848603, "learning_rate": 9.703703703703703e-06, "loss": 0.5846, "step": 132 }, { "epoch": 0.029595015576323987, "grad_norm": 1.3288090930420628, "learning_rate": 9.777777777777779e-06, "loss": 0.6066, "step": 133 }, { "epoch": 0.029817534490431688, "grad_norm": 1.4081655977784415, "learning_rate": 9.851851851851852e-06, "loss": 0.616, "step": 134 }, { "epoch": 0.030040053404539385, "grad_norm": 1.3298128921086563, "learning_rate": 9.925925925925927e-06, "loss": 0.6007, "step": 135 }, { "epoch": 0.030262572318647086, "grad_norm": 1.434453287183046, "learning_rate": 1e-05, "loss": 0.5941, "step": 136 }, { "epoch": 0.030485091232754783, "grad_norm": 1.3810947083279819, "learning_rate": 9.999998701428113e-06, "loss": 0.6054, "step": 137 }, { "epoch": 0.030707610146862484, "grad_norm": 1.344828290678353, "learning_rate": 9.99999480571312e-06, "loss": 0.6043, "step": 138 }, { "epoch": 0.03093012906097018, "grad_norm": 1.4479868783858845, "learning_rate": 9.999988312857046e-06, "loss": 0.5913, "step": 139 }, { "epoch": 0.03115264797507788, "grad_norm": 1.3388856169463659, "learning_rate": 9.999979222863266e-06, "loss": 0.6027, "step": 140 }, { "epoch": 0.03137516688918558, "grad_norm": 1.3778786671925902, "learning_rate": 9.999967535736498e-06, "loss": 0.5835, "step": 141 }, { "epoch": 0.03159768580329328, "grad_norm": 1.373730977626657, "learning_rate": 9.999953251482817e-06, "loss": 0.5861, "step": 142 }, { "epoch": 0.03182020471740098, "grad_norm": 1.2806649102239134, "learning_rate": 9.99993637010964e-06, "loss": 0.5939, "step": 143 }, { "epoch": 0.03204272363150868, "grad_norm": 1.3044551814195569, "learning_rate": 9.999916891625736e-06, "loss": 0.5914, "step": 144 }, { "epoch": 0.03226524254561638, "grad_norm": 1.1965385465653362, "learning_rate": 9.999894816041222e-06, "loss": 0.5837, "step": 145 }, { "epoch": 0.032487761459724075, "grad_norm": 1.4588295321139508, "learning_rate": 9.999870143367565e-06, "loss": 0.6055, "step": 146 }, { "epoch": 0.03271028037383177, "grad_norm": 1.3633903672841696, "learning_rate": 9.999842873617583e-06, "loss": 0.5936, "step": 147 }, { "epoch": 0.032932799287939477, "grad_norm": 1.262515988627931, "learning_rate": 9.999813006805436e-06, "loss": 0.6052, "step": 148 }, { "epoch": 0.033155318202047174, "grad_norm": 1.2717545388841867, "learning_rate": 9.999780542946643e-06, "loss": 0.5964, "step": 149 }, { "epoch": 0.03337783711615487, "grad_norm": 1.305312733846561, "learning_rate": 9.999745482058063e-06, "loss": 0.5886, "step": 150 }, { "epoch": 0.033600356030262575, "grad_norm": 1.4093370995430785, "learning_rate": 9.999707824157909e-06, "loss": 0.6109, "step": 151 }, { "epoch": 0.03382287494437027, "grad_norm": 1.2671584956224664, "learning_rate": 9.999667569265741e-06, "loss": 0.5819, "step": 152 }, { "epoch": 0.03404539385847797, "grad_norm": 1.228989440996434, "learning_rate": 9.999624717402468e-06, "loss": 0.5834, "step": 153 }, { "epoch": 0.03426791277258567, "grad_norm": 1.3700315558953762, "learning_rate": 9.999579268590352e-06, "loss": 0.596, "step": 154 }, { "epoch": 0.03449043168669337, "grad_norm": 1.3628742993513012, "learning_rate": 9.999531222852996e-06, "loss": 0.5834, "step": 155 }, { "epoch": 0.03471295060080107, "grad_norm": 1.2004815057445861, "learning_rate": 9.999480580215356e-06, "loss": 0.5932, "step": 156 }, { "epoch": 0.034935469514908765, "grad_norm": 1.2685352646630292, "learning_rate": 9.999427340703743e-06, "loss": 0.6006, "step": 157 }, { "epoch": 0.03515798842901647, "grad_norm": 1.2662579872967188, "learning_rate": 9.999371504345806e-06, "loss": 0.5895, "step": 158 }, { "epoch": 0.035380507343124167, "grad_norm": 1.433173402928731, "learning_rate": 9.99931307117055e-06, "loss": 0.5874, "step": 159 }, { "epoch": 0.035603026257231864, "grad_norm": 1.3230456323557778, "learning_rate": 9.999252041208325e-06, "loss": 0.5983, "step": 160 }, { "epoch": 0.03582554517133956, "grad_norm": 1.302873321270265, "learning_rate": 9.999188414490834e-06, "loss": 0.6007, "step": 161 }, { "epoch": 0.036048064085447265, "grad_norm": 1.4613046512942496, "learning_rate": 9.999122191051126e-06, "loss": 0.5788, "step": 162 }, { "epoch": 0.03627058299955496, "grad_norm": 1.4201014287510574, "learning_rate": 9.9990533709236e-06, "loss": 0.5893, "step": 163 }, { "epoch": 0.03649310191366266, "grad_norm": 2.05907200179178, "learning_rate": 9.998981954144002e-06, "loss": 0.6105, "step": 164 }, { "epoch": 0.036715620827770364, "grad_norm": 1.1929643919820427, "learning_rate": 9.998907940749427e-06, "loss": 0.5856, "step": 165 }, { "epoch": 0.03693813974187806, "grad_norm": 1.2430406661997992, "learning_rate": 9.99883133077832e-06, "loss": 0.6161, "step": 166 }, { "epoch": 0.03716065865598576, "grad_norm": 1.3822645949215981, "learning_rate": 9.998752124270477e-06, "loss": 0.5944, "step": 167 }, { "epoch": 0.037383177570093455, "grad_norm": 1.3279670003404445, "learning_rate": 9.998670321267036e-06, "loss": 0.5778, "step": 168 }, { "epoch": 0.03760569648420116, "grad_norm": 1.2366742730321134, "learning_rate": 9.998585921810493e-06, "loss": 0.5907, "step": 169 }, { "epoch": 0.037828215398308856, "grad_norm": 1.3286280992840325, "learning_rate": 9.998498925944683e-06, "loss": 0.5977, "step": 170 }, { "epoch": 0.038050734312416554, "grad_norm": 1.2382731002470244, "learning_rate": 9.998409333714796e-06, "loss": 0.5852, "step": 171 }, { "epoch": 0.03827325322652426, "grad_norm": 1.2984434645137635, "learning_rate": 9.998317145167368e-06, "loss": 0.5921, "step": 172 }, { "epoch": 0.038495772140631955, "grad_norm": 1.294179188562429, "learning_rate": 9.998222360350286e-06, "loss": 0.5714, "step": 173 }, { "epoch": 0.03871829105473965, "grad_norm": 1.22644157935062, "learning_rate": 9.998124979312784e-06, "loss": 0.5943, "step": 174 }, { "epoch": 0.03894080996884735, "grad_norm": 1.2832489588420948, "learning_rate": 9.998025002105441e-06, "loss": 0.5934, "step": 175 }, { "epoch": 0.039163328882955054, "grad_norm": 1.277423889219014, "learning_rate": 9.997922428780192e-06, "loss": 0.6013, "step": 176 }, { "epoch": 0.03938584779706275, "grad_norm": 1.1826803472250706, "learning_rate": 9.997817259390314e-06, "loss": 0.5993, "step": 177 }, { "epoch": 0.03960836671117045, "grad_norm": 1.2214571499109659, "learning_rate": 9.997709493990437e-06, "loss": 0.5843, "step": 178 }, { "epoch": 0.03983088562527815, "grad_norm": 1.2197917520593953, "learning_rate": 9.997599132636538e-06, "loss": 0.5858, "step": 179 }, { "epoch": 0.04005340453938585, "grad_norm": 1.3664352356907754, "learning_rate": 9.997486175385938e-06, "loss": 0.5801, "step": 180 }, { "epoch": 0.040275923453493546, "grad_norm": 1.238381909373969, "learning_rate": 9.997370622297313e-06, "loss": 0.5791, "step": 181 }, { "epoch": 0.040498442367601244, "grad_norm": 1.2187506008134894, "learning_rate": 9.997252473430686e-06, "loss": 0.5779, "step": 182 }, { "epoch": 0.04072096128170895, "grad_norm": 1.2831936624878246, "learning_rate": 9.997131728847422e-06, "loss": 0.5782, "step": 183 }, { "epoch": 0.040943480195816645, "grad_norm": 1.2832787814915114, "learning_rate": 9.997008388610244e-06, "loss": 0.5829, "step": 184 }, { "epoch": 0.04116599910992434, "grad_norm": 1.3284950931620592, "learning_rate": 9.996882452783217e-06, "loss": 0.5846, "step": 185 }, { "epoch": 0.04138851802403204, "grad_norm": 1.2652998903810975, "learning_rate": 9.996753921431754e-06, "loss": 0.5863, "step": 186 }, { "epoch": 0.041611036938139744, "grad_norm": 1.3766753192715364, "learning_rate": 9.996622794622621e-06, "loss": 0.6041, "step": 187 }, { "epoch": 0.04183355585224744, "grad_norm": 1.212671765394685, "learning_rate": 9.996489072423927e-06, "loss": 0.593, "step": 188 }, { "epoch": 0.04205607476635514, "grad_norm": 1.343339041741583, "learning_rate": 9.996352754905133e-06, "loss": 0.597, "step": 189 }, { "epoch": 0.04227859368046284, "grad_norm": 1.2081729843164277, "learning_rate": 9.99621384213704e-06, "loss": 0.6025, "step": 190 }, { "epoch": 0.04250111259457054, "grad_norm": 1.232527395029766, "learning_rate": 9.996072334191814e-06, "loss": 0.5894, "step": 191 }, { "epoch": 0.042723631508678236, "grad_norm": 1.2939503582855414, "learning_rate": 9.995928231142949e-06, "loss": 0.5894, "step": 192 }, { "epoch": 0.042946150422785934, "grad_norm": 1.2008398844959165, "learning_rate": 9.9957815330653e-06, "loss": 0.5679, "step": 193 }, { "epoch": 0.04316866933689364, "grad_norm": 1.2467184949333043, "learning_rate": 9.995632240035065e-06, "loss": 0.583, "step": 194 }, { "epoch": 0.043391188251001335, "grad_norm": 1.218305434849445, "learning_rate": 9.995480352129794e-06, "loss": 0.5903, "step": 195 }, { "epoch": 0.04361370716510903, "grad_norm": 1.2314204003229252, "learning_rate": 9.995325869428379e-06, "loss": 0.5915, "step": 196 }, { "epoch": 0.043836226079216736, "grad_norm": 1.1737630494911826, "learning_rate": 9.995168792011062e-06, "loss": 0.5833, "step": 197 }, { "epoch": 0.044058744993324434, "grad_norm": 1.3429780393726054, "learning_rate": 9.995009119959438e-06, "loss": 0.5753, "step": 198 }, { "epoch": 0.04428126390743213, "grad_norm": 1.35859142108838, "learning_rate": 9.994846853356442e-06, "loss": 0.5899, "step": 199 }, { "epoch": 0.04450378282153983, "grad_norm": 1.189574740887382, "learning_rate": 9.994681992286359e-06, "loss": 0.5672, "step": 200 }, { "epoch": 0.04472630173564753, "grad_norm": 1.2297231477723591, "learning_rate": 9.994514536834824e-06, "loss": 0.5801, "step": 201 }, { "epoch": 0.04494882064975523, "grad_norm": 1.2097268611995209, "learning_rate": 9.994344487088818e-06, "loss": 0.5772, "step": 202 }, { "epoch": 0.045171339563862926, "grad_norm": 1.2465454510387826, "learning_rate": 9.994171843136671e-06, "loss": 0.5863, "step": 203 }, { "epoch": 0.04539385847797063, "grad_norm": 1.1868867568607728, "learning_rate": 9.993996605068057e-06, "loss": 0.5969, "step": 204 }, { "epoch": 0.04561637739207833, "grad_norm": 1.2733227664496662, "learning_rate": 9.993818772974002e-06, "loss": 0.5825, "step": 205 }, { "epoch": 0.045838896306186025, "grad_norm": 1.1707431156747603, "learning_rate": 9.993638346946875e-06, "loss": 0.5741, "step": 206 }, { "epoch": 0.04606141522029372, "grad_norm": 1.2039730664184545, "learning_rate": 9.993455327080394e-06, "loss": 0.5897, "step": 207 }, { "epoch": 0.046283934134401426, "grad_norm": 1.2184806522364138, "learning_rate": 9.99326971346963e-06, "loss": 0.5767, "step": 208 }, { "epoch": 0.046506453048509123, "grad_norm": 1.357503710311168, "learning_rate": 9.993081506210988e-06, "loss": 0.5796, "step": 209 }, { "epoch": 0.04672897196261682, "grad_norm": 1.2276325061999291, "learning_rate": 9.992890705402233e-06, "loss": 0.5937, "step": 210 }, { "epoch": 0.046951490876724525, "grad_norm": 1.226107032106942, "learning_rate": 9.992697311142474e-06, "loss": 0.595, "step": 211 }, { "epoch": 0.04717400979083222, "grad_norm": 1.231796326533193, "learning_rate": 9.992501323532161e-06, "loss": 0.5781, "step": 212 }, { "epoch": 0.04739652870493992, "grad_norm": 1.170343232471343, "learning_rate": 9.9923027426731e-06, "loss": 0.5999, "step": 213 }, { "epoch": 0.047619047619047616, "grad_norm": 1.183176159152835, "learning_rate": 9.992101568668437e-06, "loss": 0.586, "step": 214 }, { "epoch": 0.04784156653315532, "grad_norm": 1.2458931826309874, "learning_rate": 9.991897801622669e-06, "loss": 0.5797, "step": 215 }, { "epoch": 0.04806408544726302, "grad_norm": 1.4119511225647117, "learning_rate": 9.991691441641637e-06, "loss": 0.5687, "step": 216 }, { "epoch": 0.048286604361370715, "grad_norm": 1.224257291763077, "learning_rate": 9.991482488832531e-06, "loss": 0.5822, "step": 217 }, { "epoch": 0.04850912327547842, "grad_norm": 1.3264466949378402, "learning_rate": 9.991270943303886e-06, "loss": 0.5839, "step": 218 }, { "epoch": 0.048731642189586116, "grad_norm": 1.2707335127554251, "learning_rate": 9.991056805165587e-06, "loss": 0.6083, "step": 219 }, { "epoch": 0.04895416110369381, "grad_norm": 1.1763610829845843, "learning_rate": 9.99084007452886e-06, "loss": 0.592, "step": 220 }, { "epoch": 0.04917668001780151, "grad_norm": 1.2071821632401543, "learning_rate": 9.990620751506286e-06, "loss": 0.5711, "step": 221 }, { "epoch": 0.049399198931909215, "grad_norm": 1.4363004796354635, "learning_rate": 9.990398836211786e-06, "loss": 0.5807, "step": 222 }, { "epoch": 0.04962171784601691, "grad_norm": 1.2821458827639285, "learning_rate": 9.990174328760626e-06, "loss": 0.5899, "step": 223 }, { "epoch": 0.04984423676012461, "grad_norm": 1.2814538655863446, "learning_rate": 9.989947229269426e-06, "loss": 0.5549, "step": 224 }, { "epoch": 0.050066755674232306, "grad_norm": 1.1772573905453563, "learning_rate": 9.989717537856143e-06, "loss": 0.5585, "step": 225 }, { "epoch": 0.05028927458834001, "grad_norm": 1.2439004124713031, "learning_rate": 9.989485254640092e-06, "loss": 0.5854, "step": 226 }, { "epoch": 0.05051179350244771, "grad_norm": 1.2266604326278394, "learning_rate": 9.989250379741922e-06, "loss": 0.5811, "step": 227 }, { "epoch": 0.050734312416555405, "grad_norm": 1.20460958546394, "learning_rate": 9.989012913283636e-06, "loss": 0.5732, "step": 228 }, { "epoch": 0.05095683133066311, "grad_norm": 1.2986170732735538, "learning_rate": 9.98877285538858e-06, "loss": 0.5995, "step": 229 }, { "epoch": 0.051179350244770806, "grad_norm": 1.2996938914048315, "learning_rate": 9.988530206181448e-06, "loss": 0.5987, "step": 230 }, { "epoch": 0.0514018691588785, "grad_norm": 1.243959059133418, "learning_rate": 9.988284965788278e-06, "loss": 0.5802, "step": 231 }, { "epoch": 0.0516243880729862, "grad_norm": 1.3893018180652488, "learning_rate": 9.988037134336457e-06, "loss": 0.5964, "step": 232 }, { "epoch": 0.051846906987093905, "grad_norm": 1.22617618628679, "learning_rate": 9.987786711954712e-06, "loss": 0.5864, "step": 233 }, { "epoch": 0.0520694259012016, "grad_norm": 1.2067423493319835, "learning_rate": 9.987533698773122e-06, "loss": 0.5854, "step": 234 }, { "epoch": 0.0522919448153093, "grad_norm": 1.3443879518695743, "learning_rate": 9.987278094923111e-06, "loss": 0.5948, "step": 235 }, { "epoch": 0.052514463729417, "grad_norm": 1.3310886233450956, "learning_rate": 9.987019900537445e-06, "loss": 0.5849, "step": 236 }, { "epoch": 0.0527369826435247, "grad_norm": 1.2914501533855036, "learning_rate": 9.986759115750236e-06, "loss": 0.568, "step": 237 }, { "epoch": 0.0529595015576324, "grad_norm": 1.3416510317447925, "learning_rate": 9.986495740696946e-06, "loss": 0.5666, "step": 238 }, { "epoch": 0.053182020471740095, "grad_norm": 1.4094164339963482, "learning_rate": 9.98622977551438e-06, "loss": 0.5835, "step": 239 }, { "epoch": 0.0534045393858478, "grad_norm": 1.372690520617432, "learning_rate": 9.985961220340684e-06, "loss": 0.5688, "step": 240 }, { "epoch": 0.053627058299955496, "grad_norm": 1.27674146188315, "learning_rate": 9.985690075315355e-06, "loss": 0.5836, "step": 241 }, { "epoch": 0.05384957721406319, "grad_norm": 1.332916825756927, "learning_rate": 9.985416340579236e-06, "loss": 0.5764, "step": 242 }, { "epoch": 0.0540720961281709, "grad_norm": 1.2880103448254645, "learning_rate": 9.98514001627451e-06, "loss": 0.5906, "step": 243 }, { "epoch": 0.054294615042278595, "grad_norm": 1.2639641482786292, "learning_rate": 9.984861102544709e-06, "loss": 0.5743, "step": 244 }, { "epoch": 0.05451713395638629, "grad_norm": 1.3500744860028846, "learning_rate": 9.98457959953471e-06, "loss": 0.5649, "step": 245 }, { "epoch": 0.05473965287049399, "grad_norm": 1.276041902377311, "learning_rate": 9.984295507390728e-06, "loss": 0.5719, "step": 246 }, { "epoch": 0.05496217178460169, "grad_norm": 1.2912031533426904, "learning_rate": 9.984008826260337e-06, "loss": 0.5964, "step": 247 }, { "epoch": 0.05518469069870939, "grad_norm": 1.227928855016466, "learning_rate": 9.983719556292442e-06, "loss": 0.6014, "step": 248 }, { "epoch": 0.05540720961281709, "grad_norm": 1.242228134735168, "learning_rate": 9.983427697637298e-06, "loss": 0.5776, "step": 249 }, { "epoch": 0.05562972852692479, "grad_norm": 1.230120536574997, "learning_rate": 9.983133250446509e-06, "loss": 0.5777, "step": 250 }, { "epoch": 0.05585224744103249, "grad_norm": 1.2929482305845088, "learning_rate": 9.982836214873015e-06, "loss": 0.5752, "step": 251 }, { "epoch": 0.056074766355140186, "grad_norm": 1.1414817996402544, "learning_rate": 9.982536591071105e-06, "loss": 0.574, "step": 252 }, { "epoch": 0.05629728526924788, "grad_norm": 1.1901250406805426, "learning_rate": 9.982234379196415e-06, "loss": 0.5843, "step": 253 }, { "epoch": 0.05651980418335559, "grad_norm": 1.2333231063406331, "learning_rate": 9.981929579405921e-06, "loss": 0.5749, "step": 254 }, { "epoch": 0.056742323097463285, "grad_norm": 1.4618147545436566, "learning_rate": 9.981622191857944e-06, "loss": 0.5809, "step": 255 }, { "epoch": 0.05696484201157098, "grad_norm": 1.2087250078643372, "learning_rate": 9.981312216712153e-06, "loss": 0.5775, "step": 256 }, { "epoch": 0.057187360925678686, "grad_norm": 1.1426210215527852, "learning_rate": 9.980999654129556e-06, "loss": 0.5832, "step": 257 }, { "epoch": 0.05740987983978638, "grad_norm": 1.195175237195085, "learning_rate": 9.980684504272504e-06, "loss": 0.5929, "step": 258 }, { "epoch": 0.05763239875389408, "grad_norm": 1.1823787816981082, "learning_rate": 9.9803667673047e-06, "loss": 0.5713, "step": 259 }, { "epoch": 0.05785491766800178, "grad_norm": 1.2766687134375134, "learning_rate": 9.980046443391182e-06, "loss": 0.5917, "step": 260 }, { "epoch": 0.05807743658210948, "grad_norm": 2.1803710415327973, "learning_rate": 9.979723532698338e-06, "loss": 0.5789, "step": 261 }, { "epoch": 0.05829995549621718, "grad_norm": 1.164286726256095, "learning_rate": 9.979398035393894e-06, "loss": 0.5748, "step": 262 }, { "epoch": 0.058522474410324876, "grad_norm": 1.2348685407256252, "learning_rate": 9.979069951646926e-06, "loss": 0.5592, "step": 263 }, { "epoch": 0.05874499332443257, "grad_norm": 1.2153319123781787, "learning_rate": 9.97873928162785e-06, "loss": 0.5906, "step": 264 }, { "epoch": 0.05896751223854028, "grad_norm": 1.2780540406486223, "learning_rate": 9.978406025508423e-06, "loss": 0.5717, "step": 265 }, { "epoch": 0.059190031152647975, "grad_norm": 1.17383155835602, "learning_rate": 9.978070183461747e-06, "loss": 0.5624, "step": 266 }, { "epoch": 0.05941255006675567, "grad_norm": 1.1961002380524552, "learning_rate": 9.977731755662274e-06, "loss": 0.5826, "step": 267 }, { "epoch": 0.059635068980863376, "grad_norm": 1.2244854419829392, "learning_rate": 9.977390742285788e-06, "loss": 0.5624, "step": 268 }, { "epoch": 0.05985758789497107, "grad_norm": 1.231286087811102, "learning_rate": 9.977047143509423e-06, "loss": 0.5864, "step": 269 }, { "epoch": 0.06008010680907877, "grad_norm": 1.2136464133407847, "learning_rate": 9.97670095951165e-06, "loss": 0.5797, "step": 270 }, { "epoch": 0.06030262572318647, "grad_norm": 1.1603993774877008, "learning_rate": 9.976352190472294e-06, "loss": 0.5825, "step": 271 }, { "epoch": 0.06052514463729417, "grad_norm": 1.3136901795423037, "learning_rate": 9.97600083657251e-06, "loss": 0.5876, "step": 272 }, { "epoch": 0.06074766355140187, "grad_norm": 1.2564408573039128, "learning_rate": 9.975646897994804e-06, "loss": 0.5784, "step": 273 }, { "epoch": 0.060970182465509566, "grad_norm": 1.1848389369165375, "learning_rate": 9.975290374923022e-06, "loss": 0.5781, "step": 274 }, { "epoch": 0.06119270137961727, "grad_norm": 1.1804405445377886, "learning_rate": 9.974931267542351e-06, "loss": 0.5708, "step": 275 }, { "epoch": 0.06141522029372497, "grad_norm": 1.3290302348327767, "learning_rate": 9.974569576039324e-06, "loss": 0.5786, "step": 276 }, { "epoch": 0.061637739207832665, "grad_norm": 1.2457890359206538, "learning_rate": 9.974205300601809e-06, "loss": 0.5762, "step": 277 }, { "epoch": 0.06186025812194036, "grad_norm": 1.2356969513490472, "learning_rate": 9.973838441419026e-06, "loss": 0.5806, "step": 278 }, { "epoch": 0.062082777036048066, "grad_norm": 1.2199607388692053, "learning_rate": 9.973468998681533e-06, "loss": 0.5821, "step": 279 }, { "epoch": 0.06230529595015576, "grad_norm": 1.2733390062835142, "learning_rate": 9.973096972581225e-06, "loss": 0.5793, "step": 280 }, { "epoch": 0.06252781486426347, "grad_norm": 1.3314463297642543, "learning_rate": 9.972722363311342e-06, "loss": 0.5947, "step": 281 }, { "epoch": 0.06275033377837116, "grad_norm": 1.3453097690684896, "learning_rate": 9.972345171066473e-06, "loss": 0.5726, "step": 282 }, { "epoch": 0.06297285269247886, "grad_norm": 1.2073980715764006, "learning_rate": 9.97196539604254e-06, "loss": 0.5802, "step": 283 }, { "epoch": 0.06319537160658656, "grad_norm": 1.1722063524910298, "learning_rate": 9.971583038436805e-06, "loss": 0.5767, "step": 284 }, { "epoch": 0.06341789052069426, "grad_norm": 1.2619493778282025, "learning_rate": 9.971198098447881e-06, "loss": 0.5801, "step": 285 }, { "epoch": 0.06364040943480195, "grad_norm": 1.2407885992041334, "learning_rate": 9.970810576275713e-06, "loss": 0.5873, "step": 286 }, { "epoch": 0.06386292834890965, "grad_norm": 1.1359172710033665, "learning_rate": 9.970420472121594e-06, "loss": 0.5861, "step": 287 }, { "epoch": 0.06408544726301736, "grad_norm": 1.2049688345646288, "learning_rate": 9.970027786188155e-06, "loss": 0.5814, "step": 288 }, { "epoch": 0.06430796617712506, "grad_norm": 1.3237611863479715, "learning_rate": 9.969632518679366e-06, "loss": 0.5929, "step": 289 }, { "epoch": 0.06453048509123276, "grad_norm": 1.1768165669326227, "learning_rate": 9.969234669800543e-06, "loss": 0.5876, "step": 290 }, { "epoch": 0.06475300400534045, "grad_norm": 1.20258850601296, "learning_rate": 9.968834239758339e-06, "loss": 0.5849, "step": 291 }, { "epoch": 0.06497552291944815, "grad_norm": 1.1488339384482418, "learning_rate": 9.968431228760749e-06, "loss": 0.5746, "step": 292 }, { "epoch": 0.06519804183355585, "grad_norm": 1.1833603963836488, "learning_rate": 9.968025637017107e-06, "loss": 0.5787, "step": 293 }, { "epoch": 0.06542056074766354, "grad_norm": 1.1300343282277203, "learning_rate": 9.96761746473809e-06, "loss": 0.5563, "step": 294 }, { "epoch": 0.06564307966177126, "grad_norm": 1.152658892625531, "learning_rate": 9.967206712135718e-06, "loss": 0.5663, "step": 295 }, { "epoch": 0.06586559857587895, "grad_norm": 1.1352488452045768, "learning_rate": 9.96679337942334e-06, "loss": 0.5776, "step": 296 }, { "epoch": 0.06608811748998665, "grad_norm": 1.242754765586984, "learning_rate": 9.966377466815662e-06, "loss": 0.5752, "step": 297 }, { "epoch": 0.06631063640409435, "grad_norm": 1.1839328970521976, "learning_rate": 9.965958974528713e-06, "loss": 0.5932, "step": 298 }, { "epoch": 0.06653315531820204, "grad_norm": 1.2496290849735185, "learning_rate": 9.965537902779874e-06, "loss": 0.5868, "step": 299 }, { "epoch": 0.06675567423230974, "grad_norm": 1.2174483368269562, "learning_rate": 9.965114251787862e-06, "loss": 0.5813, "step": 300 }, { "epoch": 0.06697819314641744, "grad_norm": 1.1598062830935971, "learning_rate": 9.964688021772733e-06, "loss": 0.5677, "step": 301 }, { "epoch": 0.06720071206052515, "grad_norm": 1.161158555716341, "learning_rate": 9.964259212955882e-06, "loss": 0.5816, "step": 302 }, { "epoch": 0.06742323097463285, "grad_norm": 1.1822081515559015, "learning_rate": 9.963827825560044e-06, "loss": 0.5886, "step": 303 }, { "epoch": 0.06764574988874054, "grad_norm": 1.1895828602758636, "learning_rate": 9.963393859809297e-06, "loss": 0.5614, "step": 304 }, { "epoch": 0.06786826880284824, "grad_norm": 1.199217483108182, "learning_rate": 9.962957315929054e-06, "loss": 0.5731, "step": 305 }, { "epoch": 0.06809078771695594, "grad_norm": 1.1560090383246386, "learning_rate": 9.962518194146066e-06, "loss": 0.5868, "step": 306 }, { "epoch": 0.06831330663106364, "grad_norm": 1.2749077299672287, "learning_rate": 9.962076494688429e-06, "loss": 0.5745, "step": 307 }, { "epoch": 0.06853582554517133, "grad_norm": 1.1857925828433464, "learning_rate": 9.961632217785573e-06, "loss": 0.5701, "step": 308 }, { "epoch": 0.06875834445927904, "grad_norm": 1.1743713242549303, "learning_rate": 9.96118536366827e-06, "loss": 0.5782, "step": 309 }, { "epoch": 0.06898086337338674, "grad_norm": 1.2096082621540412, "learning_rate": 9.960735932568623e-06, "loss": 0.5688, "step": 310 }, { "epoch": 0.06920338228749444, "grad_norm": 1.2680079015335497, "learning_rate": 9.960283924720087e-06, "loss": 0.5719, "step": 311 }, { "epoch": 0.06942590120160214, "grad_norm": 1.3483954696881968, "learning_rate": 9.959829340357444e-06, "loss": 0.5841, "step": 312 }, { "epoch": 0.06964842011570983, "grad_norm": 1.2433940921415998, "learning_rate": 9.959372179716815e-06, "loss": 0.5652, "step": 313 }, { "epoch": 0.06987093902981753, "grad_norm": 1.1592503843964872, "learning_rate": 9.958912443035669e-06, "loss": 0.5743, "step": 314 }, { "epoch": 0.07009345794392523, "grad_norm": 1.1858672233084495, "learning_rate": 9.958450130552803e-06, "loss": 0.5683, "step": 315 }, { "epoch": 0.07031597685803294, "grad_norm": 1.1381291054697134, "learning_rate": 9.957985242508356e-06, "loss": 0.582, "step": 316 }, { "epoch": 0.07053849577214064, "grad_norm": 1.1848208896615595, "learning_rate": 9.957517779143804e-06, "loss": 0.5512, "step": 317 }, { "epoch": 0.07076101468624833, "grad_norm": 1.3996736913858467, "learning_rate": 9.957047740701959e-06, "loss": 0.5823, "step": 318 }, { "epoch": 0.07098353360035603, "grad_norm": 1.4210895871314237, "learning_rate": 9.956575127426978e-06, "loss": 0.5762, "step": 319 }, { "epoch": 0.07120605251446373, "grad_norm": 1.1991944071123857, "learning_rate": 9.956099939564343e-06, "loss": 0.5885, "step": 320 }, { "epoch": 0.07142857142857142, "grad_norm": 1.2164213799607881, "learning_rate": 9.955622177360885e-06, "loss": 0.5781, "step": 321 }, { "epoch": 0.07165109034267912, "grad_norm": 1.2723058162986411, "learning_rate": 9.955141841064766e-06, "loss": 0.5737, "step": 322 }, { "epoch": 0.07187360925678683, "grad_norm": 1.2749206366825776, "learning_rate": 9.954658930925487e-06, "loss": 0.5705, "step": 323 }, { "epoch": 0.07209612817089453, "grad_norm": 1.2030479730003805, "learning_rate": 9.954173447193885e-06, "loss": 0.5879, "step": 324 }, { "epoch": 0.07231864708500223, "grad_norm": 1.2893866503734817, "learning_rate": 9.953685390122132e-06, "loss": 0.5784, "step": 325 }, { "epoch": 0.07254116599910992, "grad_norm": 1.1361838852720354, "learning_rate": 9.953194759963742e-06, "loss": 0.5764, "step": 326 }, { "epoch": 0.07276368491321762, "grad_norm": 1.1802766296204543, "learning_rate": 9.952701556973563e-06, "loss": 0.5812, "step": 327 }, { "epoch": 0.07298620382732532, "grad_norm": 1.1664682943199938, "learning_rate": 9.952205781407775e-06, "loss": 0.5606, "step": 328 }, { "epoch": 0.07320872274143302, "grad_norm": 1.1803504294734017, "learning_rate": 9.951707433523901e-06, "loss": 0.5739, "step": 329 }, { "epoch": 0.07343124165554073, "grad_norm": 1.39838813629775, "learning_rate": 9.951206513580798e-06, "loss": 0.5685, "step": 330 }, { "epoch": 0.07365376056964842, "grad_norm": 1.2053313219349562, "learning_rate": 9.950703021838655e-06, "loss": 0.5651, "step": 331 }, { "epoch": 0.07387627948375612, "grad_norm": 1.1646097343316204, "learning_rate": 9.950196958559001e-06, "loss": 0.5641, "step": 332 }, { "epoch": 0.07409879839786382, "grad_norm": 1.2883297393279884, "learning_rate": 9.949688324004703e-06, "loss": 0.597, "step": 333 }, { "epoch": 0.07432131731197152, "grad_norm": 1.2500099247115846, "learning_rate": 9.949177118439956e-06, "loss": 0.5797, "step": 334 }, { "epoch": 0.07454383622607921, "grad_norm": 1.2633607685482895, "learning_rate": 9.9486633421303e-06, "loss": 0.5768, "step": 335 }, { "epoch": 0.07476635514018691, "grad_norm": 1.1762346559169246, "learning_rate": 9.948146995342599e-06, "loss": 0.5701, "step": 336 }, { "epoch": 0.07498887405429462, "grad_norm": 1.2130049835382035, "learning_rate": 9.947628078345063e-06, "loss": 0.5768, "step": 337 }, { "epoch": 0.07521139296840232, "grad_norm": 1.3774198802462996, "learning_rate": 9.94710659140723e-06, "loss": 0.5796, "step": 338 }, { "epoch": 0.07543391188251002, "grad_norm": 1.18206488997832, "learning_rate": 9.946582534799977e-06, "loss": 0.5636, "step": 339 }, { "epoch": 0.07565643079661771, "grad_norm": 1.2263628821663013, "learning_rate": 9.946055908795513e-06, "loss": 0.5879, "step": 340 }, { "epoch": 0.07587894971072541, "grad_norm": 1.160933989654302, "learning_rate": 9.945526713667382e-06, "loss": 0.5776, "step": 341 }, { "epoch": 0.07610146862483311, "grad_norm": 1.2745592958127698, "learning_rate": 9.944994949690466e-06, "loss": 0.5667, "step": 342 }, { "epoch": 0.0763239875389408, "grad_norm": 1.268835857230579, "learning_rate": 9.944460617140977e-06, "loss": 0.5896, "step": 343 }, { "epoch": 0.07654650645304852, "grad_norm": 1.1553863293219977, "learning_rate": 9.94392371629646e-06, "loss": 0.5624, "step": 344 }, { "epoch": 0.07676902536715621, "grad_norm": 1.1468071629040537, "learning_rate": 9.9433842474358e-06, "loss": 0.5692, "step": 345 }, { "epoch": 0.07699154428126391, "grad_norm": 1.2955186432471608, "learning_rate": 9.942842210839212e-06, "loss": 0.5926, "step": 346 }, { "epoch": 0.07721406319537161, "grad_norm": 1.1419558682733146, "learning_rate": 9.942297606788245e-06, "loss": 0.5641, "step": 347 }, { "epoch": 0.0774365821094793, "grad_norm": 1.157597441378026, "learning_rate": 9.941750435565782e-06, "loss": 0.5795, "step": 348 }, { "epoch": 0.077659101023587, "grad_norm": 1.15056040240774, "learning_rate": 9.94120069745604e-06, "loss": 0.5809, "step": 349 }, { "epoch": 0.0778816199376947, "grad_norm": 1.277028877836427, "learning_rate": 9.940648392744567e-06, "loss": 0.5681, "step": 350 }, { "epoch": 0.07810413885180241, "grad_norm": 1.1991486156785895, "learning_rate": 9.940093521718249e-06, "loss": 0.5827, "step": 351 }, { "epoch": 0.07832665776591011, "grad_norm": 1.132307367087446, "learning_rate": 9.9395360846653e-06, "loss": 0.5521, "step": 352 }, { "epoch": 0.0785491766800178, "grad_norm": 1.2409190408005806, "learning_rate": 9.938976081875267e-06, "loss": 0.5851, "step": 353 }, { "epoch": 0.0787716955941255, "grad_norm": 1.655099725484598, "learning_rate": 9.938413513639036e-06, "loss": 0.5805, "step": 354 }, { "epoch": 0.0789942145082332, "grad_norm": 1.4179630006524404, "learning_rate": 9.937848380248817e-06, "loss": 0.5731, "step": 355 }, { "epoch": 0.0792167334223409, "grad_norm": 1.1876560964380096, "learning_rate": 9.93728068199816e-06, "loss": 0.5733, "step": 356 }, { "epoch": 0.0794392523364486, "grad_norm": 1.135800445271976, "learning_rate": 9.936710419181943e-06, "loss": 0.583, "step": 357 }, { "epoch": 0.0796617712505563, "grad_norm": 1.2443677243352302, "learning_rate": 9.936137592096373e-06, "loss": 0.5821, "step": 358 }, { "epoch": 0.079884290164664, "grad_norm": 1.1775456369767079, "learning_rate": 9.935562201038999e-06, "loss": 0.5604, "step": 359 }, { "epoch": 0.0801068090787717, "grad_norm": 1.2088063776961315, "learning_rate": 9.93498424630869e-06, "loss": 0.5816, "step": 360 }, { "epoch": 0.0803293279928794, "grad_norm": 1.1398307669741774, "learning_rate": 9.934403728205655e-06, "loss": 0.5685, "step": 361 }, { "epoch": 0.08055184690698709, "grad_norm": 1.1829149349513217, "learning_rate": 9.933820647031434e-06, "loss": 0.5964, "step": 362 }, { "epoch": 0.08077436582109479, "grad_norm": 1.1120622083720373, "learning_rate": 9.933235003088893e-06, "loss": 0.5732, "step": 363 }, { "epoch": 0.08099688473520249, "grad_norm": 1.185995355685294, "learning_rate": 9.93264679668223e-06, "loss": 0.5655, "step": 364 }, { "epoch": 0.08121940364931018, "grad_norm": 1.2342278283440857, "learning_rate": 9.932056028116983e-06, "loss": 0.5698, "step": 365 }, { "epoch": 0.0814419225634179, "grad_norm": 1.2342125653521676, "learning_rate": 9.93146269770001e-06, "loss": 0.5785, "step": 366 }, { "epoch": 0.08166444147752559, "grad_norm": 1.2445218594822018, "learning_rate": 9.930866805739504e-06, "loss": 0.5604, "step": 367 }, { "epoch": 0.08188696039163329, "grad_norm": 1.2074014175583674, "learning_rate": 9.930268352544987e-06, "loss": 0.5642, "step": 368 }, { "epoch": 0.08210947930574099, "grad_norm": 1.1678188432123382, "learning_rate": 9.929667338427315e-06, "loss": 0.5479, "step": 369 }, { "epoch": 0.08233199821984868, "grad_norm": 1.2348461727715632, "learning_rate": 9.929063763698675e-06, "loss": 0.5681, "step": 370 }, { "epoch": 0.08255451713395638, "grad_norm": 1.2181192236477076, "learning_rate": 9.928457628672574e-06, "loss": 0.551, "step": 371 }, { "epoch": 0.08277703604806408, "grad_norm": 1.1600720763906516, "learning_rate": 9.927848933663862e-06, "loss": 0.5726, "step": 372 }, { "epoch": 0.08299955496217179, "grad_norm": 1.2453241960397654, "learning_rate": 9.92723767898871e-06, "loss": 0.5681, "step": 373 }, { "epoch": 0.08322207387627949, "grad_norm": 1.2151418089577355, "learning_rate": 9.926623864964622e-06, "loss": 0.5652, "step": 374 }, { "epoch": 0.08344459279038718, "grad_norm": 1.3049970548976595, "learning_rate": 9.92600749191043e-06, "loss": 0.5494, "step": 375 }, { "epoch": 0.08366711170449488, "grad_norm": 1.1253042396557704, "learning_rate": 9.925388560146295e-06, "loss": 0.5675, "step": 376 }, { "epoch": 0.08388963061860258, "grad_norm": 1.1762771449476546, "learning_rate": 9.92476706999371e-06, "loss": 0.5702, "step": 377 }, { "epoch": 0.08411214953271028, "grad_norm": 1.262408299230549, "learning_rate": 9.924143021775494e-06, "loss": 0.5776, "step": 378 }, { "epoch": 0.08433466844681797, "grad_norm": 1.2632577843187032, "learning_rate": 9.923516415815797e-06, "loss": 0.5833, "step": 379 }, { "epoch": 0.08455718736092568, "grad_norm": 1.2182640372399465, "learning_rate": 9.922887252440093e-06, "loss": 0.5983, "step": 380 }, { "epoch": 0.08477970627503338, "grad_norm": 1.2057233637776361, "learning_rate": 9.92225553197519e-06, "loss": 0.581, "step": 381 }, { "epoch": 0.08500222518914108, "grad_norm": 1.1776077452469966, "learning_rate": 9.92162125474922e-06, "loss": 0.5657, "step": 382 }, { "epoch": 0.08522474410324878, "grad_norm": 1.1888888100244468, "learning_rate": 9.920984421091649e-06, "loss": 0.5555, "step": 383 }, { "epoch": 0.08544726301735647, "grad_norm": 1.1246453682876556, "learning_rate": 9.92034503133326e-06, "loss": 0.5696, "step": 384 }, { "epoch": 0.08566978193146417, "grad_norm": 1.3187781979497002, "learning_rate": 9.919703085806176e-06, "loss": 0.5846, "step": 385 }, { "epoch": 0.08589230084557187, "grad_norm": 1.252861908372417, "learning_rate": 9.919058584843839e-06, "loss": 0.5581, "step": 386 }, { "epoch": 0.08611481975967958, "grad_norm": 1.1813141163437186, "learning_rate": 9.918411528781024e-06, "loss": 0.5698, "step": 387 }, { "epoch": 0.08633733867378728, "grad_norm": 1.2215256051594952, "learning_rate": 9.917761917953827e-06, "loss": 0.5577, "step": 388 }, { "epoch": 0.08655985758789497, "grad_norm": 1.2352265760612773, "learning_rate": 9.917109752699677e-06, "loss": 0.5956, "step": 389 }, { "epoch": 0.08678237650200267, "grad_norm": 1.1903972651196917, "learning_rate": 9.916455033357327e-06, "loss": 0.5804, "step": 390 }, { "epoch": 0.08700489541611037, "grad_norm": 1.2343322152354808, "learning_rate": 9.915797760266857e-06, "loss": 0.5652, "step": 391 }, { "epoch": 0.08722741433021806, "grad_norm": 1.167978577160961, "learning_rate": 9.915137933769674e-06, "loss": 0.5755, "step": 392 }, { "epoch": 0.08744993324432576, "grad_norm": 1.2489754662685442, "learning_rate": 9.914475554208509e-06, "loss": 0.5707, "step": 393 }, { "epoch": 0.08767245215843347, "grad_norm": 1.1616083865356748, "learning_rate": 9.913810621927423e-06, "loss": 0.5599, "step": 394 }, { "epoch": 0.08789497107254117, "grad_norm": 1.2693600559285871, "learning_rate": 9.9131431372718e-06, "loss": 0.5731, "step": 395 }, { "epoch": 0.08811748998664887, "grad_norm": 1.1361035731509999, "learning_rate": 9.91247310058835e-06, "loss": 0.5807, "step": 396 }, { "epoch": 0.08834000890075656, "grad_norm": 1.556752942587536, "learning_rate": 9.911800512225113e-06, "loss": 0.561, "step": 397 }, { "epoch": 0.08856252781486426, "grad_norm": 1.2350982900667944, "learning_rate": 9.911125372531445e-06, "loss": 0.5822, "step": 398 }, { "epoch": 0.08878504672897196, "grad_norm": 1.1284877197489847, "learning_rate": 9.910447681858037e-06, "loss": 0.576, "step": 399 }, { "epoch": 0.08900756564307966, "grad_norm": 1.1977526831357006, "learning_rate": 9.9097674405569e-06, "loss": 0.579, "step": 400 }, { "epoch": 0.08923008455718737, "grad_norm": 1.174163808487166, "learning_rate": 9.909084648981373e-06, "loss": 0.5706, "step": 401 }, { "epoch": 0.08945260347129506, "grad_norm": 1.1957035534517824, "learning_rate": 9.908399307486113e-06, "loss": 0.5625, "step": 402 }, { "epoch": 0.08967512238540276, "grad_norm": 1.158936865779494, "learning_rate": 9.907711416427108e-06, "loss": 0.5679, "step": 403 }, { "epoch": 0.08989764129951046, "grad_norm": 1.2348645495349837, "learning_rate": 9.907020976161672e-06, "loss": 0.5656, "step": 404 }, { "epoch": 0.09012016021361816, "grad_norm": 1.17795863189698, "learning_rate": 9.906327987048435e-06, "loss": 0.5731, "step": 405 }, { "epoch": 0.09034267912772585, "grad_norm": 1.1674205875281765, "learning_rate": 9.905632449447355e-06, "loss": 0.5596, "step": 406 }, { "epoch": 0.09056519804183355, "grad_norm": 1.126911084492299, "learning_rate": 9.904934363719719e-06, "loss": 0.5403, "step": 407 }, { "epoch": 0.09078771695594126, "grad_norm": 1.2197463213347093, "learning_rate": 9.904233730228126e-06, "loss": 0.5652, "step": 408 }, { "epoch": 0.09101023587004896, "grad_norm": 1.2581030581413524, "learning_rate": 9.903530549336513e-06, "loss": 0.5743, "step": 409 }, { "epoch": 0.09123275478415666, "grad_norm": 1.2038768086899765, "learning_rate": 9.902824821410126e-06, "loss": 0.574, "step": 410 }, { "epoch": 0.09145527369826435, "grad_norm": 1.2313198011344086, "learning_rate": 9.902116546815545e-06, "loss": 0.5497, "step": 411 }, { "epoch": 0.09167779261237205, "grad_norm": 1.2139588508872137, "learning_rate": 9.901405725920665e-06, "loss": 0.5506, "step": 412 }, { "epoch": 0.09190031152647975, "grad_norm": 1.2538496854344154, "learning_rate": 9.900692359094708e-06, "loss": 0.57, "step": 413 }, { "epoch": 0.09212283044058744, "grad_norm": 1.2199201808313793, "learning_rate": 9.899976446708217e-06, "loss": 0.5892, "step": 414 }, { "epoch": 0.09234534935469516, "grad_norm": 1.1778179894211929, "learning_rate": 9.899257989133057e-06, "loss": 0.5886, "step": 415 }, { "epoch": 0.09256786826880285, "grad_norm": 1.1872678725246961, "learning_rate": 9.898536986742418e-06, "loss": 0.5757, "step": 416 }, { "epoch": 0.09279038718291055, "grad_norm": 1.2123518272069156, "learning_rate": 9.897813439910806e-06, "loss": 0.552, "step": 417 }, { "epoch": 0.09301290609701825, "grad_norm": 1.205014833597777, "learning_rate": 9.897087349014054e-06, "loss": 0.5823, "step": 418 }, { "epoch": 0.09323542501112594, "grad_norm": 1.2204640063241174, "learning_rate": 9.896358714429315e-06, "loss": 0.605, "step": 419 }, { "epoch": 0.09345794392523364, "grad_norm": 1.146674500504516, "learning_rate": 9.89562753653506e-06, "loss": 0.562, "step": 420 }, { "epoch": 0.09368046283934134, "grad_norm": 1.1860827488038819, "learning_rate": 9.894893815711087e-06, "loss": 0.5694, "step": 421 }, { "epoch": 0.09390298175344905, "grad_norm": 1.26789441411751, "learning_rate": 9.894157552338511e-06, "loss": 0.5894, "step": 422 }, { "epoch": 0.09412550066755675, "grad_norm": 1.1574211683434188, "learning_rate": 9.893418746799766e-06, "loss": 0.5542, "step": 423 }, { "epoch": 0.09434801958166444, "grad_norm": 1.3432891981428983, "learning_rate": 9.89267739947861e-06, "loss": 0.5812, "step": 424 }, { "epoch": 0.09457053849577214, "grad_norm": 1.2608717041938917, "learning_rate": 9.891933510760123e-06, "loss": 0.5849, "step": 425 }, { "epoch": 0.09479305740987984, "grad_norm": 1.2355253977583236, "learning_rate": 9.891187081030698e-06, "loss": 0.5898, "step": 426 }, { "epoch": 0.09501557632398754, "grad_norm": 1.1984789609869713, "learning_rate": 9.890438110678053e-06, "loss": 0.5447, "step": 427 }, { "epoch": 0.09523809523809523, "grad_norm": 1.2332317963794688, "learning_rate": 9.889686600091228e-06, "loss": 0.5503, "step": 428 }, { "epoch": 0.09546061415220294, "grad_norm": 1.1977886794282715, "learning_rate": 9.888932549660576e-06, "loss": 0.5472, "step": 429 }, { "epoch": 0.09568313306631064, "grad_norm": 1.2470905304718005, "learning_rate": 9.888175959777772e-06, "loss": 0.5527, "step": 430 }, { "epoch": 0.09590565198041834, "grad_norm": 1.2811344653470542, "learning_rate": 9.887416830835814e-06, "loss": 0.5576, "step": 431 }, { "epoch": 0.09612817089452604, "grad_norm": 1.2219587946024653, "learning_rate": 9.886655163229014e-06, "loss": 0.5626, "step": 432 }, { "epoch": 0.09635068980863373, "grad_norm": 1.2688322572135997, "learning_rate": 9.885890957353e-06, "loss": 0.5841, "step": 433 }, { "epoch": 0.09657320872274143, "grad_norm": 1.2378364874711245, "learning_rate": 9.885124213604728e-06, "loss": 0.5679, "step": 434 }, { "epoch": 0.09679572763684913, "grad_norm": 1.1944901107143704, "learning_rate": 9.884354932382464e-06, "loss": 0.5505, "step": 435 }, { "epoch": 0.09701824655095684, "grad_norm": 1.296537863234703, "learning_rate": 9.883583114085795e-06, "loss": 0.5797, "step": 436 }, { "epoch": 0.09724076546506454, "grad_norm": 1.2674805343603728, "learning_rate": 9.882808759115628e-06, "loss": 0.5772, "step": 437 }, { "epoch": 0.09746328437917223, "grad_norm": 1.2546247557690884, "learning_rate": 9.88203186787418e-06, "loss": 0.5766, "step": 438 }, { "epoch": 0.09768580329327993, "grad_norm": 1.1681993173653615, "learning_rate": 9.881252440764997e-06, "loss": 0.5873, "step": 439 }, { "epoch": 0.09790832220738763, "grad_norm": 1.2088756211469174, "learning_rate": 9.880470478192932e-06, "loss": 0.5716, "step": 440 }, { "epoch": 0.09813084112149532, "grad_norm": 1.1716454565767909, "learning_rate": 9.879685980564158e-06, "loss": 0.5666, "step": 441 }, { "epoch": 0.09835336003560302, "grad_norm": 1.2228512537465606, "learning_rate": 9.878898948286169e-06, "loss": 0.5886, "step": 442 }, { "epoch": 0.09857587894971072, "grad_norm": 1.2086712811141211, "learning_rate": 9.878109381767769e-06, "loss": 0.5552, "step": 443 }, { "epoch": 0.09879839786381843, "grad_norm": 1.070033926041266, "learning_rate": 9.877317281419083e-06, "loss": 0.5565, "step": 444 }, { "epoch": 0.09902091677792613, "grad_norm": 1.19483530657273, "learning_rate": 9.876522647651552e-06, "loss": 0.5668, "step": 445 }, { "epoch": 0.09924343569203382, "grad_norm": 1.2077239066724406, "learning_rate": 9.875725480877929e-06, "loss": 0.57, "step": 446 }, { "epoch": 0.09946595460614152, "grad_norm": 1.1599588089616186, "learning_rate": 9.874925781512287e-06, "loss": 0.5562, "step": 447 }, { "epoch": 0.09968847352024922, "grad_norm": 1.1590193315830444, "learning_rate": 9.874123549970011e-06, "loss": 0.5612, "step": 448 }, { "epoch": 0.09991099243435692, "grad_norm": 1.2615852985645684, "learning_rate": 9.873318786667808e-06, "loss": 0.5717, "step": 449 }, { "epoch": 0.10013351134846461, "grad_norm": 1.2743901541701341, "learning_rate": 9.872511492023688e-06, "loss": 0.5684, "step": 450 }, { "epoch": 0.10035603026257232, "grad_norm": 1.2167054929195051, "learning_rate": 9.87170166645699e-06, "loss": 0.5859, "step": 451 }, { "epoch": 0.10057854917668002, "grad_norm": 1.1449881632004497, "learning_rate": 9.870889310388356e-06, "loss": 0.5637, "step": 452 }, { "epoch": 0.10080106809078772, "grad_norm": 1.087555032375584, "learning_rate": 9.87007442423975e-06, "loss": 0.5669, "step": 453 }, { "epoch": 0.10102358700489542, "grad_norm": 1.115361479333903, "learning_rate": 9.869257008434445e-06, "loss": 0.5602, "step": 454 }, { "epoch": 0.10124610591900311, "grad_norm": 1.210239945669705, "learning_rate": 9.868437063397031e-06, "loss": 0.5799, "step": 455 }, { "epoch": 0.10146862483311081, "grad_norm": 1.1208479954814439, "learning_rate": 9.867614589553412e-06, "loss": 0.5691, "step": 456 }, { "epoch": 0.10169114374721851, "grad_norm": 1.2189506883818013, "learning_rate": 9.866789587330803e-06, "loss": 0.5613, "step": 457 }, { "epoch": 0.10191366266132622, "grad_norm": 1.0936015467153724, "learning_rate": 9.865962057157734e-06, "loss": 0.5733, "step": 458 }, { "epoch": 0.10213618157543392, "grad_norm": 1.1325594894167987, "learning_rate": 9.86513199946405e-06, "loss": 0.5615, "step": 459 }, { "epoch": 0.10235870048954161, "grad_norm": 1.1692785710027258, "learning_rate": 9.864299414680904e-06, "loss": 0.5446, "step": 460 }, { "epoch": 0.10258121940364931, "grad_norm": 1.3006787710206809, "learning_rate": 9.863464303240768e-06, "loss": 0.5533, "step": 461 }, { "epoch": 0.102803738317757, "grad_norm": 1.17861778906393, "learning_rate": 9.86262666557742e-06, "loss": 0.5571, "step": 462 }, { "epoch": 0.1030262572318647, "grad_norm": 1.2037608200839771, "learning_rate": 9.861786502125954e-06, "loss": 0.5684, "step": 463 }, { "epoch": 0.1032487761459724, "grad_norm": 1.1304747237149473, "learning_rate": 9.860943813322776e-06, "loss": 0.546, "step": 464 }, { "epoch": 0.10347129506008011, "grad_norm": 1.1906389185583686, "learning_rate": 9.8600985996056e-06, "loss": 0.5632, "step": 465 }, { "epoch": 0.10369381397418781, "grad_norm": 1.129475188814438, "learning_rate": 9.859250861413456e-06, "loss": 0.5584, "step": 466 }, { "epoch": 0.1039163328882955, "grad_norm": 1.1826545277909968, "learning_rate": 9.858400599186686e-06, "loss": 0.551, "step": 467 }, { "epoch": 0.1041388518024032, "grad_norm": 1.1621282205651482, "learning_rate": 9.857547813366937e-06, "loss": 0.5563, "step": 468 }, { "epoch": 0.1043613707165109, "grad_norm": 1.175252430692712, "learning_rate": 9.856692504397171e-06, "loss": 0.5652, "step": 469 }, { "epoch": 0.1045838896306186, "grad_norm": 1.2406629908179927, "learning_rate": 9.855834672721662e-06, "loss": 0.5536, "step": 470 }, { "epoch": 0.1048064085447263, "grad_norm": 1.1984550640619185, "learning_rate": 9.85497431878599e-06, "loss": 0.5785, "step": 471 }, { "epoch": 0.105028927458834, "grad_norm": 1.1443750807172108, "learning_rate": 9.85411144303705e-06, "loss": 0.5772, "step": 472 }, { "epoch": 0.1052514463729417, "grad_norm": 1.2336740139383362, "learning_rate": 9.853246045923043e-06, "loss": 0.5622, "step": 473 }, { "epoch": 0.1054739652870494, "grad_norm": 1.2309756779203067, "learning_rate": 9.852378127893483e-06, "loss": 0.5704, "step": 474 }, { "epoch": 0.1056964842011571, "grad_norm": 1.1419123816260681, "learning_rate": 9.851507689399189e-06, "loss": 0.5787, "step": 475 }, { "epoch": 0.1059190031152648, "grad_norm": 1.091423766268567, "learning_rate": 9.850634730892294e-06, "loss": 0.5632, "step": 476 }, { "epoch": 0.10614152202937249, "grad_norm": 1.378417397880191, "learning_rate": 9.849759252826236e-06, "loss": 0.548, "step": 477 }, { "epoch": 0.10636404094348019, "grad_norm": 1.1562506497314116, "learning_rate": 9.848881255655763e-06, "loss": 0.5521, "step": 478 }, { "epoch": 0.1065865598575879, "grad_norm": 3.0424599433605586, "learning_rate": 9.848000739836934e-06, "loss": 0.5643, "step": 479 }, { "epoch": 0.1068090787716956, "grad_norm": 1.2429953496789707, "learning_rate": 9.847117705827114e-06, "loss": 0.5595, "step": 480 }, { "epoch": 0.1070315976858033, "grad_norm": 1.1676632768370603, "learning_rate": 9.846232154084973e-06, "loss": 0.5774, "step": 481 }, { "epoch": 0.10725411659991099, "grad_norm": 1.1496611202533222, "learning_rate": 9.845344085070498e-06, "loss": 0.5639, "step": 482 }, { "epoch": 0.10747663551401869, "grad_norm": 1.3642333677122809, "learning_rate": 9.844453499244973e-06, "loss": 0.5577, "step": 483 }, { "epoch": 0.10769915442812639, "grad_norm": 1.257768962037844, "learning_rate": 9.843560397070994e-06, "loss": 0.5437, "step": 484 }, { "epoch": 0.10792167334223408, "grad_norm": 1.2105425039545274, "learning_rate": 9.842664779012468e-06, "loss": 0.5563, "step": 485 }, { "epoch": 0.1081441922563418, "grad_norm": 1.2230571387233238, "learning_rate": 9.8417666455346e-06, "loss": 0.5694, "step": 486 }, { "epoch": 0.10836671117044949, "grad_norm": 1.2582866843053673, "learning_rate": 9.840865997103908e-06, "loss": 0.5645, "step": 487 }, { "epoch": 0.10858923008455719, "grad_norm": 1.2087216993326146, "learning_rate": 9.839962834188214e-06, "loss": 0.5791, "step": 488 }, { "epoch": 0.10881174899866489, "grad_norm": 1.1630158048452486, "learning_rate": 9.83905715725665e-06, "loss": 0.5773, "step": 489 }, { "epoch": 0.10903426791277258, "grad_norm": 1.5663125108619582, "learning_rate": 9.838148966779646e-06, "loss": 0.5924, "step": 490 }, { "epoch": 0.10925678682688028, "grad_norm": 1.2710655884216249, "learning_rate": 9.837238263228946e-06, "loss": 0.5707, "step": 491 }, { "epoch": 0.10947930574098798, "grad_norm": 1.1383970352654333, "learning_rate": 9.836325047077594e-06, "loss": 0.5533, "step": 492 }, { "epoch": 0.10970182465509569, "grad_norm": 1.1949921867128266, "learning_rate": 9.83540931879994e-06, "loss": 0.5614, "step": 493 }, { "epoch": 0.10992434356920339, "grad_norm": 1.2673239718686986, "learning_rate": 9.834491078871641e-06, "loss": 0.5705, "step": 494 }, { "epoch": 0.11014686248331108, "grad_norm": 1.160722361364226, "learning_rate": 9.83357032776966e-06, "loss": 0.5494, "step": 495 }, { "epoch": 0.11036938139741878, "grad_norm": 1.151244186581546, "learning_rate": 9.832647065972254e-06, "loss": 0.5812, "step": 496 }, { "epoch": 0.11059190031152648, "grad_norm": 1.2348739329744987, "learning_rate": 9.831721293958998e-06, "loss": 0.5579, "step": 497 }, { "epoch": 0.11081441922563418, "grad_norm": 1.279559354378489, "learning_rate": 9.830793012210763e-06, "loss": 0.5629, "step": 498 }, { "epoch": 0.11103693813974187, "grad_norm": 1.1232049684232435, "learning_rate": 9.829862221209723e-06, "loss": 0.5499, "step": 499 }, { "epoch": 0.11125945705384958, "grad_norm": 1.2185488094123302, "learning_rate": 9.82892892143936e-06, "loss": 0.547, "step": 500 }, { "epoch": 0.11148197596795728, "grad_norm": 1.191444198066381, "learning_rate": 9.827993113384458e-06, "loss": 0.555, "step": 501 }, { "epoch": 0.11170449488206498, "grad_norm": 1.2185992517450401, "learning_rate": 9.827054797531099e-06, "loss": 0.5736, "step": 502 }, { "epoch": 0.11192701379617268, "grad_norm": 1.1771760420025104, "learning_rate": 9.826113974366676e-06, "loss": 0.5782, "step": 503 }, { "epoch": 0.11214953271028037, "grad_norm": 1.212040141207426, "learning_rate": 9.825170644379874e-06, "loss": 0.5617, "step": 504 }, { "epoch": 0.11237205162438807, "grad_norm": 1.1770773236085708, "learning_rate": 9.82422480806069e-06, "loss": 0.5738, "step": 505 }, { "epoch": 0.11259457053849577, "grad_norm": 1.2261197764408653, "learning_rate": 9.823276465900416e-06, "loss": 0.5669, "step": 506 }, { "epoch": 0.11281708945260348, "grad_norm": 1.3469699959979462, "learning_rate": 9.822325618391649e-06, "loss": 0.5659, "step": 507 }, { "epoch": 0.11303960836671118, "grad_norm": 1.2585806195572862, "learning_rate": 9.821372266028285e-06, "loss": 0.5854, "step": 508 }, { "epoch": 0.11326212728081887, "grad_norm": 1.1868851873125081, "learning_rate": 9.82041640930553e-06, "loss": 0.559, "step": 509 }, { "epoch": 0.11348464619492657, "grad_norm": 1.2464367104725078, "learning_rate": 9.819458048719871e-06, "loss": 0.5735, "step": 510 }, { "epoch": 0.11370716510903427, "grad_norm": 1.2393120852773492, "learning_rate": 9.81849718476912e-06, "loss": 0.5594, "step": 511 }, { "epoch": 0.11392968402314196, "grad_norm": 1.2773431866861347, "learning_rate": 9.81753381795237e-06, "loss": 0.5681, "step": 512 }, { "epoch": 0.11415220293724966, "grad_norm": 1.496212076309311, "learning_rate": 9.816567948770024e-06, "loss": 0.5649, "step": 513 }, { "epoch": 0.11437472185135737, "grad_norm": 1.281347632908108, "learning_rate": 9.815599577723782e-06, "loss": 0.5776, "step": 514 }, { "epoch": 0.11459724076546507, "grad_norm": 1.1655590230259973, "learning_rate": 9.814628705316645e-06, "loss": 0.585, "step": 515 }, { "epoch": 0.11481975967957277, "grad_norm": 1.2914823025074904, "learning_rate": 9.81365533205291e-06, "loss": 0.5508, "step": 516 }, { "epoch": 0.11504227859368046, "grad_norm": 1.364186969644009, "learning_rate": 9.812679458438174e-06, "loss": 0.5713, "step": 517 }, { "epoch": 0.11526479750778816, "grad_norm": 1.225487220150313, "learning_rate": 9.811701084979337e-06, "loss": 0.5574, "step": 518 }, { "epoch": 0.11548731642189586, "grad_norm": 1.1287385332200337, "learning_rate": 9.810720212184593e-06, "loss": 0.5507, "step": 519 }, { "epoch": 0.11570983533600356, "grad_norm": 1.258436980397998, "learning_rate": 9.809736840563435e-06, "loss": 0.5633, "step": 520 }, { "epoch": 0.11593235425011127, "grad_norm": 1.2505266052464785, "learning_rate": 9.808750970626655e-06, "loss": 0.5663, "step": 521 }, { "epoch": 0.11615487316421896, "grad_norm": 1.2343119310215904, "learning_rate": 9.807762602886343e-06, "loss": 0.5503, "step": 522 }, { "epoch": 0.11637739207832666, "grad_norm": 1.2011626578961323, "learning_rate": 9.806771737855885e-06, "loss": 0.55, "step": 523 }, { "epoch": 0.11659991099243436, "grad_norm": 1.2248903840325698, "learning_rate": 9.805778376049964e-06, "loss": 0.5595, "step": 524 }, { "epoch": 0.11682242990654206, "grad_norm": 1.2705358305210266, "learning_rate": 9.804782517984561e-06, "loss": 0.5803, "step": 525 }, { "epoch": 0.11704494882064975, "grad_norm": 1.2471116313719572, "learning_rate": 9.803784164176953e-06, "loss": 0.5707, "step": 526 }, { "epoch": 0.11726746773475745, "grad_norm": 1.218644861081007, "learning_rate": 9.802783315145715e-06, "loss": 0.5525, "step": 527 }, { "epoch": 0.11748998664886515, "grad_norm": 1.1627965671864176, "learning_rate": 9.801779971410717e-06, "loss": 0.564, "step": 528 }, { "epoch": 0.11771250556297286, "grad_norm": 1.3278171762137516, "learning_rate": 9.800774133493121e-06, "loss": 0.5492, "step": 529 }, { "epoch": 0.11793502447708056, "grad_norm": 1.3933798036650291, "learning_rate": 9.799765801915393e-06, "loss": 0.5641, "step": 530 }, { "epoch": 0.11815754339118825, "grad_norm": 1.2231022406141128, "learning_rate": 9.798754977201285e-06, "loss": 0.5437, "step": 531 }, { "epoch": 0.11838006230529595, "grad_norm": 1.2017154492174444, "learning_rate": 9.797741659875852e-06, "loss": 0.5552, "step": 532 }, { "epoch": 0.11860258121940365, "grad_norm": 1.3372760944872455, "learning_rate": 9.79672585046544e-06, "loss": 0.5677, "step": 533 }, { "epoch": 0.11882510013351134, "grad_norm": 1.344424021683583, "learning_rate": 9.795707549497685e-06, "loss": 0.5705, "step": 534 }, { "epoch": 0.11904761904761904, "grad_norm": 1.2205426542003985, "learning_rate": 9.794686757501529e-06, "loss": 0.5654, "step": 535 }, { "epoch": 0.11927013796172675, "grad_norm": 1.441206404331531, "learning_rate": 9.793663475007196e-06, "loss": 0.5772, "step": 536 }, { "epoch": 0.11949265687583445, "grad_norm": 1.2079428097661333, "learning_rate": 9.792637702546207e-06, "loss": 0.565, "step": 537 }, { "epoch": 0.11971517578994215, "grad_norm": 1.266099701034026, "learning_rate": 9.791609440651382e-06, "loss": 0.5432, "step": 538 }, { "epoch": 0.11993769470404984, "grad_norm": 1.3297893296923256, "learning_rate": 9.790578689856826e-06, "loss": 0.5816, "step": 539 }, { "epoch": 0.12016021361815754, "grad_norm": 1.3159962777982006, "learning_rate": 9.789545450697944e-06, "loss": 0.5811, "step": 540 }, { "epoch": 0.12038273253226524, "grad_norm": 1.1500478402683425, "learning_rate": 9.788509723711427e-06, "loss": 0.5594, "step": 541 }, { "epoch": 0.12060525144637294, "grad_norm": 1.1788971698269228, "learning_rate": 9.787471509435264e-06, "loss": 0.5601, "step": 542 }, { "epoch": 0.12082777036048065, "grad_norm": 1.3236635238353327, "learning_rate": 9.786430808408731e-06, "loss": 0.5687, "step": 543 }, { "epoch": 0.12105028927458834, "grad_norm": 1.1682377204807315, "learning_rate": 9.7853876211724e-06, "loss": 0.5545, "step": 544 }, { "epoch": 0.12127280818869604, "grad_norm": 1.118845518156632, "learning_rate": 9.784341948268132e-06, "loss": 0.5616, "step": 545 }, { "epoch": 0.12149532710280374, "grad_norm": 1.1847873084667313, "learning_rate": 9.783293790239079e-06, "loss": 0.5637, "step": 546 }, { "epoch": 0.12171784601691144, "grad_norm": 1.2317827214399935, "learning_rate": 9.782243147629686e-06, "loss": 0.5619, "step": 547 }, { "epoch": 0.12194036493101913, "grad_norm": 1.1103808415177212, "learning_rate": 9.781190020985683e-06, "loss": 0.5697, "step": 548 }, { "epoch": 0.12216288384512683, "grad_norm": 1.0979085850042376, "learning_rate": 9.780134410854098e-06, "loss": 0.5687, "step": 549 }, { "epoch": 0.12238540275923454, "grad_norm": 1.2661644450767897, "learning_rate": 9.779076317783245e-06, "loss": 0.5659, "step": 550 }, { "epoch": 0.12260792167334224, "grad_norm": 1.0999584523764083, "learning_rate": 9.778015742322725e-06, "loss": 0.5607, "step": 551 }, { "epoch": 0.12283044058744993, "grad_norm": 1.1822137260507715, "learning_rate": 9.776952685023437e-06, "loss": 0.578, "step": 552 }, { "epoch": 0.12305295950155763, "grad_norm": 1.1788357091346278, "learning_rate": 9.775887146437558e-06, "loss": 0.5567, "step": 553 }, { "epoch": 0.12327547841566533, "grad_norm": 1.2542747648601777, "learning_rate": 9.774819127118561e-06, "loss": 0.567, "step": 554 }, { "epoch": 0.12349799732977303, "grad_norm": 1.2240197411252158, "learning_rate": 9.773748627621208e-06, "loss": 0.555, "step": 555 }, { "epoch": 0.12372051624388072, "grad_norm": 1.2049195450250936, "learning_rate": 9.772675648501544e-06, "loss": 0.5475, "step": 556 }, { "epoch": 0.12394303515798843, "grad_norm": 1.215003490329507, "learning_rate": 9.771600190316907e-06, "loss": 0.5702, "step": 557 }, { "epoch": 0.12416555407209613, "grad_norm": 1.2553801391853388, "learning_rate": 9.770522253625923e-06, "loss": 0.5589, "step": 558 }, { "epoch": 0.12438807298620383, "grad_norm": 1.3374186091048303, "learning_rate": 9.769441838988498e-06, "loss": 0.5661, "step": 559 }, { "epoch": 0.12461059190031153, "grad_norm": 1.168136281645438, "learning_rate": 9.768358946965835e-06, "loss": 0.5612, "step": 560 }, { "epoch": 0.12483311081441922, "grad_norm": 1.0869523636184766, "learning_rate": 9.767273578120417e-06, "loss": 0.5504, "step": 561 }, { "epoch": 0.12505562972852693, "grad_norm": 1.149682658197338, "learning_rate": 9.766185733016018e-06, "loss": 0.5678, "step": 562 }, { "epoch": 0.12527814864263462, "grad_norm": 1.2091923751632665, "learning_rate": 9.765095412217693e-06, "loss": 0.5504, "step": 563 }, { "epoch": 0.12550066755674233, "grad_norm": 1.071450906515386, "learning_rate": 9.764002616291788e-06, "loss": 0.5486, "step": 564 }, { "epoch": 0.12572318647085, "grad_norm": 1.1025751299371456, "learning_rate": 9.762907345805932e-06, "loss": 0.5552, "step": 565 }, { "epoch": 0.12594570538495772, "grad_norm": 1.2256297082403085, "learning_rate": 9.761809601329042e-06, "loss": 0.5735, "step": 566 }, { "epoch": 0.1261682242990654, "grad_norm": 1.1071247091503411, "learning_rate": 9.760709383431315e-06, "loss": 0.5675, "step": 567 }, { "epoch": 0.12639074321317312, "grad_norm": 1.0787247765319308, "learning_rate": 9.759606692684236e-06, "loss": 0.5549, "step": 568 }, { "epoch": 0.12661326212728083, "grad_norm": 1.2170324170417528, "learning_rate": 9.758501529660575e-06, "loss": 0.5709, "step": 569 }, { "epoch": 0.1268357810413885, "grad_norm": 1.1615060573349834, "learning_rate": 9.757393894934387e-06, "loss": 0.5554, "step": 570 }, { "epoch": 0.12705829995549622, "grad_norm": 1.2512136934947071, "learning_rate": 9.756283789081006e-06, "loss": 0.5656, "step": 571 }, { "epoch": 0.1272808188696039, "grad_norm": 1.146661587965403, "learning_rate": 9.755171212677058e-06, "loss": 0.5478, "step": 572 }, { "epoch": 0.12750333778371162, "grad_norm": 1.1987693131276762, "learning_rate": 9.754056166300443e-06, "loss": 0.568, "step": 573 }, { "epoch": 0.1277258566978193, "grad_norm": 1.1320733311734854, "learning_rate": 9.75293865053035e-06, "loss": 0.5506, "step": 574 }, { "epoch": 0.127948375611927, "grad_norm": 1.148566243681117, "learning_rate": 9.751818665947245e-06, "loss": 0.5396, "step": 575 }, { "epoch": 0.12817089452603472, "grad_norm": 1.1860241694544948, "learning_rate": 9.750696213132887e-06, "loss": 0.5622, "step": 576 }, { "epoch": 0.1283934134401424, "grad_norm": 1.22576073046737, "learning_rate": 9.749571292670305e-06, "loss": 0.5616, "step": 577 }, { "epoch": 0.12861593235425012, "grad_norm": 1.14020387953187, "learning_rate": 9.748443905143816e-06, "loss": 0.5522, "step": 578 }, { "epoch": 0.1288384512683578, "grad_norm": 1.124927516932292, "learning_rate": 9.74731405113902e-06, "loss": 0.5639, "step": 579 }, { "epoch": 0.1290609701824655, "grad_norm": 1.162748065760439, "learning_rate": 9.746181731242793e-06, "loss": 0.5581, "step": 580 }, { "epoch": 0.1292834890965732, "grad_norm": 1.1364809946332328, "learning_rate": 9.745046946043296e-06, "loss": 0.5527, "step": 581 }, { "epoch": 0.1295060080106809, "grad_norm": 1.2206234498544934, "learning_rate": 9.743909696129967e-06, "loss": 0.5411, "step": 582 }, { "epoch": 0.12972852692478862, "grad_norm": 1.2919737198330326, "learning_rate": 9.742769982093528e-06, "loss": 0.5728, "step": 583 }, { "epoch": 0.1299510458388963, "grad_norm": 1.1660137679458797, "learning_rate": 9.741627804525978e-06, "loss": 0.5353, "step": 584 }, { "epoch": 0.130173564753004, "grad_norm": 1.2285406844910571, "learning_rate": 9.7404831640206e-06, "loss": 0.5626, "step": 585 }, { "epoch": 0.1303960836671117, "grad_norm": 1.1739592274035466, "learning_rate": 9.739336061171949e-06, "loss": 0.5679, "step": 586 }, { "epoch": 0.1306186025812194, "grad_norm": 1.1569462289672428, "learning_rate": 9.738186496575865e-06, "loss": 0.571, "step": 587 }, { "epoch": 0.1308411214953271, "grad_norm": 1.6060094703892336, "learning_rate": 9.737034470829467e-06, "loss": 0.5504, "step": 588 }, { "epoch": 0.1310636404094348, "grad_norm": 1.168519636910086, "learning_rate": 9.735879984531147e-06, "loss": 0.5468, "step": 589 }, { "epoch": 0.1312861593235425, "grad_norm": 1.1154409034957684, "learning_rate": 9.73472303828058e-06, "loss": 0.5655, "step": 590 }, { "epoch": 0.1315086782376502, "grad_norm": 1.2123573520234285, "learning_rate": 9.733563632678717e-06, "loss": 0.5506, "step": 591 }, { "epoch": 0.1317311971517579, "grad_norm": 1.1401471370648728, "learning_rate": 9.732401768327787e-06, "loss": 0.5453, "step": 592 }, { "epoch": 0.1319537160658656, "grad_norm": 1.0892284307556457, "learning_rate": 9.731237445831295e-06, "loss": 0.5414, "step": 593 }, { "epoch": 0.1321762349799733, "grad_norm": 1.1416111807224723, "learning_rate": 9.730070665794024e-06, "loss": 0.5661, "step": 594 }, { "epoch": 0.13239875389408098, "grad_norm": 1.2303990834675436, "learning_rate": 9.728901428822033e-06, "loss": 0.5683, "step": 595 }, { "epoch": 0.1326212728081887, "grad_norm": 1.4298874056365931, "learning_rate": 9.727729735522657e-06, "loss": 0.5731, "step": 596 }, { "epoch": 0.1328437917222964, "grad_norm": 1.1472641212992203, "learning_rate": 9.726555586504506e-06, "loss": 0.5447, "step": 597 }, { "epoch": 0.1330663106364041, "grad_norm": 1.1529328228264604, "learning_rate": 9.725378982377472e-06, "loss": 0.5692, "step": 598 }, { "epoch": 0.1332888295505118, "grad_norm": 1.1128521053974632, "learning_rate": 9.72419992375271e-06, "loss": 0.5857, "step": 599 }, { "epoch": 0.13351134846461948, "grad_norm": 1.1968765563215245, "learning_rate": 9.723018411242662e-06, "loss": 0.5659, "step": 600 }, { "epoch": 0.1337338673787272, "grad_norm": 1.1593021405083794, "learning_rate": 9.721834445461038e-06, "loss": 0.5468, "step": 601 }, { "epoch": 0.13395638629283488, "grad_norm": 1.1785217959784053, "learning_rate": 9.720648027022822e-06, "loss": 0.5417, "step": 602 }, { "epoch": 0.1341789052069426, "grad_norm": 1.0822645839218787, "learning_rate": 9.719459156544276e-06, "loss": 0.5432, "step": 603 }, { "epoch": 0.1344014241210503, "grad_norm": 1.1088933752673273, "learning_rate": 9.718267834642933e-06, "loss": 0.569, "step": 604 }, { "epoch": 0.13462394303515798, "grad_norm": 1.1362949097758135, "learning_rate": 9.717074061937601e-06, "loss": 0.5604, "step": 605 }, { "epoch": 0.1348464619492657, "grad_norm": 1.1507822555541558, "learning_rate": 9.715877839048357e-06, "loss": 0.5679, "step": 606 }, { "epoch": 0.13506898086337338, "grad_norm": 1.165172987399113, "learning_rate": 9.714679166596557e-06, "loss": 0.5644, "step": 607 }, { "epoch": 0.1352914997774811, "grad_norm": 1.1396966305609193, "learning_rate": 9.713478045204823e-06, "loss": 0.5645, "step": 608 }, { "epoch": 0.13551401869158877, "grad_norm": 1.1958796122363538, "learning_rate": 9.712274475497055e-06, "loss": 0.5611, "step": 609 }, { "epoch": 0.13573653760569648, "grad_norm": 1.14835245649388, "learning_rate": 9.711068458098418e-06, "loss": 0.5533, "step": 610 }, { "epoch": 0.1359590565198042, "grad_norm": 1.0735937810959124, "learning_rate": 9.709859993635356e-06, "loss": 0.5554, "step": 611 }, { "epoch": 0.13618157543391188, "grad_norm": 1.1624126073389212, "learning_rate": 9.708649082735576e-06, "loss": 0.5583, "step": 612 }, { "epoch": 0.1364040943480196, "grad_norm": 1.2208901888018737, "learning_rate": 9.707435726028062e-06, "loss": 0.5626, "step": 613 }, { "epoch": 0.13662661326212727, "grad_norm": 1.2590888259271713, "learning_rate": 9.706219924143068e-06, "loss": 0.5578, "step": 614 }, { "epoch": 0.13684913217623498, "grad_norm": 1.1591552464064925, "learning_rate": 9.705001677712115e-06, "loss": 0.5543, "step": 615 }, { "epoch": 0.13707165109034267, "grad_norm": 1.2172539276965484, "learning_rate": 9.703780987367996e-06, "loss": 0.5794, "step": 616 }, { "epoch": 0.13729417000445038, "grad_norm": 1.178034811056313, "learning_rate": 9.702557853744772e-06, "loss": 0.5591, "step": 617 }, { "epoch": 0.1375166889185581, "grad_norm": 1.2645420308760833, "learning_rate": 9.701332277477772e-06, "loss": 0.5431, "step": 618 }, { "epoch": 0.13773920783266577, "grad_norm": 1.1696448180558037, "learning_rate": 9.700104259203598e-06, "loss": 0.5422, "step": 619 }, { "epoch": 0.13796172674677348, "grad_norm": 1.2531540810141806, "learning_rate": 9.698873799560117e-06, "loss": 0.547, "step": 620 }, { "epoch": 0.13818424566088117, "grad_norm": 1.2469343765379113, "learning_rate": 9.697640899186466e-06, "loss": 0.5541, "step": 621 }, { "epoch": 0.13840676457498888, "grad_norm": 1.1644424191277412, "learning_rate": 9.696405558723047e-06, "loss": 0.5667, "step": 622 }, { "epoch": 0.13862928348909656, "grad_norm": 1.1471890807392016, "learning_rate": 9.695167778811534e-06, "loss": 0.556, "step": 623 }, { "epoch": 0.13885180240320427, "grad_norm": 1.1893272531970795, "learning_rate": 9.693927560094864e-06, "loss": 0.5727, "step": 624 }, { "epoch": 0.13907432131731198, "grad_norm": 1.2101587833248497, "learning_rate": 9.692684903217243e-06, "loss": 0.5578, "step": 625 }, { "epoch": 0.13929684023141967, "grad_norm": 1.0494329071527109, "learning_rate": 9.691439808824142e-06, "loss": 0.5426, "step": 626 }, { "epoch": 0.13951935914552738, "grad_norm": 1.1086885326299125, "learning_rate": 9.690192277562298e-06, "loss": 0.5559, "step": 627 }, { "epoch": 0.13974187805963506, "grad_norm": 1.1211548490212029, "learning_rate": 9.688942310079715e-06, "loss": 0.5746, "step": 628 }, { "epoch": 0.13996439697374277, "grad_norm": 1.0540243353590903, "learning_rate": 9.687689907025664e-06, "loss": 0.5499, "step": 629 }, { "epoch": 0.14018691588785046, "grad_norm": 1.124751476291591, "learning_rate": 9.686435069050676e-06, "loss": 0.5592, "step": 630 }, { "epoch": 0.14040943480195817, "grad_norm": 1.0716345042340223, "learning_rate": 9.685177796806554e-06, "loss": 0.5484, "step": 631 }, { "epoch": 0.14063195371606588, "grad_norm": 1.149277489484783, "learning_rate": 9.683918090946358e-06, "loss": 0.5627, "step": 632 }, { "epoch": 0.14085447263017356, "grad_norm": 1.1342051844216703, "learning_rate": 9.682655952124416e-06, "loss": 0.574, "step": 633 }, { "epoch": 0.14107699154428127, "grad_norm": 1.141144026859714, "learning_rate": 9.681391380996321e-06, "loss": 0.5481, "step": 634 }, { "epoch": 0.14129951045838895, "grad_norm": 1.201242499176147, "learning_rate": 9.680124378218925e-06, "loss": 0.5556, "step": 635 }, { "epoch": 0.14152202937249667, "grad_norm": 1.1563521336885407, "learning_rate": 9.678854944450348e-06, "loss": 0.5484, "step": 636 }, { "epoch": 0.14174454828660435, "grad_norm": 1.0703877927613874, "learning_rate": 9.677583080349968e-06, "loss": 0.5587, "step": 637 }, { "epoch": 0.14196706720071206, "grad_norm": 1.1879568581148106, "learning_rate": 9.67630878657843e-06, "loss": 0.5515, "step": 638 }, { "epoch": 0.14218958611481977, "grad_norm": 1.1859797775483387, "learning_rate": 9.675032063797638e-06, "loss": 0.5479, "step": 639 }, { "epoch": 0.14241210502892745, "grad_norm": 1.105596120294261, "learning_rate": 9.67375291267076e-06, "loss": 0.5506, "step": 640 }, { "epoch": 0.14263462394303517, "grad_norm": 1.2559704158920735, "learning_rate": 9.67247133386222e-06, "loss": 0.5826, "step": 641 }, { "epoch": 0.14285714285714285, "grad_norm": 1.3185281888416238, "learning_rate": 9.67118732803771e-06, "loss": 0.5557, "step": 642 }, { "epoch": 0.14307966177125056, "grad_norm": 1.0868565209768932, "learning_rate": 9.66990089586418e-06, "loss": 0.5583, "step": 643 }, { "epoch": 0.14330218068535824, "grad_norm": 1.1113588883428, "learning_rate": 9.668612038009836e-06, "loss": 0.5522, "step": 644 }, { "epoch": 0.14352469959946595, "grad_norm": 1.1718665584389238, "learning_rate": 9.667320755144155e-06, "loss": 0.5575, "step": 645 }, { "epoch": 0.14374721851357367, "grad_norm": 1.225682082526302, "learning_rate": 9.666027047937858e-06, "loss": 0.5739, "step": 646 }, { "epoch": 0.14396973742768135, "grad_norm": 1.1960237534751046, "learning_rate": 9.664730917062939e-06, "loss": 0.562, "step": 647 }, { "epoch": 0.14419225634178906, "grad_norm": 1.2482131043053488, "learning_rate": 9.663432363192644e-06, "loss": 0.5612, "step": 648 }, { "epoch": 0.14441477525589674, "grad_norm": 1.0975835890593615, "learning_rate": 9.662131387001481e-06, "loss": 0.5496, "step": 649 }, { "epoch": 0.14463729417000445, "grad_norm": 1.142596706012096, "learning_rate": 9.660827989165211e-06, "loss": 0.5538, "step": 650 }, { "epoch": 0.14485981308411214, "grad_norm": 1.1184176546118896, "learning_rate": 9.65952217036086e-06, "loss": 0.5703, "step": 651 }, { "epoch": 0.14508233199821985, "grad_norm": 1.1011139534949255, "learning_rate": 9.658213931266705e-06, "loss": 0.5403, "step": 652 }, { "epoch": 0.14530485091232756, "grad_norm": 1.245962701818207, "learning_rate": 9.656903272562286e-06, "loss": 0.5621, "step": 653 }, { "epoch": 0.14552736982643524, "grad_norm": 1.0917180560154256, "learning_rate": 9.655590194928392e-06, "loss": 0.5526, "step": 654 }, { "epoch": 0.14574988874054295, "grad_norm": 1.1182574568760404, "learning_rate": 9.65427469904708e-06, "loss": 0.5588, "step": 655 }, { "epoch": 0.14597240765465064, "grad_norm": 1.0887740593394906, "learning_rate": 9.652956785601651e-06, "loss": 0.5611, "step": 656 }, { "epoch": 0.14619492656875835, "grad_norm": 1.2514900788780243, "learning_rate": 9.651636455276668e-06, "loss": 0.555, "step": 657 }, { "epoch": 0.14641744548286603, "grad_norm": 1.1709449040149054, "learning_rate": 9.65031370875795e-06, "loss": 0.5488, "step": 658 }, { "epoch": 0.14663996439697374, "grad_norm": 1.092427241865748, "learning_rate": 9.648988546732567e-06, "loss": 0.546, "step": 659 }, { "epoch": 0.14686248331108145, "grad_norm": 1.0839572606683703, "learning_rate": 9.647660969888852e-06, "loss": 0.5549, "step": 660 }, { "epoch": 0.14708500222518914, "grad_norm": 1.2496384552434852, "learning_rate": 9.64633097891638e-06, "loss": 0.5716, "step": 661 }, { "epoch": 0.14730752113929685, "grad_norm": 1.151625812515542, "learning_rate": 9.64499857450599e-06, "loss": 0.5609, "step": 662 }, { "epoch": 0.14753004005340453, "grad_norm": 1.123321124842251, "learning_rate": 9.64366375734977e-06, "loss": 0.5408, "step": 663 }, { "epoch": 0.14775255896751224, "grad_norm": 1.0199070859785624, "learning_rate": 9.642326528141064e-06, "loss": 0.5418, "step": 664 }, { "epoch": 0.14797507788161993, "grad_norm": 1.1169948555243576, "learning_rate": 9.640986887574466e-06, "loss": 0.5479, "step": 665 }, { "epoch": 0.14819759679572764, "grad_norm": 1.261564871899297, "learning_rate": 9.639644836345823e-06, "loss": 0.5643, "step": 666 }, { "epoch": 0.14842011570983535, "grad_norm": 1.1304438100924634, "learning_rate": 9.638300375152236e-06, "loss": 0.5623, "step": 667 }, { "epoch": 0.14864263462394303, "grad_norm": 1.1718428428936238, "learning_rate": 9.63695350469206e-06, "loss": 0.5708, "step": 668 }, { "epoch": 0.14886515353805074, "grad_norm": 1.1544544878126233, "learning_rate": 9.635604225664892e-06, "loss": 0.5615, "step": 669 }, { "epoch": 0.14908767245215843, "grad_norm": 1.2247145188969113, "learning_rate": 9.634252538771588e-06, "loss": 0.5423, "step": 670 }, { "epoch": 0.14931019136626614, "grad_norm": 1.2007757599581004, "learning_rate": 9.632898444714258e-06, "loss": 0.5479, "step": 671 }, { "epoch": 0.14953271028037382, "grad_norm": 1.1325370635160736, "learning_rate": 9.631541944196254e-06, "loss": 0.5445, "step": 672 }, { "epoch": 0.14975522919448153, "grad_norm": 1.2057534667220462, "learning_rate": 9.630183037922178e-06, "loss": 0.5762, "step": 673 }, { "epoch": 0.14997774810858924, "grad_norm": 1.1918798737795453, "learning_rate": 9.62882172659789e-06, "loss": 0.5491, "step": 674 }, { "epoch": 0.15020026702269693, "grad_norm": 1.1798412024339053, "learning_rate": 9.627458010930493e-06, "loss": 0.5568, "step": 675 }, { "epoch": 0.15042278593680464, "grad_norm": 1.2228726209009224, "learning_rate": 9.62609189162834e-06, "loss": 0.5494, "step": 676 }, { "epoch": 0.15064530485091232, "grad_norm": 1.1558596202067903, "learning_rate": 9.62472336940103e-06, "loss": 0.54, "step": 677 }, { "epoch": 0.15086782376502003, "grad_norm": 1.1328644381330666, "learning_rate": 9.623352444959418e-06, "loss": 0.5555, "step": 678 }, { "epoch": 0.15109034267912771, "grad_norm": 1.1740645105416123, "learning_rate": 9.621979119015596e-06, "loss": 0.5405, "step": 679 }, { "epoch": 0.15131286159323543, "grad_norm": 1.2412337605492687, "learning_rate": 9.620603392282912e-06, "loss": 0.5427, "step": 680 }, { "epoch": 0.15153538050734314, "grad_norm": 1.2411780233668566, "learning_rate": 9.61922526547596e-06, "loss": 0.5376, "step": 681 }, { "epoch": 0.15175789942145082, "grad_norm": 1.227381328764813, "learning_rate": 9.617844739310573e-06, "loss": 0.5658, "step": 682 }, { "epoch": 0.15198041833555853, "grad_norm": 1.2227588216973404, "learning_rate": 9.616461814503841e-06, "loss": 0.5526, "step": 683 }, { "epoch": 0.15220293724966621, "grad_norm": 1.201110330099259, "learning_rate": 9.615076491774093e-06, "loss": 0.5536, "step": 684 }, { "epoch": 0.15242545616377393, "grad_norm": 1.139089974974813, "learning_rate": 9.613688771840907e-06, "loss": 0.5432, "step": 685 }, { "epoch": 0.1526479750778816, "grad_norm": 1.0926128151650198, "learning_rate": 9.612298655425101e-06, "loss": 0.5406, "step": 686 }, { "epoch": 0.15287049399198932, "grad_norm": 1.2621450966795793, "learning_rate": 9.610906143248746e-06, "loss": 0.5619, "step": 687 }, { "epoch": 0.15309301290609703, "grad_norm": 1.0610521672995568, "learning_rate": 9.60951123603515e-06, "loss": 0.5491, "step": 688 }, { "epoch": 0.15331553182020471, "grad_norm": 1.1076469679786127, "learning_rate": 9.608113934508866e-06, "loss": 0.5579, "step": 689 }, { "epoch": 0.15353805073431243, "grad_norm": 1.2959558289474296, "learning_rate": 9.6067142393957e-06, "loss": 0.5413, "step": 690 }, { "epoch": 0.1537605696484201, "grad_norm": 1.0389179911412947, "learning_rate": 9.605312151422686e-06, "loss": 0.534, "step": 691 }, { "epoch": 0.15398308856252782, "grad_norm": 1.1131523413199282, "learning_rate": 9.603907671318111e-06, "loss": 0.5598, "step": 692 }, { "epoch": 0.1542056074766355, "grad_norm": 1.060948409495351, "learning_rate": 9.602500799811504e-06, "loss": 0.5318, "step": 693 }, { "epoch": 0.15442812639074321, "grad_norm": 1.0912136679875442, "learning_rate": 9.601091537633635e-06, "loss": 0.5421, "step": 694 }, { "epoch": 0.15465064530485093, "grad_norm": 1.1026323512812257, "learning_rate": 9.599679885516513e-06, "loss": 0.5568, "step": 695 }, { "epoch": 0.1548731642189586, "grad_norm": 1.099504866703303, "learning_rate": 9.598265844193393e-06, "loss": 0.5321, "step": 696 }, { "epoch": 0.15509568313306632, "grad_norm": 1.081036220994894, "learning_rate": 9.596849414398765e-06, "loss": 0.559, "step": 697 }, { "epoch": 0.155318202047174, "grad_norm": 1.1421839754087102, "learning_rate": 9.595430596868368e-06, "loss": 0.5382, "step": 698 }, { "epoch": 0.15554072096128171, "grad_norm": 1.2980744409037057, "learning_rate": 9.594009392339174e-06, "loss": 0.5723, "step": 699 }, { "epoch": 0.1557632398753894, "grad_norm": 1.1202384473837952, "learning_rate": 9.592585801549396e-06, "loss": 0.5762, "step": 700 }, { "epoch": 0.1559857587894971, "grad_norm": 1.1487010595861191, "learning_rate": 9.591159825238493e-06, "loss": 0.5559, "step": 701 }, { "epoch": 0.15620827770360482, "grad_norm": 1.102865729174161, "learning_rate": 9.589731464147154e-06, "loss": 0.5698, "step": 702 }, { "epoch": 0.1564307966177125, "grad_norm": 1.1236997067843248, "learning_rate": 9.588300719017312e-06, "loss": 0.5677, "step": 703 }, { "epoch": 0.15665331553182021, "grad_norm": 1.104496881534634, "learning_rate": 9.586867590592134e-06, "loss": 0.5582, "step": 704 }, { "epoch": 0.1568758344459279, "grad_norm": 1.1285205516613315, "learning_rate": 9.585432079616034e-06, "loss": 0.5754, "step": 705 }, { "epoch": 0.1570983533600356, "grad_norm": 1.114192463881833, "learning_rate": 9.583994186834655e-06, "loss": 0.5524, "step": 706 }, { "epoch": 0.1573208722741433, "grad_norm": 1.16568470751668, "learning_rate": 9.58255391299488e-06, "loss": 0.5421, "step": 707 }, { "epoch": 0.157543391188251, "grad_norm": 1.2284689126798551, "learning_rate": 9.581111258844826e-06, "loss": 0.552, "step": 708 }, { "epoch": 0.15776591010235871, "grad_norm": 1.1230446438295578, "learning_rate": 9.579666225133854e-06, "loss": 0.5625, "step": 709 }, { "epoch": 0.1579884290164664, "grad_norm": 1.1095465079175453, "learning_rate": 9.578218812612552e-06, "loss": 0.5364, "step": 710 }, { "epoch": 0.1582109479305741, "grad_norm": 1.2097997430298524, "learning_rate": 9.57676902203275e-06, "loss": 0.5424, "step": 711 }, { "epoch": 0.1584334668446818, "grad_norm": 1.2586374568533982, "learning_rate": 9.575316854147509e-06, "loss": 0.5671, "step": 712 }, { "epoch": 0.1586559857587895, "grad_norm": 1.1734798610466932, "learning_rate": 9.573862309711129e-06, "loss": 0.5583, "step": 713 }, { "epoch": 0.1588785046728972, "grad_norm": 1.180962967247729, "learning_rate": 9.57240538947914e-06, "loss": 0.5616, "step": 714 }, { "epoch": 0.1591010235870049, "grad_norm": 1.2103569048052307, "learning_rate": 9.570946094208308e-06, "loss": 0.5624, "step": 715 }, { "epoch": 0.1593235425011126, "grad_norm": 1.1077790221993358, "learning_rate": 9.569484424656636e-06, "loss": 0.5631, "step": 716 }, { "epoch": 0.1595460614152203, "grad_norm": 1.108640614365241, "learning_rate": 9.568020381583356e-06, "loss": 0.5533, "step": 717 }, { "epoch": 0.159768580329328, "grad_norm": 1.257440843717015, "learning_rate": 9.566553965748932e-06, "loss": 0.5387, "step": 718 }, { "epoch": 0.15999109924343569, "grad_norm": 1.1024969121683552, "learning_rate": 9.565085177915064e-06, "loss": 0.5514, "step": 719 }, { "epoch": 0.1602136181575434, "grad_norm": 1.0878469176378227, "learning_rate": 9.563614018844683e-06, "loss": 0.5453, "step": 720 }, { "epoch": 0.16043613707165108, "grad_norm": 1.1150954263006265, "learning_rate": 9.562140489301952e-06, "loss": 0.5503, "step": 721 }, { "epoch": 0.1606586559857588, "grad_norm": 1.1966040993774663, "learning_rate": 9.560664590052261e-06, "loss": 0.5615, "step": 722 }, { "epoch": 0.16088117489986647, "grad_norm": 1.1818984232384893, "learning_rate": 9.559186321862239e-06, "loss": 0.5469, "step": 723 }, { "epoch": 0.16110369381397419, "grad_norm": 1.1484226704293214, "learning_rate": 9.557705685499741e-06, "loss": 0.5726, "step": 724 }, { "epoch": 0.1613262127280819, "grad_norm": 1.1659359912971472, "learning_rate": 9.556222681733846e-06, "loss": 0.5342, "step": 725 }, { "epoch": 0.16154873164218958, "grad_norm": 1.1594246496955518, "learning_rate": 9.554737311334876e-06, "loss": 0.545, "step": 726 }, { "epoch": 0.1617712505562973, "grad_norm": 1.15926318200675, "learning_rate": 9.553249575074372e-06, "loss": 0.5661, "step": 727 }, { "epoch": 0.16199376947040497, "grad_norm": 1.2613122616116559, "learning_rate": 9.551759473725106e-06, "loss": 0.5574, "step": 728 }, { "epoch": 0.16221628838451269, "grad_norm": 1.1095160743368349, "learning_rate": 9.550267008061081e-06, "loss": 0.5473, "step": 729 }, { "epoch": 0.16243880729862037, "grad_norm": 1.1367982389859084, "learning_rate": 9.548772178857526e-06, "loss": 0.5667, "step": 730 }, { "epoch": 0.16266132621272808, "grad_norm": 1.1574937295691854, "learning_rate": 9.547274986890899e-06, "loss": 0.5536, "step": 731 }, { "epoch": 0.1628838451268358, "grad_norm": 1.1612143611339594, "learning_rate": 9.545775432938883e-06, "loss": 0.5591, "step": 732 }, { "epoch": 0.16310636404094347, "grad_norm": 1.1199904812328298, "learning_rate": 9.54427351778039e-06, "loss": 0.5551, "step": 733 }, { "epoch": 0.16332888295505119, "grad_norm": 1.2006686553424302, "learning_rate": 9.542769242195559e-06, "loss": 0.5703, "step": 734 }, { "epoch": 0.16355140186915887, "grad_norm": 1.073619125810168, "learning_rate": 9.541262606965755e-06, "loss": 0.5512, "step": 735 }, { "epoch": 0.16377392078326658, "grad_norm": 1.1216558903852984, "learning_rate": 9.539753612873565e-06, "loss": 0.5556, "step": 736 }, { "epoch": 0.16399643969737426, "grad_norm": 1.1488537792330527, "learning_rate": 9.538242260702805e-06, "loss": 0.5604, "step": 737 }, { "epoch": 0.16421895861148197, "grad_norm": 1.1444373590261274, "learning_rate": 9.536728551238515e-06, "loss": 0.5313, "step": 738 }, { "epoch": 0.16444147752558969, "grad_norm": 1.1555177649268207, "learning_rate": 9.535212485266959e-06, "loss": 0.5443, "step": 739 }, { "epoch": 0.16466399643969737, "grad_norm": 1.2233578443579387, "learning_rate": 9.533694063575623e-06, "loss": 0.5494, "step": 740 }, { "epoch": 0.16488651535380508, "grad_norm": 1.0901077532682897, "learning_rate": 9.532173286953224e-06, "loss": 0.5507, "step": 741 }, { "epoch": 0.16510903426791276, "grad_norm": 1.220185063821665, "learning_rate": 9.530650156189692e-06, "loss": 0.5522, "step": 742 }, { "epoch": 0.16533155318202047, "grad_norm": 1.1081071679578478, "learning_rate": 9.529124672076189e-06, "loss": 0.5497, "step": 743 }, { "epoch": 0.16555407209612816, "grad_norm": 1.1453393213959522, "learning_rate": 9.527596835405093e-06, "loss": 0.5547, "step": 744 }, { "epoch": 0.16577659101023587, "grad_norm": 1.166117045943638, "learning_rate": 9.526066646970007e-06, "loss": 0.5377, "step": 745 }, { "epoch": 0.16599910992434358, "grad_norm": 1.2093771963576883, "learning_rate": 9.524534107565752e-06, "loss": 0.5655, "step": 746 }, { "epoch": 0.16622162883845126, "grad_norm": 1.1195194317602186, "learning_rate": 9.522999217988378e-06, "loss": 0.5567, "step": 747 }, { "epoch": 0.16644414775255897, "grad_norm": 1.1881546231075042, "learning_rate": 9.52146197903515e-06, "loss": 0.5442, "step": 748 }, { "epoch": 0.16666666666666666, "grad_norm": 1.2982798412929841, "learning_rate": 9.51992239150455e-06, "loss": 0.5661, "step": 749 }, { "epoch": 0.16688918558077437, "grad_norm": 1.1422782510801228, "learning_rate": 9.518380456196286e-06, "loss": 0.5566, "step": 750 }, { "epoch": 0.16711170449488205, "grad_norm": 1.1633804879239589, "learning_rate": 9.516836173911285e-06, "loss": 0.5791, "step": 751 }, { "epoch": 0.16733422340898976, "grad_norm": 1.15426787080873, "learning_rate": 9.515289545451691e-06, "loss": 0.5522, "step": 752 }, { "epoch": 0.16755674232309747, "grad_norm": 1.070501098140435, "learning_rate": 9.513740571620868e-06, "loss": 0.5392, "step": 753 }, { "epoch": 0.16777926123720516, "grad_norm": 1.0456068751930798, "learning_rate": 9.512189253223397e-06, "loss": 0.5498, "step": 754 }, { "epoch": 0.16800178015131287, "grad_norm": 1.1305592812034435, "learning_rate": 9.510635591065073e-06, "loss": 0.5525, "step": 755 }, { "epoch": 0.16822429906542055, "grad_norm": 1.13025456834056, "learning_rate": 9.50907958595292e-06, "loss": 0.5527, "step": 756 }, { "epoch": 0.16844681797952826, "grad_norm": 1.0125596643545018, "learning_rate": 9.50752123869517e-06, "loss": 0.5411, "step": 757 }, { "epoch": 0.16866933689363595, "grad_norm": 1.1248076460361258, "learning_rate": 9.505960550101269e-06, "loss": 0.5439, "step": 758 }, { "epoch": 0.16889185580774366, "grad_norm": 1.0816479492096076, "learning_rate": 9.504397520981889e-06, "loss": 0.5441, "step": 759 }, { "epoch": 0.16911437472185137, "grad_norm": 1.165297077345471, "learning_rate": 9.502832152148907e-06, "loss": 0.5683, "step": 760 }, { "epoch": 0.16933689363595905, "grad_norm": 1.106433504815399, "learning_rate": 9.501264444415426e-06, "loss": 0.562, "step": 761 }, { "epoch": 0.16955941255006676, "grad_norm": 1.1886433504716847, "learning_rate": 9.499694398595753e-06, "loss": 0.5614, "step": 762 }, { "epoch": 0.16978193146417445, "grad_norm": 1.2045767469192372, "learning_rate": 9.498122015505419e-06, "loss": 0.5593, "step": 763 }, { "epoch": 0.17000445037828216, "grad_norm": 1.2624256737217907, "learning_rate": 9.496547295961165e-06, "loss": 0.536, "step": 764 }, { "epoch": 0.17022696929238984, "grad_norm": 1.047633378965557, "learning_rate": 9.494970240780944e-06, "loss": 0.5478, "step": 765 }, { "epoch": 0.17044948820649755, "grad_norm": 1.2043034101902885, "learning_rate": 9.493390850783923e-06, "loss": 0.5518, "step": 766 }, { "epoch": 0.17067200712060526, "grad_norm": 1.1425748694637847, "learning_rate": 9.491809126790486e-06, "loss": 0.5611, "step": 767 }, { "epoch": 0.17089452603471295, "grad_norm": 1.0923576421611538, "learning_rate": 9.490225069622221e-06, "loss": 0.5396, "step": 768 }, { "epoch": 0.17111704494882066, "grad_norm": 1.1500388234411938, "learning_rate": 9.488638680101939e-06, "loss": 0.5586, "step": 769 }, { "epoch": 0.17133956386292834, "grad_norm": 1.4372255362304378, "learning_rate": 9.487049959053649e-06, "loss": 0.5552, "step": 770 }, { "epoch": 0.17156208277703605, "grad_norm": 1.1399596985294458, "learning_rate": 9.485458907302585e-06, "loss": 0.569, "step": 771 }, { "epoch": 0.17178460169114373, "grad_norm": 1.1230380453674147, "learning_rate": 9.48386552567518e-06, "loss": 0.5473, "step": 772 }, { "epoch": 0.17200712060525145, "grad_norm": 1.0681745665399107, "learning_rate": 9.482269814999085e-06, "loss": 0.5579, "step": 773 }, { "epoch": 0.17222963951935916, "grad_norm": 1.0377696395322245, "learning_rate": 9.480671776103158e-06, "loss": 0.5416, "step": 774 }, { "epoch": 0.17245215843346684, "grad_norm": 1.1106596799981832, "learning_rate": 9.479071409817467e-06, "loss": 0.5428, "step": 775 }, { "epoch": 0.17267467734757455, "grad_norm": 1.2031675806118851, "learning_rate": 9.477468716973287e-06, "loss": 0.5535, "step": 776 }, { "epoch": 0.17289719626168223, "grad_norm": 1.1422746637420296, "learning_rate": 9.475863698403103e-06, "loss": 0.5543, "step": 777 }, { "epoch": 0.17311971517578995, "grad_norm": 1.0754565432958556, "learning_rate": 9.474256354940606e-06, "loss": 0.5248, "step": 778 }, { "epoch": 0.17334223408989763, "grad_norm": 1.2113424129186543, "learning_rate": 9.4726466874207e-06, "loss": 0.5585, "step": 779 }, { "epoch": 0.17356475300400534, "grad_norm": 1.1644538938520634, "learning_rate": 9.471034696679489e-06, "loss": 0.5439, "step": 780 }, { "epoch": 0.17378727191811305, "grad_norm": 1.1440966315272414, "learning_rate": 9.46942038355429e-06, "loss": 0.5518, "step": 781 }, { "epoch": 0.17400979083222073, "grad_norm": 1.2116705901496747, "learning_rate": 9.467803748883624e-06, "loss": 0.5632, "step": 782 }, { "epoch": 0.17423230974632845, "grad_norm": 1.1590469359125155, "learning_rate": 9.466184793507215e-06, "loss": 0.5523, "step": 783 }, { "epoch": 0.17445482866043613, "grad_norm": 1.2264304303363158, "learning_rate": 9.464563518265997e-06, "loss": 0.5541, "step": 784 }, { "epoch": 0.17467734757454384, "grad_norm": 1.057124777526008, "learning_rate": 9.462939924002105e-06, "loss": 0.5484, "step": 785 }, { "epoch": 0.17489986648865152, "grad_norm": 1.2109437224924535, "learning_rate": 9.461314011558881e-06, "loss": 0.5584, "step": 786 }, { "epoch": 0.17512238540275923, "grad_norm": 1.1205083117920431, "learning_rate": 9.459685781780874e-06, "loss": 0.5493, "step": 787 }, { "epoch": 0.17534490431686695, "grad_norm": 1.2043534772726656, "learning_rate": 9.45805523551383e-06, "loss": 0.5471, "step": 788 }, { "epoch": 0.17556742323097463, "grad_norm": 1.1838632774398221, "learning_rate": 9.456422373604701e-06, "loss": 0.5527, "step": 789 }, { "epoch": 0.17578994214508234, "grad_norm": 1.1902752781752268, "learning_rate": 9.454787196901646e-06, "loss": 0.5216, "step": 790 }, { "epoch": 0.17601246105919002, "grad_norm": 1.1133484295168388, "learning_rate": 9.453149706254018e-06, "loss": 0.5635, "step": 791 }, { "epoch": 0.17623497997329773, "grad_norm": 1.1742433824644147, "learning_rate": 9.451509902512383e-06, "loss": 0.5598, "step": 792 }, { "epoch": 0.17645749888740542, "grad_norm": 1.201630552440685, "learning_rate": 9.449867786528497e-06, "loss": 0.5527, "step": 793 }, { "epoch": 0.17668001780151313, "grad_norm": 1.212461389887163, "learning_rate": 9.448223359155322e-06, "loss": 0.5649, "step": 794 }, { "epoch": 0.17690253671562084, "grad_norm": 1.0341078280276297, "learning_rate": 9.446576621247025e-06, "loss": 0.5426, "step": 795 }, { "epoch": 0.17712505562972852, "grad_norm": 1.1456896060394528, "learning_rate": 9.444927573658966e-06, "loss": 0.5462, "step": 796 }, { "epoch": 0.17734757454383623, "grad_norm": 1.2340403408770577, "learning_rate": 9.443276217247707e-06, "loss": 0.5482, "step": 797 }, { "epoch": 0.17757009345794392, "grad_norm": 1.1346802113094454, "learning_rate": 9.441622552871015e-06, "loss": 0.5627, "step": 798 }, { "epoch": 0.17779261237205163, "grad_norm": 1.352167436075907, "learning_rate": 9.439966581387845e-06, "loss": 0.551, "step": 799 }, { "epoch": 0.1780151312861593, "grad_norm": 1.137508382501613, "learning_rate": 9.438308303658358e-06, "loss": 0.5653, "step": 800 }, { "epoch": 0.17823765020026702, "grad_norm": 1.081030335982852, "learning_rate": 9.436647720543914e-06, "loss": 0.5511, "step": 801 }, { "epoch": 0.17846016911437473, "grad_norm": 1.1737365672225704, "learning_rate": 9.434984832907063e-06, "loss": 0.5632, "step": 802 }, { "epoch": 0.17868268802848242, "grad_norm": 1.1744434797428216, "learning_rate": 9.43331964161156e-06, "loss": 0.5542, "step": 803 }, { "epoch": 0.17890520694259013, "grad_norm": 1.1881813221905753, "learning_rate": 9.431652147522352e-06, "loss": 0.5554, "step": 804 }, { "epoch": 0.1791277258566978, "grad_norm": 1.1325944865828639, "learning_rate": 9.429982351505585e-06, "loss": 0.5603, "step": 805 }, { "epoch": 0.17935024477080552, "grad_norm": 1.158593740833714, "learning_rate": 9.428310254428597e-06, "loss": 0.5689, "step": 806 }, { "epoch": 0.1795727636849132, "grad_norm": 1.1768208196325396, "learning_rate": 9.426635857159922e-06, "loss": 0.5704, "step": 807 }, { "epoch": 0.17979528259902092, "grad_norm": 1.234838229103689, "learning_rate": 9.424959160569293e-06, "loss": 0.5658, "step": 808 }, { "epoch": 0.18001780151312863, "grad_norm": 1.2140939562516195, "learning_rate": 9.423280165527635e-06, "loss": 0.5596, "step": 809 }, { "epoch": 0.1802403204272363, "grad_norm": 1.126168367090025, "learning_rate": 9.421598872907062e-06, "loss": 0.5299, "step": 810 }, { "epoch": 0.18046283934134402, "grad_norm": 1.1195293844696583, "learning_rate": 9.419915283580892e-06, "loss": 0.5551, "step": 811 }, { "epoch": 0.1806853582554517, "grad_norm": 1.0650755857930492, "learning_rate": 9.418229398423624e-06, "loss": 0.5397, "step": 812 }, { "epoch": 0.18090787716955942, "grad_norm": 1.1221435774112174, "learning_rate": 9.416541218310957e-06, "loss": 0.5509, "step": 813 }, { "epoch": 0.1811303960836671, "grad_norm": 1.3912304205745827, "learning_rate": 9.414850744119783e-06, "loss": 0.5603, "step": 814 }, { "epoch": 0.1813529149977748, "grad_norm": 1.098571785797821, "learning_rate": 9.413157976728178e-06, "loss": 0.5454, "step": 815 }, { "epoch": 0.18157543391188252, "grad_norm": 1.0940863381537012, "learning_rate": 9.41146291701542e-06, "loss": 0.526, "step": 816 }, { "epoch": 0.1817979528259902, "grad_norm": 1.1030307827079697, "learning_rate": 9.409765565861965e-06, "loss": 0.5379, "step": 817 }, { "epoch": 0.18202047174009792, "grad_norm": 1.2062975771070827, "learning_rate": 9.408065924149471e-06, "loss": 0.543, "step": 818 }, { "epoch": 0.1822429906542056, "grad_norm": 1.0970061015748194, "learning_rate": 9.406363992760779e-06, "loss": 0.5615, "step": 819 }, { "epoch": 0.1824655095683133, "grad_norm": 1.2696235872614843, "learning_rate": 9.404659772579921e-06, "loss": 0.5499, "step": 820 }, { "epoch": 0.182688028482421, "grad_norm": 1.2199208708101184, "learning_rate": 9.402953264492119e-06, "loss": 0.5482, "step": 821 }, { "epoch": 0.1829105473965287, "grad_norm": 1.1941815057000713, "learning_rate": 9.401244469383781e-06, "loss": 0.5548, "step": 822 }, { "epoch": 0.18313306631063642, "grad_norm": 1.0956032939421183, "learning_rate": 9.399533388142505e-06, "loss": 0.5448, "step": 823 }, { "epoch": 0.1833555852247441, "grad_norm": 1.060668040692383, "learning_rate": 9.397820021657079e-06, "loss": 0.5389, "step": 824 }, { "epoch": 0.1835781041388518, "grad_norm": 1.1011386012571418, "learning_rate": 9.396104370817467e-06, "loss": 0.541, "step": 825 }, { "epoch": 0.1838006230529595, "grad_norm": 1.1113590779518348, "learning_rate": 9.394386436514834e-06, "loss": 0.5398, "step": 826 }, { "epoch": 0.1840231419670672, "grad_norm": 1.1552157585765515, "learning_rate": 9.392666219641523e-06, "loss": 0.544, "step": 827 }, { "epoch": 0.1842456608811749, "grad_norm": 1.2840714934870907, "learning_rate": 9.390943721091062e-06, "loss": 0.5495, "step": 828 }, { "epoch": 0.1844681797952826, "grad_norm": 1.187454993606685, "learning_rate": 9.389218941758169e-06, "loss": 0.5724, "step": 829 }, { "epoch": 0.1846906987093903, "grad_norm": 1.2048895581478742, "learning_rate": 9.387491882538744e-06, "loss": 0.52, "step": 830 }, { "epoch": 0.184913217623498, "grad_norm": 1.1944401439114856, "learning_rate": 9.385762544329869e-06, "loss": 0.5778, "step": 831 }, { "epoch": 0.1851357365376057, "grad_norm": 1.0444455710877276, "learning_rate": 9.384030928029813e-06, "loss": 0.5434, "step": 832 }, { "epoch": 0.1853582554517134, "grad_norm": 1.2292612891690982, "learning_rate": 9.382297034538026e-06, "loss": 0.5412, "step": 833 }, { "epoch": 0.1855807743658211, "grad_norm": 1.1022303515352634, "learning_rate": 9.380560864755145e-06, "loss": 0.5652, "step": 834 }, { "epoch": 0.18580329327992878, "grad_norm": 1.1291947434238674, "learning_rate": 9.378822419582984e-06, "loss": 0.5536, "step": 835 }, { "epoch": 0.1860258121940365, "grad_norm": 1.0115372670025384, "learning_rate": 9.377081699924544e-06, "loss": 0.539, "step": 836 }, { "epoch": 0.1862483311081442, "grad_norm": 1.0910404922023862, "learning_rate": 9.375338706684003e-06, "loss": 0.5463, "step": 837 }, { "epoch": 0.1864708500222519, "grad_norm": 1.1358734148346463, "learning_rate": 9.37359344076672e-06, "loss": 0.5387, "step": 838 }, { "epoch": 0.1866933689363596, "grad_norm": 1.1462882255521643, "learning_rate": 9.37184590307924e-06, "loss": 0.5507, "step": 839 }, { "epoch": 0.18691588785046728, "grad_norm": 1.120969290399554, "learning_rate": 9.370096094529285e-06, "loss": 0.5419, "step": 840 }, { "epoch": 0.187138406764575, "grad_norm": 1.144582775469928, "learning_rate": 9.36834401602575e-06, "loss": 0.5572, "step": 841 }, { "epoch": 0.18736092567868268, "grad_norm": 1.120849864221797, "learning_rate": 9.36658966847872e-06, "loss": 0.5575, "step": 842 }, { "epoch": 0.1875834445927904, "grad_norm": 1.1264544680123147, "learning_rate": 9.36483305279945e-06, "loss": 0.546, "step": 843 }, { "epoch": 0.1878059635068981, "grad_norm": 1.1249323558581443, "learning_rate": 9.363074169900382e-06, "loss": 0.5487, "step": 844 }, { "epoch": 0.18802848242100578, "grad_norm": 1.1386086651511345, "learning_rate": 9.361313020695126e-06, "loss": 0.5644, "step": 845 }, { "epoch": 0.1882510013351135, "grad_norm": 1.08110138511489, "learning_rate": 9.359549606098474e-06, "loss": 0.5455, "step": 846 }, { "epoch": 0.18847352024922118, "grad_norm": 1.1533623187527993, "learning_rate": 9.357783927026395e-06, "loss": 0.5582, "step": 847 }, { "epoch": 0.1886960391633289, "grad_norm": 1.3056851471993385, "learning_rate": 9.356015984396036e-06, "loss": 0.5666, "step": 848 }, { "epoch": 0.18891855807743657, "grad_norm": 1.115822421275336, "learning_rate": 9.354245779125712e-06, "loss": 0.5608, "step": 849 }, { "epoch": 0.18914107699154428, "grad_norm": 1.183893437818766, "learning_rate": 9.352473312134923e-06, "loss": 0.5553, "step": 850 }, { "epoch": 0.189363595905652, "grad_norm": 1.1072673791461256, "learning_rate": 9.350698584344335e-06, "loss": 0.5562, "step": 851 }, { "epoch": 0.18958611481975968, "grad_norm": 1.1982505016442468, "learning_rate": 9.348921596675797e-06, "loss": 0.5254, "step": 852 }, { "epoch": 0.1898086337338674, "grad_norm": 1.1419618787619403, "learning_rate": 9.347142350052326e-06, "loss": 0.5569, "step": 853 }, { "epoch": 0.19003115264797507, "grad_norm": 1.075332530269893, "learning_rate": 9.345360845398112e-06, "loss": 0.5489, "step": 854 }, { "epoch": 0.19025367156208278, "grad_norm": 1.2463989497864554, "learning_rate": 9.343577083638522e-06, "loss": 0.568, "step": 855 }, { "epoch": 0.19047619047619047, "grad_norm": 1.1312122705025065, "learning_rate": 9.341791065700092e-06, "loss": 0.5579, "step": 856 }, { "epoch": 0.19069870939029818, "grad_norm": 1.0816386495986188, "learning_rate": 9.340002792510532e-06, "loss": 0.5597, "step": 857 }, { "epoch": 0.1909212283044059, "grad_norm": 1.1797291597437511, "learning_rate": 9.338212264998722e-06, "loss": 0.5618, "step": 858 }, { "epoch": 0.19114374721851357, "grad_norm": 1.205112498259838, "learning_rate": 9.336419484094714e-06, "loss": 0.5534, "step": 859 }, { "epoch": 0.19136626613262128, "grad_norm": 1.125253891620594, "learning_rate": 9.334624450729729e-06, "loss": 0.5605, "step": 860 }, { "epoch": 0.19158878504672897, "grad_norm": 1.1453444805672817, "learning_rate": 9.33282716583616e-06, "loss": 0.5642, "step": 861 }, { "epoch": 0.19181130396083668, "grad_norm": 1.1720158051935867, "learning_rate": 9.331027630347567e-06, "loss": 0.5483, "step": 862 }, { "epoch": 0.19203382287494436, "grad_norm": 1.1115985468737293, "learning_rate": 9.329225845198681e-06, "loss": 0.5748, "step": 863 }, { "epoch": 0.19225634178905207, "grad_norm": 1.159132380812796, "learning_rate": 9.327421811325402e-06, "loss": 0.5571, "step": 864 }, { "epoch": 0.19247886070315978, "grad_norm": 1.1268305171690893, "learning_rate": 9.325615529664795e-06, "loss": 0.5542, "step": 865 }, { "epoch": 0.19270137961726747, "grad_norm": 1.1106158918620577, "learning_rate": 9.323807001155098e-06, "loss": 0.5515, "step": 866 }, { "epoch": 0.19292389853137518, "grad_norm": 1.1462232319771715, "learning_rate": 9.32199622673571e-06, "loss": 0.556, "step": 867 }, { "epoch": 0.19314641744548286, "grad_norm": 1.1527492431189044, "learning_rate": 9.3201832073472e-06, "loss": 0.533, "step": 868 }, { "epoch": 0.19336893635959057, "grad_norm": 1.0883033053684275, "learning_rate": 9.318367943931304e-06, "loss": 0.5561, "step": 869 }, { "epoch": 0.19359145527369825, "grad_norm": 1.0856031477738886, "learning_rate": 9.316550437430917e-06, "loss": 0.548, "step": 870 }, { "epoch": 0.19381397418780597, "grad_norm": 1.1100345640590168, "learning_rate": 9.314730688790111e-06, "loss": 0.5369, "step": 871 }, { "epoch": 0.19403649310191368, "grad_norm": 1.2378799151733266, "learning_rate": 9.312908698954113e-06, "loss": 0.5503, "step": 872 }, { "epoch": 0.19425901201602136, "grad_norm": 1.103102427233243, "learning_rate": 9.311084468869314e-06, "loss": 0.5346, "step": 873 }, { "epoch": 0.19448153093012907, "grad_norm": 1.1555722330548683, "learning_rate": 9.309257999483274e-06, "loss": 0.5512, "step": 874 }, { "epoch": 0.19470404984423675, "grad_norm": 1.00045685485685, "learning_rate": 9.307429291744714e-06, "loss": 0.534, "step": 875 }, { "epoch": 0.19492656875834447, "grad_norm": 1.0201356324570787, "learning_rate": 9.305598346603518e-06, "loss": 0.5464, "step": 876 }, { "epoch": 0.19514908767245215, "grad_norm": 1.0495903905467694, "learning_rate": 9.303765165010727e-06, "loss": 0.5546, "step": 877 }, { "epoch": 0.19537160658655986, "grad_norm": 1.2613489641454507, "learning_rate": 9.301929747918555e-06, "loss": 0.5503, "step": 878 }, { "epoch": 0.19559412550066757, "grad_norm": 1.126356580158189, "learning_rate": 9.300092096280367e-06, "loss": 0.5463, "step": 879 }, { "epoch": 0.19581664441477525, "grad_norm": 1.0991367582726248, "learning_rate": 9.29825221105069e-06, "loss": 0.5426, "step": 880 }, { "epoch": 0.19603916332888296, "grad_norm": 1.1740467609366378, "learning_rate": 9.296410093185219e-06, "loss": 0.5583, "step": 881 }, { "epoch": 0.19626168224299065, "grad_norm": 1.1097662826661263, "learning_rate": 9.294565743640797e-06, "loss": 0.5664, "step": 882 }, { "epoch": 0.19648420115709836, "grad_norm": 1.109947144995137, "learning_rate": 9.292719163375437e-06, "loss": 0.5411, "step": 883 }, { "epoch": 0.19670672007120604, "grad_norm": 1.024778111666912, "learning_rate": 9.290870353348302e-06, "loss": 0.5461, "step": 884 }, { "epoch": 0.19692923898531375, "grad_norm": 1.0784778935274266, "learning_rate": 9.289019314519719e-06, "loss": 0.5447, "step": 885 }, { "epoch": 0.19715175789942144, "grad_norm": 1.1351512377461739, "learning_rate": 9.28716604785117e-06, "loss": 0.5296, "step": 886 }, { "epoch": 0.19737427681352915, "grad_norm": 1.0333466366455915, "learning_rate": 9.285310554305298e-06, "loss": 0.54, "step": 887 }, { "epoch": 0.19759679572763686, "grad_norm": 1.2000474182447902, "learning_rate": 9.283452834845894e-06, "loss": 0.5525, "step": 888 }, { "epoch": 0.19781931464174454, "grad_norm": 1.1147491222891004, "learning_rate": 9.281592890437916e-06, "loss": 0.5558, "step": 889 }, { "epoch": 0.19804183355585225, "grad_norm": 1.1489984097689525, "learning_rate": 9.279730722047472e-06, "loss": 0.5374, "step": 890 }, { "epoch": 0.19826435246995994, "grad_norm": 1.0947452221573726, "learning_rate": 9.27786633064182e-06, "loss": 0.5533, "step": 891 }, { "epoch": 0.19848687138406765, "grad_norm": 1.1054950352338828, "learning_rate": 9.275999717189388e-06, "loss": 0.5342, "step": 892 }, { "epoch": 0.19870939029817533, "grad_norm": 1.086521642053307, "learning_rate": 9.274130882659741e-06, "loss": 0.5545, "step": 893 }, { "epoch": 0.19893190921228304, "grad_norm": 1.0582440038921517, "learning_rate": 9.272259828023609e-06, "loss": 0.549, "step": 894 }, { "epoch": 0.19915442812639075, "grad_norm": 1.09367785827472, "learning_rate": 9.27038655425287e-06, "loss": 0.5537, "step": 895 }, { "epoch": 0.19937694704049844, "grad_norm": 1.104576506991253, "learning_rate": 9.268511062320559e-06, "loss": 0.542, "step": 896 }, { "epoch": 0.19959946595460615, "grad_norm": 1.0876342142614834, "learning_rate": 9.266633353200857e-06, "loss": 0.553, "step": 897 }, { "epoch": 0.19982198486871383, "grad_norm": 1.1605748108855616, "learning_rate": 9.264753427869103e-06, "loss": 0.5588, "step": 898 }, { "epoch": 0.20004450378282154, "grad_norm": 1.1492198482338953, "learning_rate": 9.26287128730178e-06, "loss": 0.5631, "step": 899 }, { "epoch": 0.20026702269692923, "grad_norm": 1.1509170938289177, "learning_rate": 9.260986932476532e-06, "loss": 0.5501, "step": 900 }, { "epoch": 0.20048954161103694, "grad_norm": 1.104567902847753, "learning_rate": 9.259100364372141e-06, "loss": 0.5479, "step": 901 }, { "epoch": 0.20071206052514465, "grad_norm": 1.0708115867161527, "learning_rate": 9.25721158396855e-06, "loss": 0.5451, "step": 902 }, { "epoch": 0.20093457943925233, "grad_norm": 1.1041446198569824, "learning_rate": 9.255320592246842e-06, "loss": 0.5608, "step": 903 }, { "epoch": 0.20115709835336004, "grad_norm": 1.2511350274527033, "learning_rate": 9.253427390189253e-06, "loss": 0.5545, "step": 904 }, { "epoch": 0.20137961726746773, "grad_norm": 1.0942109323430425, "learning_rate": 9.25153197877917e-06, "loss": 0.5505, "step": 905 }, { "epoch": 0.20160213618157544, "grad_norm": 1.1427958314917261, "learning_rate": 9.24963435900112e-06, "loss": 0.5623, "step": 906 }, { "epoch": 0.20182465509568312, "grad_norm": 1.1581368273465094, "learning_rate": 9.247734531840784e-06, "loss": 0.5473, "step": 907 }, { "epoch": 0.20204717400979083, "grad_norm": 1.0681431355900937, "learning_rate": 9.245832498284986e-06, "loss": 0.5492, "step": 908 }, { "epoch": 0.20226969292389854, "grad_norm": 1.1688775685951933, "learning_rate": 9.243928259321694e-06, "loss": 0.5589, "step": 909 }, { "epoch": 0.20249221183800623, "grad_norm": 1.2191523525680696, "learning_rate": 9.242021815940031e-06, "loss": 0.5451, "step": 910 }, { "epoch": 0.20271473075211394, "grad_norm": 1.1772025168330955, "learning_rate": 9.240113169130252e-06, "loss": 0.5414, "step": 911 }, { "epoch": 0.20293724966622162, "grad_norm": 1.1139227666270273, "learning_rate": 9.238202319883767e-06, "loss": 0.5543, "step": 912 }, { "epoch": 0.20315976858032933, "grad_norm": 1.1741798683515303, "learning_rate": 9.236289269193127e-06, "loss": 0.5634, "step": 913 }, { "epoch": 0.20338228749443701, "grad_norm": 1.0860031737795484, "learning_rate": 9.234374018052018e-06, "loss": 0.5512, "step": 914 }, { "epoch": 0.20360480640854473, "grad_norm": 1.042048863835587, "learning_rate": 9.232456567455288e-06, "loss": 0.5399, "step": 915 }, { "epoch": 0.20382732532265244, "grad_norm": 1.0908071663578922, "learning_rate": 9.230536918398906e-06, "loss": 0.5543, "step": 916 }, { "epoch": 0.20404984423676012, "grad_norm": 1.1649008830653822, "learning_rate": 9.228615071879998e-06, "loss": 0.5556, "step": 917 }, { "epoch": 0.20427236315086783, "grad_norm": 1.1111072582055386, "learning_rate": 9.226691028896823e-06, "loss": 0.552, "step": 918 }, { "epoch": 0.20449488206497551, "grad_norm": 1.09024771872841, "learning_rate": 9.22476479044879e-06, "loss": 0.5382, "step": 919 }, { "epoch": 0.20471740097908322, "grad_norm": 1.1268543114976852, "learning_rate": 9.222836357536437e-06, "loss": 0.5527, "step": 920 }, { "epoch": 0.2049399198931909, "grad_norm": 1.2109064136178418, "learning_rate": 9.22090573116145e-06, "loss": 0.5297, "step": 921 }, { "epoch": 0.20516243880729862, "grad_norm": 1.1585133432323202, "learning_rate": 9.21897291232665e-06, "loss": 0.5407, "step": 922 }, { "epoch": 0.20538495772140633, "grad_norm": 1.2133484766386367, "learning_rate": 9.217037902036002e-06, "loss": 0.5489, "step": 923 }, { "epoch": 0.205607476635514, "grad_norm": 1.2112868391926253, "learning_rate": 9.215100701294604e-06, "loss": 0.5492, "step": 924 }, { "epoch": 0.20582999554962172, "grad_norm": 1.1371347596211532, "learning_rate": 9.213161311108691e-06, "loss": 0.5498, "step": 925 }, { "epoch": 0.2060525144637294, "grad_norm": 1.0896927237430745, "learning_rate": 9.211219732485644e-06, "loss": 0.5325, "step": 926 }, { "epoch": 0.20627503337783712, "grad_norm": 1.1401755028390195, "learning_rate": 9.209275966433971e-06, "loss": 0.5367, "step": 927 }, { "epoch": 0.2064975522919448, "grad_norm": 1.1690449310109228, "learning_rate": 9.20733001396332e-06, "loss": 0.5503, "step": 928 }, { "epoch": 0.2067200712060525, "grad_norm": 1.0870137024281639, "learning_rate": 9.205381876084476e-06, "loss": 0.5525, "step": 929 }, { "epoch": 0.20694259012016022, "grad_norm": 1.10442153936924, "learning_rate": 9.203431553809357e-06, "loss": 0.5464, "step": 930 }, { "epoch": 0.2071651090342679, "grad_norm": 1.1610150146552938, "learning_rate": 9.201479048151015e-06, "loss": 0.5517, "step": 931 }, { "epoch": 0.20738762794837562, "grad_norm": 1.1056836463760238, "learning_rate": 9.199524360123641e-06, "loss": 0.5399, "step": 932 }, { "epoch": 0.2076101468624833, "grad_norm": 1.3140692511226753, "learning_rate": 9.197567490742554e-06, "loss": 0.5396, "step": 933 }, { "epoch": 0.207832665776591, "grad_norm": 1.1780771739953673, "learning_rate": 9.195608441024207e-06, "loss": 0.5389, "step": 934 }, { "epoch": 0.2080551846906987, "grad_norm": 1.080282338916025, "learning_rate": 9.19364721198619e-06, "loss": 0.5548, "step": 935 }, { "epoch": 0.2082777036048064, "grad_norm": 1.0709729803697947, "learning_rate": 9.19168380464722e-06, "loss": 0.5426, "step": 936 }, { "epoch": 0.20850022251891412, "grad_norm": 1.1849893921357024, "learning_rate": 9.189718220027147e-06, "loss": 0.546, "step": 937 }, { "epoch": 0.2087227414330218, "grad_norm": 1.170561796346696, "learning_rate": 9.187750459146954e-06, "loss": 0.5261, "step": 938 }, { "epoch": 0.2089452603471295, "grad_norm": 1.2041256268657932, "learning_rate": 9.185780523028748e-06, "loss": 0.5569, "step": 939 }, { "epoch": 0.2091677792612372, "grad_norm": 1.1890034990203964, "learning_rate": 9.183808412695775e-06, "loss": 0.5479, "step": 940 }, { "epoch": 0.2093902981753449, "grad_norm": 1.044423784062905, "learning_rate": 9.181834129172406e-06, "loss": 0.5438, "step": 941 }, { "epoch": 0.2096128170894526, "grad_norm": 1.137453422350518, "learning_rate": 9.179857673484135e-06, "loss": 0.5514, "step": 942 }, { "epoch": 0.2098353360035603, "grad_norm": 1.1020916955768223, "learning_rate": 9.177879046657599e-06, "loss": 0.565, "step": 943 }, { "epoch": 0.210057854917668, "grad_norm": 1.144397272547214, "learning_rate": 9.175898249720545e-06, "loss": 0.5425, "step": 944 }, { "epoch": 0.2102803738317757, "grad_norm": 1.1402497122017299, "learning_rate": 9.17391528370186e-06, "loss": 0.5544, "step": 945 }, { "epoch": 0.2105028927458834, "grad_norm": 1.13594253435123, "learning_rate": 9.171930149631553e-06, "loss": 0.5468, "step": 946 }, { "epoch": 0.2107254116599911, "grad_norm": 1.0880739493652472, "learning_rate": 9.16994284854076e-06, "loss": 0.5447, "step": 947 }, { "epoch": 0.2109479305740988, "grad_norm": 1.7335299214775883, "learning_rate": 9.167953381461744e-06, "loss": 0.5389, "step": 948 }, { "epoch": 0.21117044948820649, "grad_norm": 1.2480617834549321, "learning_rate": 9.165961749427887e-06, "loss": 0.5393, "step": 949 }, { "epoch": 0.2113929684023142, "grad_norm": 1.0839632919457807, "learning_rate": 9.163967953473705e-06, "loss": 0.5344, "step": 950 }, { "epoch": 0.2116154873164219, "grad_norm": 1.1071949691783984, "learning_rate": 9.161971994634829e-06, "loss": 0.5444, "step": 951 }, { "epoch": 0.2118380062305296, "grad_norm": 1.206626796668822, "learning_rate": 9.159973873948019e-06, "loss": 0.5362, "step": 952 }, { "epoch": 0.2120605251446373, "grad_norm": 1.1913298317346088, "learning_rate": 9.157973592451154e-06, "loss": 0.5512, "step": 953 }, { "epoch": 0.21228304405874499, "grad_norm": 1.1585903886064202, "learning_rate": 9.155971151183242e-06, "loss": 0.5448, "step": 954 }, { "epoch": 0.2125055629728527, "grad_norm": 1.059350113686, "learning_rate": 9.153966551184406e-06, "loss": 0.5402, "step": 955 }, { "epoch": 0.21272808188696038, "grad_norm": 1.2503680884082444, "learning_rate": 9.151959793495894e-06, "loss": 0.5433, "step": 956 }, { "epoch": 0.2129506008010681, "grad_norm": 1.1585172922500382, "learning_rate": 9.149950879160072e-06, "loss": 0.5443, "step": 957 }, { "epoch": 0.2131731197151758, "grad_norm": 1.326970847601286, "learning_rate": 9.14793980922043e-06, "loss": 0.5529, "step": 958 }, { "epoch": 0.21339563862928349, "grad_norm": 1.1250696021702227, "learning_rate": 9.145926584721574e-06, "loss": 0.5321, "step": 959 }, { "epoch": 0.2136181575433912, "grad_norm": 1.1408133614023874, "learning_rate": 9.14391120670923e-06, "loss": 0.5651, "step": 960 }, { "epoch": 0.21384067645749888, "grad_norm": 1.1615446038503234, "learning_rate": 9.141893676230246e-06, "loss": 0.5448, "step": 961 }, { "epoch": 0.2140631953716066, "grad_norm": 1.1557892123371376, "learning_rate": 9.139873994332583e-06, "loss": 0.5457, "step": 962 }, { "epoch": 0.21428571428571427, "grad_norm": 1.0758336804667015, "learning_rate": 9.13785216206532e-06, "loss": 0.5488, "step": 963 }, { "epoch": 0.21450823319982198, "grad_norm": 1.1268826942487096, "learning_rate": 9.135828180478663e-06, "loss": 0.5285, "step": 964 }, { "epoch": 0.2147307521139297, "grad_norm": 1.141974548194859, "learning_rate": 9.133802050623916e-06, "loss": 0.5336, "step": 965 }, { "epoch": 0.21495327102803738, "grad_norm": 1.0963082128526984, "learning_rate": 9.131773773553517e-06, "loss": 0.5412, "step": 966 }, { "epoch": 0.2151757899421451, "grad_norm": 1.1489167786180818, "learning_rate": 9.129743350321007e-06, "loss": 0.5568, "step": 967 }, { "epoch": 0.21539830885625277, "grad_norm": 1.092895254687159, "learning_rate": 9.127710781981047e-06, "loss": 0.5517, "step": 968 }, { "epoch": 0.21562082777036048, "grad_norm": 1.221406884533832, "learning_rate": 9.125676069589414e-06, "loss": 0.5419, "step": 969 }, { "epoch": 0.21584334668446817, "grad_norm": 1.1468194928015705, "learning_rate": 9.123639214202991e-06, "loss": 0.5454, "step": 970 }, { "epoch": 0.21606586559857588, "grad_norm": 1.163967135186286, "learning_rate": 9.121600216879782e-06, "loss": 0.5507, "step": 971 }, { "epoch": 0.2162883845126836, "grad_norm": 1.091344511913713, "learning_rate": 9.119559078678903e-06, "loss": 0.5369, "step": 972 }, { "epoch": 0.21651090342679127, "grad_norm": 1.1975387889653861, "learning_rate": 9.117515800660578e-06, "loss": 0.5578, "step": 973 }, { "epoch": 0.21673342234089898, "grad_norm": 1.1340390973773955, "learning_rate": 9.115470383886144e-06, "loss": 0.5315, "step": 974 }, { "epoch": 0.21695594125500667, "grad_norm": 1.178013084327542, "learning_rate": 9.11342282941805e-06, "loss": 0.5395, "step": 975 }, { "epoch": 0.21717846016911438, "grad_norm": 1.194773996561476, "learning_rate": 9.111373138319852e-06, "loss": 0.5416, "step": 976 }, { "epoch": 0.21740097908322206, "grad_norm": 1.1341177392574153, "learning_rate": 9.109321311656224e-06, "loss": 0.5597, "step": 977 }, { "epoch": 0.21762349799732977, "grad_norm": 1.0791521775722333, "learning_rate": 9.107267350492938e-06, "loss": 0.5537, "step": 978 }, { "epoch": 0.21784601691143748, "grad_norm": 1.2290661180925302, "learning_rate": 9.105211255896885e-06, "loss": 0.5572, "step": 979 }, { "epoch": 0.21806853582554517, "grad_norm": 1.1913167395850333, "learning_rate": 9.103153028936058e-06, "loss": 0.5344, "step": 980 }, { "epoch": 0.21829105473965288, "grad_norm": 1.1434025073848917, "learning_rate": 9.101092670679556e-06, "loss": 0.5306, "step": 981 }, { "epoch": 0.21851357365376056, "grad_norm": 1.0887025152241463, "learning_rate": 9.099030182197594e-06, "loss": 0.5661, "step": 982 }, { "epoch": 0.21873609256786827, "grad_norm": 1.1124848270871739, "learning_rate": 9.096965564561483e-06, "loss": 0.5506, "step": 983 }, { "epoch": 0.21895861148197596, "grad_norm": 1.1338377842780414, "learning_rate": 9.09489881884365e-06, "loss": 0.545, "step": 984 }, { "epoch": 0.21918113039608367, "grad_norm": 1.1026036066599243, "learning_rate": 9.092829946117616e-06, "loss": 0.5465, "step": 985 }, { "epoch": 0.21940364931019138, "grad_norm": 1.137832829477167, "learning_rate": 9.090758947458018e-06, "loss": 0.559, "step": 986 }, { "epoch": 0.21962616822429906, "grad_norm": 1.1813006360902776, "learning_rate": 9.08868582394059e-06, "loss": 0.5474, "step": 987 }, { "epoch": 0.21984868713840677, "grad_norm": 1.1201145539327002, "learning_rate": 9.086610576642173e-06, "loss": 0.5469, "step": 988 }, { "epoch": 0.22007120605251446, "grad_norm": 1.1604766960285018, "learning_rate": 9.084533206640707e-06, "loss": 0.5397, "step": 989 }, { "epoch": 0.22029372496662217, "grad_norm": 1.094382894463383, "learning_rate": 9.082453715015242e-06, "loss": 0.5513, "step": 990 }, { "epoch": 0.22051624388072985, "grad_norm": 1.0945901194860117, "learning_rate": 9.080372102845923e-06, "loss": 0.5449, "step": 991 }, { "epoch": 0.22073876279483756, "grad_norm": 1.0863153598639101, "learning_rate": 9.078288371214e-06, "loss": 0.5314, "step": 992 }, { "epoch": 0.22096128170894527, "grad_norm": 1.1431484793071989, "learning_rate": 9.076202521201824e-06, "loss": 0.5466, "step": 993 }, { "epoch": 0.22118380062305296, "grad_norm": 1.1723106599100053, "learning_rate": 9.074114553892844e-06, "loss": 0.5439, "step": 994 }, { "epoch": 0.22140631953716067, "grad_norm": 1.1008227115539697, "learning_rate": 9.072024470371612e-06, "loss": 0.5455, "step": 995 }, { "epoch": 0.22162883845126835, "grad_norm": 1.1611615136579478, "learning_rate": 9.069932271723774e-06, "loss": 0.5455, "step": 996 }, { "epoch": 0.22185135736537606, "grad_norm": 1.1363485723648683, "learning_rate": 9.067837959036083e-06, "loss": 0.5526, "step": 997 }, { "epoch": 0.22207387627948375, "grad_norm": 1.2024744237309868, "learning_rate": 9.065741533396382e-06, "loss": 0.5502, "step": 998 }, { "epoch": 0.22229639519359146, "grad_norm": 1.100342796620659, "learning_rate": 9.063642995893615e-06, "loss": 0.5298, "step": 999 }, { "epoch": 0.22251891410769917, "grad_norm": 1.180871771780787, "learning_rate": 9.061542347617825e-06, "loss": 0.5472, "step": 1000 }, { "epoch": 0.22274143302180685, "grad_norm": 1.1010222077016985, "learning_rate": 9.059439589660145e-06, "loss": 0.5612, "step": 1001 }, { "epoch": 0.22296395193591456, "grad_norm": 1.1151644634528648, "learning_rate": 9.057334723112812e-06, "loss": 0.5403, "step": 1002 }, { "epoch": 0.22318647085002224, "grad_norm": 1.132913341287773, "learning_rate": 9.055227749069152e-06, "loss": 0.5473, "step": 1003 }, { "epoch": 0.22340898976412996, "grad_norm": 1.1952712565038095, "learning_rate": 9.05311866862359e-06, "loss": 0.5454, "step": 1004 }, { "epoch": 0.22363150867823764, "grad_norm": 1.0560619193558436, "learning_rate": 9.05100748287164e-06, "loss": 0.5343, "step": 1005 }, { "epoch": 0.22385402759234535, "grad_norm": 1.088346090468417, "learning_rate": 9.048894192909913e-06, "loss": 0.5436, "step": 1006 }, { "epoch": 0.22407654650645306, "grad_norm": 1.0812089828427158, "learning_rate": 9.046778799836115e-06, "loss": 0.5297, "step": 1007 }, { "epoch": 0.22429906542056074, "grad_norm": 1.1524144796757119, "learning_rate": 9.04466130474904e-06, "loss": 0.5476, "step": 1008 }, { "epoch": 0.22452158433466846, "grad_norm": 1.152969533447044, "learning_rate": 9.042541708748577e-06, "loss": 0.5349, "step": 1009 }, { "epoch": 0.22474410324877614, "grad_norm": 1.1012005816802803, "learning_rate": 9.040420012935705e-06, "loss": 0.5288, "step": 1010 }, { "epoch": 0.22496662216288385, "grad_norm": 1.157084155570933, "learning_rate": 9.038296218412492e-06, "loss": 0.5546, "step": 1011 }, { "epoch": 0.22518914107699153, "grad_norm": 1.0820209075901337, "learning_rate": 9.0361703262821e-06, "loss": 0.5456, "step": 1012 }, { "epoch": 0.22541165999109924, "grad_norm": 1.1796406509416058, "learning_rate": 9.034042337648778e-06, "loss": 0.5411, "step": 1013 }, { "epoch": 0.22563417890520696, "grad_norm": 1.1919199921473882, "learning_rate": 9.031912253617865e-06, "loss": 0.5595, "step": 1014 }, { "epoch": 0.22585669781931464, "grad_norm": 1.1505575561728882, "learning_rate": 9.029780075295787e-06, "loss": 0.5384, "step": 1015 }, { "epoch": 0.22607921673342235, "grad_norm": 1.1593500701561845, "learning_rate": 9.02764580379006e-06, "loss": 0.5389, "step": 1016 }, { "epoch": 0.22630173564753003, "grad_norm": 1.0946802906808029, "learning_rate": 9.025509440209284e-06, "loss": 0.5374, "step": 1017 }, { "epoch": 0.22652425456163774, "grad_norm": 1.1348352405292346, "learning_rate": 9.023370985663147e-06, "loss": 0.5374, "step": 1018 }, { "epoch": 0.22674677347574543, "grad_norm": 1.1392813387710212, "learning_rate": 9.021230441262427e-06, "loss": 0.5402, "step": 1019 }, { "epoch": 0.22696929238985314, "grad_norm": 1.192116523107714, "learning_rate": 9.019087808118982e-06, "loss": 0.5737, "step": 1020 }, { "epoch": 0.22719181130396085, "grad_norm": 1.0825684611323718, "learning_rate": 9.016943087345759e-06, "loss": 0.5513, "step": 1021 }, { "epoch": 0.22741433021806853, "grad_norm": 1.0338390548665155, "learning_rate": 9.014796280056786e-06, "loss": 0.5227, "step": 1022 }, { "epoch": 0.22763684913217624, "grad_norm": 1.0723257672361226, "learning_rate": 9.012647387367179e-06, "loss": 0.5327, "step": 1023 }, { "epoch": 0.22785936804628393, "grad_norm": 1.1332084195046002, "learning_rate": 9.01049641039313e-06, "loss": 0.5499, "step": 1024 }, { "epoch": 0.22808188696039164, "grad_norm": 1.6766457284563032, "learning_rate": 9.008343350251923e-06, "loss": 0.5568, "step": 1025 }, { "epoch": 0.22830440587449932, "grad_norm": 1.1014409481364824, "learning_rate": 9.006188208061916e-06, "loss": 0.5388, "step": 1026 }, { "epoch": 0.22852692478860703, "grad_norm": 1.0866311621532567, "learning_rate": 9.004030984942555e-06, "loss": 0.5525, "step": 1027 }, { "epoch": 0.22874944370271474, "grad_norm": 1.1727205861434515, "learning_rate": 9.001871682014361e-06, "loss": 0.5607, "step": 1028 }, { "epoch": 0.22897196261682243, "grad_norm": 1.1605044426107782, "learning_rate": 8.999710300398939e-06, "loss": 0.5284, "step": 1029 }, { "epoch": 0.22919448153093014, "grad_norm": 1.2247913959688528, "learning_rate": 8.997546841218971e-06, "loss": 0.5305, "step": 1030 }, { "epoch": 0.22941700044503782, "grad_norm": 1.1789300618960368, "learning_rate": 8.995381305598224e-06, "loss": 0.5408, "step": 1031 }, { "epoch": 0.22963951935914553, "grad_norm": 1.1213641181610554, "learning_rate": 8.993213694661537e-06, "loss": 0.5323, "step": 1032 }, { "epoch": 0.22986203827325322, "grad_norm": 1.341435322407785, "learning_rate": 8.99104400953483e-06, "loss": 0.534, "step": 1033 }, { "epoch": 0.23008455718736093, "grad_norm": 1.061540045170406, "learning_rate": 8.988872251345097e-06, "loss": 0.5351, "step": 1034 }, { "epoch": 0.23030707610146864, "grad_norm": 1.1624341365746165, "learning_rate": 8.986698421220416e-06, "loss": 0.5352, "step": 1035 }, { "epoch": 0.23052959501557632, "grad_norm": 1.103409170070339, "learning_rate": 8.984522520289934e-06, "loss": 0.5296, "step": 1036 }, { "epoch": 0.23075211392968403, "grad_norm": 1.1503852873742055, "learning_rate": 8.982344549683878e-06, "loss": 0.5522, "step": 1037 }, { "epoch": 0.23097463284379172, "grad_norm": 1.180269484193612, "learning_rate": 8.980164510533548e-06, "loss": 0.5505, "step": 1038 }, { "epoch": 0.23119715175789943, "grad_norm": 1.1160407148423075, "learning_rate": 8.977982403971319e-06, "loss": 0.5511, "step": 1039 }, { "epoch": 0.2314196706720071, "grad_norm": 1.088720778470974, "learning_rate": 8.97579823113064e-06, "loss": 0.537, "step": 1040 }, { "epoch": 0.23164218958611482, "grad_norm": 1.1479204554625715, "learning_rate": 8.973611993146032e-06, "loss": 0.5331, "step": 1041 }, { "epoch": 0.23186470850022253, "grad_norm": 1.097624960680709, "learning_rate": 8.971423691153094e-06, "loss": 0.5534, "step": 1042 }, { "epoch": 0.23208722741433022, "grad_norm": 1.1471515507502972, "learning_rate": 8.969233326288486e-06, "loss": 0.5692, "step": 1043 }, { "epoch": 0.23230974632843793, "grad_norm": 1.0033823672965787, "learning_rate": 8.967040899689953e-06, "loss": 0.5626, "step": 1044 }, { "epoch": 0.2325322652425456, "grad_norm": 1.059738220030107, "learning_rate": 8.964846412496302e-06, "loss": 0.5462, "step": 1045 }, { "epoch": 0.23275478415665332, "grad_norm": 1.0608705662494566, "learning_rate": 8.962649865847413e-06, "loss": 0.5314, "step": 1046 }, { "epoch": 0.232977303070761, "grad_norm": 1.1512476918787122, "learning_rate": 8.960451260884233e-06, "loss": 0.5408, "step": 1047 }, { "epoch": 0.23319982198486872, "grad_norm": 1.102215164972978, "learning_rate": 8.958250598748785e-06, "loss": 0.5481, "step": 1048 }, { "epoch": 0.2334223408989764, "grad_norm": 1.0509004160973643, "learning_rate": 8.956047880584153e-06, "loss": 0.5293, "step": 1049 }, { "epoch": 0.2336448598130841, "grad_norm": 1.0818727616295736, "learning_rate": 8.953843107534492e-06, "loss": 0.5619, "step": 1050 }, { "epoch": 0.23386737872719182, "grad_norm": 1.1111045113544429, "learning_rate": 8.951636280745028e-06, "loss": 0.5398, "step": 1051 }, { "epoch": 0.2340898976412995, "grad_norm": 1.0661682144772868, "learning_rate": 8.949427401362047e-06, "loss": 0.5499, "step": 1052 }, { "epoch": 0.23431241655540722, "grad_norm": 1.0549793040590925, "learning_rate": 8.947216470532904e-06, "loss": 0.5386, "step": 1053 }, { "epoch": 0.2345349354695149, "grad_norm": 1.1563741169886517, "learning_rate": 8.945003489406023e-06, "loss": 0.5332, "step": 1054 }, { "epoch": 0.2347574543836226, "grad_norm": 1.2356259028289613, "learning_rate": 8.94278845913089e-06, "loss": 0.5463, "step": 1055 }, { "epoch": 0.2349799732977303, "grad_norm": 1.204788372644096, "learning_rate": 8.94057138085805e-06, "loss": 0.556, "step": 1056 }, { "epoch": 0.235202492211838, "grad_norm": 1.0504735726573913, "learning_rate": 8.938352255739124e-06, "loss": 0.5343, "step": 1057 }, { "epoch": 0.23542501112594572, "grad_norm": 1.0483050104005072, "learning_rate": 8.936131084926785e-06, "loss": 0.5292, "step": 1058 }, { "epoch": 0.2356475300400534, "grad_norm": 1.0785933999908202, "learning_rate": 8.933907869574776e-06, "loss": 0.5501, "step": 1059 }, { "epoch": 0.2358700489541611, "grad_norm": 1.2420589670813487, "learning_rate": 8.931682610837897e-06, "loss": 0.552, "step": 1060 }, { "epoch": 0.2360925678682688, "grad_norm": 1.1886797790133108, "learning_rate": 8.929455309872011e-06, "loss": 0.5583, "step": 1061 }, { "epoch": 0.2363150867823765, "grad_norm": 1.0574770736886787, "learning_rate": 8.927225967834045e-06, "loss": 0.5337, "step": 1062 }, { "epoch": 0.2365376056964842, "grad_norm": 1.1241319220201216, "learning_rate": 8.92499458588198e-06, "loss": 0.5366, "step": 1063 }, { "epoch": 0.2367601246105919, "grad_norm": 1.1203851254303805, "learning_rate": 8.92276116517486e-06, "loss": 0.5449, "step": 1064 }, { "epoch": 0.2369826435246996, "grad_norm": 1.115426652800492, "learning_rate": 8.920525706872791e-06, "loss": 0.5495, "step": 1065 }, { "epoch": 0.2372051624388073, "grad_norm": 1.0955663201081831, "learning_rate": 8.918288212136935e-06, "loss": 0.5415, "step": 1066 }, { "epoch": 0.237427681352915, "grad_norm": 1.1948915920918557, "learning_rate": 8.916048682129504e-06, "loss": 0.5359, "step": 1067 }, { "epoch": 0.2376502002670227, "grad_norm": 1.286028805064918, "learning_rate": 8.913807118013782e-06, "loss": 0.5271, "step": 1068 }, { "epoch": 0.2378727191811304, "grad_norm": 1.1694830099971891, "learning_rate": 8.911563520954099e-06, "loss": 0.5539, "step": 1069 }, { "epoch": 0.23809523809523808, "grad_norm": 1.1065131844477718, "learning_rate": 8.909317892115842e-06, "loss": 0.5566, "step": 1070 }, { "epoch": 0.2383177570093458, "grad_norm": 1.1480970889500621, "learning_rate": 8.907070232665457e-06, "loss": 0.5418, "step": 1071 }, { "epoch": 0.2385402759234535, "grad_norm": 1.2226293831452488, "learning_rate": 8.904820543770445e-06, "loss": 0.5369, "step": 1072 }, { "epoch": 0.2387627948375612, "grad_norm": 1.1313936918256582, "learning_rate": 8.902568826599354e-06, "loss": 0.5441, "step": 1073 }, { "epoch": 0.2389853137516689, "grad_norm": 1.1980515830634528, "learning_rate": 8.900315082321795e-06, "loss": 0.5499, "step": 1074 }, { "epoch": 0.23920783266577658, "grad_norm": 1.0942780295824934, "learning_rate": 8.898059312108427e-06, "loss": 0.5336, "step": 1075 }, { "epoch": 0.2394303515798843, "grad_norm": 1.0961882735704547, "learning_rate": 8.89580151713096e-06, "loss": 0.532, "step": 1076 }, { "epoch": 0.23965287049399198, "grad_norm": 1.217620737116737, "learning_rate": 8.89354169856216e-06, "loss": 0.5319, "step": 1077 }, { "epoch": 0.2398753894080997, "grad_norm": 1.1927015282272555, "learning_rate": 8.89127985757584e-06, "loss": 0.545, "step": 1078 }, { "epoch": 0.2400979083222074, "grad_norm": 1.080353858815883, "learning_rate": 8.889015995346865e-06, "loss": 0.5457, "step": 1079 }, { "epoch": 0.24032042723631508, "grad_norm": 1.1529228932586861, "learning_rate": 8.88675011305115e-06, "loss": 0.5472, "step": 1080 }, { "epoch": 0.2405429461504228, "grad_norm": 1.0884374475422336, "learning_rate": 8.884482211865663e-06, "loss": 0.5422, "step": 1081 }, { "epoch": 0.24076546506453048, "grad_norm": 1.12917079632197, "learning_rate": 8.882212292968412e-06, "loss": 0.5302, "step": 1082 }, { "epoch": 0.2409879839786382, "grad_norm": 1.019575725821414, "learning_rate": 8.879940357538462e-06, "loss": 0.5278, "step": 1083 }, { "epoch": 0.24121050289274587, "grad_norm": 1.0748932401578903, "learning_rate": 8.87766640675592e-06, "loss": 0.5369, "step": 1084 }, { "epoch": 0.24143302180685358, "grad_norm": 1.0879318803335645, "learning_rate": 8.87539044180194e-06, "loss": 0.5445, "step": 1085 }, { "epoch": 0.2416555407209613, "grad_norm": 1.1055556045427775, "learning_rate": 8.873112463858726e-06, "loss": 0.5381, "step": 1086 }, { "epoch": 0.24187805963506898, "grad_norm": 1.169690925027173, "learning_rate": 8.870832474109525e-06, "loss": 0.5471, "step": 1087 }, { "epoch": 0.2421005785491767, "grad_norm": 1.2960789211088288, "learning_rate": 8.868550473738629e-06, "loss": 0.5389, "step": 1088 }, { "epoch": 0.24232309746328437, "grad_norm": 1.167481872227059, "learning_rate": 8.866266463931374e-06, "loss": 0.5646, "step": 1089 }, { "epoch": 0.24254561637739208, "grad_norm": 1.1428432929315862, "learning_rate": 8.86398044587414e-06, "loss": 0.5441, "step": 1090 }, { "epoch": 0.24276813529149976, "grad_norm": 1.1084138071758454, "learning_rate": 8.861692420754353e-06, "loss": 0.5331, "step": 1091 }, { "epoch": 0.24299065420560748, "grad_norm": 1.127128624489013, "learning_rate": 8.859402389760475e-06, "loss": 0.5357, "step": 1092 }, { "epoch": 0.2432131731197152, "grad_norm": 1.0845797092376979, "learning_rate": 8.857110354082018e-06, "loss": 0.5477, "step": 1093 }, { "epoch": 0.24343569203382287, "grad_norm": 1.1275215027867693, "learning_rate": 8.854816314909527e-06, "loss": 0.5444, "step": 1094 }, { "epoch": 0.24365821094793058, "grad_norm": 1.0484724598242356, "learning_rate": 8.852520273434597e-06, "loss": 0.5407, "step": 1095 }, { "epoch": 0.24388072986203826, "grad_norm": 1.145730037639104, "learning_rate": 8.850222230849854e-06, "loss": 0.5461, "step": 1096 }, { "epoch": 0.24410324877614598, "grad_norm": 1.1078600443003823, "learning_rate": 8.847922188348969e-06, "loss": 0.5556, "step": 1097 }, { "epoch": 0.24432576769025366, "grad_norm": 1.1169829884854099, "learning_rate": 8.84562014712665e-06, "loss": 0.5419, "step": 1098 }, { "epoch": 0.24454828660436137, "grad_norm": 1.1849992785779824, "learning_rate": 8.843316108378642e-06, "loss": 0.5587, "step": 1099 }, { "epoch": 0.24477080551846908, "grad_norm": 1.1763959877761982, "learning_rate": 8.841010073301733e-06, "loss": 0.5403, "step": 1100 }, { "epoch": 0.24499332443257676, "grad_norm": 1.183233044332581, "learning_rate": 8.838702043093739e-06, "loss": 0.5553, "step": 1101 }, { "epoch": 0.24521584334668448, "grad_norm": 1.2500860246089822, "learning_rate": 8.83639201895352e-06, "loss": 0.5391, "step": 1102 }, { "epoch": 0.24543836226079216, "grad_norm": 1.0848133381766079, "learning_rate": 8.834080002080968e-06, "loss": 0.5328, "step": 1103 }, { "epoch": 0.24566088117489987, "grad_norm": 1.0787150953956803, "learning_rate": 8.831765993677012e-06, "loss": 0.5396, "step": 1104 }, { "epoch": 0.24588340008900755, "grad_norm": 1.1205157422255314, "learning_rate": 8.829449994943614e-06, "loss": 0.5393, "step": 1105 }, { "epoch": 0.24610591900311526, "grad_norm": 1.1256856896051552, "learning_rate": 8.82713200708377e-06, "loss": 0.5417, "step": 1106 }, { "epoch": 0.24632843791722298, "grad_norm": 1.1637515489158259, "learning_rate": 8.824812031301511e-06, "loss": 0.5274, "step": 1107 }, { "epoch": 0.24655095683133066, "grad_norm": 1.1596757199924643, "learning_rate": 8.822490068801896e-06, "loss": 0.5412, "step": 1108 }, { "epoch": 0.24677347574543837, "grad_norm": 1.08456618628103, "learning_rate": 8.820166120791023e-06, "loss": 0.5419, "step": 1109 }, { "epoch": 0.24699599465954605, "grad_norm": 1.0976183053342605, "learning_rate": 8.817840188476015e-06, "loss": 0.5218, "step": 1110 }, { "epoch": 0.24721851357365376, "grad_norm": 1.157420246545367, "learning_rate": 8.815512273065028e-06, "loss": 0.5334, "step": 1111 }, { "epoch": 0.24744103248776145, "grad_norm": 1.1438254397968564, "learning_rate": 8.813182375767249e-06, "loss": 0.5445, "step": 1112 }, { "epoch": 0.24766355140186916, "grad_norm": 1.0751868755705756, "learning_rate": 8.810850497792895e-06, "loss": 0.5206, "step": 1113 }, { "epoch": 0.24788607031597687, "grad_norm": 1.140429054702173, "learning_rate": 8.80851664035321e-06, "loss": 0.5424, "step": 1114 }, { "epoch": 0.24810858923008455, "grad_norm": 1.166955773673513, "learning_rate": 8.806180804660462e-06, "loss": 0.5432, "step": 1115 }, { "epoch": 0.24833110814419226, "grad_norm": 1.1789647207660985, "learning_rate": 8.803842991927955e-06, "loss": 0.5321, "step": 1116 }, { "epoch": 0.24855362705829995, "grad_norm": 1.0909133664670563, "learning_rate": 8.801503203370019e-06, "loss": 0.5337, "step": 1117 }, { "epoch": 0.24877614597240766, "grad_norm": 1.128020253609063, "learning_rate": 8.799161440202002e-06, "loss": 0.536, "step": 1118 }, { "epoch": 0.24899866488651534, "grad_norm": 1.1214464196350014, "learning_rate": 8.796817703640288e-06, "loss": 0.5526, "step": 1119 }, { "epoch": 0.24922118380062305, "grad_norm": 1.0236476327835133, "learning_rate": 8.794471994902277e-06, "loss": 0.5507, "step": 1120 }, { "epoch": 0.24944370271473076, "grad_norm": 1.14089134520776, "learning_rate": 8.7921243152064e-06, "loss": 0.5302, "step": 1121 }, { "epoch": 0.24966622162883845, "grad_norm": 1.1911611904656128, "learning_rate": 8.789774665772109e-06, "loss": 0.546, "step": 1122 }, { "epoch": 0.24988874054294616, "grad_norm": 1.087956137140756, "learning_rate": 8.787423047819878e-06, "loss": 0.5286, "step": 1123 }, { "epoch": 0.25011125945705387, "grad_norm": 1.1816066972298143, "learning_rate": 8.785069462571208e-06, "loss": 0.5425, "step": 1124 }, { "epoch": 0.25033377837116155, "grad_norm": 1.1502196552533304, "learning_rate": 8.782713911248616e-06, "loss": 0.538, "step": 1125 }, { "epoch": 0.25055629728526924, "grad_norm": 1.128219018573067, "learning_rate": 8.780356395075644e-06, "loss": 0.5486, "step": 1126 }, { "epoch": 0.2507788161993769, "grad_norm": 1.2359501038358873, "learning_rate": 8.777996915276854e-06, "loss": 0.5555, "step": 1127 }, { "epoch": 0.25100133511348466, "grad_norm": 1.1539489082363177, "learning_rate": 8.775635473077828e-06, "loss": 0.5656, "step": 1128 }, { "epoch": 0.25122385402759234, "grad_norm": 1.1418610700509322, "learning_rate": 8.773272069705165e-06, "loss": 0.5365, "step": 1129 }, { "epoch": 0.2514463729417, "grad_norm": 1.1131082190146457, "learning_rate": 8.770906706386488e-06, "loss": 0.5413, "step": 1130 }, { "epoch": 0.25166889185580776, "grad_norm": 1.1636042157597712, "learning_rate": 8.768539384350432e-06, "loss": 0.5353, "step": 1131 }, { "epoch": 0.25189141076991545, "grad_norm": 1.2032283986022418, "learning_rate": 8.766170104826655e-06, "loss": 0.5423, "step": 1132 }, { "epoch": 0.25211392968402313, "grad_norm": 1.1311293551473642, "learning_rate": 8.763798869045823e-06, "loss": 0.5371, "step": 1133 }, { "epoch": 0.2523364485981308, "grad_norm": 1.1395836572679507, "learning_rate": 8.76142567823963e-06, "loss": 0.5351, "step": 1134 }, { "epoch": 0.25255896751223855, "grad_norm": 1.1767518991483314, "learning_rate": 8.759050533640778e-06, "loss": 0.5622, "step": 1135 }, { "epoch": 0.25278148642634624, "grad_norm": 1.0508041179152565, "learning_rate": 8.756673436482984e-06, "loss": 0.5422, "step": 1136 }, { "epoch": 0.2530040053404539, "grad_norm": 1.1501353711169693, "learning_rate": 8.754294388000984e-06, "loss": 0.5269, "step": 1137 }, { "epoch": 0.25322652425456166, "grad_norm": 1.1132463171996434, "learning_rate": 8.751913389430518e-06, "loss": 0.5292, "step": 1138 }, { "epoch": 0.25344904316866934, "grad_norm": 1.1863681291597168, "learning_rate": 8.749530442008352e-06, "loss": 0.544, "step": 1139 }, { "epoch": 0.253671562082777, "grad_norm": 1.121019811923796, "learning_rate": 8.747145546972252e-06, "loss": 0.5401, "step": 1140 }, { "epoch": 0.2538940809968847, "grad_norm": 1.0837897399588459, "learning_rate": 8.744758705561004e-06, "loss": 0.5511, "step": 1141 }, { "epoch": 0.25411659991099245, "grad_norm": 1.2145952704884848, "learning_rate": 8.742369919014401e-06, "loss": 0.5278, "step": 1142 }, { "epoch": 0.25433911882510013, "grad_norm": 1.1626976842977106, "learning_rate": 8.73997918857325e-06, "loss": 0.5312, "step": 1143 }, { "epoch": 0.2545616377392078, "grad_norm": 1.227036554861998, "learning_rate": 8.73758651547936e-06, "loss": 0.5421, "step": 1144 }, { "epoch": 0.25478415665331555, "grad_norm": 1.311761451174797, "learning_rate": 8.735191900975559e-06, "loss": 0.561, "step": 1145 }, { "epoch": 0.25500667556742324, "grad_norm": 1.3570427367940932, "learning_rate": 8.732795346305675e-06, "loss": 0.5443, "step": 1146 }, { "epoch": 0.2552291944815309, "grad_norm": 1.2090230719673531, "learning_rate": 8.730396852714552e-06, "loss": 0.5638, "step": 1147 }, { "epoch": 0.2554517133956386, "grad_norm": 1.1204778618640856, "learning_rate": 8.727996421448034e-06, "loss": 0.5457, "step": 1148 }, { "epoch": 0.25567423230974634, "grad_norm": 1.328012105112474, "learning_rate": 8.72559405375297e-06, "loss": 0.5588, "step": 1149 }, { "epoch": 0.255896751223854, "grad_norm": 1.3858126975027782, "learning_rate": 8.723189750877226e-06, "loss": 0.5606, "step": 1150 }, { "epoch": 0.2561192701379617, "grad_norm": 1.2107857377190518, "learning_rate": 8.72078351406966e-06, "loss": 0.5397, "step": 1151 }, { "epoch": 0.25634178905206945, "grad_norm": 1.2497971870016609, "learning_rate": 8.718375344580146e-06, "loss": 0.5418, "step": 1152 }, { "epoch": 0.25656430796617713, "grad_norm": 1.1548793630402938, "learning_rate": 8.715965243659553e-06, "loss": 0.5398, "step": 1153 }, { "epoch": 0.2567868268802848, "grad_norm": 1.135895589718665, "learning_rate": 8.713553212559756e-06, "loss": 0.5306, "step": 1154 }, { "epoch": 0.2570093457943925, "grad_norm": 1.2514575888745552, "learning_rate": 8.711139252533636e-06, "loss": 0.5392, "step": 1155 }, { "epoch": 0.25723186470850024, "grad_norm": 1.3144803688313635, "learning_rate": 8.708723364835073e-06, "loss": 0.5221, "step": 1156 }, { "epoch": 0.2574543836226079, "grad_norm": 1.0980561765943266, "learning_rate": 8.706305550718945e-06, "loss": 0.5341, "step": 1157 }, { "epoch": 0.2576769025367156, "grad_norm": 1.1301567189700157, "learning_rate": 8.703885811441138e-06, "loss": 0.5456, "step": 1158 }, { "epoch": 0.25789942145082334, "grad_norm": 1.280561741100562, "learning_rate": 8.701464148258534e-06, "loss": 0.5493, "step": 1159 }, { "epoch": 0.258121940364931, "grad_norm": 1.0784288237349895, "learning_rate": 8.699040562429013e-06, "loss": 0.548, "step": 1160 }, { "epoch": 0.2583444592790387, "grad_norm": 1.1343645328228873, "learning_rate": 8.696615055211454e-06, "loss": 0.5392, "step": 1161 }, { "epoch": 0.2585669781931464, "grad_norm": 1.1719383276071969, "learning_rate": 8.694187627865737e-06, "loss": 0.5337, "step": 1162 }, { "epoch": 0.25878949710725413, "grad_norm": 1.1481041896561888, "learning_rate": 8.69175828165274e-06, "loss": 0.5446, "step": 1163 }, { "epoch": 0.2590120160213618, "grad_norm": 1.2137483265742781, "learning_rate": 8.68932701783433e-06, "loss": 0.5519, "step": 1164 }, { "epoch": 0.2592345349354695, "grad_norm": 1.0750132286003533, "learning_rate": 8.68689383767338e-06, "loss": 0.5276, "step": 1165 }, { "epoch": 0.25945705384957723, "grad_norm": 1.0811626107975645, "learning_rate": 8.68445874243375e-06, "loss": 0.5388, "step": 1166 }, { "epoch": 0.2596795727636849, "grad_norm": 1.1019874792031272, "learning_rate": 8.682021733380301e-06, "loss": 0.5384, "step": 1167 }, { "epoch": 0.2599020916777926, "grad_norm": 1.188618240966417, "learning_rate": 8.679582811778885e-06, "loss": 0.5566, "step": 1168 }, { "epoch": 0.2601246105919003, "grad_norm": 1.1360579423212145, "learning_rate": 8.677141978896347e-06, "loss": 0.5357, "step": 1169 }, { "epoch": 0.260347129506008, "grad_norm": 1.1701037872137916, "learning_rate": 8.674699236000527e-06, "loss": 0.5425, "step": 1170 }, { "epoch": 0.2605696484201157, "grad_norm": 1.1826824022559355, "learning_rate": 8.672254584360255e-06, "loss": 0.5186, "step": 1171 }, { "epoch": 0.2607921673342234, "grad_norm": 1.0778492846383791, "learning_rate": 8.669808025245356e-06, "loss": 0.5496, "step": 1172 }, { "epoch": 0.26101468624833113, "grad_norm": 1.1206354182137208, "learning_rate": 8.66735955992664e-06, "loss": 0.5446, "step": 1173 }, { "epoch": 0.2612372051624388, "grad_norm": 1.2091033130922753, "learning_rate": 8.66490918967591e-06, "loss": 0.5359, "step": 1174 }, { "epoch": 0.2614597240765465, "grad_norm": 1.124987885109825, "learning_rate": 8.66245691576596e-06, "loss": 0.5265, "step": 1175 }, { "epoch": 0.2616822429906542, "grad_norm": 1.0785485906624268, "learning_rate": 8.660002739470573e-06, "loss": 0.5292, "step": 1176 }, { "epoch": 0.2619047619047619, "grad_norm": 1.362911805928175, "learning_rate": 8.657546662064518e-06, "loss": 0.54, "step": 1177 }, { "epoch": 0.2621272808188696, "grad_norm": 1.1529798704966472, "learning_rate": 8.65508868482355e-06, "loss": 0.5421, "step": 1178 }, { "epoch": 0.2623497997329773, "grad_norm": 1.0733865734865118, "learning_rate": 8.652628809024415e-06, "loss": 0.5351, "step": 1179 }, { "epoch": 0.262572318647085, "grad_norm": 1.052170197248074, "learning_rate": 8.650167035944843e-06, "loss": 0.5276, "step": 1180 }, { "epoch": 0.2627948375611927, "grad_norm": 1.1149872374932095, "learning_rate": 8.64770336686355e-06, "loss": 0.5343, "step": 1181 }, { "epoch": 0.2630173564753004, "grad_norm": 1.2212194370772136, "learning_rate": 8.645237803060236e-06, "loss": 0.5263, "step": 1182 }, { "epoch": 0.2632398753894081, "grad_norm": 1.0798258179932094, "learning_rate": 8.642770345815586e-06, "loss": 0.5217, "step": 1183 }, { "epoch": 0.2634623943035158, "grad_norm": 1.1461414578558842, "learning_rate": 8.640300996411269e-06, "loss": 0.5351, "step": 1184 }, { "epoch": 0.2636849132176235, "grad_norm": 1.055463759569512, "learning_rate": 8.637829756129934e-06, "loss": 0.5394, "step": 1185 }, { "epoch": 0.2639074321317312, "grad_norm": 1.1064963900970444, "learning_rate": 8.635356626255216e-06, "loss": 0.547, "step": 1186 }, { "epoch": 0.2641299510458389, "grad_norm": 1.1099578519492, "learning_rate": 8.632881608071729e-06, "loss": 0.5521, "step": 1187 }, { "epoch": 0.2643524699599466, "grad_norm": 1.149095613848265, "learning_rate": 8.630404702865069e-06, "loss": 0.5214, "step": 1188 }, { "epoch": 0.2645749888740543, "grad_norm": 1.117918760550004, "learning_rate": 8.627925911921811e-06, "loss": 0.5466, "step": 1189 }, { "epoch": 0.26479750778816197, "grad_norm": 1.097300955204372, "learning_rate": 8.625445236529512e-06, "loss": 0.536, "step": 1190 }, { "epoch": 0.2650200267022697, "grad_norm": 1.2315010230967276, "learning_rate": 8.622962677976706e-06, "loss": 0.5432, "step": 1191 }, { "epoch": 0.2652425456163774, "grad_norm": 1.0857085194965768, "learning_rate": 8.620478237552902e-06, "loss": 0.5363, "step": 1192 }, { "epoch": 0.2654650645304851, "grad_norm": 1.157965289638086, "learning_rate": 8.617991916548596e-06, "loss": 0.5455, "step": 1193 }, { "epoch": 0.2656875834445928, "grad_norm": 1.0817696817309124, "learning_rate": 8.615503716255249e-06, "loss": 0.5319, "step": 1194 }, { "epoch": 0.2659101023587005, "grad_norm": 1.1176027195064202, "learning_rate": 8.613013637965305e-06, "loss": 0.5351, "step": 1195 }, { "epoch": 0.2661326212728082, "grad_norm": 1.0534001485142053, "learning_rate": 8.610521682972182e-06, "loss": 0.5505, "step": 1196 }, { "epoch": 0.26635514018691586, "grad_norm": 1.0653919957593805, "learning_rate": 8.608027852570276e-06, "loss": 0.5259, "step": 1197 }, { "epoch": 0.2665776591010236, "grad_norm": 1.1660478115982855, "learning_rate": 8.60553214805495e-06, "loss": 0.5381, "step": 1198 }, { "epoch": 0.2668001780151313, "grad_norm": 1.180879352047131, "learning_rate": 8.60303457072255e-06, "loss": 0.5556, "step": 1199 }, { "epoch": 0.26702269692923897, "grad_norm": 1.1048275009505628, "learning_rate": 8.600535121870385e-06, "loss": 0.5236, "step": 1200 }, { "epoch": 0.2672452158433467, "grad_norm": 1.2304159198641638, "learning_rate": 8.598033802796741e-06, "loss": 0.5452, "step": 1201 }, { "epoch": 0.2674677347574544, "grad_norm": 1.2231385895179718, "learning_rate": 8.595530614800877e-06, "loss": 0.5331, "step": 1202 }, { "epoch": 0.2676902536715621, "grad_norm": 1.1176213076963126, "learning_rate": 8.59302555918302e-06, "loss": 0.5403, "step": 1203 }, { "epoch": 0.26791277258566976, "grad_norm": 1.0559898587952565, "learning_rate": 8.590518637244366e-06, "loss": 0.5314, "step": 1204 }, { "epoch": 0.2681352914997775, "grad_norm": 1.125927265969317, "learning_rate": 8.588009850287086e-06, "loss": 0.5528, "step": 1205 }, { "epoch": 0.2683578104138852, "grad_norm": 1.0815866037797424, "learning_rate": 8.585499199614315e-06, "loss": 0.53, "step": 1206 }, { "epoch": 0.26858032932799286, "grad_norm": 1.1635062739395392, "learning_rate": 8.582986686530155e-06, "loss": 0.5406, "step": 1207 }, { "epoch": 0.2688028482421006, "grad_norm": 1.0840207374225888, "learning_rate": 8.580472312339681e-06, "loss": 0.5382, "step": 1208 }, { "epoch": 0.2690253671562083, "grad_norm": 1.1263812822591983, "learning_rate": 8.577956078348928e-06, "loss": 0.5367, "step": 1209 }, { "epoch": 0.26924788607031597, "grad_norm": 1.1449164720953233, "learning_rate": 8.5754379858649e-06, "loss": 0.5258, "step": 1210 }, { "epoch": 0.26947040498442365, "grad_norm": 1.1781084776565987, "learning_rate": 8.57291803619557e-06, "loss": 0.5368, "step": 1211 }, { "epoch": 0.2696929238985314, "grad_norm": 1.204724156250435, "learning_rate": 8.57039623064987e-06, "loss": 0.5473, "step": 1212 }, { "epoch": 0.2699154428126391, "grad_norm": 1.0731819318165852, "learning_rate": 8.567872570537696e-06, "loss": 0.5316, "step": 1213 }, { "epoch": 0.27013796172674676, "grad_norm": 1.1598337963290362, "learning_rate": 8.565347057169917e-06, "loss": 0.5462, "step": 1214 }, { "epoch": 0.2703604806408545, "grad_norm": 1.1453670240428864, "learning_rate": 8.56281969185835e-06, "loss": 0.5378, "step": 1215 }, { "epoch": 0.2705829995549622, "grad_norm": 1.1972846471502476, "learning_rate": 8.560290475915784e-06, "loss": 0.5479, "step": 1216 }, { "epoch": 0.27080551846906986, "grad_norm": 1.112369740749558, "learning_rate": 8.557759410655966e-06, "loss": 0.5434, "step": 1217 }, { "epoch": 0.27102803738317754, "grad_norm": 1.1156048956178743, "learning_rate": 8.555226497393607e-06, "loss": 0.5277, "step": 1218 }, { "epoch": 0.2712505562972853, "grad_norm": 1.134272123305125, "learning_rate": 8.552691737444369e-06, "loss": 0.5185, "step": 1219 }, { "epoch": 0.27147307521139297, "grad_norm": 1.0653672804924197, "learning_rate": 8.550155132124885e-06, "loss": 0.5265, "step": 1220 }, { "epoch": 0.27169559412550065, "grad_norm": 1.2019423091411041, "learning_rate": 8.547616682752738e-06, "loss": 0.5471, "step": 1221 }, { "epoch": 0.2719181130396084, "grad_norm": 1.1840055259731719, "learning_rate": 8.545076390646472e-06, "loss": 0.5403, "step": 1222 }, { "epoch": 0.2721406319537161, "grad_norm": 1.1642168399866495, "learning_rate": 8.542534257125587e-06, "loss": 0.5282, "step": 1223 }, { "epoch": 0.27236315086782376, "grad_norm": 1.1124622624780314, "learning_rate": 8.539990283510542e-06, "loss": 0.5491, "step": 1224 }, { "epoch": 0.27258566978193144, "grad_norm": 1.1755567204563202, "learning_rate": 8.537444471122748e-06, "loss": 0.5468, "step": 1225 }, { "epoch": 0.2728081886960392, "grad_norm": 1.1265417322509188, "learning_rate": 8.534896821284575e-06, "loss": 0.5439, "step": 1226 }, { "epoch": 0.27303070761014686, "grad_norm": 1.123714164991035, "learning_rate": 8.532347335319344e-06, "loss": 0.5353, "step": 1227 }, { "epoch": 0.27325322652425454, "grad_norm": 1.2350500170550114, "learning_rate": 8.529796014551333e-06, "loss": 0.5407, "step": 1228 }, { "epoch": 0.2734757454383623, "grad_norm": 1.0082600051571646, "learning_rate": 8.52724286030577e-06, "loss": 0.5188, "step": 1229 }, { "epoch": 0.27369826435246997, "grad_norm": 1.4159044912812553, "learning_rate": 8.524687873908838e-06, "loss": 0.5445, "step": 1230 }, { "epoch": 0.27392078326657765, "grad_norm": 1.2462623241155968, "learning_rate": 8.522131056687669e-06, "loss": 0.5423, "step": 1231 }, { "epoch": 0.27414330218068533, "grad_norm": 1.248819883539982, "learning_rate": 8.519572409970347e-06, "loss": 0.5384, "step": 1232 }, { "epoch": 0.27436582109479307, "grad_norm": 1.21795842383383, "learning_rate": 8.51701193508591e-06, "loss": 0.5322, "step": 1233 }, { "epoch": 0.27458834000890076, "grad_norm": 1.2024472685672976, "learning_rate": 8.51444963336434e-06, "loss": 0.5379, "step": 1234 }, { "epoch": 0.27481085892300844, "grad_norm": 1.2031336620609205, "learning_rate": 8.511885506136568e-06, "loss": 0.5399, "step": 1235 }, { "epoch": 0.2750333778371162, "grad_norm": 1.0737676296933674, "learning_rate": 8.509319554734478e-06, "loss": 0.5381, "step": 1236 }, { "epoch": 0.27525589675122386, "grad_norm": 1.0817201480618261, "learning_rate": 8.5067517804909e-06, "loss": 0.5403, "step": 1237 }, { "epoch": 0.27547841566533154, "grad_norm": 1.1634773414922908, "learning_rate": 8.504182184739608e-06, "loss": 0.5389, "step": 1238 }, { "epoch": 0.2757009345794392, "grad_norm": 1.1369142029147967, "learning_rate": 8.501610768815322e-06, "loss": 0.5564, "step": 1239 }, { "epoch": 0.27592345349354697, "grad_norm": 1.126177287759494, "learning_rate": 8.499037534053713e-06, "loss": 0.5516, "step": 1240 }, { "epoch": 0.27614597240765465, "grad_norm": 1.1616611844307716, "learning_rate": 8.496462481791394e-06, "loss": 0.5596, "step": 1241 }, { "epoch": 0.27636849132176233, "grad_norm": 1.0956699064865225, "learning_rate": 8.493885613365916e-06, "loss": 0.5338, "step": 1242 }, { "epoch": 0.27659101023587007, "grad_norm": 1.118002499339704, "learning_rate": 8.491306930115784e-06, "loss": 0.5319, "step": 1243 }, { "epoch": 0.27681352914997776, "grad_norm": 1.0980523128712836, "learning_rate": 8.488726433380435e-06, "loss": 0.5411, "step": 1244 }, { "epoch": 0.27703604806408544, "grad_norm": 1.1867262193208614, "learning_rate": 8.486144124500257e-06, "loss": 0.5419, "step": 1245 }, { "epoch": 0.2772585669781931, "grad_norm": 1.1544542276228447, "learning_rate": 8.483560004816575e-06, "loss": 0.5319, "step": 1246 }, { "epoch": 0.27748108589230086, "grad_norm": 1.1645933826844492, "learning_rate": 8.480974075671655e-06, "loss": 0.5522, "step": 1247 }, { "epoch": 0.27770360480640854, "grad_norm": 1.1169377828810618, "learning_rate": 8.4783863384087e-06, "loss": 0.5595, "step": 1248 }, { "epoch": 0.2779261237205162, "grad_norm": 1.144894263001444, "learning_rate": 8.47579679437186e-06, "loss": 0.5343, "step": 1249 }, { "epoch": 0.27814864263462397, "grad_norm": 1.1091162847171228, "learning_rate": 8.473205444906216e-06, "loss": 0.5515, "step": 1250 }, { "epoch": 0.27837116154873165, "grad_norm": 1.1668465569812192, "learning_rate": 8.47061229135779e-06, "loss": 0.5298, "step": 1251 }, { "epoch": 0.27859368046283933, "grad_norm": 1.0630054945025122, "learning_rate": 8.468017335073538e-06, "loss": 0.528, "step": 1252 }, { "epoch": 0.278816199376947, "grad_norm": 1.1554360856371937, "learning_rate": 8.465420577401359e-06, "loss": 0.5356, "step": 1253 }, { "epoch": 0.27903871829105475, "grad_norm": 1.0771734086294231, "learning_rate": 8.46282201969008e-06, "loss": 0.5425, "step": 1254 }, { "epoch": 0.27926123720516244, "grad_norm": 1.044672026929439, "learning_rate": 8.46022166328947e-06, "loss": 0.5353, "step": 1255 }, { "epoch": 0.2794837561192701, "grad_norm": 1.0997178504391145, "learning_rate": 8.457619509550227e-06, "loss": 0.5221, "step": 1256 }, { "epoch": 0.27970627503337786, "grad_norm": 1.110741412219345, "learning_rate": 8.455015559823984e-06, "loss": 0.531, "step": 1257 }, { "epoch": 0.27992879394748554, "grad_norm": 0.9872285441576187, "learning_rate": 8.452409815463308e-06, "loss": 0.5344, "step": 1258 }, { "epoch": 0.2801513128615932, "grad_norm": 1.0499130918981163, "learning_rate": 8.4498022778217e-06, "loss": 0.5393, "step": 1259 }, { "epoch": 0.2803738317757009, "grad_norm": 1.067484380848227, "learning_rate": 8.447192948253583e-06, "loss": 0.538, "step": 1260 }, { "epoch": 0.28059635068980865, "grad_norm": 1.105497517458467, "learning_rate": 8.444581828114326e-06, "loss": 0.5496, "step": 1261 }, { "epoch": 0.28081886960391633, "grad_norm": 1.1662480540016864, "learning_rate": 8.441968918760215e-06, "loss": 0.5339, "step": 1262 }, { "epoch": 0.281041388518024, "grad_norm": 1.1728637379415547, "learning_rate": 8.439354221548472e-06, "loss": 0.5276, "step": 1263 }, { "epoch": 0.28126390743213175, "grad_norm": 1.2605852368154953, "learning_rate": 8.436737737837246e-06, "loss": 0.5365, "step": 1264 }, { "epoch": 0.28148642634623944, "grad_norm": 1.1004348723769972, "learning_rate": 8.434119468985614e-06, "loss": 0.521, "step": 1265 }, { "epoch": 0.2817089452603471, "grad_norm": 1.1424969642146758, "learning_rate": 8.431499416353576e-06, "loss": 0.5325, "step": 1266 }, { "epoch": 0.2819314641744548, "grad_norm": 1.0586164963025524, "learning_rate": 8.428877581302071e-06, "loss": 0.5408, "step": 1267 }, { "epoch": 0.28215398308856254, "grad_norm": 1.0176660538370188, "learning_rate": 8.426253965192948e-06, "loss": 0.5145, "step": 1268 }, { "epoch": 0.2823765020026702, "grad_norm": 1.2245641691295635, "learning_rate": 8.423628569388991e-06, "loss": 0.5282, "step": 1269 }, { "epoch": 0.2825990209167779, "grad_norm": 1.166514545347371, "learning_rate": 8.421001395253907e-06, "loss": 0.5554, "step": 1270 }, { "epoch": 0.28282153983088565, "grad_norm": 1.119734815672749, "learning_rate": 8.418372444152325e-06, "loss": 0.5514, "step": 1271 }, { "epoch": 0.28304405874499333, "grad_norm": 1.1025470834588689, "learning_rate": 8.415741717449798e-06, "loss": 0.5284, "step": 1272 }, { "epoch": 0.283266577659101, "grad_norm": 1.2738120396201387, "learning_rate": 8.413109216512801e-06, "loss": 0.5278, "step": 1273 }, { "epoch": 0.2834890965732087, "grad_norm": 1.238671745544393, "learning_rate": 8.410474942708733e-06, "loss": 0.5417, "step": 1274 }, { "epoch": 0.28371161548731644, "grad_norm": 1.057128504519073, "learning_rate": 8.407838897405907e-06, "loss": 0.5331, "step": 1275 }, { "epoch": 0.2839341344014241, "grad_norm": 1.0939051265089474, "learning_rate": 8.405201081973563e-06, "loss": 0.5455, "step": 1276 }, { "epoch": 0.2841566533155318, "grad_norm": 1.1986599694449729, "learning_rate": 8.40256149778186e-06, "loss": 0.5407, "step": 1277 }, { "epoch": 0.28437917222963954, "grad_norm": 1.0976863747243095, "learning_rate": 8.399920146201872e-06, "loss": 0.5248, "step": 1278 }, { "epoch": 0.2846016911437472, "grad_norm": 1.116828063778291, "learning_rate": 8.39727702860559e-06, "loss": 0.5254, "step": 1279 }, { "epoch": 0.2848242100578549, "grad_norm": 1.209112973668726, "learning_rate": 8.394632146365931e-06, "loss": 0.5305, "step": 1280 }, { "epoch": 0.2850467289719626, "grad_norm": 1.1880896917564423, "learning_rate": 8.39198550085672e-06, "loss": 0.5301, "step": 1281 }, { "epoch": 0.28526924788607033, "grad_norm": 1.146881171453976, "learning_rate": 8.389337093452704e-06, "loss": 0.5209, "step": 1282 }, { "epoch": 0.285491766800178, "grad_norm": 1.1132472056233553, "learning_rate": 8.386686925529534e-06, "loss": 0.5356, "step": 1283 }, { "epoch": 0.2857142857142857, "grad_norm": 1.266921554857887, "learning_rate": 8.384034998463793e-06, "loss": 0.5297, "step": 1284 }, { "epoch": 0.28593680462839344, "grad_norm": 1.2094379249462208, "learning_rate": 8.381381313632962e-06, "loss": 0.5354, "step": 1285 }, { "epoch": 0.2861593235425011, "grad_norm": 1.1550975027035801, "learning_rate": 8.378725872415441e-06, "loss": 0.5397, "step": 1286 }, { "epoch": 0.2863818424566088, "grad_norm": 1.2273799230918068, "learning_rate": 8.376068676190547e-06, "loss": 0.5328, "step": 1287 }, { "epoch": 0.2866043613707165, "grad_norm": 1.1614667508245575, "learning_rate": 8.373409726338499e-06, "loss": 0.5406, "step": 1288 }, { "epoch": 0.2868268802848242, "grad_norm": 1.121966108188506, "learning_rate": 8.370749024240436e-06, "loss": 0.538, "step": 1289 }, { "epoch": 0.2870493991989319, "grad_norm": 1.2016757070848232, "learning_rate": 8.368086571278404e-06, "loss": 0.5559, "step": 1290 }, { "epoch": 0.2872719181130396, "grad_norm": 1.0854705484182066, "learning_rate": 8.365422368835352e-06, "loss": 0.5334, "step": 1291 }, { "epoch": 0.28749443702714733, "grad_norm": 1.1146726710678228, "learning_rate": 8.362756418295148e-06, "loss": 0.5397, "step": 1292 }, { "epoch": 0.287716955941255, "grad_norm": 1.1258072788876277, "learning_rate": 8.360088721042563e-06, "loss": 0.5456, "step": 1293 }, { "epoch": 0.2879394748553627, "grad_norm": 1.1430177426145538, "learning_rate": 8.357419278463275e-06, "loss": 0.5359, "step": 1294 }, { "epoch": 0.2881619937694704, "grad_norm": 1.0686385449831801, "learning_rate": 8.354748091943867e-06, "loss": 0.5216, "step": 1295 }, { "epoch": 0.2883845126835781, "grad_norm": 1.149800509383997, "learning_rate": 8.352075162871833e-06, "loss": 0.5326, "step": 1296 }, { "epoch": 0.2886070315976858, "grad_norm": 1.2383816886354717, "learning_rate": 8.349400492635568e-06, "loss": 0.5553, "step": 1297 }, { "epoch": 0.2888295505117935, "grad_norm": 1.1036985247868405, "learning_rate": 8.346724082624374e-06, "loss": 0.5343, "step": 1298 }, { "epoch": 0.2890520694259012, "grad_norm": 1.1316607271063712, "learning_rate": 8.344045934228455e-06, "loss": 0.5462, "step": 1299 }, { "epoch": 0.2892745883400089, "grad_norm": 1.194485778435747, "learning_rate": 8.341366048838917e-06, "loss": 0.5261, "step": 1300 }, { "epoch": 0.2894971072541166, "grad_norm": 1.2642244911532945, "learning_rate": 8.338684427847768e-06, "loss": 0.5592, "step": 1301 }, { "epoch": 0.2897196261682243, "grad_norm": 1.123518682086265, "learning_rate": 8.336001072647924e-06, "loss": 0.5428, "step": 1302 }, { "epoch": 0.289942145082332, "grad_norm": 1.1151874475687584, "learning_rate": 8.333315984633192e-06, "loss": 0.5413, "step": 1303 }, { "epoch": 0.2901646639964397, "grad_norm": 1.2221670669208766, "learning_rate": 8.330629165198286e-06, "loss": 0.5476, "step": 1304 }, { "epoch": 0.2903871829105474, "grad_norm": 1.1030915124393563, "learning_rate": 8.327940615738819e-06, "loss": 0.5341, "step": 1305 }, { "epoch": 0.2906097018246551, "grad_norm": 1.1920766901476891, "learning_rate": 8.325250337651297e-06, "loss": 0.528, "step": 1306 }, { "epoch": 0.2908322207387628, "grad_norm": 1.0896601297591169, "learning_rate": 8.322558332333132e-06, "loss": 0.5354, "step": 1307 }, { "epoch": 0.2910547396528705, "grad_norm": 1.0367378440275279, "learning_rate": 8.319864601182625e-06, "loss": 0.5225, "step": 1308 }, { "epoch": 0.29127725856697817, "grad_norm": 1.1170157600228903, "learning_rate": 8.31716914559898e-06, "loss": 0.5361, "step": 1309 }, { "epoch": 0.2914997774810859, "grad_norm": 1.1555635024992708, "learning_rate": 8.314471966982293e-06, "loss": 0.5237, "step": 1310 }, { "epoch": 0.2917222963951936, "grad_norm": 1.1306201013336, "learning_rate": 8.311773066733559e-06, "loss": 0.5494, "step": 1311 }, { "epoch": 0.2919448153093013, "grad_norm": 1.0780896190018123, "learning_rate": 8.30907244625466e-06, "loss": 0.5233, "step": 1312 }, { "epoch": 0.292167334223409, "grad_norm": 1.2216486548793488, "learning_rate": 8.306370106948377e-06, "loss": 0.5461, "step": 1313 }, { "epoch": 0.2923898531375167, "grad_norm": 1.1336284741186726, "learning_rate": 8.303666050218385e-06, "loss": 0.5301, "step": 1314 }, { "epoch": 0.2926123720516244, "grad_norm": 1.1852801564939774, "learning_rate": 8.300960277469248e-06, "loss": 0.5524, "step": 1315 }, { "epoch": 0.29283489096573206, "grad_norm": 1.1131497343051868, "learning_rate": 8.298252790106421e-06, "loss": 0.5393, "step": 1316 }, { "epoch": 0.2930574098798398, "grad_norm": 1.0967536267351696, "learning_rate": 8.29554358953625e-06, "loss": 0.5352, "step": 1317 }, { "epoch": 0.2932799287939475, "grad_norm": 1.0545244305021213, "learning_rate": 8.292832677165976e-06, "loss": 0.5345, "step": 1318 }, { "epoch": 0.29350244770805517, "grad_norm": 1.1514640169323374, "learning_rate": 8.29012005440372e-06, "loss": 0.564, "step": 1319 }, { "epoch": 0.2937249666221629, "grad_norm": 1.1440591778100941, "learning_rate": 8.287405722658499e-06, "loss": 0.5355, "step": 1320 }, { "epoch": 0.2939474855362706, "grad_norm": 1.0487821057926658, "learning_rate": 8.284689683340215e-06, "loss": 0.5337, "step": 1321 }, { "epoch": 0.2941700044503783, "grad_norm": 1.1017761244205997, "learning_rate": 8.281971937859654e-06, "loss": 0.5364, "step": 1322 }, { "epoch": 0.29439252336448596, "grad_norm": 1.1575835041622675, "learning_rate": 8.279252487628495e-06, "loss": 0.5323, "step": 1323 }, { "epoch": 0.2946150422785937, "grad_norm": 1.0919241689447046, "learning_rate": 8.276531334059296e-06, "loss": 0.5304, "step": 1324 }, { "epoch": 0.2948375611927014, "grad_norm": 1.0980505566270589, "learning_rate": 8.273808478565503e-06, "loss": 0.5226, "step": 1325 }, { "epoch": 0.29506008010680906, "grad_norm": 1.0361399786443146, "learning_rate": 8.271083922561447e-06, "loss": 0.5298, "step": 1326 }, { "epoch": 0.2952825990209168, "grad_norm": 1.295705156018323, "learning_rate": 8.26835766746234e-06, "loss": 0.5394, "step": 1327 }, { "epoch": 0.2955051179350245, "grad_norm": 1.2460797513074355, "learning_rate": 8.265629714684273e-06, "loss": 0.5443, "step": 1328 }, { "epoch": 0.29572763684913217, "grad_norm": 1.1127508036191245, "learning_rate": 8.26290006564423e-06, "loss": 0.542, "step": 1329 }, { "epoch": 0.29595015576323985, "grad_norm": 1.028833852662332, "learning_rate": 8.260168721760066e-06, "loss": 0.5171, "step": 1330 }, { "epoch": 0.2961726746773476, "grad_norm": 1.0993049785382334, "learning_rate": 8.25743568445052e-06, "loss": 0.5366, "step": 1331 }, { "epoch": 0.2963951935914553, "grad_norm": 1.1299500831558955, "learning_rate": 8.25470095513521e-06, "loss": 0.5327, "step": 1332 }, { "epoch": 0.29661771250556296, "grad_norm": 1.190105328721219, "learning_rate": 8.25196453523463e-06, "loss": 0.5312, "step": 1333 }, { "epoch": 0.2968402314196707, "grad_norm": 1.0936779233490863, "learning_rate": 8.249226426170162e-06, "loss": 0.5349, "step": 1334 }, { "epoch": 0.2970627503337784, "grad_norm": 1.1842205858625776, "learning_rate": 8.246486629364052e-06, "loss": 0.5214, "step": 1335 }, { "epoch": 0.29728526924788606, "grad_norm": 1.102478513625012, "learning_rate": 8.243745146239434e-06, "loss": 0.5148, "step": 1336 }, { "epoch": 0.29750778816199375, "grad_norm": 1.1492200732627722, "learning_rate": 8.24100197822031e-06, "loss": 0.5411, "step": 1337 }, { "epoch": 0.2977303070761015, "grad_norm": 1.072617765875341, "learning_rate": 8.238257126731561e-06, "loss": 0.5188, "step": 1338 }, { "epoch": 0.29795282599020917, "grad_norm": 1.1774249330762585, "learning_rate": 8.235510593198943e-06, "loss": 0.5469, "step": 1339 }, { "epoch": 0.29817534490431685, "grad_norm": 1.0509182765924678, "learning_rate": 8.232762379049082e-06, "loss": 0.5363, "step": 1340 }, { "epoch": 0.2983978638184246, "grad_norm": 1.0852146018249795, "learning_rate": 8.230012485709484e-06, "loss": 0.5243, "step": 1341 }, { "epoch": 0.2986203827325323, "grad_norm": 1.1812052947635479, "learning_rate": 8.227260914608516e-06, "loss": 0.5363, "step": 1342 }, { "epoch": 0.29884290164663996, "grad_norm": 1.174088879055678, "learning_rate": 8.22450766717543e-06, "loss": 0.525, "step": 1343 }, { "epoch": 0.29906542056074764, "grad_norm": 1.0912119541433463, "learning_rate": 8.221752744840338e-06, "loss": 0.524, "step": 1344 }, { "epoch": 0.2992879394748554, "grad_norm": 1.1587700192033814, "learning_rate": 8.218996149034228e-06, "loss": 0.5375, "step": 1345 }, { "epoch": 0.29951045838896306, "grad_norm": 1.1223957459783793, "learning_rate": 8.216237881188952e-06, "loss": 0.5302, "step": 1346 }, { "epoch": 0.29973297730307075, "grad_norm": 1.1337327175757648, "learning_rate": 8.213477942737237e-06, "loss": 0.551, "step": 1347 }, { "epoch": 0.2999554962171785, "grad_norm": 1.1956787781982465, "learning_rate": 8.210716335112671e-06, "loss": 0.5431, "step": 1348 }, { "epoch": 0.30017801513128617, "grad_norm": 1.1272547689470236, "learning_rate": 8.207953059749717e-06, "loss": 0.5158, "step": 1349 }, { "epoch": 0.30040053404539385, "grad_norm": 1.1533577504133583, "learning_rate": 8.205188118083695e-06, "loss": 0.5229, "step": 1350 }, { "epoch": 0.30062305295950154, "grad_norm": 1.027145270439382, "learning_rate": 8.202421511550799e-06, "loss": 0.5259, "step": 1351 }, { "epoch": 0.3008455718736093, "grad_norm": 1.0743069193621118, "learning_rate": 8.199653241588081e-06, "loss": 0.5307, "step": 1352 }, { "epoch": 0.30106809078771696, "grad_norm": 1.1824921889762432, "learning_rate": 8.196883309633461e-06, "loss": 0.5317, "step": 1353 }, { "epoch": 0.30129060970182464, "grad_norm": 1.0944561319482218, "learning_rate": 8.194111717125722e-06, "loss": 0.5414, "step": 1354 }, { "epoch": 0.3015131286159324, "grad_norm": 1.1638727013065957, "learning_rate": 8.191338465504508e-06, "loss": 0.5372, "step": 1355 }, { "epoch": 0.30173564753004006, "grad_norm": 1.143068921985202, "learning_rate": 8.188563556210328e-06, "loss": 0.5407, "step": 1356 }, { "epoch": 0.30195816644414775, "grad_norm": 1.1295154319151222, "learning_rate": 8.185786990684545e-06, "loss": 0.5208, "step": 1357 }, { "epoch": 0.30218068535825543, "grad_norm": 1.092230257476899, "learning_rate": 8.183008770369392e-06, "loss": 0.5406, "step": 1358 }, { "epoch": 0.30240320427236317, "grad_norm": 1.1479353636778447, "learning_rate": 8.180228896707952e-06, "loss": 0.5561, "step": 1359 }, { "epoch": 0.30262572318647085, "grad_norm": 1.180899119916089, "learning_rate": 8.177447371144175e-06, "loss": 0.5261, "step": 1360 }, { "epoch": 0.30284824210057854, "grad_norm": 1.0636299377466012, "learning_rate": 8.174664195122863e-06, "loss": 0.528, "step": 1361 }, { "epoch": 0.3030707610146863, "grad_norm": 1.1401427781684068, "learning_rate": 8.171879370089679e-06, "loss": 0.5189, "step": 1362 }, { "epoch": 0.30329327992879396, "grad_norm": 1.230650492820182, "learning_rate": 8.169092897491141e-06, "loss": 0.5487, "step": 1363 }, { "epoch": 0.30351579884290164, "grad_norm": 1.0798893794357984, "learning_rate": 8.166304778774624e-06, "loss": 0.5499, "step": 1364 }, { "epoch": 0.3037383177570093, "grad_norm": 1.0820670769210838, "learning_rate": 8.163515015388353e-06, "loss": 0.5308, "step": 1365 }, { "epoch": 0.30396083667111706, "grad_norm": 1.194012134313668, "learning_rate": 8.160723608781416e-06, "loss": 0.5431, "step": 1366 }, { "epoch": 0.30418335558522475, "grad_norm": 1.1526216305927954, "learning_rate": 8.157930560403746e-06, "loss": 0.536, "step": 1367 }, { "epoch": 0.30440587449933243, "grad_norm": 1.0976820211779796, "learning_rate": 8.155135871706136e-06, "loss": 0.5271, "step": 1368 }, { "epoch": 0.30462839341344017, "grad_norm": 1.2754613064700848, "learning_rate": 8.152339544140226e-06, "loss": 0.5307, "step": 1369 }, { "epoch": 0.30485091232754785, "grad_norm": 1.1624123050830262, "learning_rate": 8.149541579158511e-06, "loss": 0.5289, "step": 1370 }, { "epoch": 0.30507343124165553, "grad_norm": 1.175799091502753, "learning_rate": 8.14674197821433e-06, "loss": 0.522, "step": 1371 }, { "epoch": 0.3052959501557632, "grad_norm": 1.163855027357477, "learning_rate": 8.143940742761881e-06, "loss": 0.5218, "step": 1372 }, { "epoch": 0.30551846906987096, "grad_norm": 1.143264638009981, "learning_rate": 8.141137874256204e-06, "loss": 0.5293, "step": 1373 }, { "epoch": 0.30574098798397864, "grad_norm": 1.1182146530378014, "learning_rate": 8.138333374153187e-06, "loss": 0.5317, "step": 1374 }, { "epoch": 0.3059635068980863, "grad_norm": 1.1579116630525166, "learning_rate": 8.135527243909574e-06, "loss": 0.5154, "step": 1375 }, { "epoch": 0.30618602581219406, "grad_norm": 1.1639540070208791, "learning_rate": 8.132719484982945e-06, "loss": 0.5324, "step": 1376 }, { "epoch": 0.30640854472630175, "grad_norm": 1.2413342722546528, "learning_rate": 8.129910098831732e-06, "loss": 0.5481, "step": 1377 }, { "epoch": 0.30663106364040943, "grad_norm": 1.154898388573563, "learning_rate": 8.127099086915212e-06, "loss": 0.5359, "step": 1378 }, { "epoch": 0.3068535825545171, "grad_norm": 1.1245041902087103, "learning_rate": 8.124286450693503e-06, "loss": 0.5329, "step": 1379 }, { "epoch": 0.30707610146862485, "grad_norm": 1.124424485675566, "learning_rate": 8.121472191627572e-06, "loss": 0.5331, "step": 1380 }, { "epoch": 0.30729862038273253, "grad_norm": 1.0688157963242253, "learning_rate": 8.118656311179226e-06, "loss": 0.5431, "step": 1381 }, { "epoch": 0.3075211392968402, "grad_norm": 1.098987310102683, "learning_rate": 8.11583881081111e-06, "loss": 0.538, "step": 1382 }, { "epoch": 0.30774365821094796, "grad_norm": 1.1222109104429665, "learning_rate": 8.11301969198672e-06, "loss": 0.5355, "step": 1383 }, { "epoch": 0.30796617712505564, "grad_norm": 1.536675861386096, "learning_rate": 8.110198956170383e-06, "loss": 0.5235, "step": 1384 }, { "epoch": 0.3081886960391633, "grad_norm": 1.2059375282401759, "learning_rate": 8.107376604827275e-06, "loss": 0.5372, "step": 1385 }, { "epoch": 0.308411214953271, "grad_norm": 1.172501136683614, "learning_rate": 8.104552639423402e-06, "loss": 0.5087, "step": 1386 }, { "epoch": 0.30863373386737875, "grad_norm": 1.2264265239111058, "learning_rate": 8.101727061425614e-06, "loss": 0.5444, "step": 1387 }, { "epoch": 0.30885625278148643, "grad_norm": 1.1618520821702294, "learning_rate": 8.0988998723016e-06, "loss": 0.5478, "step": 1388 }, { "epoch": 0.3090787716955941, "grad_norm": 1.2200221788469916, "learning_rate": 8.09607107351988e-06, "loss": 0.5382, "step": 1389 }, { "epoch": 0.30930129060970185, "grad_norm": 1.1718662109903824, "learning_rate": 8.093240666549816e-06, "loss": 0.5428, "step": 1390 }, { "epoch": 0.30952380952380953, "grad_norm": 1.2261077462443442, "learning_rate": 8.090408652861603e-06, "loss": 0.5389, "step": 1391 }, { "epoch": 0.3097463284379172, "grad_norm": 1.1571261911378925, "learning_rate": 8.087575033926267e-06, "loss": 0.5287, "step": 1392 }, { "epoch": 0.3099688473520249, "grad_norm": 1.1538059081903214, "learning_rate": 8.084739811215672e-06, "loss": 0.5466, "step": 1393 }, { "epoch": 0.31019136626613264, "grad_norm": 1.1707894669823529, "learning_rate": 8.081902986202517e-06, "loss": 0.5146, "step": 1394 }, { "epoch": 0.3104138851802403, "grad_norm": 1.0971592624666506, "learning_rate": 8.07906456036033e-06, "loss": 0.544, "step": 1395 }, { "epoch": 0.310636404094348, "grad_norm": 1.180848013244239, "learning_rate": 8.076224535163468e-06, "loss": 0.5145, "step": 1396 }, { "epoch": 0.31085892300845575, "grad_norm": 1.1219338718863003, "learning_rate": 8.073382912087124e-06, "loss": 0.5172, "step": 1397 }, { "epoch": 0.31108144192256343, "grad_norm": 1.1981933270888914, "learning_rate": 8.07053969260732e-06, "loss": 0.5283, "step": 1398 }, { "epoch": 0.3113039608366711, "grad_norm": 1.2032464318866465, "learning_rate": 8.067694878200903e-06, "loss": 0.5334, "step": 1399 }, { "epoch": 0.3115264797507788, "grad_norm": 1.1257609898564545, "learning_rate": 8.064848470345551e-06, "loss": 0.5356, "step": 1400 }, { "epoch": 0.31174899866488653, "grad_norm": 1.0969264417232854, "learning_rate": 8.062000470519775e-06, "loss": 0.5298, "step": 1401 }, { "epoch": 0.3119715175789942, "grad_norm": 1.0923886990256586, "learning_rate": 8.059150880202902e-06, "loss": 0.5249, "step": 1402 }, { "epoch": 0.3121940364931019, "grad_norm": 1.1045392634449638, "learning_rate": 8.056299700875096e-06, "loss": 0.5226, "step": 1403 }, { "epoch": 0.31241655540720964, "grad_norm": 1.1456476682541585, "learning_rate": 8.05344693401734e-06, "loss": 0.536, "step": 1404 }, { "epoch": 0.3126390743213173, "grad_norm": 1.178358193489956, "learning_rate": 8.050592581111441e-06, "loss": 0.5122, "step": 1405 }, { "epoch": 0.312861593235425, "grad_norm": 1.1522656674808551, "learning_rate": 8.047736643640034e-06, "loss": 0.525, "step": 1406 }, { "epoch": 0.3130841121495327, "grad_norm": 1.1112303393309906, "learning_rate": 8.044879123086575e-06, "loss": 0.5401, "step": 1407 }, { "epoch": 0.31330663106364043, "grad_norm": 1.0834250231789107, "learning_rate": 8.04202002093534e-06, "loss": 0.5419, "step": 1408 }, { "epoch": 0.3135291499777481, "grad_norm": 1.1508084308509796, "learning_rate": 8.039159338671437e-06, "loss": 0.5159, "step": 1409 }, { "epoch": 0.3137516688918558, "grad_norm": 1.0626595038470386, "learning_rate": 8.036297077780775e-06, "loss": 0.5347, "step": 1410 }, { "epoch": 0.31397418780596353, "grad_norm": 1.2675643231214573, "learning_rate": 8.033433239750101e-06, "loss": 0.5291, "step": 1411 }, { "epoch": 0.3141967067200712, "grad_norm": 1.156161181361819, "learning_rate": 8.030567826066975e-06, "loss": 0.5555, "step": 1412 }, { "epoch": 0.3144192256341789, "grad_norm": 1.1863939110356494, "learning_rate": 8.027700838219774e-06, "loss": 0.526, "step": 1413 }, { "epoch": 0.3146417445482866, "grad_norm": 1.108729096885121, "learning_rate": 8.024832277697692e-06, "loss": 0.5191, "step": 1414 }, { "epoch": 0.3148642634623943, "grad_norm": 1.1435068167685467, "learning_rate": 8.021962145990746e-06, "loss": 0.5348, "step": 1415 }, { "epoch": 0.315086782376502, "grad_norm": 1.1462039713321301, "learning_rate": 8.019090444589762e-06, "loss": 0.5347, "step": 1416 }, { "epoch": 0.3153093012906097, "grad_norm": 1.2113249014442315, "learning_rate": 8.016217174986388e-06, "loss": 0.5387, "step": 1417 }, { "epoch": 0.31553182020471743, "grad_norm": 1.06414237692473, "learning_rate": 8.013342338673078e-06, "loss": 0.5271, "step": 1418 }, { "epoch": 0.3157543391188251, "grad_norm": 1.0781441177853148, "learning_rate": 8.010465937143106e-06, "loss": 0.536, "step": 1419 }, { "epoch": 0.3159768580329328, "grad_norm": 1.1055460157781023, "learning_rate": 8.00758797189056e-06, "loss": 0.5344, "step": 1420 }, { "epoch": 0.3161993769470405, "grad_norm": 1.1202584565649536, "learning_rate": 8.004708444410334e-06, "loss": 0.5417, "step": 1421 }, { "epoch": 0.3164218958611482, "grad_norm": 1.1889296432095267, "learning_rate": 8.001827356198141e-06, "loss": 0.5394, "step": 1422 }, { "epoch": 0.3166444147752559, "grad_norm": 1.081008030051691, "learning_rate": 7.9989447087505e-06, "loss": 0.5284, "step": 1423 }, { "epoch": 0.3168669336893636, "grad_norm": 1.1494271194415757, "learning_rate": 7.996060503564741e-06, "loss": 0.549, "step": 1424 }, { "epoch": 0.3170894526034713, "grad_norm": 1.1261760488076134, "learning_rate": 7.993174742139003e-06, "loss": 0.5368, "step": 1425 }, { "epoch": 0.317311971517579, "grad_norm": 1.1332311735551561, "learning_rate": 7.990287425972232e-06, "loss": 0.5348, "step": 1426 }, { "epoch": 0.3175344904316867, "grad_norm": 1.2452815383259437, "learning_rate": 7.987398556564186e-06, "loss": 0.5215, "step": 1427 }, { "epoch": 0.3177570093457944, "grad_norm": 1.0918663647722595, "learning_rate": 7.984508135415423e-06, "loss": 0.5407, "step": 1428 }, { "epoch": 0.3179795282599021, "grad_norm": 1.1043889500117028, "learning_rate": 7.981616164027316e-06, "loss": 0.5164, "step": 1429 }, { "epoch": 0.3182020471740098, "grad_norm": 1.1409790582740533, "learning_rate": 7.978722643902031e-06, "loss": 0.5272, "step": 1430 }, { "epoch": 0.3184245660881175, "grad_norm": 1.131091867681425, "learning_rate": 7.975827576542552e-06, "loss": 0.5273, "step": 1431 }, { "epoch": 0.3186470850022252, "grad_norm": 1.1143100405804411, "learning_rate": 7.972930963452659e-06, "loss": 0.5415, "step": 1432 }, { "epoch": 0.3188696039163329, "grad_norm": 1.2198112427475367, "learning_rate": 7.970032806136932e-06, "loss": 0.5402, "step": 1433 }, { "epoch": 0.3190921228304406, "grad_norm": 1.1325042475072717, "learning_rate": 7.967133106100762e-06, "loss": 0.5314, "step": 1434 }, { "epoch": 0.31931464174454827, "grad_norm": 1.0850231324600192, "learning_rate": 7.964231864850337e-06, "loss": 0.5147, "step": 1435 }, { "epoch": 0.319537160658656, "grad_norm": 1.1601182857081889, "learning_rate": 7.961329083892639e-06, "loss": 0.5362, "step": 1436 }, { "epoch": 0.3197596795727637, "grad_norm": 1.1061458557299972, "learning_rate": 7.958424764735462e-06, "loss": 0.5321, "step": 1437 }, { "epoch": 0.31998219848687137, "grad_norm": 1.087729224082217, "learning_rate": 7.955518908887392e-06, "loss": 0.5297, "step": 1438 }, { "epoch": 0.3202047174009791, "grad_norm": 1.1762258392600689, "learning_rate": 7.952611517857811e-06, "loss": 0.5465, "step": 1439 }, { "epoch": 0.3204272363150868, "grad_norm": 1.1487933324530326, "learning_rate": 7.949702593156905e-06, "loss": 0.5292, "step": 1440 }, { "epoch": 0.3206497552291945, "grad_norm": 1.1026818386882962, "learning_rate": 7.94679213629565e-06, "loss": 0.5154, "step": 1441 }, { "epoch": 0.32087227414330216, "grad_norm": 1.1196971500855082, "learning_rate": 7.943880148785824e-06, "loss": 0.5318, "step": 1442 }, { "epoch": 0.3210947930574099, "grad_norm": 1.1009461755429306, "learning_rate": 7.940966632139993e-06, "loss": 0.5218, "step": 1443 }, { "epoch": 0.3213173119715176, "grad_norm": 1.1445683072911856, "learning_rate": 7.938051587871527e-06, "loss": 0.5319, "step": 1444 }, { "epoch": 0.32153983088562527, "grad_norm": 1.1355447745247154, "learning_rate": 7.935135017494578e-06, "loss": 0.5189, "step": 1445 }, { "epoch": 0.32176234979973295, "grad_norm": 1.173494820054353, "learning_rate": 7.9322169225241e-06, "loss": 0.5435, "step": 1446 }, { "epoch": 0.3219848687138407, "grad_norm": 1.1540626384201327, "learning_rate": 7.929297304475836e-06, "loss": 0.5345, "step": 1447 }, { "epoch": 0.32220738762794837, "grad_norm": 1.2203096040439543, "learning_rate": 7.926376164866317e-06, "loss": 0.5405, "step": 1448 }, { "epoch": 0.32242990654205606, "grad_norm": 1.194025980298621, "learning_rate": 7.923453505212869e-06, "loss": 0.5339, "step": 1449 }, { "epoch": 0.3226524254561638, "grad_norm": 1.1208745014934638, "learning_rate": 7.920529327033604e-06, "loss": 0.536, "step": 1450 }, { "epoch": 0.3228749443702715, "grad_norm": 1.03409374143069, "learning_rate": 7.917603631847425e-06, "loss": 0.5393, "step": 1451 }, { "epoch": 0.32309746328437916, "grad_norm": 1.2265248115162763, "learning_rate": 7.914676421174023e-06, "loss": 0.5485, "step": 1452 }, { "epoch": 0.32331998219848684, "grad_norm": 1.1086912176488333, "learning_rate": 7.911747696533874e-06, "loss": 0.5226, "step": 1453 }, { "epoch": 0.3235425011125946, "grad_norm": 1.2481283024338867, "learning_rate": 7.90881745944824e-06, "loss": 0.5223, "step": 1454 }, { "epoch": 0.32376502002670227, "grad_norm": 1.1174846072362303, "learning_rate": 7.905885711439175e-06, "loss": 0.522, "step": 1455 }, { "epoch": 0.32398753894080995, "grad_norm": 1.1417876646067293, "learning_rate": 7.902952454029512e-06, "loss": 0.5301, "step": 1456 }, { "epoch": 0.3242100578549177, "grad_norm": 1.1543443329013787, "learning_rate": 7.900017688742864e-06, "loss": 0.5392, "step": 1457 }, { "epoch": 0.32443257676902537, "grad_norm": 1.077362854041758, "learning_rate": 7.89708141710364e-06, "loss": 0.5285, "step": 1458 }, { "epoch": 0.32465509568313305, "grad_norm": 1.1595837629499401, "learning_rate": 7.894143640637019e-06, "loss": 0.5311, "step": 1459 }, { "epoch": 0.32487761459724074, "grad_norm": 1.0411691877714875, "learning_rate": 7.891204360868969e-06, "loss": 0.5314, "step": 1460 }, { "epoch": 0.3251001335113485, "grad_norm": 1.1698362244973328, "learning_rate": 7.888263579326237e-06, "loss": 0.5336, "step": 1461 }, { "epoch": 0.32532265242545616, "grad_norm": 1.1572641593370705, "learning_rate": 7.885321297536347e-06, "loss": 0.533, "step": 1462 }, { "epoch": 0.32554517133956384, "grad_norm": 1.1994587946185171, "learning_rate": 7.882377517027605e-06, "loss": 0.5417, "step": 1463 }, { "epoch": 0.3257676902536716, "grad_norm": 1.1978584042440905, "learning_rate": 7.879432239329095e-06, "loss": 0.5268, "step": 1464 }, { "epoch": 0.32599020916777927, "grad_norm": 1.152828917878437, "learning_rate": 7.87648546597068e-06, "loss": 0.5177, "step": 1465 }, { "epoch": 0.32621272808188695, "grad_norm": 1.1768770659165966, "learning_rate": 7.873537198483e-06, "loss": 0.5304, "step": 1466 }, { "epoch": 0.32643524699599463, "grad_norm": 1.148053281035454, "learning_rate": 7.870587438397469e-06, "loss": 0.5271, "step": 1467 }, { "epoch": 0.32665776591010237, "grad_norm": 1.1659791980438516, "learning_rate": 7.867636187246276e-06, "loss": 0.5331, "step": 1468 }, { "epoch": 0.32688028482421005, "grad_norm": 1.106445051369775, "learning_rate": 7.864683446562388e-06, "loss": 0.5205, "step": 1469 }, { "epoch": 0.32710280373831774, "grad_norm": 1.2371372815472672, "learning_rate": 7.861729217879541e-06, "loss": 0.545, "step": 1470 }, { "epoch": 0.3273253226524255, "grad_norm": 1.0984035586006462, "learning_rate": 7.858773502732248e-06, "loss": 0.5303, "step": 1471 }, { "epoch": 0.32754784156653316, "grad_norm": 1.097500910748061, "learning_rate": 7.855816302655791e-06, "loss": 0.5218, "step": 1472 }, { "epoch": 0.32777036048064084, "grad_norm": 1.1611744734298297, "learning_rate": 7.852857619186226e-06, "loss": 0.5475, "step": 1473 }, { "epoch": 0.3279928793947485, "grad_norm": 1.1244622336291197, "learning_rate": 7.849897453860378e-06, "loss": 0.5327, "step": 1474 }, { "epoch": 0.32821539830885627, "grad_norm": 1.1641163108278592, "learning_rate": 7.846935808215841e-06, "loss": 0.5415, "step": 1475 }, { "epoch": 0.32843791722296395, "grad_norm": 1.112802076959716, "learning_rate": 7.843972683790982e-06, "loss": 0.5334, "step": 1476 }, { "epoch": 0.32866043613707163, "grad_norm": 1.1537051987621778, "learning_rate": 7.841008082124928e-06, "loss": 0.5196, "step": 1477 }, { "epoch": 0.32888295505117937, "grad_norm": 1.0929624950478378, "learning_rate": 7.838042004757583e-06, "loss": 0.5281, "step": 1478 }, { "epoch": 0.32910547396528705, "grad_norm": 1.1404866690477513, "learning_rate": 7.83507445322961e-06, "loss": 0.5281, "step": 1479 }, { "epoch": 0.32932799287939474, "grad_norm": 1.1290925628539201, "learning_rate": 7.832105429082442e-06, "loss": 0.519, "step": 1480 }, { "epoch": 0.3295505117935024, "grad_norm": 1.1031725007544877, "learning_rate": 7.829134933858275e-06, "loss": 0.5143, "step": 1481 }, { "epoch": 0.32977303070761016, "grad_norm": 1.120220683239564, "learning_rate": 7.826162969100069e-06, "loss": 0.5366, "step": 1482 }, { "epoch": 0.32999554962171784, "grad_norm": 1.1557428051259846, "learning_rate": 7.82318953635155e-06, "loss": 0.5342, "step": 1483 }, { "epoch": 0.3302180685358255, "grad_norm": 1.2450166652555275, "learning_rate": 7.820214637157202e-06, "loss": 0.53, "step": 1484 }, { "epoch": 0.33044058744993327, "grad_norm": 1.0476243696086724, "learning_rate": 7.817238273062276e-06, "loss": 0.5266, "step": 1485 }, { "epoch": 0.33066310636404095, "grad_norm": 1.131374746559298, "learning_rate": 7.81426044561278e-06, "loss": 0.5441, "step": 1486 }, { "epoch": 0.33088562527814863, "grad_norm": 1.120370837366579, "learning_rate": 7.811281156355481e-06, "loss": 0.5288, "step": 1487 }, { "epoch": 0.3311081441922563, "grad_norm": 1.1184210592406933, "learning_rate": 7.808300406837908e-06, "loss": 0.5257, "step": 1488 }, { "epoch": 0.33133066310636405, "grad_norm": 1.1501267993785893, "learning_rate": 7.80531819860835e-06, "loss": 0.5279, "step": 1489 }, { "epoch": 0.33155318202047174, "grad_norm": 1.0742311307682955, "learning_rate": 7.802334533215854e-06, "loss": 0.5355, "step": 1490 }, { "epoch": 0.3317757009345794, "grad_norm": 1.1515679255707503, "learning_rate": 7.799349412210216e-06, "loss": 0.5373, "step": 1491 }, { "epoch": 0.33199821984868716, "grad_norm": 1.193241402491609, "learning_rate": 7.796362837141996e-06, "loss": 0.525, "step": 1492 }, { "epoch": 0.33222073876279484, "grad_norm": 1.1614028475097016, "learning_rate": 7.793374809562508e-06, "loss": 0.5472, "step": 1493 }, { "epoch": 0.3324432576769025, "grad_norm": 1.209258193158217, "learning_rate": 7.790385331023818e-06, "loss": 0.5517, "step": 1494 }, { "epoch": 0.3326657765910102, "grad_norm": 1.1427682447875858, "learning_rate": 7.787394403078747e-06, "loss": 0.5329, "step": 1495 }, { "epoch": 0.33288829550511795, "grad_norm": 1.1703047430748241, "learning_rate": 7.784402027280873e-06, "loss": 0.5534, "step": 1496 }, { "epoch": 0.33311081441922563, "grad_norm": 1.0953658373980288, "learning_rate": 7.781408205184516e-06, "loss": 0.5444, "step": 1497 }, { "epoch": 0.3333333333333333, "grad_norm": 1.079107505975361, "learning_rate": 7.778412938344755e-06, "loss": 0.5304, "step": 1498 }, { "epoch": 0.33355585224744105, "grad_norm": 1.198493063222287, "learning_rate": 7.775416228317421e-06, "loss": 0.5314, "step": 1499 }, { "epoch": 0.33377837116154874, "grad_norm": 1.1144677660070021, "learning_rate": 7.77241807665909e-06, "loss": 0.5188, "step": 1500 }, { "epoch": 0.3340008900756564, "grad_norm": 1.1087435910392536, "learning_rate": 7.769418484927085e-06, "loss": 0.5244, "step": 1501 }, { "epoch": 0.3342234089897641, "grad_norm": 1.1107685544540562, "learning_rate": 7.766417454679484e-06, "loss": 0.5336, "step": 1502 }, { "epoch": 0.33444592790387184, "grad_norm": 1.1974866529196282, "learning_rate": 7.763414987475107e-06, "loss": 0.5371, "step": 1503 }, { "epoch": 0.3346684468179795, "grad_norm": 1.1163998196547023, "learning_rate": 7.76041108487352e-06, "loss": 0.5148, "step": 1504 }, { "epoch": 0.3348909657320872, "grad_norm": 1.148559417203405, "learning_rate": 7.75740574843504e-06, "loss": 0.5313, "step": 1505 }, { "epoch": 0.33511348464619495, "grad_norm": 1.1694438673107108, "learning_rate": 7.754398979720722e-06, "loss": 0.5359, "step": 1506 }, { "epoch": 0.33533600356030263, "grad_norm": 1.2180785232685356, "learning_rate": 7.75139078029237e-06, "loss": 0.5393, "step": 1507 }, { "epoch": 0.3355585224744103, "grad_norm": 1.2505098031494217, "learning_rate": 7.748381151712527e-06, "loss": 0.5214, "step": 1508 }, { "epoch": 0.335781041388518, "grad_norm": 1.248453824389279, "learning_rate": 7.745370095544485e-06, "loss": 0.5493, "step": 1509 }, { "epoch": 0.33600356030262574, "grad_norm": 1.3859742693761345, "learning_rate": 7.74235761335227e-06, "loss": 0.5404, "step": 1510 }, { "epoch": 0.3362260792167334, "grad_norm": 1.0677982459846074, "learning_rate": 7.739343706700652e-06, "loss": 0.511, "step": 1511 }, { "epoch": 0.3364485981308411, "grad_norm": 1.1270473958273715, "learning_rate": 7.73632837715514e-06, "loss": 0.533, "step": 1512 }, { "epoch": 0.33667111704494884, "grad_norm": 1.1388632713742133, "learning_rate": 7.733311626281985e-06, "loss": 0.5356, "step": 1513 }, { "epoch": 0.3368936359590565, "grad_norm": 1.2632485840470338, "learning_rate": 7.730293455648174e-06, "loss": 0.5342, "step": 1514 }, { "epoch": 0.3371161548731642, "grad_norm": 1.187917150552554, "learning_rate": 7.72727386682143e-06, "loss": 0.5207, "step": 1515 }, { "epoch": 0.3373386737872719, "grad_norm": 1.1624749179741258, "learning_rate": 7.724252861370215e-06, "loss": 0.5457, "step": 1516 }, { "epoch": 0.33756119270137963, "grad_norm": 1.12232897454527, "learning_rate": 7.721230440863727e-06, "loss": 0.5235, "step": 1517 }, { "epoch": 0.3377837116154873, "grad_norm": 1.1745659702823699, "learning_rate": 7.718206606871896e-06, "loss": 0.5385, "step": 1518 }, { "epoch": 0.338006230529595, "grad_norm": 1.1204172628988847, "learning_rate": 7.715181360965391e-06, "loss": 0.5348, "step": 1519 }, { "epoch": 0.33822874944370274, "grad_norm": 1.1756035034565944, "learning_rate": 7.71215470471561e-06, "loss": 0.5414, "step": 1520 }, { "epoch": 0.3384512683578104, "grad_norm": 1.1128124005681057, "learning_rate": 7.709126639694684e-06, "loss": 0.5303, "step": 1521 }, { "epoch": 0.3386737872719181, "grad_norm": 1.0872659031352414, "learning_rate": 7.706097167475479e-06, "loss": 0.5308, "step": 1522 }, { "epoch": 0.3388963061860258, "grad_norm": 1.201961502392054, "learning_rate": 7.703066289631591e-06, "loss": 0.525, "step": 1523 }, { "epoch": 0.3391188251001335, "grad_norm": 1.1891365378341494, "learning_rate": 7.700034007737345e-06, "loss": 0.544, "step": 1524 }, { "epoch": 0.3393413440142412, "grad_norm": 1.0540621472502811, "learning_rate": 7.69700032336779e-06, "loss": 0.5377, "step": 1525 }, { "epoch": 0.3395638629283489, "grad_norm": 1.19659719232733, "learning_rate": 7.693965238098717e-06, "loss": 0.524, "step": 1526 }, { "epoch": 0.33978638184245663, "grad_norm": 1.0759143354183034, "learning_rate": 7.690928753506632e-06, "loss": 0.5275, "step": 1527 }, { "epoch": 0.3400089007565643, "grad_norm": 1.0549431073008024, "learning_rate": 7.68789087116877e-06, "loss": 0.5327, "step": 1528 }, { "epoch": 0.340231419670672, "grad_norm": 1.1767758229128573, "learning_rate": 7.684851592663101e-06, "loss": 0.551, "step": 1529 }, { "epoch": 0.3404539385847797, "grad_norm": 1.1492101094924092, "learning_rate": 7.68181091956831e-06, "loss": 0.5206, "step": 1530 }, { "epoch": 0.3406764574988874, "grad_norm": 1.1441216840264103, "learning_rate": 7.678768853463807e-06, "loss": 0.5223, "step": 1531 }, { "epoch": 0.3408989764129951, "grad_norm": 1.211433993227037, "learning_rate": 7.675725395929734e-06, "loss": 0.5192, "step": 1532 }, { "epoch": 0.3411214953271028, "grad_norm": 1.0377306568727318, "learning_rate": 7.67268054854695e-06, "loss": 0.5121, "step": 1533 }, { "epoch": 0.3413440142412105, "grad_norm": 1.1566365833783696, "learning_rate": 7.669634312897032e-06, "loss": 0.5324, "step": 1534 }, { "epoch": 0.3415665331553182, "grad_norm": 1.110885496206, "learning_rate": 7.666586690562284e-06, "loss": 0.5239, "step": 1535 }, { "epoch": 0.3417890520694259, "grad_norm": 1.0869067829012766, "learning_rate": 7.663537683125731e-06, "loss": 0.5212, "step": 1536 }, { "epoch": 0.3420115709835336, "grad_norm": 1.1682873436533316, "learning_rate": 7.660487292171115e-06, "loss": 0.5255, "step": 1537 }, { "epoch": 0.3422340898976413, "grad_norm": 1.1466478406615488, "learning_rate": 7.657435519282892e-06, "loss": 0.5278, "step": 1538 }, { "epoch": 0.342456608811749, "grad_norm": 1.095908295461089, "learning_rate": 7.654382366046247e-06, "loss": 0.5034, "step": 1539 }, { "epoch": 0.3426791277258567, "grad_norm": 1.171208016456938, "learning_rate": 7.651327834047071e-06, "loss": 0.5384, "step": 1540 }, { "epoch": 0.3429016466399644, "grad_norm": 1.2093956837510171, "learning_rate": 7.648271924871977e-06, "loss": 0.5357, "step": 1541 }, { "epoch": 0.3431241655540721, "grad_norm": 1.2063384167584015, "learning_rate": 7.645214640108295e-06, "loss": 0.5424, "step": 1542 }, { "epoch": 0.3433466844681798, "grad_norm": 1.077407317875053, "learning_rate": 7.642155981344063e-06, "loss": 0.5053, "step": 1543 }, { "epoch": 0.34356920338228747, "grad_norm": 1.0978162981722557, "learning_rate": 7.639095950168035e-06, "loss": 0.5287, "step": 1544 }, { "epoch": 0.3437917222963952, "grad_norm": 1.0978427071949892, "learning_rate": 7.636034548169683e-06, "loss": 0.5057, "step": 1545 }, { "epoch": 0.3440142412105029, "grad_norm": 1.1618905589238728, "learning_rate": 7.632971776939189e-06, "loss": 0.5212, "step": 1546 }, { "epoch": 0.3442367601246106, "grad_norm": 1.103403238385288, "learning_rate": 7.629907638067438e-06, "loss": 0.5228, "step": 1547 }, { "epoch": 0.3444592790387183, "grad_norm": 1.1194912229038148, "learning_rate": 7.626842133146035e-06, "loss": 0.5342, "step": 1548 }, { "epoch": 0.344681797952826, "grad_norm": 1.128396078283735, "learning_rate": 7.623775263767294e-06, "loss": 0.5175, "step": 1549 }, { "epoch": 0.3449043168669337, "grad_norm": 1.3575369533950448, "learning_rate": 7.620707031524231e-06, "loss": 0.5122, "step": 1550 }, { "epoch": 0.34512683578104136, "grad_norm": 1.2253925309610185, "learning_rate": 7.6176374380105746e-06, "loss": 0.5331, "step": 1551 }, { "epoch": 0.3453493546951491, "grad_norm": 1.6856813654323761, "learning_rate": 7.614566484820762e-06, "loss": 0.5372, "step": 1552 }, { "epoch": 0.3455718736092568, "grad_norm": 1.1007987097097147, "learning_rate": 7.611494173549933e-06, "loss": 0.5142, "step": 1553 }, { "epoch": 0.34579439252336447, "grad_norm": 2.1025904977989565, "learning_rate": 7.608420505793937e-06, "loss": 0.5089, "step": 1554 }, { "epoch": 0.3460169114374722, "grad_norm": 1.1346687737236787, "learning_rate": 7.605345483149322e-06, "loss": 0.5116, "step": 1555 }, { "epoch": 0.3462394303515799, "grad_norm": 1.1629059258554606, "learning_rate": 7.602269107213344e-06, "loss": 0.5445, "step": 1556 }, { "epoch": 0.3464619492656876, "grad_norm": 1.2208932928253686, "learning_rate": 7.599191379583963e-06, "loss": 0.5179, "step": 1557 }, { "epoch": 0.34668446817979526, "grad_norm": 1.1257412226173797, "learning_rate": 7.596112301859838e-06, "loss": 0.5376, "step": 1558 }, { "epoch": 0.346906987093903, "grad_norm": 1.1335219711895008, "learning_rate": 7.593031875640331e-06, "loss": 0.5248, "step": 1559 }, { "epoch": 0.3471295060080107, "grad_norm": 1.1226635013088613, "learning_rate": 7.589950102525503e-06, "loss": 0.5412, "step": 1560 }, { "epoch": 0.34735202492211836, "grad_norm": 1.1598896800678151, "learning_rate": 7.5868669841161145e-06, "loss": 0.531, "step": 1561 }, { "epoch": 0.3475745438362261, "grad_norm": 1.1830608333746298, "learning_rate": 7.58378252201363e-06, "loss": 0.5135, "step": 1562 }, { "epoch": 0.3477970627503338, "grad_norm": 1.195904180199359, "learning_rate": 7.580696717820204e-06, "loss": 0.5275, "step": 1563 }, { "epoch": 0.34801958166444147, "grad_norm": 1.2327124037675923, "learning_rate": 7.577609573138693e-06, "loss": 0.5393, "step": 1564 }, { "epoch": 0.34824210057854915, "grad_norm": 1.170656897457435, "learning_rate": 7.574521089572648e-06, "loss": 0.5487, "step": 1565 }, { "epoch": 0.3484646194926569, "grad_norm": 1.2165929883569284, "learning_rate": 7.571431268726319e-06, "loss": 0.5456, "step": 1566 }, { "epoch": 0.3486871384067646, "grad_norm": 1.18890251251629, "learning_rate": 7.568340112204646e-06, "loss": 0.5309, "step": 1567 }, { "epoch": 0.34890965732087226, "grad_norm": 1.0639453147643292, "learning_rate": 7.565247621613263e-06, "loss": 0.5183, "step": 1568 }, { "epoch": 0.34913217623498, "grad_norm": 1.1565863232815434, "learning_rate": 7.5621537985585e-06, "loss": 0.5117, "step": 1569 }, { "epoch": 0.3493546951490877, "grad_norm": 1.088004211740508, "learning_rate": 7.559058644647379e-06, "loss": 0.538, "step": 1570 }, { "epoch": 0.34957721406319536, "grad_norm": 1.1176913508503785, "learning_rate": 7.55596216148761e-06, "loss": 0.5295, "step": 1571 }, { "epoch": 0.34979973297730305, "grad_norm": 1.0972402970332988, "learning_rate": 7.552864350687596e-06, "loss": 0.5134, "step": 1572 }, { "epoch": 0.3500222518914108, "grad_norm": 1.1582746614680082, "learning_rate": 7.549765213856428e-06, "loss": 0.5392, "step": 1573 }, { "epoch": 0.35024477080551847, "grad_norm": 1.0716494635936797, "learning_rate": 7.54666475260389e-06, "loss": 0.539, "step": 1574 }, { "epoch": 0.35046728971962615, "grad_norm": 1.1533671260739735, "learning_rate": 7.543562968540446e-06, "loss": 0.5234, "step": 1575 }, { "epoch": 0.3506898086337339, "grad_norm": 1.2183499354201686, "learning_rate": 7.540459863277257e-06, "loss": 0.5138, "step": 1576 }, { "epoch": 0.3509123275478416, "grad_norm": 1.1759831561362513, "learning_rate": 7.5373554384261604e-06, "loss": 0.5241, "step": 1577 }, { "epoch": 0.35113484646194926, "grad_norm": 1.173052797184834, "learning_rate": 7.534249695599686e-06, "loss": 0.5349, "step": 1578 }, { "epoch": 0.35135736537605694, "grad_norm": 1.1326210750058725, "learning_rate": 7.531142636411046e-06, "loss": 0.5153, "step": 1579 }, { "epoch": 0.3515798842901647, "grad_norm": 1.1844653110136618, "learning_rate": 7.5280342624741374e-06, "loss": 0.5328, "step": 1580 }, { "epoch": 0.35180240320427236, "grad_norm": 1.1202119320181572, "learning_rate": 7.524924575403536e-06, "loss": 0.5257, "step": 1581 }, { "epoch": 0.35202492211838005, "grad_norm": 1.093278305461717, "learning_rate": 7.521813576814504e-06, "loss": 0.5322, "step": 1582 }, { "epoch": 0.3522474410324878, "grad_norm": 1.0952368650943274, "learning_rate": 7.518701268322985e-06, "loss": 0.5139, "step": 1583 }, { "epoch": 0.35246995994659547, "grad_norm": 1.114062208077999, "learning_rate": 7.515587651545599e-06, "loss": 0.538, "step": 1584 }, { "epoch": 0.35269247886070315, "grad_norm": 1.1512760726496627, "learning_rate": 7.5124727280996516e-06, "loss": 0.5332, "step": 1585 }, { "epoch": 0.35291499777481083, "grad_norm": 1.1791826999273354, "learning_rate": 7.50935649960312e-06, "loss": 0.5374, "step": 1586 }, { "epoch": 0.3531375166889186, "grad_norm": 1.1500522814160192, "learning_rate": 7.506238967674664e-06, "loss": 0.5347, "step": 1587 }, { "epoch": 0.35336003560302626, "grad_norm": 1.1426454441191651, "learning_rate": 7.503120133933621e-06, "loss": 0.5237, "step": 1588 }, { "epoch": 0.35358255451713394, "grad_norm": 1.0993603551092634, "learning_rate": 7.500000000000001e-06, "loss": 0.5161, "step": 1589 }, { "epoch": 0.3538050734312417, "grad_norm": 1.2044030658265654, "learning_rate": 7.496878567494492e-06, "loss": 0.5169, "step": 1590 }, { "epoch": 0.35402759234534936, "grad_norm": 1.1141884640903705, "learning_rate": 7.4937558380384555e-06, "loss": 0.5173, "step": 1591 }, { "epoch": 0.35425011125945705, "grad_norm": 1.174655269426836, "learning_rate": 7.490631813253927e-06, "loss": 0.5327, "step": 1592 }, { "epoch": 0.35447263017356473, "grad_norm": 1.1594904747461718, "learning_rate": 7.4875064947636145e-06, "loss": 0.5127, "step": 1593 }, { "epoch": 0.35469514908767247, "grad_norm": 1.1084504542206899, "learning_rate": 7.4843798841909e-06, "loss": 0.5091, "step": 1594 }, { "epoch": 0.35491766800178015, "grad_norm": 1.2576174843947319, "learning_rate": 7.481251983159834e-06, "loss": 0.5341, "step": 1595 }, { "epoch": 0.35514018691588783, "grad_norm": 1.0887884392288687, "learning_rate": 7.478122793295136e-06, "loss": 0.533, "step": 1596 }, { "epoch": 0.3553627058299956, "grad_norm": 1.1471094493669376, "learning_rate": 7.4749923162222005e-06, "loss": 0.5032, "step": 1597 }, { "epoch": 0.35558522474410326, "grad_norm": 1.2870647759388607, "learning_rate": 7.471860553567086e-06, "loss": 0.5464, "step": 1598 }, { "epoch": 0.35580774365821094, "grad_norm": 1.1937984251755343, "learning_rate": 7.468727506956519e-06, "loss": 0.5237, "step": 1599 }, { "epoch": 0.3560302625723186, "grad_norm": 1.1949066790184188, "learning_rate": 7.465593178017897e-06, "loss": 0.5091, "step": 1600 }, { "epoch": 0.35625278148642636, "grad_norm": 1.1412739835611916, "learning_rate": 7.462457568379278e-06, "loss": 0.5069, "step": 1601 }, { "epoch": 0.35647530040053405, "grad_norm": 1.131118757435118, "learning_rate": 7.459320679669387e-06, "loss": 0.5229, "step": 1602 }, { "epoch": 0.35669781931464173, "grad_norm": 1.1927684563828476, "learning_rate": 7.456182513517616e-06, "loss": 0.5406, "step": 1603 }, { "epoch": 0.35692033822874947, "grad_norm": 1.145626380605564, "learning_rate": 7.45304307155402e-06, "loss": 0.5138, "step": 1604 }, { "epoch": 0.35714285714285715, "grad_norm": 1.0789930433277752, "learning_rate": 7.449902355409312e-06, "loss": 0.5034, "step": 1605 }, { "epoch": 0.35736537605696483, "grad_norm": 1.1275643635603199, "learning_rate": 7.446760366714874e-06, "loss": 0.5203, "step": 1606 }, { "epoch": 0.3575878949710725, "grad_norm": 1.1746037470654684, "learning_rate": 7.4436171071027405e-06, "loss": 0.5104, "step": 1607 }, { "epoch": 0.35781041388518026, "grad_norm": 1.0888248995532988, "learning_rate": 7.440472578205618e-06, "loss": 0.5206, "step": 1608 }, { "epoch": 0.35803293279928794, "grad_norm": 1.096471817811949, "learning_rate": 7.4373267816568575e-06, "loss": 0.5235, "step": 1609 }, { "epoch": 0.3582554517133956, "grad_norm": 1.146630647796223, "learning_rate": 7.434179719090481e-06, "loss": 0.5168, "step": 1610 }, { "epoch": 0.35847797062750336, "grad_norm": 1.2301967050230915, "learning_rate": 7.431031392141162e-06, "loss": 0.5219, "step": 1611 }, { "epoch": 0.35870048954161105, "grad_norm": 1.0942515150292587, "learning_rate": 7.427881802444233e-06, "loss": 0.5244, "step": 1612 }, { "epoch": 0.35892300845571873, "grad_norm": 1.2334473212170836, "learning_rate": 7.424730951635677e-06, "loss": 0.5332, "step": 1613 }, { "epoch": 0.3591455273698264, "grad_norm": 1.1896439315745269, "learning_rate": 7.421578841352144e-06, "loss": 0.5443, "step": 1614 }, { "epoch": 0.35936804628393415, "grad_norm": 1.1491769588711842, "learning_rate": 7.418425473230924e-06, "loss": 0.5488, "step": 1615 }, { "epoch": 0.35959056519804183, "grad_norm": 1.1425051109123145, "learning_rate": 7.415270848909973e-06, "loss": 0.5222, "step": 1616 }, { "epoch": 0.3598130841121495, "grad_norm": 1.1873547628133823, "learning_rate": 7.412114970027886e-06, "loss": 0.5181, "step": 1617 }, { "epoch": 0.36003560302625726, "grad_norm": 1.1371284516487372, "learning_rate": 7.4089578382239245e-06, "loss": 0.5281, "step": 1618 }, { "epoch": 0.36025812194036494, "grad_norm": 1.1143152098225797, "learning_rate": 7.405799455137991e-06, "loss": 0.5382, "step": 1619 }, { "epoch": 0.3604806408544726, "grad_norm": 1.1460518863357585, "learning_rate": 7.40263982241064e-06, "loss": 0.5208, "step": 1620 }, { "epoch": 0.3607031597685803, "grad_norm": 1.1314082605793072, "learning_rate": 7.399478941683075e-06, "loss": 0.4943, "step": 1621 }, { "epoch": 0.36092567868268804, "grad_norm": 1.1732807805123484, "learning_rate": 7.39631681459715e-06, "loss": 0.523, "step": 1622 }, { "epoch": 0.36114819759679573, "grad_norm": 1.240532253833408, "learning_rate": 7.393153442795362e-06, "loss": 0.5353, "step": 1623 }, { "epoch": 0.3613707165109034, "grad_norm": 1.1493650781609872, "learning_rate": 7.38998882792086e-06, "loss": 0.5289, "step": 1624 }, { "epoch": 0.36159323542501115, "grad_norm": 1.1150001158565335, "learning_rate": 7.386822971617437e-06, "loss": 0.5124, "step": 1625 }, { "epoch": 0.36181575433911883, "grad_norm": 1.0955395001845156, "learning_rate": 7.383655875529526e-06, "loss": 0.4951, "step": 1626 }, { "epoch": 0.3620382732532265, "grad_norm": 1.1828849338749325, "learning_rate": 7.380487541302211e-06, "loss": 0.5453, "step": 1627 }, { "epoch": 0.3622607921673342, "grad_norm": 1.2285325827717957, "learning_rate": 7.377317970581212e-06, "loss": 0.5424, "step": 1628 }, { "epoch": 0.36248331108144194, "grad_norm": 1.1474898586772069, "learning_rate": 7.3741471650129005e-06, "loss": 0.5275, "step": 1629 }, { "epoch": 0.3627058299955496, "grad_norm": 1.1638207359960582, "learning_rate": 7.37097512624428e-06, "loss": 0.5335, "step": 1630 }, { "epoch": 0.3629283489096573, "grad_norm": 1.2253204204410104, "learning_rate": 7.367801855923001e-06, "loss": 0.5358, "step": 1631 }, { "epoch": 0.36315086782376504, "grad_norm": 1.2728805875976266, "learning_rate": 7.36462735569735e-06, "loss": 0.5452, "step": 1632 }, { "epoch": 0.36337338673787273, "grad_norm": 1.143524855152224, "learning_rate": 7.361451627216254e-06, "loss": 0.5246, "step": 1633 }, { "epoch": 0.3635959056519804, "grad_norm": 1.3042138023917726, "learning_rate": 7.3582746721292775e-06, "loss": 0.5255, "step": 1634 }, { "epoch": 0.3638184245660881, "grad_norm": 1.291585633030359, "learning_rate": 7.355096492086623e-06, "loss": 0.5264, "step": 1635 }, { "epoch": 0.36404094348019583, "grad_norm": 1.1334067267701409, "learning_rate": 7.351917088739128e-06, "loss": 0.5157, "step": 1636 }, { "epoch": 0.3642634623943035, "grad_norm": 1.1761788160231843, "learning_rate": 7.348736463738267e-06, "loss": 0.523, "step": 1637 }, { "epoch": 0.3644859813084112, "grad_norm": 1.1367252210992869, "learning_rate": 7.345554618736146e-06, "loss": 0.5313, "step": 1638 }, { "epoch": 0.36470850022251894, "grad_norm": 1.139954798495544, "learning_rate": 7.342371555385508e-06, "loss": 0.529, "step": 1639 }, { "epoch": 0.3649310191366266, "grad_norm": 1.1071215091666742, "learning_rate": 7.3391872753397285e-06, "loss": 0.5276, "step": 1640 }, { "epoch": 0.3651535380507343, "grad_norm": 1.2030258651362176, "learning_rate": 7.336001780252814e-06, "loss": 0.5263, "step": 1641 }, { "epoch": 0.365376056964842, "grad_norm": 1.172989706175124, "learning_rate": 7.3328150717794e-06, "loss": 0.5348, "step": 1642 }, { "epoch": 0.3655985758789497, "grad_norm": 1.1284186785642323, "learning_rate": 7.3296271515747585e-06, "loss": 0.5054, "step": 1643 }, { "epoch": 0.3658210947930574, "grad_norm": 1.1287084167139272, "learning_rate": 7.3264380212947815e-06, "loss": 0.5152, "step": 1644 }, { "epoch": 0.3660436137071651, "grad_norm": 1.1370911950009555, "learning_rate": 7.323247682596001e-06, "loss": 0.5178, "step": 1645 }, { "epoch": 0.36626613262127283, "grad_norm": 1.1432167248048946, "learning_rate": 7.320056137135565e-06, "loss": 0.5278, "step": 1646 }, { "epoch": 0.3664886515353805, "grad_norm": 1.2007439612923323, "learning_rate": 7.316863386571259e-06, "loss": 0.5411, "step": 1647 }, { "epoch": 0.3667111704494882, "grad_norm": 1.230973046712849, "learning_rate": 7.3136694325614855e-06, "loss": 0.5325, "step": 1648 }, { "epoch": 0.3669336893635959, "grad_norm": 1.1132827915803536, "learning_rate": 7.310474276765278e-06, "loss": 0.5145, "step": 1649 }, { "epoch": 0.3671562082777036, "grad_norm": 1.073161571147065, "learning_rate": 7.307277920842293e-06, "loss": 0.5214, "step": 1650 }, { "epoch": 0.3673787271918113, "grad_norm": 1.1877354190120704, "learning_rate": 7.304080366452808e-06, "loss": 0.5536, "step": 1651 }, { "epoch": 0.367601246105919, "grad_norm": 1.8178854576958474, "learning_rate": 7.300881615257725e-06, "loss": 0.5307, "step": 1652 }, { "epoch": 0.3678237650200267, "grad_norm": 1.1413191395483526, "learning_rate": 7.297681668918568e-06, "loss": 0.5591, "step": 1653 }, { "epoch": 0.3680462839341344, "grad_norm": 1.0951267511968408, "learning_rate": 7.294480529097481e-06, "loss": 0.5216, "step": 1654 }, { "epoch": 0.3682688028482421, "grad_norm": 1.164624004236528, "learning_rate": 7.291278197457228e-06, "loss": 0.5198, "step": 1655 }, { "epoch": 0.3684913217623498, "grad_norm": 1.090573465160285, "learning_rate": 7.288074675661192e-06, "loss": 0.5309, "step": 1656 }, { "epoch": 0.3687138406764575, "grad_norm": 1.1235588981299576, "learning_rate": 7.284869965373374e-06, "loss": 0.5101, "step": 1657 }, { "epoch": 0.3689363595905652, "grad_norm": 1.1187565018334542, "learning_rate": 7.281664068258394e-06, "loss": 0.5327, "step": 1658 }, { "epoch": 0.3691588785046729, "grad_norm": 1.158077664763365, "learning_rate": 7.278456985981485e-06, "loss": 0.5386, "step": 1659 }, { "epoch": 0.3693813974187806, "grad_norm": 1.156659359762492, "learning_rate": 7.2752487202085e-06, "loss": 0.5055, "step": 1660 }, { "epoch": 0.3696039163328883, "grad_norm": 1.0574750388806502, "learning_rate": 7.272039272605902e-06, "loss": 0.5351, "step": 1661 }, { "epoch": 0.369826435246996, "grad_norm": 1.2274599954218333, "learning_rate": 7.268828644840774e-06, "loss": 0.5139, "step": 1662 }, { "epoch": 0.37004895416110367, "grad_norm": 1.2638330196839163, "learning_rate": 7.265616838580806e-06, "loss": 0.5192, "step": 1663 }, { "epoch": 0.3702714730752114, "grad_norm": 1.2565401540076877, "learning_rate": 7.262403855494301e-06, "loss": 0.5414, "step": 1664 }, { "epoch": 0.3704939919893191, "grad_norm": 1.2870633262115538, "learning_rate": 7.259189697250177e-06, "loss": 0.5388, "step": 1665 }, { "epoch": 0.3707165109034268, "grad_norm": 1.6229881252444494, "learning_rate": 7.255974365517961e-06, "loss": 0.5152, "step": 1666 }, { "epoch": 0.3709390298175345, "grad_norm": 1.1711566272737581, "learning_rate": 7.2527578619677866e-06, "loss": 0.5188, "step": 1667 }, { "epoch": 0.3711615487316422, "grad_norm": 1.2181478883496077, "learning_rate": 7.2495401882703995e-06, "loss": 0.5124, "step": 1668 }, { "epoch": 0.3713840676457499, "grad_norm": 1.296985736261446, "learning_rate": 7.246321346097152e-06, "loss": 0.5091, "step": 1669 }, { "epoch": 0.37160658655985757, "grad_norm": 1.5946846372505108, "learning_rate": 7.243101337120002e-06, "loss": 0.5212, "step": 1670 }, { "epoch": 0.3718291054739653, "grad_norm": 1.1480748756981265, "learning_rate": 7.239880163011517e-06, "loss": 0.5202, "step": 1671 }, { "epoch": 0.372051624388073, "grad_norm": 1.1757625870728976, "learning_rate": 7.236657825444866e-06, "loss": 0.5107, "step": 1672 }, { "epoch": 0.37227414330218067, "grad_norm": 1.1715614254333209, "learning_rate": 7.233434326093822e-06, "loss": 0.5223, "step": 1673 }, { "epoch": 0.3724966622162884, "grad_norm": 1.2614214361396536, "learning_rate": 7.230209666632768e-06, "loss": 0.5272, "step": 1674 }, { "epoch": 0.3727191811303961, "grad_norm": 1.3189664567037183, "learning_rate": 7.226983848736679e-06, "loss": 0.5252, "step": 1675 }, { "epoch": 0.3729417000445038, "grad_norm": 1.223799132319172, "learning_rate": 7.223756874081143e-06, "loss": 0.528, "step": 1676 }, { "epoch": 0.37316421895861146, "grad_norm": 1.1772867666234221, "learning_rate": 7.220528744342341e-06, "loss": 0.519, "step": 1677 }, { "epoch": 0.3733867378727192, "grad_norm": 1.1631058957473666, "learning_rate": 7.217299461197056e-06, "loss": 0.5235, "step": 1678 }, { "epoch": 0.3736092567868269, "grad_norm": 1.1850720667492676, "learning_rate": 7.21406902632267e-06, "loss": 0.5159, "step": 1679 }, { "epoch": 0.37383177570093457, "grad_norm": 1.195534526939251, "learning_rate": 7.210837441397165e-06, "loss": 0.5309, "step": 1680 }, { "epoch": 0.3740542946150423, "grad_norm": 1.1336762491971728, "learning_rate": 7.207604708099121e-06, "loss": 0.5129, "step": 1681 }, { "epoch": 0.37427681352915, "grad_norm": 1.2038204588081545, "learning_rate": 7.2043708281077075e-06, "loss": 0.5202, "step": 1682 }, { "epoch": 0.37449933244325767, "grad_norm": 1.1421203320172786, "learning_rate": 7.2011358031027e-06, "loss": 0.5502, "step": 1683 }, { "epoch": 0.37472185135736535, "grad_norm": 1.091813301992578, "learning_rate": 7.197899634764461e-06, "loss": 0.5046, "step": 1684 }, { "epoch": 0.3749443702714731, "grad_norm": 1.1659479414072793, "learning_rate": 7.194662324773949e-06, "loss": 0.5227, "step": 1685 }, { "epoch": 0.3751668891855808, "grad_norm": 1.15249069319857, "learning_rate": 7.1914238748127165e-06, "loss": 0.5305, "step": 1686 }, { "epoch": 0.37538940809968846, "grad_norm": 1.1875608268726334, "learning_rate": 7.1881842865629085e-06, "loss": 0.5293, "step": 1687 }, { "epoch": 0.3756119270137962, "grad_norm": 1.187470574288426, "learning_rate": 7.184943561707259e-06, "loss": 0.5234, "step": 1688 }, { "epoch": 0.3758344459279039, "grad_norm": 1.18933911010025, "learning_rate": 7.181701701929094e-06, "loss": 0.5252, "step": 1689 }, { "epoch": 0.37605696484201157, "grad_norm": 1.1940492120204693, "learning_rate": 7.178458708912328e-06, "loss": 0.5152, "step": 1690 }, { "epoch": 0.37627948375611925, "grad_norm": 1.0755556616882562, "learning_rate": 7.175214584341467e-06, "loss": 0.5076, "step": 1691 }, { "epoch": 0.376502002670227, "grad_norm": 1.2042073174500374, "learning_rate": 7.171969329901601e-06, "loss": 0.5202, "step": 1692 }, { "epoch": 0.37672452158433467, "grad_norm": 1.1343408628235219, "learning_rate": 7.168722947278408e-06, "loss": 0.5251, "step": 1693 }, { "epoch": 0.37694704049844235, "grad_norm": 1.0987348482677448, "learning_rate": 7.165475438158154e-06, "loss": 0.5096, "step": 1694 }, { "epoch": 0.3771695594125501, "grad_norm": 1.2275760282440273, "learning_rate": 7.162226804227687e-06, "loss": 0.5147, "step": 1695 }, { "epoch": 0.3773920783266578, "grad_norm": 1.0906169268415562, "learning_rate": 7.158977047174441e-06, "loss": 0.5079, "step": 1696 }, { "epoch": 0.37761459724076546, "grad_norm": 1.0772318431169305, "learning_rate": 7.1557261686864355e-06, "loss": 0.5256, "step": 1697 }, { "epoch": 0.37783711615487314, "grad_norm": 1.1258855257765978, "learning_rate": 7.152474170452268e-06, "loss": 0.5195, "step": 1698 }, { "epoch": 0.3780596350689809, "grad_norm": 1.0544001188711358, "learning_rate": 7.1492210541611205e-06, "loss": 0.52, "step": 1699 }, { "epoch": 0.37828215398308856, "grad_norm": 1.1824201103030667, "learning_rate": 7.145966821502755e-06, "loss": 0.5312, "step": 1700 }, { "epoch": 0.37850467289719625, "grad_norm": 1.2436875526728461, "learning_rate": 7.1427114741675145e-06, "loss": 0.5245, "step": 1701 }, { "epoch": 0.378727191811304, "grad_norm": 1.196746466976519, "learning_rate": 7.139455013846319e-06, "loss": 0.5247, "step": 1702 }, { "epoch": 0.37894971072541167, "grad_norm": 1.1571343790255741, "learning_rate": 7.136197442230668e-06, "loss": 0.5194, "step": 1703 }, { "epoch": 0.37917222963951935, "grad_norm": 1.2521641943567723, "learning_rate": 7.132938761012638e-06, "loss": 0.5148, "step": 1704 }, { "epoch": 0.37939474855362704, "grad_norm": 1.2402519563658647, "learning_rate": 7.1296789718848815e-06, "loss": 0.5272, "step": 1705 }, { "epoch": 0.3796172674677348, "grad_norm": 1.2954906358114446, "learning_rate": 7.1264180765406256e-06, "loss": 0.54, "step": 1706 }, { "epoch": 0.37983978638184246, "grad_norm": 1.141150792464595, "learning_rate": 7.123156076673674e-06, "loss": 0.5277, "step": 1707 }, { "epoch": 0.38006230529595014, "grad_norm": 1.1805423173597278, "learning_rate": 7.119892973978405e-06, "loss": 0.5195, "step": 1708 }, { "epoch": 0.3802848242100579, "grad_norm": 1.22271105173283, "learning_rate": 7.116628770149767e-06, "loss": 0.5202, "step": 1709 }, { "epoch": 0.38050734312416556, "grad_norm": 1.1341985304018911, "learning_rate": 7.113363466883278e-06, "loss": 0.5326, "step": 1710 }, { "epoch": 0.38072986203827325, "grad_norm": 1.243393520671106, "learning_rate": 7.110097065875036e-06, "loss": 0.5356, "step": 1711 }, { "epoch": 0.38095238095238093, "grad_norm": 1.17217958557334, "learning_rate": 7.106829568821699e-06, "loss": 0.5218, "step": 1712 }, { "epoch": 0.38117489986648867, "grad_norm": 1.136729773756026, "learning_rate": 7.103560977420501e-06, "loss": 0.536, "step": 1713 }, { "epoch": 0.38139741878059635, "grad_norm": 1.2512360271494465, "learning_rate": 7.100291293369244e-06, "loss": 0.5253, "step": 1714 }, { "epoch": 0.38161993769470404, "grad_norm": 1.194665620444131, "learning_rate": 7.097020518366292e-06, "loss": 0.5086, "step": 1715 }, { "epoch": 0.3818424566088118, "grad_norm": 1.2529575140202105, "learning_rate": 7.093748654110582e-06, "loss": 0.5284, "step": 1716 }, { "epoch": 0.38206497552291946, "grad_norm": 1.2256035096539601, "learning_rate": 7.0904757023016135e-06, "loss": 0.5163, "step": 1717 }, { "epoch": 0.38228749443702714, "grad_norm": 1.123019844687602, "learning_rate": 7.087201664639454e-06, "loss": 0.5446, "step": 1718 }, { "epoch": 0.3825100133511348, "grad_norm": 1.1678075529119045, "learning_rate": 7.083926542824728e-06, "loss": 0.5235, "step": 1719 }, { "epoch": 0.38273253226524256, "grad_norm": 1.1804379129781684, "learning_rate": 7.080650338558634e-06, "loss": 0.5213, "step": 1720 }, { "epoch": 0.38295505117935025, "grad_norm": 1.162879598875113, "learning_rate": 7.077373053542922e-06, "loss": 0.5188, "step": 1721 }, { "epoch": 0.38317757009345793, "grad_norm": 1.168947171330643, "learning_rate": 7.074094689479911e-06, "loss": 0.53, "step": 1722 }, { "epoch": 0.38340008900756567, "grad_norm": 1.1343276313388033, "learning_rate": 7.070815248072476e-06, "loss": 0.5275, "step": 1723 }, { "epoch": 0.38362260792167335, "grad_norm": 1.2009326748852023, "learning_rate": 7.067534731024054e-06, "loss": 0.524, "step": 1724 }, { "epoch": 0.38384512683578104, "grad_norm": 1.1212440057813486, "learning_rate": 7.064253140038639e-06, "loss": 0.5318, "step": 1725 }, { "epoch": 0.3840676457498887, "grad_norm": 1.1549113344257909, "learning_rate": 7.060970476820783e-06, "loss": 0.5293, "step": 1726 }, { "epoch": 0.38429016466399646, "grad_norm": 1.1012898158060547, "learning_rate": 7.057686743075598e-06, "loss": 0.5285, "step": 1727 }, { "epoch": 0.38451268357810414, "grad_norm": 1.1673553850782115, "learning_rate": 7.054401940508748e-06, "loss": 0.5286, "step": 1728 }, { "epoch": 0.3847352024922118, "grad_norm": 1.1135342469758156, "learning_rate": 7.0511160708264545e-06, "loss": 0.5143, "step": 1729 }, { "epoch": 0.38495772140631956, "grad_norm": 1.8328517620916422, "learning_rate": 7.047829135735493e-06, "loss": 0.537, "step": 1730 }, { "epoch": 0.38518024032042725, "grad_norm": 1.1604141879050631, "learning_rate": 7.044541136943192e-06, "loss": 0.5214, "step": 1731 }, { "epoch": 0.38540275923453493, "grad_norm": 1.0681321775596262, "learning_rate": 7.041252076157431e-06, "loss": 0.5311, "step": 1732 }, { "epoch": 0.3856252781486426, "grad_norm": 1.144409340079055, "learning_rate": 7.037961955086645e-06, "loss": 0.5073, "step": 1733 }, { "epoch": 0.38584779706275035, "grad_norm": 1.1279129588782897, "learning_rate": 7.034670775439818e-06, "loss": 0.5316, "step": 1734 }, { "epoch": 0.38607031597685804, "grad_norm": 1.1885089245823006, "learning_rate": 7.031378538926481e-06, "loss": 0.5269, "step": 1735 }, { "epoch": 0.3862928348909657, "grad_norm": 1.1075963921459955, "learning_rate": 7.028085247256717e-06, "loss": 0.5292, "step": 1736 }, { "epoch": 0.38651535380507346, "grad_norm": 1.1811737623436664, "learning_rate": 7.024790902141157e-06, "loss": 0.5298, "step": 1737 }, { "epoch": 0.38673787271918114, "grad_norm": 1.1226978927573976, "learning_rate": 7.021495505290976e-06, "loss": 0.5169, "step": 1738 }, { "epoch": 0.3869603916332888, "grad_norm": 1.299464963722953, "learning_rate": 7.018199058417904e-06, "loss": 0.5221, "step": 1739 }, { "epoch": 0.3871829105473965, "grad_norm": 1.1600529994325706, "learning_rate": 7.014901563234204e-06, "loss": 0.5204, "step": 1740 }, { "epoch": 0.38740542946150425, "grad_norm": 1.1241769706631823, "learning_rate": 7.011603021452693e-06, "loss": 0.5098, "step": 1741 }, { "epoch": 0.38762794837561193, "grad_norm": 1.1832635645944862, "learning_rate": 7.0083034347867274e-06, "loss": 0.5157, "step": 1742 }, { "epoch": 0.3878504672897196, "grad_norm": 1.2599105117804072, "learning_rate": 7.005002804950209e-06, "loss": 0.5153, "step": 1743 }, { "epoch": 0.38807298620382735, "grad_norm": 1.2477850918362148, "learning_rate": 7.001701133657577e-06, "loss": 0.503, "step": 1744 }, { "epoch": 0.38829550511793504, "grad_norm": 1.1298851960798921, "learning_rate": 6.998398422623816e-06, "loss": 0.5131, "step": 1745 }, { "epoch": 0.3885180240320427, "grad_norm": 1.1003155595795953, "learning_rate": 6.995094673564451e-06, "loss": 0.529, "step": 1746 }, { "epoch": 0.3887405429461504, "grad_norm": 1.24208705908319, "learning_rate": 6.9917898881955395e-06, "loss": 0.5264, "step": 1747 }, { "epoch": 0.38896306186025814, "grad_norm": 1.1679299031547656, "learning_rate": 6.9884840682336865e-06, "loss": 0.525, "step": 1748 }, { "epoch": 0.3891855807743658, "grad_norm": 1.1868109443752957, "learning_rate": 6.985177215396028e-06, "loss": 0.5024, "step": 1749 }, { "epoch": 0.3894080996884735, "grad_norm": 1.1945182493631215, "learning_rate": 6.981869331400238e-06, "loss": 0.527, "step": 1750 }, { "epoch": 0.38963061860258125, "grad_norm": 1.2272484603042488, "learning_rate": 6.978560417964529e-06, "loss": 0.5109, "step": 1751 }, { "epoch": 0.38985313751668893, "grad_norm": 1.132448942775402, "learning_rate": 6.975250476807644e-06, "loss": 0.5168, "step": 1752 }, { "epoch": 0.3900756564307966, "grad_norm": 1.0849471481139579, "learning_rate": 6.97193950964886e-06, "loss": 0.5146, "step": 1753 }, { "epoch": 0.3902981753449043, "grad_norm": 1.2109717119545615, "learning_rate": 6.968627518207992e-06, "loss": 0.5251, "step": 1754 }, { "epoch": 0.39052069425901204, "grad_norm": 1.1759607886622407, "learning_rate": 6.965314504205382e-06, "loss": 0.5255, "step": 1755 }, { "epoch": 0.3907432131731197, "grad_norm": 1.1939269547251832, "learning_rate": 6.962000469361904e-06, "loss": 0.5317, "step": 1756 }, { "epoch": 0.3909657320872274, "grad_norm": 1.162799857549563, "learning_rate": 6.958685415398964e-06, "loss": 0.5304, "step": 1757 }, { "epoch": 0.39118825100133514, "grad_norm": 1.1389526812041217, "learning_rate": 6.955369344038495e-06, "loss": 0.514, "step": 1758 }, { "epoch": 0.3914107699154428, "grad_norm": 1.2417224593735245, "learning_rate": 6.952052257002961e-06, "loss": 0.5159, "step": 1759 }, { "epoch": 0.3916332888295505, "grad_norm": 1.1339172290095314, "learning_rate": 6.948734156015353e-06, "loss": 0.5082, "step": 1760 }, { "epoch": 0.3918558077436582, "grad_norm": 1.1607822507554002, "learning_rate": 6.945415042799187e-06, "loss": 0.526, "step": 1761 }, { "epoch": 0.39207832665776593, "grad_norm": 1.229006285469011, "learning_rate": 6.942094919078506e-06, "loss": 0.5449, "step": 1762 }, { "epoch": 0.3923008455718736, "grad_norm": 1.1492005999768713, "learning_rate": 6.938773786577877e-06, "loss": 0.5299, "step": 1763 }, { "epoch": 0.3925233644859813, "grad_norm": 1.153555498994226, "learning_rate": 6.935451647022394e-06, "loss": 0.5087, "step": 1764 }, { "epoch": 0.39274588340008904, "grad_norm": 1.2160394618842667, "learning_rate": 6.93212850213767e-06, "loss": 0.535, "step": 1765 }, { "epoch": 0.3929684023141967, "grad_norm": 1.209673013520563, "learning_rate": 6.9288043536498425e-06, "loss": 0.5313, "step": 1766 }, { "epoch": 0.3931909212283044, "grad_norm": 1.212553320075499, "learning_rate": 6.92547920328557e-06, "loss": 0.5308, "step": 1767 }, { "epoch": 0.3934134401424121, "grad_norm": 1.2229778173970576, "learning_rate": 6.922153052772029e-06, "loss": 0.522, "step": 1768 }, { "epoch": 0.3936359590565198, "grad_norm": 1.4313826457145618, "learning_rate": 6.918825903836921e-06, "loss": 0.5187, "step": 1769 }, { "epoch": 0.3938584779706275, "grad_norm": 1.1613931793857553, "learning_rate": 6.915497758208462e-06, "loss": 0.5229, "step": 1770 }, { "epoch": 0.3940809968847352, "grad_norm": 1.161954019800829, "learning_rate": 6.912168617615387e-06, "loss": 0.5208, "step": 1771 }, { "epoch": 0.3943035157988429, "grad_norm": 1.1617205687210053, "learning_rate": 6.908838483786944e-06, "loss": 0.5339, "step": 1772 }, { "epoch": 0.3945260347129506, "grad_norm": 1.165430719275128, "learning_rate": 6.9055073584529034e-06, "loss": 0.5176, "step": 1773 }, { "epoch": 0.3947485536270583, "grad_norm": 1.1847579837642055, "learning_rate": 6.902175243343546e-06, "loss": 0.5006, "step": 1774 }, { "epoch": 0.394971072541166, "grad_norm": 1.2253303362101968, "learning_rate": 6.89884214018967e-06, "loss": 0.5335, "step": 1775 }, { "epoch": 0.3951935914552737, "grad_norm": 1.2231108642498734, "learning_rate": 6.895508050722585e-06, "loss": 0.5172, "step": 1776 }, { "epoch": 0.3954161103693814, "grad_norm": 1.124897975485126, "learning_rate": 6.89217297667411e-06, "loss": 0.5215, "step": 1777 }, { "epoch": 0.3956386292834891, "grad_norm": 1.1539529463215144, "learning_rate": 6.888836919776582e-06, "loss": 0.5143, "step": 1778 }, { "epoch": 0.39586114819759677, "grad_norm": 1.468231300081614, "learning_rate": 6.885499881762841e-06, "loss": 0.5289, "step": 1779 }, { "epoch": 0.3960836671117045, "grad_norm": 1.1373605733279748, "learning_rate": 6.882161864366243e-06, "loss": 0.5214, "step": 1780 }, { "epoch": 0.3963061860258122, "grad_norm": 1.1573493802468233, "learning_rate": 6.878822869320652e-06, "loss": 0.5087, "step": 1781 }, { "epoch": 0.3965287049399199, "grad_norm": 1.2272815145506124, "learning_rate": 6.875482898360435e-06, "loss": 0.5265, "step": 1782 }, { "epoch": 0.3967512238540276, "grad_norm": 1.1613844266245108, "learning_rate": 6.872141953220468e-06, "loss": 0.5277, "step": 1783 }, { "epoch": 0.3969737427681353, "grad_norm": 1.2062589496921607, "learning_rate": 6.8688000356361374e-06, "loss": 0.5401, "step": 1784 }, { "epoch": 0.397196261682243, "grad_norm": 1.0875113843477873, "learning_rate": 6.865457147343331e-06, "loss": 0.5216, "step": 1785 }, { "epoch": 0.39741878059635066, "grad_norm": 1.236549821817686, "learning_rate": 6.862113290078438e-06, "loss": 0.5205, "step": 1786 }, { "epoch": 0.3976412995104584, "grad_norm": 1.3404029847495922, "learning_rate": 6.858768465578356e-06, "loss": 0.519, "step": 1787 }, { "epoch": 0.3978638184245661, "grad_norm": 1.1411532675352578, "learning_rate": 6.855422675580484e-06, "loss": 0.5092, "step": 1788 }, { "epoch": 0.39808633733867377, "grad_norm": 1.1996246896848606, "learning_rate": 6.85207592182272e-06, "loss": 0.516, "step": 1789 }, { "epoch": 0.3983088562527815, "grad_norm": 1.099532489730166, "learning_rate": 6.848728206043463e-06, "loss": 0.5158, "step": 1790 }, { "epoch": 0.3985313751668892, "grad_norm": 1.2527533031154086, "learning_rate": 6.845379529981616e-06, "loss": 0.523, "step": 1791 }, { "epoch": 0.3987538940809969, "grad_norm": 1.2027981955001394, "learning_rate": 6.842029895376576e-06, "loss": 0.5114, "step": 1792 }, { "epoch": 0.39897641299510456, "grad_norm": 1.1430096004978763, "learning_rate": 6.838679303968239e-06, "loss": 0.5165, "step": 1793 }, { "epoch": 0.3991989319092123, "grad_norm": 1.172129561559509, "learning_rate": 6.835327757497e-06, "loss": 0.5202, "step": 1794 }, { "epoch": 0.39942145082332, "grad_norm": 1.2810274902820262, "learning_rate": 6.831975257703748e-06, "loss": 0.5187, "step": 1795 }, { "epoch": 0.39964396973742766, "grad_norm": 1.1909658196026855, "learning_rate": 6.828621806329867e-06, "loss": 0.5187, "step": 1796 }, { "epoch": 0.3998664886515354, "grad_norm": 1.1337952610904087, "learning_rate": 6.825267405117235e-06, "loss": 0.5118, "step": 1797 }, { "epoch": 0.4000890075656431, "grad_norm": 1.161183460369558, "learning_rate": 6.821912055808229e-06, "loss": 0.5095, "step": 1798 }, { "epoch": 0.40031152647975077, "grad_norm": 1.1401970801216375, "learning_rate": 6.818555760145709e-06, "loss": 0.5103, "step": 1799 }, { "epoch": 0.40053404539385845, "grad_norm": 1.2042370349418061, "learning_rate": 6.815198519873033e-06, "loss": 0.5258, "step": 1800 }, { "epoch": 0.4007565643079662, "grad_norm": 1.3467789526371274, "learning_rate": 6.81184033673405e-06, "loss": 0.5105, "step": 1801 }, { "epoch": 0.4009790832220739, "grad_norm": 1.163605893253319, "learning_rate": 6.808481212473096e-06, "loss": 0.5033, "step": 1802 }, { "epoch": 0.40120160213618156, "grad_norm": 1.1585224269608958, "learning_rate": 6.805121148834994e-06, "loss": 0.4969, "step": 1803 }, { "epoch": 0.4014241210502893, "grad_norm": 1.2170047469438974, "learning_rate": 6.801760147565061e-06, "loss": 0.517, "step": 1804 }, { "epoch": 0.401646639964397, "grad_norm": 1.201205503204009, "learning_rate": 6.7983982104090975e-06, "loss": 0.5327, "step": 1805 }, { "epoch": 0.40186915887850466, "grad_norm": 1.305074362507626, "learning_rate": 6.795035339113387e-06, "loss": 0.5091, "step": 1806 }, { "epoch": 0.40209167779261235, "grad_norm": 1.155643746840145, "learning_rate": 6.791671535424707e-06, "loss": 0.5329, "step": 1807 }, { "epoch": 0.4023141967067201, "grad_norm": 1.1294532243522901, "learning_rate": 6.78830680109031e-06, "loss": 0.5141, "step": 1808 }, { "epoch": 0.40253671562082777, "grad_norm": 1.2268470666702658, "learning_rate": 6.784941137857935e-06, "loss": 0.5241, "step": 1809 }, { "epoch": 0.40275923453493545, "grad_norm": 1.2473924909163994, "learning_rate": 6.781574547475807e-06, "loss": 0.5408, "step": 1810 }, { "epoch": 0.4029817534490432, "grad_norm": 1.1874598803929461, "learning_rate": 6.7782070316926285e-06, "loss": 0.5273, "step": 1811 }, { "epoch": 0.4032042723631509, "grad_norm": 1.1693795384457977, "learning_rate": 6.774838592257583e-06, "loss": 0.533, "step": 1812 }, { "epoch": 0.40342679127725856, "grad_norm": 1.134938728201609, "learning_rate": 6.771469230920339e-06, "loss": 0.5339, "step": 1813 }, { "epoch": 0.40364931019136624, "grad_norm": 1.1967720362410472, "learning_rate": 6.768098949431035e-06, "loss": 0.5227, "step": 1814 }, { "epoch": 0.403871829105474, "grad_norm": 1.157545845494928, "learning_rate": 6.764727749540293e-06, "loss": 0.5196, "step": 1815 }, { "epoch": 0.40409434801958166, "grad_norm": 1.0931981305964604, "learning_rate": 6.761355632999214e-06, "loss": 0.5087, "step": 1816 }, { "epoch": 0.40431686693368935, "grad_norm": 1.0875713049177915, "learning_rate": 6.757982601559369e-06, "loss": 0.526, "step": 1817 }, { "epoch": 0.4045393858477971, "grad_norm": 1.2145529093463887, "learning_rate": 6.754608656972809e-06, "loss": 0.5278, "step": 1818 }, { "epoch": 0.40476190476190477, "grad_norm": 1.236692973552246, "learning_rate": 6.751233800992059e-06, "loss": 0.5224, "step": 1819 }, { "epoch": 0.40498442367601245, "grad_norm": 1.18910158394634, "learning_rate": 6.747858035370112e-06, "loss": 0.5223, "step": 1820 }, { "epoch": 0.40520694259012013, "grad_norm": 1.14203402667648, "learning_rate": 6.744481361860442e-06, "loss": 0.539, "step": 1821 }, { "epoch": 0.4054294615042279, "grad_norm": 1.160780310592781, "learning_rate": 6.74110378221699e-06, "loss": 0.5101, "step": 1822 }, { "epoch": 0.40565198041833556, "grad_norm": 1.1843284144453898, "learning_rate": 6.737725298194166e-06, "loss": 0.5102, "step": 1823 }, { "epoch": 0.40587449933244324, "grad_norm": 1.1806570971027277, "learning_rate": 6.734345911546853e-06, "loss": 0.5213, "step": 1824 }, { "epoch": 0.406097018246551, "grad_norm": 1.169954121993187, "learning_rate": 6.730965624030401e-06, "loss": 0.5378, "step": 1825 }, { "epoch": 0.40631953716065866, "grad_norm": 1.2752268527232766, "learning_rate": 6.72758443740063e-06, "loss": 0.5207, "step": 1826 }, { "epoch": 0.40654205607476634, "grad_norm": 1.1823828606237707, "learning_rate": 6.724202353413823e-06, "loss": 0.5305, "step": 1827 }, { "epoch": 0.40676457498887403, "grad_norm": 1.147560624073833, "learning_rate": 6.720819373826735e-06, "loss": 0.5202, "step": 1828 }, { "epoch": 0.40698709390298177, "grad_norm": 5.248899035479289, "learning_rate": 6.71743550039658e-06, "loss": 0.5169, "step": 1829 }, { "epoch": 0.40720961281708945, "grad_norm": 1.270876562679729, "learning_rate": 6.714050734881038e-06, "loss": 0.5356, "step": 1830 }, { "epoch": 0.40743213173119713, "grad_norm": 1.1082662068392277, "learning_rate": 6.710665079038258e-06, "loss": 0.5129, "step": 1831 }, { "epoch": 0.4076546506453049, "grad_norm": 1.203852197405917, "learning_rate": 6.707278534626845e-06, "loss": 0.5218, "step": 1832 }, { "epoch": 0.40787716955941256, "grad_norm": 1.1814986116521442, "learning_rate": 6.703891103405866e-06, "loss": 0.5236, "step": 1833 }, { "epoch": 0.40809968847352024, "grad_norm": 1.1919791147055825, "learning_rate": 6.7005027871348516e-06, "loss": 0.5133, "step": 1834 }, { "epoch": 0.4083222073876279, "grad_norm": 1.1734749467608292, "learning_rate": 6.69711358757379e-06, "loss": 0.5238, "step": 1835 }, { "epoch": 0.40854472630173566, "grad_norm": 1.2097147133170842, "learning_rate": 6.693723506483132e-06, "loss": 0.5113, "step": 1836 }, { "epoch": 0.40876724521584334, "grad_norm": 1.131294170361713, "learning_rate": 6.690332545623778e-06, "loss": 0.5343, "step": 1837 }, { "epoch": 0.40898976412995103, "grad_norm": 1.2077328373662284, "learning_rate": 6.6869407067570945e-06, "loss": 0.5145, "step": 1838 }, { "epoch": 0.40921228304405877, "grad_norm": 1.1528044694417858, "learning_rate": 6.6835479916448984e-06, "loss": 0.522, "step": 1839 }, { "epoch": 0.40943480195816645, "grad_norm": 1.077731782688218, "learning_rate": 6.680154402049464e-06, "loss": 0.5012, "step": 1840 }, { "epoch": 0.40965732087227413, "grad_norm": 1.1340763373134584, "learning_rate": 6.6767599397335185e-06, "loss": 0.5223, "step": 1841 }, { "epoch": 0.4098798397863818, "grad_norm": 1.1072819573146984, "learning_rate": 6.673364606460246e-06, "loss": 0.5181, "step": 1842 }, { "epoch": 0.41010235870048956, "grad_norm": 1.1414709136715198, "learning_rate": 6.669968403993275e-06, "loss": 0.5204, "step": 1843 }, { "epoch": 0.41032487761459724, "grad_norm": 1.1416356612789058, "learning_rate": 6.666571334096697e-06, "loss": 0.5269, "step": 1844 }, { "epoch": 0.4105473965287049, "grad_norm": 1.302766139838582, "learning_rate": 6.663173398535043e-06, "loss": 0.5265, "step": 1845 }, { "epoch": 0.41076991544281266, "grad_norm": 1.1817551637545431, "learning_rate": 6.6597745990733006e-06, "loss": 0.5117, "step": 1846 }, { "epoch": 0.41099243435692034, "grad_norm": 1.7615031068309386, "learning_rate": 6.656374937476904e-06, "loss": 0.5021, "step": 1847 }, { "epoch": 0.411214953271028, "grad_norm": 1.3086092656495842, "learning_rate": 6.652974415511735e-06, "loss": 0.5239, "step": 1848 }, { "epoch": 0.4114374721851357, "grad_norm": 1.2025258441370947, "learning_rate": 6.649573034944122e-06, "loss": 0.5292, "step": 1849 }, { "epoch": 0.41165999109924345, "grad_norm": 1.18816774284474, "learning_rate": 6.64617079754084e-06, "loss": 0.5246, "step": 1850 }, { "epoch": 0.41188251001335113, "grad_norm": 1.2523963055472738, "learning_rate": 6.642767705069109e-06, "loss": 0.5241, "step": 1851 }, { "epoch": 0.4121050289274588, "grad_norm": 1.2902001957865945, "learning_rate": 6.639363759296592e-06, "loss": 0.5325, "step": 1852 }, { "epoch": 0.41232754784156656, "grad_norm": 1.229357835993306, "learning_rate": 6.635958961991399e-06, "loss": 0.5156, "step": 1853 }, { "epoch": 0.41255006675567424, "grad_norm": 1.1333092950625565, "learning_rate": 6.632553314922078e-06, "loss": 0.5191, "step": 1854 }, { "epoch": 0.4127725856697819, "grad_norm": 1.2031750789501148, "learning_rate": 6.6291468198576195e-06, "loss": 0.5318, "step": 1855 }, { "epoch": 0.4129951045838896, "grad_norm": 1.172450494492786, "learning_rate": 6.6257394785674555e-06, "loss": 0.5256, "step": 1856 }, { "epoch": 0.41321762349799734, "grad_norm": 1.2387112670836777, "learning_rate": 6.622331292821458e-06, "loss": 0.5052, "step": 1857 }, { "epoch": 0.413440142412105, "grad_norm": 1.2153151621881584, "learning_rate": 6.6189222643899356e-06, "loss": 0.5161, "step": 1858 }, { "epoch": 0.4136626613262127, "grad_norm": 1.285321749329093, "learning_rate": 6.615512395043635e-06, "loss": 0.5131, "step": 1859 }, { "epoch": 0.41388518024032045, "grad_norm": 1.1855665747620339, "learning_rate": 6.612101686553742e-06, "loss": 0.5025, "step": 1860 }, { "epoch": 0.41410769915442813, "grad_norm": 1.1184532517546613, "learning_rate": 6.608690140691876e-06, "loss": 0.5176, "step": 1861 }, { "epoch": 0.4143302180685358, "grad_norm": 1.1971376282624133, "learning_rate": 6.605277759230092e-06, "loss": 0.5335, "step": 1862 }, { "epoch": 0.4145527369826435, "grad_norm": 1.2161389991862193, "learning_rate": 6.60186454394088e-06, "loss": 0.5222, "step": 1863 }, { "epoch": 0.41477525589675124, "grad_norm": 1.1892046123316438, "learning_rate": 6.5984504965971595e-06, "loss": 0.5369, "step": 1864 }, { "epoch": 0.4149977748108589, "grad_norm": 1.1424187458931783, "learning_rate": 6.595035618972288e-06, "loss": 0.5241, "step": 1865 }, { "epoch": 0.4152202937249666, "grad_norm": 1.2649658435642728, "learning_rate": 6.591619912840048e-06, "loss": 0.5189, "step": 1866 }, { "epoch": 0.41544281263907434, "grad_norm": 1.1819089927083204, "learning_rate": 6.588203379974658e-06, "loss": 0.5117, "step": 1867 }, { "epoch": 0.415665331553182, "grad_norm": 1.1560399520973912, "learning_rate": 6.584786022150765e-06, "loss": 0.5341, "step": 1868 }, { "epoch": 0.4158878504672897, "grad_norm": 1.2602977924295269, "learning_rate": 6.581367841143438e-06, "loss": 0.5154, "step": 1869 }, { "epoch": 0.4161103693813974, "grad_norm": 1.1381195391361603, "learning_rate": 6.577948838728183e-06, "loss": 0.5096, "step": 1870 }, { "epoch": 0.41633288829550513, "grad_norm": 1.2075480884201244, "learning_rate": 6.5745290166809236e-06, "loss": 0.5189, "step": 1871 }, { "epoch": 0.4165554072096128, "grad_norm": 1.1838136900142304, "learning_rate": 6.571108376778017e-06, "loss": 0.5162, "step": 1872 }, { "epoch": 0.4167779261237205, "grad_norm": 1.198758129512039, "learning_rate": 6.567686920796244e-06, "loss": 0.5157, "step": 1873 }, { "epoch": 0.41700044503782824, "grad_norm": 1.1187341305096739, "learning_rate": 6.564264650512802e-06, "loss": 0.488, "step": 1874 }, { "epoch": 0.4172229639519359, "grad_norm": 1.1509822238608094, "learning_rate": 6.56084156770532e-06, "loss": 0.5124, "step": 1875 }, { "epoch": 0.4174454828660436, "grad_norm": 1.1851633185860004, "learning_rate": 6.557417674151842e-06, "loss": 0.5235, "step": 1876 }, { "epoch": 0.4176680017801513, "grad_norm": 1.183584868508725, "learning_rate": 6.5539929716308425e-06, "loss": 0.5073, "step": 1877 }, { "epoch": 0.417890520694259, "grad_norm": 1.3092055136968885, "learning_rate": 6.550567461921207e-06, "loss": 0.5282, "step": 1878 }, { "epoch": 0.4181130396083667, "grad_norm": 1.2171668170893717, "learning_rate": 6.547141146802245e-06, "loss": 0.5239, "step": 1879 }, { "epoch": 0.4183355585224744, "grad_norm": 1.1640951477467374, "learning_rate": 6.54371402805368e-06, "loss": 0.5054, "step": 1880 }, { "epoch": 0.41855807743658213, "grad_norm": 1.3169907401482173, "learning_rate": 6.54028610745566e-06, "loss": 0.5179, "step": 1881 }, { "epoch": 0.4187805963506898, "grad_norm": 1.1040814183963419, "learning_rate": 6.536857386788743e-06, "loss": 0.5142, "step": 1882 }, { "epoch": 0.4190031152647975, "grad_norm": 1.1978672520694051, "learning_rate": 6.533427867833906e-06, "loss": 0.5169, "step": 1883 }, { "epoch": 0.4192256341789052, "grad_norm": 1.2382158657258175, "learning_rate": 6.529997552372541e-06, "loss": 0.5175, "step": 1884 }, { "epoch": 0.4194481530930129, "grad_norm": 1.2578343362894653, "learning_rate": 6.526566442186451e-06, "loss": 0.5162, "step": 1885 }, { "epoch": 0.4196706720071206, "grad_norm": 1.1330170625411853, "learning_rate": 6.523134539057853e-06, "loss": 0.5114, "step": 1886 }, { "epoch": 0.4198931909212283, "grad_norm": 1.528806196286805, "learning_rate": 6.519701844769376e-06, "loss": 0.5143, "step": 1887 }, { "epoch": 0.420115709835336, "grad_norm": 1.1789987577126537, "learning_rate": 6.516268361104062e-06, "loss": 0.5016, "step": 1888 }, { "epoch": 0.4203382287494437, "grad_norm": 1.1605730566880375, "learning_rate": 6.512834089845359e-06, "loss": 0.5202, "step": 1889 }, { "epoch": 0.4205607476635514, "grad_norm": 1.1322494013669417, "learning_rate": 6.509399032777129e-06, "loss": 0.5112, "step": 1890 }, { "epoch": 0.4207832665776591, "grad_norm": 1.1590626985815995, "learning_rate": 6.505963191683636e-06, "loss": 0.5157, "step": 1891 }, { "epoch": 0.4210057854917668, "grad_norm": 1.2250583228878071, "learning_rate": 6.502526568349555e-06, "loss": 0.511, "step": 1892 }, { "epoch": 0.4212283044058745, "grad_norm": 1.203940014626715, "learning_rate": 6.49908916455997e-06, "loss": 0.5141, "step": 1893 }, { "epoch": 0.4214508233199822, "grad_norm": 1.195482360351126, "learning_rate": 6.495650982100368e-06, "loss": 0.517, "step": 1894 }, { "epoch": 0.4216733422340899, "grad_norm": 1.203855935748383, "learning_rate": 6.492212022756634e-06, "loss": 0.5224, "step": 1895 }, { "epoch": 0.4218958611481976, "grad_norm": 1.2170157029228466, "learning_rate": 6.488772288315067e-06, "loss": 0.5233, "step": 1896 }, { "epoch": 0.4221183800623053, "grad_norm": 1.200360253581169, "learning_rate": 6.485331780562363e-06, "loss": 0.5264, "step": 1897 }, { "epoch": 0.42234089897641297, "grad_norm": 1.159701819229567, "learning_rate": 6.481890501285619e-06, "loss": 0.5322, "step": 1898 }, { "epoch": 0.4225634178905207, "grad_norm": 1.124784440909848, "learning_rate": 6.478448452272339e-06, "loss": 0.5073, "step": 1899 }, { "epoch": 0.4227859368046284, "grad_norm": 1.2474451176097376, "learning_rate": 6.475005635310417e-06, "loss": 0.5109, "step": 1900 }, { "epoch": 0.4230084557187361, "grad_norm": 1.1251590363347443, "learning_rate": 6.471562052188154e-06, "loss": 0.5214, "step": 1901 }, { "epoch": 0.4232309746328438, "grad_norm": 1.1014054277515195, "learning_rate": 6.468117704694244e-06, "loss": 0.5254, "step": 1902 }, { "epoch": 0.4234534935469515, "grad_norm": 1.1645754367101437, "learning_rate": 6.464672594617784e-06, "loss": 0.53, "step": 1903 }, { "epoch": 0.4236760124610592, "grad_norm": 1.1277458844426995, "learning_rate": 6.4612267237482584e-06, "loss": 0.5253, "step": 1904 }, { "epoch": 0.42389853137516686, "grad_norm": 1.2697808585821304, "learning_rate": 6.457780093875555e-06, "loss": 0.5272, "step": 1905 }, { "epoch": 0.4241210502892746, "grad_norm": 1.1636015084578433, "learning_rate": 6.454332706789952e-06, "loss": 0.5092, "step": 1906 }, { "epoch": 0.4243435692033823, "grad_norm": 1.1889683454704827, "learning_rate": 6.45088456428212e-06, "loss": 0.514, "step": 1907 }, { "epoch": 0.42456608811748997, "grad_norm": 1.1382041773874327, "learning_rate": 6.447435668143124e-06, "loss": 0.5227, "step": 1908 }, { "epoch": 0.4247886070315977, "grad_norm": 1.2052574594766399, "learning_rate": 6.443986020164421e-06, "loss": 0.5121, "step": 1909 }, { "epoch": 0.4250111259457054, "grad_norm": 1.2594335007629736, "learning_rate": 6.4405356221378566e-06, "loss": 0.4873, "step": 1910 }, { "epoch": 0.4252336448598131, "grad_norm": 1.2427105984707891, "learning_rate": 6.437084475855665e-06, "loss": 0.5303, "step": 1911 }, { "epoch": 0.42545616377392076, "grad_norm": 1.1109598662922955, "learning_rate": 6.433632583110474e-06, "loss": 0.5209, "step": 1912 }, { "epoch": 0.4256786826880285, "grad_norm": 1.1880325398925158, "learning_rate": 6.430179945695293e-06, "loss": 0.5105, "step": 1913 }, { "epoch": 0.4259012016021362, "grad_norm": 1.1882544992972774, "learning_rate": 6.426726565403523e-06, "loss": 0.5274, "step": 1914 }, { "epoch": 0.42612372051624386, "grad_norm": 1.1959883508225804, "learning_rate": 6.423272444028949e-06, "loss": 0.5085, "step": 1915 }, { "epoch": 0.4263462394303516, "grad_norm": 1.2185265391731523, "learning_rate": 6.419817583365739e-06, "loss": 0.512, "step": 1916 }, { "epoch": 0.4265687583444593, "grad_norm": 1.17421929131693, "learning_rate": 6.41636198520845e-06, "loss": 0.512, "step": 1917 }, { "epoch": 0.42679127725856697, "grad_norm": 1.170563829327419, "learning_rate": 6.412905651352016e-06, "loss": 0.5055, "step": 1918 }, { "epoch": 0.42701379617267465, "grad_norm": 1.1898295548532947, "learning_rate": 6.40944858359176e-06, "loss": 0.5127, "step": 1919 }, { "epoch": 0.4272363150867824, "grad_norm": 1.1489063318862027, "learning_rate": 6.4059907837233774e-06, "loss": 0.5227, "step": 1920 }, { "epoch": 0.4274588340008901, "grad_norm": 1.1163582249158317, "learning_rate": 6.402532253542953e-06, "loss": 0.5063, "step": 1921 }, { "epoch": 0.42768135291499776, "grad_norm": 1.264661837850464, "learning_rate": 6.399072994846947e-06, "loss": 0.4914, "step": 1922 }, { "epoch": 0.4279038718291055, "grad_norm": 1.1896854015298157, "learning_rate": 6.395613009432191e-06, "loss": 0.5202, "step": 1923 }, { "epoch": 0.4281263907432132, "grad_norm": 1.2403815842995207, "learning_rate": 6.392152299095911e-06, "loss": 0.5097, "step": 1924 }, { "epoch": 0.42834890965732086, "grad_norm": 1.204582491009032, "learning_rate": 6.388690865635694e-06, "loss": 0.5049, "step": 1925 }, { "epoch": 0.42857142857142855, "grad_norm": 1.2755666966935126, "learning_rate": 6.385228710849507e-06, "loss": 0.5095, "step": 1926 }, { "epoch": 0.4287939474855363, "grad_norm": 1.1839711328860663, "learning_rate": 6.3817658365356964e-06, "loss": 0.5102, "step": 1927 }, { "epoch": 0.42901646639964397, "grad_norm": 1.2163088034499965, "learning_rate": 6.3783022444929745e-06, "loss": 0.492, "step": 1928 }, { "epoch": 0.42923898531375165, "grad_norm": 1.263267280079711, "learning_rate": 6.374837936520434e-06, "loss": 0.5161, "step": 1929 }, { "epoch": 0.4294615042278594, "grad_norm": 1.2201256055506648, "learning_rate": 6.371372914417535e-06, "loss": 0.5051, "step": 1930 }, { "epoch": 0.4296840231419671, "grad_norm": 1.3221955061949984, "learning_rate": 6.3679071799841095e-06, "loss": 0.5306, "step": 1931 }, { "epoch": 0.42990654205607476, "grad_norm": 1.1942525903559316, "learning_rate": 6.36444073502036e-06, "loss": 0.5145, "step": 1932 }, { "epoch": 0.43012906097018244, "grad_norm": 1.1749930869075063, "learning_rate": 6.360973581326857e-06, "loss": 0.5272, "step": 1933 }, { "epoch": 0.4303515798842902, "grad_norm": 1.2908347846130959, "learning_rate": 6.35750572070454e-06, "loss": 0.5171, "step": 1934 }, { "epoch": 0.43057409879839786, "grad_norm": 1.2268375608381719, "learning_rate": 6.354037154954715e-06, "loss": 0.5066, "step": 1935 }, { "epoch": 0.43079661771250555, "grad_norm": 1.5190952367127177, "learning_rate": 6.350567885879058e-06, "loss": 0.5179, "step": 1936 }, { "epoch": 0.4310191366266133, "grad_norm": 1.2147713723531648, "learning_rate": 6.347097915279603e-06, "loss": 0.5255, "step": 1937 }, { "epoch": 0.43124165554072097, "grad_norm": 1.1919985805743498, "learning_rate": 6.343627244958755e-06, "loss": 0.5025, "step": 1938 }, { "epoch": 0.43146417445482865, "grad_norm": 1.1742153729381135, "learning_rate": 6.340155876719276e-06, "loss": 0.5232, "step": 1939 }, { "epoch": 0.43168669336893634, "grad_norm": 1.2221672445402332, "learning_rate": 6.336683812364301e-06, "loss": 0.5187, "step": 1940 }, { "epoch": 0.4319092122830441, "grad_norm": 1.2664396265121933, "learning_rate": 6.333211053697316e-06, "loss": 0.5123, "step": 1941 }, { "epoch": 0.43213173119715176, "grad_norm": 1.3761645897398076, "learning_rate": 6.3297376025221725e-06, "loss": 0.5234, "step": 1942 }, { "epoch": 0.43235425011125944, "grad_norm": 1.128223427079444, "learning_rate": 6.326263460643081e-06, "loss": 0.5271, "step": 1943 }, { "epoch": 0.4325767690253672, "grad_norm": 1.1155206417351629, "learning_rate": 6.322788629864609e-06, "loss": 0.5005, "step": 1944 }, { "epoch": 0.43279928793947486, "grad_norm": 1.1774162540343156, "learning_rate": 6.3193131119916854e-06, "loss": 0.501, "step": 1945 }, { "epoch": 0.43302180685358255, "grad_norm": 1.1201940927800458, "learning_rate": 6.315836908829594e-06, "loss": 0.5181, "step": 1946 }, { "epoch": 0.43324432576769023, "grad_norm": 1.2219259896637955, "learning_rate": 6.3123600221839755e-06, "loss": 0.5088, "step": 1947 }, { "epoch": 0.43346684468179797, "grad_norm": 1.1783438774105484, "learning_rate": 6.308882453860821e-06, "loss": 0.5059, "step": 1948 }, { "epoch": 0.43368936359590565, "grad_norm": 1.228254078569224, "learning_rate": 6.305404205666484e-06, "loss": 0.4976, "step": 1949 }, { "epoch": 0.43391188251001334, "grad_norm": 1.2133818224948558, "learning_rate": 6.301925279407666e-06, "loss": 0.5078, "step": 1950 }, { "epoch": 0.4341344014241211, "grad_norm": 1.3168039527396576, "learning_rate": 6.298445676891418e-06, "loss": 0.5183, "step": 1951 }, { "epoch": 0.43435692033822876, "grad_norm": 1.2548366127882546, "learning_rate": 6.294965399925149e-06, "loss": 0.5034, "step": 1952 }, { "epoch": 0.43457943925233644, "grad_norm": 1.2610347217368383, "learning_rate": 6.291484450316614e-06, "loss": 0.5098, "step": 1953 }, { "epoch": 0.4348019581664441, "grad_norm": 1.376331206915901, "learning_rate": 6.288002829873916e-06, "loss": 0.5256, "step": 1954 }, { "epoch": 0.43502447708055186, "grad_norm": 1.21369170599055, "learning_rate": 6.284520540405513e-06, "loss": 0.5084, "step": 1955 }, { "epoch": 0.43524699599465955, "grad_norm": 1.2753950650067114, "learning_rate": 6.281037583720204e-06, "loss": 0.5317, "step": 1956 }, { "epoch": 0.43546951490876723, "grad_norm": 1.1991844056143741, "learning_rate": 6.2775539616271364e-06, "loss": 0.521, "step": 1957 }, { "epoch": 0.43569203382287497, "grad_norm": 1.2438282520569532, "learning_rate": 6.2740696759358045e-06, "loss": 0.4998, "step": 1958 }, { "epoch": 0.43591455273698265, "grad_norm": 1.1800468618220816, "learning_rate": 6.270584728456046e-06, "loss": 0.5087, "step": 1959 }, { "epoch": 0.43613707165109034, "grad_norm": 1.1516599912505816, "learning_rate": 6.267099120998043e-06, "loss": 0.5079, "step": 1960 }, { "epoch": 0.436359590565198, "grad_norm": 1.116139236716566, "learning_rate": 6.263612855372321e-06, "loss": 0.5029, "step": 1961 }, { "epoch": 0.43658210947930576, "grad_norm": 1.1480707112477924, "learning_rate": 6.260125933389745e-06, "loss": 0.5121, "step": 1962 }, { "epoch": 0.43680462839341344, "grad_norm": 1.1540195681637464, "learning_rate": 6.256638356861524e-06, "loss": 0.5013, "step": 1963 }, { "epoch": 0.4370271473075211, "grad_norm": 1.152100373292905, "learning_rate": 6.2531501275992035e-06, "loss": 0.5152, "step": 1964 }, { "epoch": 0.43724966622162886, "grad_norm": 1.2008481018248844, "learning_rate": 6.249661247414674e-06, "loss": 0.5224, "step": 1965 }, { "epoch": 0.43747218513573655, "grad_norm": 1.2933406986746103, "learning_rate": 6.2461717181201556e-06, "loss": 0.5193, "step": 1966 }, { "epoch": 0.43769470404984423, "grad_norm": 1.1134662078014212, "learning_rate": 6.242681541528214e-06, "loss": 0.5067, "step": 1967 }, { "epoch": 0.4379172229639519, "grad_norm": 1.2826621575702066, "learning_rate": 6.239190719451746e-06, "loss": 0.537, "step": 1968 }, { "epoch": 0.43813974187805965, "grad_norm": 1.2263499278532928, "learning_rate": 6.235699253703983e-06, "loss": 0.503, "step": 1969 }, { "epoch": 0.43836226079216734, "grad_norm": 1.1368378742833056, "learning_rate": 6.232207146098494e-06, "loss": 0.5124, "step": 1970 }, { "epoch": 0.438584779706275, "grad_norm": 1.187252328645729, "learning_rate": 6.2287143984491825e-06, "loss": 0.5206, "step": 1971 }, { "epoch": 0.43880729862038276, "grad_norm": 1.167768405618748, "learning_rate": 6.225221012570278e-06, "loss": 0.5066, "step": 1972 }, { "epoch": 0.43902981753449044, "grad_norm": 1.2310577753842464, "learning_rate": 6.221726990276349e-06, "loss": 0.5093, "step": 1973 }, { "epoch": 0.4392523364485981, "grad_norm": 1.1988246758019503, "learning_rate": 6.218232333382288e-06, "loss": 0.5013, "step": 1974 }, { "epoch": 0.4394748553627058, "grad_norm": 1.2402644797840034, "learning_rate": 6.2147370437033226e-06, "loss": 0.5395, "step": 1975 }, { "epoch": 0.43969737427681355, "grad_norm": 1.1810949355811466, "learning_rate": 6.211241123055007e-06, "loss": 0.4892, "step": 1976 }, { "epoch": 0.43991989319092123, "grad_norm": 1.2113668700954374, "learning_rate": 6.207744573253223e-06, "loss": 0.5054, "step": 1977 }, { "epoch": 0.4401424121050289, "grad_norm": 1.315092135638533, "learning_rate": 6.204247396114177e-06, "loss": 0.519, "step": 1978 }, { "epoch": 0.44036493101913665, "grad_norm": 1.1955627124537038, "learning_rate": 6.200749593454405e-06, "loss": 0.5102, "step": 1979 }, { "epoch": 0.44058744993324434, "grad_norm": 1.217264240540597, "learning_rate": 6.197251167090764e-06, "loss": 0.5229, "step": 1980 }, { "epoch": 0.440809968847352, "grad_norm": 1.2646265772162544, "learning_rate": 6.193752118840441e-06, "loss": 0.4975, "step": 1981 }, { "epoch": 0.4410324877614597, "grad_norm": 1.1818150967453287, "learning_rate": 6.19025245052094e-06, "loss": 0.5058, "step": 1982 }, { "epoch": 0.44125500667556744, "grad_norm": 1.181819151040224, "learning_rate": 6.18675216395009e-06, "loss": 0.518, "step": 1983 }, { "epoch": 0.4414775255896751, "grad_norm": 1.1505640867936717, "learning_rate": 6.18325126094604e-06, "loss": 0.5134, "step": 1984 }, { "epoch": 0.4417000445037828, "grad_norm": 1.149833604091292, "learning_rate": 6.179749743327258e-06, "loss": 0.5132, "step": 1985 }, { "epoch": 0.44192256341789055, "grad_norm": 1.1371608230624766, "learning_rate": 6.176247612912537e-06, "loss": 0.5176, "step": 1986 }, { "epoch": 0.44214508233199823, "grad_norm": 1.2593791016460991, "learning_rate": 6.172744871520983e-06, "loss": 0.5124, "step": 1987 }, { "epoch": 0.4423676012461059, "grad_norm": 1.3091102493375537, "learning_rate": 6.169241520972017e-06, "loss": 0.5205, "step": 1988 }, { "epoch": 0.4425901201602136, "grad_norm": 1.1928160984012472, "learning_rate": 6.165737563085384e-06, "loss": 0.5056, "step": 1989 }, { "epoch": 0.44281263907432133, "grad_norm": 1.2182998903928997, "learning_rate": 6.162232999681139e-06, "loss": 0.5158, "step": 1990 }, { "epoch": 0.443035157988429, "grad_norm": 1.219818879257823, "learning_rate": 6.158727832579653e-06, "loss": 0.4999, "step": 1991 }, { "epoch": 0.4432576769025367, "grad_norm": 1.2332754573652787, "learning_rate": 6.155222063601611e-06, "loss": 0.5187, "step": 1992 }, { "epoch": 0.44348019581664444, "grad_norm": 1.2866146910054994, "learning_rate": 6.151715694568012e-06, "loss": 0.5041, "step": 1993 }, { "epoch": 0.4437027147307521, "grad_norm": 1.219989966815726, "learning_rate": 6.14820872730016e-06, "loss": 0.5221, "step": 1994 }, { "epoch": 0.4439252336448598, "grad_norm": 1.098860524967539, "learning_rate": 6.144701163619678e-06, "loss": 0.5028, "step": 1995 }, { "epoch": 0.4441477525589675, "grad_norm": 1.4724908024476782, "learning_rate": 6.141193005348497e-06, "loss": 0.4982, "step": 1996 }, { "epoch": 0.44437027147307523, "grad_norm": 1.2433938045712043, "learning_rate": 6.1376842543088515e-06, "loss": 0.5282, "step": 1997 }, { "epoch": 0.4445927903871829, "grad_norm": 1.1929869098606416, "learning_rate": 6.1341749123232906e-06, "loss": 0.5102, "step": 1998 }, { "epoch": 0.4448153093012906, "grad_norm": 1.2674964637709778, "learning_rate": 6.1306649812146665e-06, "loss": 0.5114, "step": 1999 }, { "epoch": 0.44503782821539833, "grad_norm": 1.2301795009461314, "learning_rate": 6.127154462806136e-06, "loss": 0.505, "step": 2000 }, { "epoch": 0.445260347129506, "grad_norm": 1.1304600780827998, "learning_rate": 6.123643358921168e-06, "loss": 0.5299, "step": 2001 }, { "epoch": 0.4454828660436137, "grad_norm": 1.154644483664138, "learning_rate": 6.120131671383527e-06, "loss": 0.5073, "step": 2002 }, { "epoch": 0.4457053849577214, "grad_norm": 1.2102878913605104, "learning_rate": 6.116619402017285e-06, "loss": 0.5212, "step": 2003 }, { "epoch": 0.4459279038718291, "grad_norm": 1.2072469046162202, "learning_rate": 6.113106552646818e-06, "loss": 0.4899, "step": 2004 }, { "epoch": 0.4461504227859368, "grad_norm": 1.2178964843540467, "learning_rate": 6.109593125096799e-06, "loss": 0.5216, "step": 2005 }, { "epoch": 0.4463729417000445, "grad_norm": 1.2382508710053959, "learning_rate": 6.106079121192202e-06, "loss": 0.5168, "step": 2006 }, { "epoch": 0.44659546061415223, "grad_norm": 1.2547963283150634, "learning_rate": 6.1025645427583055e-06, "loss": 0.5201, "step": 2007 }, { "epoch": 0.4468179795282599, "grad_norm": 1.1871055459943403, "learning_rate": 6.099049391620682e-06, "loss": 0.5149, "step": 2008 }, { "epoch": 0.4470404984423676, "grad_norm": 1.2182527237602805, "learning_rate": 6.095533669605198e-06, "loss": 0.507, "step": 2009 }, { "epoch": 0.4472630173564753, "grad_norm": 1.2929854380466488, "learning_rate": 6.092017378538025e-06, "loss": 0.5032, "step": 2010 }, { "epoch": 0.447485536270583, "grad_norm": 1.2061367231978966, "learning_rate": 6.088500520245621e-06, "loss": 0.5077, "step": 2011 }, { "epoch": 0.4477080551846907, "grad_norm": 1.2053009309215605, "learning_rate": 6.084983096554749e-06, "loss": 0.5195, "step": 2012 }, { "epoch": 0.4479305740987984, "grad_norm": 1.2702253637493885, "learning_rate": 6.081465109292456e-06, "loss": 0.5075, "step": 2013 }, { "epoch": 0.4481530930129061, "grad_norm": 1.2270909091082653, "learning_rate": 6.077946560286087e-06, "loss": 0.5271, "step": 2014 }, { "epoch": 0.4483756119270138, "grad_norm": 1.1950754568985504, "learning_rate": 6.0744274513632784e-06, "loss": 0.4945, "step": 2015 }, { "epoch": 0.4485981308411215, "grad_norm": 1.2580990964133565, "learning_rate": 6.070907784351955e-06, "loss": 0.4991, "step": 2016 }, { "epoch": 0.4488206497552292, "grad_norm": 1.2122153058145402, "learning_rate": 6.067387561080335e-06, "loss": 0.508, "step": 2017 }, { "epoch": 0.4490431686693369, "grad_norm": 1.245584014704355, "learning_rate": 6.063866783376921e-06, "loss": 0.5189, "step": 2018 }, { "epoch": 0.4492656875834446, "grad_norm": 1.303273529474464, "learning_rate": 6.0603454530705086e-06, "loss": 0.505, "step": 2019 }, { "epoch": 0.4494882064975523, "grad_norm": 1.441452803017248, "learning_rate": 6.056823571990177e-06, "loss": 0.5096, "step": 2020 }, { "epoch": 0.44971072541166, "grad_norm": 1.1980834403557725, "learning_rate": 6.0533011419652905e-06, "loss": 0.5155, "step": 2021 }, { "epoch": 0.4499332443257677, "grad_norm": 1.33544282191991, "learning_rate": 6.049778164825504e-06, "loss": 0.4936, "step": 2022 }, { "epoch": 0.4501557632398754, "grad_norm": 1.262108389107605, "learning_rate": 6.046254642400752e-06, "loss": 0.5107, "step": 2023 }, { "epoch": 0.45037828215398307, "grad_norm": 1.2298852205771638, "learning_rate": 6.042730576521253e-06, "loss": 0.5152, "step": 2024 }, { "epoch": 0.4506008010680908, "grad_norm": 1.322732106654089, "learning_rate": 6.039205969017508e-06, "loss": 0.4931, "step": 2025 }, { "epoch": 0.4508233199821985, "grad_norm": 1.2518599198015308, "learning_rate": 6.035680821720298e-06, "loss": 0.5228, "step": 2026 }, { "epoch": 0.4510458388963062, "grad_norm": 1.2210251037139437, "learning_rate": 6.032155136460689e-06, "loss": 0.5051, "step": 2027 }, { "epoch": 0.4512683578104139, "grad_norm": 1.2031552597721278, "learning_rate": 6.028628915070022e-06, "loss": 0.52, "step": 2028 }, { "epoch": 0.4514908767245216, "grad_norm": 1.2501498475924169, "learning_rate": 6.025102159379917e-06, "loss": 0.5204, "step": 2029 }, { "epoch": 0.4517133956386293, "grad_norm": 1.2062353864811912, "learning_rate": 6.021574871222274e-06, "loss": 0.5083, "step": 2030 }, { "epoch": 0.45193591455273696, "grad_norm": 1.2331954296267755, "learning_rate": 6.018047052429266e-06, "loss": 0.5066, "step": 2031 }, { "epoch": 0.4521584334668447, "grad_norm": 1.288809347333523, "learning_rate": 6.014518704833344e-06, "loss": 0.5239, "step": 2032 }, { "epoch": 0.4523809523809524, "grad_norm": 1.2360746292477784, "learning_rate": 6.010989830267234e-06, "loss": 0.5098, "step": 2033 }, { "epoch": 0.45260347129506007, "grad_norm": 1.2071494311512525, "learning_rate": 6.007460430563935e-06, "loss": 0.5139, "step": 2034 }, { "epoch": 0.4528259902091678, "grad_norm": 1.1700629046937112, "learning_rate": 6.0039305075567175e-06, "loss": 0.5085, "step": 2035 }, { "epoch": 0.4530485091232755, "grad_norm": 1.1958959364126174, "learning_rate": 6.000400063079126e-06, "loss": 0.5043, "step": 2036 }, { "epoch": 0.4532710280373832, "grad_norm": 1.1572632489860255, "learning_rate": 5.9968690989649734e-06, "loss": 0.5072, "step": 2037 }, { "epoch": 0.45349354695149086, "grad_norm": 1.1550849841338449, "learning_rate": 5.993337617048347e-06, "loss": 0.5006, "step": 2038 }, { "epoch": 0.4537160658655986, "grad_norm": 1.239120789466579, "learning_rate": 5.989805619163599e-06, "loss": 0.5268, "step": 2039 }, { "epoch": 0.4539385847797063, "grad_norm": 1.1793316240236198, "learning_rate": 5.986273107145348e-06, "loss": 0.5119, "step": 2040 }, { "epoch": 0.45416110369381396, "grad_norm": 1.3297264243182256, "learning_rate": 5.982740082828485e-06, "loss": 0.4818, "step": 2041 }, { "epoch": 0.4543836226079217, "grad_norm": 1.1807707257573676, "learning_rate": 5.979206548048163e-06, "loss": 0.5152, "step": 2042 }, { "epoch": 0.4546061415220294, "grad_norm": 1.30498346153371, "learning_rate": 5.9756725046398025e-06, "loss": 0.5048, "step": 2043 }, { "epoch": 0.45482866043613707, "grad_norm": 1.2747206028998392, "learning_rate": 5.972137954439088e-06, "loss": 0.5018, "step": 2044 }, { "epoch": 0.45505117935024475, "grad_norm": 1.3881241776331856, "learning_rate": 5.968602899281964e-06, "loss": 0.5289, "step": 2045 }, { "epoch": 0.4552736982643525, "grad_norm": 1.2296758578004938, "learning_rate": 5.9650673410046425e-06, "loss": 0.4983, "step": 2046 }, { "epoch": 0.45549621717846017, "grad_norm": 1.1900197206159309, "learning_rate": 5.961531281443592e-06, "loss": 0.5164, "step": 2047 }, { "epoch": 0.45571873609256786, "grad_norm": 1.2493845826617962, "learning_rate": 5.957994722435545e-06, "loss": 0.5249, "step": 2048 }, { "epoch": 0.4559412550066756, "grad_norm": 1.1843926553445583, "learning_rate": 5.954457665817491e-06, "loss": 0.508, "step": 2049 }, { "epoch": 0.4561637739207833, "grad_norm": 1.2283602641793716, "learning_rate": 5.950920113426681e-06, "loss": 0.5108, "step": 2050 }, { "epoch": 0.45638629283489096, "grad_norm": 1.2862382333226097, "learning_rate": 5.9473820671006185e-06, "loss": 0.5214, "step": 2051 }, { "epoch": 0.45660881174899864, "grad_norm": 1.2357786892761224, "learning_rate": 5.9438435286770666e-06, "loss": 0.5007, "step": 2052 }, { "epoch": 0.4568313306631064, "grad_norm": 1.186178996232497, "learning_rate": 5.9403044999940465e-06, "loss": 0.5043, "step": 2053 }, { "epoch": 0.45705384957721407, "grad_norm": 1.353345815141886, "learning_rate": 5.93676498288983e-06, "loss": 0.4995, "step": 2054 }, { "epoch": 0.45727636849132175, "grad_norm": 1.3127684508107023, "learning_rate": 5.933224979202945e-06, "loss": 0.5204, "step": 2055 }, { "epoch": 0.4574988874054295, "grad_norm": 1.170367947448414, "learning_rate": 5.929684490772168e-06, "loss": 0.4975, "step": 2056 }, { "epoch": 0.45772140631953717, "grad_norm": 1.169530623143736, "learning_rate": 5.9261435194365336e-06, "loss": 0.5139, "step": 2057 }, { "epoch": 0.45794392523364486, "grad_norm": 1.2847655153388888, "learning_rate": 5.9226020670353245e-06, "loss": 0.5146, "step": 2058 }, { "epoch": 0.45816644414775254, "grad_norm": 1.315408188173639, "learning_rate": 5.9190601354080705e-06, "loss": 0.5138, "step": 2059 }, { "epoch": 0.4583889630618603, "grad_norm": 1.3567683003742157, "learning_rate": 5.915517726394555e-06, "loss": 0.5172, "step": 2060 }, { "epoch": 0.45861148197596796, "grad_norm": 1.305695931808594, "learning_rate": 5.911974841834807e-06, "loss": 0.5034, "step": 2061 }, { "epoch": 0.45883400089007564, "grad_norm": 1.2050141817628894, "learning_rate": 5.908431483569099e-06, "loss": 0.5097, "step": 2062 }, { "epoch": 0.4590565198041834, "grad_norm": 1.3238687024295581, "learning_rate": 5.9048876534379575e-06, "loss": 0.5448, "step": 2063 }, { "epoch": 0.45927903871829107, "grad_norm": 1.201163916727819, "learning_rate": 5.901343353282147e-06, "loss": 0.5044, "step": 2064 }, { "epoch": 0.45950155763239875, "grad_norm": 1.2034724947018287, "learning_rate": 5.897798584942681e-06, "loss": 0.4896, "step": 2065 }, { "epoch": 0.45972407654650643, "grad_norm": 1.2000367393361457, "learning_rate": 5.894253350260814e-06, "loss": 0.493, "step": 2066 }, { "epoch": 0.45994659546061417, "grad_norm": 1.2272162962821975, "learning_rate": 5.8907076510780415e-06, "loss": 0.4847, "step": 2067 }, { "epoch": 0.46016911437472185, "grad_norm": 1.2149112861270661, "learning_rate": 5.887161489236102e-06, "loss": 0.5102, "step": 2068 }, { "epoch": 0.46039163328882954, "grad_norm": 1.2347788493666259, "learning_rate": 5.8836148665769745e-06, "loss": 0.4956, "step": 2069 }, { "epoch": 0.4606141522029373, "grad_norm": 1.1923184919538894, "learning_rate": 5.8800677849428754e-06, "loss": 0.5126, "step": 2070 }, { "epoch": 0.46083667111704496, "grad_norm": 1.2978770552840915, "learning_rate": 5.876520246176263e-06, "loss": 0.5063, "step": 2071 }, { "epoch": 0.46105919003115264, "grad_norm": 1.2309103101308012, "learning_rate": 5.87297225211983e-06, "loss": 0.5111, "step": 2072 }, { "epoch": 0.4612817089452603, "grad_norm": 1.2989516231412608, "learning_rate": 5.869423804616504e-06, "loss": 0.5272, "step": 2073 }, { "epoch": 0.46150422785936807, "grad_norm": 1.2575173417280332, "learning_rate": 5.865874905509455e-06, "loss": 0.501, "step": 2074 }, { "epoch": 0.46172674677347575, "grad_norm": 1.3392720354636107, "learning_rate": 5.862325556642081e-06, "loss": 0.5089, "step": 2075 }, { "epoch": 0.46194926568758343, "grad_norm": 1.292474455168834, "learning_rate": 5.858775759858018e-06, "loss": 0.5087, "step": 2076 }, { "epoch": 0.46217178460169117, "grad_norm": 1.2574078868316552, "learning_rate": 5.855225517001128e-06, "loss": 0.5079, "step": 2077 }, { "epoch": 0.46239430351579885, "grad_norm": 1.323072777109167, "learning_rate": 5.851674829915512e-06, "loss": 0.4912, "step": 2078 }, { "epoch": 0.46261682242990654, "grad_norm": 1.380384677094371, "learning_rate": 5.8481237004455e-06, "loss": 0.4979, "step": 2079 }, { "epoch": 0.4628393413440142, "grad_norm": 1.3470446479055622, "learning_rate": 5.844572130435649e-06, "loss": 0.5094, "step": 2080 }, { "epoch": 0.46306186025812196, "grad_norm": 1.2574673061096702, "learning_rate": 5.841020121730747e-06, "loss": 0.4952, "step": 2081 }, { "epoch": 0.46328437917222964, "grad_norm": 1.2437229187236705, "learning_rate": 5.837467676175812e-06, "loss": 0.5102, "step": 2082 }, { "epoch": 0.4635068980863373, "grad_norm": 1.1982237898491548, "learning_rate": 5.8339147956160815e-06, "loss": 0.5216, "step": 2083 }, { "epoch": 0.46372941700044507, "grad_norm": 1.274362454725424, "learning_rate": 5.830361481897027e-06, "loss": 0.4986, "step": 2084 }, { "epoch": 0.46395193591455275, "grad_norm": 1.303599732687467, "learning_rate": 5.826807736864342e-06, "loss": 0.5138, "step": 2085 }, { "epoch": 0.46417445482866043, "grad_norm": 1.4066484361217622, "learning_rate": 5.823253562363942e-06, "loss": 0.51, "step": 2086 }, { "epoch": 0.4643969737427681, "grad_norm": 1.255168105354518, "learning_rate": 5.819698960241969e-06, "loss": 0.5134, "step": 2087 }, { "epoch": 0.46461949265687585, "grad_norm": 1.2654176233118306, "learning_rate": 5.8161439323447846e-06, "loss": 0.5227, "step": 2088 }, { "epoch": 0.46484201157098354, "grad_norm": 1.2014251099493876, "learning_rate": 5.8125884805189735e-06, "loss": 0.5073, "step": 2089 }, { "epoch": 0.4650645304850912, "grad_norm": 1.1900223926232592, "learning_rate": 5.80903260661134e-06, "loss": 0.5021, "step": 2090 }, { "epoch": 0.4652870493991989, "grad_norm": 1.252620304085854, "learning_rate": 5.805476312468906e-06, "loss": 0.4779, "step": 2091 }, { "epoch": 0.46550956831330664, "grad_norm": 1.1986628166775002, "learning_rate": 5.801919599938913e-06, "loss": 0.5151, "step": 2092 }, { "epoch": 0.4657320872274143, "grad_norm": 1.235982923258637, "learning_rate": 5.798362470868819e-06, "loss": 0.4976, "step": 2093 }, { "epoch": 0.465954606141522, "grad_norm": 1.1865431083878797, "learning_rate": 5.7948049271063015e-06, "loss": 0.5085, "step": 2094 }, { "epoch": 0.46617712505562975, "grad_norm": 1.2689276062254828, "learning_rate": 5.79124697049925e-06, "loss": 0.5183, "step": 2095 }, { "epoch": 0.46639964396973743, "grad_norm": 1.2247357217498067, "learning_rate": 5.787688602895768e-06, "loss": 0.5071, "step": 2096 }, { "epoch": 0.4666221628838451, "grad_norm": 1.228310567614732, "learning_rate": 5.784129826144176e-06, "loss": 0.4995, "step": 2097 }, { "epoch": 0.4668446817979528, "grad_norm": 1.320310348855059, "learning_rate": 5.780570642093004e-06, "loss": 0.5096, "step": 2098 }, { "epoch": 0.46706720071206054, "grad_norm": 1.1718053413189118, "learning_rate": 5.777011052590994e-06, "loss": 0.5111, "step": 2099 }, { "epoch": 0.4672897196261682, "grad_norm": 1.181280037041366, "learning_rate": 5.773451059487101e-06, "loss": 0.5034, "step": 2100 }, { "epoch": 0.4675122385402759, "grad_norm": 1.1165088704856487, "learning_rate": 5.769890664630486e-06, "loss": 0.4984, "step": 2101 }, { "epoch": 0.46773475745438364, "grad_norm": 1.2890998162149232, "learning_rate": 5.76632986987052e-06, "loss": 0.5256, "step": 2102 }, { "epoch": 0.4679572763684913, "grad_norm": 1.2302427696605738, "learning_rate": 5.762768677056786e-06, "loss": 0.5314, "step": 2103 }, { "epoch": 0.468179795282599, "grad_norm": 1.197598052621555, "learning_rate": 5.759207088039065e-06, "loss": 0.4952, "step": 2104 }, { "epoch": 0.4684023141967067, "grad_norm": 1.209667421822148, "learning_rate": 5.7556451046673516e-06, "loss": 0.5165, "step": 2105 }, { "epoch": 0.46862483311081443, "grad_norm": 1.2417051118210047, "learning_rate": 5.752082728791842e-06, "loss": 0.5065, "step": 2106 }, { "epoch": 0.4688473520249221, "grad_norm": 1.2320600702359574, "learning_rate": 5.748519962262937e-06, "loss": 0.5141, "step": 2107 }, { "epoch": 0.4690698709390298, "grad_norm": 1.2039420328172934, "learning_rate": 5.744956806931238e-06, "loss": 0.4945, "step": 2108 }, { "epoch": 0.46929238985313754, "grad_norm": 1.1991502578533881, "learning_rate": 5.741393264647552e-06, "loss": 0.4951, "step": 2109 }, { "epoch": 0.4695149087672452, "grad_norm": 1.1600506345042865, "learning_rate": 5.737829337262887e-06, "loss": 0.521, "step": 2110 }, { "epoch": 0.4697374276813529, "grad_norm": 1.304970629019425, "learning_rate": 5.734265026628445e-06, "loss": 0.5043, "step": 2111 }, { "epoch": 0.4699599465954606, "grad_norm": 1.2341787175826804, "learning_rate": 5.730700334595636e-06, "loss": 0.516, "step": 2112 }, { "epoch": 0.4701824655095683, "grad_norm": 1.189951595167113, "learning_rate": 5.72713526301606e-06, "loss": 0.511, "step": 2113 }, { "epoch": 0.470404984423676, "grad_norm": 1.17303172109503, "learning_rate": 5.72356981374152e-06, "loss": 0.4962, "step": 2114 }, { "epoch": 0.4706275033377837, "grad_norm": 1.320877068091779, "learning_rate": 5.720003988624011e-06, "loss": 0.5156, "step": 2115 }, { "epoch": 0.47085002225189143, "grad_norm": 1.2234438631606226, "learning_rate": 5.716437789515728e-06, "loss": 0.5096, "step": 2116 }, { "epoch": 0.4710725411659991, "grad_norm": 1.2118356697322163, "learning_rate": 5.712871218269053e-06, "loss": 0.5088, "step": 2117 }, { "epoch": 0.4712950600801068, "grad_norm": 1.27202056134373, "learning_rate": 5.709304276736569e-06, "loss": 0.4955, "step": 2118 }, { "epoch": 0.4715175789942145, "grad_norm": 1.2955883292341102, "learning_rate": 5.705736966771047e-06, "loss": 0.5033, "step": 2119 }, { "epoch": 0.4717400979083222, "grad_norm": 1.1734748051199064, "learning_rate": 5.702169290225451e-06, "loss": 0.489, "step": 2120 }, { "epoch": 0.4719626168224299, "grad_norm": 1.311624991805231, "learning_rate": 5.698601248952935e-06, "loss": 0.5124, "step": 2121 }, { "epoch": 0.4721851357365376, "grad_norm": 1.1632322083739357, "learning_rate": 5.695032844806842e-06, "loss": 0.5013, "step": 2122 }, { "epoch": 0.4724076546506453, "grad_norm": 1.1972412483679948, "learning_rate": 5.691464079640702e-06, "loss": 0.5194, "step": 2123 }, { "epoch": 0.472630173564753, "grad_norm": 1.2963750197598263, "learning_rate": 5.687894955308236e-06, "loss": 0.4971, "step": 2124 }, { "epoch": 0.4728526924788607, "grad_norm": 1.2488833574838978, "learning_rate": 5.684325473663349e-06, "loss": 0.5031, "step": 2125 }, { "epoch": 0.4730752113929684, "grad_norm": 1.2092700016073192, "learning_rate": 5.680755636560132e-06, "loss": 0.5024, "step": 2126 }, { "epoch": 0.4732977303070761, "grad_norm": 1.2645856688025876, "learning_rate": 5.677185445852864e-06, "loss": 0.5135, "step": 2127 }, { "epoch": 0.4735202492211838, "grad_norm": 1.1779586881868835, "learning_rate": 5.673614903396001e-06, "loss": 0.4883, "step": 2128 }, { "epoch": 0.4737427681352915, "grad_norm": 1.3180154943201499, "learning_rate": 5.670044011044187e-06, "loss": 0.5008, "step": 2129 }, { "epoch": 0.4739652870493992, "grad_norm": 1.3072656303635377, "learning_rate": 5.666472770652246e-06, "loss": 0.5101, "step": 2130 }, { "epoch": 0.4741878059635069, "grad_norm": 1.1928666805989714, "learning_rate": 5.662901184075185e-06, "loss": 0.4945, "step": 2131 }, { "epoch": 0.4744103248776146, "grad_norm": 1.2222299808433146, "learning_rate": 5.6593292531681855e-06, "loss": 0.5164, "step": 2132 }, { "epoch": 0.47463284379172227, "grad_norm": 1.1850231909160827, "learning_rate": 5.655756979786611e-06, "loss": 0.5229, "step": 2133 }, { "epoch": 0.47485536270583, "grad_norm": 1.3611418393656318, "learning_rate": 5.6521843657860066e-06, "loss": 0.5234, "step": 2134 }, { "epoch": 0.4750778816199377, "grad_norm": 1.239195997335758, "learning_rate": 5.6486114130220875e-06, "loss": 0.5115, "step": 2135 }, { "epoch": 0.4753004005340454, "grad_norm": 1.2485007726934036, "learning_rate": 5.645038123350749e-06, "loss": 0.4985, "step": 2136 }, { "epoch": 0.4755229194481531, "grad_norm": 1.2068477035425977, "learning_rate": 5.641464498628062e-06, "loss": 0.5201, "step": 2137 }, { "epoch": 0.4757454383622608, "grad_norm": 1.1442948637113697, "learning_rate": 5.637890540710268e-06, "loss": 0.5, "step": 2138 }, { "epoch": 0.4759679572763685, "grad_norm": 1.2746204099977088, "learning_rate": 5.6343162514537845e-06, "loss": 0.502, "step": 2139 }, { "epoch": 0.47619047619047616, "grad_norm": 1.186759046664176, "learning_rate": 5.630741632715198e-06, "loss": 0.4991, "step": 2140 }, { "epoch": 0.4764129951045839, "grad_norm": 1.1996974429516036, "learning_rate": 5.627166686351272e-06, "loss": 0.5144, "step": 2141 }, { "epoch": 0.4766355140186916, "grad_norm": 1.2418953443480296, "learning_rate": 5.623591414218934e-06, "loss": 0.501, "step": 2142 }, { "epoch": 0.47685803293279927, "grad_norm": 1.2813722759940265, "learning_rate": 5.620015818175284e-06, "loss": 0.4993, "step": 2143 }, { "epoch": 0.477080551846907, "grad_norm": 1.214238874972997, "learning_rate": 5.6164399000775895e-06, "loss": 0.5097, "step": 2144 }, { "epoch": 0.4773030707610147, "grad_norm": 1.3613235394583614, "learning_rate": 5.612863661783283e-06, "loss": 0.5251, "step": 2145 }, { "epoch": 0.4775255896751224, "grad_norm": 1.2332880762924021, "learning_rate": 5.609287105149969e-06, "loss": 0.5104, "step": 2146 }, { "epoch": 0.47774810858923006, "grad_norm": 1.3724472916591492, "learning_rate": 5.605710232035412e-06, "loss": 0.5094, "step": 2147 }, { "epoch": 0.4779706275033378, "grad_norm": 1.2657641934394084, "learning_rate": 5.602133044297542e-06, "loss": 0.4895, "step": 2148 }, { "epoch": 0.4781931464174455, "grad_norm": 1.1818929288786972, "learning_rate": 5.598555543794455e-06, "loss": 0.4924, "step": 2149 }, { "epoch": 0.47841566533155316, "grad_norm": 1.2677619780609986, "learning_rate": 5.5949777323844055e-06, "loss": 0.5055, "step": 2150 }, { "epoch": 0.4786381842456609, "grad_norm": 1.3689797358393454, "learning_rate": 5.591399611925813e-06, "loss": 0.5208, "step": 2151 }, { "epoch": 0.4788607031597686, "grad_norm": 1.2278459110885902, "learning_rate": 5.5878211842772575e-06, "loss": 0.4716, "step": 2152 }, { "epoch": 0.47908322207387627, "grad_norm": 1.2164062243867855, "learning_rate": 5.584242451297476e-06, "loss": 0.4886, "step": 2153 }, { "epoch": 0.47930574098798395, "grad_norm": 1.3322228486963035, "learning_rate": 5.580663414845363e-06, "loss": 0.5027, "step": 2154 }, { "epoch": 0.4795282599020917, "grad_norm": 1.2611927176501654, "learning_rate": 5.5770840767799765e-06, "loss": 0.494, "step": 2155 }, { "epoch": 0.4797507788161994, "grad_norm": 1.2472377473839102, "learning_rate": 5.573504438960524e-06, "loss": 0.5017, "step": 2156 }, { "epoch": 0.47997329773030706, "grad_norm": 1.2288415726808346, "learning_rate": 5.569924503246376e-06, "loss": 0.5102, "step": 2157 }, { "epoch": 0.4801958166444148, "grad_norm": 1.4851349608023263, "learning_rate": 5.566344271497053e-06, "loss": 0.5169, "step": 2158 }, { "epoch": 0.4804183355585225, "grad_norm": 1.246129358560281, "learning_rate": 5.56276374557223e-06, "loss": 0.5035, "step": 2159 }, { "epoch": 0.48064085447263016, "grad_norm": 1.1650823747615056, "learning_rate": 5.5591829273317325e-06, "loss": 0.4996, "step": 2160 }, { "epoch": 0.48086337338673785, "grad_norm": 1.2177476656291175, "learning_rate": 5.555601818635544e-06, "loss": 0.4942, "step": 2161 }, { "epoch": 0.4810858923008456, "grad_norm": 1.2696177019295982, "learning_rate": 5.552020421343795e-06, "loss": 0.4941, "step": 2162 }, { "epoch": 0.48130841121495327, "grad_norm": 1.1709711148805924, "learning_rate": 5.548438737316764e-06, "loss": 0.4981, "step": 2163 }, { "epoch": 0.48153093012906095, "grad_norm": 1.287791947403963, "learning_rate": 5.544856768414884e-06, "loss": 0.4919, "step": 2164 }, { "epoch": 0.4817534490431687, "grad_norm": 1.2847276663917617, "learning_rate": 5.541274516498731e-06, "loss": 0.5028, "step": 2165 }, { "epoch": 0.4819759679572764, "grad_norm": 1.2726353930488519, "learning_rate": 5.537691983429028e-06, "loss": 0.505, "step": 2166 }, { "epoch": 0.48219848687138406, "grad_norm": 1.245425390016433, "learning_rate": 5.5341091710666475e-06, "loss": 0.4872, "step": 2167 }, { "epoch": 0.48242100578549174, "grad_norm": 1.2198757847226593, "learning_rate": 5.530526081272605e-06, "loss": 0.5031, "step": 2168 }, { "epoch": 0.4826435246995995, "grad_norm": 1.4088978387987257, "learning_rate": 5.526942715908061e-06, "loss": 0.5275, "step": 2169 }, { "epoch": 0.48286604361370716, "grad_norm": 1.2467268662184376, "learning_rate": 5.523359076834316e-06, "loss": 0.4989, "step": 2170 }, { "epoch": 0.48308856252781485, "grad_norm": 1.2690998877289714, "learning_rate": 5.5197751659128174e-06, "loss": 0.5106, "step": 2171 }, { "epoch": 0.4833110814419226, "grad_norm": 1.2910940979650551, "learning_rate": 5.516190985005152e-06, "loss": 0.5006, "step": 2172 }, { "epoch": 0.48353360035603027, "grad_norm": 1.3030832167947388, "learning_rate": 5.512606535973045e-06, "loss": 0.4904, "step": 2173 }, { "epoch": 0.48375611927013795, "grad_norm": 1.2664216479513346, "learning_rate": 5.509021820678364e-06, "loss": 0.5178, "step": 2174 }, { "epoch": 0.48397863818424564, "grad_norm": 1.255752114073673, "learning_rate": 5.505436840983112e-06, "loss": 0.49, "step": 2175 }, { "epoch": 0.4842011570983534, "grad_norm": 1.3007503182867994, "learning_rate": 5.501851598749429e-06, "loss": 0.5005, "step": 2176 }, { "epoch": 0.48442367601246106, "grad_norm": 1.3056951048336338, "learning_rate": 5.498266095839595e-06, "loss": 0.5099, "step": 2177 }, { "epoch": 0.48464619492656874, "grad_norm": 1.2692129820596334, "learning_rate": 5.494680334116024e-06, "loss": 0.5054, "step": 2178 }, { "epoch": 0.4848687138406765, "grad_norm": 1.2147038339681666, "learning_rate": 5.491094315441262e-06, "loss": 0.5085, "step": 2179 }, { "epoch": 0.48509123275478416, "grad_norm": 1.2339259153168056, "learning_rate": 5.487508041677992e-06, "loss": 0.5068, "step": 2180 }, { "epoch": 0.48531375166889185, "grad_norm": 1.306150636185863, "learning_rate": 5.483921514689023e-06, "loss": 0.5084, "step": 2181 }, { "epoch": 0.48553627058299953, "grad_norm": 1.3024296611139403, "learning_rate": 5.480334736337306e-06, "loss": 0.509, "step": 2182 }, { "epoch": 0.48575878949710727, "grad_norm": 1.2429902061834381, "learning_rate": 5.476747708485915e-06, "loss": 0.5001, "step": 2183 }, { "epoch": 0.48598130841121495, "grad_norm": 1.3421538369475712, "learning_rate": 5.4731604329980555e-06, "loss": 0.4997, "step": 2184 }, { "epoch": 0.48620382732532264, "grad_norm": 1.2097506546550616, "learning_rate": 5.46957291173706e-06, "loss": 0.5018, "step": 2185 }, { "epoch": 0.4864263462394304, "grad_norm": 1.2999378832467348, "learning_rate": 5.465985146566392e-06, "loss": 0.5008, "step": 2186 }, { "epoch": 0.48664886515353806, "grad_norm": 1.2562100153730245, "learning_rate": 5.4623971393496386e-06, "loss": 0.5072, "step": 2187 }, { "epoch": 0.48687138406764574, "grad_norm": 1.2180628070403199, "learning_rate": 5.4588088919505144e-06, "loss": 0.5165, "step": 2188 }, { "epoch": 0.4870939029817534, "grad_norm": 1.2637061978761475, "learning_rate": 5.45522040623286e-06, "loss": 0.5066, "step": 2189 }, { "epoch": 0.48731642189586116, "grad_norm": 1.269512733041299, "learning_rate": 5.451631684060635e-06, "loss": 0.4942, "step": 2190 }, { "epoch": 0.48753894080996885, "grad_norm": 1.2579038226257362, "learning_rate": 5.448042727297925e-06, "loss": 0.5087, "step": 2191 }, { "epoch": 0.48776145972407653, "grad_norm": 1.3174548594678166, "learning_rate": 5.444453537808941e-06, "loss": 0.5161, "step": 2192 }, { "epoch": 0.48798397863818427, "grad_norm": 1.26782930237438, "learning_rate": 5.440864117458008e-06, "loss": 0.4906, "step": 2193 }, { "epoch": 0.48820649755229195, "grad_norm": 1.3377781380077427, "learning_rate": 5.4372744681095744e-06, "loss": 0.5064, "step": 2194 }, { "epoch": 0.48842901646639963, "grad_norm": 1.2321054077616478, "learning_rate": 5.433684591628209e-06, "loss": 0.5055, "step": 2195 }, { "epoch": 0.4886515353805073, "grad_norm": 1.3324455711310503, "learning_rate": 5.4300944898785965e-06, "loss": 0.5006, "step": 2196 }, { "epoch": 0.48887405429461506, "grad_norm": 1.322310481885133, "learning_rate": 5.426504164725537e-06, "loss": 0.4918, "step": 2197 }, { "epoch": 0.48909657320872274, "grad_norm": 1.2351664128994044, "learning_rate": 5.422913618033949e-06, "loss": 0.4875, "step": 2198 }, { "epoch": 0.4893190921228304, "grad_norm": 1.2980390616694377, "learning_rate": 5.419322851668869e-06, "loss": 0.5147, "step": 2199 }, { "epoch": 0.48954161103693816, "grad_norm": 1.2856622375101794, "learning_rate": 5.41573186749544e-06, "loss": 0.5089, "step": 2200 }, { "epoch": 0.48976412995104585, "grad_norm": 1.236014066967702, "learning_rate": 5.412140667378926e-06, "loss": 0.5046, "step": 2201 }, { "epoch": 0.48998664886515353, "grad_norm": 1.2533923979882242, "learning_rate": 5.408549253184696e-06, "loss": 0.4884, "step": 2202 }, { "epoch": 0.4902091677792612, "grad_norm": 1.2508781734761925, "learning_rate": 5.404957626778236e-06, "loss": 0.4953, "step": 2203 }, { "epoch": 0.49043168669336895, "grad_norm": 1.2076363179761953, "learning_rate": 5.401365790025143e-06, "loss": 0.4959, "step": 2204 }, { "epoch": 0.49065420560747663, "grad_norm": 1.2476008507435525, "learning_rate": 5.397773744791115e-06, "loss": 0.496, "step": 2205 }, { "epoch": 0.4908767245215843, "grad_norm": 1.2277494464567076, "learning_rate": 5.394181492941967e-06, "loss": 0.5154, "step": 2206 }, { "epoch": 0.49109924343569206, "grad_norm": 1.2929380087187443, "learning_rate": 5.390589036343614e-06, "loss": 0.484, "step": 2207 }, { "epoch": 0.49132176234979974, "grad_norm": 1.303378713009843, "learning_rate": 5.3869963768620845e-06, "loss": 0.5137, "step": 2208 }, { "epoch": 0.4915442812639074, "grad_norm": 1.203843010167743, "learning_rate": 5.38340351636351e-06, "loss": 0.5046, "step": 2209 }, { "epoch": 0.4917668001780151, "grad_norm": 1.3134482090277713, "learning_rate": 5.379810456714123e-06, "loss": 0.4916, "step": 2210 }, { "epoch": 0.49198931909212285, "grad_norm": 1.3071460421234495, "learning_rate": 5.3762171997802636e-06, "loss": 0.5053, "step": 2211 }, { "epoch": 0.49221183800623053, "grad_norm": 1.301513970301543, "learning_rate": 5.3726237474283715e-06, "loss": 0.5207, "step": 2212 }, { "epoch": 0.4924343569203382, "grad_norm": 1.2986956753480925, "learning_rate": 5.36903010152499e-06, "loss": 0.5145, "step": 2213 }, { "epoch": 0.49265687583444595, "grad_norm": 1.220717153455881, "learning_rate": 5.365436263936763e-06, "loss": 0.5071, "step": 2214 }, { "epoch": 0.49287939474855363, "grad_norm": 1.3263623138015934, "learning_rate": 5.361842236530433e-06, "loss": 0.5025, "step": 2215 }, { "epoch": 0.4931019136626613, "grad_norm": 1.2984305899494426, "learning_rate": 5.358248021172838e-06, "loss": 0.5182, "step": 2216 }, { "epoch": 0.493324432576769, "grad_norm": 1.2555681111940462, "learning_rate": 5.354653619730921e-06, "loss": 0.4997, "step": 2217 }, { "epoch": 0.49354695149087674, "grad_norm": 1.318975320495285, "learning_rate": 5.351059034071715e-06, "loss": 0.4855, "step": 2218 }, { "epoch": 0.4937694704049844, "grad_norm": 1.3558709702348852, "learning_rate": 5.347464266062351e-06, "loss": 0.5199, "step": 2219 }, { "epoch": 0.4939919893190921, "grad_norm": 1.1865775546596906, "learning_rate": 5.343869317570056e-06, "loss": 0.5066, "step": 2220 }, { "epoch": 0.49421450823319985, "grad_norm": 1.3204373158702811, "learning_rate": 5.3402741904621515e-06, "loss": 0.4989, "step": 2221 }, { "epoch": 0.49443702714730753, "grad_norm": 1.2236345168475113, "learning_rate": 5.336678886606045e-06, "loss": 0.4834, "step": 2222 }, { "epoch": 0.4946595460614152, "grad_norm": 1.3085535633016114, "learning_rate": 5.3330834078692424e-06, "loss": 0.4942, "step": 2223 }, { "epoch": 0.4948820649755229, "grad_norm": 1.3344290220747719, "learning_rate": 5.329487756119342e-06, "loss": 0.5182, "step": 2224 }, { "epoch": 0.49510458388963063, "grad_norm": 1.2716357330512185, "learning_rate": 5.325891933224025e-06, "loss": 0.5018, "step": 2225 }, { "epoch": 0.4953271028037383, "grad_norm": 1.2121681376965054, "learning_rate": 5.322295941051069e-06, "loss": 0.4923, "step": 2226 }, { "epoch": 0.495549621717846, "grad_norm": 1.288402345696327, "learning_rate": 5.3186997814683325e-06, "loss": 0.4997, "step": 2227 }, { "epoch": 0.49577214063195374, "grad_norm": 1.2965155655987664, "learning_rate": 5.315103456343764e-06, "loss": 0.5061, "step": 2228 }, { "epoch": 0.4959946595460614, "grad_norm": 1.2835916453214147, "learning_rate": 5.3115069675454e-06, "loss": 0.5176, "step": 2229 }, { "epoch": 0.4962171784601691, "grad_norm": 1.1719293574014347, "learning_rate": 5.3079103169413596e-06, "loss": 0.5234, "step": 2230 }, { "epoch": 0.4964396973742768, "grad_norm": 1.294085689139773, "learning_rate": 5.304313506399845e-06, "loss": 0.5036, "step": 2231 }, { "epoch": 0.49666221628838453, "grad_norm": 1.299081283049853, "learning_rate": 5.300716537789147e-06, "loss": 0.5209, "step": 2232 }, { "epoch": 0.4968847352024922, "grad_norm": 1.2762803100386737, "learning_rate": 5.2971194129776295e-06, "loss": 0.4964, "step": 2233 }, { "epoch": 0.4971072541165999, "grad_norm": 1.191515445118409, "learning_rate": 5.293522133833745e-06, "loss": 0.496, "step": 2234 }, { "epoch": 0.49732977303070763, "grad_norm": 1.2957394059365197, "learning_rate": 5.289924702226026e-06, "loss": 0.5051, "step": 2235 }, { "epoch": 0.4975522919448153, "grad_norm": 1.4588649476705866, "learning_rate": 5.286327120023079e-06, "loss": 0.4997, "step": 2236 }, { "epoch": 0.497774810858923, "grad_norm": 1.2967505362218645, "learning_rate": 5.282729389093591e-06, "loss": 0.5005, "step": 2237 }, { "epoch": 0.4979973297730307, "grad_norm": 1.3090057520097638, "learning_rate": 5.27913151130633e-06, "loss": 0.4706, "step": 2238 }, { "epoch": 0.4982198486871384, "grad_norm": 1.341707610833717, "learning_rate": 5.275533488530134e-06, "loss": 0.4907, "step": 2239 }, { "epoch": 0.4984423676012461, "grad_norm": 1.3259765991373356, "learning_rate": 5.271935322633921e-06, "loss": 0.5119, "step": 2240 }, { "epoch": 0.4986648865153538, "grad_norm": 1.2605236871094214, "learning_rate": 5.268337015486683e-06, "loss": 0.483, "step": 2241 }, { "epoch": 0.49888740542946153, "grad_norm": 1.4585560258441634, "learning_rate": 5.2647385689574826e-06, "loss": 0.4866, "step": 2242 }, { "epoch": 0.4991099243435692, "grad_norm": 1.3140425413705545, "learning_rate": 5.261139984915455e-06, "loss": 0.5314, "step": 2243 }, { "epoch": 0.4993324432576769, "grad_norm": 1.2954429239031287, "learning_rate": 5.257541265229812e-06, "loss": 0.4785, "step": 2244 }, { "epoch": 0.4995549621717846, "grad_norm": 1.3888183963411747, "learning_rate": 5.253942411769829e-06, "loss": 0.4927, "step": 2245 }, { "epoch": 0.4997774810858923, "grad_norm": 1.2661718345522264, "learning_rate": 5.250343426404857e-06, "loss": 0.4958, "step": 2246 }, { "epoch": 0.5, "grad_norm": 1.3392483974903813, "learning_rate": 5.2467443110043084e-06, "loss": 0.5071, "step": 2247 }, { "epoch": 0.5002225189141077, "grad_norm": 1.2720477885260326, "learning_rate": 5.24314506743767e-06, "loss": 0.5185, "step": 2248 }, { "epoch": 0.5004450378282154, "grad_norm": 1.2783710808561914, "learning_rate": 5.23954569757449e-06, "loss": 0.4977, "step": 2249 }, { "epoch": 0.5006675567423231, "grad_norm": 1.2864775530088135, "learning_rate": 5.235946203284389e-06, "loss": 0.5003, "step": 2250 }, { "epoch": 0.5008900756564308, "grad_norm": 1.5165223436749677, "learning_rate": 5.2323465864370445e-06, "loss": 0.5012, "step": 2251 }, { "epoch": 0.5011125945705385, "grad_norm": 1.2427019104624293, "learning_rate": 5.228746848902202e-06, "loss": 0.5023, "step": 2252 }, { "epoch": 0.5013351134846462, "grad_norm": 1.490784660744393, "learning_rate": 5.225146992549668e-06, "loss": 0.5128, "step": 2253 }, { "epoch": 0.5015576323987538, "grad_norm": 1.3314373792090486, "learning_rate": 5.221547019249311e-06, "loss": 0.4896, "step": 2254 }, { "epoch": 0.5017801513128616, "grad_norm": 1.1999027914841436, "learning_rate": 5.217946930871063e-06, "loss": 0.5121, "step": 2255 }, { "epoch": 0.5020026702269693, "grad_norm": 1.3069588451483907, "learning_rate": 5.21434672928491e-06, "loss": 0.5008, "step": 2256 }, { "epoch": 0.5022251891410769, "grad_norm": 1.3456012822061176, "learning_rate": 5.210746416360904e-06, "loss": 0.5024, "step": 2257 }, { "epoch": 0.5024477080551847, "grad_norm": 1.3891301266769922, "learning_rate": 5.20714599396915e-06, "loss": 0.5058, "step": 2258 }, { "epoch": 0.5026702269692924, "grad_norm": 1.2656271519206204, "learning_rate": 5.203545463979807e-06, "loss": 0.5011, "step": 2259 }, { "epoch": 0.5028927458834, "grad_norm": 1.4018571165114728, "learning_rate": 5.199944828263099e-06, "loss": 0.4952, "step": 2260 }, { "epoch": 0.5031152647975078, "grad_norm": 1.3109608755928446, "learning_rate": 5.196344088689298e-06, "loss": 0.5132, "step": 2261 }, { "epoch": 0.5033377837116155, "grad_norm": 1.413941787292157, "learning_rate": 5.19274324712873e-06, "loss": 0.5034, "step": 2262 }, { "epoch": 0.5035603026257232, "grad_norm": 1.2640452473763484, "learning_rate": 5.189142305451777e-06, "loss": 0.4922, "step": 2263 }, { "epoch": 0.5037828215398309, "grad_norm": 1.4079904138763184, "learning_rate": 5.185541265528873e-06, "loss": 0.5135, "step": 2264 }, { "epoch": 0.5040053404539386, "grad_norm": 1.2689671186179359, "learning_rate": 5.1819401292304985e-06, "loss": 0.5147, "step": 2265 }, { "epoch": 0.5042278593680463, "grad_norm": 1.3282285649946723, "learning_rate": 5.178338898427191e-06, "loss": 0.515, "step": 2266 }, { "epoch": 0.504450378282154, "grad_norm": 1.2506316719348258, "learning_rate": 5.17473757498953e-06, "loss": 0.5079, "step": 2267 }, { "epoch": 0.5046728971962616, "grad_norm": 1.3375350847505958, "learning_rate": 5.171136160788148e-06, "loss": 0.5163, "step": 2268 }, { "epoch": 0.5048954161103694, "grad_norm": 1.3426237652125694, "learning_rate": 5.167534657693725e-06, "loss": 0.4858, "step": 2269 }, { "epoch": 0.5051179350244771, "grad_norm": 1.4130939827134308, "learning_rate": 5.163933067576981e-06, "loss": 0.5016, "step": 2270 }, { "epoch": 0.5053404539385847, "grad_norm": 1.2708337241798657, "learning_rate": 5.1603313923086875e-06, "loss": 0.4984, "step": 2271 }, { "epoch": 0.5055629728526925, "grad_norm": 1.4695319339425945, "learning_rate": 5.156729633759659e-06, "loss": 0.5029, "step": 2272 }, { "epoch": 0.5057854917668002, "grad_norm": 1.364360431436452, "learning_rate": 5.153127793800753e-06, "loss": 0.4924, "step": 2273 }, { "epoch": 0.5060080106809078, "grad_norm": 1.242883055930352, "learning_rate": 5.149525874302867e-06, "loss": 0.501, "step": 2274 }, { "epoch": 0.5062305295950156, "grad_norm": 1.3268341336993743, "learning_rate": 5.14592387713694e-06, "loss": 0.5079, "step": 2275 }, { "epoch": 0.5064530485091233, "grad_norm": 1.4157873235260579, "learning_rate": 5.1423218041739575e-06, "loss": 0.5017, "step": 2276 }, { "epoch": 0.5066755674232309, "grad_norm": 1.3360456115446795, "learning_rate": 5.138719657284936e-06, "loss": 0.5114, "step": 2277 }, { "epoch": 0.5068980863373387, "grad_norm": 1.2402920014378553, "learning_rate": 5.135117438340938e-06, "loss": 0.489, "step": 2278 }, { "epoch": 0.5071206052514464, "grad_norm": 1.3059164413633904, "learning_rate": 5.131515149213056e-06, "loss": 0.4939, "step": 2279 }, { "epoch": 0.507343124165554, "grad_norm": 1.2489331033022595, "learning_rate": 5.127912791772421e-06, "loss": 0.4829, "step": 2280 }, { "epoch": 0.5075656430796618, "grad_norm": 1.3134366438298917, "learning_rate": 5.1243103678902065e-06, "loss": 0.5025, "step": 2281 }, { "epoch": 0.5077881619937694, "grad_norm": 1.33796573702118, "learning_rate": 5.120707879437612e-06, "loss": 0.5021, "step": 2282 }, { "epoch": 0.5080106809078772, "grad_norm": 1.2734433269547123, "learning_rate": 5.117105328285874e-06, "loss": 0.4887, "step": 2283 }, { "epoch": 0.5082331998219849, "grad_norm": 1.3197292257216406, "learning_rate": 5.11350271630626e-06, "loss": 0.4925, "step": 2284 }, { "epoch": 0.5084557187360925, "grad_norm": 1.2869619453400434, "learning_rate": 5.109900045370071e-06, "loss": 0.4993, "step": 2285 }, { "epoch": 0.5086782376502003, "grad_norm": 1.3435861692707864, "learning_rate": 5.10629731734864e-06, "loss": 0.4972, "step": 2286 }, { "epoch": 0.508900756564308, "grad_norm": 1.3042026749266975, "learning_rate": 5.102694534113324e-06, "loss": 0.4982, "step": 2287 }, { "epoch": 0.5091232754784156, "grad_norm": 1.4009254611327466, "learning_rate": 5.099091697535515e-06, "loss": 0.4832, "step": 2288 }, { "epoch": 0.5093457943925234, "grad_norm": 1.3067145410612886, "learning_rate": 5.09548880948663e-06, "loss": 0.5113, "step": 2289 }, { "epoch": 0.5095683133066311, "grad_norm": 1.3343492913183892, "learning_rate": 5.091885871838108e-06, "loss": 0.4982, "step": 2290 }, { "epoch": 0.5097908322207387, "grad_norm": 1.2845553566612156, "learning_rate": 5.088282886461425e-06, "loss": 0.4848, "step": 2291 }, { "epoch": 0.5100133511348465, "grad_norm": 1.2828717633261935, "learning_rate": 5.084679855228072e-06, "loss": 0.4988, "step": 2292 }, { "epoch": 0.5102358700489542, "grad_norm": 1.2273071969372378, "learning_rate": 5.081076780009565e-06, "loss": 0.5079, "step": 2293 }, { "epoch": 0.5104583889630618, "grad_norm": 1.2995265224041637, "learning_rate": 5.077473662677449e-06, "loss": 0.5006, "step": 2294 }, { "epoch": 0.5106809078771696, "grad_norm": 1.3340909026276522, "learning_rate": 5.073870505103284e-06, "loss": 0.5074, "step": 2295 }, { "epoch": 0.5109034267912772, "grad_norm": 1.3467231584776622, "learning_rate": 5.070267309158654e-06, "loss": 0.4831, "step": 2296 }, { "epoch": 0.5111259457053849, "grad_norm": 1.2379769980816664, "learning_rate": 5.066664076715164e-06, "loss": 0.4993, "step": 2297 }, { "epoch": 0.5113484646194927, "grad_norm": 1.325883780013733, "learning_rate": 5.063060809644436e-06, "loss": 0.4983, "step": 2298 }, { "epoch": 0.5115709835336003, "grad_norm": 1.3448509167095866, "learning_rate": 5.059457509818109e-06, "loss": 0.4902, "step": 2299 }, { "epoch": 0.511793502447708, "grad_norm": 1.3576990893678, "learning_rate": 5.055854179107842e-06, "loss": 0.4962, "step": 2300 }, { "epoch": 0.5120160213618158, "grad_norm": 1.3421243601670558, "learning_rate": 5.052250819385308e-06, "loss": 0.501, "step": 2301 }, { "epoch": 0.5122385402759234, "grad_norm": 1.323150916855973, "learning_rate": 5.048647432522195e-06, "loss": 0.4767, "step": 2302 }, { "epoch": 0.5124610591900312, "grad_norm": 1.4020451276784398, "learning_rate": 5.0450440203902094e-06, "loss": 0.5285, "step": 2303 }, { "epoch": 0.5126835781041389, "grad_norm": 1.3707362140684325, "learning_rate": 5.041440584861064e-06, "loss": 0.5047, "step": 2304 }, { "epoch": 0.5129060970182465, "grad_norm": 1.3184276296076873, "learning_rate": 5.0378371278064854e-06, "loss": 0.4856, "step": 2305 }, { "epoch": 0.5131286159323543, "grad_norm": 1.3549041569467128, "learning_rate": 5.034233651098214e-06, "loss": 0.4905, "step": 2306 }, { "epoch": 0.513351134846462, "grad_norm": 1.410034103321518, "learning_rate": 5.030630156608001e-06, "loss": 0.4886, "step": 2307 }, { "epoch": 0.5135736537605696, "grad_norm": 1.3738791887592479, "learning_rate": 5.027026646207603e-06, "loss": 0.4983, "step": 2308 }, { "epoch": 0.5137961726746774, "grad_norm": 1.2862154061042477, "learning_rate": 5.02342312176879e-06, "loss": 0.4847, "step": 2309 }, { "epoch": 0.514018691588785, "grad_norm": 1.2676714694588629, "learning_rate": 5.019819585163333e-06, "loss": 0.4945, "step": 2310 }, { "epoch": 0.5142412105028927, "grad_norm": 1.3612482510540507, "learning_rate": 5.016216038263012e-06, "loss": 0.4919, "step": 2311 }, { "epoch": 0.5144637294170005, "grad_norm": 1.2954831393835977, "learning_rate": 5.0126124829396164e-06, "loss": 0.5014, "step": 2312 }, { "epoch": 0.5146862483311081, "grad_norm": 1.2817931783493726, "learning_rate": 5.009008921064934e-06, "loss": 0.4893, "step": 2313 }, { "epoch": 0.5149087672452158, "grad_norm": 1.3556177122523172, "learning_rate": 5.0054053545107584e-06, "loss": 0.5037, "step": 2314 }, { "epoch": 0.5151312861593236, "grad_norm": 1.4576812806145483, "learning_rate": 5.0018017851488865e-06, "loss": 0.5163, "step": 2315 }, { "epoch": 0.5153538050734312, "grad_norm": 1.4372914612902115, "learning_rate": 4.998198214851115e-06, "loss": 0.4994, "step": 2316 }, { "epoch": 0.5155763239875389, "grad_norm": 1.3046916026484012, "learning_rate": 4.994594645489242e-06, "loss": 0.5034, "step": 2317 }, { "epoch": 0.5157988429016467, "grad_norm": 1.302020348142613, "learning_rate": 4.990991078935068e-06, "loss": 0.4834, "step": 2318 }, { "epoch": 0.5160213618157543, "grad_norm": 1.3557971887944653, "learning_rate": 4.987387517060385e-06, "loss": 0.4976, "step": 2319 }, { "epoch": 0.516243880729862, "grad_norm": 1.4703943505035058, "learning_rate": 4.98378396173699e-06, "loss": 0.4913, "step": 2320 }, { "epoch": 0.5164663996439698, "grad_norm": 1.321617167349654, "learning_rate": 4.980180414836669e-06, "loss": 0.5026, "step": 2321 }, { "epoch": 0.5166889185580774, "grad_norm": 1.329434352136924, "learning_rate": 4.9765768782312105e-06, "loss": 0.5049, "step": 2322 }, { "epoch": 0.5169114374721852, "grad_norm": 1.4450121453474978, "learning_rate": 4.972973353792397e-06, "loss": 0.4928, "step": 2323 }, { "epoch": 0.5171339563862928, "grad_norm": 1.3403834710120999, "learning_rate": 4.969369843392e-06, "loss": 0.4927, "step": 2324 }, { "epoch": 0.5173564753004005, "grad_norm": 1.3357154144443308, "learning_rate": 4.965766348901788e-06, "loss": 0.4893, "step": 2325 }, { "epoch": 0.5175789942145083, "grad_norm": 1.3056206091953109, "learning_rate": 4.962162872193517e-06, "loss": 0.4973, "step": 2326 }, { "epoch": 0.5178015131286159, "grad_norm": 1.2025230546390153, "learning_rate": 4.95855941513894e-06, "loss": 0.4998, "step": 2327 }, { "epoch": 0.5180240320427236, "grad_norm": 1.2863544250730266, "learning_rate": 4.954955979609791e-06, "loss": 0.4823, "step": 2328 }, { "epoch": 0.5182465509568314, "grad_norm": 1.2565617089543202, "learning_rate": 4.951352567477805e-06, "loss": 0.502, "step": 2329 }, { "epoch": 0.518469069870939, "grad_norm": 1.3040722580323527, "learning_rate": 4.947749180614693e-06, "loss": 0.4945, "step": 2330 }, { "epoch": 0.5186915887850467, "grad_norm": 1.3163011966310452, "learning_rate": 4.944145820892159e-06, "loss": 0.5009, "step": 2331 }, { "epoch": 0.5189141076991545, "grad_norm": 1.2852567015837353, "learning_rate": 4.940542490181893e-06, "loss": 0.4852, "step": 2332 }, { "epoch": 0.5191366266132621, "grad_norm": 1.3343536002194663, "learning_rate": 4.936939190355566e-06, "loss": 0.4944, "step": 2333 }, { "epoch": 0.5193591455273698, "grad_norm": 1.4197770932786449, "learning_rate": 4.933335923284836e-06, "loss": 0.492, "step": 2334 }, { "epoch": 0.5195816644414776, "grad_norm": 1.3040001398248586, "learning_rate": 4.929732690841346e-06, "loss": 0.4886, "step": 2335 }, { "epoch": 0.5198041833555852, "grad_norm": 1.2374015946752281, "learning_rate": 4.926129494896717e-06, "loss": 0.48, "step": 2336 }, { "epoch": 0.5200267022696929, "grad_norm": 1.240357195799928, "learning_rate": 4.922526337322553e-06, "loss": 0.5035, "step": 2337 }, { "epoch": 0.5202492211838006, "grad_norm": 1.3140036372233364, "learning_rate": 4.9189232199904366e-06, "loss": 0.4932, "step": 2338 }, { "epoch": 0.5204717400979083, "grad_norm": 1.416073751053942, "learning_rate": 4.915320144771931e-06, "loss": 0.5095, "step": 2339 }, { "epoch": 0.520694259012016, "grad_norm": 1.3863743397344808, "learning_rate": 4.911717113538578e-06, "loss": 0.5028, "step": 2340 }, { "epoch": 0.5209167779261237, "grad_norm": 1.3582064029675323, "learning_rate": 4.908114128161892e-06, "loss": 0.49, "step": 2341 }, { "epoch": 0.5211392968402314, "grad_norm": 1.3320403780617693, "learning_rate": 4.904511190513372e-06, "loss": 0.4874, "step": 2342 }, { "epoch": 0.5213618157543392, "grad_norm": 1.3682450366479257, "learning_rate": 4.900908302464486e-06, "loss": 0.5063, "step": 2343 }, { "epoch": 0.5215843346684468, "grad_norm": 1.315242199102008, "learning_rate": 4.897305465886678e-06, "loss": 0.4846, "step": 2344 }, { "epoch": 0.5218068535825545, "grad_norm": 1.414579736418699, "learning_rate": 4.893702682651363e-06, "loss": 0.4893, "step": 2345 }, { "epoch": 0.5220293724966623, "grad_norm": 1.390368509510065, "learning_rate": 4.89009995462993e-06, "loss": 0.4881, "step": 2346 }, { "epoch": 0.5222518914107699, "grad_norm": 1.3291578387554073, "learning_rate": 4.886497283693741e-06, "loss": 0.4883, "step": 2347 }, { "epoch": 0.5224744103248776, "grad_norm": 1.3250787186133548, "learning_rate": 4.882894671714128e-06, "loss": 0.505, "step": 2348 }, { "epoch": 0.5226969292389854, "grad_norm": 1.3399011131712013, "learning_rate": 4.8792921205623895e-06, "loss": 0.5194, "step": 2349 }, { "epoch": 0.522919448153093, "grad_norm": 1.4471277475966482, "learning_rate": 4.875689632109795e-06, "loss": 0.502, "step": 2350 }, { "epoch": 0.5231419670672007, "grad_norm": 1.3379764166743453, "learning_rate": 4.872087208227581e-06, "loss": 0.506, "step": 2351 }, { "epoch": 0.5233644859813084, "grad_norm": 1.3340973901264783, "learning_rate": 4.868484850786948e-06, "loss": 0.4941, "step": 2352 }, { "epoch": 0.5235870048954161, "grad_norm": 1.3404355117637818, "learning_rate": 4.864882561659063e-06, "loss": 0.4852, "step": 2353 }, { "epoch": 0.5238095238095238, "grad_norm": 1.411419874696056, "learning_rate": 4.861280342715064e-06, "loss": 0.4947, "step": 2354 }, { "epoch": 0.5240320427236315, "grad_norm": 1.3265636552843751, "learning_rate": 4.857678195826044e-06, "loss": 0.5183, "step": 2355 }, { "epoch": 0.5242545616377392, "grad_norm": 1.3532454100952669, "learning_rate": 4.854076122863062e-06, "loss": 0.4804, "step": 2356 }, { "epoch": 0.5244770805518469, "grad_norm": 1.248640935194336, "learning_rate": 4.850474125697136e-06, "loss": 0.492, "step": 2357 }, { "epoch": 0.5246995994659546, "grad_norm": 1.3413068374964987, "learning_rate": 4.8468722061992496e-06, "loss": 0.4956, "step": 2358 }, { "epoch": 0.5249221183800623, "grad_norm": 1.3178798278896109, "learning_rate": 4.8432703662403415e-06, "loss": 0.499, "step": 2359 }, { "epoch": 0.52514463729417, "grad_norm": 1.3681553035121679, "learning_rate": 4.839668607691313e-06, "loss": 0.5125, "step": 2360 }, { "epoch": 0.5253671562082777, "grad_norm": 1.3074811241246516, "learning_rate": 4.836066932423021e-06, "loss": 0.4919, "step": 2361 }, { "epoch": 0.5255896751223854, "grad_norm": 1.3205774459111286, "learning_rate": 4.8324653423062775e-06, "loss": 0.4916, "step": 2362 }, { "epoch": 0.5258121940364932, "grad_norm": 1.28364585647099, "learning_rate": 4.828863839211853e-06, "loss": 0.491, "step": 2363 }, { "epoch": 0.5260347129506008, "grad_norm": 1.3274997815955827, "learning_rate": 4.825262425010472e-06, "loss": 0.5071, "step": 2364 }, { "epoch": 0.5262572318647085, "grad_norm": 1.2494045664135067, "learning_rate": 4.82166110157281e-06, "loss": 0.4877, "step": 2365 }, { "epoch": 0.5264797507788161, "grad_norm": 1.296762958398345, "learning_rate": 4.818059870769502e-06, "loss": 0.5038, "step": 2366 }, { "epoch": 0.5267022696929239, "grad_norm": 1.2921266624003185, "learning_rate": 4.814458734471129e-06, "loss": 0.4692, "step": 2367 }, { "epoch": 0.5269247886070316, "grad_norm": 1.2943025280459983, "learning_rate": 4.8108576945482235e-06, "loss": 0.4966, "step": 2368 }, { "epoch": 0.5271473075211393, "grad_norm": 1.274207359854863, "learning_rate": 4.807256752871272e-06, "loss": 0.5009, "step": 2369 }, { "epoch": 0.527369826435247, "grad_norm": 1.2852423155546557, "learning_rate": 4.803655911310705e-06, "loss": 0.5045, "step": 2370 }, { "epoch": 0.5275923453493547, "grad_norm": 1.2321318683649007, "learning_rate": 4.800055171736902e-06, "loss": 0.5102, "step": 2371 }, { "epoch": 0.5278148642634624, "grad_norm": 1.352471401600216, "learning_rate": 4.796454536020193e-06, "loss": 0.4895, "step": 2372 }, { "epoch": 0.5280373831775701, "grad_norm": 1.2932213716487322, "learning_rate": 4.792854006030852e-06, "loss": 0.4898, "step": 2373 }, { "epoch": 0.5282599020916778, "grad_norm": 1.285062718073897, "learning_rate": 4.789253583639097e-06, "loss": 0.486, "step": 2374 }, { "epoch": 0.5284824210057855, "grad_norm": 1.3123814989569573, "learning_rate": 4.785653270715091e-06, "loss": 0.4973, "step": 2375 }, { "epoch": 0.5287049399198932, "grad_norm": 1.2979680061178809, "learning_rate": 4.78205306912894e-06, "loss": 0.4938, "step": 2376 }, { "epoch": 0.5289274588340009, "grad_norm": 1.397377633425388, "learning_rate": 4.77845298075069e-06, "loss": 0.5085, "step": 2377 }, { "epoch": 0.5291499777481086, "grad_norm": 1.3659724780301226, "learning_rate": 4.774853007450334e-06, "loss": 0.4993, "step": 2378 }, { "epoch": 0.5293724966622163, "grad_norm": 1.344635161763784, "learning_rate": 4.771253151097799e-06, "loss": 0.4964, "step": 2379 }, { "epoch": 0.5295950155763239, "grad_norm": 1.3160088923769693, "learning_rate": 4.767653413562956e-06, "loss": 0.4892, "step": 2380 }, { "epoch": 0.5298175344904317, "grad_norm": 1.32450979274095, "learning_rate": 4.764053796715613e-06, "loss": 0.4904, "step": 2381 }, { "epoch": 0.5300400534045394, "grad_norm": 1.3450751384397412, "learning_rate": 4.760454302425511e-06, "loss": 0.4872, "step": 2382 }, { "epoch": 0.530262572318647, "grad_norm": 1.3640792861713247, "learning_rate": 4.756854932562332e-06, "loss": 0.4959, "step": 2383 }, { "epoch": 0.5304850912327548, "grad_norm": 1.4010954509586666, "learning_rate": 4.753255688995692e-06, "loss": 0.4937, "step": 2384 }, { "epoch": 0.5307076101468625, "grad_norm": 1.3207680608926295, "learning_rate": 4.749656573595145e-06, "loss": 0.5145, "step": 2385 }, { "epoch": 0.5309301290609701, "grad_norm": 1.388979789025446, "learning_rate": 4.746057588230172e-06, "loss": 0.4882, "step": 2386 }, { "epoch": 0.5311526479750779, "grad_norm": 1.383375599356058, "learning_rate": 4.74245873477019e-06, "loss": 0.4848, "step": 2387 }, { "epoch": 0.5313751668891856, "grad_norm": 1.3170289760083236, "learning_rate": 4.738860015084546e-06, "loss": 0.4954, "step": 2388 }, { "epoch": 0.5315976858032933, "grad_norm": 1.332755322423392, "learning_rate": 4.73526143104252e-06, "loss": 0.4938, "step": 2389 }, { "epoch": 0.531820204717401, "grad_norm": 1.3020004534940472, "learning_rate": 4.7316629845133176e-06, "loss": 0.4777, "step": 2390 }, { "epoch": 0.5320427236315087, "grad_norm": 1.4126752266453428, "learning_rate": 4.728064677366079e-06, "loss": 0.4943, "step": 2391 }, { "epoch": 0.5322652425456164, "grad_norm": 1.361216532359496, "learning_rate": 4.724466511469867e-06, "loss": 0.4998, "step": 2392 }, { "epoch": 0.5324877614597241, "grad_norm": 1.2964912469490915, "learning_rate": 4.720868488693671e-06, "loss": 0.491, "step": 2393 }, { "epoch": 0.5327102803738317, "grad_norm": 1.3751064858063426, "learning_rate": 4.7172706109064096e-06, "loss": 0.5018, "step": 2394 }, { "epoch": 0.5329327992879395, "grad_norm": 1.3932062220966885, "learning_rate": 4.713672879976924e-06, "loss": 0.4953, "step": 2395 }, { "epoch": 0.5331553182020472, "grad_norm": 1.3332273991465557, "learning_rate": 4.710075297773974e-06, "loss": 0.5056, "step": 2396 }, { "epoch": 0.5333778371161548, "grad_norm": 1.3071087794239928, "learning_rate": 4.7064778661662545e-06, "loss": 0.5036, "step": 2397 }, { "epoch": 0.5336003560302626, "grad_norm": 1.2958461615169783, "learning_rate": 4.702880587022372e-06, "loss": 0.4662, "step": 2398 }, { "epoch": 0.5338228749443703, "grad_norm": 1.409050560180942, "learning_rate": 4.699283462210855e-06, "loss": 0.5016, "step": 2399 }, { "epoch": 0.5340453938584779, "grad_norm": 1.2914554668992217, "learning_rate": 4.6956864936001565e-06, "loss": 0.4808, "step": 2400 }, { "epoch": 0.5342679127725857, "grad_norm": 1.4189111887285155, "learning_rate": 4.692089683058643e-06, "loss": 0.4972, "step": 2401 }, { "epoch": 0.5344904316866934, "grad_norm": 1.2536887801718974, "learning_rate": 4.688493032454601e-06, "loss": 0.4926, "step": 2402 }, { "epoch": 0.534712950600801, "grad_norm": 1.36353358918582, "learning_rate": 4.684896543656237e-06, "loss": 0.4827, "step": 2403 }, { "epoch": 0.5349354695149088, "grad_norm": 1.3131434792236911, "learning_rate": 4.681300218531668e-06, "loss": 0.481, "step": 2404 }, { "epoch": 0.5351579884290165, "grad_norm": 1.3772027402325906, "learning_rate": 4.677704058948932e-06, "loss": 0.4783, "step": 2405 }, { "epoch": 0.5353805073431241, "grad_norm": 1.3545991340242232, "learning_rate": 4.6741080667759755e-06, "loss": 0.5063, "step": 2406 }, { "epoch": 0.5356030262572319, "grad_norm": 1.3304765672370233, "learning_rate": 4.67051224388066e-06, "loss": 0.5089, "step": 2407 }, { "epoch": 0.5358255451713395, "grad_norm": 1.2519961398992865, "learning_rate": 4.6669165921307575e-06, "loss": 0.4854, "step": 2408 }, { "epoch": 0.5360480640854473, "grad_norm": 1.2877005339668972, "learning_rate": 4.663321113393957e-06, "loss": 0.5012, "step": 2409 }, { "epoch": 0.536270582999555, "grad_norm": 1.3559765328571054, "learning_rate": 4.65972580953785e-06, "loss": 0.4908, "step": 2410 }, { "epoch": 0.5364931019136626, "grad_norm": 1.3214412768049366, "learning_rate": 4.656130682429945e-06, "loss": 0.5009, "step": 2411 }, { "epoch": 0.5367156208277704, "grad_norm": 1.3947994990051789, "learning_rate": 4.652535733937651e-06, "loss": 0.4888, "step": 2412 }, { "epoch": 0.5369381397418781, "grad_norm": 1.277683225640519, "learning_rate": 4.648940965928288e-06, "loss": 0.4772, "step": 2413 }, { "epoch": 0.5371606586559857, "grad_norm": 1.3249526723217544, "learning_rate": 4.6453463802690815e-06, "loss": 0.4972, "step": 2414 }, { "epoch": 0.5373831775700935, "grad_norm": 1.3407701385759743, "learning_rate": 4.641751978827162e-06, "loss": 0.5032, "step": 2415 }, { "epoch": 0.5376056964842012, "grad_norm": 1.3279697265539114, "learning_rate": 4.638157763469568e-06, "loss": 0.4891, "step": 2416 }, { "epoch": 0.5378282153983088, "grad_norm": 1.375078615475072, "learning_rate": 4.634563736063238e-06, "loss": 0.4745, "step": 2417 }, { "epoch": 0.5380507343124166, "grad_norm": 1.3822504052040916, "learning_rate": 4.630969898475011e-06, "loss": 0.5089, "step": 2418 }, { "epoch": 0.5382732532265243, "grad_norm": 1.3623299904208115, "learning_rate": 4.62737625257163e-06, "loss": 0.4789, "step": 2419 }, { "epoch": 0.5384957721406319, "grad_norm": 1.308929073632619, "learning_rate": 4.623782800219739e-06, "loss": 0.484, "step": 2420 }, { "epoch": 0.5387182910547397, "grad_norm": 1.3754137504067294, "learning_rate": 4.620189543285877e-06, "loss": 0.4879, "step": 2421 }, { "epoch": 0.5389408099688473, "grad_norm": 1.3935518218171168, "learning_rate": 4.616596483636491e-06, "loss": 0.5078, "step": 2422 }, { "epoch": 0.539163328882955, "grad_norm": 1.3543463655968644, "learning_rate": 4.613003623137916e-06, "loss": 0.4934, "step": 2423 }, { "epoch": 0.5393858477970628, "grad_norm": 1.3530978513360103, "learning_rate": 4.609410963656387e-06, "loss": 0.5061, "step": 2424 }, { "epoch": 0.5396083667111704, "grad_norm": 1.4096258666274557, "learning_rate": 4.6058185070580365e-06, "loss": 0.491, "step": 2425 }, { "epoch": 0.5398308856252781, "grad_norm": 1.2250015915404655, "learning_rate": 4.6022262552088874e-06, "loss": 0.4937, "step": 2426 }, { "epoch": 0.5400534045393859, "grad_norm": 1.280973783339964, "learning_rate": 4.598634209974858e-06, "loss": 0.4857, "step": 2427 }, { "epoch": 0.5402759234534935, "grad_norm": 1.2932145854622068, "learning_rate": 4.595042373221763e-06, "loss": 0.4987, "step": 2428 }, { "epoch": 0.5404984423676013, "grad_norm": 1.3032144447613474, "learning_rate": 4.591450746815305e-06, "loss": 0.5031, "step": 2429 }, { "epoch": 0.540720961281709, "grad_norm": 1.4528042230635958, "learning_rate": 4.587859332621076e-06, "loss": 0.5063, "step": 2430 }, { "epoch": 0.5409434801958166, "grad_norm": 1.3474325057252696, "learning_rate": 4.584268132504561e-06, "loss": 0.5041, "step": 2431 }, { "epoch": 0.5411659991099244, "grad_norm": 1.3197945108034197, "learning_rate": 4.580677148331134e-06, "loss": 0.4809, "step": 2432 }, { "epoch": 0.5413885180240321, "grad_norm": 1.3512942778340626, "learning_rate": 4.577086381966051e-06, "loss": 0.4937, "step": 2433 }, { "epoch": 0.5416110369381397, "grad_norm": 1.3850837358731845, "learning_rate": 4.5734958352744655e-06, "loss": 0.502, "step": 2434 }, { "epoch": 0.5418335558522475, "grad_norm": 1.3995755536998062, "learning_rate": 4.569905510121405e-06, "loss": 0.4951, "step": 2435 }, { "epoch": 0.5420560747663551, "grad_norm": 1.3414753532271584, "learning_rate": 4.566315408371792e-06, "loss": 0.4975, "step": 2436 }, { "epoch": 0.5422785936804628, "grad_norm": 1.3345838328507247, "learning_rate": 4.562725531890427e-06, "loss": 0.4941, "step": 2437 }, { "epoch": 0.5425011125945706, "grad_norm": 1.422270172922325, "learning_rate": 4.559135882541995e-06, "loss": 0.4997, "step": 2438 }, { "epoch": 0.5427236315086782, "grad_norm": 1.293812034322094, "learning_rate": 4.55554646219106e-06, "loss": 0.491, "step": 2439 }, { "epoch": 0.5429461504227859, "grad_norm": 1.4424796141133343, "learning_rate": 4.551957272702076e-06, "loss": 0.4734, "step": 2440 }, { "epoch": 0.5431686693368937, "grad_norm": 1.2967074132975944, "learning_rate": 4.548368315939366e-06, "loss": 0.5074, "step": 2441 }, { "epoch": 0.5433911882510013, "grad_norm": 1.3836612210831416, "learning_rate": 4.5447795937671426e-06, "loss": 0.5031, "step": 2442 }, { "epoch": 0.543613707165109, "grad_norm": 1.4051681190753291, "learning_rate": 4.541191108049487e-06, "loss": 0.4768, "step": 2443 }, { "epoch": 0.5438362260792168, "grad_norm": 1.2598588840582028, "learning_rate": 4.537602860650364e-06, "loss": 0.4724, "step": 2444 }, { "epoch": 0.5440587449933244, "grad_norm": 1.4274221123386333, "learning_rate": 4.534014853433609e-06, "loss": 0.4841, "step": 2445 }, { "epoch": 0.5442812639074321, "grad_norm": 1.335700134507707, "learning_rate": 4.53042708826294e-06, "loss": 0.4856, "step": 2446 }, { "epoch": 0.5445037828215399, "grad_norm": 1.3136633252253553, "learning_rate": 4.526839567001945e-06, "loss": 0.4989, "step": 2447 }, { "epoch": 0.5447263017356475, "grad_norm": 1.3821413887049172, "learning_rate": 4.523252291514086e-06, "loss": 0.4984, "step": 2448 }, { "epoch": 0.5449488206497553, "grad_norm": 1.2321879762194339, "learning_rate": 4.519665263662696e-06, "loss": 0.4758, "step": 2449 }, { "epoch": 0.5451713395638629, "grad_norm": 1.3507604568698846, "learning_rate": 4.516078485310978e-06, "loss": 0.4924, "step": 2450 }, { "epoch": 0.5453938584779706, "grad_norm": 1.32424044930724, "learning_rate": 4.512491958322012e-06, "loss": 0.5036, "step": 2451 }, { "epoch": 0.5456163773920784, "grad_norm": 1.3711383837014366, "learning_rate": 4.5089056845587385e-06, "loss": 0.4803, "step": 2452 }, { "epoch": 0.545838896306186, "grad_norm": 1.3450679258438032, "learning_rate": 4.505319665883977e-06, "loss": 0.5027, "step": 2453 }, { "epoch": 0.5460614152202937, "grad_norm": 1.2633823436853995, "learning_rate": 4.501733904160406e-06, "loss": 0.4902, "step": 2454 }, { "epoch": 0.5462839341344015, "grad_norm": 1.3375862428018948, "learning_rate": 4.4981484012505726e-06, "loss": 0.48, "step": 2455 }, { "epoch": 0.5465064530485091, "grad_norm": 1.4582172532235749, "learning_rate": 4.494563159016891e-06, "loss": 0.4981, "step": 2456 }, { "epoch": 0.5467289719626168, "grad_norm": 1.3757455309260642, "learning_rate": 4.490978179321638e-06, "loss": 0.4809, "step": 2457 }, { "epoch": 0.5469514908767246, "grad_norm": 1.3580359994270224, "learning_rate": 4.487393464026955e-06, "loss": 0.5007, "step": 2458 }, { "epoch": 0.5471740097908322, "grad_norm": 1.3142927536196947, "learning_rate": 4.483809014994849e-06, "loss": 0.4889, "step": 2459 }, { "epoch": 0.5473965287049399, "grad_norm": 1.291798127413654, "learning_rate": 4.480224834087183e-06, "loss": 0.474, "step": 2460 }, { "epoch": 0.5476190476190477, "grad_norm": 1.35458946848696, "learning_rate": 4.476640923165685e-06, "loss": 0.4833, "step": 2461 }, { "epoch": 0.5478415665331553, "grad_norm": 1.3712565272591064, "learning_rate": 4.473057284091942e-06, "loss": 0.5, "step": 2462 }, { "epoch": 0.548064085447263, "grad_norm": 1.3719763044472428, "learning_rate": 4.469473918727397e-06, "loss": 0.4729, "step": 2463 }, { "epoch": 0.5482866043613707, "grad_norm": 1.4012302661788116, "learning_rate": 4.465890828933353e-06, "loss": 0.5099, "step": 2464 }, { "epoch": 0.5485091232754784, "grad_norm": 1.3997871974822806, "learning_rate": 4.462308016570973e-06, "loss": 0.4959, "step": 2465 }, { "epoch": 0.5487316421895861, "grad_norm": 1.4609979266710234, "learning_rate": 4.45872548350127e-06, "loss": 0.4953, "step": 2466 }, { "epoch": 0.5489541611036938, "grad_norm": 1.327706968133401, "learning_rate": 4.4551432315851165e-06, "loss": 0.5019, "step": 2467 }, { "epoch": 0.5491766800178015, "grad_norm": 1.3276809066945965, "learning_rate": 4.451561262683236e-06, "loss": 0.4718, "step": 2468 }, { "epoch": 0.5493991989319092, "grad_norm": 1.4755894230838889, "learning_rate": 4.447979578656207e-06, "loss": 0.4925, "step": 2469 }, { "epoch": 0.5496217178460169, "grad_norm": 1.2838080487873844, "learning_rate": 4.444398181364456e-06, "loss": 0.4818, "step": 2470 }, { "epoch": 0.5498442367601246, "grad_norm": 1.2798287835103859, "learning_rate": 4.440817072668268e-06, "loss": 0.4955, "step": 2471 }, { "epoch": 0.5500667556742324, "grad_norm": 1.3773368666625985, "learning_rate": 4.437236254427772e-06, "loss": 0.5, "step": 2472 }, { "epoch": 0.55028927458834, "grad_norm": 1.356217911157425, "learning_rate": 4.4336557285029494e-06, "loss": 0.4757, "step": 2473 }, { "epoch": 0.5505117935024477, "grad_norm": 1.3299238119619765, "learning_rate": 4.430075496753626e-06, "loss": 0.4929, "step": 2474 }, { "epoch": 0.5507343124165555, "grad_norm": 1.3370862440050568, "learning_rate": 4.426495561039477e-06, "loss": 0.4911, "step": 2475 }, { "epoch": 0.5509568313306631, "grad_norm": 1.3057004157585002, "learning_rate": 4.422915923220025e-06, "loss": 0.4992, "step": 2476 }, { "epoch": 0.5511793502447708, "grad_norm": 1.3135866572666628, "learning_rate": 4.419336585154638e-06, "loss": 0.4987, "step": 2477 }, { "epoch": 0.5514018691588785, "grad_norm": 1.301495932726821, "learning_rate": 4.4157575487025265e-06, "loss": 0.4823, "step": 2478 }, { "epoch": 0.5516243880729862, "grad_norm": 1.3937563047950532, "learning_rate": 4.412178815722744e-06, "loss": 0.4925, "step": 2479 }, { "epoch": 0.5518469069870939, "grad_norm": 1.3528664334047615, "learning_rate": 4.408600388074188e-06, "loss": 0.4946, "step": 2480 }, { "epoch": 0.5520694259012016, "grad_norm": 1.682426519706587, "learning_rate": 4.405022267615595e-06, "loss": 0.4968, "step": 2481 }, { "epoch": 0.5522919448153093, "grad_norm": 1.4836532050001578, "learning_rate": 4.401444456205546e-06, "loss": 0.496, "step": 2482 }, { "epoch": 0.552514463729417, "grad_norm": 1.3609187815164678, "learning_rate": 4.397866955702458e-06, "loss": 0.4936, "step": 2483 }, { "epoch": 0.5527369826435247, "grad_norm": 1.3993279924291289, "learning_rate": 4.3942897679645895e-06, "loss": 0.4939, "step": 2484 }, { "epoch": 0.5529595015576324, "grad_norm": 1.3708073491575758, "learning_rate": 4.390712894850033e-06, "loss": 0.4969, "step": 2485 }, { "epoch": 0.5531820204717401, "grad_norm": 1.3734492483296312, "learning_rate": 4.387136338216718e-06, "loss": 0.4799, "step": 2486 }, { "epoch": 0.5534045393858478, "grad_norm": 1.3099142080245758, "learning_rate": 4.383560099922413e-06, "loss": 0.4725, "step": 2487 }, { "epoch": 0.5536270582999555, "grad_norm": 1.4012360042996501, "learning_rate": 4.379984181824718e-06, "loss": 0.4802, "step": 2488 }, { "epoch": 0.5538495772140632, "grad_norm": 1.4315983029849162, "learning_rate": 4.376408585781067e-06, "loss": 0.4994, "step": 2489 }, { "epoch": 0.5540720961281709, "grad_norm": 1.4613905968379497, "learning_rate": 4.372833313648729e-06, "loss": 0.4823, "step": 2490 }, { "epoch": 0.5542946150422786, "grad_norm": 1.3333830748584763, "learning_rate": 4.369258367284803e-06, "loss": 0.4696, "step": 2491 }, { "epoch": 0.5545171339563862, "grad_norm": 1.3463483161479546, "learning_rate": 4.365683748546218e-06, "loss": 0.4814, "step": 2492 }, { "epoch": 0.554739652870494, "grad_norm": 1.372985013164312, "learning_rate": 4.362109459289734e-06, "loss": 0.495, "step": 2493 }, { "epoch": 0.5549621717846017, "grad_norm": 1.3238465080649897, "learning_rate": 4.358535501371941e-06, "loss": 0.4977, "step": 2494 }, { "epoch": 0.5551846906987093, "grad_norm": 1.4468390299849831, "learning_rate": 4.354961876649252e-06, "loss": 0.4981, "step": 2495 }, { "epoch": 0.5554072096128171, "grad_norm": 1.3261819096788252, "learning_rate": 4.351388586977914e-06, "loss": 0.4836, "step": 2496 }, { "epoch": 0.5556297285269248, "grad_norm": 1.4253156100834132, "learning_rate": 4.347815634213995e-06, "loss": 0.4935, "step": 2497 }, { "epoch": 0.5558522474410325, "grad_norm": 1.5383821001651234, "learning_rate": 4.34424302021339e-06, "loss": 0.4868, "step": 2498 }, { "epoch": 0.5560747663551402, "grad_norm": 1.4385397886035325, "learning_rate": 4.340670746831818e-06, "loss": 0.4974, "step": 2499 }, { "epoch": 0.5562972852692479, "grad_norm": 1.382611711154089, "learning_rate": 4.3370988159248185e-06, "loss": 0.4966, "step": 2500 }, { "epoch": 0.5565198041833556, "grad_norm": 1.3689811768715163, "learning_rate": 4.333527229347754e-06, "loss": 0.5016, "step": 2501 }, { "epoch": 0.5567423230974633, "grad_norm": 1.4017053341538286, "learning_rate": 4.329955988955814e-06, "loss": 0.5051, "step": 2502 }, { "epoch": 0.556964842011571, "grad_norm": 1.4642219725717056, "learning_rate": 4.326385096604e-06, "loss": 0.4797, "step": 2503 }, { "epoch": 0.5571873609256787, "grad_norm": 1.4088309143623052, "learning_rate": 4.322814554147138e-06, "loss": 0.4922, "step": 2504 }, { "epoch": 0.5574098798397864, "grad_norm": 1.3304722709601464, "learning_rate": 4.3192443634398695e-06, "loss": 0.4802, "step": 2505 }, { "epoch": 0.557632398753894, "grad_norm": 1.49490830439239, "learning_rate": 4.3156745263366526e-06, "loss": 0.4824, "step": 2506 }, { "epoch": 0.5578549176680018, "grad_norm": 1.4064186097872478, "learning_rate": 4.312105044691766e-06, "loss": 0.4965, "step": 2507 }, { "epoch": 0.5580774365821095, "grad_norm": 1.4491807986164962, "learning_rate": 4.308535920359299e-06, "loss": 0.496, "step": 2508 }, { "epoch": 0.5582999554962171, "grad_norm": 1.3646382167026887, "learning_rate": 4.304967155193159e-06, "loss": 0.4768, "step": 2509 }, { "epoch": 0.5585224744103249, "grad_norm": 1.5160184368151755, "learning_rate": 4.3013987510470665e-06, "loss": 0.4926, "step": 2510 }, { "epoch": 0.5587449933244326, "grad_norm": 1.3614153278522336, "learning_rate": 4.297830709774551e-06, "loss": 0.4849, "step": 2511 }, { "epoch": 0.5589675122385402, "grad_norm": 1.3720530226057528, "learning_rate": 4.294263033228954e-06, "loss": 0.4862, "step": 2512 }, { "epoch": 0.559190031152648, "grad_norm": 1.4315825509496483, "learning_rate": 4.290695723263432e-06, "loss": 0.4996, "step": 2513 }, { "epoch": 0.5594125500667557, "grad_norm": 1.36906936912417, "learning_rate": 4.287128781730947e-06, "loss": 0.48, "step": 2514 }, { "epoch": 0.5596350689808633, "grad_norm": 1.393533014082014, "learning_rate": 4.283562210484275e-06, "loss": 0.4983, "step": 2515 }, { "epoch": 0.5598575878949711, "grad_norm": 1.3353926353718513, "learning_rate": 4.279996011375991e-06, "loss": 0.4702, "step": 2516 }, { "epoch": 0.5600801068090788, "grad_norm": 1.4827411938024975, "learning_rate": 4.276430186258482e-06, "loss": 0.4979, "step": 2517 }, { "epoch": 0.5603026257231865, "grad_norm": 1.3412188432905081, "learning_rate": 4.2728647369839425e-06, "loss": 0.4775, "step": 2518 }, { "epoch": 0.5605251446372942, "grad_norm": 1.3834494954882623, "learning_rate": 4.269299665404365e-06, "loss": 0.4901, "step": 2519 }, { "epoch": 0.5607476635514018, "grad_norm": 1.466527603173983, "learning_rate": 4.2657349733715555e-06, "loss": 0.4682, "step": 2520 }, { "epoch": 0.5609701824655096, "grad_norm": 1.3629783954435184, "learning_rate": 4.262170662737115e-06, "loss": 0.481, "step": 2521 }, { "epoch": 0.5611927013796173, "grad_norm": 1.324337226454841, "learning_rate": 4.258606735352449e-06, "loss": 0.484, "step": 2522 }, { "epoch": 0.5614152202937249, "grad_norm": 1.5289355609145372, "learning_rate": 4.255043193068763e-06, "loss": 0.5017, "step": 2523 }, { "epoch": 0.5616377392078327, "grad_norm": 1.4053201575741852, "learning_rate": 4.251480037737065e-06, "loss": 0.4772, "step": 2524 }, { "epoch": 0.5618602581219404, "grad_norm": 1.4226872212388764, "learning_rate": 4.247917271208161e-06, "loss": 0.4836, "step": 2525 }, { "epoch": 0.562082777036048, "grad_norm": 1.4457174479414772, "learning_rate": 4.244354895332649e-06, "loss": 0.5009, "step": 2526 }, { "epoch": 0.5623052959501558, "grad_norm": 1.4945866992691668, "learning_rate": 4.240792911960936e-06, "loss": 0.483, "step": 2527 }, { "epoch": 0.5625278148642635, "grad_norm": 1.4274648808269186, "learning_rate": 4.237231322943216e-06, "loss": 0.4837, "step": 2528 }, { "epoch": 0.5627503337783711, "grad_norm": 1.3023900076542723, "learning_rate": 4.2336701301294805e-06, "loss": 0.4681, "step": 2529 }, { "epoch": 0.5629728526924789, "grad_norm": 1.5116045144856978, "learning_rate": 4.2301093353695165e-06, "loss": 0.49, "step": 2530 }, { "epoch": 0.5631953716065866, "grad_norm": 1.4420382931874998, "learning_rate": 4.2265489405129015e-06, "loss": 0.4997, "step": 2531 }, { "epoch": 0.5634178905206942, "grad_norm": 1.321766387409974, "learning_rate": 4.222988947409007e-06, "loss": 0.4868, "step": 2532 }, { "epoch": 0.563640409434802, "grad_norm": 1.3420441902117242, "learning_rate": 4.219429357906998e-06, "loss": 0.4652, "step": 2533 }, { "epoch": 0.5638629283489096, "grad_norm": 1.4319088113008172, "learning_rate": 4.215870173855825e-06, "loss": 0.4813, "step": 2534 }, { "epoch": 0.5640854472630173, "grad_norm": 1.3867922408377757, "learning_rate": 4.212311397104233e-06, "loss": 0.4801, "step": 2535 }, { "epoch": 0.5643079661771251, "grad_norm": 1.3540408376810273, "learning_rate": 4.208753029500753e-06, "loss": 0.4896, "step": 2536 }, { "epoch": 0.5645304850912327, "grad_norm": 1.2985273864555096, "learning_rate": 4.2051950728937e-06, "loss": 0.4739, "step": 2537 }, { "epoch": 0.5647530040053405, "grad_norm": 1.4472714739728996, "learning_rate": 4.2016375291311815e-06, "loss": 0.4918, "step": 2538 }, { "epoch": 0.5649755229194482, "grad_norm": 1.3425048916352267, "learning_rate": 4.198080400061088e-06, "loss": 0.4743, "step": 2539 }, { "epoch": 0.5651980418335558, "grad_norm": 1.238187150901619, "learning_rate": 4.194523687531096e-06, "loss": 0.466, "step": 2540 }, { "epoch": 0.5654205607476636, "grad_norm": 1.408165966978395, "learning_rate": 4.190967393388662e-06, "loss": 0.4885, "step": 2541 }, { "epoch": 0.5656430796617713, "grad_norm": 1.3534372119062457, "learning_rate": 4.187411519481028e-06, "loss": 0.4878, "step": 2542 }, { "epoch": 0.5658655985758789, "grad_norm": 1.3143924089721564, "learning_rate": 4.183856067655216e-06, "loss": 0.479, "step": 2543 }, { "epoch": 0.5660881174899867, "grad_norm": 1.4354047934287226, "learning_rate": 4.1803010397580315e-06, "loss": 0.4967, "step": 2544 }, { "epoch": 0.5663106364040944, "grad_norm": 1.3631888765968985, "learning_rate": 4.176746437636059e-06, "loss": 0.4773, "step": 2545 }, { "epoch": 0.566533155318202, "grad_norm": 1.5599104638699566, "learning_rate": 4.17319226313566e-06, "loss": 0.4812, "step": 2546 }, { "epoch": 0.5667556742323098, "grad_norm": 1.4091157014061038, "learning_rate": 4.169638518102975e-06, "loss": 0.4981, "step": 2547 }, { "epoch": 0.5669781931464174, "grad_norm": 1.443533623250907, "learning_rate": 4.16608520438392e-06, "loss": 0.4743, "step": 2548 }, { "epoch": 0.5672007120605251, "grad_norm": 1.314895462467856, "learning_rate": 4.162532323824191e-06, "loss": 0.4738, "step": 2549 }, { "epoch": 0.5674232309746329, "grad_norm": 1.3745160846550422, "learning_rate": 4.158979878269253e-06, "loss": 0.4887, "step": 2550 }, { "epoch": 0.5676457498887405, "grad_norm": 1.41842738971469, "learning_rate": 4.155427869564352e-06, "loss": 0.488, "step": 2551 }, { "epoch": 0.5678682688028482, "grad_norm": 1.3742398226872674, "learning_rate": 4.151876299554501e-06, "loss": 0.4795, "step": 2552 }, { "epoch": 0.568090787716956, "grad_norm": 1.3847160380147854, "learning_rate": 4.148325170084489e-06, "loss": 0.4962, "step": 2553 }, { "epoch": 0.5683133066310636, "grad_norm": 1.382886215327261, "learning_rate": 4.144774482998874e-06, "loss": 0.4943, "step": 2554 }, { "epoch": 0.5685358255451713, "grad_norm": 1.4619785899451732, "learning_rate": 4.141224240141986e-06, "loss": 0.4943, "step": 2555 }, { "epoch": 0.5687583444592791, "grad_norm": 1.4598681447300894, "learning_rate": 4.137674443357919e-06, "loss": 0.5008, "step": 2556 }, { "epoch": 0.5689808633733867, "grad_norm": 1.4476390834112047, "learning_rate": 4.134125094490545e-06, "loss": 0.4889, "step": 2557 }, { "epoch": 0.5692033822874945, "grad_norm": 1.4609277558194624, "learning_rate": 4.130576195383497e-06, "loss": 0.4946, "step": 2558 }, { "epoch": 0.5694259012016022, "grad_norm": 1.3139432501961947, "learning_rate": 4.127027747880173e-06, "loss": 0.4836, "step": 2559 }, { "epoch": 0.5696484201157098, "grad_norm": 1.3902263053665465, "learning_rate": 4.123479753823739e-06, "loss": 0.4736, "step": 2560 }, { "epoch": 0.5698709390298176, "grad_norm": 1.451812036196626, "learning_rate": 4.119932215057126e-06, "loss": 0.4923, "step": 2561 }, { "epoch": 0.5700934579439252, "grad_norm": 1.3594992271114787, "learning_rate": 4.116385133423029e-06, "loss": 0.4991, "step": 2562 }, { "epoch": 0.5703159768580329, "grad_norm": 1.3275119251748015, "learning_rate": 4.112838510763899e-06, "loss": 0.4842, "step": 2563 }, { "epoch": 0.5705384957721407, "grad_norm": 1.3835584756419415, "learning_rate": 4.10929234892196e-06, "loss": 0.4904, "step": 2564 }, { "epoch": 0.5707610146862483, "grad_norm": 1.3488508280257154, "learning_rate": 4.1057466497391875e-06, "loss": 0.504, "step": 2565 }, { "epoch": 0.570983533600356, "grad_norm": 1.3512876904927218, "learning_rate": 4.10220141505732e-06, "loss": 0.4796, "step": 2566 }, { "epoch": 0.5712060525144638, "grad_norm": 1.375371241049644, "learning_rate": 4.098656646717854e-06, "loss": 0.4936, "step": 2567 }, { "epoch": 0.5714285714285714, "grad_norm": 1.3888094275546008, "learning_rate": 4.095112346562045e-06, "loss": 0.4934, "step": 2568 }, { "epoch": 0.5716510903426791, "grad_norm": 1.4246186235436535, "learning_rate": 4.091568516430902e-06, "loss": 0.4732, "step": 2569 }, { "epoch": 0.5718736092567869, "grad_norm": 1.4070904830907853, "learning_rate": 4.088025158165196e-06, "loss": 0.4955, "step": 2570 }, { "epoch": 0.5720961281708945, "grad_norm": 1.3114673476333407, "learning_rate": 4.0844822736054464e-06, "loss": 0.477, "step": 2571 }, { "epoch": 0.5723186470850022, "grad_norm": 1.3176803442333815, "learning_rate": 4.08093986459193e-06, "loss": 0.4753, "step": 2572 }, { "epoch": 0.5725411659991099, "grad_norm": 1.386162350548333, "learning_rate": 4.077397932964679e-06, "loss": 0.4758, "step": 2573 }, { "epoch": 0.5727636849132176, "grad_norm": 1.4377099191338287, "learning_rate": 4.073856480563468e-06, "loss": 0.4879, "step": 2574 }, { "epoch": 0.5729862038273253, "grad_norm": 1.513050741900984, "learning_rate": 4.0703155092278324e-06, "loss": 0.5041, "step": 2575 }, { "epoch": 0.573208722741433, "grad_norm": 1.430792143144712, "learning_rate": 4.0667750207970565e-06, "loss": 0.4881, "step": 2576 }, { "epoch": 0.5734312416555407, "grad_norm": 1.3958184335882584, "learning_rate": 4.063235017110172e-06, "loss": 0.4959, "step": 2577 }, { "epoch": 0.5736537605696485, "grad_norm": 1.3400982777259187, "learning_rate": 4.059695500005955e-06, "loss": 0.4779, "step": 2578 }, { "epoch": 0.5738762794837561, "grad_norm": 1.4336448429306567, "learning_rate": 4.056156471322934e-06, "loss": 0.4713, "step": 2579 }, { "epoch": 0.5740987983978638, "grad_norm": 1.3920687672070051, "learning_rate": 4.052617932899384e-06, "loss": 0.474, "step": 2580 }, { "epoch": 0.5743213173119716, "grad_norm": 1.423752427385013, "learning_rate": 4.04907988657332e-06, "loss": 0.4963, "step": 2581 }, { "epoch": 0.5745438362260792, "grad_norm": 1.3534787841706917, "learning_rate": 4.0455423341825094e-06, "loss": 0.4814, "step": 2582 }, { "epoch": 0.5747663551401869, "grad_norm": 1.3056828293894838, "learning_rate": 4.042005277564456e-06, "loss": 0.4751, "step": 2583 }, { "epoch": 0.5749888740542947, "grad_norm": 1.4431703742779822, "learning_rate": 4.03846871855641e-06, "loss": 0.4678, "step": 2584 }, { "epoch": 0.5752113929684023, "grad_norm": 1.3652850289748495, "learning_rate": 4.034932658995359e-06, "loss": 0.4895, "step": 2585 }, { "epoch": 0.57543391188251, "grad_norm": 1.4648644536058588, "learning_rate": 4.031397100718038e-06, "loss": 0.477, "step": 2586 }, { "epoch": 0.5756564307966177, "grad_norm": 1.5121892747278978, "learning_rate": 4.027862045560913e-06, "loss": 0.4829, "step": 2587 }, { "epoch": 0.5758789497107254, "grad_norm": 1.5058685801737748, "learning_rate": 4.024327495360198e-06, "loss": 0.5091, "step": 2588 }, { "epoch": 0.5761014686248331, "grad_norm": 1.4338092629856831, "learning_rate": 4.020793451951839e-06, "loss": 0.5037, "step": 2589 }, { "epoch": 0.5763239875389408, "grad_norm": 1.3893641914625505, "learning_rate": 4.017259917171516e-06, "loss": 0.495, "step": 2590 }, { "epoch": 0.5765465064530485, "grad_norm": 1.4249148265531526, "learning_rate": 4.013726892854654e-06, "loss": 0.4712, "step": 2591 }, { "epoch": 0.5767690253671562, "grad_norm": 1.4740095712068828, "learning_rate": 4.010194380836403e-06, "loss": 0.496, "step": 2592 }, { "epoch": 0.5769915442812639, "grad_norm": 1.5009645576628954, "learning_rate": 4.006662382951653e-06, "loss": 0.4926, "step": 2593 }, { "epoch": 0.5772140631953716, "grad_norm": 1.4636154458817932, "learning_rate": 4.0031309010350265e-06, "loss": 0.4746, "step": 2594 }, { "epoch": 0.5774365821094793, "grad_norm": 1.3956564058585643, "learning_rate": 3.999599936920875e-06, "loss": 0.4847, "step": 2595 }, { "epoch": 0.577659101023587, "grad_norm": 1.3714542352872123, "learning_rate": 3.996069492443283e-06, "loss": 0.4867, "step": 2596 }, { "epoch": 0.5778816199376947, "grad_norm": 1.4335413471309923, "learning_rate": 3.992539569436067e-06, "loss": 0.4827, "step": 2597 }, { "epoch": 0.5781041388518025, "grad_norm": 1.4267714764531467, "learning_rate": 3.9890101697327676e-06, "loss": 0.47, "step": 2598 }, { "epoch": 0.5783266577659101, "grad_norm": 1.4294503142549224, "learning_rate": 3.985481295166658e-06, "loss": 0.4765, "step": 2599 }, { "epoch": 0.5785491766800178, "grad_norm": 1.3632445718553583, "learning_rate": 3.981952947570735e-06, "loss": 0.4742, "step": 2600 }, { "epoch": 0.5787716955941254, "grad_norm": 1.4509533583278924, "learning_rate": 3.9784251287777274e-06, "loss": 0.484, "step": 2601 }, { "epoch": 0.5789942145082332, "grad_norm": 1.6089979293139944, "learning_rate": 3.974897840620084e-06, "loss": 0.4809, "step": 2602 }, { "epoch": 0.5792167334223409, "grad_norm": 1.482340463009842, "learning_rate": 3.97137108492998e-06, "loss": 0.4833, "step": 2603 }, { "epoch": 0.5794392523364486, "grad_norm": 1.4656447137449475, "learning_rate": 3.967844863539313e-06, "loss": 0.4978, "step": 2604 }, { "epoch": 0.5796617712505563, "grad_norm": 1.4483956801966, "learning_rate": 3.964319178279703e-06, "loss": 0.4939, "step": 2605 }, { "epoch": 0.579884290164664, "grad_norm": 1.4009201012432233, "learning_rate": 3.960794030982493e-06, "loss": 0.4923, "step": 2606 }, { "epoch": 0.5801068090787717, "grad_norm": 1.4054126610141187, "learning_rate": 3.957269423478748e-06, "loss": 0.4813, "step": 2607 }, { "epoch": 0.5803293279928794, "grad_norm": 1.3207579885167213, "learning_rate": 3.95374535759925e-06, "loss": 0.4634, "step": 2608 }, { "epoch": 0.5805518469069871, "grad_norm": 1.4486077510965756, "learning_rate": 3.950221835174498e-06, "loss": 0.4901, "step": 2609 }, { "epoch": 0.5807743658210948, "grad_norm": 1.4139399635519994, "learning_rate": 3.94669885803471e-06, "loss": 0.4804, "step": 2610 }, { "epoch": 0.5809968847352025, "grad_norm": 1.4015002940007872, "learning_rate": 3.943176428009826e-06, "loss": 0.4844, "step": 2611 }, { "epoch": 0.5812194036493102, "grad_norm": 1.3840207787344614, "learning_rate": 3.939654546929492e-06, "loss": 0.4709, "step": 2612 }, { "epoch": 0.5814419225634179, "grad_norm": 1.4481787891597775, "learning_rate": 3.93613321662308e-06, "loss": 0.505, "step": 2613 }, { "epoch": 0.5816644414775256, "grad_norm": 1.5090106313825347, "learning_rate": 3.932612438919667e-06, "loss": 0.467, "step": 2614 }, { "epoch": 0.5818869603916332, "grad_norm": 1.4841694529949272, "learning_rate": 3.929092215648046e-06, "loss": 0.4887, "step": 2615 }, { "epoch": 0.582109479305741, "grad_norm": 1.4678874772019088, "learning_rate": 3.925572548636722e-06, "loss": 0.4795, "step": 2616 }, { "epoch": 0.5823319982198487, "grad_norm": 1.3393518990167674, "learning_rate": 3.922053439713914e-06, "loss": 0.4766, "step": 2617 }, { "epoch": 0.5825545171339563, "grad_norm": 1.5202169436994024, "learning_rate": 3.9185348907075435e-06, "loss": 0.4904, "step": 2618 }, { "epoch": 0.5827770360480641, "grad_norm": 1.4540529005954181, "learning_rate": 3.915016903445252e-06, "loss": 0.4887, "step": 2619 }, { "epoch": 0.5829995549621718, "grad_norm": 1.3488940669971268, "learning_rate": 3.9114994797543795e-06, "loss": 0.4801, "step": 2620 }, { "epoch": 0.5832220738762794, "grad_norm": 1.3224036149740304, "learning_rate": 3.907982621461978e-06, "loss": 0.4804, "step": 2621 }, { "epoch": 0.5834445927903872, "grad_norm": 1.4504454311985282, "learning_rate": 3.904466330394804e-06, "loss": 0.4696, "step": 2622 }, { "epoch": 0.5836671117044949, "grad_norm": 1.480334291550744, "learning_rate": 3.900950608379322e-06, "loss": 0.4802, "step": 2623 }, { "epoch": 0.5838896306186026, "grad_norm": 1.4625124220299348, "learning_rate": 3.8974354572416936e-06, "loss": 0.4795, "step": 2624 }, { "epoch": 0.5841121495327103, "grad_norm": 1.4010175598963648, "learning_rate": 3.893920878807797e-06, "loss": 0.4612, "step": 2625 }, { "epoch": 0.584334668446818, "grad_norm": 1.475812047050076, "learning_rate": 3.890406874903203e-06, "loss": 0.4945, "step": 2626 }, { "epoch": 0.5845571873609257, "grad_norm": 1.466945655815867, "learning_rate": 3.8868934473531836e-06, "loss": 0.4702, "step": 2627 }, { "epoch": 0.5847797062750334, "grad_norm": 1.3773988534234847, "learning_rate": 3.883380597982716e-06, "loss": 0.4933, "step": 2628 }, { "epoch": 0.585002225189141, "grad_norm": 1.336240395104176, "learning_rate": 3.879868328616476e-06, "loss": 0.467, "step": 2629 }, { "epoch": 0.5852247441032488, "grad_norm": 1.4185443946542118, "learning_rate": 3.876356641078833e-06, "loss": 0.5019, "step": 2630 }, { "epoch": 0.5854472630173565, "grad_norm": 1.4531910898104499, "learning_rate": 3.8728455371938654e-06, "loss": 0.4906, "step": 2631 }, { "epoch": 0.5856697819314641, "grad_norm": 1.4549932508330705, "learning_rate": 3.869335018785335e-06, "loss": 0.4862, "step": 2632 }, { "epoch": 0.5858923008455719, "grad_norm": 1.4004715163166286, "learning_rate": 3.865825087676711e-06, "loss": 0.4669, "step": 2633 }, { "epoch": 0.5861148197596796, "grad_norm": 1.4768808258369008, "learning_rate": 3.862315745691149e-06, "loss": 0.4762, "step": 2634 }, { "epoch": 0.5863373386737872, "grad_norm": 1.3770419252091637, "learning_rate": 3.8588069946515055e-06, "loss": 0.4822, "step": 2635 }, { "epoch": 0.586559857587895, "grad_norm": 1.3859412856711981, "learning_rate": 3.855298836380323e-06, "loss": 0.4924, "step": 2636 }, { "epoch": 0.5867823765020027, "grad_norm": 1.2993259305894551, "learning_rate": 3.85179127269984e-06, "loss": 0.4793, "step": 2637 }, { "epoch": 0.5870048954161103, "grad_norm": 1.4510880071085783, "learning_rate": 3.8482843054319895e-06, "loss": 0.466, "step": 2638 }, { "epoch": 0.5872274143302181, "grad_norm": 1.3087913056782945, "learning_rate": 3.84477793639839e-06, "loss": 0.4974, "step": 2639 }, { "epoch": 0.5874499332443258, "grad_norm": 1.4415707911605828, "learning_rate": 3.841272167420348e-06, "loss": 0.476, "step": 2640 }, { "epoch": 0.5876724521584334, "grad_norm": 1.383305951653064, "learning_rate": 3.837767000318862e-06, "loss": 0.4905, "step": 2641 }, { "epoch": 0.5878949710725412, "grad_norm": 1.4330907260471477, "learning_rate": 3.834262436914618e-06, "loss": 0.4856, "step": 2642 }, { "epoch": 0.5881174899866488, "grad_norm": 1.430534147921742, "learning_rate": 3.830758479027983e-06, "loss": 0.464, "step": 2643 }, { "epoch": 0.5883400089007566, "grad_norm": 1.377041908320112, "learning_rate": 3.82725512847902e-06, "loss": 0.4927, "step": 2644 }, { "epoch": 0.5885625278148643, "grad_norm": 1.3288509004080034, "learning_rate": 3.8237523870874645e-06, "loss": 0.4971, "step": 2645 }, { "epoch": 0.5887850467289719, "grad_norm": 1.4730656764536851, "learning_rate": 3.820250256672744e-06, "loss": 0.5022, "step": 2646 }, { "epoch": 0.5890075656430797, "grad_norm": 1.3774208087513247, "learning_rate": 3.816748739053963e-06, "loss": 0.474, "step": 2647 }, { "epoch": 0.5892300845571874, "grad_norm": 1.4539551769173693, "learning_rate": 3.8132478360499128e-06, "loss": 0.4764, "step": 2648 }, { "epoch": 0.589452603471295, "grad_norm": 1.468685649132737, "learning_rate": 3.809747549479061e-06, "loss": 0.5014, "step": 2649 }, { "epoch": 0.5896751223854028, "grad_norm": 1.357628224821566, "learning_rate": 3.8062478811595603e-06, "loss": 0.4851, "step": 2650 }, { "epoch": 0.5898976412995105, "grad_norm": 1.3298374283353762, "learning_rate": 3.802748832909237e-06, "loss": 0.4862, "step": 2651 }, { "epoch": 0.5901201602136181, "grad_norm": 1.337331830876358, "learning_rate": 3.799250406545597e-06, "loss": 0.4669, "step": 2652 }, { "epoch": 0.5903426791277259, "grad_norm": 1.4872469124227974, "learning_rate": 3.795752603885825e-06, "loss": 0.4659, "step": 2653 }, { "epoch": 0.5905651980418336, "grad_norm": 1.4027051376535498, "learning_rate": 3.7922554267467794e-06, "loss": 0.4793, "step": 2654 }, { "epoch": 0.5907877169559412, "grad_norm": 1.3351332456280838, "learning_rate": 3.7887588769449923e-06, "loss": 0.4769, "step": 2655 }, { "epoch": 0.591010235870049, "grad_norm": 1.409275520210228, "learning_rate": 3.785262956296677e-06, "loss": 0.4936, "step": 2656 }, { "epoch": 0.5912327547841566, "grad_norm": 1.4249619558319886, "learning_rate": 3.781767666617713e-06, "loss": 0.4754, "step": 2657 }, { "epoch": 0.5914552736982643, "grad_norm": 1.442392156355912, "learning_rate": 3.7782730097236526e-06, "loss": 0.4894, "step": 2658 }, { "epoch": 0.5916777926123721, "grad_norm": 1.3410869730838573, "learning_rate": 3.7747789874297235e-06, "loss": 0.4743, "step": 2659 }, { "epoch": 0.5919003115264797, "grad_norm": 1.4030449583843634, "learning_rate": 3.7712856015508204e-06, "loss": 0.4819, "step": 2660 }, { "epoch": 0.5921228304405874, "grad_norm": 1.3803860544269166, "learning_rate": 3.7677928539015064e-06, "loss": 0.4717, "step": 2661 }, { "epoch": 0.5923453493546952, "grad_norm": 1.4459958836498692, "learning_rate": 3.7643007462960186e-06, "loss": 0.4875, "step": 2662 }, { "epoch": 0.5925678682688028, "grad_norm": 1.4615402000187812, "learning_rate": 3.7608092805482555e-06, "loss": 0.4816, "step": 2663 }, { "epoch": 0.5927903871829105, "grad_norm": 1.3716713700999745, "learning_rate": 3.757318458471787e-06, "loss": 0.4907, "step": 2664 }, { "epoch": 0.5930129060970183, "grad_norm": 1.5051488085992641, "learning_rate": 3.7538282818798457e-06, "loss": 0.4791, "step": 2665 }, { "epoch": 0.5932354250111259, "grad_norm": 1.35805268462258, "learning_rate": 3.7503387525853284e-06, "loss": 0.487, "step": 2666 }, { "epoch": 0.5934579439252337, "grad_norm": 1.3754388004548181, "learning_rate": 3.7468498724007964e-06, "loss": 0.4773, "step": 2667 }, { "epoch": 0.5936804628393414, "grad_norm": 1.4402433162351018, "learning_rate": 3.7433616431384767e-06, "loss": 0.4772, "step": 2668 }, { "epoch": 0.593902981753449, "grad_norm": 1.3957730759092533, "learning_rate": 3.739874066610256e-06, "loss": 0.4864, "step": 2669 }, { "epoch": 0.5941255006675568, "grad_norm": 1.4444664971652157, "learning_rate": 3.7363871446276814e-06, "loss": 0.471, "step": 2670 }, { "epoch": 0.5943480195816644, "grad_norm": 1.3665947894610306, "learning_rate": 3.7329008790019594e-06, "loss": 0.483, "step": 2671 }, { "epoch": 0.5945705384957721, "grad_norm": 1.4480315746677084, "learning_rate": 3.729415271543956e-06, "loss": 0.475, "step": 2672 }, { "epoch": 0.5947930574098799, "grad_norm": 1.3416452486792747, "learning_rate": 3.7259303240641976e-06, "loss": 0.4773, "step": 2673 }, { "epoch": 0.5950155763239875, "grad_norm": 1.3229618742617926, "learning_rate": 3.722446038372864e-06, "loss": 0.4563, "step": 2674 }, { "epoch": 0.5952380952380952, "grad_norm": 1.4765670581457278, "learning_rate": 3.7189624162797965e-06, "loss": 0.4979, "step": 2675 }, { "epoch": 0.595460614152203, "grad_norm": 1.4204247041273237, "learning_rate": 3.7154794595944875e-06, "loss": 0.4796, "step": 2676 }, { "epoch": 0.5956831330663106, "grad_norm": 1.4695728078722452, "learning_rate": 3.711997170126085e-06, "loss": 0.4834, "step": 2677 }, { "epoch": 0.5959056519804183, "grad_norm": 1.40191380381889, "learning_rate": 3.708515549683388e-06, "loss": 0.4857, "step": 2678 }, { "epoch": 0.5961281708945261, "grad_norm": 1.4011361162194806, "learning_rate": 3.7050346000748526e-06, "loss": 0.4904, "step": 2679 }, { "epoch": 0.5963506898086337, "grad_norm": 1.3526739346497354, "learning_rate": 3.7015543231085827e-06, "loss": 0.4816, "step": 2680 }, { "epoch": 0.5965732087227414, "grad_norm": 1.4785891654522183, "learning_rate": 3.698074720592336e-06, "loss": 0.4968, "step": 2681 }, { "epoch": 0.5967957276368492, "grad_norm": 1.3246245972427146, "learning_rate": 3.6945957943335175e-06, "loss": 0.4778, "step": 2682 }, { "epoch": 0.5970182465509568, "grad_norm": 1.4653395313951671, "learning_rate": 3.6911175461391794e-06, "loss": 0.4771, "step": 2683 }, { "epoch": 0.5972407654650645, "grad_norm": 1.4320406599519568, "learning_rate": 3.687639977816028e-06, "loss": 0.4874, "step": 2684 }, { "epoch": 0.5974632843791722, "grad_norm": 1.5189308952439553, "learning_rate": 3.6841630911704085e-06, "loss": 0.4797, "step": 2685 }, { "epoch": 0.5976858032932799, "grad_norm": 1.5640330714872053, "learning_rate": 3.680686888008316e-06, "loss": 0.4868, "step": 2686 }, { "epoch": 0.5979083222073877, "grad_norm": 1.4185802026995953, "learning_rate": 3.677211370135393e-06, "loss": 0.4792, "step": 2687 }, { "epoch": 0.5981308411214953, "grad_norm": 1.4046790096519675, "learning_rate": 3.6737365393569212e-06, "loss": 0.4839, "step": 2688 }, { "epoch": 0.598353360035603, "grad_norm": 1.3915158835904375, "learning_rate": 3.6702623974778296e-06, "loss": 0.4689, "step": 2689 }, { "epoch": 0.5985758789497108, "grad_norm": 1.389542712024189, "learning_rate": 3.666788946302686e-06, "loss": 0.4761, "step": 2690 }, { "epoch": 0.5987983978638184, "grad_norm": 1.4329468549748259, "learning_rate": 3.6633161876357017e-06, "loss": 0.4827, "step": 2691 }, { "epoch": 0.5990209167779261, "grad_norm": 1.3462142052758461, "learning_rate": 3.6598441232807234e-06, "loss": 0.4722, "step": 2692 }, { "epoch": 0.5992434356920339, "grad_norm": 1.4597997003097014, "learning_rate": 3.6563727550412478e-06, "loss": 0.4868, "step": 2693 }, { "epoch": 0.5994659546061415, "grad_norm": 1.3476781160922793, "learning_rate": 3.6529020847203983e-06, "loss": 0.4811, "step": 2694 }, { "epoch": 0.5996884735202492, "grad_norm": 1.410480096068369, "learning_rate": 3.6494321141209436e-06, "loss": 0.4839, "step": 2695 }, { "epoch": 0.599910992434357, "grad_norm": 1.4247212881506324, "learning_rate": 3.6459628450452855e-06, "loss": 0.485, "step": 2696 }, { "epoch": 0.6001335113484646, "grad_norm": 1.4814355133569594, "learning_rate": 3.642494279295462e-06, "loss": 0.459, "step": 2697 }, { "epoch": 0.6003560302625723, "grad_norm": 1.5018222488170552, "learning_rate": 3.639026418673144e-06, "loss": 0.4944, "step": 2698 }, { "epoch": 0.60057854917668, "grad_norm": 1.3762431621387905, "learning_rate": 3.6355592649796406e-06, "loss": 0.4823, "step": 2699 }, { "epoch": 0.6008010680907877, "grad_norm": 1.407800097783673, "learning_rate": 3.6320928200158913e-06, "loss": 0.4427, "step": 2700 }, { "epoch": 0.6010235870048954, "grad_norm": 1.4657012054272505, "learning_rate": 3.628627085582466e-06, "loss": 0.4751, "step": 2701 }, { "epoch": 0.6012461059190031, "grad_norm": 1.4548331289767855, "learning_rate": 3.625162063479568e-06, "loss": 0.49, "step": 2702 }, { "epoch": 0.6014686248331108, "grad_norm": 1.4299130220422576, "learning_rate": 3.6216977555070267e-06, "loss": 0.4833, "step": 2703 }, { "epoch": 0.6016911437472185, "grad_norm": 1.4932985154011122, "learning_rate": 3.618234163464306e-06, "loss": 0.4809, "step": 2704 }, { "epoch": 0.6019136626613262, "grad_norm": 1.4098415232985329, "learning_rate": 3.6147712891504936e-06, "loss": 0.4823, "step": 2705 }, { "epoch": 0.6021361815754339, "grad_norm": 1.4798951448776085, "learning_rate": 3.611309134364308e-06, "loss": 0.4659, "step": 2706 }, { "epoch": 0.6023587004895417, "grad_norm": 1.481938291827507, "learning_rate": 3.60784770090409e-06, "loss": 0.4651, "step": 2707 }, { "epoch": 0.6025812194036493, "grad_norm": 1.3485339137708612, "learning_rate": 3.6043869905678096e-06, "loss": 0.4765, "step": 2708 }, { "epoch": 0.602803738317757, "grad_norm": 1.3363576559482562, "learning_rate": 3.6009270051530565e-06, "loss": 0.4888, "step": 2709 }, { "epoch": 0.6030262572318648, "grad_norm": 1.5140804866524298, "learning_rate": 3.5974677464570496e-06, "loss": 0.4839, "step": 2710 }, { "epoch": 0.6032487761459724, "grad_norm": 1.3520228283012172, "learning_rate": 3.5940092162766234e-06, "loss": 0.4805, "step": 2711 }, { "epoch": 0.6034712950600801, "grad_norm": 1.4899437262165107, "learning_rate": 3.5905514164082423e-06, "loss": 0.4711, "step": 2712 }, { "epoch": 0.6036938139741878, "grad_norm": 1.4112505210169908, "learning_rate": 3.5870943486479855e-06, "loss": 0.4689, "step": 2713 }, { "epoch": 0.6039163328882955, "grad_norm": 1.3310408960913473, "learning_rate": 3.5836380147915518e-06, "loss": 0.4799, "step": 2714 }, { "epoch": 0.6041388518024032, "grad_norm": 1.3380172855531642, "learning_rate": 3.5801824166342623e-06, "loss": 0.4743, "step": 2715 }, { "epoch": 0.6043613707165109, "grad_norm": 1.3331545036889287, "learning_rate": 3.576727555971054e-06, "loss": 0.4711, "step": 2716 }, { "epoch": 0.6045838896306186, "grad_norm": 1.459059125078441, "learning_rate": 3.573273434596478e-06, "loss": 0.4838, "step": 2717 }, { "epoch": 0.6048064085447263, "grad_norm": 1.6750208122426677, "learning_rate": 3.5698200543047085e-06, "loss": 0.4991, "step": 2718 }, { "epoch": 0.605028927458834, "grad_norm": 1.4256445652322598, "learning_rate": 3.5663674168895276e-06, "loss": 0.4643, "step": 2719 }, { "epoch": 0.6052514463729417, "grad_norm": 1.4133557455870867, "learning_rate": 3.562915524144336e-06, "loss": 0.4554, "step": 2720 }, { "epoch": 0.6054739652870494, "grad_norm": 1.4219406140504798, "learning_rate": 3.5594643778621455e-06, "loss": 0.4782, "step": 2721 }, { "epoch": 0.6056964842011571, "grad_norm": 1.4226727180004262, "learning_rate": 3.5560139798355814e-06, "loss": 0.4962, "step": 2722 }, { "epoch": 0.6059190031152648, "grad_norm": 1.4656230142442181, "learning_rate": 3.552564331856876e-06, "loss": 0.4906, "step": 2723 }, { "epoch": 0.6061415220293725, "grad_norm": 1.4401495457170161, "learning_rate": 3.5491154357178814e-06, "loss": 0.4876, "step": 2724 }, { "epoch": 0.6063640409434802, "grad_norm": 1.518239647013923, "learning_rate": 3.545667293210049e-06, "loss": 0.4924, "step": 2725 }, { "epoch": 0.6065865598575879, "grad_norm": 1.4080024669327251, "learning_rate": 3.5422199061244454e-06, "loss": 0.4773, "step": 2726 }, { "epoch": 0.6068090787716955, "grad_norm": 1.4619904683083236, "learning_rate": 3.538773276251743e-06, "loss": 0.4694, "step": 2727 }, { "epoch": 0.6070315976858033, "grad_norm": 1.3849948948578235, "learning_rate": 3.535327405382219e-06, "loss": 0.4838, "step": 2728 }, { "epoch": 0.607254116599911, "grad_norm": 1.4366219975666523, "learning_rate": 3.5318822953057563e-06, "loss": 0.4804, "step": 2729 }, { "epoch": 0.6074766355140186, "grad_norm": 1.5218674820228386, "learning_rate": 3.528437947811847e-06, "loss": 0.4754, "step": 2730 }, { "epoch": 0.6076991544281264, "grad_norm": 1.3286153586216969, "learning_rate": 3.524994364689584e-06, "loss": 0.4654, "step": 2731 }, { "epoch": 0.6079216733422341, "grad_norm": 1.3457294239651598, "learning_rate": 3.521551547727663e-06, "loss": 0.4697, "step": 2732 }, { "epoch": 0.6081441922563418, "grad_norm": 1.437368144821097, "learning_rate": 3.5181094987143814e-06, "loss": 0.488, "step": 2733 }, { "epoch": 0.6083667111704495, "grad_norm": 1.3722157463625073, "learning_rate": 3.514668219437639e-06, "loss": 0.4827, "step": 2734 }, { "epoch": 0.6085892300845572, "grad_norm": 1.5174084614504093, "learning_rate": 3.5112277116849343e-06, "loss": 0.4878, "step": 2735 }, { "epoch": 0.6088117489986649, "grad_norm": 1.4967767655127184, "learning_rate": 3.5077879772433666e-06, "loss": 0.4974, "step": 2736 }, { "epoch": 0.6090342679127726, "grad_norm": 1.5224445117866998, "learning_rate": 3.5043490178996344e-06, "loss": 0.4811, "step": 2737 }, { "epoch": 0.6092567868268803, "grad_norm": 1.3940910706727128, "learning_rate": 3.50091083544003e-06, "loss": 0.4592, "step": 2738 }, { "epoch": 0.609479305740988, "grad_norm": 1.4069631916931302, "learning_rate": 3.497473431650446e-06, "loss": 0.4807, "step": 2739 }, { "epoch": 0.6097018246550957, "grad_norm": 1.3853108568938004, "learning_rate": 3.494036808316366e-06, "loss": 0.4777, "step": 2740 }, { "epoch": 0.6099243435692033, "grad_norm": 1.4598070336510918, "learning_rate": 3.490600967222872e-06, "loss": 0.4832, "step": 2741 }, { "epoch": 0.6101468624833111, "grad_norm": 1.4280990936122284, "learning_rate": 3.487165910154641e-06, "loss": 0.4743, "step": 2742 }, { "epoch": 0.6103693813974188, "grad_norm": 1.4177154245231811, "learning_rate": 3.4837316388959398e-06, "loss": 0.4893, "step": 2743 }, { "epoch": 0.6105919003115264, "grad_norm": 1.4805546175099307, "learning_rate": 3.480298155230626e-06, "loss": 0.465, "step": 2744 }, { "epoch": 0.6108144192256342, "grad_norm": 1.6172785275508008, "learning_rate": 3.4768654609421486e-06, "loss": 0.4628, "step": 2745 }, { "epoch": 0.6110369381397419, "grad_norm": 1.4157234791524933, "learning_rate": 3.4734335578135516e-06, "loss": 0.4783, "step": 2746 }, { "epoch": 0.6112594570538495, "grad_norm": 1.4500504319503817, "learning_rate": 3.470002447627461e-06, "loss": 0.4624, "step": 2747 }, { "epoch": 0.6114819759679573, "grad_norm": 1.549314999804638, "learning_rate": 3.466572132166094e-06, "loss": 0.4769, "step": 2748 }, { "epoch": 0.611704494882065, "grad_norm": 1.547977216058944, "learning_rate": 3.463142613211258e-06, "loss": 0.4825, "step": 2749 }, { "epoch": 0.6119270137961726, "grad_norm": 1.5121741803806104, "learning_rate": 3.4597138925443407e-06, "loss": 0.4861, "step": 2750 }, { "epoch": 0.6121495327102804, "grad_norm": 1.5460588825992794, "learning_rate": 3.4562859719463216e-06, "loss": 0.473, "step": 2751 }, { "epoch": 0.6123720516243881, "grad_norm": 1.3718120242045146, "learning_rate": 3.452858853197758e-06, "loss": 0.4564, "step": 2752 }, { "epoch": 0.6125945705384958, "grad_norm": 1.4155622066555764, "learning_rate": 3.4494325380787953e-06, "loss": 0.4649, "step": 2753 }, { "epoch": 0.6128170894526035, "grad_norm": 1.4376937213231897, "learning_rate": 3.4460070283691583e-06, "loss": 0.4857, "step": 2754 }, { "epoch": 0.6130396083667111, "grad_norm": 1.4358212090344502, "learning_rate": 3.4425823258481583e-06, "loss": 0.4823, "step": 2755 }, { "epoch": 0.6132621272808189, "grad_norm": 1.634648486693899, "learning_rate": 3.4391584322946824e-06, "loss": 0.4768, "step": 2756 }, { "epoch": 0.6134846461949266, "grad_norm": 1.5986263133719378, "learning_rate": 3.4357353494872004e-06, "loss": 0.4924, "step": 2757 }, { "epoch": 0.6137071651090342, "grad_norm": 1.608313601955231, "learning_rate": 3.4323130792037596e-06, "loss": 0.4798, "step": 2758 }, { "epoch": 0.613929684023142, "grad_norm": 1.5232446235371588, "learning_rate": 3.428891623221985e-06, "loss": 0.4754, "step": 2759 }, { "epoch": 0.6141522029372497, "grad_norm": 1.4060141332230998, "learning_rate": 3.4254709833190773e-06, "loss": 0.4834, "step": 2760 }, { "epoch": 0.6143747218513573, "grad_norm": 1.48475010860055, "learning_rate": 3.422051161271819e-06, "loss": 0.4799, "step": 2761 }, { "epoch": 0.6145972407654651, "grad_norm": 1.5310392811368647, "learning_rate": 3.418632158856563e-06, "loss": 0.476, "step": 2762 }, { "epoch": 0.6148197596795728, "grad_norm": 1.3744123599275728, "learning_rate": 3.415213977849237e-06, "loss": 0.4705, "step": 2763 }, { "epoch": 0.6150422785936804, "grad_norm": 1.4330050112315396, "learning_rate": 3.411796620025343e-06, "loss": 0.4763, "step": 2764 }, { "epoch": 0.6152647975077882, "grad_norm": 1.4710574344480216, "learning_rate": 3.4083800871599526e-06, "loss": 0.4786, "step": 2765 }, { "epoch": 0.6154873164218959, "grad_norm": 1.4294060703928004, "learning_rate": 3.4049643810277135e-06, "loss": 0.4572, "step": 2766 }, { "epoch": 0.6157098353360035, "grad_norm": 1.4993358891150164, "learning_rate": 3.4015495034028405e-06, "loss": 0.4686, "step": 2767 }, { "epoch": 0.6159323542501113, "grad_norm": 1.50165446988616, "learning_rate": 3.3981354560591216e-06, "loss": 0.4779, "step": 2768 }, { "epoch": 0.6161548731642189, "grad_norm": 1.4311810071382343, "learning_rate": 3.3947222407699094e-06, "loss": 0.4734, "step": 2769 }, { "epoch": 0.6163773920783266, "grad_norm": 1.4394291438433675, "learning_rate": 3.3913098593081264e-06, "loss": 0.4781, "step": 2770 }, { "epoch": 0.6165999109924344, "grad_norm": 1.3784486992281066, "learning_rate": 3.3878983134462596e-06, "loss": 0.4799, "step": 2771 }, { "epoch": 0.616822429906542, "grad_norm": 1.475978029524275, "learning_rate": 3.3844876049563645e-06, "loss": 0.4891, "step": 2772 }, { "epoch": 0.6170449488206498, "grad_norm": 1.4191607034047786, "learning_rate": 3.3810777356100657e-06, "loss": 0.4665, "step": 2773 }, { "epoch": 0.6172674677347575, "grad_norm": 1.396428468545759, "learning_rate": 3.3776687071785435e-06, "loss": 0.4741, "step": 2774 }, { "epoch": 0.6174899866488651, "grad_norm": 1.3870930163899575, "learning_rate": 3.374260521432546e-06, "loss": 0.4886, "step": 2775 }, { "epoch": 0.6177125055629729, "grad_norm": 1.5107785752152947, "learning_rate": 3.3708531801423818e-06, "loss": 0.467, "step": 2776 }, { "epoch": 0.6179350244770806, "grad_norm": 1.469006340109607, "learning_rate": 3.367446685077924e-06, "loss": 0.4746, "step": 2777 }, { "epoch": 0.6181575433911882, "grad_norm": 1.437771301765242, "learning_rate": 3.3640410380086015e-06, "loss": 0.4784, "step": 2778 }, { "epoch": 0.618380062305296, "grad_norm": 1.4918364871669443, "learning_rate": 3.3606362407034086e-06, "loss": 0.4601, "step": 2779 }, { "epoch": 0.6186025812194037, "grad_norm": 1.3785685509909436, "learning_rate": 3.3572322949308933e-06, "loss": 0.4812, "step": 2780 }, { "epoch": 0.6188251001335113, "grad_norm": 1.355265098129837, "learning_rate": 3.353829202459162e-06, "loss": 0.4817, "step": 2781 }, { "epoch": 0.6190476190476191, "grad_norm": 1.4895548785665578, "learning_rate": 3.35042696505588e-06, "loss": 0.4586, "step": 2782 }, { "epoch": 0.6192701379617267, "grad_norm": 1.500979930459987, "learning_rate": 3.347025584488267e-06, "loss": 0.4752, "step": 2783 }, { "epoch": 0.6194926568758344, "grad_norm": 1.500081771054392, "learning_rate": 3.343625062523098e-06, "loss": 0.4933, "step": 2784 }, { "epoch": 0.6197151757899422, "grad_norm": 1.5059313971013786, "learning_rate": 3.3402254009267e-06, "loss": 0.4991, "step": 2785 }, { "epoch": 0.6199376947040498, "grad_norm": 1.570450077323205, "learning_rate": 3.3368266014649586e-06, "loss": 0.4707, "step": 2786 }, { "epoch": 0.6201602136181575, "grad_norm": 1.4997938103739805, "learning_rate": 3.3334286659033046e-06, "loss": 0.4717, "step": 2787 }, { "epoch": 0.6203827325322653, "grad_norm": 1.449772330241861, "learning_rate": 3.330031596006726e-06, "loss": 0.4745, "step": 2788 }, { "epoch": 0.6206052514463729, "grad_norm": 1.4345666175675038, "learning_rate": 3.3266353935397578e-06, "loss": 0.4878, "step": 2789 }, { "epoch": 0.6208277703604806, "grad_norm": 1.5007321173928607, "learning_rate": 3.323240060266484e-06, "loss": 0.4831, "step": 2790 }, { "epoch": 0.6210502892745884, "grad_norm": 1.4604464253435003, "learning_rate": 3.319845597950538e-06, "loss": 0.4659, "step": 2791 }, { "epoch": 0.621272808188696, "grad_norm": 1.5180170807786348, "learning_rate": 3.316452008355103e-06, "loss": 0.4882, "step": 2792 }, { "epoch": 0.6214953271028038, "grad_norm": 1.5648965507980264, "learning_rate": 3.313059293242907e-06, "loss": 0.493, "step": 2793 }, { "epoch": 0.6217178460169115, "grad_norm": 1.519687966836139, "learning_rate": 3.309667454376224e-06, "loss": 0.4645, "step": 2794 }, { "epoch": 0.6219403649310191, "grad_norm": 1.508521099590642, "learning_rate": 3.306276493516871e-06, "loss": 0.4777, "step": 2795 }, { "epoch": 0.6221628838451269, "grad_norm": 1.3918442877185069, "learning_rate": 3.3028864124262105e-06, "loss": 0.4719, "step": 2796 }, { "epoch": 0.6223854027592345, "grad_norm": 1.463836090252516, "learning_rate": 3.2994972128651493e-06, "loss": 0.4684, "step": 2797 }, { "epoch": 0.6226079216733422, "grad_norm": 1.38699836817722, "learning_rate": 3.2961088965941345e-06, "loss": 0.4629, "step": 2798 }, { "epoch": 0.62283044058745, "grad_norm": 1.4425666215587887, "learning_rate": 3.2927214653731566e-06, "loss": 0.4803, "step": 2799 }, { "epoch": 0.6230529595015576, "grad_norm": 1.4379875962806523, "learning_rate": 3.2893349209617433e-06, "loss": 0.4742, "step": 2800 }, { "epoch": 0.6232754784156653, "grad_norm": 1.41331287480488, "learning_rate": 3.2859492651189633e-06, "loss": 0.4676, "step": 2801 }, { "epoch": 0.6234979973297731, "grad_norm": 1.4554601439932113, "learning_rate": 3.2825644996034227e-06, "loss": 0.4537, "step": 2802 }, { "epoch": 0.6237205162438807, "grad_norm": 1.4564751293144946, "learning_rate": 3.279180626173266e-06, "loss": 0.486, "step": 2803 }, { "epoch": 0.6239430351579884, "grad_norm": 1.5184210026274572, "learning_rate": 3.2757976465861775e-06, "loss": 0.4682, "step": 2804 }, { "epoch": 0.6241655540720962, "grad_norm": 1.491358292002014, "learning_rate": 3.2724155625993712e-06, "loss": 0.4748, "step": 2805 }, { "epoch": 0.6243880729862038, "grad_norm": 1.600585699481036, "learning_rate": 3.2690343759696e-06, "loss": 0.474, "step": 2806 }, { "epoch": 0.6246105919003115, "grad_norm": 1.5995719475532946, "learning_rate": 3.265654088453148e-06, "loss": 0.4622, "step": 2807 }, { "epoch": 0.6248331108144193, "grad_norm": 1.5934555882346404, "learning_rate": 3.2622747018058355e-06, "loss": 0.4753, "step": 2808 }, { "epoch": 0.6250556297285269, "grad_norm": 1.4976809403010243, "learning_rate": 3.2588962177830104e-06, "loss": 0.4674, "step": 2809 }, { "epoch": 0.6252781486426346, "grad_norm": 1.6314687925834197, "learning_rate": 3.255518638139558e-06, "loss": 0.4811, "step": 2810 }, { "epoch": 0.6255006675567423, "grad_norm": 1.4194608344406883, "learning_rate": 3.252141964629889e-06, "loss": 0.4599, "step": 2811 }, { "epoch": 0.62572318647085, "grad_norm": 1.4661043667595357, "learning_rate": 3.2487661990079435e-06, "loss": 0.4711, "step": 2812 }, { "epoch": 0.6259457053849578, "grad_norm": 1.4669409275516525, "learning_rate": 3.2453913430271926e-06, "loss": 0.5008, "step": 2813 }, { "epoch": 0.6261682242990654, "grad_norm": 1.4311242077072166, "learning_rate": 3.242017398440633e-06, "loss": 0.4708, "step": 2814 }, { "epoch": 0.6263907432131731, "grad_norm": 1.465133013908476, "learning_rate": 3.2386443670007872e-06, "loss": 0.4726, "step": 2815 }, { "epoch": 0.6266132621272809, "grad_norm": 1.4734527232526162, "learning_rate": 3.2352722504597078e-06, "loss": 0.4655, "step": 2816 }, { "epoch": 0.6268357810413885, "grad_norm": 1.5173803705564624, "learning_rate": 3.2319010505689674e-06, "loss": 0.4882, "step": 2817 }, { "epoch": 0.6270582999554962, "grad_norm": 1.532102783890357, "learning_rate": 3.228530769079663e-06, "loss": 0.4711, "step": 2818 }, { "epoch": 0.627280818869604, "grad_norm": 1.5671111118002046, "learning_rate": 3.2251614077424175e-06, "loss": 0.4717, "step": 2819 }, { "epoch": 0.6275033377837116, "grad_norm": 1.4547989011648867, "learning_rate": 3.2217929683073744e-06, "loss": 0.4628, "step": 2820 }, { "epoch": 0.6277258566978193, "grad_norm": 1.6037372771027458, "learning_rate": 3.2184254525241965e-06, "loss": 0.4829, "step": 2821 }, { "epoch": 0.6279483756119271, "grad_norm": 1.5234479226228386, "learning_rate": 3.2150588621420665e-06, "loss": 0.4819, "step": 2822 }, { "epoch": 0.6281708945260347, "grad_norm": 1.5260338487300804, "learning_rate": 3.2116931989096924e-06, "loss": 0.4883, "step": 2823 }, { "epoch": 0.6283934134401424, "grad_norm": 1.5166941722552472, "learning_rate": 3.208328464575295e-06, "loss": 0.4498, "step": 2824 }, { "epoch": 0.6286159323542501, "grad_norm": 1.4642773442084933, "learning_rate": 3.2049646608866135e-06, "loss": 0.4639, "step": 2825 }, { "epoch": 0.6288384512683578, "grad_norm": 1.4621126708436414, "learning_rate": 3.201601789590906e-06, "loss": 0.4747, "step": 2826 }, { "epoch": 0.6290609701824655, "grad_norm": 1.4448073078230244, "learning_rate": 3.1982398524349407e-06, "loss": 0.4498, "step": 2827 }, { "epoch": 0.6292834890965732, "grad_norm": 1.4180201778303556, "learning_rate": 3.1948788511650077e-06, "loss": 0.4564, "step": 2828 }, { "epoch": 0.6295060080106809, "grad_norm": 1.4297187542358039, "learning_rate": 3.191518787526906e-06, "loss": 0.4709, "step": 2829 }, { "epoch": 0.6297285269247886, "grad_norm": 1.5438829375366587, "learning_rate": 3.188159663265951e-06, "loss": 0.4634, "step": 2830 }, { "epoch": 0.6299510458388963, "grad_norm": 1.4096990998538597, "learning_rate": 3.1848014801269684e-06, "loss": 0.4634, "step": 2831 }, { "epoch": 0.630173564753004, "grad_norm": 1.4930898075838237, "learning_rate": 3.1814442398542927e-06, "loss": 0.4837, "step": 2832 }, { "epoch": 0.6303960836671118, "grad_norm": 1.5025822098951904, "learning_rate": 3.1780879441917733e-06, "loss": 0.4855, "step": 2833 }, { "epoch": 0.6306186025812194, "grad_norm": 1.490321817215844, "learning_rate": 3.1747325948827643e-06, "loss": 0.4707, "step": 2834 }, { "epoch": 0.6308411214953271, "grad_norm": 1.545695562480296, "learning_rate": 3.171378193670135e-06, "loss": 0.4908, "step": 2835 }, { "epoch": 0.6310636404094349, "grad_norm": 1.5628070987571296, "learning_rate": 3.168024742296254e-06, "loss": 0.4563, "step": 2836 }, { "epoch": 0.6312861593235425, "grad_norm": 1.441406741401823, "learning_rate": 3.164672242503002e-06, "loss": 0.4657, "step": 2837 }, { "epoch": 0.6315086782376502, "grad_norm": 1.5006653386319841, "learning_rate": 3.1613206960317614e-06, "loss": 0.4671, "step": 2838 }, { "epoch": 0.6317311971517579, "grad_norm": 1.518521832784334, "learning_rate": 3.1579701046234256e-06, "loss": 0.4738, "step": 2839 }, { "epoch": 0.6319537160658656, "grad_norm": 1.460907675570471, "learning_rate": 3.1546204700183838e-06, "loss": 0.4731, "step": 2840 }, { "epoch": 0.6321762349799733, "grad_norm": 1.519315283714557, "learning_rate": 3.1512717939565372e-06, "loss": 0.4802, "step": 2841 }, { "epoch": 0.632398753894081, "grad_norm": 1.5362378378414157, "learning_rate": 3.1479240781772826e-06, "loss": 0.4851, "step": 2842 }, { "epoch": 0.6326212728081887, "grad_norm": 1.468977336356714, "learning_rate": 3.144577324419518e-06, "loss": 0.5029, "step": 2843 }, { "epoch": 0.6328437917222964, "grad_norm": 1.5124544942011078, "learning_rate": 3.1412315344216453e-06, "loss": 0.455, "step": 2844 }, { "epoch": 0.6330663106364041, "grad_norm": 1.3978826256120362, "learning_rate": 3.1378867099215642e-06, "loss": 0.4559, "step": 2845 }, { "epoch": 0.6332888295505118, "grad_norm": 1.5137913636394627, "learning_rate": 3.13454285265667e-06, "loss": 0.4718, "step": 2846 }, { "epoch": 0.6335113484646195, "grad_norm": 1.4839012129632696, "learning_rate": 3.1311999643638634e-06, "loss": 0.4687, "step": 2847 }, { "epoch": 0.6337338673787272, "grad_norm": 1.3803732029516051, "learning_rate": 3.1278580467795327e-06, "loss": 0.4778, "step": 2848 }, { "epoch": 0.6339563862928349, "grad_norm": 1.4841138496953328, "learning_rate": 3.124517101639567e-06, "loss": 0.474, "step": 2849 }, { "epoch": 0.6341789052069426, "grad_norm": 1.4241215041976194, "learning_rate": 3.12117713067935e-06, "loss": 0.4631, "step": 2850 }, { "epoch": 0.6344014241210503, "grad_norm": 1.4757597548134551, "learning_rate": 3.1178381356337585e-06, "loss": 0.4773, "step": 2851 }, { "epoch": 0.634623943035158, "grad_norm": 1.4818930788272284, "learning_rate": 3.1145001182371593e-06, "loss": 0.4627, "step": 2852 }, { "epoch": 0.6348464619492656, "grad_norm": 1.556341689914317, "learning_rate": 3.1111630802234205e-06, "loss": 0.4866, "step": 2853 }, { "epoch": 0.6350689808633734, "grad_norm": 1.3962974557134238, "learning_rate": 3.1078270233258913e-06, "loss": 0.4654, "step": 2854 }, { "epoch": 0.6352914997774811, "grad_norm": 1.4999256849999598, "learning_rate": 3.1044919492774173e-06, "loss": 0.4819, "step": 2855 }, { "epoch": 0.6355140186915887, "grad_norm": 1.43933075218223, "learning_rate": 3.1011578598103316e-06, "loss": 0.4695, "step": 2856 }, { "epoch": 0.6357365376056965, "grad_norm": 1.4756799033167742, "learning_rate": 3.097824756656456e-06, "loss": 0.4923, "step": 2857 }, { "epoch": 0.6359590565198042, "grad_norm": 1.44845603984291, "learning_rate": 3.0944926415470986e-06, "loss": 0.4683, "step": 2858 }, { "epoch": 0.6361815754339119, "grad_norm": 1.49292517568966, "learning_rate": 3.0911615162130583e-06, "loss": 0.4504, "step": 2859 }, { "epoch": 0.6364040943480196, "grad_norm": 1.5160636018775786, "learning_rate": 3.0878313823846152e-06, "loss": 0.4803, "step": 2860 }, { "epoch": 0.6366266132621273, "grad_norm": 1.4505911288519862, "learning_rate": 3.0845022417915394e-06, "loss": 0.4632, "step": 2861 }, { "epoch": 0.636849132176235, "grad_norm": 1.4640991369957148, "learning_rate": 3.0811740961630804e-06, "loss": 0.4848, "step": 2862 }, { "epoch": 0.6370716510903427, "grad_norm": 1.4641079660968523, "learning_rate": 3.0778469472279714e-06, "loss": 0.4809, "step": 2863 }, { "epoch": 0.6372941700044504, "grad_norm": 1.4616763344281356, "learning_rate": 3.074520796714433e-06, "loss": 0.4794, "step": 2864 }, { "epoch": 0.6375166889185581, "grad_norm": 1.488166418274119, "learning_rate": 3.071195646350158e-06, "loss": 0.475, "step": 2865 }, { "epoch": 0.6377392078326658, "grad_norm": 1.5337395227927724, "learning_rate": 3.067871497862331e-06, "loss": 0.4695, "step": 2866 }, { "epoch": 0.6379617267467734, "grad_norm": 1.5467251621098839, "learning_rate": 3.064548352977608e-06, "loss": 0.489, "step": 2867 }, { "epoch": 0.6381842456608812, "grad_norm": 1.4596375207472672, "learning_rate": 3.0612262134221245e-06, "loss": 0.4729, "step": 2868 }, { "epoch": 0.6384067645749889, "grad_norm": 1.4192133003955094, "learning_rate": 3.0579050809214954e-06, "loss": 0.4739, "step": 2869 }, { "epoch": 0.6386292834890965, "grad_norm": 1.4576663452227379, "learning_rate": 3.0545849572008153e-06, "loss": 0.4613, "step": 2870 }, { "epoch": 0.6388518024032043, "grad_norm": 1.4438548198972407, "learning_rate": 3.0512658439846476e-06, "loss": 0.4734, "step": 2871 }, { "epoch": 0.639074321317312, "grad_norm": 1.4064362782206623, "learning_rate": 3.0479477429970393e-06, "loss": 0.4705, "step": 2872 }, { "epoch": 0.6392968402314196, "grad_norm": 1.453448810558947, "learning_rate": 3.044630655961507e-06, "loss": 0.4717, "step": 2873 }, { "epoch": 0.6395193591455274, "grad_norm": 1.4399016990396911, "learning_rate": 3.0413145846010376e-06, "loss": 0.4672, "step": 2874 }, { "epoch": 0.6397418780596351, "grad_norm": 1.545135133051347, "learning_rate": 3.0379995306380985e-06, "loss": 0.4951, "step": 2875 }, { "epoch": 0.6399643969737427, "grad_norm": 1.575174753028542, "learning_rate": 3.03468549579462e-06, "loss": 0.4715, "step": 2876 }, { "epoch": 0.6401869158878505, "grad_norm": 1.5207898964836888, "learning_rate": 3.0313724817920088e-06, "loss": 0.4717, "step": 2877 }, { "epoch": 0.6404094348019582, "grad_norm": 1.4963811707959391, "learning_rate": 3.02806049035114e-06, "loss": 0.4759, "step": 2878 }, { "epoch": 0.6406319537160658, "grad_norm": 1.562822104371851, "learning_rate": 3.024749523192358e-06, "loss": 0.4712, "step": 2879 }, { "epoch": 0.6408544726301736, "grad_norm": 1.5862679306229648, "learning_rate": 3.021439582035472e-06, "loss": 0.4648, "step": 2880 }, { "epoch": 0.6410769915442812, "grad_norm": 1.4298798630913903, "learning_rate": 3.0181306685997622e-06, "loss": 0.4531, "step": 2881 }, { "epoch": 0.641299510458389, "grad_norm": 1.5384215361789984, "learning_rate": 3.014822784603974e-06, "loss": 0.4584, "step": 2882 }, { "epoch": 0.6415220293724967, "grad_norm": 1.5769420708084858, "learning_rate": 3.0115159317663147e-06, "loss": 0.4805, "step": 2883 }, { "epoch": 0.6417445482866043, "grad_norm": 1.5135700627374078, "learning_rate": 3.0082101118044617e-06, "loss": 0.4745, "step": 2884 }, { "epoch": 0.6419670672007121, "grad_norm": 1.5273042499143406, "learning_rate": 3.004905326435551e-06, "loss": 0.4623, "step": 2885 }, { "epoch": 0.6421895861148198, "grad_norm": 1.4884121182262413, "learning_rate": 3.0016015773761854e-06, "loss": 0.4728, "step": 2886 }, { "epoch": 0.6424121050289274, "grad_norm": 1.5294134514111546, "learning_rate": 2.998298866342425e-06, "loss": 0.4598, "step": 2887 }, { "epoch": 0.6426346239430352, "grad_norm": 1.4506020688835979, "learning_rate": 2.9949971950497943e-06, "loss": 0.4597, "step": 2888 }, { "epoch": 0.6428571428571429, "grad_norm": 1.4344238941798029, "learning_rate": 2.9916965652132734e-06, "loss": 0.4904, "step": 2889 }, { "epoch": 0.6430796617712505, "grad_norm": 1.5599257041075842, "learning_rate": 2.988396978547308e-06, "loss": 0.4784, "step": 2890 }, { "epoch": 0.6433021806853583, "grad_norm": 1.5013561076525526, "learning_rate": 2.9850984367657964e-06, "loss": 0.46, "step": 2891 }, { "epoch": 0.6435246995994659, "grad_norm": 1.4424276326609986, "learning_rate": 2.9818009415820983e-06, "loss": 0.4682, "step": 2892 }, { "epoch": 0.6437472185135736, "grad_norm": 1.574869982206467, "learning_rate": 2.9785044947090245e-06, "loss": 0.4676, "step": 2893 }, { "epoch": 0.6439697374276814, "grad_norm": 1.4852068604889925, "learning_rate": 2.9752090978588454e-06, "loss": 0.4671, "step": 2894 }, { "epoch": 0.644192256341789, "grad_norm": 1.5082875078488227, "learning_rate": 2.971914752743286e-06, "loss": 0.4653, "step": 2895 }, { "epoch": 0.6444147752558967, "grad_norm": 1.444716420820958, "learning_rate": 2.96862146107352e-06, "loss": 0.4538, "step": 2896 }, { "epoch": 0.6446372941700045, "grad_norm": 1.474592938849016, "learning_rate": 2.9653292245601835e-06, "loss": 0.463, "step": 2897 }, { "epoch": 0.6448598130841121, "grad_norm": 1.5988032668211432, "learning_rate": 2.9620380449133558e-06, "loss": 0.4666, "step": 2898 }, { "epoch": 0.6450823319982198, "grad_norm": 1.5742952826995034, "learning_rate": 2.9587479238425704e-06, "loss": 0.4705, "step": 2899 }, { "epoch": 0.6453048509123276, "grad_norm": 1.6333645319269117, "learning_rate": 2.95545886305681e-06, "loss": 0.4882, "step": 2900 }, { "epoch": 0.6455273698264352, "grad_norm": 1.4484757956066288, "learning_rate": 2.9521708642645085e-06, "loss": 0.4685, "step": 2901 }, { "epoch": 0.645749888740543, "grad_norm": 1.5032201968681431, "learning_rate": 2.948883929173546e-06, "loss": 0.4619, "step": 2902 }, { "epoch": 0.6459724076546507, "grad_norm": 1.4481965589083137, "learning_rate": 2.945598059491253e-06, "loss": 0.469, "step": 2903 }, { "epoch": 0.6461949265687583, "grad_norm": 1.5150908842488628, "learning_rate": 2.942313256924404e-06, "loss": 0.4685, "step": 2904 }, { "epoch": 0.6464174454828661, "grad_norm": 1.423279601218607, "learning_rate": 2.9390295231792184e-06, "loss": 0.4465, "step": 2905 }, { "epoch": 0.6466399643969737, "grad_norm": 1.4163525358807445, "learning_rate": 2.9357468599613637e-06, "loss": 0.4744, "step": 2906 }, { "epoch": 0.6468624833110814, "grad_norm": 1.4293487855542364, "learning_rate": 2.9324652689759493e-06, "loss": 0.4802, "step": 2907 }, { "epoch": 0.6470850022251892, "grad_norm": 1.4465481902269899, "learning_rate": 2.9291847519275245e-06, "loss": 0.4647, "step": 2908 }, { "epoch": 0.6473075211392968, "grad_norm": 1.4845987832578524, "learning_rate": 2.9259053105200895e-06, "loss": 0.4711, "step": 2909 }, { "epoch": 0.6475300400534045, "grad_norm": 1.5307375375815826, "learning_rate": 2.9226269464570788e-06, "loss": 0.4749, "step": 2910 }, { "epoch": 0.6477525589675123, "grad_norm": 1.3806748101068413, "learning_rate": 2.919349661441367e-06, "loss": 0.4656, "step": 2911 }, { "epoch": 0.6479750778816199, "grad_norm": 1.5058108169130737, "learning_rate": 2.9160734571752736e-06, "loss": 0.4652, "step": 2912 }, { "epoch": 0.6481975967957276, "grad_norm": 1.4866123601847292, "learning_rate": 2.9127983353605488e-06, "loss": 0.4757, "step": 2913 }, { "epoch": 0.6484201157098354, "grad_norm": 1.5056524284853343, "learning_rate": 2.9095242976983856e-06, "loss": 0.4694, "step": 2914 }, { "epoch": 0.648642634623943, "grad_norm": 1.5571935333068696, "learning_rate": 2.9062513458894194e-06, "loss": 0.4895, "step": 2915 }, { "epoch": 0.6488651535380507, "grad_norm": 1.5140113753782818, "learning_rate": 2.9029794816337085e-06, "loss": 0.4774, "step": 2916 }, { "epoch": 0.6490876724521585, "grad_norm": 1.4972400051226142, "learning_rate": 2.8997087066307587e-06, "loss": 0.4771, "step": 2917 }, { "epoch": 0.6493101913662661, "grad_norm": 1.4328990476210248, "learning_rate": 2.8964390225794995e-06, "loss": 0.4732, "step": 2918 }, { "epoch": 0.6495327102803738, "grad_norm": 1.4313048446351955, "learning_rate": 2.8931704311783014e-06, "loss": 0.4597, "step": 2919 }, { "epoch": 0.6497552291944815, "grad_norm": 1.434138657184483, "learning_rate": 2.8899029341249652e-06, "loss": 0.4758, "step": 2920 }, { "epoch": 0.6499777481085892, "grad_norm": 1.4895220888741054, "learning_rate": 2.886636533116721e-06, "loss": 0.4632, "step": 2921 }, { "epoch": 0.650200267022697, "grad_norm": 1.4006448271772292, "learning_rate": 2.883371229850236e-06, "loss": 0.4705, "step": 2922 }, { "epoch": 0.6504227859368046, "grad_norm": 1.5138735535454002, "learning_rate": 2.8801070260215956e-06, "loss": 0.4767, "step": 2923 }, { "epoch": 0.6506453048509123, "grad_norm": 1.4858464424189173, "learning_rate": 2.8768439233263257e-06, "loss": 0.4615, "step": 2924 }, { "epoch": 0.6508678237650201, "grad_norm": 1.4014512439720288, "learning_rate": 2.873581923459377e-06, "loss": 0.4607, "step": 2925 }, { "epoch": 0.6510903426791277, "grad_norm": 1.5944135236181964, "learning_rate": 2.8703210281151193e-06, "loss": 0.4761, "step": 2926 }, { "epoch": 0.6513128615932354, "grad_norm": 1.5046985443556786, "learning_rate": 2.867061238987364e-06, "loss": 0.4804, "step": 2927 }, { "epoch": 0.6515353805073432, "grad_norm": 1.5240066568734463, "learning_rate": 2.8638025577693328e-06, "loss": 0.4625, "step": 2928 }, { "epoch": 0.6517578994214508, "grad_norm": 1.5860385210809385, "learning_rate": 2.8605449861536817e-06, "loss": 0.4627, "step": 2929 }, { "epoch": 0.6519804183355585, "grad_norm": 1.6604480818867777, "learning_rate": 2.8572885258324876e-06, "loss": 0.4808, "step": 2930 }, { "epoch": 0.6522029372496663, "grad_norm": 1.5205016870725987, "learning_rate": 2.8540331784972464e-06, "loss": 0.4843, "step": 2931 }, { "epoch": 0.6524254561637739, "grad_norm": 1.6360113726773025, "learning_rate": 2.8507789458388824e-06, "loss": 0.4739, "step": 2932 }, { "epoch": 0.6526479750778816, "grad_norm": 1.594328378539049, "learning_rate": 2.8475258295477324e-06, "loss": 0.4768, "step": 2933 }, { "epoch": 0.6528704939919893, "grad_norm": 1.430845019036844, "learning_rate": 2.844273831313565e-06, "loss": 0.4588, "step": 2934 }, { "epoch": 0.653093012906097, "grad_norm": 1.5461693060102057, "learning_rate": 2.8410229528255605e-06, "loss": 0.4883, "step": 2935 }, { "epoch": 0.6533155318202047, "grad_norm": 1.5481251692745077, "learning_rate": 2.837773195772315e-06, "loss": 0.4762, "step": 2936 }, { "epoch": 0.6535380507343124, "grad_norm": 1.548162126963636, "learning_rate": 2.8345245618418493e-06, "loss": 0.4584, "step": 2937 }, { "epoch": 0.6537605696484201, "grad_norm": 1.5717443105767077, "learning_rate": 2.8312770527215942e-06, "loss": 0.4675, "step": 2938 }, { "epoch": 0.6539830885625278, "grad_norm": 1.4705286400762625, "learning_rate": 2.828030670098401e-06, "loss": 0.4722, "step": 2939 }, { "epoch": 0.6542056074766355, "grad_norm": 1.5099798542527172, "learning_rate": 2.824785415658534e-06, "loss": 0.4679, "step": 2940 }, { "epoch": 0.6544281263907432, "grad_norm": 1.5469917964843027, "learning_rate": 2.8215412910876725e-06, "loss": 0.4832, "step": 2941 }, { "epoch": 0.654650645304851, "grad_norm": 1.4589149827533288, "learning_rate": 2.8182982980709082e-06, "loss": 0.4607, "step": 2942 }, { "epoch": 0.6548731642189586, "grad_norm": 1.428771514963962, "learning_rate": 2.815056438292743e-06, "loss": 0.4694, "step": 2943 }, { "epoch": 0.6550956831330663, "grad_norm": 1.4600858533904522, "learning_rate": 2.8118157134370928e-06, "loss": 0.4599, "step": 2944 }, { "epoch": 0.6553182020471741, "grad_norm": 1.4379938484978876, "learning_rate": 2.8085761251872847e-06, "loss": 0.4463, "step": 2945 }, { "epoch": 0.6555407209612817, "grad_norm": 1.514731261803648, "learning_rate": 2.8053376752260515e-06, "loss": 0.4695, "step": 2946 }, { "epoch": 0.6557632398753894, "grad_norm": 1.4234550955317657, "learning_rate": 2.802100365235542e-06, "loss": 0.437, "step": 2947 }, { "epoch": 0.655985758789497, "grad_norm": 1.4969644624255611, "learning_rate": 2.798864196897301e-06, "loss": 0.4576, "step": 2948 }, { "epoch": 0.6562082777036048, "grad_norm": 1.48053648372563, "learning_rate": 2.7956291718922925e-06, "loss": 0.4648, "step": 2949 }, { "epoch": 0.6564307966177125, "grad_norm": 1.4870148211152376, "learning_rate": 2.7923952919008823e-06, "loss": 0.4798, "step": 2950 }, { "epoch": 0.6566533155318202, "grad_norm": 1.4933535652206573, "learning_rate": 2.7891625586028336e-06, "loss": 0.4472, "step": 2951 }, { "epoch": 0.6568758344459279, "grad_norm": 1.5624080632187465, "learning_rate": 2.785930973677331e-06, "loss": 0.4515, "step": 2952 }, { "epoch": 0.6570983533600356, "grad_norm": 1.6101446837629756, "learning_rate": 2.7827005388029454e-06, "loss": 0.4648, "step": 2953 }, { "epoch": 0.6573208722741433, "grad_norm": 1.6401247298342718, "learning_rate": 2.77947125565766e-06, "loss": 0.4745, "step": 2954 }, { "epoch": 0.657543391188251, "grad_norm": 1.5041789167550634, "learning_rate": 2.776243125918858e-06, "loss": 0.4582, "step": 2955 }, { "epoch": 0.6577659101023587, "grad_norm": 1.4776560342800489, "learning_rate": 2.773016151263321e-06, "loss": 0.4566, "step": 2956 }, { "epoch": 0.6579884290164664, "grad_norm": 1.437238400703092, "learning_rate": 2.769790333367234e-06, "loss": 0.469, "step": 2957 }, { "epoch": 0.6582109479305741, "grad_norm": 1.542932442686436, "learning_rate": 2.7665656739061777e-06, "loss": 0.4484, "step": 2958 }, { "epoch": 0.6584334668446818, "grad_norm": 1.5131453121993848, "learning_rate": 2.763342174555137e-06, "loss": 0.4839, "step": 2959 }, { "epoch": 0.6586559857587895, "grad_norm": 1.4635277764986339, "learning_rate": 2.7601198369884845e-06, "loss": 0.4673, "step": 2960 }, { "epoch": 0.6588785046728972, "grad_norm": 1.5716880894918488, "learning_rate": 2.756898662879999e-06, "loss": 0.4814, "step": 2961 }, { "epoch": 0.6591010235870048, "grad_norm": 1.511421450659162, "learning_rate": 2.7536786539028503e-06, "loss": 0.4769, "step": 2962 }, { "epoch": 0.6593235425011126, "grad_norm": 1.860311528818102, "learning_rate": 2.7504598117296e-06, "loss": 0.4649, "step": 2963 }, { "epoch": 0.6595460614152203, "grad_norm": 1.4918450180171243, "learning_rate": 2.7472421380322147e-06, "loss": 0.4709, "step": 2964 }, { "epoch": 0.659768580329328, "grad_norm": 1.5054337700291212, "learning_rate": 2.7440256344820404e-06, "loss": 0.4541, "step": 2965 }, { "epoch": 0.6599910992434357, "grad_norm": 1.4668105211684621, "learning_rate": 2.7408103027498236e-06, "loss": 0.4618, "step": 2966 }, { "epoch": 0.6602136181575434, "grad_norm": 1.5431426226498648, "learning_rate": 2.7375961445057014e-06, "loss": 0.4692, "step": 2967 }, { "epoch": 0.660436137071651, "grad_norm": 1.4609459616295646, "learning_rate": 2.734383161419197e-06, "loss": 0.4595, "step": 2968 }, { "epoch": 0.6606586559857588, "grad_norm": 1.4405376291589465, "learning_rate": 2.731171355159228e-06, "loss": 0.4759, "step": 2969 }, { "epoch": 0.6608811748998665, "grad_norm": 1.5167724105370477, "learning_rate": 2.7279607273940977e-06, "loss": 0.4513, "step": 2970 }, { "epoch": 0.6611036938139742, "grad_norm": 1.4474850625856521, "learning_rate": 2.724751279791501e-06, "loss": 0.4655, "step": 2971 }, { "epoch": 0.6613262127280819, "grad_norm": 1.5551874427867447, "learning_rate": 2.7215430140185166e-06, "loss": 0.4556, "step": 2972 }, { "epoch": 0.6615487316421896, "grad_norm": 1.5826553007923025, "learning_rate": 2.718335931741608e-06, "loss": 0.4678, "step": 2973 }, { "epoch": 0.6617712505562973, "grad_norm": 1.838795157138924, "learning_rate": 2.7151300346266286e-06, "loss": 0.4725, "step": 2974 }, { "epoch": 0.661993769470405, "grad_norm": 1.6660364919862893, "learning_rate": 2.7119253243388113e-06, "loss": 0.4601, "step": 2975 }, { "epoch": 0.6622162883845126, "grad_norm": 1.6082210963906873, "learning_rate": 2.708721802542772e-06, "loss": 0.4744, "step": 2976 }, { "epoch": 0.6624388072986204, "grad_norm": 1.6358525048661179, "learning_rate": 2.7055194709025203e-06, "loss": 0.4455, "step": 2977 }, { "epoch": 0.6626613262127281, "grad_norm": 1.5155810129813803, "learning_rate": 2.7023183310814325e-06, "loss": 0.4763, "step": 2978 }, { "epoch": 0.6628838451268357, "grad_norm": 1.5345767038934466, "learning_rate": 2.6991183847422773e-06, "loss": 0.462, "step": 2979 }, { "epoch": 0.6631063640409435, "grad_norm": 1.6318048346031835, "learning_rate": 2.6959196335471937e-06, "loss": 0.4768, "step": 2980 }, { "epoch": 0.6633288829550512, "grad_norm": 1.5479339363451399, "learning_rate": 2.6927220791577084e-06, "loss": 0.4674, "step": 2981 }, { "epoch": 0.6635514018691588, "grad_norm": 1.458902992087847, "learning_rate": 2.6895257232347226e-06, "loss": 0.4599, "step": 2982 }, { "epoch": 0.6637739207832666, "grad_norm": 1.4343558260762546, "learning_rate": 2.686330567438515e-06, "loss": 0.4607, "step": 2983 }, { "epoch": 0.6639964396973743, "grad_norm": 1.4394899329563275, "learning_rate": 2.6831366134287434e-06, "loss": 0.4578, "step": 2984 }, { "epoch": 0.664218958611482, "grad_norm": 1.5141232441194168, "learning_rate": 2.6799438628644357e-06, "loss": 0.4637, "step": 2985 }, { "epoch": 0.6644414775255897, "grad_norm": 1.4494508102470225, "learning_rate": 2.676752317404001e-06, "loss": 0.4717, "step": 2986 }, { "epoch": 0.6646639964396974, "grad_norm": 1.5299489781230884, "learning_rate": 2.67356197870522e-06, "loss": 0.4563, "step": 2987 }, { "epoch": 0.664886515353805, "grad_norm": 1.4479565407056654, "learning_rate": 2.6703728484252424e-06, "loss": 0.4532, "step": 2988 }, { "epoch": 0.6651090342679128, "grad_norm": 1.5852337908407592, "learning_rate": 2.667184928220601e-06, "loss": 0.4982, "step": 2989 }, { "epoch": 0.6653315531820204, "grad_norm": 1.5359255017121654, "learning_rate": 2.6639982197471875e-06, "loss": 0.4995, "step": 2990 }, { "epoch": 0.6655540720961282, "grad_norm": 1.5533628846016296, "learning_rate": 2.6608127246602715e-06, "loss": 0.474, "step": 2991 }, { "epoch": 0.6657765910102359, "grad_norm": 1.4638074235066991, "learning_rate": 2.657628444614494e-06, "loss": 0.4609, "step": 2992 }, { "epoch": 0.6659991099243435, "grad_norm": 1.5314503865259304, "learning_rate": 2.654445381263856e-06, "loss": 0.4623, "step": 2993 }, { "epoch": 0.6662216288384513, "grad_norm": 1.4850838818780316, "learning_rate": 2.651263536261735e-06, "loss": 0.4723, "step": 2994 }, { "epoch": 0.666444147752559, "grad_norm": 1.5439273682884656, "learning_rate": 2.648082911260873e-06, "loss": 0.4726, "step": 2995 }, { "epoch": 0.6666666666666666, "grad_norm": 1.5446619498180514, "learning_rate": 2.6449035079133772e-06, "loss": 0.4691, "step": 2996 }, { "epoch": 0.6668891855807744, "grad_norm": 1.6311585929174441, "learning_rate": 2.6417253278707246e-06, "loss": 0.4748, "step": 2997 }, { "epoch": 0.6671117044948821, "grad_norm": 1.5862988014824708, "learning_rate": 2.6385483727837472e-06, "loss": 0.4574, "step": 2998 }, { "epoch": 0.6673342234089897, "grad_norm": 1.5867051424823484, "learning_rate": 2.635372644302653e-06, "loss": 0.4623, "step": 2999 }, { "epoch": 0.6675567423230975, "grad_norm": 1.529592464295062, "learning_rate": 2.6321981440769995e-06, "loss": 0.4815, "step": 3000 }, { "epoch": 0.6677792612372052, "grad_norm": 1.4924351054531393, "learning_rate": 2.629024873755721e-06, "loss": 0.4699, "step": 3001 }, { "epoch": 0.6680017801513128, "grad_norm": 1.604884789764474, "learning_rate": 2.6258528349871004e-06, "loss": 0.4614, "step": 3002 }, { "epoch": 0.6682242990654206, "grad_norm": 1.4984741676919724, "learning_rate": 2.622682029418788e-06, "loss": 0.4687, "step": 3003 }, { "epoch": 0.6684468179795282, "grad_norm": 1.721124384396919, "learning_rate": 2.6195124586977923e-06, "loss": 0.4659, "step": 3004 }, { "epoch": 0.668669336893636, "grad_norm": 1.6512276407355173, "learning_rate": 2.6163441244704758e-06, "loss": 0.4566, "step": 3005 }, { "epoch": 0.6688918558077437, "grad_norm": 1.572068289205624, "learning_rate": 2.6131770283825653e-06, "loss": 0.4544, "step": 3006 }, { "epoch": 0.6691143747218513, "grad_norm": 1.591773450511514, "learning_rate": 2.61001117207914e-06, "loss": 0.4587, "step": 3007 }, { "epoch": 0.669336893635959, "grad_norm": 1.524026433554218, "learning_rate": 2.606846557204638e-06, "loss": 0.4571, "step": 3008 }, { "epoch": 0.6695594125500668, "grad_norm": 1.5703993512088592, "learning_rate": 2.603683185402853e-06, "loss": 0.4601, "step": 3009 }, { "epoch": 0.6697819314641744, "grad_norm": 1.5609697097286928, "learning_rate": 2.600521058316927e-06, "loss": 0.4373, "step": 3010 }, { "epoch": 0.6700044503782822, "grad_norm": 1.5359881223756062, "learning_rate": 2.597360177589362e-06, "loss": 0.461, "step": 3011 }, { "epoch": 0.6702269692923899, "grad_norm": 1.51727402553303, "learning_rate": 2.594200544862012e-06, "loss": 0.4615, "step": 3012 }, { "epoch": 0.6704494882064975, "grad_norm": 1.533675509177913, "learning_rate": 2.591042161776075e-06, "loss": 0.4657, "step": 3013 }, { "epoch": 0.6706720071206053, "grad_norm": 1.560785861909357, "learning_rate": 2.5878850299721148e-06, "loss": 0.4736, "step": 3014 }, { "epoch": 0.670894526034713, "grad_norm": 1.5556028398794344, "learning_rate": 2.58472915109003e-06, "loss": 0.4648, "step": 3015 }, { "epoch": 0.6711170449488206, "grad_norm": 1.45856392018126, "learning_rate": 2.581574526769076e-06, "loss": 0.4608, "step": 3016 }, { "epoch": 0.6713395638629284, "grad_norm": 1.5535339328944362, "learning_rate": 2.578421158647859e-06, "loss": 0.4434, "step": 3017 }, { "epoch": 0.671562082777036, "grad_norm": 1.4817315902997374, "learning_rate": 2.575269048364324e-06, "loss": 0.4594, "step": 3018 }, { "epoch": 0.6717846016911437, "grad_norm": 1.4202642742539204, "learning_rate": 2.57211819755577e-06, "loss": 0.4611, "step": 3019 }, { "epoch": 0.6720071206052515, "grad_norm": 1.5458837176676132, "learning_rate": 2.5689686078588394e-06, "loss": 0.4702, "step": 3020 }, { "epoch": 0.6722296395193591, "grad_norm": 1.595057948602012, "learning_rate": 2.565820280909521e-06, "loss": 0.4663, "step": 3021 }, { "epoch": 0.6724521584334668, "grad_norm": 1.552948162330459, "learning_rate": 2.5626732183431446e-06, "loss": 0.4507, "step": 3022 }, { "epoch": 0.6726746773475746, "grad_norm": 1.5828366564688232, "learning_rate": 2.5595274217943844e-06, "loss": 0.4635, "step": 3023 }, { "epoch": 0.6728971962616822, "grad_norm": 1.6285513233734021, "learning_rate": 2.556382892897261e-06, "loss": 0.4552, "step": 3024 }, { "epoch": 0.67311971517579, "grad_norm": 1.6367886360320107, "learning_rate": 2.5532396332851266e-06, "loss": 0.4604, "step": 3025 }, { "epoch": 0.6733422340898977, "grad_norm": 1.6024369427539253, "learning_rate": 2.550097644590689e-06, "loss": 0.4698, "step": 3026 }, { "epoch": 0.6735647530040053, "grad_norm": 1.571529188571513, "learning_rate": 2.5469569284459814e-06, "loss": 0.455, "step": 3027 }, { "epoch": 0.673787271918113, "grad_norm": 1.5639244550395226, "learning_rate": 2.543817486482384e-06, "loss": 0.4677, "step": 3028 }, { "epoch": 0.6740097908322208, "grad_norm": 1.4740609640965086, "learning_rate": 2.540679320330615e-06, "loss": 0.4489, "step": 3029 }, { "epoch": 0.6742323097463284, "grad_norm": 1.5284641575879956, "learning_rate": 2.537542431620724e-06, "loss": 0.474, "step": 3030 }, { "epoch": 0.6744548286604362, "grad_norm": 1.8532722064365794, "learning_rate": 2.534406821982105e-06, "loss": 0.4759, "step": 3031 }, { "epoch": 0.6746773475745438, "grad_norm": 1.4730091063101736, "learning_rate": 2.5312724930434806e-06, "loss": 0.4793, "step": 3032 }, { "epoch": 0.6748998664886515, "grad_norm": 1.5402943046671058, "learning_rate": 2.5281394464329144e-06, "loss": 0.4616, "step": 3033 }, { "epoch": 0.6751223854027593, "grad_norm": 1.42790520172222, "learning_rate": 2.5250076837778015e-06, "loss": 0.4701, "step": 3034 }, { "epoch": 0.6753449043168669, "grad_norm": 1.5387854389804954, "learning_rate": 2.521877206704865e-06, "loss": 0.4583, "step": 3035 }, { "epoch": 0.6755674232309746, "grad_norm": 1.4560239238711912, "learning_rate": 2.5187480168401697e-06, "loss": 0.4397, "step": 3036 }, { "epoch": 0.6757899421450824, "grad_norm": 1.5082398970992035, "learning_rate": 2.5156201158091e-06, "loss": 0.4794, "step": 3037 }, { "epoch": 0.67601246105919, "grad_norm": 1.5594909844680314, "learning_rate": 2.5124935052363854e-06, "loss": 0.459, "step": 3038 }, { "epoch": 0.6762349799732977, "grad_norm": 1.5210147898064368, "learning_rate": 2.5093681867460743e-06, "loss": 0.4545, "step": 3039 }, { "epoch": 0.6764574988874055, "grad_norm": 1.5857101868728525, "learning_rate": 2.5062441619615457e-06, "loss": 0.4552, "step": 3040 }, { "epoch": 0.6766800178015131, "grad_norm": 1.5141878317176205, "learning_rate": 2.503121432505511e-06, "loss": 0.4489, "step": 3041 }, { "epoch": 0.6769025367156208, "grad_norm": 1.5956943586553731, "learning_rate": 2.5000000000000015e-06, "loss": 0.4639, "step": 3042 }, { "epoch": 0.6771250556297286, "grad_norm": 1.6945491337887706, "learning_rate": 2.4968798660663805e-06, "loss": 0.4583, "step": 3043 }, { "epoch": 0.6773475745438362, "grad_norm": 1.6065332939207797, "learning_rate": 2.4937610323253363e-06, "loss": 0.4716, "step": 3044 }, { "epoch": 0.677570093457944, "grad_norm": 1.5072352671916454, "learning_rate": 2.4906435003968804e-06, "loss": 0.4687, "step": 3045 }, { "epoch": 0.6777926123720516, "grad_norm": 1.562646350820419, "learning_rate": 2.4875272719003505e-06, "loss": 0.4509, "step": 3046 }, { "epoch": 0.6780151312861593, "grad_norm": 1.6007847919557234, "learning_rate": 2.484412348454402e-06, "loss": 0.4458, "step": 3047 }, { "epoch": 0.678237650200267, "grad_norm": 1.6609231246868175, "learning_rate": 2.481298731677016e-06, "loss": 0.4678, "step": 3048 }, { "epoch": 0.6784601691143747, "grad_norm": 1.5972550468923958, "learning_rate": 2.4781864231854983e-06, "loss": 0.4656, "step": 3049 }, { "epoch": 0.6786826880284824, "grad_norm": 1.5151152516213073, "learning_rate": 2.475075424596465e-06, "loss": 0.4567, "step": 3050 }, { "epoch": 0.6789052069425902, "grad_norm": 1.4366365422175533, "learning_rate": 2.471965737525865e-06, "loss": 0.4545, "step": 3051 }, { "epoch": 0.6791277258566978, "grad_norm": 1.5922742155435077, "learning_rate": 2.4688573635889545e-06, "loss": 0.4808, "step": 3052 }, { "epoch": 0.6793502447708055, "grad_norm": 1.5140533498322344, "learning_rate": 2.4657503044003144e-06, "loss": 0.474, "step": 3053 }, { "epoch": 0.6795727636849133, "grad_norm": 1.5732751690059124, "learning_rate": 2.462644561573842e-06, "loss": 0.4669, "step": 3054 }, { "epoch": 0.6797952825990209, "grad_norm": 1.5196621137683166, "learning_rate": 2.4595401367227455e-06, "loss": 0.4601, "step": 3055 }, { "epoch": 0.6800178015131286, "grad_norm": 1.5311554811971546, "learning_rate": 2.456437031459555e-06, "loss": 0.4606, "step": 3056 }, { "epoch": 0.6802403204272364, "grad_norm": 1.5191067395047329, "learning_rate": 2.4533352473961115e-06, "loss": 0.452, "step": 3057 }, { "epoch": 0.680462839341344, "grad_norm": 1.478218360280578, "learning_rate": 2.4502347861435717e-06, "loss": 0.4584, "step": 3058 }, { "epoch": 0.6806853582554517, "grad_norm": 1.6279968084731105, "learning_rate": 2.447135649312406e-06, "loss": 0.4799, "step": 3059 }, { "epoch": 0.6809078771695594, "grad_norm": 1.5907539130396853, "learning_rate": 2.4440378385123915e-06, "loss": 0.4602, "step": 3060 }, { "epoch": 0.6811303960836671, "grad_norm": 1.5084075981957523, "learning_rate": 2.4409413553526236e-06, "loss": 0.4589, "step": 3061 }, { "epoch": 0.6813529149977748, "grad_norm": 1.5631065091353318, "learning_rate": 2.4378462014414994e-06, "loss": 0.4617, "step": 3062 }, { "epoch": 0.6815754339118825, "grad_norm": 1.6020435197010852, "learning_rate": 2.434752378386739e-06, "loss": 0.4491, "step": 3063 }, { "epoch": 0.6817979528259902, "grad_norm": 1.5449379792467077, "learning_rate": 2.4316598877953557e-06, "loss": 0.4693, "step": 3064 }, { "epoch": 0.6820204717400979, "grad_norm": 1.4285256583641388, "learning_rate": 2.4285687312736815e-06, "loss": 0.4673, "step": 3065 }, { "epoch": 0.6822429906542056, "grad_norm": 1.6772734922680756, "learning_rate": 2.425478910427353e-06, "loss": 0.4588, "step": 3066 }, { "epoch": 0.6824655095683133, "grad_norm": 1.5600450452846257, "learning_rate": 2.422390426861309e-06, "loss": 0.4799, "step": 3067 }, { "epoch": 0.682688028482421, "grad_norm": 1.534124983180479, "learning_rate": 2.419303282179798e-06, "loss": 0.4585, "step": 3068 }, { "epoch": 0.6829105473965287, "grad_norm": 1.5567151452633392, "learning_rate": 2.4162174779863716e-06, "loss": 0.4564, "step": 3069 }, { "epoch": 0.6831330663106364, "grad_norm": 1.594458602466786, "learning_rate": 2.4131330158838855e-06, "loss": 0.47, "step": 3070 }, { "epoch": 0.6833555852247442, "grad_norm": 1.4935176885340569, "learning_rate": 2.4100498974744997e-06, "loss": 0.4701, "step": 3071 }, { "epoch": 0.6835781041388518, "grad_norm": 1.425071255462995, "learning_rate": 2.406968124359671e-06, "loss": 0.4603, "step": 3072 }, { "epoch": 0.6838006230529595, "grad_norm": 1.5519459334585222, "learning_rate": 2.4038876981401632e-06, "loss": 0.4548, "step": 3073 }, { "epoch": 0.6840231419670671, "grad_norm": 1.481523029725298, "learning_rate": 2.4008086204160375e-06, "loss": 0.4552, "step": 3074 }, { "epoch": 0.6842456608811749, "grad_norm": 1.5677454119944472, "learning_rate": 2.3977308927866554e-06, "loss": 0.4459, "step": 3075 }, { "epoch": 0.6844681797952826, "grad_norm": 1.5001254556246169, "learning_rate": 2.39465451685068e-06, "loss": 0.4458, "step": 3076 }, { "epoch": 0.6846906987093903, "grad_norm": 1.4522289021332626, "learning_rate": 2.391579494206065e-06, "loss": 0.4628, "step": 3077 }, { "epoch": 0.684913217623498, "grad_norm": 1.6141206976354299, "learning_rate": 2.388505826450067e-06, "loss": 0.4567, "step": 3078 }, { "epoch": 0.6851357365376057, "grad_norm": 1.5810545863808363, "learning_rate": 2.385433515179241e-06, "loss": 0.4494, "step": 3079 }, { "epoch": 0.6853582554517134, "grad_norm": 1.4269717068948973, "learning_rate": 2.3823625619894275e-06, "loss": 0.4586, "step": 3080 }, { "epoch": 0.6855807743658211, "grad_norm": 1.5092393444131513, "learning_rate": 2.3792929684757714e-06, "loss": 0.4565, "step": 3081 }, { "epoch": 0.6858032932799288, "grad_norm": 1.676079311454782, "learning_rate": 2.376224736232708e-06, "loss": 0.4644, "step": 3082 }, { "epoch": 0.6860258121940365, "grad_norm": 1.6104682629318212, "learning_rate": 2.3731578668539666e-06, "loss": 0.465, "step": 3083 }, { "epoch": 0.6862483311081442, "grad_norm": 1.4709388914453203, "learning_rate": 2.370092361932564e-06, "loss": 0.4459, "step": 3084 }, { "epoch": 0.6864708500222519, "grad_norm": 1.5400469455192027, "learning_rate": 2.367028223060813e-06, "loss": 0.4677, "step": 3085 }, { "epoch": 0.6866933689363596, "grad_norm": 1.621775016842325, "learning_rate": 2.363965451830318e-06, "loss": 0.4536, "step": 3086 }, { "epoch": 0.6869158878504673, "grad_norm": 1.7362357492139953, "learning_rate": 2.360904049831964e-06, "loss": 0.4597, "step": 3087 }, { "epoch": 0.6871384067645749, "grad_norm": 1.5910296878106114, "learning_rate": 2.3578440186559393e-06, "loss": 0.4671, "step": 3088 }, { "epoch": 0.6873609256786827, "grad_norm": 1.570508788775816, "learning_rate": 2.3547853598917066e-06, "loss": 0.4715, "step": 3089 }, { "epoch": 0.6875834445927904, "grad_norm": 1.5385022059968896, "learning_rate": 2.3517280751280224e-06, "loss": 0.4624, "step": 3090 }, { "epoch": 0.687805963506898, "grad_norm": 1.7672128242301421, "learning_rate": 2.348672165952931e-06, "loss": 0.4772, "step": 3091 }, { "epoch": 0.6880284824210058, "grad_norm": 1.555045886634184, "learning_rate": 2.3456176339537552e-06, "loss": 0.4763, "step": 3092 }, { "epoch": 0.6882510013351135, "grad_norm": 1.567019156335646, "learning_rate": 2.3425644807171084e-06, "loss": 0.4576, "step": 3093 }, { "epoch": 0.6884735202492211, "grad_norm": 1.5596744553428579, "learning_rate": 2.3395127078288872e-06, "loss": 0.4577, "step": 3094 }, { "epoch": 0.6886960391633289, "grad_norm": 1.7031724178266057, "learning_rate": 2.336462316874269e-06, "loss": 0.4561, "step": 3095 }, { "epoch": 0.6889185580774366, "grad_norm": 1.6411823521319298, "learning_rate": 2.333413309437717e-06, "loss": 0.4752, "step": 3096 }, { "epoch": 0.6891410769915443, "grad_norm": 1.574533148637288, "learning_rate": 2.33036568710297e-06, "loss": 0.4639, "step": 3097 }, { "epoch": 0.689363595905652, "grad_norm": 1.5220089656805897, "learning_rate": 2.327319451453052e-06, "loss": 0.4762, "step": 3098 }, { "epoch": 0.6895861148197597, "grad_norm": 1.5485934008935576, "learning_rate": 2.3242746040702657e-06, "loss": 0.4601, "step": 3099 }, { "epoch": 0.6898086337338674, "grad_norm": 1.5397626668335296, "learning_rate": 2.3212311465361918e-06, "loss": 0.4487, "step": 3100 }, { "epoch": 0.6900311526479751, "grad_norm": 1.5609478869125992, "learning_rate": 2.3181890804316928e-06, "loss": 0.4463, "step": 3101 }, { "epoch": 0.6902536715620827, "grad_norm": 1.5658443364294528, "learning_rate": 2.3151484073369e-06, "loss": 0.4476, "step": 3102 }, { "epoch": 0.6904761904761905, "grad_norm": 1.5877940322510646, "learning_rate": 2.3121091288312315e-06, "loss": 0.4403, "step": 3103 }, { "epoch": 0.6906987093902982, "grad_norm": 5.870819005628173, "learning_rate": 2.3090712464933714e-06, "loss": 0.4579, "step": 3104 }, { "epoch": 0.6909212283044058, "grad_norm": 1.571154292852244, "learning_rate": 2.306034761901285e-06, "loss": 0.457, "step": 3105 }, { "epoch": 0.6911437472185136, "grad_norm": 1.6592741624208995, "learning_rate": 2.30299967663221e-06, "loss": 0.4683, "step": 3106 }, { "epoch": 0.6913662661326213, "grad_norm": 1.6450961200754572, "learning_rate": 2.2999659922626568e-06, "loss": 0.4515, "step": 3107 }, { "epoch": 0.6915887850467289, "grad_norm": 1.666295249166609, "learning_rate": 2.2969337103684107e-06, "loss": 0.46, "step": 3108 }, { "epoch": 0.6918113039608367, "grad_norm": 1.5639875035484692, "learning_rate": 2.2939028325245216e-06, "loss": 0.4417, "step": 3109 }, { "epoch": 0.6920338228749444, "grad_norm": 1.5622093261211618, "learning_rate": 2.2908733603053167e-06, "loss": 0.4382, "step": 3110 }, { "epoch": 0.692256341789052, "grad_norm": 1.5725650142298941, "learning_rate": 2.2878452952843918e-06, "loss": 0.4518, "step": 3111 }, { "epoch": 0.6924788607031598, "grad_norm": 1.5943827286395034, "learning_rate": 2.28481863903461e-06, "loss": 0.4542, "step": 3112 }, { "epoch": 0.6927013796172675, "grad_norm": 1.6294320541661482, "learning_rate": 2.2817933931281057e-06, "loss": 0.4567, "step": 3113 }, { "epoch": 0.6929238985313751, "grad_norm": 1.5952686288209175, "learning_rate": 2.278769559136275e-06, "loss": 0.4425, "step": 3114 }, { "epoch": 0.6931464174454829, "grad_norm": 1.5319501665297808, "learning_rate": 2.2757471386297857e-06, "loss": 0.45, "step": 3115 }, { "epoch": 0.6933689363595905, "grad_norm": 1.5175634325960992, "learning_rate": 2.272726133178572e-06, "loss": 0.4486, "step": 3116 }, { "epoch": 0.6935914552736983, "grad_norm": 1.523900131111348, "learning_rate": 2.269706544351828e-06, "loss": 0.4568, "step": 3117 }, { "epoch": 0.693813974187806, "grad_norm": 1.485065798056805, "learning_rate": 2.2666883737180158e-06, "loss": 0.4632, "step": 3118 }, { "epoch": 0.6940364931019136, "grad_norm": 1.4785183410061078, "learning_rate": 2.26367162284486e-06, "loss": 0.46, "step": 3119 }, { "epoch": 0.6942590120160214, "grad_norm": 1.5136644706752889, "learning_rate": 2.2606562932993486e-06, "loss": 0.4757, "step": 3120 }, { "epoch": 0.6944815309301291, "grad_norm": 1.4275616880696749, "learning_rate": 2.257642386647732e-06, "loss": 0.4589, "step": 3121 }, { "epoch": 0.6947040498442367, "grad_norm": 1.5589249495950017, "learning_rate": 2.2546299044555166e-06, "loss": 0.4637, "step": 3122 }, { "epoch": 0.6949265687583445, "grad_norm": 1.5576679609923545, "learning_rate": 2.2516188482874745e-06, "loss": 0.4482, "step": 3123 }, { "epoch": 0.6951490876724522, "grad_norm": 1.5470576114774428, "learning_rate": 2.248609219707631e-06, "loss": 0.4471, "step": 3124 }, { "epoch": 0.6953716065865598, "grad_norm": 1.671068996945424, "learning_rate": 2.2456010202792804e-06, "loss": 0.4553, "step": 3125 }, { "epoch": 0.6955941255006676, "grad_norm": 1.7366489667475415, "learning_rate": 2.2425942515649624e-06, "loss": 0.4671, "step": 3126 }, { "epoch": 0.6958166444147753, "grad_norm": 1.6499401265329976, "learning_rate": 2.239588915126481e-06, "loss": 0.4582, "step": 3127 }, { "epoch": 0.6960391633288829, "grad_norm": 1.7014057959172895, "learning_rate": 2.2365850125248967e-06, "loss": 0.4431, "step": 3128 }, { "epoch": 0.6962616822429907, "grad_norm": 1.600061711249905, "learning_rate": 2.2335825453205183e-06, "loss": 0.4736, "step": 3129 }, { "epoch": 0.6964842011570983, "grad_norm": 1.757052494166697, "learning_rate": 2.2305815150729167e-06, "loss": 0.441, "step": 3130 }, { "epoch": 0.696706720071206, "grad_norm": 1.5572212767682263, "learning_rate": 2.227581923340912e-06, "loss": 0.4627, "step": 3131 }, { "epoch": 0.6969292389853138, "grad_norm": 1.6079058074111627, "learning_rate": 2.224583771682579e-06, "loss": 0.4586, "step": 3132 }, { "epoch": 0.6971517578994214, "grad_norm": 1.556732615679921, "learning_rate": 2.221587061655246e-06, "loss": 0.4617, "step": 3133 }, { "epoch": 0.6973742768135291, "grad_norm": 1.5213567713892084, "learning_rate": 2.218591794815486e-06, "loss": 0.4775, "step": 3134 }, { "epoch": 0.6975967957276369, "grad_norm": 1.5326629509305738, "learning_rate": 2.215597972719129e-06, "loss": 0.4475, "step": 3135 }, { "epoch": 0.6978193146417445, "grad_norm": 1.6492710791197727, "learning_rate": 2.2126055969212523e-06, "loss": 0.4569, "step": 3136 }, { "epoch": 0.6980418335558523, "grad_norm": 1.5146168569124718, "learning_rate": 2.209614668976182e-06, "loss": 0.4744, "step": 3137 }, { "epoch": 0.69826435246996, "grad_norm": 1.5126479954438075, "learning_rate": 2.2066251904374937e-06, "loss": 0.4783, "step": 3138 }, { "epoch": 0.6984868713840676, "grad_norm": 1.5375075220262602, "learning_rate": 2.203637162858005e-06, "loss": 0.459, "step": 3139 }, { "epoch": 0.6987093902981754, "grad_norm": 1.449146565847769, "learning_rate": 2.200650587789785e-06, "loss": 0.4275, "step": 3140 }, { "epoch": 0.6989319092122831, "grad_norm": 1.4979205327449552, "learning_rate": 2.1976654667841484e-06, "loss": 0.4668, "step": 3141 }, { "epoch": 0.6991544281263907, "grad_norm": 1.5547230965699923, "learning_rate": 2.1946818013916484e-06, "loss": 0.4635, "step": 3142 }, { "epoch": 0.6993769470404985, "grad_norm": 1.532027516651703, "learning_rate": 2.1916995931620923e-06, "loss": 0.4543, "step": 3143 }, { "epoch": 0.6995994659546061, "grad_norm": 1.5899230859099927, "learning_rate": 2.1887188436445207e-06, "loss": 0.4481, "step": 3144 }, { "epoch": 0.6998219848687138, "grad_norm": 1.6662945457615554, "learning_rate": 2.1857395543872234e-06, "loss": 0.4748, "step": 3145 }, { "epoch": 0.7000445037828216, "grad_norm": 1.5375802260380809, "learning_rate": 2.1827617269377256e-06, "loss": 0.4359, "step": 3146 }, { "epoch": 0.7002670226969292, "grad_norm": 1.6522278468678444, "learning_rate": 2.1797853628427985e-06, "loss": 0.4651, "step": 3147 }, { "epoch": 0.7004895416110369, "grad_norm": 1.68936851023124, "learning_rate": 2.1768104636484503e-06, "loss": 0.4564, "step": 3148 }, { "epoch": 0.7007120605251447, "grad_norm": 1.4851207480122832, "learning_rate": 2.1738370308999307e-06, "loss": 0.4465, "step": 3149 }, { "epoch": 0.7009345794392523, "grad_norm": 1.579101855899259, "learning_rate": 2.1708650661417268e-06, "loss": 0.4623, "step": 3150 }, { "epoch": 0.70115709835336, "grad_norm": 1.490511939804927, "learning_rate": 2.16789457091756e-06, "loss": 0.4456, "step": 3151 }, { "epoch": 0.7013796172674678, "grad_norm": 1.603132660128251, "learning_rate": 2.1649255467703912e-06, "loss": 0.4726, "step": 3152 }, { "epoch": 0.7016021361815754, "grad_norm": 1.6922127372254843, "learning_rate": 2.1619579952424204e-06, "loss": 0.4753, "step": 3153 }, { "epoch": 0.7018246550956831, "grad_norm": 1.572202520639884, "learning_rate": 2.158991917875074e-06, "loss": 0.4546, "step": 3154 }, { "epoch": 0.7020471740097909, "grad_norm": 1.568435817989794, "learning_rate": 2.156027316209021e-06, "loss": 0.464, "step": 3155 }, { "epoch": 0.7022696929238985, "grad_norm": 1.5099837402871683, "learning_rate": 2.15306419178416e-06, "loss": 0.4518, "step": 3156 }, { "epoch": 0.7024922118380063, "grad_norm": 1.5963313293285872, "learning_rate": 2.150102546139623e-06, "loss": 0.4488, "step": 3157 }, { "epoch": 0.7027147307521139, "grad_norm": 1.5109337820111473, "learning_rate": 2.1471423808137765e-06, "loss": 0.4533, "step": 3158 }, { "epoch": 0.7029372496662216, "grad_norm": 1.547050456189328, "learning_rate": 2.144183697344211e-06, "loss": 0.4443, "step": 3159 }, { "epoch": 0.7031597685803294, "grad_norm": 1.728653754833563, "learning_rate": 2.1412264972677537e-06, "loss": 0.4659, "step": 3160 }, { "epoch": 0.703382287494437, "grad_norm": 1.569220130367665, "learning_rate": 2.13827078212046e-06, "loss": 0.4525, "step": 3161 }, { "epoch": 0.7036048064085447, "grad_norm": 1.4945974026443363, "learning_rate": 2.135316553437613e-06, "loss": 0.4584, "step": 3162 }, { "epoch": 0.7038273253226525, "grad_norm": 1.52225699589081, "learning_rate": 2.1323638127537256e-06, "loss": 0.4582, "step": 3163 }, { "epoch": 0.7040498442367601, "grad_norm": 1.661224440437875, "learning_rate": 2.1294125616025323e-06, "loss": 0.4779, "step": 3164 }, { "epoch": 0.7042723631508678, "grad_norm": 1.5883790497807826, "learning_rate": 2.1264628015170026e-06, "loss": 0.4479, "step": 3165 }, { "epoch": 0.7044948820649756, "grad_norm": 1.5384224997557157, "learning_rate": 2.123514534029322e-06, "loss": 0.4362, "step": 3166 }, { "epoch": 0.7047174009790832, "grad_norm": 1.5997689917721043, "learning_rate": 2.1205677606709056e-06, "loss": 0.4589, "step": 3167 }, { "epoch": 0.7049399198931909, "grad_norm": 1.5517453114314252, "learning_rate": 2.117622482972398e-06, "loss": 0.4508, "step": 3168 }, { "epoch": 0.7051624388072987, "grad_norm": 1.8034063596482446, "learning_rate": 2.1146787024636555e-06, "loss": 0.4548, "step": 3169 }, { "epoch": 0.7053849577214063, "grad_norm": 1.6011381527575876, "learning_rate": 2.111736420673767e-06, "loss": 0.4511, "step": 3170 }, { "epoch": 0.705607476635514, "grad_norm": 1.5799083018665867, "learning_rate": 2.108795639131032e-06, "loss": 0.4552, "step": 3171 }, { "epoch": 0.7058299955496217, "grad_norm": 1.5066184556595663, "learning_rate": 2.105856359362982e-06, "loss": 0.4427, "step": 3172 }, { "epoch": 0.7060525144637294, "grad_norm": 1.6112900030171324, "learning_rate": 2.1029185828963604e-06, "loss": 0.4367, "step": 3173 }, { "epoch": 0.7062750333778371, "grad_norm": 1.5945437188108216, "learning_rate": 2.0999823112571356e-06, "loss": 0.4491, "step": 3174 }, { "epoch": 0.7064975522919448, "grad_norm": 1.5997397291220967, "learning_rate": 2.097047545970491e-06, "loss": 0.4372, "step": 3175 }, { "epoch": 0.7067200712060525, "grad_norm": 1.665625290212202, "learning_rate": 2.094114288560826e-06, "loss": 0.4628, "step": 3176 }, { "epoch": 0.7069425901201603, "grad_norm": 1.5622495630707347, "learning_rate": 2.09118254055176e-06, "loss": 0.4564, "step": 3177 }, { "epoch": 0.7071651090342679, "grad_norm": 1.4999751520829996, "learning_rate": 2.0882523034661297e-06, "loss": 0.4589, "step": 3178 }, { "epoch": 0.7073876279483756, "grad_norm": 1.5302541416524498, "learning_rate": 2.0853235788259773e-06, "loss": 0.4631, "step": 3179 }, { "epoch": 0.7076101468624834, "grad_norm": 1.522369258058369, "learning_rate": 2.082396368152576e-06, "loss": 0.468, "step": 3180 }, { "epoch": 0.707832665776591, "grad_norm": 1.582063892474226, "learning_rate": 2.0794706729663965e-06, "loss": 0.4449, "step": 3181 }, { "epoch": 0.7080551846906987, "grad_norm": 1.4788199372596444, "learning_rate": 2.076546494787131e-06, "loss": 0.4494, "step": 3182 }, { "epoch": 0.7082777036048065, "grad_norm": 1.511480499264368, "learning_rate": 2.073623835133684e-06, "loss": 0.4351, "step": 3183 }, { "epoch": 0.7085002225189141, "grad_norm": 1.5336503216589559, "learning_rate": 2.0707026955241657e-06, "loss": 0.4591, "step": 3184 }, { "epoch": 0.7087227414330218, "grad_norm": 1.6724189145470596, "learning_rate": 2.0677830774759002e-06, "loss": 0.4611, "step": 3185 }, { "epoch": 0.7089452603471295, "grad_norm": 1.6388952419977771, "learning_rate": 2.064864982505422e-06, "loss": 0.4583, "step": 3186 }, { "epoch": 0.7091677792612372, "grad_norm": 1.6161807763752392, "learning_rate": 2.0619484121284754e-06, "loss": 0.4596, "step": 3187 }, { "epoch": 0.7093902981753449, "grad_norm": 1.6613775801258905, "learning_rate": 2.059033367860008e-06, "loss": 0.4587, "step": 3188 }, { "epoch": 0.7096128170894526, "grad_norm": 1.6911562304591443, "learning_rate": 2.0561198512141784e-06, "loss": 0.459, "step": 3189 }, { "epoch": 0.7098353360035603, "grad_norm": 1.6125122531406337, "learning_rate": 2.0532078637043523e-06, "loss": 0.4442, "step": 3190 }, { "epoch": 0.710057854917668, "grad_norm": 1.8581833446589873, "learning_rate": 2.0502974068430957e-06, "loss": 0.4695, "step": 3191 }, { "epoch": 0.7102803738317757, "grad_norm": 1.6701653901429059, "learning_rate": 2.04738848214219e-06, "loss": 0.464, "step": 3192 }, { "epoch": 0.7105028927458834, "grad_norm": 1.6791771744565056, "learning_rate": 2.0444810911126094e-06, "loss": 0.458, "step": 3193 }, { "epoch": 0.7107254116599911, "grad_norm": 1.707835403053552, "learning_rate": 2.041575235264538e-06, "loss": 0.4811, "step": 3194 }, { "epoch": 0.7109479305740988, "grad_norm": 1.5862555303016161, "learning_rate": 2.038670916107362e-06, "loss": 0.4598, "step": 3195 }, { "epoch": 0.7111704494882065, "grad_norm": 1.577504654818574, "learning_rate": 2.0357681351496663e-06, "loss": 0.4686, "step": 3196 }, { "epoch": 0.7113929684023143, "grad_norm": 1.6848571699296095, "learning_rate": 2.0328668938992387e-06, "loss": 0.4586, "step": 3197 }, { "epoch": 0.7116154873164219, "grad_norm": 1.6013513298573598, "learning_rate": 2.0299671938630685e-06, "loss": 0.4731, "step": 3198 }, { "epoch": 0.7118380062305296, "grad_norm": 1.5539062061437536, "learning_rate": 2.027069036547343e-06, "loss": 0.4482, "step": 3199 }, { "epoch": 0.7120605251446372, "grad_norm": 1.6204389118615636, "learning_rate": 2.02417242345745e-06, "loss": 0.4402, "step": 3200 }, { "epoch": 0.712283044058745, "grad_norm": 1.5081327854128774, "learning_rate": 2.0212773560979704e-06, "loss": 0.4659, "step": 3201 }, { "epoch": 0.7125055629728527, "grad_norm": 1.482510123067662, "learning_rate": 2.0183838359726873e-06, "loss": 0.4686, "step": 3202 }, { "epoch": 0.7127280818869604, "grad_norm": 1.5208822051122648, "learning_rate": 2.0154918645845795e-06, "loss": 0.451, "step": 3203 }, { "epoch": 0.7129506008010681, "grad_norm": 1.6848698548032277, "learning_rate": 2.0126014434358152e-06, "loss": 0.465, "step": 3204 }, { "epoch": 0.7131731197151758, "grad_norm": 1.6572374724988883, "learning_rate": 2.0097125740277696e-06, "loss": 0.4458, "step": 3205 }, { "epoch": 0.7133956386292835, "grad_norm": 1.5773420517110024, "learning_rate": 2.006825257860999e-06, "loss": 0.4662, "step": 3206 }, { "epoch": 0.7136181575433912, "grad_norm": 1.4827964723541933, "learning_rate": 2.0039394964352615e-06, "loss": 0.4422, "step": 3207 }, { "epoch": 0.7138406764574989, "grad_norm": 1.6031102572277045, "learning_rate": 2.001055291249501e-06, "loss": 0.4501, "step": 3208 }, { "epoch": 0.7140631953716066, "grad_norm": 1.7497719457801202, "learning_rate": 1.9981726438018596e-06, "loss": 0.4722, "step": 3209 }, { "epoch": 0.7142857142857143, "grad_norm": 1.5900028740540597, "learning_rate": 1.9952915555896664e-06, "loss": 0.4277, "step": 3210 }, { "epoch": 0.7145082331998219, "grad_norm": 1.6525208671821991, "learning_rate": 1.992412028109441e-06, "loss": 0.4648, "step": 3211 }, { "epoch": 0.7147307521139297, "grad_norm": 2.513143893193337, "learning_rate": 1.9895340628568952e-06, "loss": 0.4577, "step": 3212 }, { "epoch": 0.7149532710280374, "grad_norm": 1.7312706467886665, "learning_rate": 1.986657661326924e-06, "loss": 0.4758, "step": 3213 }, { "epoch": 0.715175789942145, "grad_norm": 1.6914999685703473, "learning_rate": 1.9837828250136137e-06, "loss": 0.4661, "step": 3214 }, { "epoch": 0.7153983088562528, "grad_norm": 1.656592775845252, "learning_rate": 1.980909555410239e-06, "loss": 0.466, "step": 3215 }, { "epoch": 0.7156208277703605, "grad_norm": 1.5898221553063143, "learning_rate": 1.9780378540092532e-06, "loss": 0.4395, "step": 3216 }, { "epoch": 0.7158433466844681, "grad_norm": 1.5818452103133107, "learning_rate": 1.9751677223023088e-06, "loss": 0.4472, "step": 3217 }, { "epoch": 0.7160658655985759, "grad_norm": 1.5802493186789257, "learning_rate": 1.972299161780228e-06, "loss": 0.4605, "step": 3218 }, { "epoch": 0.7162883845126836, "grad_norm": 1.528189789820153, "learning_rate": 1.969432173933026e-06, "loss": 0.453, "step": 3219 }, { "epoch": 0.7165109034267912, "grad_norm": 1.5918115284963505, "learning_rate": 1.966566760249901e-06, "loss": 0.4456, "step": 3220 }, { "epoch": 0.716733422340899, "grad_norm": 1.6252612523927044, "learning_rate": 1.963702922219227e-06, "loss": 0.4643, "step": 3221 }, { "epoch": 0.7169559412550067, "grad_norm": 1.523787296541616, "learning_rate": 1.9608406613285657e-06, "loss": 0.4606, "step": 3222 }, { "epoch": 0.7171784601691144, "grad_norm": 1.5124201904034393, "learning_rate": 1.9579799790646587e-06, "loss": 0.4386, "step": 3223 }, { "epoch": 0.7174009790832221, "grad_norm": 1.5997159220742962, "learning_rate": 1.955120876913425e-06, "loss": 0.4534, "step": 3224 }, { "epoch": 0.7176234979973297, "grad_norm": 1.6274856558463937, "learning_rate": 1.952263356359967e-06, "loss": 0.4538, "step": 3225 }, { "epoch": 0.7178460169114375, "grad_norm": 1.6148613252422814, "learning_rate": 1.94940741888856e-06, "loss": 0.4511, "step": 3226 }, { "epoch": 0.7180685358255452, "grad_norm": 1.6440028219134388, "learning_rate": 1.9465530659826633e-06, "loss": 0.4523, "step": 3227 }, { "epoch": 0.7182910547396528, "grad_norm": 1.638138148810441, "learning_rate": 1.943700299124904e-06, "loss": 0.456, "step": 3228 }, { "epoch": 0.7185135736537606, "grad_norm": 1.6878847658062568, "learning_rate": 1.940849119797097e-06, "loss": 0.4495, "step": 3229 }, { "epoch": 0.7187360925678683, "grad_norm": 1.5110632560225814, "learning_rate": 1.9379995294802264e-06, "loss": 0.4401, "step": 3230 }, { "epoch": 0.7189586114819759, "grad_norm": 1.8419128931367794, "learning_rate": 1.935151529654449e-06, "loss": 0.4462, "step": 3231 }, { "epoch": 0.7191811303960837, "grad_norm": 1.5887345648840154, "learning_rate": 1.9323051217990997e-06, "loss": 0.4504, "step": 3232 }, { "epoch": 0.7194036493101914, "grad_norm": 1.5748316815542687, "learning_rate": 1.929460307392683e-06, "loss": 0.4469, "step": 3233 }, { "epoch": 0.719626168224299, "grad_norm": 1.545247992660044, "learning_rate": 1.9266170879128764e-06, "loss": 0.4403, "step": 3234 }, { "epoch": 0.7198486871384068, "grad_norm": 1.5317106746717295, "learning_rate": 1.9237754648365328e-06, "loss": 0.464, "step": 3235 }, { "epoch": 0.7200712060525145, "grad_norm": 1.7012280034234404, "learning_rate": 1.9209354396396713e-06, "loss": 0.4614, "step": 3236 }, { "epoch": 0.7202937249666221, "grad_norm": 1.6358919887627443, "learning_rate": 1.918097013797484e-06, "loss": 0.4585, "step": 3237 }, { "epoch": 0.7205162438807299, "grad_norm": 1.5939043032136047, "learning_rate": 1.9152601887843285e-06, "loss": 0.4841, "step": 3238 }, { "epoch": 0.7207387627948375, "grad_norm": 1.5029889890886126, "learning_rate": 1.912424966073735e-06, "loss": 0.4412, "step": 3239 }, { "epoch": 0.7209612817089452, "grad_norm": 1.4961913205544595, "learning_rate": 1.9095913471384005e-06, "loss": 0.431, "step": 3240 }, { "epoch": 0.721183800623053, "grad_norm": 1.597392262259258, "learning_rate": 1.906759333450184e-06, "loss": 0.4636, "step": 3241 }, { "epoch": 0.7214063195371606, "grad_norm": 1.6312426523017882, "learning_rate": 1.903928926480121e-06, "loss": 0.4486, "step": 3242 }, { "epoch": 0.7216288384512684, "grad_norm": 1.606794182575477, "learning_rate": 1.901100127698401e-06, "loss": 0.4413, "step": 3243 }, { "epoch": 0.7218513573653761, "grad_norm": 1.6691459459783806, "learning_rate": 1.898272938574386e-06, "loss": 0.4579, "step": 3244 }, { "epoch": 0.7220738762794837, "grad_norm": 1.606240568491535, "learning_rate": 1.8954473605766005e-06, "loss": 0.4364, "step": 3245 }, { "epoch": 0.7222963951935915, "grad_norm": 1.6073824554946126, "learning_rate": 1.8926233951727273e-06, "loss": 0.4601, "step": 3246 }, { "epoch": 0.7225189141076992, "grad_norm": 1.586520888360378, "learning_rate": 1.8898010438296177e-06, "loss": 0.4423, "step": 3247 }, { "epoch": 0.7227414330218068, "grad_norm": 1.5743397127748426, "learning_rate": 1.8869803080132815e-06, "loss": 0.447, "step": 3248 }, { "epoch": 0.7229639519359146, "grad_norm": 1.6457733416923317, "learning_rate": 1.8841611891888918e-06, "loss": 0.4629, "step": 3249 }, { "epoch": 0.7231864708500223, "grad_norm": 1.6219342139193687, "learning_rate": 1.881343688820777e-06, "loss": 0.4651, "step": 3250 }, { "epoch": 0.7234089897641299, "grad_norm": 1.6501309383607483, "learning_rate": 1.8785278083724285e-06, "loss": 0.4609, "step": 3251 }, { "epoch": 0.7236315086782377, "grad_norm": 1.696893315150528, "learning_rate": 1.8757135493064987e-06, "loss": 0.4476, "step": 3252 }, { "epoch": 0.7238540275923453, "grad_norm": 1.5725433699980487, "learning_rate": 1.8729009130847886e-06, "loss": 0.4521, "step": 3253 }, { "epoch": 0.724076546506453, "grad_norm": 1.6663079534962573, "learning_rate": 1.8700899011682688e-06, "loss": 0.4461, "step": 3254 }, { "epoch": 0.7242990654205608, "grad_norm": 1.5322066397959344, "learning_rate": 1.867280515017056e-06, "loss": 0.4603, "step": 3255 }, { "epoch": 0.7245215843346684, "grad_norm": 1.679350086378782, "learning_rate": 1.8644727560904269e-06, "loss": 0.4542, "step": 3256 }, { "epoch": 0.7247441032487761, "grad_norm": 1.500910479124543, "learning_rate": 1.8616666258468136e-06, "loss": 0.4677, "step": 3257 }, { "epoch": 0.7249666221628839, "grad_norm": 1.5486438457965472, "learning_rate": 1.8588621257437983e-06, "loss": 0.4443, "step": 3258 }, { "epoch": 0.7251891410769915, "grad_norm": 1.6373764737483965, "learning_rate": 1.8560592572381202e-06, "loss": 0.4661, "step": 3259 }, { "epoch": 0.7254116599910992, "grad_norm": 1.539097044755985, "learning_rate": 1.8532580217856698e-06, "loss": 0.4365, "step": 3260 }, { "epoch": 0.725634178905207, "grad_norm": 1.5939189138264913, "learning_rate": 1.85045842084149e-06, "loss": 0.4414, "step": 3261 }, { "epoch": 0.7258566978193146, "grad_norm": 1.4642518060102936, "learning_rate": 1.847660455859775e-06, "loss": 0.4602, "step": 3262 }, { "epoch": 0.7260792167334224, "grad_norm": 1.5311370528293111, "learning_rate": 1.8448641282938645e-06, "loss": 0.44, "step": 3263 }, { "epoch": 0.7263017356475301, "grad_norm": 1.572799915065932, "learning_rate": 1.8420694395962546e-06, "loss": 0.4526, "step": 3264 }, { "epoch": 0.7265242545616377, "grad_norm": 1.5046078856455956, "learning_rate": 1.8392763912185852e-06, "loss": 0.4509, "step": 3265 }, { "epoch": 0.7267467734757455, "grad_norm": 1.680408063940401, "learning_rate": 1.8364849846116472e-06, "loss": 0.4575, "step": 3266 }, { "epoch": 0.7269692923898531, "grad_norm": 1.6158569427820573, "learning_rate": 1.833695221225379e-06, "loss": 0.4655, "step": 3267 }, { "epoch": 0.7271918113039608, "grad_norm": 1.879884383359371, "learning_rate": 1.8309071025088604e-06, "loss": 0.4561, "step": 3268 }, { "epoch": 0.7274143302180686, "grad_norm": 1.6601868884968074, "learning_rate": 1.8281206299103231e-06, "loss": 0.48, "step": 3269 }, { "epoch": 0.7276368491321762, "grad_norm": 1.8513622634148255, "learning_rate": 1.8253358048771386e-06, "loss": 0.4522, "step": 3270 }, { "epoch": 0.7278593680462839, "grad_norm": 1.6098733050204028, "learning_rate": 1.822552628855827e-06, "loss": 0.4497, "step": 3271 }, { "epoch": 0.7280818869603917, "grad_norm": 1.6062527421679222, "learning_rate": 1.819771103292049e-06, "loss": 0.4593, "step": 3272 }, { "epoch": 0.7283044058744993, "grad_norm": 1.5775950447264413, "learning_rate": 1.8169912296306097e-06, "loss": 0.4749, "step": 3273 }, { "epoch": 0.728526924788607, "grad_norm": 1.5865260737100049, "learning_rate": 1.8142130093154569e-06, "loss": 0.4681, "step": 3274 }, { "epoch": 0.7287494437027148, "grad_norm": 1.592320040056994, "learning_rate": 1.8114364437896747e-06, "loss": 0.4584, "step": 3275 }, { "epoch": 0.7289719626168224, "grad_norm": 1.6027990712371267, "learning_rate": 1.8086615344954928e-06, "loss": 0.4694, "step": 3276 }, { "epoch": 0.7291944815309301, "grad_norm": 1.5907716190195467, "learning_rate": 1.8058882828742803e-06, "loss": 0.4746, "step": 3277 }, { "epoch": 0.7294170004450379, "grad_norm": 1.6223539728594096, "learning_rate": 1.8031166903665392e-06, "loss": 0.4379, "step": 3278 }, { "epoch": 0.7296395193591455, "grad_norm": 1.6092321984692481, "learning_rate": 1.8003467584119205e-06, "loss": 0.4516, "step": 3279 }, { "epoch": 0.7298620382732532, "grad_norm": 1.5347741763749687, "learning_rate": 1.7975784884492026e-06, "loss": 0.4521, "step": 3280 }, { "epoch": 0.7300845571873609, "grad_norm": 1.5035901945462977, "learning_rate": 1.7948118819163052e-06, "loss": 0.4562, "step": 3281 }, { "epoch": 0.7303070761014686, "grad_norm": 1.5720360888439482, "learning_rate": 1.7920469402502849e-06, "loss": 0.4573, "step": 3282 }, { "epoch": 0.7305295950155763, "grad_norm": 1.7502438634721584, "learning_rate": 1.789283664887329e-06, "loss": 0.4412, "step": 3283 }, { "epoch": 0.730752113929684, "grad_norm": 1.6192649793977651, "learning_rate": 1.786522057262764e-06, "loss": 0.4543, "step": 3284 }, { "epoch": 0.7309746328437917, "grad_norm": 1.7193143097028987, "learning_rate": 1.7837621188110482e-06, "loss": 0.4566, "step": 3285 }, { "epoch": 0.7311971517578995, "grad_norm": 1.6609315957290431, "learning_rate": 1.781003850965773e-06, "loss": 0.4431, "step": 3286 }, { "epoch": 0.7314196706720071, "grad_norm": 1.6219738818507576, "learning_rate": 1.778247255159663e-06, "loss": 0.4641, "step": 3287 }, { "epoch": 0.7316421895861148, "grad_norm": 1.5645935879038992, "learning_rate": 1.7754923328245704e-06, "loss": 0.438, "step": 3288 }, { "epoch": 0.7318647085002226, "grad_norm": 1.5373956021923447, "learning_rate": 1.7727390853914855e-06, "loss": 0.4595, "step": 3289 }, { "epoch": 0.7320872274143302, "grad_norm": 1.5729712328345231, "learning_rate": 1.7699875142905177e-06, "loss": 0.4573, "step": 3290 }, { "epoch": 0.7323097463284379, "grad_norm": 1.6307921158654721, "learning_rate": 1.7672376209509174e-06, "loss": 0.4612, "step": 3291 }, { "epoch": 0.7325322652425457, "grad_norm": 1.5317884271137123, "learning_rate": 1.7644894068010593e-06, "loss": 0.4378, "step": 3292 }, { "epoch": 0.7327547841566533, "grad_norm": 1.492348460561991, "learning_rate": 1.7617428732684406e-06, "loss": 0.4461, "step": 3293 }, { "epoch": 0.732977303070761, "grad_norm": 1.5777953719640112, "learning_rate": 1.7589980217796931e-06, "loss": 0.4366, "step": 3294 }, { "epoch": 0.7331998219848687, "grad_norm": 1.5776779644253853, "learning_rate": 1.7562548537605683e-06, "loss": 0.4324, "step": 3295 }, { "epoch": 0.7334223408989764, "grad_norm": 1.5958598588723698, "learning_rate": 1.7535133706359486e-06, "loss": 0.4526, "step": 3296 }, { "epoch": 0.7336448598130841, "grad_norm": 1.50899382884701, "learning_rate": 1.7507735738298392e-06, "loss": 0.4408, "step": 3297 }, { "epoch": 0.7338673787271918, "grad_norm": 1.5618998387458356, "learning_rate": 1.7480354647653692e-06, "loss": 0.4479, "step": 3298 }, { "epoch": 0.7340898976412995, "grad_norm": 1.513557246083374, "learning_rate": 1.7452990448647927e-06, "loss": 0.4567, "step": 3299 }, { "epoch": 0.7343124165554072, "grad_norm": 1.708244468512909, "learning_rate": 1.7425643155494814e-06, "loss": 0.4562, "step": 3300 }, { "epoch": 0.7345349354695149, "grad_norm": 1.6848915529136876, "learning_rate": 1.7398312782399346e-06, "loss": 0.4601, "step": 3301 }, { "epoch": 0.7347574543836226, "grad_norm": 1.6865005274526348, "learning_rate": 1.7370999343557698e-06, "loss": 0.4668, "step": 3302 }, { "epoch": 0.7349799732977303, "grad_norm": 1.692911374544285, "learning_rate": 1.7343702853157262e-06, "loss": 0.4608, "step": 3303 }, { "epoch": 0.735202492211838, "grad_norm": 1.6579943614411237, "learning_rate": 1.731642332537663e-06, "loss": 0.4609, "step": 3304 }, { "epoch": 0.7354250111259457, "grad_norm": 1.66273414905097, "learning_rate": 1.7289160774385543e-06, "loss": 0.4477, "step": 3305 }, { "epoch": 0.7356475300400535, "grad_norm": 1.6627089751890614, "learning_rate": 1.7261915214344976e-06, "loss": 0.4405, "step": 3306 }, { "epoch": 0.7358700489541611, "grad_norm": 1.6054618765614603, "learning_rate": 1.7234686659407064e-06, "loss": 0.4683, "step": 3307 }, { "epoch": 0.7360925678682688, "grad_norm": 1.6394653822766263, "learning_rate": 1.7207475123715072e-06, "loss": 0.4652, "step": 3308 }, { "epoch": 0.7363150867823764, "grad_norm": 1.6197049589396346, "learning_rate": 1.7180280621403473e-06, "loss": 0.4533, "step": 3309 }, { "epoch": 0.7365376056964842, "grad_norm": 1.5480358582193827, "learning_rate": 1.7153103166597868e-06, "loss": 0.4515, "step": 3310 }, { "epoch": 0.7367601246105919, "grad_norm": 1.559779275924909, "learning_rate": 1.7125942773415017e-06, "loss": 0.4515, "step": 3311 }, { "epoch": 0.7369826435246996, "grad_norm": 1.6262360785652952, "learning_rate": 1.7098799455962817e-06, "loss": 0.4256, "step": 3312 }, { "epoch": 0.7372051624388073, "grad_norm": 1.5707366699065735, "learning_rate": 1.7071673228340257e-06, "loss": 0.4416, "step": 3313 }, { "epoch": 0.737427681352915, "grad_norm": 1.5808100265807132, "learning_rate": 1.7044564104637512e-06, "loss": 0.4593, "step": 3314 }, { "epoch": 0.7376502002670227, "grad_norm": 1.5395217724845582, "learning_rate": 1.7017472098935795e-06, "loss": 0.4352, "step": 3315 }, { "epoch": 0.7378727191811304, "grad_norm": 1.584650444977282, "learning_rate": 1.6990397225307536e-06, "loss": 0.4348, "step": 3316 }, { "epoch": 0.7380952380952381, "grad_norm": 1.6467954156890525, "learning_rate": 1.6963339497816155e-06, "loss": 0.4736, "step": 3317 }, { "epoch": 0.7383177570093458, "grad_norm": 1.6656837187415565, "learning_rate": 1.6936298930516232e-06, "loss": 0.4466, "step": 3318 }, { "epoch": 0.7385402759234535, "grad_norm": 1.6453058081892662, "learning_rate": 1.6909275537453423e-06, "loss": 0.4295, "step": 3319 }, { "epoch": 0.7387627948375612, "grad_norm": 1.5953364144935869, "learning_rate": 1.6882269332664437e-06, "loss": 0.4579, "step": 3320 }, { "epoch": 0.7389853137516689, "grad_norm": 1.6134819017617517, "learning_rate": 1.6855280330177072e-06, "loss": 0.4403, "step": 3321 }, { "epoch": 0.7392078326657766, "grad_norm": 1.8321470230080699, "learning_rate": 1.6828308544010202e-06, "loss": 0.4357, "step": 3322 }, { "epoch": 0.7394303515798842, "grad_norm": 1.6967530321371862, "learning_rate": 1.6801353988173752e-06, "loss": 0.45, "step": 3323 }, { "epoch": 0.739652870493992, "grad_norm": 1.658967153433948, "learning_rate": 1.6774416676668704e-06, "loss": 0.4596, "step": 3324 }, { "epoch": 0.7398753894080997, "grad_norm": 1.6237586622392566, "learning_rate": 1.6747496623487042e-06, "loss": 0.4502, "step": 3325 }, { "epoch": 0.7400979083222073, "grad_norm": 1.6841357288611039, "learning_rate": 1.6720593842611827e-06, "loss": 0.455, "step": 3326 }, { "epoch": 0.7403204272363151, "grad_norm": 1.650727429682881, "learning_rate": 1.669370834801714e-06, "loss": 0.4543, "step": 3327 }, { "epoch": 0.7405429461504228, "grad_norm": 1.675669772426563, "learning_rate": 1.6666840153668085e-06, "loss": 0.4544, "step": 3328 }, { "epoch": 0.7407654650645304, "grad_norm": 1.5752003011766884, "learning_rate": 1.6639989273520785e-06, "loss": 0.4249, "step": 3329 }, { "epoch": 0.7409879839786382, "grad_norm": 1.5709246149991496, "learning_rate": 1.6613155721522328e-06, "loss": 0.442, "step": 3330 }, { "epoch": 0.7412105028927459, "grad_norm": 1.5958438840602442, "learning_rate": 1.6586339511610865e-06, "loss": 0.4652, "step": 3331 }, { "epoch": 0.7414330218068536, "grad_norm": 1.611565923434101, "learning_rate": 1.6559540657715473e-06, "loss": 0.4629, "step": 3332 }, { "epoch": 0.7416555407209613, "grad_norm": 1.5919948732258435, "learning_rate": 1.6532759173756251e-06, "loss": 0.4136, "step": 3333 }, { "epoch": 0.741878059635069, "grad_norm": 1.5858051699166573, "learning_rate": 1.650599507364432e-06, "loss": 0.4512, "step": 3334 }, { "epoch": 0.7421005785491767, "grad_norm": 1.560970664395756, "learning_rate": 1.647924837128167e-06, "loss": 0.4764, "step": 3335 }, { "epoch": 0.7423230974632844, "grad_norm": 1.6242859357788795, "learning_rate": 1.6452519080561348e-06, "loss": 0.4372, "step": 3336 }, { "epoch": 0.742545616377392, "grad_norm": 1.5489174558351217, "learning_rate": 1.6425807215367273e-06, "loss": 0.46, "step": 3337 }, { "epoch": 0.7427681352914998, "grad_norm": 1.6408650191561478, "learning_rate": 1.6399112789574378e-06, "loss": 0.4601, "step": 3338 }, { "epoch": 0.7429906542056075, "grad_norm": 1.6430486073469392, "learning_rate": 1.637243581704852e-06, "loss": 0.4586, "step": 3339 }, { "epoch": 0.7432131731197151, "grad_norm": 1.626297699852403, "learning_rate": 1.6345776311646478e-06, "loss": 0.4631, "step": 3340 }, { "epoch": 0.7434356920338229, "grad_norm": 1.6063788345174048, "learning_rate": 1.6319134287215982e-06, "loss": 0.4464, "step": 3341 }, { "epoch": 0.7436582109479306, "grad_norm": 1.6887846411465786, "learning_rate": 1.6292509757595642e-06, "loss": 0.4566, "step": 3342 }, { "epoch": 0.7438807298620382, "grad_norm": 1.7012438552436877, "learning_rate": 1.6265902736615007e-06, "loss": 0.4653, "step": 3343 }, { "epoch": 0.744103248776146, "grad_norm": 1.7366281468194815, "learning_rate": 1.6239313238094556e-06, "loss": 0.4531, "step": 3344 }, { "epoch": 0.7443257676902537, "grad_norm": 1.5931389067969683, "learning_rate": 1.6212741275845606e-06, "loss": 0.4503, "step": 3345 }, { "epoch": 0.7445482866043613, "grad_norm": 1.459472610780699, "learning_rate": 1.6186186863670406e-06, "loss": 0.4325, "step": 3346 }, { "epoch": 0.7447708055184691, "grad_norm": 1.5962725128463484, "learning_rate": 1.6159650015362088e-06, "loss": 0.4385, "step": 3347 }, { "epoch": 0.7449933244325768, "grad_norm": 1.5414737764095576, "learning_rate": 1.6133130744704657e-06, "loss": 0.4319, "step": 3348 }, { "epoch": 0.7452158433466844, "grad_norm": 1.6366026044767987, "learning_rate": 1.6106629065472995e-06, "loss": 0.4476, "step": 3349 }, { "epoch": 0.7454383622607922, "grad_norm": 1.6235500030882382, "learning_rate": 1.6080144991432806e-06, "loss": 0.4504, "step": 3350 }, { "epoch": 0.7456608811748998, "grad_norm": 1.556444583550987, "learning_rate": 1.6053678536340705e-06, "loss": 0.4413, "step": 3351 }, { "epoch": 0.7458834000890076, "grad_norm": 1.5476073433694766, "learning_rate": 1.6027229713944098e-06, "loss": 0.4515, "step": 3352 }, { "epoch": 0.7461059190031153, "grad_norm": 1.8964002316734452, "learning_rate": 1.60007985379813e-06, "loss": 0.4494, "step": 3353 }, { "epoch": 0.7463284379172229, "grad_norm": 1.7314939421108702, "learning_rate": 1.5974385022181422e-06, "loss": 0.4428, "step": 3354 }, { "epoch": 0.7465509568313307, "grad_norm": 1.8243466465304456, "learning_rate": 1.5947989180264379e-06, "loss": 0.4501, "step": 3355 }, { "epoch": 0.7467734757454384, "grad_norm": 1.7075292965956324, "learning_rate": 1.592161102594096e-06, "loss": 0.4375, "step": 3356 }, { "epoch": 0.746995994659546, "grad_norm": 2.452104831943122, "learning_rate": 1.5895250572912696e-06, "loss": 0.4455, "step": 3357 }, { "epoch": 0.7472185135736538, "grad_norm": 1.5625438610546225, "learning_rate": 1.5868907834872e-06, "loss": 0.4481, "step": 3358 }, { "epoch": 0.7474410324877615, "grad_norm": 1.6607827080748447, "learning_rate": 1.5842582825502028e-06, "loss": 0.4393, "step": 3359 }, { "epoch": 0.7476635514018691, "grad_norm": 1.7066936052806643, "learning_rate": 1.5816275558476758e-06, "loss": 0.4479, "step": 3360 }, { "epoch": 0.7478860703159769, "grad_norm": 1.75017328627575, "learning_rate": 1.5789986047460953e-06, "loss": 0.446, "step": 3361 }, { "epoch": 0.7481085892300846, "grad_norm": 1.5574139465315326, "learning_rate": 1.5763714306110106e-06, "loss": 0.4487, "step": 3362 }, { "epoch": 0.7483311081441922, "grad_norm": 1.53774007796715, "learning_rate": 1.5737460348070538e-06, "loss": 0.4383, "step": 3363 }, { "epoch": 0.7485536270583, "grad_norm": 1.622600896994732, "learning_rate": 1.5711224186979307e-06, "loss": 0.4389, "step": 3364 }, { "epoch": 0.7487761459724076, "grad_norm": 1.6395881460209785, "learning_rate": 1.568500583646423e-06, "loss": 0.4387, "step": 3365 }, { "epoch": 0.7489986648865153, "grad_norm": 1.776104695586581, "learning_rate": 1.5658805310143887e-06, "loss": 0.4571, "step": 3366 }, { "epoch": 0.7492211838006231, "grad_norm": 1.7221630461679778, "learning_rate": 1.5632622621627553e-06, "loss": 0.4503, "step": 3367 }, { "epoch": 0.7494437027147307, "grad_norm": 1.5103229572221253, "learning_rate": 1.5606457784515282e-06, "loss": 0.4352, "step": 3368 }, { "epoch": 0.7496662216288384, "grad_norm": 1.5624015586791034, "learning_rate": 1.5580310812397865e-06, "loss": 0.4396, "step": 3369 }, { "epoch": 0.7498887405429462, "grad_norm": 1.695494110843321, "learning_rate": 1.5554181718856737e-06, "loss": 0.4528, "step": 3370 }, { "epoch": 0.7501112594570538, "grad_norm": 1.582295572841094, "learning_rate": 1.5528070517464171e-06, "loss": 0.4522, "step": 3371 }, { "epoch": 0.7503337783711616, "grad_norm": 1.664297133118027, "learning_rate": 1.5501977221783021e-06, "loss": 0.4467, "step": 3372 }, { "epoch": 0.7505562972852693, "grad_norm": 1.607230955804456, "learning_rate": 1.547590184536692e-06, "loss": 0.4358, "step": 3373 }, { "epoch": 0.7507788161993769, "grad_norm": 1.5728525955782278, "learning_rate": 1.5449844401760178e-06, "loss": 0.4436, "step": 3374 }, { "epoch": 0.7510013351134847, "grad_norm": 1.8187844454222597, "learning_rate": 1.5423804904497747e-06, "loss": 0.4447, "step": 3375 }, { "epoch": 0.7512238540275924, "grad_norm": 1.553832944969523, "learning_rate": 1.5397783367105307e-06, "loss": 0.4451, "step": 3376 }, { "epoch": 0.7514463729417, "grad_norm": 1.652074546405469, "learning_rate": 1.53717798030992e-06, "loss": 0.4533, "step": 3377 }, { "epoch": 0.7516688918558078, "grad_norm": 1.733790868658929, "learning_rate": 1.5345794225986433e-06, "loss": 0.456, "step": 3378 }, { "epoch": 0.7518914107699154, "grad_norm": 1.6053722797844674, "learning_rate": 1.5319826649264636e-06, "loss": 0.4369, "step": 3379 }, { "epoch": 0.7521139296840231, "grad_norm": 1.8347345785178224, "learning_rate": 1.5293877086422126e-06, "loss": 0.459, "step": 3380 }, { "epoch": 0.7523364485981309, "grad_norm": 1.592547796455022, "learning_rate": 1.5267945550937869e-06, "loss": 0.4582, "step": 3381 }, { "epoch": 0.7525589675122385, "grad_norm": 1.5574390854543936, "learning_rate": 1.5242032056281419e-06, "loss": 0.4528, "step": 3382 }, { "epoch": 0.7527814864263462, "grad_norm": 1.6677833196931857, "learning_rate": 1.5216136615913006e-06, "loss": 0.4526, "step": 3383 }, { "epoch": 0.753004005340454, "grad_norm": 1.5979660985406985, "learning_rate": 1.5190259243283468e-06, "loss": 0.4521, "step": 3384 }, { "epoch": 0.7532265242545616, "grad_norm": 1.709081780178527, "learning_rate": 1.5164399951834258e-06, "loss": 0.454, "step": 3385 }, { "epoch": 0.7534490431686693, "grad_norm": 1.7242893597449744, "learning_rate": 1.5138558754997445e-06, "loss": 0.4617, "step": 3386 }, { "epoch": 0.7536715620827771, "grad_norm": 1.5779013515550582, "learning_rate": 1.5112735666195666e-06, "loss": 0.4667, "step": 3387 }, { "epoch": 0.7538940809968847, "grad_norm": 1.592720309616067, "learning_rate": 1.5086930698842183e-06, "loss": 0.4436, "step": 3388 }, { "epoch": 0.7541165999109924, "grad_norm": 1.6891372942194498, "learning_rate": 1.5061143866340844e-06, "loss": 0.4464, "step": 3389 }, { "epoch": 0.7543391188251002, "grad_norm": 1.7506461540862785, "learning_rate": 1.503537518208607e-06, "loss": 0.4638, "step": 3390 }, { "epoch": 0.7545616377392078, "grad_norm": 1.6019388137675912, "learning_rate": 1.5009624659462874e-06, "loss": 0.446, "step": 3391 }, { "epoch": 0.7547841566533156, "grad_norm": 1.555567844008143, "learning_rate": 1.498389231184678e-06, "loss": 0.4307, "step": 3392 }, { "epoch": 0.7550066755674232, "grad_norm": 1.6005850377400281, "learning_rate": 1.4958178152603954e-06, "loss": 0.4649, "step": 3393 }, { "epoch": 0.7552291944815309, "grad_norm": 1.6934968504514298, "learning_rate": 1.4932482195091024e-06, "loss": 0.406, "step": 3394 }, { "epoch": 0.7554517133956387, "grad_norm": 1.7068471701559835, "learning_rate": 1.4906804452655216e-06, "loss": 0.4637, "step": 3395 }, { "epoch": 0.7556742323097463, "grad_norm": 1.739841043734045, "learning_rate": 1.4881144938634334e-06, "loss": 0.4444, "step": 3396 }, { "epoch": 0.755896751223854, "grad_norm": 1.6455374348230463, "learning_rate": 1.485550366635662e-06, "loss": 0.4625, "step": 3397 }, { "epoch": 0.7561192701379618, "grad_norm": 1.604210415281447, "learning_rate": 1.482988064914092e-06, "loss": 0.4439, "step": 3398 }, { "epoch": 0.7563417890520694, "grad_norm": 1.6264753894658088, "learning_rate": 1.4804275900296533e-06, "loss": 0.4384, "step": 3399 }, { "epoch": 0.7565643079661771, "grad_norm": 1.6541818360861895, "learning_rate": 1.4778689433123321e-06, "loss": 0.4365, "step": 3400 }, { "epoch": 0.7567868268802849, "grad_norm": 1.7481366374288578, "learning_rate": 1.475312126091163e-06, "loss": 0.4457, "step": 3401 }, { "epoch": 0.7570093457943925, "grad_norm": 1.7226781389677153, "learning_rate": 1.4727571396942303e-06, "loss": 0.4508, "step": 3402 }, { "epoch": 0.7572318647085002, "grad_norm": 1.5629756977360731, "learning_rate": 1.4702039854486683e-06, "loss": 0.4453, "step": 3403 }, { "epoch": 0.757454383622608, "grad_norm": 1.671966747679513, "learning_rate": 1.4676526646806566e-06, "loss": 0.4474, "step": 3404 }, { "epoch": 0.7576769025367156, "grad_norm": 1.7331284056851581, "learning_rate": 1.4651031787154263e-06, "loss": 0.4722, "step": 3405 }, { "epoch": 0.7578994214508233, "grad_norm": 1.726894752308129, "learning_rate": 1.4625555288772543e-06, "loss": 0.4486, "step": 3406 }, { "epoch": 0.758121940364931, "grad_norm": 1.666225930276218, "learning_rate": 1.460009716489459e-06, "loss": 0.4475, "step": 3407 }, { "epoch": 0.7583444592790387, "grad_norm": 1.6640140365635634, "learning_rate": 1.4574657428744144e-06, "loss": 0.4624, "step": 3408 }, { "epoch": 0.7585669781931464, "grad_norm": 1.6615182587985953, "learning_rate": 1.4549236093535296e-06, "loss": 0.4354, "step": 3409 }, { "epoch": 0.7587894971072541, "grad_norm": 1.5432792659200194, "learning_rate": 1.452383317247263e-06, "loss": 0.4452, "step": 3410 }, { "epoch": 0.7590120160213618, "grad_norm": 1.618027098724041, "learning_rate": 1.4498448678751164e-06, "loss": 0.4411, "step": 3411 }, { "epoch": 0.7592345349354696, "grad_norm": 1.6820865333467534, "learning_rate": 1.4473082625556318e-06, "loss": 0.4628, "step": 3412 }, { "epoch": 0.7594570538495772, "grad_norm": 1.6989256259516476, "learning_rate": 1.4447735026063946e-06, "loss": 0.4748, "step": 3413 }, { "epoch": 0.7596795727636849, "grad_norm": 1.5915472180221988, "learning_rate": 1.442240589344034e-06, "loss": 0.4461, "step": 3414 }, { "epoch": 0.7599020916777927, "grad_norm": 1.6106532543518044, "learning_rate": 1.4397095240842162e-06, "loss": 0.4359, "step": 3415 }, { "epoch": 0.7601246105919003, "grad_norm": 1.5978569778407525, "learning_rate": 1.437180308141652e-06, "loss": 0.4576, "step": 3416 }, { "epoch": 0.760347129506008, "grad_norm": 1.5923465693393126, "learning_rate": 1.4346529428300849e-06, "loss": 0.4541, "step": 3417 }, { "epoch": 0.7605696484201158, "grad_norm": 1.5414645434619114, "learning_rate": 1.432127429462305e-06, "loss": 0.4385, "step": 3418 }, { "epoch": 0.7607921673342234, "grad_norm": 1.6345752800232543, "learning_rate": 1.429603769350133e-06, "loss": 0.4347, "step": 3419 }, { "epoch": 0.7610146862483311, "grad_norm": 1.6058631872394518, "learning_rate": 1.4270819638044324e-06, "loss": 0.4659, "step": 3420 }, { "epoch": 0.7612372051624388, "grad_norm": 1.5614409446528779, "learning_rate": 1.4245620141351013e-06, "loss": 0.4396, "step": 3421 }, { "epoch": 0.7614597240765465, "grad_norm": 1.5906573744089756, "learning_rate": 1.4220439216510739e-06, "loss": 0.4643, "step": 3422 }, { "epoch": 0.7616822429906542, "grad_norm": 1.6241698458121616, "learning_rate": 1.4195276876603213e-06, "loss": 0.4441, "step": 3423 }, { "epoch": 0.7619047619047619, "grad_norm": 1.6716341426008725, "learning_rate": 1.4170133134698454e-06, "loss": 0.4593, "step": 3424 }, { "epoch": 0.7621272808188696, "grad_norm": 1.6447029525439305, "learning_rate": 1.4145008003856858e-06, "loss": 0.4371, "step": 3425 }, { "epoch": 0.7623497997329773, "grad_norm": 1.668830856515283, "learning_rate": 1.4119901497129135e-06, "loss": 0.4543, "step": 3426 }, { "epoch": 0.762572318647085, "grad_norm": 1.6548043878832817, "learning_rate": 1.4094813627556325e-06, "loss": 0.4414, "step": 3427 }, { "epoch": 0.7627948375611927, "grad_norm": 1.6966872336706436, "learning_rate": 1.4069744408169816e-06, "loss": 0.4455, "step": 3428 }, { "epoch": 0.7630173564753004, "grad_norm": 1.643489760911595, "learning_rate": 1.4044693851991238e-06, "loss": 0.4435, "step": 3429 }, { "epoch": 0.7632398753894081, "grad_norm": 1.6108605821991917, "learning_rate": 1.4019661972032595e-06, "loss": 0.4392, "step": 3430 }, { "epoch": 0.7634623943035158, "grad_norm": 1.639104575588699, "learning_rate": 1.3994648781296178e-06, "loss": 0.4525, "step": 3431 }, { "epoch": 0.7636849132176236, "grad_norm": 1.6444133270116654, "learning_rate": 1.3969654292774503e-06, "loss": 0.454, "step": 3432 }, { "epoch": 0.7639074321317312, "grad_norm": 1.5269414129003123, "learning_rate": 1.39446785194505e-06, "loss": 0.4317, "step": 3433 }, { "epoch": 0.7641299510458389, "grad_norm": 1.7127989013962075, "learning_rate": 1.3919721474297249e-06, "loss": 0.4521, "step": 3434 }, { "epoch": 0.7643524699599465, "grad_norm": 1.5272729655494572, "learning_rate": 1.389478317027818e-06, "loss": 0.4497, "step": 3435 }, { "epoch": 0.7645749888740543, "grad_norm": 1.5044888383081694, "learning_rate": 1.3869863620346973e-06, "loss": 0.4431, "step": 3436 }, { "epoch": 0.764797507788162, "grad_norm": 1.596344206819707, "learning_rate": 1.3844962837447535e-06, "loss": 0.4327, "step": 3437 }, { "epoch": 0.7650200267022697, "grad_norm": 1.6258977685663571, "learning_rate": 1.382008083451406e-06, "loss": 0.433, "step": 3438 }, { "epoch": 0.7652425456163774, "grad_norm": 1.7789747621791587, "learning_rate": 1.3795217624470975e-06, "loss": 0.4533, "step": 3439 }, { "epoch": 0.7654650645304851, "grad_norm": 1.7841084736372939, "learning_rate": 1.3770373220232957e-06, "loss": 0.4336, "step": 3440 }, { "epoch": 0.7656875834445928, "grad_norm": 1.6012815644668206, "learning_rate": 1.3745547634704887e-06, "loss": 0.4576, "step": 3441 }, { "epoch": 0.7659101023587005, "grad_norm": 1.6667300745701057, "learning_rate": 1.3720740880781892e-06, "loss": 0.4526, "step": 3442 }, { "epoch": 0.7661326212728082, "grad_norm": 1.7600487168718646, "learning_rate": 1.3695952971349336e-06, "loss": 0.4404, "step": 3443 }, { "epoch": 0.7663551401869159, "grad_norm": 1.623061738983551, "learning_rate": 1.3671183919282716e-06, "loss": 0.4486, "step": 3444 }, { "epoch": 0.7665776591010236, "grad_norm": 1.7356339637883036, "learning_rate": 1.3646433737447863e-06, "loss": 0.446, "step": 3445 }, { "epoch": 0.7668001780151313, "grad_norm": 1.6033557762305848, "learning_rate": 1.3621702438700678e-06, "loss": 0.4444, "step": 3446 }, { "epoch": 0.767022696929239, "grad_norm": 1.6680706815039195, "learning_rate": 1.3596990035887332e-06, "loss": 0.4498, "step": 3447 }, { "epoch": 0.7672452158433467, "grad_norm": 1.6601334890275832, "learning_rate": 1.357229654184416e-06, "loss": 0.4421, "step": 3448 }, { "epoch": 0.7674677347574543, "grad_norm": 1.7475792305421527, "learning_rate": 1.3547621969397657e-06, "loss": 0.4336, "step": 3449 }, { "epoch": 0.7676902536715621, "grad_norm": 1.7952762171883065, "learning_rate": 1.3522966331364512e-06, "loss": 0.4509, "step": 3450 }, { "epoch": 0.7679127725856698, "grad_norm": 1.7884725833221995, "learning_rate": 1.3498329640551576e-06, "loss": 0.4337, "step": 3451 }, { "epoch": 0.7681352914997774, "grad_norm": 1.8200484361557507, "learning_rate": 1.3473711909755853e-06, "loss": 0.4503, "step": 3452 }, { "epoch": 0.7683578104138852, "grad_norm": 1.6886663460710054, "learning_rate": 1.344911315176452e-06, "loss": 0.4481, "step": 3453 }, { "epoch": 0.7685803293279929, "grad_norm": 1.6296726935176886, "learning_rate": 1.3424533379354842e-06, "loss": 0.4314, "step": 3454 }, { "epoch": 0.7688028482421005, "grad_norm": 1.6861582330474698, "learning_rate": 1.3399972605294277e-06, "loss": 0.4383, "step": 3455 }, { "epoch": 0.7690253671562083, "grad_norm": 1.7121421688392973, "learning_rate": 1.3375430842340415e-06, "loss": 0.4459, "step": 3456 }, { "epoch": 0.769247886070316, "grad_norm": 1.871467457829082, "learning_rate": 1.3350908103240905e-06, "loss": 0.4519, "step": 3457 }, { "epoch": 0.7694704049844237, "grad_norm": 1.7034096610703788, "learning_rate": 1.3326404400733623e-06, "loss": 0.4386, "step": 3458 }, { "epoch": 0.7696929238985314, "grad_norm": 1.7544611705329127, "learning_rate": 1.3301919747546455e-06, "loss": 0.4552, "step": 3459 }, { "epoch": 0.7699154428126391, "grad_norm": 1.582625068673052, "learning_rate": 1.3277454156397457e-06, "loss": 0.4485, "step": 3460 }, { "epoch": 0.7701379617267468, "grad_norm": 1.5374358768782677, "learning_rate": 1.3253007639994743e-06, "loss": 0.424, "step": 3461 }, { "epoch": 0.7703604806408545, "grad_norm": 1.568276441519735, "learning_rate": 1.3228580211036541e-06, "loss": 0.4431, "step": 3462 }, { "epoch": 0.7705829995549621, "grad_norm": 1.636780578174139, "learning_rate": 1.3204171882211158e-06, "loss": 0.4532, "step": 3463 }, { "epoch": 0.7708055184690699, "grad_norm": 1.6575101690926108, "learning_rate": 1.3179782666196993e-06, "loss": 0.4456, "step": 3464 }, { "epoch": 0.7710280373831776, "grad_norm": 1.6530511311171199, "learning_rate": 1.3155412575662513e-06, "loss": 0.4643, "step": 3465 }, { "epoch": 0.7712505562972852, "grad_norm": 1.9440454454162304, "learning_rate": 1.3131061623266217e-06, "loss": 0.4774, "step": 3466 }, { "epoch": 0.771473075211393, "grad_norm": 1.6077806631896707, "learning_rate": 1.3106729821656706e-06, "loss": 0.4395, "step": 3467 }, { "epoch": 0.7716955941255007, "grad_norm": 1.6745108663699413, "learning_rate": 1.3082417183472623e-06, "loss": 0.4558, "step": 3468 }, { "epoch": 0.7719181130396083, "grad_norm": 1.5888585310307608, "learning_rate": 1.305812372134262e-06, "loss": 0.4385, "step": 3469 }, { "epoch": 0.7721406319537161, "grad_norm": 1.6761812924257815, "learning_rate": 1.3033849447885471e-06, "loss": 0.4441, "step": 3470 }, { "epoch": 0.7723631508678238, "grad_norm": 1.6635089187882335, "learning_rate": 1.3009594375709888e-06, "loss": 0.449, "step": 3471 }, { "epoch": 0.7725856697819314, "grad_norm": 1.7369557631222807, "learning_rate": 1.298535851741467e-06, "loss": 0.4532, "step": 3472 }, { "epoch": 0.7728081886960392, "grad_norm": 1.661931261626694, "learning_rate": 1.2961141885588634e-06, "loss": 0.4295, "step": 3473 }, { "epoch": 0.7730307076101469, "grad_norm": 1.6547615124571307, "learning_rate": 1.293694449281056e-06, "loss": 0.4288, "step": 3474 }, { "epoch": 0.7732532265242545, "grad_norm": 1.6537853771727002, "learning_rate": 1.2912766351649293e-06, "loss": 0.4582, "step": 3475 }, { "epoch": 0.7734757454383623, "grad_norm": 1.9547911752695017, "learning_rate": 1.288860747466365e-06, "loss": 0.4289, "step": 3476 }, { "epoch": 0.7736982643524699, "grad_norm": 1.7166400481150306, "learning_rate": 1.2864467874402442e-06, "loss": 0.4619, "step": 3477 }, { "epoch": 0.7739207832665776, "grad_norm": 1.6922770822367226, "learning_rate": 1.2840347563404492e-06, "loss": 0.4496, "step": 3478 }, { "epoch": 0.7741433021806854, "grad_norm": 1.6716294065330957, "learning_rate": 1.2816246554198557e-06, "loss": 0.4352, "step": 3479 }, { "epoch": 0.774365821094793, "grad_norm": 1.6711155992655176, "learning_rate": 1.2792164859303413e-06, "loss": 0.4382, "step": 3480 }, { "epoch": 0.7745883400089008, "grad_norm": 1.706030260514817, "learning_rate": 1.2768102491227751e-06, "loss": 0.4378, "step": 3481 }, { "epoch": 0.7748108589230085, "grad_norm": 1.7228717471151462, "learning_rate": 1.274405946247031e-06, "loss": 0.4254, "step": 3482 }, { "epoch": 0.7750333778371161, "grad_norm": 1.7081426476763775, "learning_rate": 1.2720035785519685e-06, "loss": 0.4477, "step": 3483 }, { "epoch": 0.7752558967512239, "grad_norm": 1.6111972475306529, "learning_rate": 1.2696031472854486e-06, "loss": 0.4408, "step": 3484 }, { "epoch": 0.7754784156653316, "grad_norm": 1.7164199882144442, "learning_rate": 1.2672046536943256e-06, "loss": 0.443, "step": 3485 }, { "epoch": 0.7757009345794392, "grad_norm": 1.5856791899847646, "learning_rate": 1.2648080990244426e-06, "loss": 0.4568, "step": 3486 }, { "epoch": 0.775923453493547, "grad_norm": 1.601970202442271, "learning_rate": 1.2624134845206408e-06, "loss": 0.4455, "step": 3487 }, { "epoch": 0.7761459724076547, "grad_norm": 1.7646093525660005, "learning_rate": 1.260020811426752e-06, "loss": 0.4387, "step": 3488 }, { "epoch": 0.7763684913217623, "grad_norm": 1.7752022698242338, "learning_rate": 1.257630080985599e-06, "loss": 0.4451, "step": 3489 }, { "epoch": 0.7765910102358701, "grad_norm": 1.5735227032386636, "learning_rate": 1.2552412944389974e-06, "loss": 0.4526, "step": 3490 }, { "epoch": 0.7768135291499777, "grad_norm": 1.616397203853421, "learning_rate": 1.2528544530277492e-06, "loss": 0.4502, "step": 3491 }, { "epoch": 0.7770360480640854, "grad_norm": 1.5747684763801075, "learning_rate": 1.2504695579916498e-06, "loss": 0.4499, "step": 3492 }, { "epoch": 0.7772585669781932, "grad_norm": 1.6178872754108637, "learning_rate": 1.2480866105694838e-06, "loss": 0.4414, "step": 3493 }, { "epoch": 0.7774810858923008, "grad_norm": 1.6518884001131995, "learning_rate": 1.2457056119990175e-06, "loss": 0.4387, "step": 3494 }, { "epoch": 0.7777036048064085, "grad_norm": 1.5895496045345354, "learning_rate": 1.2433265635170166e-06, "loss": 0.4429, "step": 3495 }, { "epoch": 0.7779261237205163, "grad_norm": 1.6347913602458624, "learning_rate": 1.240949466359223e-06, "loss": 0.4454, "step": 3496 }, { "epoch": 0.7781486426346239, "grad_norm": 1.8680429295132697, "learning_rate": 1.2385743217603703e-06, "loss": 0.448, "step": 3497 }, { "epoch": 0.7783711615487316, "grad_norm": 1.6022094629180677, "learning_rate": 1.2362011309541784e-06, "loss": 0.4278, "step": 3498 }, { "epoch": 0.7785936804628394, "grad_norm": 1.6627120539508546, "learning_rate": 1.233829895173348e-06, "loss": 0.4607, "step": 3499 }, { "epoch": 0.778816199376947, "grad_norm": 1.745583086302977, "learning_rate": 1.2314606156495683e-06, "loss": 0.4567, "step": 3500 }, { "epoch": 0.7790387182910548, "grad_norm": 1.6029792841190906, "learning_rate": 1.2290932936135125e-06, "loss": 0.4225, "step": 3501 }, { "epoch": 0.7792612372051625, "grad_norm": 1.640373357552993, "learning_rate": 1.226727930294836e-06, "loss": 0.4285, "step": 3502 }, { "epoch": 0.7794837561192701, "grad_norm": 1.629450865484103, "learning_rate": 1.2243645269221732e-06, "loss": 0.4489, "step": 3503 }, { "epoch": 0.7797062750333779, "grad_norm": 1.63529838362506, "learning_rate": 1.2220030847231468e-06, "loss": 0.4582, "step": 3504 }, { "epoch": 0.7799287939474855, "grad_norm": 1.630892200256831, "learning_rate": 1.219643604924358e-06, "loss": 0.4374, "step": 3505 }, { "epoch": 0.7801513128615932, "grad_norm": 1.6514671302226802, "learning_rate": 1.2172860887513844e-06, "loss": 0.4496, "step": 3506 }, { "epoch": 0.780373831775701, "grad_norm": 1.6229149267341418, "learning_rate": 1.2149305374287934e-06, "loss": 0.4418, "step": 3507 }, { "epoch": 0.7805963506898086, "grad_norm": 1.5743375336604648, "learning_rate": 1.2125769521801223e-06, "loss": 0.4537, "step": 3508 }, { "epoch": 0.7808188696039163, "grad_norm": 1.6186860312581888, "learning_rate": 1.210225334227892e-06, "loss": 0.4358, "step": 3509 }, { "epoch": 0.7810413885180241, "grad_norm": 1.5936044190085337, "learning_rate": 1.207875684793602e-06, "loss": 0.4327, "step": 3510 }, { "epoch": 0.7812639074321317, "grad_norm": 1.6662841923822636, "learning_rate": 1.205528005097724e-06, "loss": 0.4516, "step": 3511 }, { "epoch": 0.7814864263462394, "grad_norm": 1.7472501224646828, "learning_rate": 1.2031822963597134e-06, "loss": 0.4287, "step": 3512 }, { "epoch": 0.7817089452603472, "grad_norm": 1.654854859468969, "learning_rate": 1.2008385597979982e-06, "loss": 0.4574, "step": 3513 }, { "epoch": 0.7819314641744548, "grad_norm": 1.7289589255919777, "learning_rate": 1.198496796629982e-06, "loss": 0.4554, "step": 3514 }, { "epoch": 0.7821539830885625, "grad_norm": 1.7273978849945866, "learning_rate": 1.1961570080720459e-06, "loss": 0.4409, "step": 3515 }, { "epoch": 0.7823765020026703, "grad_norm": 1.694101378533968, "learning_rate": 1.1938191953395401e-06, "loss": 0.4311, "step": 3516 }, { "epoch": 0.7825990209167779, "grad_norm": 1.6577605275076244, "learning_rate": 1.191483359646793e-06, "loss": 0.4478, "step": 3517 }, { "epoch": 0.7828215398308856, "grad_norm": 1.6864417961656062, "learning_rate": 1.1891495022071059e-06, "loss": 0.4607, "step": 3518 }, { "epoch": 0.7830440587449933, "grad_norm": 1.6068359574111364, "learning_rate": 1.1868176242327507e-06, "loss": 0.4531, "step": 3519 }, { "epoch": 0.783266577659101, "grad_norm": 1.6250562786468619, "learning_rate": 1.184487726934973e-06, "loss": 0.4464, "step": 3520 }, { "epoch": 0.7834890965732088, "grad_norm": 1.5776012384575624, "learning_rate": 1.1821598115239863e-06, "loss": 0.424, "step": 3521 }, { "epoch": 0.7837116154873164, "grad_norm": 1.6415629811075365, "learning_rate": 1.1798338792089792e-06, "loss": 0.4358, "step": 3522 }, { "epoch": 0.7839341344014241, "grad_norm": 1.8349592079859678, "learning_rate": 1.1775099311981052e-06, "loss": 0.4379, "step": 3523 }, { "epoch": 0.7841566533155319, "grad_norm": 1.7927746914926994, "learning_rate": 1.1751879686984896e-06, "loss": 0.4545, "step": 3524 }, { "epoch": 0.7843791722296395, "grad_norm": 1.6057316010390177, "learning_rate": 1.1728679929162313e-06, "loss": 0.4368, "step": 3525 }, { "epoch": 0.7846016911437472, "grad_norm": 1.6518889681115672, "learning_rate": 1.1705500050563873e-06, "loss": 0.4215, "step": 3526 }, { "epoch": 0.784824210057855, "grad_norm": 1.7262044161311219, "learning_rate": 1.1682340063229902e-06, "loss": 0.4345, "step": 3527 }, { "epoch": 0.7850467289719626, "grad_norm": 1.6208804950249807, "learning_rate": 1.1659199979190339e-06, "loss": 0.4486, "step": 3528 }, { "epoch": 0.7852692478860703, "grad_norm": 1.5997428048208437, "learning_rate": 1.1636079810464818e-06, "loss": 0.4527, "step": 3529 }, { "epoch": 0.7854917668001781, "grad_norm": 1.6359465865102254, "learning_rate": 1.1612979569062638e-06, "loss": 0.4346, "step": 3530 }, { "epoch": 0.7857142857142857, "grad_norm": 1.6453902895049186, "learning_rate": 1.1589899266982691e-06, "loss": 0.4516, "step": 3531 }, { "epoch": 0.7859368046283934, "grad_norm": 1.6353806151565315, "learning_rate": 1.156683891621359e-06, "loss": 0.427, "step": 3532 }, { "epoch": 0.7861593235425011, "grad_norm": 1.8308664170498758, "learning_rate": 1.1543798528733518e-06, "loss": 0.449, "step": 3533 }, { "epoch": 0.7863818424566088, "grad_norm": 1.6735567425399343, "learning_rate": 1.1520778116510323e-06, "loss": 0.4454, "step": 3534 }, { "epoch": 0.7866043613707165, "grad_norm": 1.6326255669134282, "learning_rate": 1.1497777691501484e-06, "loss": 0.4355, "step": 3535 }, { "epoch": 0.7868268802848242, "grad_norm": 1.70194614080876, "learning_rate": 1.1474797265654048e-06, "loss": 0.4287, "step": 3536 }, { "epoch": 0.7870493991989319, "grad_norm": 1.5260439413635944, "learning_rate": 1.1451836850904736e-06, "loss": 0.424, "step": 3537 }, { "epoch": 0.7872719181130396, "grad_norm": 1.7059960188837684, "learning_rate": 1.1428896459179833e-06, "loss": 0.4387, "step": 3538 }, { "epoch": 0.7874944370271473, "grad_norm": 1.571641376676792, "learning_rate": 1.140597610239525e-06, "loss": 0.4456, "step": 3539 }, { "epoch": 0.787716955941255, "grad_norm": 1.8275540154970225, "learning_rate": 1.1383075792456493e-06, "loss": 0.4526, "step": 3540 }, { "epoch": 0.7879394748553628, "grad_norm": 1.6412712546735284, "learning_rate": 1.1360195541258606e-06, "loss": 0.4431, "step": 3541 }, { "epoch": 0.7881619937694704, "grad_norm": 1.656669192563466, "learning_rate": 1.133733536068628e-06, "loss": 0.4295, "step": 3542 }, { "epoch": 0.7883845126835781, "grad_norm": 1.7398961518423237, "learning_rate": 1.1314495262613712e-06, "loss": 0.4418, "step": 3543 }, { "epoch": 0.7886070315976857, "grad_norm": 1.621681101462389, "learning_rate": 1.1291675258904755e-06, "loss": 0.4368, "step": 3544 }, { "epoch": 0.7888295505117935, "grad_norm": 1.7240465543911059, "learning_rate": 1.126887536141274e-06, "loss": 0.4439, "step": 3545 }, { "epoch": 0.7890520694259012, "grad_norm": 1.6739568028447367, "learning_rate": 1.1246095581980604e-06, "loss": 0.4068, "step": 3546 }, { "epoch": 0.7892745883400089, "grad_norm": 1.658983660931699, "learning_rate": 1.1223335932440827e-06, "loss": 0.4275, "step": 3547 }, { "epoch": 0.7894971072541166, "grad_norm": 1.6417551745360455, "learning_rate": 1.1200596424615396e-06, "loss": 0.4524, "step": 3548 }, { "epoch": 0.7897196261682243, "grad_norm": 1.6168625209019656, "learning_rate": 1.117787707031589e-06, "loss": 0.4393, "step": 3549 }, { "epoch": 0.789942145082332, "grad_norm": 1.5783414605035615, "learning_rate": 1.1155177881343383e-06, "loss": 0.4148, "step": 3550 }, { "epoch": 0.7901646639964397, "grad_norm": 1.5793690864328185, "learning_rate": 1.1132498869488496e-06, "loss": 0.4303, "step": 3551 }, { "epoch": 0.7903871829105474, "grad_norm": 1.6271383802700852, "learning_rate": 1.1109840046531368e-06, "loss": 0.4324, "step": 3552 }, { "epoch": 0.7906097018246551, "grad_norm": 1.6624308252708986, "learning_rate": 1.1087201424241622e-06, "loss": 0.4371, "step": 3553 }, { "epoch": 0.7908322207387628, "grad_norm": 1.5502546036811549, "learning_rate": 1.1064583014378417e-06, "loss": 0.4513, "step": 3554 }, { "epoch": 0.7910547396528705, "grad_norm": 1.6144259765844402, "learning_rate": 1.1041984828690399e-06, "loss": 0.4262, "step": 3555 }, { "epoch": 0.7912772585669782, "grad_norm": 1.5718060867053476, "learning_rate": 1.1019406878915734e-06, "loss": 0.4511, "step": 3556 }, { "epoch": 0.7914997774810859, "grad_norm": 1.681897300403964, "learning_rate": 1.0996849176782054e-06, "loss": 0.447, "step": 3557 }, { "epoch": 0.7917222963951935, "grad_norm": 1.6202330689660513, "learning_rate": 1.0974311734006466e-06, "loss": 0.4274, "step": 3558 }, { "epoch": 0.7919448153093013, "grad_norm": 1.7412143067533115, "learning_rate": 1.0951794562295564e-06, "loss": 0.4345, "step": 3559 }, { "epoch": 0.792167334223409, "grad_norm": 1.6878620711119139, "learning_rate": 1.092929767334544e-06, "loss": 0.4494, "step": 3560 }, { "epoch": 0.7923898531375166, "grad_norm": 1.6695730780652096, "learning_rate": 1.0906821078841584e-06, "loss": 0.458, "step": 3561 }, { "epoch": 0.7926123720516244, "grad_norm": 1.7203135810757881, "learning_rate": 1.088436479045903e-06, "loss": 0.4553, "step": 3562 }, { "epoch": 0.7928348909657321, "grad_norm": 1.7777267758188087, "learning_rate": 1.0861928819862189e-06, "loss": 0.4515, "step": 3563 }, { "epoch": 0.7930574098798397, "grad_norm": 1.6245328782229798, "learning_rate": 1.0839513178704968e-06, "loss": 0.449, "step": 3564 }, { "epoch": 0.7932799287939475, "grad_norm": 1.5872754633779227, "learning_rate": 1.081711787863068e-06, "loss": 0.4385, "step": 3565 }, { "epoch": 0.7935024477080552, "grad_norm": 1.693387074563148, "learning_rate": 1.079474293127209e-06, "loss": 0.4498, "step": 3566 }, { "epoch": 0.7937249666221629, "grad_norm": 1.7011473172713412, "learning_rate": 1.077238834825141e-06, "loss": 0.4503, "step": 3567 }, { "epoch": 0.7939474855362706, "grad_norm": 1.6792778760547242, "learning_rate": 1.0750054141180212e-06, "loss": 0.4595, "step": 3568 }, { "epoch": 0.7941700044503783, "grad_norm": 1.5773118195080642, "learning_rate": 1.0727740321659568e-06, "loss": 0.4418, "step": 3569 }, { "epoch": 0.794392523364486, "grad_norm": 1.5878761088313937, "learning_rate": 1.0705446901279897e-06, "loss": 0.4512, "step": 3570 }, { "epoch": 0.7946150422785937, "grad_norm": 1.5580420083878015, "learning_rate": 1.068317389162104e-06, "loss": 0.4511, "step": 3571 }, { "epoch": 0.7948375611927013, "grad_norm": 1.6615396177788255, "learning_rate": 1.0660921304252259e-06, "loss": 0.4314, "step": 3572 }, { "epoch": 0.7950600801068091, "grad_norm": 1.592488997833743, "learning_rate": 1.0638689150732157e-06, "loss": 0.4363, "step": 3573 }, { "epoch": 0.7952825990209168, "grad_norm": 1.625766985356179, "learning_rate": 1.0616477442608774e-06, "loss": 0.4507, "step": 3574 }, { "epoch": 0.7955051179350244, "grad_norm": 1.7121941817163933, "learning_rate": 1.0594286191419501e-06, "loss": 0.4494, "step": 3575 }, { "epoch": 0.7957276368491322, "grad_norm": 1.6488985040136026, "learning_rate": 1.0572115408691119e-06, "loss": 0.4534, "step": 3576 }, { "epoch": 0.7959501557632399, "grad_norm": 1.6544291449311452, "learning_rate": 1.054996510593978e-06, "loss": 0.4427, "step": 3577 }, { "epoch": 0.7961726746773475, "grad_norm": 1.6117253015484352, "learning_rate": 1.052783529467097e-06, "loss": 0.4323, "step": 3578 }, { "epoch": 0.7963951935914553, "grad_norm": 1.7922708057793448, "learning_rate": 1.0505725986379544e-06, "loss": 0.451, "step": 3579 }, { "epoch": 0.796617712505563, "grad_norm": 1.5959493658316166, "learning_rate": 1.0483637192549728e-06, "loss": 0.4467, "step": 3580 }, { "epoch": 0.7968402314196706, "grad_norm": 1.6538753578667007, "learning_rate": 1.0461568924655074e-06, "loss": 0.4396, "step": 3581 }, { "epoch": 0.7970627503337784, "grad_norm": 1.8753900577823728, "learning_rate": 1.0439521194158486e-06, "loss": 0.4443, "step": 3582 }, { "epoch": 0.7972852692478861, "grad_norm": 1.6282946922070727, "learning_rate": 1.0417494012512163e-06, "loss": 0.44, "step": 3583 }, { "epoch": 0.7975077881619937, "grad_norm": 1.7155199796719047, "learning_rate": 1.0395487391157683e-06, "loss": 0.4397, "step": 3584 }, { "epoch": 0.7977303070761015, "grad_norm": 1.6724692732634157, "learning_rate": 1.0373501341525894e-06, "loss": 0.4435, "step": 3585 }, { "epoch": 0.7979528259902091, "grad_norm": 1.743992553327656, "learning_rate": 1.0351535875036978e-06, "loss": 0.4392, "step": 3586 }, { "epoch": 0.7981753449043169, "grad_norm": 1.819958300724329, "learning_rate": 1.0329591003100475e-06, "loss": 0.4496, "step": 3587 }, { "epoch": 0.7983978638184246, "grad_norm": 1.5938996601333582, "learning_rate": 1.0307666737115135e-06, "loss": 0.4367, "step": 3588 }, { "epoch": 0.7986203827325322, "grad_norm": 1.5469719162310263, "learning_rate": 1.0285763088469087e-06, "loss": 0.42, "step": 3589 }, { "epoch": 0.79884290164664, "grad_norm": 1.6113723245051528, "learning_rate": 1.0263880068539684e-06, "loss": 0.4254, "step": 3590 }, { "epoch": 0.7990654205607477, "grad_norm": 1.6177419849494086, "learning_rate": 1.024201768869361e-06, "loss": 0.4413, "step": 3591 }, { "epoch": 0.7992879394748553, "grad_norm": 1.5583896995952757, "learning_rate": 1.022017596028682e-06, "loss": 0.4372, "step": 3592 }, { "epoch": 0.7995104583889631, "grad_norm": 1.5495361086045505, "learning_rate": 1.0198354894664524e-06, "loss": 0.4383, "step": 3593 }, { "epoch": 0.7997329773030708, "grad_norm": 1.6070069365409063, "learning_rate": 1.0176554503161235e-06, "loss": 0.4661, "step": 3594 }, { "epoch": 0.7999554962171784, "grad_norm": 1.605989700026316, "learning_rate": 1.0154774797100669e-06, "loss": 0.4554, "step": 3595 }, { "epoch": 0.8001780151312862, "grad_norm": 1.6824838400071265, "learning_rate": 1.0133015787795853e-06, "loss": 0.435, "step": 3596 }, { "epoch": 0.8004005340453939, "grad_norm": 1.6565983968689009, "learning_rate": 1.011127748654905e-06, "loss": 0.449, "step": 3597 }, { "epoch": 0.8006230529595015, "grad_norm": 1.5920365354539876, "learning_rate": 1.0089559904651712e-06, "loss": 0.4487, "step": 3598 }, { "epoch": 0.8008455718736093, "grad_norm": 1.800096190046989, "learning_rate": 1.0067863053384646e-06, "loss": 0.4246, "step": 3599 }, { "epoch": 0.8010680907877169, "grad_norm": 1.5766186416155996, "learning_rate": 1.0046186944017767e-06, "loss": 0.4373, "step": 3600 }, { "epoch": 0.8012906097018246, "grad_norm": 1.6542886028161319, "learning_rate": 1.0024531587810282e-06, "loss": 0.4357, "step": 3601 }, { "epoch": 0.8015131286159324, "grad_norm": 1.7504917482832238, "learning_rate": 1.000289699601063e-06, "loss": 0.4432, "step": 3602 }, { "epoch": 0.80173564753004, "grad_norm": 1.6830191536380004, "learning_rate": 9.981283179856405e-07, "loss": 0.453, "step": 3603 }, { "epoch": 0.8019581664441477, "grad_norm": 1.6391138780750838, "learning_rate": 9.959690150574475e-07, "loss": 0.4379, "step": 3604 }, { "epoch": 0.8021806853582555, "grad_norm": 1.6808495995636696, "learning_rate": 9.938117919380835e-07, "loss": 0.4481, "step": 3605 }, { "epoch": 0.8024032042723631, "grad_norm": 1.6733729372096198, "learning_rate": 9.916566497480785e-07, "loss": 0.4371, "step": 3606 }, { "epoch": 0.8026257231864709, "grad_norm": 1.6864412796141925, "learning_rate": 9.895035896068705e-07, "loss": 0.4568, "step": 3607 }, { "epoch": 0.8028482421005786, "grad_norm": 1.6676815348428793, "learning_rate": 9.873526126328227e-07, "loss": 0.4569, "step": 3608 }, { "epoch": 0.8030707610146862, "grad_norm": 1.6571596408417633, "learning_rate": 9.852037199432145e-07, "loss": 0.4393, "step": 3609 }, { "epoch": 0.803293279928794, "grad_norm": 1.8124544225307666, "learning_rate": 9.830569126542416e-07, "loss": 0.4551, "step": 3610 }, { "epoch": 0.8035157988429017, "grad_norm": 1.6460847590719203, "learning_rate": 9.809121918810183e-07, "loss": 0.4465, "step": 3611 }, { "epoch": 0.8037383177570093, "grad_norm": 1.7136442432967771, "learning_rate": 9.787695587375734e-07, "loss": 0.4492, "step": 3612 }, { "epoch": 0.8039608366711171, "grad_norm": 1.608780864640819, "learning_rate": 9.766290143368535e-07, "loss": 0.4287, "step": 3613 }, { "epoch": 0.8041833555852247, "grad_norm": 1.6237975154337008, "learning_rate": 9.74490559790719e-07, "loss": 0.4257, "step": 3614 }, { "epoch": 0.8044058744993324, "grad_norm": 1.6992921959774585, "learning_rate": 9.72354196209942e-07, "loss": 0.4397, "step": 3615 }, { "epoch": 0.8046283934134402, "grad_norm": 1.6790540710354362, "learning_rate": 9.702199247042138e-07, "loss": 0.4316, "step": 3616 }, { "epoch": 0.8048509123275478, "grad_norm": 1.7358879997734427, "learning_rate": 9.680877463821352e-07, "loss": 0.4541, "step": 3617 }, { "epoch": 0.8050734312416555, "grad_norm": 1.7914670826743933, "learning_rate": 9.659576623512219e-07, "loss": 0.4401, "step": 3618 }, { "epoch": 0.8052959501557633, "grad_norm": 1.6176046485418727, "learning_rate": 9.63829673717901e-07, "loss": 0.4392, "step": 3619 }, { "epoch": 0.8055184690698709, "grad_norm": 1.7120640904371283, "learning_rate": 9.617037815875085e-07, "loss": 0.4333, "step": 3620 }, { "epoch": 0.8057409879839786, "grad_norm": 1.6051045928229666, "learning_rate": 9.595799870642964e-07, "loss": 0.4517, "step": 3621 }, { "epoch": 0.8059635068980864, "grad_norm": 1.742347381211863, "learning_rate": 9.574582912514252e-07, "loss": 0.4259, "step": 3622 }, { "epoch": 0.806186025812194, "grad_norm": 1.7233475865323986, "learning_rate": 9.553386952509603e-07, "loss": 0.4385, "step": 3623 }, { "epoch": 0.8064085447263017, "grad_norm": 1.580653457470349, "learning_rate": 9.532212001638869e-07, "loss": 0.4414, "step": 3624 }, { "epoch": 0.8066310636404095, "grad_norm": 1.650703097736885, "learning_rate": 9.511058070900886e-07, "loss": 0.428, "step": 3625 }, { "epoch": 0.8068535825545171, "grad_norm": 1.5549698965007392, "learning_rate": 9.489925171283637e-07, "loss": 0.4356, "step": 3626 }, { "epoch": 0.8070761014686249, "grad_norm": 1.7477530576129503, "learning_rate": 9.46881331376413e-07, "loss": 0.4318, "step": 3627 }, { "epoch": 0.8072986203827325, "grad_norm": 1.698602356674505, "learning_rate": 9.447722509308494e-07, "loss": 0.4267, "step": 3628 }, { "epoch": 0.8075211392968402, "grad_norm": 1.7630578623203643, "learning_rate": 9.426652768871891e-07, "loss": 0.4402, "step": 3629 }, { "epoch": 0.807743658210948, "grad_norm": 1.7313795262336034, "learning_rate": 9.405604103398552e-07, "loss": 0.436, "step": 3630 }, { "epoch": 0.8079661771250556, "grad_norm": 1.6610452679530046, "learning_rate": 9.384576523821776e-07, "loss": 0.4423, "step": 3631 }, { "epoch": 0.8081886960391633, "grad_norm": 1.7616009085213011, "learning_rate": 9.363570041063863e-07, "loss": 0.4505, "step": 3632 }, { "epoch": 0.8084112149532711, "grad_norm": 1.6381713463930376, "learning_rate": 9.342584666036192e-07, "loss": 0.4471, "step": 3633 }, { "epoch": 0.8086337338673787, "grad_norm": 1.6270030655177536, "learning_rate": 9.321620409639193e-07, "loss": 0.4333, "step": 3634 }, { "epoch": 0.8088562527814864, "grad_norm": 1.6210614881429894, "learning_rate": 9.300677282762261e-07, "loss": 0.4399, "step": 3635 }, { "epoch": 0.8090787716955942, "grad_norm": 1.7407442072695347, "learning_rate": 9.279755296283905e-07, "loss": 0.419, "step": 3636 }, { "epoch": 0.8093012906097018, "grad_norm": 1.711018757663765, "learning_rate": 9.25885446107157e-07, "loss": 0.4257, "step": 3637 }, { "epoch": 0.8095238095238095, "grad_norm": 1.6334051149131281, "learning_rate": 9.237974787981774e-07, "loss": 0.4406, "step": 3638 }, { "epoch": 0.8097463284379173, "grad_norm": 1.6557033971768385, "learning_rate": 9.217116287860017e-07, "loss": 0.4427, "step": 3639 }, { "epoch": 0.8099688473520249, "grad_norm": 1.650498228271878, "learning_rate": 9.196278971540789e-07, "loss": 0.4346, "step": 3640 }, { "epoch": 0.8101913662661326, "grad_norm": 1.611992276529439, "learning_rate": 9.175462849847594e-07, "loss": 0.4287, "step": 3641 }, { "epoch": 0.8104138851802403, "grad_norm": 1.8139138165270818, "learning_rate": 9.154667933592937e-07, "loss": 0.4564, "step": 3642 }, { "epoch": 0.810636404094348, "grad_norm": 1.6559978146419796, "learning_rate": 9.133894233578288e-07, "loss": 0.4368, "step": 3643 }, { "epoch": 0.8108589230084557, "grad_norm": 1.765791243340186, "learning_rate": 9.113141760594119e-07, "loss": 0.4381, "step": 3644 }, { "epoch": 0.8110814419225634, "grad_norm": 1.591423234608494, "learning_rate": 9.092410525419831e-07, "loss": 0.4133, "step": 3645 }, { "epoch": 0.8113039608366711, "grad_norm": 1.632130639910745, "learning_rate": 9.071700538823852e-07, "loss": 0.4359, "step": 3646 }, { "epoch": 0.8115264797507789, "grad_norm": 1.727623126372359, "learning_rate": 9.051011811563521e-07, "loss": 0.4489, "step": 3647 }, { "epoch": 0.8117489986648865, "grad_norm": 1.690882107863402, "learning_rate": 9.030344354385157e-07, "loss": 0.4322, "step": 3648 }, { "epoch": 0.8119715175789942, "grad_norm": 1.5967156717785125, "learning_rate": 9.009698178024074e-07, "loss": 0.4374, "step": 3649 }, { "epoch": 0.812194036493102, "grad_norm": 1.6506691855484612, "learning_rate": 8.989073293204442e-07, "loss": 0.4406, "step": 3650 }, { "epoch": 0.8124165554072096, "grad_norm": 1.6051887431910383, "learning_rate": 8.968469710639449e-07, "loss": 0.4342, "step": 3651 }, { "epoch": 0.8126390743213173, "grad_norm": 1.7364118271840283, "learning_rate": 8.947887441031167e-07, "loss": 0.4403, "step": 3652 }, { "epoch": 0.8128615932354251, "grad_norm": 1.6953708894438024, "learning_rate": 8.927326495070626e-07, "loss": 0.4435, "step": 3653 }, { "epoch": 0.8130841121495327, "grad_norm": 1.6491409049329686, "learning_rate": 8.906786883437773e-07, "loss": 0.4395, "step": 3654 }, { "epoch": 0.8133066310636404, "grad_norm": 1.68792420325808, "learning_rate": 8.886268616801474e-07, "loss": 0.4527, "step": 3655 }, { "epoch": 0.8135291499777481, "grad_norm": 1.7705571076782194, "learning_rate": 8.865771705819521e-07, "loss": 0.4556, "step": 3656 }, { "epoch": 0.8137516688918558, "grad_norm": 1.625744737971236, "learning_rate": 8.845296161138572e-07, "loss": 0.4418, "step": 3657 }, { "epoch": 0.8139741878059635, "grad_norm": 1.6137012685499275, "learning_rate": 8.824841993394228e-07, "loss": 0.4408, "step": 3658 }, { "epoch": 0.8141967067200712, "grad_norm": 1.7503752993750379, "learning_rate": 8.804409213210985e-07, "loss": 0.4342, "step": 3659 }, { "epoch": 0.8144192256341789, "grad_norm": 1.5513398709849735, "learning_rate": 8.783997831202174e-07, "loss": 0.4288, "step": 3660 }, { "epoch": 0.8146417445482866, "grad_norm": 1.6805323679017223, "learning_rate": 8.763607857970108e-07, "loss": 0.4436, "step": 3661 }, { "epoch": 0.8148642634623943, "grad_norm": 1.634779883752318, "learning_rate": 8.743239304105889e-07, "loss": 0.4387, "step": 3662 }, { "epoch": 0.815086782376502, "grad_norm": 1.620278465310523, "learning_rate": 8.722892180189535e-07, "loss": 0.4301, "step": 3663 }, { "epoch": 0.8153093012906097, "grad_norm": 1.6943893383933144, "learning_rate": 8.702566496789943e-07, "loss": 0.432, "step": 3664 }, { "epoch": 0.8155318202047174, "grad_norm": 1.6604983336424604, "learning_rate": 8.682262264464842e-07, "loss": 0.4408, "step": 3665 }, { "epoch": 0.8157543391188251, "grad_norm": 1.8722760881666054, "learning_rate": 8.661979493760836e-07, "loss": 0.4263, "step": 3666 }, { "epoch": 0.8159768580329329, "grad_norm": 1.6602553908031767, "learning_rate": 8.641718195213377e-07, "loss": 0.4386, "step": 3667 }, { "epoch": 0.8161993769470405, "grad_norm": 1.6339740781916121, "learning_rate": 8.621478379346782e-07, "loss": 0.4537, "step": 3668 }, { "epoch": 0.8164218958611482, "grad_norm": 1.6641490701169523, "learning_rate": 8.601260056674188e-07, "loss": 0.4518, "step": 3669 }, { "epoch": 0.8166444147752558, "grad_norm": 1.6796297473700326, "learning_rate": 8.581063237697551e-07, "loss": 0.4235, "step": 3670 }, { "epoch": 0.8168669336893636, "grad_norm": 1.7504796239217983, "learning_rate": 8.560887932907719e-07, "loss": 0.4363, "step": 3671 }, { "epoch": 0.8170894526034713, "grad_norm": 1.6434461268964047, "learning_rate": 8.54073415278427e-07, "loss": 0.442, "step": 3672 }, { "epoch": 0.817311971517579, "grad_norm": 1.773670050621455, "learning_rate": 8.520601907795717e-07, "loss": 0.4414, "step": 3673 }, { "epoch": 0.8175344904316867, "grad_norm": 1.7136640565578267, "learning_rate": 8.500491208399287e-07, "loss": 0.4458, "step": 3674 }, { "epoch": 0.8177570093457944, "grad_norm": 1.6702048214170893, "learning_rate": 8.480402065041071e-07, "loss": 0.4377, "step": 3675 }, { "epoch": 0.8179795282599021, "grad_norm": 1.7002753391296366, "learning_rate": 8.460334488155952e-07, "loss": 0.4415, "step": 3676 }, { "epoch": 0.8182020471740098, "grad_norm": 1.655299864462562, "learning_rate": 8.440288488167592e-07, "loss": 0.4418, "step": 3677 }, { "epoch": 0.8184245660881175, "grad_norm": 1.613791335912506, "learning_rate": 8.420264075488466e-07, "loss": 0.4207, "step": 3678 }, { "epoch": 0.8186470850022252, "grad_norm": 1.6721139049440534, "learning_rate": 8.400261260519832e-07, "loss": 0.4365, "step": 3679 }, { "epoch": 0.8188696039163329, "grad_norm": 1.7156922125580827, "learning_rate": 8.380280053651723e-07, "loss": 0.4431, "step": 3680 }, { "epoch": 0.8190921228304406, "grad_norm": 1.7276103295186365, "learning_rate": 8.360320465262973e-07, "loss": 0.4408, "step": 3681 }, { "epoch": 0.8193146417445483, "grad_norm": 1.6601175992981074, "learning_rate": 8.340382505721134e-07, "loss": 0.4295, "step": 3682 }, { "epoch": 0.819537160658656, "grad_norm": 1.644659660215755, "learning_rate": 8.32046618538257e-07, "loss": 0.43, "step": 3683 }, { "epoch": 0.8197596795727636, "grad_norm": 1.6174043479631064, "learning_rate": 8.300571514592404e-07, "loss": 0.4201, "step": 3684 }, { "epoch": 0.8199821984868714, "grad_norm": 1.7625908565243498, "learning_rate": 8.280698503684458e-07, "loss": 0.427, "step": 3685 }, { "epoch": 0.8202047174009791, "grad_norm": 1.731161570744422, "learning_rate": 8.260847162981406e-07, "loss": 0.4476, "step": 3686 }, { "epoch": 0.8204272363150867, "grad_norm": 1.7421096598921253, "learning_rate": 8.241017502794557e-07, "loss": 0.4539, "step": 3687 }, { "epoch": 0.8206497552291945, "grad_norm": 1.7627381510516607, "learning_rate": 8.221209533424035e-07, "loss": 0.455, "step": 3688 }, { "epoch": 0.8208722741433022, "grad_norm": 1.7448659877753911, "learning_rate": 8.201423265158648e-07, "loss": 0.4438, "step": 3689 }, { "epoch": 0.8210947930574098, "grad_norm": 1.6579787349542041, "learning_rate": 8.181658708275958e-07, "loss": 0.4187, "step": 3690 }, { "epoch": 0.8213173119715176, "grad_norm": 1.6948613547209945, "learning_rate": 8.161915873042253e-07, "loss": 0.4393, "step": 3691 }, { "epoch": 0.8215398308856253, "grad_norm": 1.7498113299128388, "learning_rate": 8.142194769712519e-07, "loss": 0.4441, "step": 3692 }, { "epoch": 0.821762349799733, "grad_norm": 1.5461282516578165, "learning_rate": 8.122495408530484e-07, "loss": 0.4369, "step": 3693 }, { "epoch": 0.8219848687138407, "grad_norm": 1.620568913358307, "learning_rate": 8.10281779972854e-07, "loss": 0.4271, "step": 3694 }, { "epoch": 0.8222073876279484, "grad_norm": 1.6499712926172183, "learning_rate": 8.083161953527807e-07, "loss": 0.4509, "step": 3695 }, { "epoch": 0.822429906542056, "grad_norm": 1.6996878531030821, "learning_rate": 8.063527880138112e-07, "loss": 0.4399, "step": 3696 }, { "epoch": 0.8226524254561638, "grad_norm": 1.7681379243892836, "learning_rate": 8.043915589757928e-07, "loss": 0.4515, "step": 3697 }, { "epoch": 0.8228749443702714, "grad_norm": 1.6252799775537952, "learning_rate": 8.024325092574475e-07, "loss": 0.4462, "step": 3698 }, { "epoch": 0.8230974632843792, "grad_norm": 1.6854714361482452, "learning_rate": 8.004756398763602e-07, "loss": 0.4411, "step": 3699 }, { "epoch": 0.8233199821984869, "grad_norm": 1.68722186351349, "learning_rate": 7.985209518489856e-07, "loss": 0.4173, "step": 3700 }, { "epoch": 0.8235425011125945, "grad_norm": 1.7386194090802563, "learning_rate": 7.965684461906453e-07, "loss": 0.4296, "step": 3701 }, { "epoch": 0.8237650200267023, "grad_norm": 1.6522193380190164, "learning_rate": 7.946181239155259e-07, "loss": 0.444, "step": 3702 }, { "epoch": 0.82398753894081, "grad_norm": 1.6778243677000515, "learning_rate": 7.92669986036681e-07, "loss": 0.4421, "step": 3703 }, { "epoch": 0.8242100578549176, "grad_norm": 1.674413752877467, "learning_rate": 7.907240335660299e-07, "loss": 0.4374, "step": 3704 }, { "epoch": 0.8244325767690254, "grad_norm": 1.9193210145916062, "learning_rate": 7.887802675143563e-07, "loss": 0.4578, "step": 3705 }, { "epoch": 0.8246550956831331, "grad_norm": 1.5736794050400558, "learning_rate": 7.868386888913093e-07, "loss": 0.4331, "step": 3706 }, { "epoch": 0.8248776145972407, "grad_norm": 1.7629651527301244, "learning_rate": 7.848992987053982e-07, "loss": 0.4307, "step": 3707 }, { "epoch": 0.8251001335113485, "grad_norm": 1.583881719692819, "learning_rate": 7.829620979640002e-07, "loss": 0.4441, "step": 3708 }, { "epoch": 0.8253226524254562, "grad_norm": 1.7578421065782095, "learning_rate": 7.8102708767335e-07, "loss": 0.4346, "step": 3709 }, { "epoch": 0.8255451713395638, "grad_norm": 1.634538832949484, "learning_rate": 7.790942688385511e-07, "loss": 0.4199, "step": 3710 }, { "epoch": 0.8257676902536716, "grad_norm": 1.6875434815836914, "learning_rate": 7.771636424635648e-07, "loss": 0.4505, "step": 3711 }, { "epoch": 0.8259902091677792, "grad_norm": 1.7211563522188085, "learning_rate": 7.752352095512117e-07, "loss": 0.4502, "step": 3712 }, { "epoch": 0.826212728081887, "grad_norm": 1.784661238039043, "learning_rate": 7.733089711031777e-07, "loss": 0.4203, "step": 3713 }, { "epoch": 0.8264352469959947, "grad_norm": 1.797761751798788, "learning_rate": 7.713849281200042e-07, "loss": 0.4317, "step": 3714 }, { "epoch": 0.8266577659101023, "grad_norm": 1.6303444314309472, "learning_rate": 7.694630816010956e-07, "loss": 0.4448, "step": 3715 }, { "epoch": 0.82688028482421, "grad_norm": 1.677677895716619, "learning_rate": 7.675434325447139e-07, "loss": 0.4257, "step": 3716 }, { "epoch": 0.8271028037383178, "grad_norm": 1.701705677695153, "learning_rate": 7.656259819479811e-07, "loss": 0.4276, "step": 3717 }, { "epoch": 0.8273253226524254, "grad_norm": 1.630092761469737, "learning_rate": 7.637107308068758e-07, "loss": 0.4245, "step": 3718 }, { "epoch": 0.8275478415665332, "grad_norm": 1.7968203502074813, "learning_rate": 7.617976801162336e-07, "loss": 0.4545, "step": 3719 }, { "epoch": 0.8277703604806409, "grad_norm": 1.7385021148957431, "learning_rate": 7.598868308697483e-07, "loss": 0.424, "step": 3720 }, { "epoch": 0.8279928793947485, "grad_norm": 1.6561641776270326, "learning_rate": 7.579781840599709e-07, "loss": 0.4467, "step": 3721 }, { "epoch": 0.8282153983088563, "grad_norm": 1.532246295291971, "learning_rate": 7.560717406783053e-07, "loss": 0.4362, "step": 3722 }, { "epoch": 0.828437917222964, "grad_norm": 1.6693433039708863, "learning_rate": 7.541675017150157e-07, "loss": 0.4157, "step": 3723 }, { "epoch": 0.8286604361370716, "grad_norm": 1.5183676892613251, "learning_rate": 7.522654681592173e-07, "loss": 0.4345, "step": 3724 }, { "epoch": 0.8288829550511794, "grad_norm": 1.6408642661895738, "learning_rate": 7.503656409988803e-07, "loss": 0.4374, "step": 3725 }, { "epoch": 0.829105473965287, "grad_norm": 1.7267239923064452, "learning_rate": 7.484680212208317e-07, "loss": 0.4373, "step": 3726 }, { "epoch": 0.8293279928793947, "grad_norm": 1.7773386298616018, "learning_rate": 7.465726098107473e-07, "loss": 0.4414, "step": 3727 }, { "epoch": 0.8295505117935025, "grad_norm": 1.9386109996944492, "learning_rate": 7.446794077531593e-07, "loss": 0.4199, "step": 3728 }, { "epoch": 0.8297730307076101, "grad_norm": 1.6841013926655082, "learning_rate": 7.427884160314513e-07, "loss": 0.4235, "step": 3729 }, { "epoch": 0.8299955496217178, "grad_norm": 1.6214607925527065, "learning_rate": 7.408996356278592e-07, "loss": 0.4347, "step": 3730 }, { "epoch": 0.8302180685358256, "grad_norm": 1.7607842294561982, "learning_rate": 7.390130675234703e-07, "loss": 0.4605, "step": 3731 }, { "epoch": 0.8304405874499332, "grad_norm": 1.726456866712228, "learning_rate": 7.371287126982208e-07, "loss": 0.428, "step": 3732 }, { "epoch": 0.830663106364041, "grad_norm": 1.6431903743737648, "learning_rate": 7.352465721309005e-07, "loss": 0.4257, "step": 3733 }, { "epoch": 0.8308856252781487, "grad_norm": 1.6291515075149599, "learning_rate": 7.333666467991435e-07, "loss": 0.4475, "step": 3734 }, { "epoch": 0.8311081441922563, "grad_norm": 1.7496790193058482, "learning_rate": 7.314889376794426e-07, "loss": 0.4286, "step": 3735 }, { "epoch": 0.831330663106364, "grad_norm": 1.8794549711457633, "learning_rate": 7.296134457471304e-07, "loss": 0.4512, "step": 3736 }, { "epoch": 0.8315531820204718, "grad_norm": 1.7608923720161307, "learning_rate": 7.277401719763916e-07, "loss": 0.4602, "step": 3737 }, { "epoch": 0.8317757009345794, "grad_norm": 1.633486821442361, "learning_rate": 7.258691173402604e-07, "loss": 0.4161, "step": 3738 }, { "epoch": 0.8319982198486872, "grad_norm": 1.6640352096828706, "learning_rate": 7.240002828106141e-07, "loss": 0.4226, "step": 3739 }, { "epoch": 0.8322207387627948, "grad_norm": 1.6294258785769908, "learning_rate": 7.221336693581798e-07, "loss": 0.445, "step": 3740 }, { "epoch": 0.8324432576769025, "grad_norm": 1.665415531026186, "learning_rate": 7.202692779525305e-07, "loss": 0.4236, "step": 3741 }, { "epoch": 0.8326657765910103, "grad_norm": 1.6617280204017306, "learning_rate": 7.184071095620849e-07, "loss": 0.4427, "step": 3742 }, { "epoch": 0.8328882955051179, "grad_norm": 1.6746090076962294, "learning_rate": 7.165471651541073e-07, "loss": 0.4421, "step": 3743 }, { "epoch": 0.8331108144192256, "grad_norm": 1.6505951508650087, "learning_rate": 7.146894456947045e-07, "loss": 0.4434, "step": 3744 }, { "epoch": 0.8333333333333334, "grad_norm": 1.7738704488331136, "learning_rate": 7.128339521488304e-07, "loss": 0.4466, "step": 3745 }, { "epoch": 0.833555852247441, "grad_norm": 1.7871447966760556, "learning_rate": 7.10980685480282e-07, "loss": 0.4432, "step": 3746 }, { "epoch": 0.8337783711615487, "grad_norm": 1.6605728647238274, "learning_rate": 7.091296466516989e-07, "loss": 0.4334, "step": 3747 }, { "epoch": 0.8340008900756565, "grad_norm": 1.6457559270070623, "learning_rate": 7.072808366245649e-07, "loss": 0.4541, "step": 3748 }, { "epoch": 0.8342234089897641, "grad_norm": 1.6043510955566656, "learning_rate": 7.054342563592032e-07, "loss": 0.4211, "step": 3749 }, { "epoch": 0.8344459279038718, "grad_norm": 1.8053323727733948, "learning_rate": 7.035899068147834e-07, "loss": 0.4503, "step": 3750 }, { "epoch": 0.8346684468179796, "grad_norm": 1.662626141137638, "learning_rate": 7.017477889493102e-07, "loss": 0.4298, "step": 3751 }, { "epoch": 0.8348909657320872, "grad_norm": 1.6317350105796709, "learning_rate": 6.999079037196349e-07, "loss": 0.4375, "step": 3752 }, { "epoch": 0.835113484646195, "grad_norm": 1.688870602163323, "learning_rate": 6.980702520814458e-07, "loss": 0.4443, "step": 3753 }, { "epoch": 0.8353360035603026, "grad_norm": 1.7366314823938795, "learning_rate": 6.962348349892728e-07, "loss": 0.4237, "step": 3754 }, { "epoch": 0.8355585224744103, "grad_norm": 1.7078690724821972, "learning_rate": 6.944016533964854e-07, "loss": 0.4333, "step": 3755 }, { "epoch": 0.835781041388518, "grad_norm": 1.6191054161402545, "learning_rate": 6.92570708255288e-07, "loss": 0.4362, "step": 3756 }, { "epoch": 0.8360035603026257, "grad_norm": 1.6179685886931987, "learning_rate": 6.907420005167276e-07, "loss": 0.4231, "step": 3757 }, { "epoch": 0.8362260792167334, "grad_norm": 1.6653204940266824, "learning_rate": 6.889155311306889e-07, "loss": 0.4447, "step": 3758 }, { "epoch": 0.8364485981308412, "grad_norm": 1.7218786118748135, "learning_rate": 6.87091301045889e-07, "loss": 0.4525, "step": 3759 }, { "epoch": 0.8366711170449488, "grad_norm": 1.5225062249572836, "learning_rate": 6.852693112098902e-07, "loss": 0.4288, "step": 3760 }, { "epoch": 0.8368936359590565, "grad_norm": 1.7251828308613202, "learning_rate": 6.834495625690824e-07, "loss": 0.4403, "step": 3761 }, { "epoch": 0.8371161548731643, "grad_norm": 1.6652438689195852, "learning_rate": 6.816320560686973e-07, "loss": 0.4411, "step": 3762 }, { "epoch": 0.8373386737872719, "grad_norm": 1.6472439130400653, "learning_rate": 6.79816792652801e-07, "loss": 0.4383, "step": 3763 }, { "epoch": 0.8375611927013796, "grad_norm": 1.6987583143532443, "learning_rate": 6.780037732642908e-07, "loss": 0.4491, "step": 3764 }, { "epoch": 0.8377837116154874, "grad_norm": 1.616763578784881, "learning_rate": 6.761929988449029e-07, "loss": 0.431, "step": 3765 }, { "epoch": 0.838006230529595, "grad_norm": 1.6903424254666892, "learning_rate": 6.743844703352049e-07, "loss": 0.4235, "step": 3766 }, { "epoch": 0.8382287494437027, "grad_norm": 1.7857973215331984, "learning_rate": 6.725781886745985e-07, "loss": 0.4451, "step": 3767 }, { "epoch": 0.8384512683578104, "grad_norm": 1.8019650361292523, "learning_rate": 6.707741548013202e-07, "loss": 0.44, "step": 3768 }, { "epoch": 0.8386737872719181, "grad_norm": 1.586434914459656, "learning_rate": 6.689723696524348e-07, "loss": 0.4271, "step": 3769 }, { "epoch": 0.8388963061860258, "grad_norm": 1.7260680142560578, "learning_rate": 6.671728341638428e-07, "loss": 0.4345, "step": 3770 }, { "epoch": 0.8391188251001335, "grad_norm": 1.677112276521366, "learning_rate": 6.653755492702718e-07, "loss": 0.4288, "step": 3771 }, { "epoch": 0.8393413440142412, "grad_norm": 1.7338714035590097, "learning_rate": 6.635805159052866e-07, "loss": 0.4326, "step": 3772 }, { "epoch": 0.839563862928349, "grad_norm": 1.5883771234003354, "learning_rate": 6.617877350012785e-07, "loss": 0.419, "step": 3773 }, { "epoch": 0.8397863818424566, "grad_norm": 1.7056216176903325, "learning_rate": 6.599972074894684e-07, "loss": 0.4402, "step": 3774 }, { "epoch": 0.8400089007565643, "grad_norm": 1.7310910895272682, "learning_rate": 6.582089342999093e-07, "loss": 0.4307, "step": 3775 }, { "epoch": 0.840231419670672, "grad_norm": 1.632569104264421, "learning_rate": 6.564229163614793e-07, "loss": 0.4325, "step": 3776 }, { "epoch": 0.8404539385847797, "grad_norm": 1.6236976050929983, "learning_rate": 6.546391546018893e-07, "loss": 0.4133, "step": 3777 }, { "epoch": 0.8406764574988874, "grad_norm": 1.724462164397906, "learning_rate": 6.528576499476757e-07, "loss": 0.433, "step": 3778 }, { "epoch": 0.8408989764129952, "grad_norm": 1.6723782494319195, "learning_rate": 6.51078403324204e-07, "loss": 0.4225, "step": 3779 }, { "epoch": 0.8411214953271028, "grad_norm": 1.7366342046924874, "learning_rate": 6.49301415655666e-07, "loss": 0.4292, "step": 3780 }, { "epoch": 0.8413440142412105, "grad_norm": 1.7375658881300284, "learning_rate": 6.475266878650793e-07, "loss": 0.4447, "step": 3781 }, { "epoch": 0.8415665331553182, "grad_norm": 1.732545492164401, "learning_rate": 6.45754220874289e-07, "loss": 0.43, "step": 3782 }, { "epoch": 0.8417890520694259, "grad_norm": 1.6075829511614865, "learning_rate": 6.439840156039657e-07, "loss": 0.4233, "step": 3783 }, { "epoch": 0.8420115709835336, "grad_norm": 1.6862526509565372, "learning_rate": 6.422160729736044e-07, "loss": 0.4457, "step": 3784 }, { "epoch": 0.8422340898976413, "grad_norm": 1.794956420447578, "learning_rate": 6.404503939015266e-07, "loss": 0.4456, "step": 3785 }, { "epoch": 0.842456608811749, "grad_norm": 1.6443404521241993, "learning_rate": 6.38686979304875e-07, "loss": 0.4385, "step": 3786 }, { "epoch": 0.8426791277258567, "grad_norm": 2.415810283387257, "learning_rate": 6.369258300996184e-07, "loss": 0.4168, "step": 3787 }, { "epoch": 0.8429016466399644, "grad_norm": 1.7151784851184289, "learning_rate": 6.3516694720055e-07, "loss": 0.4251, "step": 3788 }, { "epoch": 0.8431241655540721, "grad_norm": 1.7740145194849914, "learning_rate": 6.334103315212819e-07, "loss": 0.4457, "step": 3789 }, { "epoch": 0.8433466844681798, "grad_norm": 1.6523626300415446, "learning_rate": 6.316559839742514e-07, "loss": 0.428, "step": 3790 }, { "epoch": 0.8435692033822875, "grad_norm": 1.6355899669205496, "learning_rate": 6.299039054707174e-07, "loss": 0.417, "step": 3791 }, { "epoch": 0.8437917222963952, "grad_norm": 1.705742395588919, "learning_rate": 6.2815409692076e-07, "loss": 0.4092, "step": 3792 }, { "epoch": 0.844014241210503, "grad_norm": 1.6565812823492239, "learning_rate": 6.264065592332807e-07, "loss": 0.4392, "step": 3793 }, { "epoch": 0.8442367601246106, "grad_norm": 1.7459660142206548, "learning_rate": 6.24661293315999e-07, "loss": 0.4439, "step": 3794 }, { "epoch": 0.8444592790387183, "grad_norm": 1.8740506538162842, "learning_rate": 6.229183000754579e-07, "loss": 0.4451, "step": 3795 }, { "epoch": 0.8446817979528259, "grad_norm": 1.6818982091382804, "learning_rate": 6.211775804170161e-07, "loss": 0.4371, "step": 3796 }, { "epoch": 0.8449043168669337, "grad_norm": 1.706136990081858, "learning_rate": 6.194391352448564e-07, "loss": 0.4275, "step": 3797 }, { "epoch": 0.8451268357810414, "grad_norm": 1.598465501926907, "learning_rate": 6.177029654619748e-07, "loss": 0.4438, "step": 3798 }, { "epoch": 0.845349354695149, "grad_norm": 1.7168548053665555, "learning_rate": 6.159690719701888e-07, "loss": 0.435, "step": 3799 }, { "epoch": 0.8455718736092568, "grad_norm": 1.722295566601419, "learning_rate": 6.142374556701336e-07, "loss": 0.4308, "step": 3800 }, { "epoch": 0.8457943925233645, "grad_norm": 1.7315794355635448, "learning_rate": 6.125081174612585e-07, "loss": 0.4439, "step": 3801 }, { "epoch": 0.8460169114374722, "grad_norm": 1.6017622063050811, "learning_rate": 6.107810582418317e-07, "loss": 0.4371, "step": 3802 }, { "epoch": 0.8462394303515799, "grad_norm": 1.5196406214912654, "learning_rate": 6.090562789089383e-07, "loss": 0.4306, "step": 3803 }, { "epoch": 0.8464619492656876, "grad_norm": 1.6195962344225185, "learning_rate": 6.073337803584778e-07, "loss": 0.4379, "step": 3804 }, { "epoch": 0.8466844681797953, "grad_norm": 1.6933627525708077, "learning_rate": 6.056135634851673e-07, "loss": 0.4326, "step": 3805 }, { "epoch": 0.846906987093903, "grad_norm": 1.6980741539064446, "learning_rate": 6.038956291825338e-07, "loss": 0.4395, "step": 3806 }, { "epoch": 0.8471295060080107, "grad_norm": 1.727635716165238, "learning_rate": 6.021799783429233e-07, "loss": 0.4452, "step": 3807 }, { "epoch": 0.8473520249221184, "grad_norm": 1.7162610887808727, "learning_rate": 6.004666118574948e-07, "loss": 0.4284, "step": 3808 }, { "epoch": 0.8475745438362261, "grad_norm": 1.5486947499299442, "learning_rate": 5.98755530616219e-07, "loss": 0.4418, "step": 3809 }, { "epoch": 0.8477970627503337, "grad_norm": 1.6447670740513813, "learning_rate": 5.970467355078819e-07, "loss": 0.4375, "step": 3810 }, { "epoch": 0.8480195816644415, "grad_norm": 1.5700241801576513, "learning_rate": 5.953402274200798e-07, "loss": 0.4346, "step": 3811 }, { "epoch": 0.8482421005785492, "grad_norm": 1.7107838685885919, "learning_rate": 5.936360072392217e-07, "loss": 0.4416, "step": 3812 }, { "epoch": 0.8484646194926568, "grad_norm": 1.55173815340133, "learning_rate": 5.91934075850531e-07, "loss": 0.423, "step": 3813 }, { "epoch": 0.8486871384067646, "grad_norm": 1.7080368470496359, "learning_rate": 5.902344341380351e-07, "loss": 0.4319, "step": 3814 }, { "epoch": 0.8489096573208723, "grad_norm": 1.7020082203081217, "learning_rate": 5.885370829845826e-07, "loss": 0.432, "step": 3815 }, { "epoch": 0.8491321762349799, "grad_norm": 1.5842201694630025, "learning_rate": 5.868420232718225e-07, "loss": 0.4297, "step": 3816 }, { "epoch": 0.8493546951490877, "grad_norm": 1.689795969695517, "learning_rate": 5.851492558802191e-07, "loss": 0.4569, "step": 3817 }, { "epoch": 0.8495772140631954, "grad_norm": 1.6299532941367096, "learning_rate": 5.834587816890436e-07, "loss": 0.4453, "step": 3818 }, { "epoch": 0.849799732977303, "grad_norm": 1.8090243081939361, "learning_rate": 5.817706015763774e-07, "loss": 0.4539, "step": 3819 }, { "epoch": 0.8500222518914108, "grad_norm": 1.7781245655229883, "learning_rate": 5.800847164191093e-07, "loss": 0.4322, "step": 3820 }, { "epoch": 0.8502447708055185, "grad_norm": 1.6344848745463292, "learning_rate": 5.784011270929374e-07, "loss": 0.4453, "step": 3821 }, { "epoch": 0.8504672897196262, "grad_norm": 1.6029295627588973, "learning_rate": 5.767198344723667e-07, "loss": 0.4287, "step": 3822 }, { "epoch": 0.8506898086337339, "grad_norm": 1.6857955421893374, "learning_rate": 5.750408394307072e-07, "loss": 0.4584, "step": 3823 }, { "epoch": 0.8509123275478415, "grad_norm": 1.657278638453642, "learning_rate": 5.733641428400782e-07, "loss": 0.4296, "step": 3824 }, { "epoch": 0.8511348464619493, "grad_norm": 1.6195484951653456, "learning_rate": 5.716897455714054e-07, "loss": 0.4299, "step": 3825 }, { "epoch": 0.851357365376057, "grad_norm": 1.7426301639313588, "learning_rate": 5.70017648494417e-07, "loss": 0.4335, "step": 3826 }, { "epoch": 0.8515798842901646, "grad_norm": 1.7412668425459548, "learning_rate": 5.683478524776481e-07, "loss": 0.4357, "step": 3827 }, { "epoch": 0.8518024032042724, "grad_norm": 1.7397113401494706, "learning_rate": 5.6668035838844e-07, "loss": 0.4321, "step": 3828 }, { "epoch": 0.8520249221183801, "grad_norm": 1.8777142110134346, "learning_rate": 5.650151670929371e-07, "loss": 0.4484, "step": 3829 }, { "epoch": 0.8522474410324877, "grad_norm": 1.6893675803236405, "learning_rate": 5.633522794560875e-07, "loss": 0.4414, "step": 3830 }, { "epoch": 0.8524699599465955, "grad_norm": 1.7097899141082231, "learning_rate": 5.616916963416419e-07, "loss": 0.4115, "step": 3831 }, { "epoch": 0.8526924788607032, "grad_norm": 1.7418429722762776, "learning_rate": 5.60033418612157e-07, "loss": 0.4254, "step": 3832 }, { "epoch": 0.8529149977748108, "grad_norm": 1.6996550010944464, "learning_rate": 5.58377447128986e-07, "loss": 0.4352, "step": 3833 }, { "epoch": 0.8531375166889186, "grad_norm": 1.7702215192095687, "learning_rate": 5.56723782752292e-07, "loss": 0.4394, "step": 3834 }, { "epoch": 0.8533600356030263, "grad_norm": 1.6361916219162185, "learning_rate": 5.550724263410351e-07, "loss": 0.4232, "step": 3835 }, { "epoch": 0.8535825545171339, "grad_norm": 1.7212505755271725, "learning_rate": 5.534233787529764e-07, "loss": 0.4261, "step": 3836 }, { "epoch": 0.8538050734312417, "grad_norm": 1.6866921059345112, "learning_rate": 5.51776640844679e-07, "loss": 0.4358, "step": 3837 }, { "epoch": 0.8540275923453493, "grad_norm": 1.6635721819951874, "learning_rate": 5.501322134715053e-07, "loss": 0.424, "step": 3838 }, { "epoch": 0.854250111259457, "grad_norm": 1.7842099048248397, "learning_rate": 5.48490097487619e-07, "loss": 0.4388, "step": 3839 }, { "epoch": 0.8544726301735648, "grad_norm": 1.5826726070573702, "learning_rate": 5.468502937459818e-07, "loss": 0.4167, "step": 3840 }, { "epoch": 0.8546951490876724, "grad_norm": 1.7074062125361777, "learning_rate": 5.45212803098355e-07, "loss": 0.4185, "step": 3841 }, { "epoch": 0.8549176680017802, "grad_norm": 1.750281482692086, "learning_rate": 5.435776263952996e-07, "loss": 0.4318, "step": 3842 }, { "epoch": 0.8551401869158879, "grad_norm": 1.7020618842971083, "learning_rate": 5.419447644861719e-07, "loss": 0.4346, "step": 3843 }, { "epoch": 0.8553627058299955, "grad_norm": 1.6987089906999833, "learning_rate": 5.403142182191274e-07, "loss": 0.4033, "step": 3844 }, { "epoch": 0.8555852247441033, "grad_norm": 1.7230751221041214, "learning_rate": 5.386859884411189e-07, "loss": 0.4345, "step": 3845 }, { "epoch": 0.855807743658211, "grad_norm": 1.6562183022818548, "learning_rate": 5.370600759978961e-07, "loss": 0.4291, "step": 3846 }, { "epoch": 0.8560302625723186, "grad_norm": 1.5933833308458178, "learning_rate": 5.354364817340052e-07, "loss": 0.4367, "step": 3847 }, { "epoch": 0.8562527814864264, "grad_norm": 1.5284548114962702, "learning_rate": 5.338152064927865e-07, "loss": 0.4308, "step": 3848 }, { "epoch": 0.8564753004005341, "grad_norm": 1.6501957486444683, "learning_rate": 5.32196251116377e-07, "loss": 0.4453, "step": 3849 }, { "epoch": 0.8566978193146417, "grad_norm": 1.7150505611066154, "learning_rate": 5.305796164457106e-07, "loss": 0.4291, "step": 3850 }, { "epoch": 0.8569203382287495, "grad_norm": 1.7023196521509012, "learning_rate": 5.289653033205106e-07, "loss": 0.446, "step": 3851 }, { "epoch": 0.8571428571428571, "grad_norm": 1.760784022205649, "learning_rate": 5.273533125793013e-07, "loss": 0.4299, "step": 3852 }, { "epoch": 0.8573653760569648, "grad_norm": 1.6331785888375043, "learning_rate": 5.257436450593944e-07, "loss": 0.4198, "step": 3853 }, { "epoch": 0.8575878949710726, "grad_norm": 1.7713411949032076, "learning_rate": 5.241363015968981e-07, "loss": 0.4342, "step": 3854 }, { "epoch": 0.8578104138851802, "grad_norm": 1.7186290839295242, "learning_rate": 5.225312830267143e-07, "loss": 0.4434, "step": 3855 }, { "epoch": 0.8580329327992879, "grad_norm": 1.7519716271025636, "learning_rate": 5.209285901825334e-07, "loss": 0.4646, "step": 3856 }, { "epoch": 0.8582554517133957, "grad_norm": 1.5803342604640556, "learning_rate": 5.193282238968417e-07, "loss": 0.4204, "step": 3857 }, { "epoch": 0.8584779706275033, "grad_norm": 1.6538656863342458, "learning_rate": 5.177301850009147e-07, "loss": 0.4376, "step": 3858 }, { "epoch": 0.858700489541611, "grad_norm": 1.8129402654408193, "learning_rate": 5.161344743248209e-07, "loss": 0.4407, "step": 3859 }, { "epoch": 0.8589230084557188, "grad_norm": 1.6596565164420882, "learning_rate": 5.145410926974171e-07, "loss": 0.4272, "step": 3860 }, { "epoch": 0.8591455273698264, "grad_norm": 1.6887431825355652, "learning_rate": 5.129500409463517e-07, "loss": 0.4427, "step": 3861 }, { "epoch": 0.8593680462839342, "grad_norm": 1.6458697850424784, "learning_rate": 5.113613198980644e-07, "loss": 0.4226, "step": 3862 }, { "epoch": 0.8595905651980418, "grad_norm": 1.758837132924891, "learning_rate": 5.0977493037778e-07, "loss": 0.4352, "step": 3863 }, { "epoch": 0.8598130841121495, "grad_norm": 1.7380744311862781, "learning_rate": 5.081908732095159e-07, "loss": 0.4389, "step": 3864 }, { "epoch": 0.8600356030262573, "grad_norm": 1.7273232824419367, "learning_rate": 5.066091492160768e-07, "loss": 0.4309, "step": 3865 }, { "epoch": 0.8602581219403649, "grad_norm": 1.7471770476381339, "learning_rate": 5.050297592190567e-07, "loss": 0.4383, "step": 3866 }, { "epoch": 0.8604806408544726, "grad_norm": 1.7493259743497809, "learning_rate": 5.034527040388359e-07, "loss": 0.4462, "step": 3867 }, { "epoch": 0.8607031597685804, "grad_norm": 1.5967828848926664, "learning_rate": 5.018779844945809e-07, "loss": 0.4279, "step": 3868 }, { "epoch": 0.860925678682688, "grad_norm": 1.6707581833776741, "learning_rate": 5.003056014042468e-07, "loss": 0.4327, "step": 3869 }, { "epoch": 0.8611481975967957, "grad_norm": 1.7063478985939662, "learning_rate": 4.987355555845752e-07, "loss": 0.4306, "step": 3870 }, { "epoch": 0.8613707165109035, "grad_norm": 1.6257022186303631, "learning_rate": 4.971678478510927e-07, "loss": 0.4427, "step": 3871 }, { "epoch": 0.8615932354250111, "grad_norm": 1.6762625452366235, "learning_rate": 4.95602479018113e-07, "loss": 0.4337, "step": 3872 }, { "epoch": 0.8618157543391188, "grad_norm": 1.7244856411446736, "learning_rate": 4.940394498987316e-07, "loss": 0.4277, "step": 3873 }, { "epoch": 0.8620382732532266, "grad_norm": 1.7253720912698434, "learning_rate": 4.924787613048316e-07, "loss": 0.436, "step": 3874 }, { "epoch": 0.8622607921673342, "grad_norm": 1.7154334250484564, "learning_rate": 4.909204140470803e-07, "loss": 0.4166, "step": 3875 }, { "epoch": 0.8624833110814419, "grad_norm": 1.8641688837625359, "learning_rate": 4.893644089349258e-07, "loss": 0.4586, "step": 3876 }, { "epoch": 0.8627058299955496, "grad_norm": 1.5947343063378063, "learning_rate": 4.878107467766053e-07, "loss": 0.4294, "step": 3877 }, { "epoch": 0.8629283489096573, "grad_norm": 1.5865344823375636, "learning_rate": 4.862594283791328e-07, "loss": 0.4355, "step": 3878 }, { "epoch": 0.863150867823765, "grad_norm": 1.7442616450989177, "learning_rate": 4.847104545483094e-07, "loss": 0.4763, "step": 3879 }, { "epoch": 0.8633733867378727, "grad_norm": 2.0250955652468936, "learning_rate": 4.831638260887156e-07, "loss": 0.4459, "step": 3880 }, { "epoch": 0.8635959056519804, "grad_norm": 1.676551449243835, "learning_rate": 4.816195438037147e-07, "loss": 0.4383, "step": 3881 }, { "epoch": 0.8638184245660881, "grad_norm": 1.6610685119554736, "learning_rate": 4.800776084954518e-07, "loss": 0.4323, "step": 3882 }, { "epoch": 0.8640409434801958, "grad_norm": 1.6021060714927795, "learning_rate": 4.785380209648522e-07, "loss": 0.4264, "step": 3883 }, { "epoch": 0.8642634623943035, "grad_norm": 1.7614005121033827, "learning_rate": 4.770007820116229e-07, "loss": 0.4262, "step": 3884 }, { "epoch": 0.8644859813084113, "grad_norm": 1.5852381215220772, "learning_rate": 4.754658924342481e-07, "loss": 0.4315, "step": 3885 }, { "epoch": 0.8647085002225189, "grad_norm": 1.648006831251078, "learning_rate": 4.7393335302999497e-07, "loss": 0.4244, "step": 3886 }, { "epoch": 0.8649310191366266, "grad_norm": 1.7335603967679396, "learning_rate": 4.72403164594909e-07, "loss": 0.4323, "step": 3887 }, { "epoch": 0.8651535380507344, "grad_norm": 1.6559925267537543, "learning_rate": 4.7087532792381154e-07, "loss": 0.4553, "step": 3888 }, { "epoch": 0.865376056964842, "grad_norm": 1.6017734421299983, "learning_rate": 4.6934984381030837e-07, "loss": 0.4375, "step": 3889 }, { "epoch": 0.8655985758789497, "grad_norm": 1.5589639474721517, "learning_rate": 4.678267130467773e-07, "loss": 0.4339, "step": 3890 }, { "epoch": 0.8658210947930574, "grad_norm": 1.7198757831786586, "learning_rate": 4.6630593642437714e-07, "loss": 0.4264, "step": 3891 }, { "epoch": 0.8660436137071651, "grad_norm": 1.6285432303016076, "learning_rate": 4.647875147330433e-07, "loss": 0.4386, "step": 3892 }, { "epoch": 0.8662661326212728, "grad_norm": 1.653501213849093, "learning_rate": 4.6327144876148643e-07, "loss": 0.4478, "step": 3893 }, { "epoch": 0.8664886515353805, "grad_norm": 1.607332556146088, "learning_rate": 4.6175773929719615e-07, "loss": 0.4329, "step": 3894 }, { "epoch": 0.8667111704494882, "grad_norm": 1.5779014062262509, "learning_rate": 4.6024638712643563e-07, "loss": 0.4409, "step": 3895 }, { "epoch": 0.8669336893635959, "grad_norm": 1.795468447009144, "learning_rate": 4.587373930342448e-07, "loss": 0.4449, "step": 3896 }, { "epoch": 0.8671562082777036, "grad_norm": 1.7978407386902688, "learning_rate": 4.572307578044405e-07, "loss": 0.4361, "step": 3897 }, { "epoch": 0.8673787271918113, "grad_norm": 1.7442340999178862, "learning_rate": 4.5572648221961004e-07, "loss": 0.4194, "step": 3898 }, { "epoch": 0.867601246105919, "grad_norm": 1.6293751500581672, "learning_rate": 4.542245670611184e-07, "loss": 0.4146, "step": 3899 }, { "epoch": 0.8678237650200267, "grad_norm": 1.6885514185387545, "learning_rate": 4.527250131091027e-07, "loss": 0.4348, "step": 3900 }, { "epoch": 0.8680462839341344, "grad_norm": 1.715213954109349, "learning_rate": 4.512278211424753e-07, "loss": 0.4335, "step": 3901 }, { "epoch": 0.8682688028482421, "grad_norm": 1.662005707886403, "learning_rate": 4.497329919389204e-07, "loss": 0.4311, "step": 3902 }, { "epoch": 0.8684913217623498, "grad_norm": 1.568980770003763, "learning_rate": 4.482405262748951e-07, "loss": 0.4142, "step": 3903 }, { "epoch": 0.8687138406764575, "grad_norm": 1.6600330839180404, "learning_rate": 4.4675042492562993e-07, "loss": 0.4268, "step": 3904 }, { "epoch": 0.8689363595905651, "grad_norm": 1.7422045824346206, "learning_rate": 4.452626886651251e-07, "loss": 0.4435, "step": 3905 }, { "epoch": 0.8691588785046729, "grad_norm": 1.6807751872098313, "learning_rate": 4.4377731826615425e-07, "loss": 0.442, "step": 3906 }, { "epoch": 0.8693813974187806, "grad_norm": 1.6475607290546412, "learning_rate": 4.4229431450026116e-07, "loss": 0.4359, "step": 3907 }, { "epoch": 0.8696039163328882, "grad_norm": 1.7002334471149199, "learning_rate": 4.408136781377609e-07, "loss": 0.4567, "step": 3908 }, { "epoch": 0.869826435246996, "grad_norm": 1.6775125934333752, "learning_rate": 4.3933540994773927e-07, "loss": 0.4364, "step": 3909 }, { "epoch": 0.8700489541611037, "grad_norm": 1.7160828716641516, "learning_rate": 4.3785951069804986e-07, "loss": 0.43, "step": 3910 }, { "epoch": 0.8702714730752114, "grad_norm": 1.7432189940374097, "learning_rate": 4.363859811553173e-07, "loss": 0.4469, "step": 3911 }, { "epoch": 0.8704939919893191, "grad_norm": 1.6596382255743767, "learning_rate": 4.349148220849375e-07, "loss": 0.4223, "step": 3912 }, { "epoch": 0.8707165109034268, "grad_norm": 1.7488384965125352, "learning_rate": 4.334460342510688e-07, "loss": 0.4201, "step": 3913 }, { "epoch": 0.8709390298175345, "grad_norm": 1.6234878109606434, "learning_rate": 4.3197961841664584e-07, "loss": 0.4286, "step": 3914 }, { "epoch": 0.8711615487316422, "grad_norm": 1.7493668224446244, "learning_rate": 4.305155753433649e-07, "loss": 0.44, "step": 3915 }, { "epoch": 0.8713840676457499, "grad_norm": 1.726698791963196, "learning_rate": 4.29053905791692e-07, "loss": 0.4166, "step": 3916 }, { "epoch": 0.8716065865598576, "grad_norm": 1.743543673709128, "learning_rate": 4.2759461052086224e-07, "loss": 0.4273, "step": 3917 }, { "epoch": 0.8718291054739653, "grad_norm": 1.8201788730648327, "learning_rate": 4.261376902888731e-07, "loss": 0.4355, "step": 3918 }, { "epoch": 0.8720516243880729, "grad_norm": 3.2447100535217146, "learning_rate": 4.246831458524925e-07, "loss": 0.4359, "step": 3919 }, { "epoch": 0.8722741433021807, "grad_norm": 1.6376270046491603, "learning_rate": 4.2323097796725165e-07, "loss": 0.4325, "step": 3920 }, { "epoch": 0.8724966622162884, "grad_norm": 1.6455014932785075, "learning_rate": 4.2178118738744947e-07, "loss": 0.4192, "step": 3921 }, { "epoch": 0.872719181130396, "grad_norm": 1.8256995072080118, "learning_rate": 4.2033377486614734e-07, "loss": 0.4444, "step": 3922 }, { "epoch": 0.8729417000445038, "grad_norm": 1.7202076188791948, "learning_rate": 4.1888874115517395e-07, "loss": 0.4354, "step": 3923 }, { "epoch": 0.8731642189586115, "grad_norm": 1.7534136836122014, "learning_rate": 4.1744608700512224e-07, "loss": 0.4303, "step": 3924 }, { "epoch": 0.8733867378727191, "grad_norm": 1.6468953535551598, "learning_rate": 4.1600581316534494e-07, "loss": 0.4389, "step": 3925 }, { "epoch": 0.8736092567868269, "grad_norm": 1.720256991927666, "learning_rate": 4.1456792038396645e-07, "loss": 0.4569, "step": 3926 }, { "epoch": 0.8738317757009346, "grad_norm": 1.53426418664461, "learning_rate": 4.1313240940786537e-07, "loss": 0.4214, "step": 3927 }, { "epoch": 0.8740542946150422, "grad_norm": 1.820384700455143, "learning_rate": 4.116992809826897e-07, "loss": 0.452, "step": 3928 }, { "epoch": 0.87427681352915, "grad_norm": 1.625927803437045, "learning_rate": 4.10268535852848e-07, "loss": 0.4359, "step": 3929 }, { "epoch": 0.8744993324432577, "grad_norm": 1.729871781034219, "learning_rate": 4.088401747615084e-07, "loss": 0.4512, "step": 3930 }, { "epoch": 0.8747218513573654, "grad_norm": 1.718237736845934, "learning_rate": 4.074141984506036e-07, "loss": 0.4221, "step": 3931 }, { "epoch": 0.8749443702714731, "grad_norm": 1.8231657524672382, "learning_rate": 4.059906076608272e-07, "loss": 0.4498, "step": 3932 }, { "epoch": 0.8751668891855807, "grad_norm": 1.7223336860999807, "learning_rate": 4.045694031316327e-07, "loss": 0.437, "step": 3933 }, { "epoch": 0.8753894080996885, "grad_norm": 1.6532092501262048, "learning_rate": 4.031505856012352e-07, "loss": 0.4386, "step": 3934 }, { "epoch": 0.8756119270137962, "grad_norm": 1.6801959906623176, "learning_rate": 4.017341558066085e-07, "loss": 0.4373, "step": 3935 }, { "epoch": 0.8758344459279038, "grad_norm": 1.614126698018645, "learning_rate": 4.0032011448348727e-07, "loss": 0.4325, "step": 3936 }, { "epoch": 0.8760569648420116, "grad_norm": 1.7813883453621207, "learning_rate": 3.9890846236636636e-07, "loss": 0.4197, "step": 3937 }, { "epoch": 0.8762794837561193, "grad_norm": 1.7952114578994252, "learning_rate": 3.974992001884953e-07, "loss": 0.4355, "step": 3938 }, { "epoch": 0.8765020026702269, "grad_norm": 1.6428136845168777, "learning_rate": 3.960923286818896e-07, "loss": 0.4325, "step": 3939 }, { "epoch": 0.8767245215843347, "grad_norm": 1.6857257238853702, "learning_rate": 3.9468784857731534e-07, "loss": 0.4317, "step": 3940 }, { "epoch": 0.8769470404984424, "grad_norm": 1.7970976673723382, "learning_rate": 3.932857606043028e-07, "loss": 0.4408, "step": 3941 }, { "epoch": 0.87716955941255, "grad_norm": 1.6165354181154008, "learning_rate": 3.9188606549113386e-07, "loss": 0.4152, "step": 3942 }, { "epoch": 0.8773920783266578, "grad_norm": 1.7108994771500523, "learning_rate": 3.904887639648519e-07, "loss": 0.4366, "step": 3943 }, { "epoch": 0.8776145972407655, "grad_norm": 1.7294154093484464, "learning_rate": 3.8909385675125534e-07, "loss": 0.4417, "step": 3944 }, { "epoch": 0.8778371161548731, "grad_norm": 1.714720794030996, "learning_rate": 3.8770134457489896e-07, "loss": 0.4403, "step": 3945 }, { "epoch": 0.8780596350689809, "grad_norm": 1.7326081140580838, "learning_rate": 3.8631122815909473e-07, "loss": 0.4331, "step": 3946 }, { "epoch": 0.8782821539830885, "grad_norm": 1.7062344515215366, "learning_rate": 3.849235082259073e-07, "loss": 0.4316, "step": 3947 }, { "epoch": 0.8785046728971962, "grad_norm": 1.7272269568786407, "learning_rate": 3.835381854961595e-07, "loss": 0.4217, "step": 3948 }, { "epoch": 0.878727191811304, "grad_norm": 1.6539721213542913, "learning_rate": 3.82155260689428e-07, "loss": 0.4358, "step": 3949 }, { "epoch": 0.8789497107254116, "grad_norm": 1.7422292002216897, "learning_rate": 3.8077473452404145e-07, "loss": 0.4263, "step": 3950 }, { "epoch": 0.8791722296395194, "grad_norm": 1.6358412473372705, "learning_rate": 3.793966077170885e-07, "loss": 0.428, "step": 3951 }, { "epoch": 0.8793947485536271, "grad_norm": 1.7172125866977637, "learning_rate": 3.780208809844049e-07, "loss": 0.4119, "step": 3952 }, { "epoch": 0.8796172674677347, "grad_norm": 1.7159826058319583, "learning_rate": 3.7664755504058405e-07, "loss": 0.4323, "step": 3953 }, { "epoch": 0.8798397863818425, "grad_norm": 1.6016612860971933, "learning_rate": 3.752766305989708e-07, "loss": 0.4153, "step": 3954 }, { "epoch": 0.8800623052959502, "grad_norm": 1.7682825335742307, "learning_rate": 3.7390810837166224e-07, "loss": 0.4293, "step": 3955 }, { "epoch": 0.8802848242100578, "grad_norm": 1.7173849614205603, "learning_rate": 3.725419890695081e-07, "loss": 0.4246, "step": 3956 }, { "epoch": 0.8805073431241656, "grad_norm": 1.6063705906691579, "learning_rate": 3.711782734021102e-07, "loss": 0.4169, "step": 3957 }, { "epoch": 0.8807298620382733, "grad_norm": 1.711060499615589, "learning_rate": 3.698169620778219e-07, "loss": 0.4298, "step": 3958 }, { "epoch": 0.8809523809523809, "grad_norm": 1.6640411006855704, "learning_rate": 3.684580558037482e-07, "loss": 0.4191, "step": 3959 }, { "epoch": 0.8811748998664887, "grad_norm": 1.826255405873879, "learning_rate": 3.671015552857427e-07, "loss": 0.4476, "step": 3960 }, { "epoch": 0.8813974187805963, "grad_norm": 1.660436118316031, "learning_rate": 3.6574746122841176e-07, "loss": 0.4169, "step": 3961 }, { "epoch": 0.881619937694704, "grad_norm": 1.7164557579290531, "learning_rate": 3.64395774335109e-07, "loss": 0.4272, "step": 3962 }, { "epoch": 0.8818424566088118, "grad_norm": 1.7829460625967177, "learning_rate": 3.630464953079427e-07, "loss": 0.4444, "step": 3963 }, { "epoch": 0.8820649755229194, "grad_norm": 1.7919776735733952, "learning_rate": 3.616996248477639e-07, "loss": 0.4427, "step": 3964 }, { "epoch": 0.8822874944370271, "grad_norm": 1.8059877087619738, "learning_rate": 3.603551636541774e-07, "loss": 0.4289, "step": 3965 }, { "epoch": 0.8825100133511349, "grad_norm": 1.6017820533704121, "learning_rate": 3.5901311242553585e-07, "loss": 0.4153, "step": 3966 }, { "epoch": 0.8827325322652425, "grad_norm": 1.8195222958725246, "learning_rate": 3.576734718589375e-07, "loss": 0.4322, "step": 3967 }, { "epoch": 0.8829550511793502, "grad_norm": 1.7004783705298359, "learning_rate": 3.5633624265023093e-07, "loss": 0.4282, "step": 3968 }, { "epoch": 0.883177570093458, "grad_norm": 1.74199297038719, "learning_rate": 3.5500142549401097e-07, "loss": 0.4288, "step": 3969 }, { "epoch": 0.8834000890075656, "grad_norm": 1.7108587146934482, "learning_rate": 3.536690210836208e-07, "loss": 0.4271, "step": 3970 }, { "epoch": 0.8836226079216734, "grad_norm": 1.7574023776457284, "learning_rate": 3.523390301111501e-07, "loss": 0.424, "step": 3971 }, { "epoch": 0.8838451268357811, "grad_norm": 1.7716066296570567, "learning_rate": 3.51011453267433e-07, "loss": 0.4063, "step": 3972 }, { "epoch": 0.8840676457498887, "grad_norm": 1.7873769204773917, "learning_rate": 3.496862912420518e-07, "loss": 0.428, "step": 3973 }, { "epoch": 0.8842901646639965, "grad_norm": 1.77746085120432, "learning_rate": 3.4836354472333413e-07, "loss": 0.4441, "step": 3974 }, { "epoch": 0.8845126835781041, "grad_norm": 1.6892818410724062, "learning_rate": 3.470432143983504e-07, "loss": 0.4161, "step": 3975 }, { "epoch": 0.8847352024922118, "grad_norm": 1.8271402684203457, "learning_rate": 3.4572530095292213e-07, "loss": 0.4144, "step": 3976 }, { "epoch": 0.8849577214063196, "grad_norm": 1.6453480280645856, "learning_rate": 3.444098050716077e-07, "loss": 0.4305, "step": 3977 }, { "epoch": 0.8851802403204272, "grad_norm": 1.728380429741734, "learning_rate": 3.4309672743771506e-07, "loss": 0.4248, "step": 3978 }, { "epoch": 0.8854027592345349, "grad_norm": 1.7003590733974991, "learning_rate": 3.4178606873329577e-07, "loss": 0.4251, "step": 3979 }, { "epoch": 0.8856252781486427, "grad_norm": 1.7614537517069881, "learning_rate": 3.404778296391409e-07, "loss": 0.4125, "step": 3980 }, { "epoch": 0.8858477970627503, "grad_norm": 1.8270504810810715, "learning_rate": 3.39172010834789e-07, "loss": 0.4459, "step": 3981 }, { "epoch": 0.886070315976858, "grad_norm": 1.7489841250576346, "learning_rate": 3.378686129985198e-07, "loss": 0.4407, "step": 3982 }, { "epoch": 0.8862928348909658, "grad_norm": 1.8029600899122071, "learning_rate": 3.365676368073567e-07, "loss": 0.4322, "step": 3983 }, { "epoch": 0.8865153538050734, "grad_norm": 1.7259783146503072, "learning_rate": 3.35269082937062e-07, "loss": 0.4235, "step": 3984 }, { "epoch": 0.8867378727191811, "grad_norm": 1.572238827975216, "learning_rate": 3.3397295206214266e-07, "loss": 0.415, "step": 3985 }, { "epoch": 0.8869603916332889, "grad_norm": 1.885569901198625, "learning_rate": 3.326792448558475e-07, "loss": 0.4328, "step": 3986 }, { "epoch": 0.8871829105473965, "grad_norm": 1.7303040551338995, "learning_rate": 3.3138796199016274e-07, "loss": 0.4239, "step": 3987 }, { "epoch": 0.8874054294615042, "grad_norm": 1.7176188141639113, "learning_rate": 3.3009910413582147e-07, "loss": 0.4304, "step": 3988 }, { "epoch": 0.8876279483756119, "grad_norm": 1.7313438392674918, "learning_rate": 3.288126719622903e-07, "loss": 0.4256, "step": 3989 }, { "epoch": 0.8878504672897196, "grad_norm": 1.7998866468587076, "learning_rate": 3.2752866613778023e-07, "loss": 0.4375, "step": 3990 }, { "epoch": 0.8880729862038274, "grad_norm": 1.864272125138861, "learning_rate": 3.2624708732924226e-07, "loss": 0.4413, "step": 3991 }, { "epoch": 0.888295505117935, "grad_norm": 1.6643541541790903, "learning_rate": 3.249679362023622e-07, "loss": 0.4491, "step": 3992 }, { "epoch": 0.8885180240320427, "grad_norm": 1.7265749918971511, "learning_rate": 3.2369121342157027e-07, "loss": 0.4227, "step": 3993 }, { "epoch": 0.8887405429461505, "grad_norm": 1.7011581302471568, "learning_rate": 3.224169196500321e-07, "loss": 0.4296, "step": 3994 }, { "epoch": 0.8889630618602581, "grad_norm": 1.7529939493067783, "learning_rate": 3.211450555496531e-07, "loss": 0.441, "step": 3995 }, { "epoch": 0.8891855807743658, "grad_norm": 2.010376265115775, "learning_rate": 3.198756217810761e-07, "loss": 0.424, "step": 3996 }, { "epoch": 0.8894080996884736, "grad_norm": 1.6833231532198105, "learning_rate": 3.1860861900368024e-07, "loss": 0.4452, "step": 3997 }, { "epoch": 0.8896306186025812, "grad_norm": 1.7254788809535095, "learning_rate": 3.17344047875584e-07, "loss": 0.4351, "step": 3998 }, { "epoch": 0.8898531375166889, "grad_norm": 1.8592242450516234, "learning_rate": 3.1608190905364265e-07, "loss": 0.4437, "step": 3999 }, { "epoch": 0.8900756564307967, "grad_norm": 1.7662892387248, "learning_rate": 3.1482220319344613e-07, "loss": 0.4312, "step": 4000 }, { "epoch": 0.8902981753449043, "grad_norm": 1.6416128045167762, "learning_rate": 3.135649309493238e-07, "loss": 0.4447, "step": 4001 }, { "epoch": 0.890520694259012, "grad_norm": 1.7009666660402851, "learning_rate": 3.123100929743372e-07, "loss": 0.452, "step": 4002 }, { "epoch": 0.8907432131731197, "grad_norm": 1.6444393134699424, "learning_rate": 3.1105768992028607e-07, "loss": 0.4432, "step": 4003 }, { "epoch": 0.8909657320872274, "grad_norm": 1.746930819881142, "learning_rate": 3.0980772243770384e-07, "loss": 0.4192, "step": 4004 }, { "epoch": 0.8911882510013351, "grad_norm": 1.7249180338553478, "learning_rate": 3.08560191175859e-07, "loss": 0.4497, "step": 4005 }, { "epoch": 0.8914107699154428, "grad_norm": 1.7130038278734572, "learning_rate": 3.0731509678275816e-07, "loss": 0.4443, "step": 4006 }, { "epoch": 0.8916332888295505, "grad_norm": 1.7384413232567275, "learning_rate": 3.060724399051362e-07, "loss": 0.4392, "step": 4007 }, { "epoch": 0.8918558077436582, "grad_norm": 1.6496677544004508, "learning_rate": 3.0483222118846633e-07, "loss": 0.4261, "step": 4008 }, { "epoch": 0.8920783266577659, "grad_norm": 1.7893534716501094, "learning_rate": 3.0359444127695314e-07, "loss": 0.4181, "step": 4009 }, { "epoch": 0.8923008455718736, "grad_norm": 1.7556559293220004, "learning_rate": 3.023591008135346e-07, "loss": 0.4224, "step": 4010 }, { "epoch": 0.8925233644859814, "grad_norm": 1.6238388431116388, "learning_rate": 3.0112620043988404e-07, "loss": 0.4421, "step": 4011 }, { "epoch": 0.892745883400089, "grad_norm": 1.570612643540269, "learning_rate": 2.998957407964026e-07, "loss": 0.4259, "step": 4012 }, { "epoch": 0.8929684023141967, "grad_norm": 1.6800473100174562, "learning_rate": 2.9866772252222896e-07, "loss": 0.428, "step": 4013 }, { "epoch": 0.8931909212283045, "grad_norm": 1.6264833817179745, "learning_rate": 2.974421462552296e-07, "loss": 0.4449, "step": 4014 }, { "epoch": 0.8934134401424121, "grad_norm": 1.7233296295071416, "learning_rate": 2.962190126320047e-07, "loss": 0.4288, "step": 4015 }, { "epoch": 0.8936359590565198, "grad_norm": 1.7873693918630493, "learning_rate": 2.949983222878855e-07, "loss": 0.4383, "step": 4016 }, { "epoch": 0.8938584779706275, "grad_norm": 1.8571855395335177, "learning_rate": 2.9378007585693204e-07, "loss": 0.4318, "step": 4017 }, { "epoch": 0.8940809968847352, "grad_norm": 1.8600671287800588, "learning_rate": 2.925642739719381e-07, "loss": 0.4337, "step": 4018 }, { "epoch": 0.8943035157988429, "grad_norm": 1.7625473124408366, "learning_rate": 2.913509172644252e-07, "loss": 0.431, "step": 4019 }, { "epoch": 0.8945260347129506, "grad_norm": 1.683153911970611, "learning_rate": 2.901400063646459e-07, "loss": 0.4302, "step": 4020 }, { "epoch": 0.8947485536270583, "grad_norm": 1.633485468560158, "learning_rate": 2.889315419015831e-07, "loss": 0.4345, "step": 4021 }, { "epoch": 0.894971072541166, "grad_norm": 1.635029167657163, "learning_rate": 2.87725524502947e-07, "loss": 0.4333, "step": 4022 }, { "epoch": 0.8951935914552737, "grad_norm": 1.8780500073307134, "learning_rate": 2.8652195479517806e-07, "loss": 0.4428, "step": 4023 }, { "epoch": 0.8954161103693814, "grad_norm": 1.7837925086242035, "learning_rate": 2.853208334034441e-07, "loss": 0.4467, "step": 4024 }, { "epoch": 0.8956386292834891, "grad_norm": 1.7579713054660184, "learning_rate": 2.8412216095164314e-07, "loss": 0.4306, "step": 4025 }, { "epoch": 0.8958611481975968, "grad_norm": 1.672921333667626, "learning_rate": 2.8292593806240054e-07, "loss": 0.4207, "step": 4026 }, { "epoch": 0.8960836671117045, "grad_norm": 1.7004641335819903, "learning_rate": 2.8173216535706757e-07, "loss": 0.4148, "step": 4027 }, { "epoch": 0.8963061860258122, "grad_norm": 1.816388716608325, "learning_rate": 2.805408434557255e-07, "loss": 0.4344, "step": 4028 }, { "epoch": 0.8965287049399199, "grad_norm": 1.8163729332708418, "learning_rate": 2.793519729771793e-07, "loss": 0.4344, "step": 4029 }, { "epoch": 0.8967512238540276, "grad_norm": 1.7256608568520975, "learning_rate": 2.7816555453896386e-07, "loss": 0.4252, "step": 4030 }, { "epoch": 0.8969737427681352, "grad_norm": 1.6685644551518786, "learning_rate": 2.7698158875733907e-07, "loss": 0.4302, "step": 4031 }, { "epoch": 0.897196261682243, "grad_norm": 1.5972168382952754, "learning_rate": 2.758000762472901e-07, "loss": 0.444, "step": 4032 }, { "epoch": 0.8974187805963507, "grad_norm": 1.6652304677812177, "learning_rate": 2.7462101762253003e-07, "loss": 0.428, "step": 4033 }, { "epoch": 0.8976412995104583, "grad_norm": 1.588113160219852, "learning_rate": 2.734444134954933e-07, "loss": 0.4226, "step": 4034 }, { "epoch": 0.8978638184245661, "grad_norm": 1.754823143824238, "learning_rate": 2.7227026447734393e-07, "loss": 0.4207, "step": 4035 }, { "epoch": 0.8980863373386738, "grad_norm": 1.6910641678051102, "learning_rate": 2.710985711779679e-07, "loss": 0.4299, "step": 4036 }, { "epoch": 0.8983088562527815, "grad_norm": 1.6445937371904065, "learning_rate": 2.6992933420597666e-07, "loss": 0.4324, "step": 4037 }, { "epoch": 0.8985313751668892, "grad_norm": 1.7626245935367102, "learning_rate": 2.687625541687061e-07, "loss": 0.4358, "step": 4038 }, { "epoch": 0.8987538940809969, "grad_norm": 1.707004563072546, "learning_rate": 2.6759823167221363e-07, "loss": 0.426, "step": 4039 }, { "epoch": 0.8989764129951046, "grad_norm": 1.681857138315501, "learning_rate": 2.664363673212833e-07, "loss": 0.4315, "step": 4040 }, { "epoch": 0.8991989319092123, "grad_norm": 1.6615501712165226, "learning_rate": 2.652769617194212e-07, "loss": 0.4385, "step": 4041 }, { "epoch": 0.89942145082332, "grad_norm": 1.741306776894553, "learning_rate": 2.6412001546885334e-07, "loss": 0.4421, "step": 4042 }, { "epoch": 0.8996439697374277, "grad_norm": 1.6133400224505499, "learning_rate": 2.629655291705341e-07, "loss": 0.4361, "step": 4043 }, { "epoch": 0.8998664886515354, "grad_norm": 1.731724224227796, "learning_rate": 2.618135034241354e-07, "loss": 0.4391, "step": 4044 }, { "epoch": 0.900089007565643, "grad_norm": 1.7022541515885228, "learning_rate": 2.606639388280524e-07, "loss": 0.4116, "step": 4045 }, { "epoch": 0.9003115264797508, "grad_norm": 1.862928856489007, "learning_rate": 2.59516835979402e-07, "loss": 0.4465, "step": 4046 }, { "epoch": 0.9005340453938585, "grad_norm": 1.625623679499565, "learning_rate": 2.583721954740226e-07, "loss": 0.4278, "step": 4047 }, { "epoch": 0.9007565643079661, "grad_norm": 1.620866734091496, "learning_rate": 2.5723001790647464e-07, "loss": 0.4178, "step": 4048 }, { "epoch": 0.9009790832220739, "grad_norm": 1.7367469523988643, "learning_rate": 2.5609030387003466e-07, "loss": 0.4266, "step": 4049 }, { "epoch": 0.9012016021361816, "grad_norm": 1.8478445051265675, "learning_rate": 2.5495305395670635e-07, "loss": 0.4459, "step": 4050 }, { "epoch": 0.9014241210502892, "grad_norm": 1.7261034153949417, "learning_rate": 2.538182687572083e-07, "loss": 0.434, "step": 4051 }, { "epoch": 0.901646639964397, "grad_norm": 1.7273680284617479, "learning_rate": 2.5268594886098066e-07, "loss": 0.451, "step": 4052 }, { "epoch": 0.9018691588785047, "grad_norm": 1.7616202882270504, "learning_rate": 2.515560948561846e-07, "loss": 0.4287, "step": 4053 }, { "epoch": 0.9020916777926123, "grad_norm": 1.7088640969461868, "learning_rate": 2.504287073296957e-07, "loss": 0.4279, "step": 4054 }, { "epoch": 0.9023141967067201, "grad_norm": 1.6969974448257872, "learning_rate": 2.493037868671139e-07, "loss": 0.435, "step": 4055 }, { "epoch": 0.9025367156208278, "grad_norm": 1.7176029856731816, "learning_rate": 2.4818133405275444e-07, "loss": 0.4322, "step": 4056 }, { "epoch": 0.9027592345349355, "grad_norm": 1.6924289590847514, "learning_rate": 2.4706134946965167e-07, "loss": 0.4265, "step": 4057 }, { "epoch": 0.9029817534490432, "grad_norm": 1.822863871702891, "learning_rate": 2.4594383369955787e-07, "loss": 0.4374, "step": 4058 }, { "epoch": 0.9032042723631508, "grad_norm": 1.7010278475698999, "learning_rate": 2.448287873229427e-07, "loss": 0.4235, "step": 4059 }, { "epoch": 0.9034267912772586, "grad_norm": 1.8096778583711295, "learning_rate": 2.4371621091899335e-07, "loss": 0.4452, "step": 4060 }, { "epoch": 0.9036493101913663, "grad_norm": 1.7142476643699218, "learning_rate": 2.4260610506561364e-07, "loss": 0.4275, "step": 4061 }, { "epoch": 0.9038718291054739, "grad_norm": 1.8530837209695366, "learning_rate": 2.414984703394252e-07, "loss": 0.4355, "step": 4062 }, { "epoch": 0.9040943480195817, "grad_norm": 1.7481725365019976, "learning_rate": 2.403933073157655e-07, "loss": 0.4435, "step": 4063 }, { "epoch": 0.9043168669336894, "grad_norm": 1.9256774673081467, "learning_rate": 2.39290616568687e-07, "loss": 0.4386, "step": 4064 }, { "epoch": 0.904539385847797, "grad_norm": 1.5461310067769742, "learning_rate": 2.381903986709605e-07, "loss": 0.4122, "step": 4065 }, { "epoch": 0.9047619047619048, "grad_norm": 1.8005939171029939, "learning_rate": 2.370926541940688e-07, "loss": 0.4287, "step": 4066 }, { "epoch": 0.9049844236760125, "grad_norm": 1.712858704352338, "learning_rate": 2.359973837082119e-07, "loss": 0.4452, "step": 4067 }, { "epoch": 0.9052069425901201, "grad_norm": 1.7112072393780198, "learning_rate": 2.3490458778230752e-07, "loss": 0.44, "step": 4068 }, { "epoch": 0.9054294615042279, "grad_norm": 1.789656292584019, "learning_rate": 2.3381426698398324e-07, "loss": 0.4279, "step": 4069 }, { "epoch": 0.9056519804183356, "grad_norm": 1.820252771514541, "learning_rate": 2.3272642187958327e-07, "loss": 0.4524, "step": 4070 }, { "epoch": 0.9058744993324432, "grad_norm": 1.7367711893429796, "learning_rate": 2.316410530341656e-07, "loss": 0.4342, "step": 4071 }, { "epoch": 0.906097018246551, "grad_norm": 1.6545371194048046, "learning_rate": 2.3055816101150262e-07, "loss": 0.4152, "step": 4072 }, { "epoch": 0.9063195371606586, "grad_norm": 1.7302379674387458, "learning_rate": 2.2947774637407883e-07, "loss": 0.4289, "step": 4073 }, { "epoch": 0.9065420560747663, "grad_norm": 1.5881652531475008, "learning_rate": 2.283998096830925e-07, "loss": 0.4273, "step": 4074 }, { "epoch": 0.9067645749888741, "grad_norm": 1.6788025922338308, "learning_rate": 2.2732435149845687e-07, "loss": 0.4225, "step": 4075 }, { "epoch": 0.9069870939029817, "grad_norm": 1.7081362525369645, "learning_rate": 2.262513723787929e-07, "loss": 0.4251, "step": 4076 }, { "epoch": 0.9072096128170895, "grad_norm": 1.606231267533567, "learning_rate": 2.2518087288143918e-07, "loss": 0.4343, "step": 4077 }, { "epoch": 0.9074321317311972, "grad_norm": 1.7445944345499456, "learning_rate": 2.2411285356244318e-07, "loss": 0.4514, "step": 4078 }, { "epoch": 0.9076546506453048, "grad_norm": 1.7124438763440541, "learning_rate": 2.230473149765633e-07, "loss": 0.4299, "step": 4079 }, { "epoch": 0.9078771695594126, "grad_norm": 1.6710401301934836, "learning_rate": 2.2198425767727406e-07, "loss": 0.436, "step": 4080 }, { "epoch": 0.9080996884735203, "grad_norm": 1.641745328108422, "learning_rate": 2.2092368221675542e-07, "loss": 0.4144, "step": 4081 }, { "epoch": 0.9083222073876279, "grad_norm": 1.6584263998370252, "learning_rate": 2.1986558914590173e-07, "loss": 0.4349, "step": 4082 }, { "epoch": 0.9085447263017357, "grad_norm": 1.6526016951909182, "learning_rate": 2.1880997901431778e-07, "loss": 0.4446, "step": 4083 }, { "epoch": 0.9087672452158434, "grad_norm": 1.6873870545504568, "learning_rate": 2.1775685237031553e-07, "loss": 0.4251, "step": 4084 }, { "epoch": 0.908989764129951, "grad_norm": 1.7299709214770007, "learning_rate": 2.1670620976092127e-07, "loss": 0.4389, "step": 4085 }, { "epoch": 0.9092122830440588, "grad_norm": 1.8336713181732993, "learning_rate": 2.1565805173186792e-07, "loss": 0.4222, "step": 4086 }, { "epoch": 0.9094348019581664, "grad_norm": 1.5911829401291138, "learning_rate": 2.1461237882759945e-07, "loss": 0.4328, "step": 4087 }, { "epoch": 0.9096573208722741, "grad_norm": 1.7572550611677005, "learning_rate": 2.135691915912691e-07, "loss": 0.4339, "step": 4088 }, { "epoch": 0.9098798397863819, "grad_norm": 1.633129320529856, "learning_rate": 2.1252849056473678e-07, "loss": 0.4193, "step": 4089 }, { "epoch": 0.9101023587004895, "grad_norm": 1.6732573959116788, "learning_rate": 2.11490276288574e-07, "loss": 0.4353, "step": 4090 }, { "epoch": 0.9103248776145972, "grad_norm": 1.7919382173182608, "learning_rate": 2.1045454930205766e-07, "loss": 0.4214, "step": 4091 }, { "epoch": 0.910547396528705, "grad_norm": 1.7765615333331177, "learning_rate": 2.0942131014317469e-07, "loss": 0.4039, "step": 4092 }, { "epoch": 0.9107699154428126, "grad_norm": 1.756127016148622, "learning_rate": 2.0839055934861961e-07, "loss": 0.4411, "step": 4093 }, { "epoch": 0.9109924343569203, "grad_norm": 1.6835172416475046, "learning_rate": 2.0736229745379366e-07, "loss": 0.4243, "step": 4094 }, { "epoch": 0.9112149532710281, "grad_norm": 1.8589594439167287, "learning_rate": 2.063365249928062e-07, "loss": 0.4229, "step": 4095 }, { "epoch": 0.9114374721851357, "grad_norm": 1.629234075614157, "learning_rate": 2.0531324249847218e-07, "loss": 0.4406, "step": 4096 }, { "epoch": 0.9116599910992434, "grad_norm": 1.8148499818549477, "learning_rate": 2.0429245050231415e-07, "loss": 0.4391, "step": 4097 }, { "epoch": 0.9118825100133512, "grad_norm": 1.6303225390738112, "learning_rate": 2.032741495345608e-07, "loss": 0.4414, "step": 4098 }, { "epoch": 0.9121050289274588, "grad_norm": 1.6969209576122892, "learning_rate": 2.0225834012414737e-07, "loss": 0.4166, "step": 4099 }, { "epoch": 0.9123275478415666, "grad_norm": 1.8216418943260622, "learning_rate": 2.0124502279871504e-07, "loss": 0.4198, "step": 4100 }, { "epoch": 0.9125500667556742, "grad_norm": 1.585501036401397, "learning_rate": 2.0023419808460842e-07, "loss": 0.4256, "step": 4101 }, { "epoch": 0.9127725856697819, "grad_norm": 1.674999990756141, "learning_rate": 1.9922586650687913e-07, "loss": 0.4241, "step": 4102 }, { "epoch": 0.9129951045838897, "grad_norm": 1.7532932426159187, "learning_rate": 1.9822002858928546e-07, "loss": 0.4278, "step": 4103 }, { "epoch": 0.9132176234979973, "grad_norm": 1.7106124000475613, "learning_rate": 1.972166848542856e-07, "loss": 0.4417, "step": 4104 }, { "epoch": 0.913440142412105, "grad_norm": 1.705635278116949, "learning_rate": 1.962158358230476e-07, "loss": 0.4247, "step": 4105 }, { "epoch": 0.9136626613262128, "grad_norm": 1.6314053371079966, "learning_rate": 1.952174820154401e-07, "loss": 0.4443, "step": 4106 }, { "epoch": 0.9138851802403204, "grad_norm": 1.7285190951759606, "learning_rate": 1.9422162395003775e-07, "loss": 0.4261, "step": 4107 }, { "epoch": 0.9141076991544281, "grad_norm": 1.7041906303509164, "learning_rate": 1.9322826214411616e-07, "loss": 0.4277, "step": 4108 }, { "epoch": 0.9143302180685359, "grad_norm": 1.7094854594235567, "learning_rate": 1.9223739711365762e-07, "loss": 0.4315, "step": 4109 }, { "epoch": 0.9145527369826435, "grad_norm": 1.7709616601044975, "learning_rate": 1.9124902937334488e-07, "loss": 0.4193, "step": 4110 }, { "epoch": 0.9147752558967512, "grad_norm": 1.78688874943884, "learning_rate": 1.9026315943656502e-07, "loss": 0.4262, "step": 4111 }, { "epoch": 0.914997774810859, "grad_norm": 1.669406786683343, "learning_rate": 1.892797878154079e-07, "loss": 0.416, "step": 4112 }, { "epoch": 0.9152202937249666, "grad_norm": 1.6550976046481107, "learning_rate": 1.8829891502066379e-07, "loss": 0.4302, "step": 4113 }, { "epoch": 0.9154428126390743, "grad_norm": 1.6904395621607304, "learning_rate": 1.8732054156182622e-07, "loss": 0.428, "step": 4114 }, { "epoch": 0.915665331553182, "grad_norm": 1.6637239592250144, "learning_rate": 1.8634466794709205e-07, "loss": 0.4513, "step": 4115 }, { "epoch": 0.9158878504672897, "grad_norm": 1.7786205237202801, "learning_rate": 1.8537129468335636e-07, "loss": 0.4368, "step": 4116 }, { "epoch": 0.9161103693813974, "grad_norm": 1.6485126300810227, "learning_rate": 1.8440042227621856e-07, "loss": 0.4122, "step": 4117 }, { "epoch": 0.9163328882955051, "grad_norm": 1.6666932867869009, "learning_rate": 1.8343205122997643e-07, "loss": 0.4165, "step": 4118 }, { "epoch": 0.9165554072096128, "grad_norm": 1.734300672019743, "learning_rate": 1.8246618204763034e-07, "loss": 0.4254, "step": 4119 }, { "epoch": 0.9167779261237206, "grad_norm": 1.7533041545060677, "learning_rate": 1.8150281523088175e-07, "loss": 0.4275, "step": 4120 }, { "epoch": 0.9170004450378282, "grad_norm": 1.8132887589958977, "learning_rate": 1.8054195128012874e-07, "loss": 0.4228, "step": 4121 }, { "epoch": 0.9172229639519359, "grad_norm": 1.7846493815028621, "learning_rate": 1.7958359069447318e-07, "loss": 0.4384, "step": 4122 }, { "epoch": 0.9174454828660437, "grad_norm": 1.7608079730116666, "learning_rate": 1.7862773397171407e-07, "loss": 0.4261, "step": 4123 }, { "epoch": 0.9176680017801513, "grad_norm": 1.6477069960328934, "learning_rate": 1.7767438160835205e-07, "loss": 0.4343, "step": 4124 }, { "epoch": 0.917890520694259, "grad_norm": 1.7017329656998572, "learning_rate": 1.7672353409958597e-07, "loss": 0.4445, "step": 4125 }, { "epoch": 0.9181130396083668, "grad_norm": 1.7102629571726804, "learning_rate": 1.7577519193931248e-07, "loss": 0.4385, "step": 4126 }, { "epoch": 0.9183355585224744, "grad_norm": 1.737051938963801, "learning_rate": 1.7482935562012804e-07, "loss": 0.4062, "step": 4127 }, { "epoch": 0.9185580774365821, "grad_norm": 1.7983602491402073, "learning_rate": 1.7388602563332636e-07, "loss": 0.428, "step": 4128 }, { "epoch": 0.9187805963506898, "grad_norm": 1.7094488418793814, "learning_rate": 1.7294520246890046e-07, "loss": 0.4347, "step": 4129 }, { "epoch": 0.9190031152647975, "grad_norm": 1.6627564903221175, "learning_rate": 1.7200688661554276e-07, "loss": 0.4174, "step": 4130 }, { "epoch": 0.9192256341789052, "grad_norm": 1.6266592768898283, "learning_rate": 1.7107107856063954e-07, "loss": 0.4133, "step": 4131 }, { "epoch": 0.9194481530930129, "grad_norm": 1.7036338568354956, "learning_rate": 1.7013777879027803e-07, "loss": 0.4342, "step": 4132 }, { "epoch": 0.9196706720071206, "grad_norm": 1.8568646364589, "learning_rate": 1.6920698778923882e-07, "loss": 0.4542, "step": 4133 }, { "epoch": 0.9198931909212283, "grad_norm": 1.8064431792416948, "learning_rate": 1.6827870604100295e-07, "loss": 0.433, "step": 4134 }, { "epoch": 0.920115709835336, "grad_norm": 1.6771772142332655, "learning_rate": 1.673529340277469e-07, "loss": 0.4495, "step": 4135 }, { "epoch": 0.9203382287494437, "grad_norm": 1.7449679020897813, "learning_rate": 1.6642967223034213e-07, "loss": 0.455, "step": 4136 }, { "epoch": 0.9205607476635514, "grad_norm": 1.6894392241039808, "learning_rate": 1.6550892112835837e-07, "loss": 0.4447, "step": 4137 }, { "epoch": 0.9207832665776591, "grad_norm": 1.7463138508555718, "learning_rate": 1.645906812000597e-07, "loss": 0.4409, "step": 4138 }, { "epoch": 0.9210057854917668, "grad_norm": 1.7002712588700175, "learning_rate": 1.6367495292240686e-07, "loss": 0.4226, "step": 4139 }, { "epoch": 0.9212283044058746, "grad_norm": 1.7389733689534657, "learning_rate": 1.627617367710549e-07, "loss": 0.4199, "step": 4140 }, { "epoch": 0.9214508233199822, "grad_norm": 1.650049433515064, "learning_rate": 1.6185103322035435e-07, "loss": 0.4173, "step": 4141 }, { "epoch": 0.9216733422340899, "grad_norm": 1.753885425409723, "learning_rate": 1.6094284274335182e-07, "loss": 0.4398, "step": 4142 }, { "epoch": 0.9218958611481975, "grad_norm": 1.5727750643562954, "learning_rate": 1.600371658117861e-07, "loss": 0.4252, "step": 4143 }, { "epoch": 0.9221183800623053, "grad_norm": 1.5372887787839298, "learning_rate": 1.591340028960936e-07, "loss": 0.4265, "step": 4144 }, { "epoch": 0.922340898976413, "grad_norm": 2.1031078264608283, "learning_rate": 1.5823335446540188e-07, "loss": 0.4447, "step": 4145 }, { "epoch": 0.9225634178905207, "grad_norm": 1.8010215102059588, "learning_rate": 1.5733522098753396e-07, "loss": 0.4342, "step": 4146 }, { "epoch": 0.9227859368046284, "grad_norm": 1.7024697877717592, "learning_rate": 1.5643960292900607e-07, "loss": 0.4156, "step": 4147 }, { "epoch": 0.9230084557187361, "grad_norm": 1.7481665120280816, "learning_rate": 1.5554650075502775e-07, "loss": 0.4572, "step": 4148 }, { "epoch": 0.9232309746328438, "grad_norm": 1.7192018664729651, "learning_rate": 1.546559149295024e-07, "loss": 0.415, "step": 4149 }, { "epoch": 0.9234534935469515, "grad_norm": 1.6519094034451103, "learning_rate": 1.5376784591502658e-07, "loss": 0.4292, "step": 4150 }, { "epoch": 0.9236760124610592, "grad_norm": 1.6741634737884181, "learning_rate": 1.5288229417288746e-07, "loss": 0.4351, "step": 4151 }, { "epoch": 0.9238985313751669, "grad_norm": 1.7345627457766406, "learning_rate": 1.519992601630671e-07, "loss": 0.4414, "step": 4152 }, { "epoch": 0.9241210502892746, "grad_norm": 1.7275139073032766, "learning_rate": 1.5111874434423746e-07, "loss": 0.4472, "step": 4153 }, { "epoch": 0.9243435692033823, "grad_norm": 1.712124056523636, "learning_rate": 1.5024074717376601e-07, "loss": 0.4275, "step": 4154 }, { "epoch": 0.92456608811749, "grad_norm": 1.6646598970751596, "learning_rate": 1.4936526910770742e-07, "loss": 0.4018, "step": 4155 }, { "epoch": 0.9247886070315977, "grad_norm": 1.7171672252451076, "learning_rate": 1.4849231060081126e-07, "loss": 0.4083, "step": 4156 }, { "epoch": 0.9250111259457053, "grad_norm": 1.6266154855485315, "learning_rate": 1.4762187210651813e-07, "loss": 0.4075, "step": 4157 }, { "epoch": 0.9252336448598131, "grad_norm": 1.8147520357256604, "learning_rate": 1.4675395407695692e-07, "loss": 0.4607, "step": 4158 }, { "epoch": 0.9254561637739208, "grad_norm": 1.6142464443320508, "learning_rate": 1.4588855696295035e-07, "loss": 0.4374, "step": 4159 }, { "epoch": 0.9256786826880284, "grad_norm": 1.7128708618791255, "learning_rate": 1.4502568121400994e-07, "loss": 0.4264, "step": 4160 }, { "epoch": 0.9259012016021362, "grad_norm": 1.782421892510096, "learning_rate": 1.4416532727833888e-07, "loss": 0.4318, "step": 4161 }, { "epoch": 0.9261237205162439, "grad_norm": 1.7270083452715959, "learning_rate": 1.433074956028302e-07, "loss": 0.4229, "step": 4162 }, { "epoch": 0.9263462394303515, "grad_norm": 1.6686331307233115, "learning_rate": 1.424521866330647e-07, "loss": 0.4293, "step": 4163 }, { "epoch": 0.9265687583444593, "grad_norm": 1.6795697675466046, "learning_rate": 1.4159940081331536e-07, "loss": 0.4307, "step": 4164 }, { "epoch": 0.926791277258567, "grad_norm": 1.6592691447838142, "learning_rate": 1.407491385865445e-07, "loss": 0.4149, "step": 4165 }, { "epoch": 0.9270137961726747, "grad_norm": 1.7543566319120165, "learning_rate": 1.3990140039440104e-07, "loss": 0.4233, "step": 4166 }, { "epoch": 0.9272363150867824, "grad_norm": 1.7098041350500126, "learning_rate": 1.390561866772261e-07, "loss": 0.4266, "step": 4167 }, { "epoch": 0.9274588340008901, "grad_norm": 1.7915681578919458, "learning_rate": 1.382134978740468e-07, "loss": 0.4238, "step": 4168 }, { "epoch": 0.9276813529149978, "grad_norm": 1.7061101150040567, "learning_rate": 1.3737333442258084e-07, "loss": 0.4224, "step": 4169 }, { "epoch": 0.9279038718291055, "grad_norm": 1.671258739586644, "learning_rate": 1.3653569675923296e-07, "loss": 0.4086, "step": 4170 }, { "epoch": 0.9281263907432131, "grad_norm": 1.662283524218355, "learning_rate": 1.357005853190957e-07, "loss": 0.4409, "step": 4171 }, { "epoch": 0.9283489096573209, "grad_norm": 1.8326193483853748, "learning_rate": 1.3486800053595095e-07, "loss": 0.4315, "step": 4172 }, { "epoch": 0.9285714285714286, "grad_norm": 1.7175649744305557, "learning_rate": 1.340379428422661e-07, "loss": 0.4105, "step": 4173 }, { "epoch": 0.9287939474855362, "grad_norm": 1.7057717008859468, "learning_rate": 1.3321041266919854e-07, "loss": 0.4272, "step": 4174 }, { "epoch": 0.929016466399644, "grad_norm": 1.6036727334166792, "learning_rate": 1.3238541044658992e-07, "loss": 0.4248, "step": 4175 }, { "epoch": 0.9292389853137517, "grad_norm": 1.658969599228512, "learning_rate": 1.3156293660297025e-07, "loss": 0.4416, "step": 4176 }, { "epoch": 0.9294615042278593, "grad_norm": 1.7056961483414776, "learning_rate": 1.307429915655567e-07, "loss": 0.424, "step": 4177 }, { "epoch": 0.9296840231419671, "grad_norm": 1.6316198333181704, "learning_rate": 1.2992557576025078e-07, "loss": 0.4329, "step": 4178 }, { "epoch": 0.9299065420560748, "grad_norm": 1.6873649713495558, "learning_rate": 1.2911068961164454e-07, "loss": 0.4476, "step": 4179 }, { "epoch": 0.9301290609701824, "grad_norm": 1.5982430062353463, "learning_rate": 1.2829833354301047e-07, "loss": 0.4318, "step": 4180 }, { "epoch": 0.9303515798842902, "grad_norm": 1.5958373975132745, "learning_rate": 1.2748850797631164e-07, "loss": 0.4062, "step": 4181 }, { "epoch": 0.9305740987983978, "grad_norm": 1.6718648543166126, "learning_rate": 1.2668121333219375e-07, "loss": 0.4256, "step": 4182 }, { "epoch": 0.9307966177125055, "grad_norm": 1.9112013864007584, "learning_rate": 1.2587645002998862e-07, "loss": 0.4151, "step": 4183 }, { "epoch": 0.9310191366266133, "grad_norm": 1.6588045254297246, "learning_rate": 1.2507421848771405e-07, "loss": 0.4221, "step": 4184 }, { "epoch": 0.9312416555407209, "grad_norm": 2.00333106271936, "learning_rate": 1.2427451912207235e-07, "loss": 0.4405, "step": 4185 }, { "epoch": 0.9314641744548287, "grad_norm": 1.662147669468832, "learning_rate": 1.234773523484495e-07, "loss": 0.4192, "step": 4186 }, { "epoch": 0.9316866933689364, "grad_norm": 1.7537291352054354, "learning_rate": 1.2268271858091817e-07, "loss": 0.4286, "step": 4187 }, { "epoch": 0.931909212283044, "grad_norm": 1.7550718551555944, "learning_rate": 1.2189061823223214e-07, "loss": 0.4129, "step": 4188 }, { "epoch": 0.9321317311971518, "grad_norm": 1.7540477717682523, "learning_rate": 1.2110105171383336e-07, "loss": 0.4359, "step": 4189 }, { "epoch": 0.9323542501112595, "grad_norm": 1.7077801716899053, "learning_rate": 1.2031401943584265e-07, "loss": 0.4304, "step": 4190 }, { "epoch": 0.9325767690253671, "grad_norm": 1.7621539896743625, "learning_rate": 1.1952952180706966e-07, "loss": 0.4294, "step": 4191 }, { "epoch": 0.9327992879394749, "grad_norm": 1.9067338026356746, "learning_rate": 1.1874755923500402e-07, "loss": 0.4403, "step": 4192 }, { "epoch": 0.9330218068535826, "grad_norm": 1.7351796154886792, "learning_rate": 1.1796813212581971e-07, "loss": 0.446, "step": 4193 }, { "epoch": 0.9332443257676902, "grad_norm": 1.6760776996786328, "learning_rate": 1.1719124088437395e-07, "loss": 0.4411, "step": 4194 }, { "epoch": 0.933466844681798, "grad_norm": 1.6571478984826356, "learning_rate": 1.1641688591420508e-07, "loss": 0.4311, "step": 4195 }, { "epoch": 0.9336893635959056, "grad_norm": 1.6275438455274247, "learning_rate": 1.156450676175369e-07, "loss": 0.4253, "step": 4196 }, { "epoch": 0.9339118825100133, "grad_norm": 1.7027463825637719, "learning_rate": 1.1487578639527264e-07, "loss": 0.4315, "step": 4197 }, { "epoch": 0.9341344014241211, "grad_norm": 1.884957184232068, "learning_rate": 1.141090426470004e-07, "loss": 0.4252, "step": 4198 }, { "epoch": 0.9343569203382287, "grad_norm": 1.675121596704788, "learning_rate": 1.1334483677098829e-07, "loss": 0.4361, "step": 4199 }, { "epoch": 0.9345794392523364, "grad_norm": 1.6868978031325261, "learning_rate": 1.1258316916418655e-07, "loss": 0.4523, "step": 4200 }, { "epoch": 0.9348019581664442, "grad_norm": 1.7727220471723413, "learning_rate": 1.1182404022222759e-07, "loss": 0.4205, "step": 4201 }, { "epoch": 0.9350244770805518, "grad_norm": 1.7040556768659478, "learning_rate": 1.110674503394249e-07, "loss": 0.4078, "step": 4202 }, { "epoch": 0.9352469959946595, "grad_norm": 1.768764055776404, "learning_rate": 1.1031339990877243e-07, "loss": 0.4307, "step": 4203 }, { "epoch": 0.9354695149087673, "grad_norm": 1.701150526768216, "learning_rate": 1.0956188932194689e-07, "loss": 0.4095, "step": 4204 }, { "epoch": 0.9356920338228749, "grad_norm": 1.762841202101142, "learning_rate": 1.0881291896930324e-07, "loss": 0.4474, "step": 4205 }, { "epoch": 0.9359145527369827, "grad_norm": 1.7932561999211372, "learning_rate": 1.0806648923987862e-07, "loss": 0.4328, "step": 4206 }, { "epoch": 0.9361370716510904, "grad_norm": 1.7169898394250798, "learning_rate": 1.0732260052139065e-07, "loss": 0.434, "step": 4207 }, { "epoch": 0.936359590565198, "grad_norm": 1.7400854415539786, "learning_rate": 1.0658125320023582e-07, "loss": 0.4125, "step": 4208 }, { "epoch": 0.9365821094793058, "grad_norm": 1.642071702033638, "learning_rate": 1.058424476614911e-07, "loss": 0.4225, "step": 4209 }, { "epoch": 0.9368046283934134, "grad_norm": 1.6119018862144678, "learning_rate": 1.0510618428891395e-07, "loss": 0.4411, "step": 4210 }, { "epoch": 0.9370271473075211, "grad_norm": 1.6100781204012165, "learning_rate": 1.0437246346494012e-07, "loss": 0.4194, "step": 4211 }, { "epoch": 0.9372496662216289, "grad_norm": 1.7854597310735556, "learning_rate": 1.0364128557068642e-07, "loss": 0.4048, "step": 4212 }, { "epoch": 0.9374721851357365, "grad_norm": 1.7206695204664015, "learning_rate": 1.0291265098594628e-07, "loss": 0.4329, "step": 4213 }, { "epoch": 0.9376947040498442, "grad_norm": 1.8440728660416754, "learning_rate": 1.0218656008919469e-07, "loss": 0.4366, "step": 4214 }, { "epoch": 0.937917222963952, "grad_norm": 1.654929889983013, "learning_rate": 1.0146301325758279e-07, "loss": 0.4258, "step": 4215 }, { "epoch": 0.9381397418780596, "grad_norm": 1.774282305349783, "learning_rate": 1.0074201086694324e-07, "loss": 0.4265, "step": 4216 }, { "epoch": 0.9383622607921673, "grad_norm": 1.8165260760236122, "learning_rate": 1.000235532917837e-07, "loss": 0.4469, "step": 4217 }, { "epoch": 0.9385847797062751, "grad_norm": 1.6277618168339476, "learning_rate": 9.930764090529288e-08, "loss": 0.4291, "step": 4218 }, { "epoch": 0.9388072986203827, "grad_norm": 1.6652667976116153, "learning_rate": 9.859427407933609e-08, "loss": 0.4229, "step": 4219 }, { "epoch": 0.9390298175344904, "grad_norm": 1.7006697461991747, "learning_rate": 9.788345318445636e-08, "loss": 0.4233, "step": 4220 }, { "epoch": 0.9392523364485982, "grad_norm": 1.6704669379433048, "learning_rate": 9.717517858987446e-08, "loss": 0.4097, "step": 4221 }, { "epoch": 0.9394748553627058, "grad_norm": 1.8287453711594184, "learning_rate": 9.646945066348834e-08, "loss": 0.4386, "step": 4222 }, { "epoch": 0.9396973742768135, "grad_norm": 1.660867684687729, "learning_rate": 9.57662697718742e-08, "loss": 0.414, "step": 4223 }, { "epoch": 0.9399198931909212, "grad_norm": 1.7450593450598815, "learning_rate": 9.506563628028376e-08, "loss": 0.4371, "step": 4224 }, { "epoch": 0.9401424121050289, "grad_norm": 1.6787400781416122, "learning_rate": 9.436755055264646e-08, "loss": 0.4297, "step": 4225 }, { "epoch": 0.9403649310191367, "grad_norm": 1.5794021163449796, "learning_rate": 9.367201295156725e-08, "loss": 0.412, "step": 4226 }, { "epoch": 0.9405874499332443, "grad_norm": 1.7853683729719687, "learning_rate": 9.29790238383299e-08, "loss": 0.4567, "step": 4227 }, { "epoch": 0.940809968847352, "grad_norm": 1.7962345126082262, "learning_rate": 9.22885835728915e-08, "loss": 0.4237, "step": 4228 }, { "epoch": 0.9410324877614598, "grad_norm": 1.7750622428630771, "learning_rate": 9.160069251388792e-08, "loss": 0.4178, "step": 4229 }, { "epoch": 0.9412550066755674, "grad_norm": 1.7072243756341017, "learning_rate": 9.091535101862837e-08, "loss": 0.422, "step": 4230 }, { "epoch": 0.9414775255896751, "grad_norm": 1.6964904131740612, "learning_rate": 9.023255944309972e-08, "loss": 0.4364, "step": 4231 }, { "epoch": 0.9417000445037829, "grad_norm": 1.7586352974544384, "learning_rate": 8.955231814196274e-08, "loss": 0.4169, "step": 4232 }, { "epoch": 0.9419225634178905, "grad_norm": 1.8000940980271416, "learning_rate": 8.88746274685548e-08, "loss": 0.443, "step": 4233 }, { "epoch": 0.9421450823319982, "grad_norm": 1.609578685574315, "learning_rate": 8.819948777488819e-08, "loss": 0.4121, "step": 4234 }, { "epoch": 0.942367601246106, "grad_norm": 1.594156282254004, "learning_rate": 8.75268994116496e-08, "loss": 0.4151, "step": 4235 }, { "epoch": 0.9425901201602136, "grad_norm": 1.773962976444424, "learning_rate": 8.685686272820071e-08, "loss": 0.4223, "step": 4236 }, { "epoch": 0.9428126390743213, "grad_norm": 1.7709888685414303, "learning_rate": 8.618937807257754e-08, "loss": 0.4213, "step": 4237 }, { "epoch": 0.943035157988429, "grad_norm": 1.6904380130652292, "learning_rate": 8.552444579149167e-08, "loss": 0.4076, "step": 4238 }, { "epoch": 0.9432576769025367, "grad_norm": 1.769583768063895, "learning_rate": 8.486206623032734e-08, "loss": 0.4565, "step": 4239 }, { "epoch": 0.9434801958166444, "grad_norm": 1.6775451043731593, "learning_rate": 8.420223973314324e-08, "loss": 0.4213, "step": 4240 }, { "epoch": 0.9437027147307521, "grad_norm": 1.6143723523778355, "learning_rate": 8.354496664267354e-08, "loss": 0.4402, "step": 4241 }, { "epoch": 0.9439252336448598, "grad_norm": 1.813346314487885, "learning_rate": 8.289024730032346e-08, "loss": 0.4215, "step": 4242 }, { "epoch": 0.9441477525589675, "grad_norm": 1.632425317098712, "learning_rate": 8.223808204617378e-08, "loss": 0.4201, "step": 4243 }, { "epoch": 0.9443702714730752, "grad_norm": 1.628845310793368, "learning_rate": 8.158847121897795e-08, "loss": 0.4457, "step": 4244 }, { "epoch": 0.9445927903871829, "grad_norm": 1.6337164861551607, "learning_rate": 8.094141515616161e-08, "loss": 0.4245, "step": 4245 }, { "epoch": 0.9448153093012907, "grad_norm": 1.688160269252649, "learning_rate": 8.029691419382534e-08, "loss": 0.437, "step": 4246 }, { "epoch": 0.9450378282153983, "grad_norm": 1.6121315306314952, "learning_rate": 7.965496866674083e-08, "loss": 0.4075, "step": 4247 }, { "epoch": 0.945260347129506, "grad_norm": 1.7226944411689913, "learning_rate": 7.901557890835299e-08, "loss": 0.4045, "step": 4248 }, { "epoch": 0.9454828660436138, "grad_norm": 1.6429876510237995, "learning_rate": 7.837874525078004e-08, "loss": 0.4204, "step": 4249 }, { "epoch": 0.9457053849577214, "grad_norm": 1.7021435476873352, "learning_rate": 7.774446802481128e-08, "loss": 0.4374, "step": 4250 }, { "epoch": 0.9459279038718291, "grad_norm": 1.7439735597035384, "learning_rate": 7.711274755990816e-08, "loss": 0.4425, "step": 4251 }, { "epoch": 0.9461504227859368, "grad_norm": 1.7981638402460876, "learning_rate": 7.648358418420432e-08, "loss": 0.4252, "step": 4252 }, { "epoch": 0.9463729417000445, "grad_norm": 1.7105230253280566, "learning_rate": 7.585697822450611e-08, "loss": 0.4367, "step": 4253 }, { "epoch": 0.9465954606141522, "grad_norm": 1.8118684877774422, "learning_rate": 7.523293000629039e-08, "loss": 0.4225, "step": 4254 }, { "epoch": 0.9468179795282599, "grad_norm": 1.8348868050287546, "learning_rate": 7.461143985370567e-08, "loss": 0.4209, "step": 4255 }, { "epoch": 0.9470404984423676, "grad_norm": 1.6295989155969426, "learning_rate": 7.399250808957204e-08, "loss": 0.4282, "step": 4256 }, { "epoch": 0.9472630173564753, "grad_norm": 1.739406395401871, "learning_rate": 7.337613503537954e-08, "loss": 0.4385, "step": 4257 }, { "epoch": 0.947485536270583, "grad_norm": 1.7149248541089928, "learning_rate": 7.276232101129099e-08, "loss": 0.4336, "step": 4258 }, { "epoch": 0.9477080551846907, "grad_norm": 1.7893327280854794, "learning_rate": 7.215106633613855e-08, "loss": 0.4158, "step": 4259 }, { "epoch": 0.9479305740987984, "grad_norm": 1.8348743333295252, "learning_rate": 7.154237132742603e-08, "loss": 0.4232, "step": 4260 }, { "epoch": 0.9481530930129061, "grad_norm": 1.8057928335577809, "learning_rate": 7.093623630132663e-08, "loss": 0.4292, "step": 4261 }, { "epoch": 0.9483756119270138, "grad_norm": 1.628122632849605, "learning_rate": 7.033266157268459e-08, "loss": 0.423, "step": 4262 }, { "epoch": 0.9485981308411215, "grad_norm": 1.8233863632599328, "learning_rate": 6.97316474550136e-08, "loss": 0.4188, "step": 4263 }, { "epoch": 0.9488206497552292, "grad_norm": 1.700854479104017, "learning_rate": 6.913319426049836e-08, "loss": 0.4279, "step": 4264 }, { "epoch": 0.9490431686693369, "grad_norm": 1.5805152357386256, "learning_rate": 6.85373022999919e-08, "loss": 0.4231, "step": 4265 }, { "epoch": 0.9492656875834445, "grad_norm": 1.793183453895316, "learning_rate": 6.794397188301827e-08, "loss": 0.4405, "step": 4266 }, { "epoch": 0.9494882064975523, "grad_norm": 1.6767010255694301, "learning_rate": 6.735320331776984e-08, "loss": 0.417, "step": 4267 }, { "epoch": 0.94971072541166, "grad_norm": 1.6459572928403667, "learning_rate": 6.676499691110894e-08, "loss": 0.4307, "step": 4268 }, { "epoch": 0.9499332443257676, "grad_norm": 1.6152870066547118, "learning_rate": 6.617935296856781e-08, "loss": 0.4356, "step": 4269 }, { "epoch": 0.9501557632398754, "grad_norm": 1.9805356351955843, "learning_rate": 6.55962717943448e-08, "loss": 0.4223, "step": 4270 }, { "epoch": 0.9503782821539831, "grad_norm": 1.7067331231837117, "learning_rate": 6.501575369131041e-08, "loss": 0.4244, "step": 4271 }, { "epoch": 0.9506008010680908, "grad_norm": 1.8252318245194061, "learning_rate": 6.443779896100233e-08, "loss": 0.4344, "step": 4272 }, { "epoch": 0.9508233199821985, "grad_norm": 1.7438086800682326, "learning_rate": 6.386240790362708e-08, "loss": 0.4434, "step": 4273 }, { "epoch": 0.9510458388963062, "grad_norm": 1.8245575084680652, "learning_rate": 6.328958081805892e-08, "loss": 0.4273, "step": 4274 }, { "epoch": 0.9512683578104139, "grad_norm": 1.758502986703524, "learning_rate": 6.271931800184039e-08, "loss": 0.4182, "step": 4275 }, { "epoch": 0.9514908767245216, "grad_norm": 1.7033647995439096, "learning_rate": 6.215161975118289e-08, "loss": 0.4262, "step": 4276 }, { "epoch": 0.9517133956386293, "grad_norm": 1.7549165358948962, "learning_rate": 6.158648636096442e-08, "loss": 0.4443, "step": 4277 }, { "epoch": 0.951935914552737, "grad_norm": 1.7026282610241392, "learning_rate": 6.102391812473296e-08, "loss": 0.4253, "step": 4278 }, { "epoch": 0.9521584334668447, "grad_norm": 1.7163654414165166, "learning_rate": 6.046391533470142e-08, "loss": 0.4253, "step": 4279 }, { "epoch": 0.9523809523809523, "grad_norm": 1.7202879494149488, "learning_rate": 5.990647828175211e-08, "loss": 0.432, "step": 4280 }, { "epoch": 0.9526034712950601, "grad_norm": 1.7595589640645846, "learning_rate": 5.935160725543343e-08, "loss": 0.4364, "step": 4281 }, { "epoch": 0.9528259902091678, "grad_norm": 1.5888985448336148, "learning_rate": 5.879930254396149e-08, "loss": 0.4208, "step": 4282 }, { "epoch": 0.9530485091232754, "grad_norm": 1.7015943737593602, "learning_rate": 5.824956443421903e-08, "loss": 0.4355, "step": 4283 }, { "epoch": 0.9532710280373832, "grad_norm": 1.6194275638554587, "learning_rate": 5.7702393211755966e-08, "loss": 0.4245, "step": 4284 }, { "epoch": 0.9534935469514909, "grad_norm": 1.6521144091033035, "learning_rate": 5.715778916078885e-08, "loss": 0.4289, "step": 4285 }, { "epoch": 0.9537160658655985, "grad_norm": 1.7582250972432605, "learning_rate": 5.661575256420082e-08, "loss": 0.4283, "step": 4286 }, { "epoch": 0.9539385847797063, "grad_norm": 1.896837459684806, "learning_rate": 5.6076283703541125e-08, "loss": 0.4351, "step": 4287 }, { "epoch": 0.954161103693814, "grad_norm": 1.7292992356670025, "learning_rate": 5.553938285902505e-08, "loss": 0.4155, "step": 4288 }, { "epoch": 0.9543836226079216, "grad_norm": 1.6268510130529399, "learning_rate": 5.500505030953451e-08, "loss": 0.4073, "step": 4289 }, { "epoch": 0.9546061415220294, "grad_norm": 1.6217356040641466, "learning_rate": 5.44732863326175e-08, "loss": 0.4289, "step": 4290 }, { "epoch": 0.9548286604361371, "grad_norm": 1.7021510606685886, "learning_rate": 5.394409120448807e-08, "loss": 0.4395, "step": 4291 }, { "epoch": 0.9550511793502447, "grad_norm": 1.5937485379014482, "learning_rate": 5.3417465200023555e-08, "loss": 0.4268, "step": 4292 }, { "epoch": 0.9552736982643525, "grad_norm": 1.6079024362001788, "learning_rate": 5.28934085927707e-08, "loss": 0.4227, "step": 4293 }, { "epoch": 0.9554962171784601, "grad_norm": 1.5951088999030785, "learning_rate": 5.237192165493843e-08, "loss": 0.4329, "step": 4294 }, { "epoch": 0.9557187360925679, "grad_norm": 1.7516623977429404, "learning_rate": 5.185300465740117e-08, "loss": 0.4288, "step": 4295 }, { "epoch": 0.9559412550066756, "grad_norm": 1.7752668531460514, "learning_rate": 5.133665786970166e-08, "loss": 0.4054, "step": 4296 }, { "epoch": 0.9561637739207832, "grad_norm": 1.6860461768741626, "learning_rate": 5.082288156004367e-08, "loss": 0.4243, "step": 4297 }, { "epoch": 0.956386292834891, "grad_norm": 1.646853474955708, "learning_rate": 5.031167599529763e-08, "loss": 0.4383, "step": 4298 }, { "epoch": 0.9566088117489987, "grad_norm": 1.8341801324937155, "learning_rate": 4.98030414409989e-08, "loss": 0.4381, "step": 4299 }, { "epoch": 0.9568313306631063, "grad_norm": 1.6287480097513614, "learning_rate": 4.929697816134615e-08, "loss": 0.4376, "step": 4300 }, { "epoch": 0.9570538495772141, "grad_norm": 1.7473797333742467, "learning_rate": 4.879348641920356e-08, "loss": 0.4167, "step": 4301 }, { "epoch": 0.9572763684913218, "grad_norm": 1.6205744120326113, "learning_rate": 4.829256647609914e-08, "loss": 0.4331, "step": 4302 }, { "epoch": 0.9574988874054294, "grad_norm": 1.7961461112441173, "learning_rate": 4.779421859222533e-08, "loss": 0.4198, "step": 4303 }, { "epoch": 0.9577214063195372, "grad_norm": 1.7617101952534662, "learning_rate": 4.7298443026438377e-08, "loss": 0.4351, "step": 4304 }, { "epoch": 0.9579439252336449, "grad_norm": 1.801790977094094, "learning_rate": 4.680524003625786e-08, "loss": 0.4251, "step": 4305 }, { "epoch": 0.9581664441477525, "grad_norm": 1.7349440513245116, "learning_rate": 4.6314609877868843e-08, "loss": 0.4157, "step": 4306 }, { "epoch": 0.9583889630618603, "grad_norm": 1.747785529102804, "learning_rate": 4.582655280611692e-08, "loss": 0.4246, "step": 4307 }, { "epoch": 0.9586114819759679, "grad_norm": 1.8413064550709073, "learning_rate": 4.5341069074514297e-08, "loss": 0.432, "step": 4308 }, { "epoch": 0.9588340008900756, "grad_norm": 1.8874703655146423, "learning_rate": 4.4858158935234264e-08, "loss": 0.4348, "step": 4309 }, { "epoch": 0.9590565198041834, "grad_norm": 1.8081995783912597, "learning_rate": 4.437782263911505e-08, "loss": 0.4288, "step": 4310 }, { "epoch": 0.959279038718291, "grad_norm": 1.7045494647707216, "learning_rate": 4.390006043565764e-08, "loss": 0.4353, "step": 4311 }, { "epoch": 0.9595015576323987, "grad_norm": 1.6471950162561504, "learning_rate": 4.3424872573023525e-08, "loss": 0.4096, "step": 4312 }, { "epoch": 0.9597240765465065, "grad_norm": 1.611783800573025, "learning_rate": 4.295225929804081e-08, "loss": 0.4241, "step": 4313 }, { "epoch": 0.9599465954606141, "grad_norm": 1.63068422300231, "learning_rate": 4.2482220856197023e-08, "loss": 0.4379, "step": 4314 }, { "epoch": 0.9601691143747219, "grad_norm": 1.6939931464285527, "learning_rate": 4.201475749164463e-08, "loss": 0.4448, "step": 4315 }, { "epoch": 0.9603916332888296, "grad_norm": 1.6016159719683563, "learning_rate": 4.154986944719774e-08, "loss": 0.4325, "step": 4316 }, { "epoch": 0.9606141522029372, "grad_norm": 1.7882157123764706, "learning_rate": 4.1087556964331533e-08, "loss": 0.4335, "step": 4317 }, { "epoch": 0.960836671117045, "grad_norm": 1.741491770906615, "learning_rate": 4.062782028318502e-08, "loss": 0.4374, "step": 4318 }, { "epoch": 0.9610591900311527, "grad_norm": 1.7932857818179648, "learning_rate": 4.017065964255884e-08, "loss": 0.4453, "step": 4319 }, { "epoch": 0.9612817089452603, "grad_norm": 1.732903352511545, "learning_rate": 3.971607527991472e-08, "loss": 0.4253, "step": 4320 }, { "epoch": 0.9615042278593681, "grad_norm": 1.6204391673811698, "learning_rate": 3.9264067431377116e-08, "loss": 0.4211, "step": 4321 }, { "epoch": 0.9617267467734757, "grad_norm": 1.7488851156562022, "learning_rate": 3.8814636331732106e-08, "loss": 0.4308, "step": 4322 }, { "epoch": 0.9619492656875834, "grad_norm": 1.7291890851161293, "learning_rate": 3.836778221442738e-08, "loss": 0.4335, "step": 4323 }, { "epoch": 0.9621717846016912, "grad_norm": 1.7406714055426513, "learning_rate": 3.7923505311571184e-08, "loss": 0.4272, "step": 4324 }, { "epoch": 0.9623943035157988, "grad_norm": 1.723553898770182, "learning_rate": 3.748180585393391e-08, "loss": 0.4178, "step": 4325 }, { "epoch": 0.9626168224299065, "grad_norm": 1.7001263740225068, "learning_rate": 3.7042684070947574e-08, "loss": 0.4472, "step": 4326 }, { "epoch": 0.9628393413440143, "grad_norm": 1.6162171034784756, "learning_rate": 3.6606140190703633e-08, "loss": 0.4246, "step": 4327 }, { "epoch": 0.9630618602581219, "grad_norm": 1.599885202876792, "learning_rate": 3.617217443995624e-08, "loss": 0.412, "step": 4328 }, { "epoch": 0.9632843791722296, "grad_norm": 1.7221690616928398, "learning_rate": 3.574078704411954e-08, "loss": 0.4279, "step": 4329 }, { "epoch": 0.9635068980863374, "grad_norm": 1.6669256525806848, "learning_rate": 3.5311978227268176e-08, "loss": 0.4182, "step": 4330 }, { "epoch": 0.963729417000445, "grad_norm": 1.6100609993321948, "learning_rate": 3.488574821213897e-08, "loss": 0.4284, "step": 4331 }, { "epoch": 0.9639519359145527, "grad_norm": 1.6424797789922654, "learning_rate": 3.4462097220125945e-08, "loss": 0.4131, "step": 4332 }, { "epoch": 0.9641744548286605, "grad_norm": 1.6569815929814118, "learning_rate": 3.4041025471287515e-08, "loss": 0.4134, "step": 4333 }, { "epoch": 0.9643969737427681, "grad_norm": 1.818283855189704, "learning_rate": 3.3622533184339836e-08, "loss": 0.4364, "step": 4334 }, { "epoch": 0.9646194926568759, "grad_norm": 1.7204592856713752, "learning_rate": 3.320662057665958e-08, "loss": 0.4132, "step": 4335 }, { "epoch": 0.9648420115709835, "grad_norm": 1.7210141945706945, "learning_rate": 3.279328786428393e-08, "loss": 0.4391, "step": 4336 }, { "epoch": 0.9650645304850912, "grad_norm": 1.8591238589343, "learning_rate": 3.238253526191004e-08, "loss": 0.4258, "step": 4337 }, { "epoch": 0.965287049399199, "grad_norm": 1.7216013943066593, "learning_rate": 3.197436298289392e-08, "loss": 0.4185, "step": 4338 }, { "epoch": 0.9655095683133066, "grad_norm": 1.7718319757584127, "learning_rate": 3.1568771239252615e-08, "loss": 0.4318, "step": 4339 }, { "epoch": 0.9657320872274143, "grad_norm": 1.675226202384736, "learning_rate": 3.1165760241662066e-08, "loss": 0.4205, "step": 4340 }, { "epoch": 0.9659546061415221, "grad_norm": 1.6387140187748408, "learning_rate": 3.07653301994576e-08, "loss": 0.4181, "step": 4341 }, { "epoch": 0.9661771250556297, "grad_norm": 1.6939606006208352, "learning_rate": 3.036748132063394e-08, "loss": 0.4247, "step": 4342 }, { "epoch": 0.9663996439697374, "grad_norm": 1.666973278793632, "learning_rate": 2.9972213811845786e-08, "loss": 0.4473, "step": 4343 }, { "epoch": 0.9666221628838452, "grad_norm": 1.7931285340713834, "learning_rate": 2.9579527878405568e-08, "loss": 0.4194, "step": 4344 }, { "epoch": 0.9668446817979528, "grad_norm": 1.7337526954753142, "learning_rate": 2.9189423724286792e-08, "loss": 0.433, "step": 4345 }, { "epoch": 0.9670672007120605, "grad_norm": 1.7064704853385744, "learning_rate": 2.880190155212015e-08, "loss": 0.4341, "step": 4346 }, { "epoch": 0.9672897196261683, "grad_norm": 1.7136249625140187, "learning_rate": 2.8416961563195178e-08, "loss": 0.4289, "step": 4347 }, { "epoch": 0.9675122385402759, "grad_norm": 1.716637169321672, "learning_rate": 2.8034603957461938e-08, "loss": 0.4193, "step": 4348 }, { "epoch": 0.9677347574543836, "grad_norm": 1.6431543483266102, "learning_rate": 2.7654828933527667e-08, "loss": 0.428, "step": 4349 }, { "epoch": 0.9679572763684913, "grad_norm": 1.6868302477230193, "learning_rate": 2.7277636688657904e-08, "loss": 0.4196, "step": 4350 }, { "epoch": 0.968179795282599, "grad_norm": 1.7415165184059662, "learning_rate": 2.6903027418777038e-08, "loss": 0.4362, "step": 4351 }, { "epoch": 0.9684023141967067, "grad_norm": 1.7045847075366871, "learning_rate": 2.6531001318468862e-08, "loss": 0.4155, "step": 4352 }, { "epoch": 0.9686248331108144, "grad_norm": 1.7894498718395389, "learning_rate": 2.61615585809738e-08, "loss": 0.4313, "step": 4353 }, { "epoch": 0.9688473520249221, "grad_norm": 1.6500265076891305, "learning_rate": 2.5794699398191125e-08, "loss": 0.4395, "step": 4354 }, { "epoch": 0.9690698709390299, "grad_norm": 1.8608018660258476, "learning_rate": 2.543042396067785e-08, "loss": 0.4462, "step": 4355 }, { "epoch": 0.9692923898531375, "grad_norm": 1.6449090916760516, "learning_rate": 2.5068732457649292e-08, "loss": 0.4286, "step": 4356 }, { "epoch": 0.9695149087672452, "grad_norm": 1.7965364330085871, "learning_rate": 2.4709625076978494e-08, "loss": 0.435, "step": 4357 }, { "epoch": 0.969737427681353, "grad_norm": 1.7849512699866135, "learning_rate": 2.435310200519625e-08, "loss": 0.4143, "step": 4358 }, { "epoch": 0.9699599465954606, "grad_norm": 1.7346690984462754, "learning_rate": 2.3999163427490535e-08, "loss": 0.4258, "step": 4359 }, { "epoch": 0.9701824655095683, "grad_norm": 1.7646522652301764, "learning_rate": 2.364780952770762e-08, "loss": 0.4314, "step": 4360 }, { "epoch": 0.9704049844236761, "grad_norm": 1.7020202853480006, "learning_rate": 2.3299040488350412e-08, "loss": 0.4267, "step": 4361 }, { "epoch": 0.9706275033377837, "grad_norm": 1.6213056226759774, "learning_rate": 2.2952856490579544e-08, "loss": 0.4214, "step": 4362 }, { "epoch": 0.9708500222518914, "grad_norm": 1.6867111299036852, "learning_rate": 2.2609257714213407e-08, "loss": 0.41, "step": 4363 }, { "epoch": 0.9710725411659991, "grad_norm": 1.683317434299679, "learning_rate": 2.2268244337727008e-08, "loss": 0.422, "step": 4364 }, { "epoch": 0.9712950600801068, "grad_norm": 1.6458604951935125, "learning_rate": 2.1929816538252545e-08, "loss": 0.4273, "step": 4365 }, { "epoch": 0.9715175789942145, "grad_norm": 1.6896155013740242, "learning_rate": 2.159397449157885e-08, "loss": 0.4265, "step": 4366 }, { "epoch": 0.9717400979083222, "grad_norm": 1.746987925076291, "learning_rate": 2.1260718372151933e-08, "loss": 0.4364, "step": 4367 }, { "epoch": 0.9719626168224299, "grad_norm": 1.8233126567648623, "learning_rate": 2.0930048353074995e-08, "loss": 0.4287, "step": 4368 }, { "epoch": 0.9721851357365376, "grad_norm": 1.7909455964204886, "learning_rate": 2.060196460610675e-08, "loss": 0.4576, "step": 4369 }, { "epoch": 0.9724076546506453, "grad_norm": 1.6527700951680364, "learning_rate": 2.0276467301664215e-08, "loss": 0.4349, "step": 4370 }, { "epoch": 0.972630173564753, "grad_norm": 1.812552693267115, "learning_rate": 1.995355660881937e-08, "loss": 0.4253, "step": 4371 }, { "epoch": 0.9728526924788607, "grad_norm": 1.5651260911646008, "learning_rate": 1.963323269530193e-08, "loss": 0.4122, "step": 4372 }, { "epoch": 0.9730752113929684, "grad_norm": 1.6084620425466323, "learning_rate": 1.9315495727497137e-08, "loss": 0.4453, "step": 4373 }, { "epoch": 0.9732977303070761, "grad_norm": 1.7933466364554953, "learning_rate": 1.9000345870446303e-08, "loss": 0.4247, "step": 4374 }, { "epoch": 0.9735202492211839, "grad_norm": 1.7364929028768743, "learning_rate": 1.8687783287847926e-08, "loss": 0.4068, "step": 4375 }, { "epoch": 0.9737427681352915, "grad_norm": 1.6724758698744737, "learning_rate": 1.8377808142055475e-08, "loss": 0.4182, "step": 4376 }, { "epoch": 0.9739652870493992, "grad_norm": 1.6014399993431951, "learning_rate": 1.8070420594079042e-08, "loss": 0.414, "step": 4377 }, { "epoch": 0.9741878059635068, "grad_norm": 1.7573988197082224, "learning_rate": 1.7765620803585348e-08, "loss": 0.4376, "step": 4378 }, { "epoch": 0.9744103248776146, "grad_norm": 1.751532887431927, "learning_rate": 1.7463408928895532e-08, "loss": 0.4513, "step": 4379 }, { "epoch": 0.9746328437917223, "grad_norm": 1.6826800658018104, "learning_rate": 1.7163785126986797e-08, "loss": 0.4225, "step": 4380 }, { "epoch": 0.97485536270583, "grad_norm": 1.754782348798424, "learning_rate": 1.6866749553492433e-08, "loss": 0.4508, "step": 4381 }, { "epoch": 0.9750778816199377, "grad_norm": 1.807883284395745, "learning_rate": 1.65723023627018e-08, "loss": 0.4333, "step": 4382 }, { "epoch": 0.9753004005340454, "grad_norm": 1.6577430397022501, "learning_rate": 1.628044370755921e-08, "loss": 0.4208, "step": 4383 }, { "epoch": 0.9755229194481531, "grad_norm": 1.6653733477384636, "learning_rate": 1.5991173739663967e-08, "loss": 0.4171, "step": 4384 }, { "epoch": 0.9757454383622608, "grad_norm": 1.6118417057589125, "learning_rate": 1.5704492609271425e-08, "loss": 0.419, "step": 4385 }, { "epoch": 0.9759679572763685, "grad_norm": 1.6552648769719158, "learning_rate": 1.5420400465292473e-08, "loss": 0.4177, "step": 4386 }, { "epoch": 0.9761904761904762, "grad_norm": 1.7515255978421316, "learning_rate": 1.5138897455291847e-08, "loss": 0.4135, "step": 4387 }, { "epoch": 0.9764129951045839, "grad_norm": 1.6681398144033093, "learning_rate": 1.4859983725490357e-08, "loss": 0.4307, "step": 4388 }, { "epoch": 0.9766355140186916, "grad_norm": 1.638238807477793, "learning_rate": 1.4583659420764896e-08, "loss": 0.412, "step": 4389 }, { "epoch": 0.9768580329327993, "grad_norm": 1.6902591831714506, "learning_rate": 1.4309924684645094e-08, "loss": 0.4331, "step": 4390 }, { "epoch": 0.977080551846907, "grad_norm": 1.8094052736981832, "learning_rate": 1.4038779659317769e-08, "loss": 0.4289, "step": 4391 }, { "epoch": 0.9773030707610146, "grad_norm": 1.6738821270201703, "learning_rate": 1.377022448562193e-08, "loss": 0.4211, "step": 4392 }, { "epoch": 0.9775255896751224, "grad_norm": 1.7613592268197829, "learning_rate": 1.3504259303054323e-08, "loss": 0.4195, "step": 4393 }, { "epoch": 0.9777481085892301, "grad_norm": 1.8064869193721609, "learning_rate": 1.3240884249763886e-08, "loss": 0.4163, "step": 4394 }, { "epoch": 0.9779706275033377, "grad_norm": 1.6458922387789634, "learning_rate": 1.2980099462556184e-08, "loss": 0.4179, "step": 4395 }, { "epoch": 0.9781931464174455, "grad_norm": 1.7881814024415332, "learning_rate": 1.2721905076889529e-08, "loss": 0.4311, "step": 4396 }, { "epoch": 0.9784156653315532, "grad_norm": 1.7006101082276315, "learning_rate": 1.2466301226877752e-08, "loss": 0.4191, "step": 4397 }, { "epoch": 0.9786381842456608, "grad_norm": 1.7209173568875153, "learning_rate": 1.2213288045288541e-08, "loss": 0.4266, "step": 4398 }, { "epoch": 0.9788607031597686, "grad_norm": 1.769002790349142, "learning_rate": 1.1962865663544544e-08, "loss": 0.4314, "step": 4399 }, { "epoch": 0.9790832220738763, "grad_norm": 1.6868112166495557, "learning_rate": 1.171503421172282e-08, "loss": 0.4293, "step": 4400 }, { "epoch": 0.979305740987984, "grad_norm": 1.7880317690220902, "learning_rate": 1.1469793818553176e-08, "loss": 0.418, "step": 4401 }, { "epoch": 0.9795282599020917, "grad_norm": 1.8555769756381804, "learning_rate": 1.1227144611421492e-08, "loss": 0.4233, "step": 4402 }, { "epoch": 0.9797507788161994, "grad_norm": 1.7785241562631702, "learning_rate": 1.0987086716365835e-08, "loss": 0.4159, "step": 4403 }, { "epoch": 0.9799732977303071, "grad_norm": 1.5387408632299568, "learning_rate": 1.0749620258079241e-08, "loss": 0.413, "step": 4404 }, { "epoch": 0.9801958166444148, "grad_norm": 1.7760048861562485, "learning_rate": 1.0514745359909706e-08, "loss": 0.4272, "step": 4405 }, { "epoch": 0.9804183355585224, "grad_norm": 1.6670845423407497, "learning_rate": 1.0282462143856864e-08, "loss": 0.4342, "step": 4406 }, { "epoch": 0.9806408544726302, "grad_norm": 1.6946949087116974, "learning_rate": 1.0052770730575867e-08, "loss": 0.432, "step": 4407 }, { "epoch": 0.9808633733867379, "grad_norm": 1.6808977870113615, "learning_rate": 9.825671239374612e-09, "loss": 0.4209, "step": 4408 }, { "epoch": 0.9810858923008455, "grad_norm": 1.7739472150482753, "learning_rate": 9.601163788215406e-09, "loss": 0.4178, "step": 4409 }, { "epoch": 0.9813084112149533, "grad_norm": 1.7383627620109416, "learning_rate": 9.37924849371441e-09, "loss": 0.4242, "step": 4410 }, { "epoch": 0.981530930129061, "grad_norm": 1.626505893351603, "learning_rate": 9.159925471139419e-09, "loss": 0.4429, "step": 4411 }, { "epoch": 0.9817534490431686, "grad_norm": 1.9715181730083995, "learning_rate": 8.943194834414304e-09, "loss": 0.4458, "step": 4412 }, { "epoch": 0.9819759679572764, "grad_norm": 1.6861666210463706, "learning_rate": 8.729056696115123e-09, "loss": 0.4204, "step": 4413 }, { "epoch": 0.9821984868713841, "grad_norm": 1.6638531169107496, "learning_rate": 8.517511167470683e-09, "loss": 0.4337, "step": 4414 }, { "epoch": 0.9824210057854917, "grad_norm": 1.79202782952808, "learning_rate": 8.308558358364193e-09, "loss": 0.444, "step": 4415 }, { "epoch": 0.9826435246995995, "grad_norm": 2.011379854479186, "learning_rate": 8.102198377332172e-09, "loss": 0.4222, "step": 4416 }, { "epoch": 0.9828660436137072, "grad_norm": 1.6401263552755627, "learning_rate": 7.898431331563317e-09, "loss": 0.4255, "step": 4417 }, { "epoch": 0.9830885625278148, "grad_norm": 1.7424680745834646, "learning_rate": 7.697257326900187e-09, "loss": 0.4398, "step": 4418 }, { "epoch": 0.9833110814419226, "grad_norm": 1.6843688260864533, "learning_rate": 7.498676467838084e-09, "loss": 0.4356, "step": 4419 }, { "epoch": 0.9835336003560302, "grad_norm": 1.8318691671916316, "learning_rate": 7.3026888575267184e-09, "loss": 0.4298, "step": 4420 }, { "epoch": 0.983756119270138, "grad_norm": 1.8095163187172156, "learning_rate": 7.10929459776688e-09, "loss": 0.4297, "step": 4421 }, { "epoch": 0.9839786381842457, "grad_norm": 1.7398515100015097, "learning_rate": 6.918493789012659e-09, "loss": 0.4334, "step": 4422 }, { "epoch": 0.9842011570983533, "grad_norm": 1.5719467281674213, "learning_rate": 6.730286530372554e-09, "loss": 0.4231, "step": 4423 }, { "epoch": 0.9844236760124611, "grad_norm": 1.834798471064668, "learning_rate": 6.5446729196061434e-09, "loss": 0.4323, "step": 4424 }, { "epoch": 0.9846461949265688, "grad_norm": 1.7380344730157231, "learning_rate": 6.361653053126305e-09, "loss": 0.4307, "step": 4425 }, { "epoch": 0.9848687138406764, "grad_norm": 1.7338147376588846, "learning_rate": 6.181227025999214e-09, "loss": 0.4364, "step": 4426 }, { "epoch": 0.9850912327547842, "grad_norm": 1.720419227571445, "learning_rate": 6.0033949319437956e-09, "loss": 0.4172, "step": 4427 }, { "epoch": 0.9853137516688919, "grad_norm": 1.7683904170509388, "learning_rate": 5.8281568633300475e-09, "loss": 0.4131, "step": 4428 }, { "epoch": 0.9855362705829995, "grad_norm": 1.685100589013395, "learning_rate": 5.6555129111823815e-09, "loss": 0.4293, "step": 4429 }, { "epoch": 0.9857587894971073, "grad_norm": 1.6630411064185495, "learning_rate": 5.485463165176841e-09, "loss": 0.4408, "step": 4430 }, { "epoch": 0.985981308411215, "grad_norm": 1.7255050818831401, "learning_rate": 5.318007713642215e-09, "loss": 0.431, "step": 4431 }, { "epoch": 0.9862038273253226, "grad_norm": 1.709380394949175, "learning_rate": 5.153146643559481e-09, "loss": 0.4405, "step": 4432 }, { "epoch": 0.9864263462394304, "grad_norm": 1.724898688785051, "learning_rate": 4.990880040562918e-09, "loss": 0.42, "step": 4433 }, { "epoch": 0.986648865153538, "grad_norm": 1.6414870021239312, "learning_rate": 4.831207988937325e-09, "loss": 0.4331, "step": 4434 }, { "epoch": 0.9868713840676457, "grad_norm": 1.664906359529523, "learning_rate": 4.674130571621915e-09, "loss": 0.422, "step": 4435 }, { "epoch": 0.9870939029817535, "grad_norm": 1.625504642080506, "learning_rate": 4.519647870206978e-09, "loss": 0.4, "step": 4436 }, { "epoch": 0.9873164218958611, "grad_norm": 1.7351062152731225, "learning_rate": 4.367759964934992e-09, "loss": 0.4296, "step": 4437 }, { "epoch": 0.9875389408099688, "grad_norm": 1.7706194890655178, "learning_rate": 4.218466934701182e-09, "loss": 0.4328, "step": 4438 }, { "epoch": 0.9877614597240766, "grad_norm": 1.8487017589860988, "learning_rate": 4.071768857052405e-09, "loss": 0.433, "step": 4439 }, { "epoch": 0.9879839786381842, "grad_norm": 1.8933661173620324, "learning_rate": 3.927665808188263e-09, "loss": 0.4378, "step": 4440 }, { "epoch": 0.988206497552292, "grad_norm": 1.7049242273533882, "learning_rate": 3.7861578629594385e-09, "loss": 0.4157, "step": 4441 }, { "epoch": 0.9884290164663997, "grad_norm": 1.7361311387061786, "learning_rate": 3.647245094869356e-09, "loss": 0.4376, "step": 4442 }, { "epoch": 0.9886515353805073, "grad_norm": 1.8231277093985787, "learning_rate": 3.5109275760736304e-09, "loss": 0.424, "step": 4443 }, { "epoch": 0.9888740542946151, "grad_norm": 1.6993783360129853, "learning_rate": 3.377205377379511e-09, "loss": 0.4089, "step": 4444 }, { "epoch": 0.9890965732087228, "grad_norm": 1.6321886949064137, "learning_rate": 3.246078568246436e-09, "loss": 0.4096, "step": 4445 }, { "epoch": 0.9893190921228304, "grad_norm": 1.7382677791796157, "learning_rate": 3.1175472167843667e-09, "loss": 0.4602, "step": 4446 }, { "epoch": 0.9895416110369382, "grad_norm": 1.8664527638239805, "learning_rate": 2.9916113897571207e-09, "loss": 0.467, "step": 4447 }, { "epoch": 0.9897641299510458, "grad_norm": 1.727968835845374, "learning_rate": 2.8682711525790387e-09, "loss": 0.4184, "step": 4448 }, { "epoch": 0.9899866488651535, "grad_norm": 1.6277258837286317, "learning_rate": 2.7475265693160947e-09, "loss": 0.4152, "step": 4449 }, { "epoch": 0.9902091677792613, "grad_norm": 1.6706858767132824, "learning_rate": 2.629377702687563e-09, "loss": 0.411, "step": 4450 } ], "logging_steps": 1.0, "max_steps": 4494, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 339092741840896.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }