{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 40.013377926421406, "eval_steps": 500, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.013377926421404682, "grad_norm": 0.5197079181671143, "learning_rate": 0.0, "loss": 4.2636, "step": 1 }, { "epoch": 0.026755852842809364, "grad_norm": 0.5626901984214783, "learning_rate": 4e-05, "loss": 4.3971, "step": 2 }, { "epoch": 0.04013377926421405, "grad_norm": 0.5167903304100037, "learning_rate": 8e-05, "loss": 4.3249, "step": 3 }, { "epoch": 0.05351170568561873, "grad_norm": 0.4764951169490814, "learning_rate": 0.00012, "loss": 4.2031, "step": 4 }, { "epoch": 0.06688963210702341, "grad_norm": 0.45488491654396057, "learning_rate": 0.00016, "loss": 4.3914, "step": 5 }, { "epoch": 0.0802675585284281, "grad_norm": 0.568274736404419, "learning_rate": 0.0002, "loss": 4.2346, "step": 6 }, { "epoch": 0.09364548494983277, "grad_norm": 0.5974003076553345, "learning_rate": 0.0001999555061179088, "loss": 4.131, "step": 7 }, { "epoch": 0.10702341137123746, "grad_norm": 0.6204471588134766, "learning_rate": 0.00019991101223581757, "loss": 4.2256, "step": 8 }, { "epoch": 0.12040133779264214, "grad_norm": 0.7143808603286743, "learning_rate": 0.00019986651835372636, "loss": 3.8449, "step": 9 }, { "epoch": 0.13377926421404682, "grad_norm": 0.7799420356750488, "learning_rate": 0.00019982202447163517, "loss": 4.4301, "step": 10 }, { "epoch": 0.14715719063545152, "grad_norm": 0.8880407214164734, "learning_rate": 0.00019977753058954395, "loss": 4.2266, "step": 11 }, { "epoch": 0.1605351170568562, "grad_norm": 0.7776209712028503, "learning_rate": 0.00019973303670745273, "loss": 4.3208, "step": 12 }, { "epoch": 0.17391304347826086, "grad_norm": 0.9125858545303345, "learning_rate": 0.0001996885428253615, "loss": 4.4363, "step": 13 }, { "epoch": 0.18729096989966554, "grad_norm": 0.9000256657600403, "learning_rate": 0.00019964404894327032, "loss": 4.2917, "step": 14 }, { "epoch": 0.20066889632107024, "grad_norm": 0.9995108246803284, "learning_rate": 0.00019959955506117908, "loss": 4.1784, "step": 15 }, { "epoch": 0.2140468227424749, "grad_norm": 0.9209024310112, "learning_rate": 0.0001995550611790879, "loss": 4.7852, "step": 16 }, { "epoch": 0.22742474916387959, "grad_norm": 0.9421981573104858, "learning_rate": 0.00019951056729699667, "loss": 4.8501, "step": 17 }, { "epoch": 0.2408026755852843, "grad_norm": 0.9213201403617859, "learning_rate": 0.00019946607341490545, "loss": 4.7923, "step": 18 }, { "epoch": 0.25418060200668896, "grad_norm": 0.9378194212913513, "learning_rate": 0.00019942157953281423, "loss": 4.9593, "step": 19 }, { "epoch": 0.26755852842809363, "grad_norm": 1.0096492767333984, "learning_rate": 0.00019937708565072304, "loss": 4.7099, "step": 20 }, { "epoch": 0.2809364548494983, "grad_norm": 0.8903587460517883, "learning_rate": 0.00019933259176863183, "loss": 4.3746, "step": 21 }, { "epoch": 0.29431438127090304, "grad_norm": 0.7808490991592407, "learning_rate": 0.0001992880978865406, "loss": 4.5873, "step": 22 }, { "epoch": 0.3076923076923077, "grad_norm": 0.8145670294761658, "learning_rate": 0.0001992436040044494, "loss": 4.7924, "step": 23 }, { "epoch": 0.3210702341137124, "grad_norm": 0.7945849299430847, "learning_rate": 0.0001991991101223582, "loss": 4.8881, "step": 24 }, { "epoch": 0.33444816053511706, "grad_norm": 0.7871395349502563, "learning_rate": 0.00019915461624026696, "loss": 4.6922, "step": 25 }, { "epoch": 0.34782608695652173, "grad_norm": 0.9111238718032837, "learning_rate": 0.00019911012235817577, "loss": 4.9982, "step": 26 }, { "epoch": 0.3612040133779264, "grad_norm": 0.7121369242668152, "learning_rate": 0.00019906562847608455, "loss": 4.4756, "step": 27 }, { "epoch": 0.3745819397993311, "grad_norm": 0.7118422389030457, "learning_rate": 0.00019902113459399333, "loss": 5.1389, "step": 28 }, { "epoch": 0.3879598662207358, "grad_norm": 0.7100292444229126, "learning_rate": 0.0001989766407119021, "loss": 4.7691, "step": 29 }, { "epoch": 0.4013377926421405, "grad_norm": 0.708591639995575, "learning_rate": 0.00019893214682981092, "loss": 4.8721, "step": 30 }, { "epoch": 0.41471571906354515, "grad_norm": 0.6711616516113281, "learning_rate": 0.0001988876529477197, "loss": 4.9152, "step": 31 }, { "epoch": 0.4280936454849498, "grad_norm": 0.7158232927322388, "learning_rate": 0.0001988431590656285, "loss": 4.828, "step": 32 }, { "epoch": 0.4414715719063545, "grad_norm": 0.6246087551116943, "learning_rate": 0.00019879866518353727, "loss": 4.8452, "step": 33 }, { "epoch": 0.45484949832775917, "grad_norm": 0.6088873744010925, "learning_rate": 0.00019875417130144608, "loss": 4.9702, "step": 34 }, { "epoch": 0.4682274247491639, "grad_norm": 0.5798126459121704, "learning_rate": 0.00019870967741935483, "loss": 4.9838, "step": 35 }, { "epoch": 0.4816053511705686, "grad_norm": 0.6268919706344604, "learning_rate": 0.00019866518353726364, "loss": 4.7636, "step": 36 }, { "epoch": 0.49498327759197325, "grad_norm": 0.5649904012680054, "learning_rate": 0.00019862068965517243, "loss": 4.506, "step": 37 }, { "epoch": 0.5083612040133779, "grad_norm": 0.5947792530059814, "learning_rate": 0.0001985761957730812, "loss": 4.8057, "step": 38 }, { "epoch": 0.5217391304347826, "grad_norm": 0.6204257011413574, "learning_rate": 0.00019853170189099, "loss": 5.0511, "step": 39 }, { "epoch": 0.5351170568561873, "grad_norm": 0.5972265601158142, "learning_rate": 0.0001984872080088988, "loss": 4.924, "step": 40 }, { "epoch": 0.5484949832775919, "grad_norm": 0.6117077469825745, "learning_rate": 0.00019844271412680758, "loss": 4.8729, "step": 41 }, { "epoch": 0.5618729096989966, "grad_norm": 0.5085508823394775, "learning_rate": 0.00019839822024471637, "loss": 4.3616, "step": 42 }, { "epoch": 0.5752508361204013, "grad_norm": 0.550647497177124, "learning_rate": 0.00019835372636262515, "loss": 5.2512, "step": 43 }, { "epoch": 0.5886287625418061, "grad_norm": 0.48329588770866394, "learning_rate": 0.00019830923248053396, "loss": 4.9501, "step": 44 }, { "epoch": 0.6020066889632107, "grad_norm": 0.6313246488571167, "learning_rate": 0.0001982647385984427, "loss": 4.5767, "step": 45 }, { "epoch": 0.6153846153846154, "grad_norm": 0.5111928582191467, "learning_rate": 0.00019822024471635152, "loss": 4.5586, "step": 46 }, { "epoch": 0.6287625418060201, "grad_norm": 0.5264492630958557, "learning_rate": 0.0001981757508342603, "loss": 4.7033, "step": 47 }, { "epoch": 0.6421404682274248, "grad_norm": 0.5058289170265198, "learning_rate": 0.0001981312569521691, "loss": 4.8396, "step": 48 }, { "epoch": 0.6555183946488294, "grad_norm": 0.5688439607620239, "learning_rate": 0.00019808676307007787, "loss": 5.1887, "step": 49 }, { "epoch": 0.6688963210702341, "grad_norm": 0.5488842129707336, "learning_rate": 0.00019804226918798665, "loss": 4.6075, "step": 50 }, { "epoch": 0.6822742474916388, "grad_norm": 0.5358632206916809, "learning_rate": 0.00019799777530589546, "loss": 5.0205, "step": 51 }, { "epoch": 0.6956521739130435, "grad_norm": 0.47869494557380676, "learning_rate": 0.00019795328142380422, "loss": 4.8495, "step": 52 }, { "epoch": 0.7090301003344481, "grad_norm": 0.49378660321235657, "learning_rate": 0.00019790878754171303, "loss": 4.6563, "step": 53 }, { "epoch": 0.7224080267558528, "grad_norm": 0.5167868733406067, "learning_rate": 0.0001978642936596218, "loss": 5.2558, "step": 54 }, { "epoch": 0.7357859531772575, "grad_norm": 0.5230040550231934, "learning_rate": 0.0001978197997775306, "loss": 4.7769, "step": 55 }, { "epoch": 0.7491638795986622, "grad_norm": 0.4822310507297516, "learning_rate": 0.00019777530589543937, "loss": 4.9282, "step": 56 }, { "epoch": 0.7625418060200669, "grad_norm": 0.500045895576477, "learning_rate": 0.00019773081201334818, "loss": 5.0399, "step": 57 }, { "epoch": 0.7759197324414716, "grad_norm": 0.4740642309188843, "learning_rate": 0.00019768631813125696, "loss": 4.8041, "step": 58 }, { "epoch": 0.7892976588628763, "grad_norm": 0.45918184518814087, "learning_rate": 0.00019764182424916575, "loss": 4.6304, "step": 59 }, { "epoch": 0.802675585284281, "grad_norm": 0.53122878074646, "learning_rate": 0.00019759733036707453, "loss": 4.8377, "step": 60 }, { "epoch": 0.8160535117056856, "grad_norm": 0.4925791919231415, "learning_rate": 0.00019755283648498334, "loss": 5.0919, "step": 61 }, { "epoch": 0.8294314381270903, "grad_norm": 0.4777262806892395, "learning_rate": 0.0001975083426028921, "loss": 4.8379, "step": 62 }, { "epoch": 0.842809364548495, "grad_norm": 0.49119675159454346, "learning_rate": 0.0001974638487208009, "loss": 5.0819, "step": 63 }, { "epoch": 0.8561872909698997, "grad_norm": 0.4732685089111328, "learning_rate": 0.00019741935483870969, "loss": 4.8948, "step": 64 }, { "epoch": 0.8695652173913043, "grad_norm": 0.46269145607948303, "learning_rate": 0.00019737486095661847, "loss": 4.824, "step": 65 }, { "epoch": 0.882943143812709, "grad_norm": 0.49532708525657654, "learning_rate": 0.00019733036707452725, "loss": 4.8986, "step": 66 }, { "epoch": 0.8963210702341137, "grad_norm": 0.5253002643585205, "learning_rate": 0.00019728587319243606, "loss": 4.9073, "step": 67 }, { "epoch": 0.9096989966555183, "grad_norm": 0.5069419145584106, "learning_rate": 0.00019724137931034484, "loss": 4.8962, "step": 68 }, { "epoch": 0.9230769230769231, "grad_norm": 0.5038817524909973, "learning_rate": 0.00019719688542825363, "loss": 4.8711, "step": 69 }, { "epoch": 0.9364548494983278, "grad_norm": 0.4987100064754486, "learning_rate": 0.0001971523915461624, "loss": 4.8816, "step": 70 }, { "epoch": 0.9498327759197325, "grad_norm": 0.47370976209640503, "learning_rate": 0.00019710789766407122, "loss": 4.9675, "step": 71 }, { "epoch": 0.9632107023411371, "grad_norm": 0.5081727504730225, "learning_rate": 0.00019706340378197997, "loss": 4.2768, "step": 72 }, { "epoch": 0.9765886287625418, "grad_norm": 0.45571258664131165, "learning_rate": 0.00019701890989988878, "loss": 4.6182, "step": 73 }, { "epoch": 0.9899665551839465, "grad_norm": 0.5216127634048462, "learning_rate": 0.00019697441601779756, "loss": 4.7126, "step": 74 }, { "epoch": 1.0, "grad_norm": 0.5393329858779907, "learning_rate": 0.00019692992213570635, "loss": 4.4919, "step": 75 }, { "epoch": 1.0133779264214047, "grad_norm": 0.4506986737251282, "learning_rate": 0.00019688542825361513, "loss": 4.5089, "step": 76 }, { "epoch": 1.0267558528428093, "grad_norm": 0.4328899085521698, "learning_rate": 0.00019684093437152394, "loss": 4.7518, "step": 77 }, { "epoch": 1.040133779264214, "grad_norm": 0.4397362470626831, "learning_rate": 0.00019679644048943272, "loss": 4.5069, "step": 78 }, { "epoch": 1.0535117056856187, "grad_norm": 0.4604664146900177, "learning_rate": 0.0001967519466073415, "loss": 4.7054, "step": 79 }, { "epoch": 1.0668896321070234, "grad_norm": 0.4398234784603119, "learning_rate": 0.00019670745272525029, "loss": 4.2743, "step": 80 }, { "epoch": 1.080267558528428, "grad_norm": 0.4570735692977905, "learning_rate": 0.0001966629588431591, "loss": 4.8012, "step": 81 }, { "epoch": 1.0936454849498327, "grad_norm": 0.4814144968986511, "learning_rate": 0.00019661846496106785, "loss": 4.6449, "step": 82 }, { "epoch": 1.1070234113712374, "grad_norm": 0.4526231288909912, "learning_rate": 0.00019657397107897666, "loss": 4.5546, "step": 83 }, { "epoch": 1.120401337792642, "grad_norm": 0.4847906827926636, "learning_rate": 0.00019652947719688544, "loss": 4.4421, "step": 84 }, { "epoch": 1.1337792642140467, "grad_norm": 0.5136271715164185, "learning_rate": 0.00019648498331479422, "loss": 4.7136, "step": 85 }, { "epoch": 1.1471571906354514, "grad_norm": 0.49209895730018616, "learning_rate": 0.000196440489432703, "loss": 4.3145, "step": 86 }, { "epoch": 1.160535117056856, "grad_norm": 0.4972032904624939, "learning_rate": 0.00019639599555061182, "loss": 4.0408, "step": 87 }, { "epoch": 1.1739130434782608, "grad_norm": 0.5077862739562988, "learning_rate": 0.0001963515016685206, "loss": 4.4074, "step": 88 }, { "epoch": 1.1872909698996654, "grad_norm": 0.5293861031532288, "learning_rate": 0.00019630700778642935, "loss": 4.5385, "step": 89 }, { "epoch": 1.2006688963210703, "grad_norm": 0.5062645673751831, "learning_rate": 0.00019626251390433816, "loss": 4.5141, "step": 90 }, { "epoch": 1.214046822742475, "grad_norm": 0.49655866622924805, "learning_rate": 0.00019621802002224695, "loss": 4.4765, "step": 91 }, { "epoch": 1.2274247491638797, "grad_norm": 0.6059755086898804, "learning_rate": 0.00019617352614015573, "loss": 4.568, "step": 92 }, { "epoch": 1.2408026755852843, "grad_norm": 0.5442761778831482, "learning_rate": 0.0001961290322580645, "loss": 4.7724, "step": 93 }, { "epoch": 1.254180602006689, "grad_norm": 0.5426056385040283, "learning_rate": 0.00019608453837597332, "loss": 4.5308, "step": 94 }, { "epoch": 1.2675585284280937, "grad_norm": 0.525372326374054, "learning_rate": 0.0001960400444938821, "loss": 4.394, "step": 95 }, { "epoch": 1.2809364548494984, "grad_norm": 0.5407588481903076, "learning_rate": 0.00019599555061179089, "loss": 4.7347, "step": 96 }, { "epoch": 1.294314381270903, "grad_norm": 0.5726659893989563, "learning_rate": 0.00019595105672969967, "loss": 4.9446, "step": 97 }, { "epoch": 1.3076923076923077, "grad_norm": 0.6211283206939697, "learning_rate": 0.00019590656284760848, "loss": 4.697, "step": 98 }, { "epoch": 1.3210702341137124, "grad_norm": 0.5627567172050476, "learning_rate": 0.00019586206896551723, "loss": 4.4892, "step": 99 }, { "epoch": 1.334448160535117, "grad_norm": 0.6174790859222412, "learning_rate": 0.00019581757508342604, "loss": 4.5686, "step": 100 }, { "epoch": 1.3478260869565217, "grad_norm": 0.5586990118026733, "learning_rate": 0.00019577308120133482, "loss": 4.3916, "step": 101 }, { "epoch": 1.3612040133779264, "grad_norm": 0.5655365586280823, "learning_rate": 0.0001957285873192436, "loss": 4.1789, "step": 102 }, { "epoch": 1.374581939799331, "grad_norm": 0.5834594964981079, "learning_rate": 0.0001956840934371524, "loss": 4.3316, "step": 103 }, { "epoch": 1.3879598662207357, "grad_norm": 0.6065447926521301, "learning_rate": 0.0001956395995550612, "loss": 4.5167, "step": 104 }, { "epoch": 1.4013377926421404, "grad_norm": 0.5250216722488403, "learning_rate": 0.00019559510567296998, "loss": 4.1718, "step": 105 }, { "epoch": 1.414715719063545, "grad_norm": 0.5861116051673889, "learning_rate": 0.00019555061179087876, "loss": 4.3077, "step": 106 }, { "epoch": 1.4280936454849498, "grad_norm": 0.6138104796409607, "learning_rate": 0.00019550611790878755, "loss": 4.4748, "step": 107 }, { "epoch": 1.4414715719063544, "grad_norm": 0.6742071509361267, "learning_rate": 0.00019546162402669636, "loss": 4.8769, "step": 108 }, { "epoch": 1.4548494983277591, "grad_norm": 0.6634951233863831, "learning_rate": 0.0001954171301446051, "loss": 4.6423, "step": 109 }, { "epoch": 1.468227424749164, "grad_norm": 0.626646876335144, "learning_rate": 0.00019537263626251392, "loss": 4.4654, "step": 110 }, { "epoch": 1.4816053511705687, "grad_norm": 0.6306963562965393, "learning_rate": 0.0001953281423804227, "loss": 4.7021, "step": 111 }, { "epoch": 1.4949832775919734, "grad_norm": 0.620370626449585, "learning_rate": 0.00019528364849833149, "loss": 4.587, "step": 112 }, { "epoch": 1.508361204013378, "grad_norm": 0.6410287618637085, "learning_rate": 0.00019523915461624027, "loss": 4.8089, "step": 113 }, { "epoch": 1.5217391304347827, "grad_norm": 0.676434338092804, "learning_rate": 0.00019519466073414908, "loss": 4.668, "step": 114 }, { "epoch": 1.5351170568561874, "grad_norm": 0.5756319761276245, "learning_rate": 0.00019515016685205786, "loss": 4.3223, "step": 115 }, { "epoch": 1.548494983277592, "grad_norm": 0.5850693583488464, "learning_rate": 0.00019510567296996664, "loss": 4.2343, "step": 116 }, { "epoch": 1.5618729096989967, "grad_norm": 0.6172360777854919, "learning_rate": 0.00019506117908787542, "loss": 4.6102, "step": 117 }, { "epoch": 1.5752508361204014, "grad_norm": 0.5887568593025208, "learning_rate": 0.00019501668520578423, "loss": 4.8097, "step": 118 }, { "epoch": 1.588628762541806, "grad_norm": 0.5763369798660278, "learning_rate": 0.000194972191323693, "loss": 4.2001, "step": 119 }, { "epoch": 1.6020066889632107, "grad_norm": 0.6158986687660217, "learning_rate": 0.0001949276974416018, "loss": 4.7075, "step": 120 }, { "epoch": 1.6153846153846154, "grad_norm": 0.5540957450866699, "learning_rate": 0.00019488320355951058, "loss": 4.452, "step": 121 }, { "epoch": 1.62876254180602, "grad_norm": 0.6193795204162598, "learning_rate": 0.00019483870967741936, "loss": 4.4583, "step": 122 }, { "epoch": 1.6421404682274248, "grad_norm": 0.6699966788291931, "learning_rate": 0.00019479421579532815, "loss": 4.3728, "step": 123 }, { "epoch": 1.6555183946488294, "grad_norm": 0.5904677510261536, "learning_rate": 0.00019474972191323696, "loss": 4.5452, "step": 124 }, { "epoch": 1.6688963210702341, "grad_norm": 0.6137760281562805, "learning_rate": 0.00019470522803114574, "loss": 4.2853, "step": 125 }, { "epoch": 1.6822742474916388, "grad_norm": 0.6396192908287048, "learning_rate": 0.00019466073414905452, "loss": 4.4258, "step": 126 }, { "epoch": 1.6956521739130435, "grad_norm": 0.6190487742424011, "learning_rate": 0.0001946162402669633, "loss": 4.9866, "step": 127 }, { "epoch": 1.7090301003344481, "grad_norm": 0.6971675157546997, "learning_rate": 0.0001945717463848721, "loss": 4.2126, "step": 128 }, { "epoch": 1.7224080267558528, "grad_norm": 0.6245931386947632, "learning_rate": 0.00019452725250278087, "loss": 4.8477, "step": 129 }, { "epoch": 1.7357859531772575, "grad_norm": 0.5675052404403687, "learning_rate": 0.00019448275862068965, "loss": 4.4097, "step": 130 }, { "epoch": 1.7491638795986622, "grad_norm": 0.6594040393829346, "learning_rate": 0.00019443826473859846, "loss": 4.3747, "step": 131 }, { "epoch": 1.7625418060200668, "grad_norm": 0.6377655267715454, "learning_rate": 0.00019439377085650724, "loss": 4.2733, "step": 132 }, { "epoch": 1.7759197324414715, "grad_norm": 0.6167862415313721, "learning_rate": 0.00019434927697441602, "loss": 4.5694, "step": 133 }, { "epoch": 1.7892976588628762, "grad_norm": 0.577671468257904, "learning_rate": 0.0001943047830923248, "loss": 4.5006, "step": 134 }, { "epoch": 1.8026755852842808, "grad_norm": 0.6361016035079956, "learning_rate": 0.00019426028921023362, "loss": 4.9907, "step": 135 }, { "epoch": 1.8160535117056855, "grad_norm": 0.6445321440696716, "learning_rate": 0.00019421579532814237, "loss": 4.779, "step": 136 }, { "epoch": 1.8294314381270902, "grad_norm": 0.5955402851104736, "learning_rate": 0.00019417130144605118, "loss": 4.6026, "step": 137 }, { "epoch": 1.8428093645484949, "grad_norm": 0.6807080507278442, "learning_rate": 0.00019412680756395996, "loss": 4.7124, "step": 138 }, { "epoch": 1.8561872909698995, "grad_norm": 0.5799288153648376, "learning_rate": 0.00019408231368186875, "loss": 4.0701, "step": 139 }, { "epoch": 1.8695652173913042, "grad_norm": 0.6187757253646851, "learning_rate": 0.00019403781979977753, "loss": 4.705, "step": 140 }, { "epoch": 1.8829431438127089, "grad_norm": 0.6614826917648315, "learning_rate": 0.00019399332591768634, "loss": 4.8146, "step": 141 }, { "epoch": 1.8963210702341136, "grad_norm": 0.6204859614372253, "learning_rate": 0.00019394883203559512, "loss": 4.3041, "step": 142 }, { "epoch": 1.9096989966555182, "grad_norm": 0.6527450680732727, "learning_rate": 0.0001939043381535039, "loss": 4.4493, "step": 143 }, { "epoch": 1.9230769230769231, "grad_norm": 0.6470615267753601, "learning_rate": 0.00019385984427141268, "loss": 4.7771, "step": 144 }, { "epoch": 1.9364548494983278, "grad_norm": 0.5642555952072144, "learning_rate": 0.0001938153503893215, "loss": 4.3344, "step": 145 }, { "epoch": 1.9498327759197325, "grad_norm": 0.6206467151641846, "learning_rate": 0.00019377085650723025, "loss": 4.2191, "step": 146 }, { "epoch": 1.9632107023411371, "grad_norm": 0.6079016923904419, "learning_rate": 0.00019372636262513906, "loss": 4.7397, "step": 147 }, { "epoch": 1.9765886287625418, "grad_norm": 0.6197662353515625, "learning_rate": 0.00019368186874304784, "loss": 4.5342, "step": 148 }, { "epoch": 1.9899665551839465, "grad_norm": 0.6556297540664673, "learning_rate": 0.00019363737486095662, "loss": 4.6709, "step": 149 }, { "epoch": 2.0, "grad_norm": 0.7837930917739868, "learning_rate": 0.0001935928809788654, "loss": 4.6215, "step": 150 }, { "epoch": 2.0133779264214047, "grad_norm": 0.5267267227172852, "learning_rate": 0.00019354838709677422, "loss": 4.2695, "step": 151 }, { "epoch": 2.0267558528428093, "grad_norm": 0.5862157344818115, "learning_rate": 0.000193503893214683, "loss": 4.3702, "step": 152 }, { "epoch": 2.040133779264214, "grad_norm": 0.538254976272583, "learning_rate": 0.00019345939933259178, "loss": 4.3953, "step": 153 }, { "epoch": 2.0535117056856187, "grad_norm": 0.5977053642272949, "learning_rate": 0.00019341490545050056, "loss": 4.2156, "step": 154 }, { "epoch": 2.0668896321070234, "grad_norm": 0.606006383895874, "learning_rate": 0.00019337041156840937, "loss": 4.2802, "step": 155 }, { "epoch": 2.080267558528428, "grad_norm": 0.6071277856826782, "learning_rate": 0.00019332591768631813, "loss": 4.5545, "step": 156 }, { "epoch": 2.0936454849498327, "grad_norm": 0.6281546354293823, "learning_rate": 0.00019328142380422694, "loss": 4.6105, "step": 157 }, { "epoch": 2.1070234113712374, "grad_norm": 0.5703116655349731, "learning_rate": 0.00019323692992213572, "loss": 4.2751, "step": 158 }, { "epoch": 2.120401337792642, "grad_norm": 0.6587452292442322, "learning_rate": 0.0001931924360400445, "loss": 4.6342, "step": 159 }, { "epoch": 2.1337792642140467, "grad_norm": 0.6141905784606934, "learning_rate": 0.00019314794215795328, "loss": 4.4345, "step": 160 }, { "epoch": 2.1471571906354514, "grad_norm": 0.6741939187049866, "learning_rate": 0.0001931034482758621, "loss": 4.0257, "step": 161 }, { "epoch": 2.160535117056856, "grad_norm": 0.6468759179115295, "learning_rate": 0.00019305895439377088, "loss": 4.2313, "step": 162 }, { "epoch": 2.1739130434782608, "grad_norm": 0.6703383326530457, "learning_rate": 0.00019301446051167966, "loss": 4.2164, "step": 163 }, { "epoch": 2.1872909698996654, "grad_norm": 0.710967481136322, "learning_rate": 0.00019296996662958844, "loss": 4.3398, "step": 164 }, { "epoch": 2.20066889632107, "grad_norm": 0.6862124800682068, "learning_rate": 0.00019292547274749725, "loss": 4.3379, "step": 165 }, { "epoch": 2.2140468227424748, "grad_norm": 0.6288430690765381, "learning_rate": 0.000192880978865406, "loss": 4.3487, "step": 166 }, { "epoch": 2.2274247491638794, "grad_norm": 0.6358796954154968, "learning_rate": 0.00019283648498331481, "loss": 4.1656, "step": 167 }, { "epoch": 2.240802675585284, "grad_norm": 0.6818917393684387, "learning_rate": 0.0001927919911012236, "loss": 4.5363, "step": 168 }, { "epoch": 2.254180602006689, "grad_norm": 0.6996105313301086, "learning_rate": 0.00019274749721913238, "loss": 4.3208, "step": 169 }, { "epoch": 2.2675585284280935, "grad_norm": 0.6730326414108276, "learning_rate": 0.00019270300333704116, "loss": 4.1401, "step": 170 }, { "epoch": 2.280936454849498, "grad_norm": 0.7022603750228882, "learning_rate": 0.00019265850945494994, "loss": 4.5761, "step": 171 }, { "epoch": 2.294314381270903, "grad_norm": 0.6525995135307312, "learning_rate": 0.00019261401557285875, "loss": 4.4017, "step": 172 }, { "epoch": 2.3076923076923075, "grad_norm": 0.7066033482551575, "learning_rate": 0.0001925695216907675, "loss": 4.0037, "step": 173 }, { "epoch": 2.321070234113712, "grad_norm": 0.6708059310913086, "learning_rate": 0.00019252502780867632, "loss": 4.1947, "step": 174 }, { "epoch": 2.334448160535117, "grad_norm": 0.8711172342300415, "learning_rate": 0.0001924805339265851, "loss": 3.9958, "step": 175 }, { "epoch": 2.3478260869565215, "grad_norm": 0.7258634567260742, "learning_rate": 0.00019243604004449388, "loss": 4.4682, "step": 176 }, { "epoch": 2.361204013377926, "grad_norm": 0.7693021893501282, "learning_rate": 0.00019239154616240267, "loss": 4.54, "step": 177 }, { "epoch": 2.374581939799331, "grad_norm": 0.7271276116371155, "learning_rate": 0.00019234705228031148, "loss": 4.2942, "step": 178 }, { "epoch": 2.387959866220736, "grad_norm": 0.6836609244346619, "learning_rate": 0.00019230255839822026, "loss": 4.3099, "step": 179 }, { "epoch": 2.4013377926421406, "grad_norm": 0.731164813041687, "learning_rate": 0.00019225806451612904, "loss": 4.4077, "step": 180 }, { "epoch": 2.4147157190635453, "grad_norm": 0.7575274109840393, "learning_rate": 0.00019221357063403782, "loss": 4.6572, "step": 181 }, { "epoch": 2.42809364548495, "grad_norm": 0.8461325168609619, "learning_rate": 0.00019216907675194663, "loss": 4.4922, "step": 182 }, { "epoch": 2.4414715719063547, "grad_norm": 0.7225251197814941, "learning_rate": 0.0001921245828698554, "loss": 4.0372, "step": 183 }, { "epoch": 2.4548494983277593, "grad_norm": 3.563720703125, "learning_rate": 0.0001920800889877642, "loss": 4.5412, "step": 184 }, { "epoch": 2.468227424749164, "grad_norm": 0.8452121019363403, "learning_rate": 0.00019203559510567298, "loss": 4.4961, "step": 185 }, { "epoch": 2.4816053511705687, "grad_norm": 0.8734024167060852, "learning_rate": 0.00019199110122358176, "loss": 4.0884, "step": 186 }, { "epoch": 2.4949832775919734, "grad_norm": 1.1765823364257812, "learning_rate": 0.00019194660734149054, "loss": 4.2228, "step": 187 }, { "epoch": 2.508361204013378, "grad_norm": 0.750206708908081, "learning_rate": 0.00019190211345939935, "loss": 4.4305, "step": 188 }, { "epoch": 2.5217391304347827, "grad_norm": 0.7574430704116821, "learning_rate": 0.00019185761957730814, "loss": 4.1511, "step": 189 }, { "epoch": 2.5351170568561874, "grad_norm": 0.7105517387390137, "learning_rate": 0.00019181312569521692, "loss": 4.4793, "step": 190 }, { "epoch": 2.548494983277592, "grad_norm": 0.7495557069778442, "learning_rate": 0.0001917686318131257, "loss": 4.1335, "step": 191 }, { "epoch": 2.5618729096989967, "grad_norm": 0.8001168966293335, "learning_rate": 0.0001917241379310345, "loss": 4.7898, "step": 192 }, { "epoch": 2.5752508361204014, "grad_norm": 0.7402104735374451, "learning_rate": 0.00019167964404894327, "loss": 4.4482, "step": 193 }, { "epoch": 2.588628762541806, "grad_norm": 0.748267650604248, "learning_rate": 0.00019163515016685207, "loss": 4.3167, "step": 194 }, { "epoch": 2.6020066889632107, "grad_norm": 0.8291250467300415, "learning_rate": 0.00019159065628476086, "loss": 4.058, "step": 195 }, { "epoch": 2.6153846153846154, "grad_norm": 0.6945542693138123, "learning_rate": 0.00019154616240266964, "loss": 3.9751, "step": 196 }, { "epoch": 2.62876254180602, "grad_norm": 0.7307319045066833, "learning_rate": 0.00019150166852057842, "loss": 4.2736, "step": 197 }, { "epoch": 2.6421404682274248, "grad_norm": 0.7489168047904968, "learning_rate": 0.00019145717463848723, "loss": 4.3075, "step": 198 }, { "epoch": 2.6555183946488294, "grad_norm": 0.9727582931518555, "learning_rate": 0.00019141268075639601, "loss": 4.6474, "step": 199 }, { "epoch": 2.668896321070234, "grad_norm": 0.6776256561279297, "learning_rate": 0.0001913681868743048, "loss": 4.4217, "step": 200 }, { "epoch": 2.682274247491639, "grad_norm": 0.7305111885070801, "learning_rate": 0.00019132369299221358, "loss": 4.2804, "step": 201 }, { "epoch": 2.6956521739130435, "grad_norm": 0.7196978330612183, "learning_rate": 0.0001912791991101224, "loss": 4.3941, "step": 202 }, { "epoch": 2.709030100334448, "grad_norm": 0.7988458871841431, "learning_rate": 0.00019123470522803114, "loss": 4.437, "step": 203 }, { "epoch": 2.722408026755853, "grad_norm": 0.7004797458648682, "learning_rate": 0.00019119021134593995, "loss": 4.4986, "step": 204 }, { "epoch": 2.7357859531772575, "grad_norm": 0.677796483039856, "learning_rate": 0.00019114571746384874, "loss": 4.0851, "step": 205 }, { "epoch": 2.749163879598662, "grad_norm": 0.7527475357055664, "learning_rate": 0.00019110122358175752, "loss": 4.4469, "step": 206 }, { "epoch": 2.762541806020067, "grad_norm": 1.1659115552902222, "learning_rate": 0.0001910567296996663, "loss": 4.3284, "step": 207 }, { "epoch": 2.7759197324414715, "grad_norm": 0.7238364815711975, "learning_rate": 0.0001910122358175751, "loss": 4.2605, "step": 208 }, { "epoch": 2.789297658862876, "grad_norm": 0.7537760734558105, "learning_rate": 0.0001909677419354839, "loss": 4.3775, "step": 209 }, { "epoch": 2.802675585284281, "grad_norm": 0.6874127388000488, "learning_rate": 0.00019092324805339267, "loss": 4.3404, "step": 210 }, { "epoch": 2.8160535117056855, "grad_norm": 0.7045959830284119, "learning_rate": 0.00019087875417130146, "loss": 4.1568, "step": 211 }, { "epoch": 2.82943143812709, "grad_norm": 0.7249194383621216, "learning_rate": 0.00019083426028921027, "loss": 4.1969, "step": 212 }, { "epoch": 2.842809364548495, "grad_norm": 0.8331268429756165, "learning_rate": 0.00019078976640711902, "loss": 4.3169, "step": 213 }, { "epoch": 2.8561872909698995, "grad_norm": 0.7171936631202698, "learning_rate": 0.0001907452725250278, "loss": 4.5123, "step": 214 }, { "epoch": 2.869565217391304, "grad_norm": 0.759919285774231, "learning_rate": 0.0001907007786429366, "loss": 4.5412, "step": 215 }, { "epoch": 2.882943143812709, "grad_norm": 0.7451274991035461, "learning_rate": 0.0001906562847608454, "loss": 4.5253, "step": 216 }, { "epoch": 2.8963210702341136, "grad_norm": 0.6564481258392334, "learning_rate": 0.00019061179087875418, "loss": 4.1092, "step": 217 }, { "epoch": 2.9096989966555182, "grad_norm": 0.7339865565299988, "learning_rate": 0.00019056729699666296, "loss": 4.5092, "step": 218 }, { "epoch": 2.9230769230769234, "grad_norm": 0.7113937735557556, "learning_rate": 0.00019052280311457177, "loss": 4.3355, "step": 219 }, { "epoch": 2.936454849498328, "grad_norm": 0.7306456565856934, "learning_rate": 0.00019047830923248053, "loss": 4.5745, "step": 220 }, { "epoch": 2.9498327759197327, "grad_norm": 0.7971818447113037, "learning_rate": 0.00019043381535038933, "loss": 4.2903, "step": 221 }, { "epoch": 2.9632107023411374, "grad_norm": 0.7757331728935242, "learning_rate": 0.00019038932146829812, "loss": 4.2832, "step": 222 }, { "epoch": 2.976588628762542, "grad_norm": 0.7326288223266602, "learning_rate": 0.0001903448275862069, "loss": 4.2444, "step": 223 }, { "epoch": 2.9899665551839467, "grad_norm": 0.7363834381103516, "learning_rate": 0.00019030033370411568, "loss": 4.6744, "step": 224 }, { "epoch": 3.0, "grad_norm": 0.8835271596908569, "learning_rate": 0.0001902558398220245, "loss": 4.432, "step": 225 }, { "epoch": 3.0133779264214047, "grad_norm": 0.6591921448707581, "learning_rate": 0.00019021134593993327, "loss": 4.1353, "step": 226 }, { "epoch": 3.0267558528428093, "grad_norm": 0.6895263195037842, "learning_rate": 0.00019016685205784206, "loss": 4.1253, "step": 227 }, { "epoch": 3.040133779264214, "grad_norm": 0.6476898789405823, "learning_rate": 0.00019012235817575084, "loss": 4.0354, "step": 228 }, { "epoch": 3.0535117056856187, "grad_norm": 0.6398957967758179, "learning_rate": 0.00019007786429365965, "loss": 4.024, "step": 229 }, { "epoch": 3.0668896321070234, "grad_norm": 0.7483389973640442, "learning_rate": 0.0001900333704115684, "loss": 4.1405, "step": 230 }, { "epoch": 3.080267558528428, "grad_norm": 0.7003724575042725, "learning_rate": 0.0001899888765294772, "loss": 4.3593, "step": 231 }, { "epoch": 3.0936454849498327, "grad_norm": 0.7426732182502747, "learning_rate": 0.000189944382647386, "loss": 4.485, "step": 232 }, { "epoch": 3.1070234113712374, "grad_norm": 0.6957541108131409, "learning_rate": 0.00018989988876529478, "loss": 4.1017, "step": 233 }, { "epoch": 3.120401337792642, "grad_norm": 0.8613067865371704, "learning_rate": 0.00018985539488320356, "loss": 4.3038, "step": 234 }, { "epoch": 3.1337792642140467, "grad_norm": 0.8375754952430725, "learning_rate": 0.00018981090100111237, "loss": 4.4356, "step": 235 }, { "epoch": 3.1471571906354514, "grad_norm": 0.7878522872924805, "learning_rate": 0.00018976640711902115, "loss": 3.9916, "step": 236 }, { "epoch": 3.160535117056856, "grad_norm": 0.7463901042938232, "learning_rate": 0.00018972191323692993, "loss": 3.6761, "step": 237 }, { "epoch": 3.1739130434782608, "grad_norm": 0.7360939979553223, "learning_rate": 0.00018967741935483872, "loss": 3.9573, "step": 238 }, { "epoch": 3.1872909698996654, "grad_norm": 0.891861081123352, "learning_rate": 0.00018963292547274753, "loss": 4.1853, "step": 239 }, { "epoch": 3.20066889632107, "grad_norm": 0.8589549660682678, "learning_rate": 0.00018958843159065628, "loss": 4.0679, "step": 240 }, { "epoch": 3.2140468227424748, "grad_norm": 0.9534163475036621, "learning_rate": 0.0001895439377085651, "loss": 3.732, "step": 241 }, { "epoch": 3.2274247491638794, "grad_norm": 0.8968185186386108, "learning_rate": 0.00018949944382647387, "loss": 4.2217, "step": 242 }, { "epoch": 3.240802675585284, "grad_norm": 0.81589275598526, "learning_rate": 0.00018945494994438266, "loss": 4.428, "step": 243 }, { "epoch": 3.254180602006689, "grad_norm": 0.929050862789154, "learning_rate": 0.00018941045606229144, "loss": 4.3468, "step": 244 }, { "epoch": 3.2675585284280935, "grad_norm": 0.8535035252571106, "learning_rate": 0.00018936596218020025, "loss": 3.8489, "step": 245 }, { "epoch": 3.280936454849498, "grad_norm": 0.9484681487083435, "learning_rate": 0.00018932146829810903, "loss": 4.0132, "step": 246 }, { "epoch": 3.294314381270903, "grad_norm": 0.8190047144889832, "learning_rate": 0.0001892769744160178, "loss": 4.3574, "step": 247 }, { "epoch": 3.3076923076923075, "grad_norm": 0.8764749765396118, "learning_rate": 0.0001892324805339266, "loss": 4.3103, "step": 248 }, { "epoch": 3.321070234113712, "grad_norm": 0.8929185271263123, "learning_rate": 0.0001891879866518354, "loss": 4.3606, "step": 249 }, { "epoch": 3.334448160535117, "grad_norm": 0.9096692204475403, "learning_rate": 0.00018914349276974416, "loss": 4.0047, "step": 250 }, { "epoch": 3.3478260869565215, "grad_norm": 0.885143518447876, "learning_rate": 0.00018909899888765297, "loss": 4.182, "step": 251 }, { "epoch": 3.361204013377926, "grad_norm": 0.7724215984344482, "learning_rate": 0.00018905450500556175, "loss": 3.9529, "step": 252 }, { "epoch": 3.374581939799331, "grad_norm": 0.8351865410804749, "learning_rate": 0.00018901001112347053, "loss": 3.9533, "step": 253 }, { "epoch": 3.387959866220736, "grad_norm": 0.8684999942779541, "learning_rate": 0.00018896551724137932, "loss": 3.8594, "step": 254 }, { "epoch": 3.4013377926421406, "grad_norm": 0.8903334736824036, "learning_rate": 0.0001889210233592881, "loss": 3.9248, "step": 255 }, { "epoch": 3.4147157190635453, "grad_norm": 0.826690137386322, "learning_rate": 0.0001888765294771969, "loss": 4.0389, "step": 256 }, { "epoch": 3.42809364548495, "grad_norm": 0.8306142687797546, "learning_rate": 0.00018883203559510566, "loss": 3.8168, "step": 257 }, { "epoch": 3.4414715719063547, "grad_norm": 0.9032199382781982, "learning_rate": 0.00018878754171301447, "loss": 4.178, "step": 258 }, { "epoch": 3.4548494983277593, "grad_norm": 0.9081966280937195, "learning_rate": 0.00018874304783092326, "loss": 4.2583, "step": 259 }, { "epoch": 3.468227424749164, "grad_norm": 0.8424077033996582, "learning_rate": 0.00018869855394883204, "loss": 4.3285, "step": 260 }, { "epoch": 3.4816053511705687, "grad_norm": 0.8302170038223267, "learning_rate": 0.00018865406006674082, "loss": 4.1346, "step": 261 }, { "epoch": 3.4949832775919734, "grad_norm": 0.8747193217277527, "learning_rate": 0.00018860956618464963, "loss": 4.0747, "step": 262 }, { "epoch": 3.508361204013378, "grad_norm": 0.8613927364349365, "learning_rate": 0.0001885650723025584, "loss": 4.2346, "step": 263 }, { "epoch": 3.5217391304347827, "grad_norm": 0.8321558833122253, "learning_rate": 0.0001885205784204672, "loss": 3.9781, "step": 264 }, { "epoch": 3.5351170568561874, "grad_norm": 0.8961741328239441, "learning_rate": 0.00018847608453837598, "loss": 4.311, "step": 265 }, { "epoch": 3.548494983277592, "grad_norm": 0.7703898549079895, "learning_rate": 0.00018843159065628479, "loss": 4.1163, "step": 266 }, { "epoch": 3.5618729096989967, "grad_norm": 0.880051851272583, "learning_rate": 0.00018838709677419354, "loss": 3.8032, "step": 267 }, { "epoch": 3.5752508361204014, "grad_norm": 0.8287038207054138, "learning_rate": 0.00018834260289210235, "loss": 4.1627, "step": 268 }, { "epoch": 3.588628762541806, "grad_norm": 0.9726569652557373, "learning_rate": 0.00018829810901001113, "loss": 4.4055, "step": 269 }, { "epoch": 3.6020066889632107, "grad_norm": 0.8071132898330688, "learning_rate": 0.00018825361512791992, "loss": 4.1709, "step": 270 }, { "epoch": 3.6153846153846154, "grad_norm": 0.8310988545417786, "learning_rate": 0.0001882091212458287, "loss": 4.2359, "step": 271 }, { "epoch": 3.62876254180602, "grad_norm": 0.8713561296463013, "learning_rate": 0.0001881646273637375, "loss": 4.1247, "step": 272 }, { "epoch": 3.6421404682274248, "grad_norm": 0.8964342474937439, "learning_rate": 0.0001881201334816463, "loss": 4.0794, "step": 273 }, { "epoch": 3.6555183946488294, "grad_norm": 0.9901681542396545, "learning_rate": 0.00018807563959955507, "loss": 4.0217, "step": 274 }, { "epoch": 3.668896321070234, "grad_norm": 0.9279042482376099, "learning_rate": 0.00018803114571746385, "loss": 4.3244, "step": 275 }, { "epoch": 3.682274247491639, "grad_norm": 0.8105964660644531, "learning_rate": 0.00018798665183537266, "loss": 3.9041, "step": 276 }, { "epoch": 3.6956521739130435, "grad_norm": 0.8511622548103333, "learning_rate": 0.00018794215795328142, "loss": 3.8969, "step": 277 }, { "epoch": 3.709030100334448, "grad_norm": 0.9072037935256958, "learning_rate": 0.00018789766407119023, "loss": 4.2185, "step": 278 }, { "epoch": 3.722408026755853, "grad_norm": 0.9792962670326233, "learning_rate": 0.000187853170189099, "loss": 4.1915, "step": 279 }, { "epoch": 3.7357859531772575, "grad_norm": 0.8579828143119812, "learning_rate": 0.0001878086763070078, "loss": 3.8903, "step": 280 }, { "epoch": 3.749163879598662, "grad_norm": 0.9866719841957092, "learning_rate": 0.00018776418242491658, "loss": 4.2022, "step": 281 }, { "epoch": 3.762541806020067, "grad_norm": 0.9251964688301086, "learning_rate": 0.00018771968854282539, "loss": 3.9536, "step": 282 }, { "epoch": 3.7759197324414715, "grad_norm": 1.0300836563110352, "learning_rate": 0.00018767519466073417, "loss": 4.2908, "step": 283 }, { "epoch": 3.789297658862876, "grad_norm": 1.0194575786590576, "learning_rate": 0.00018763070077864295, "loss": 4.1851, "step": 284 }, { "epoch": 3.802675585284281, "grad_norm": 0.8165330290794373, "learning_rate": 0.00018758620689655173, "loss": 4.138, "step": 285 }, { "epoch": 3.8160535117056855, "grad_norm": 1.0104280710220337, "learning_rate": 0.00018754171301446054, "loss": 3.9481, "step": 286 }, { "epoch": 3.82943143812709, "grad_norm": 0.9972538352012634, "learning_rate": 0.0001874972191323693, "loss": 4.3932, "step": 287 }, { "epoch": 3.842809364548495, "grad_norm": 0.96323162317276, "learning_rate": 0.0001874527252502781, "loss": 4.1133, "step": 288 }, { "epoch": 3.8561872909698995, "grad_norm": 0.8500615954399109, "learning_rate": 0.0001874082313681869, "loss": 4.2205, "step": 289 }, { "epoch": 3.869565217391304, "grad_norm": 0.8451250195503235, "learning_rate": 0.00018736373748609567, "loss": 4.1371, "step": 290 }, { "epoch": 3.882943143812709, "grad_norm": 0.9399815201759338, "learning_rate": 0.00018731924360400445, "loss": 4.5237, "step": 291 }, { "epoch": 3.8963210702341136, "grad_norm": 0.8061622977256775, "learning_rate": 0.00018727474972191326, "loss": 4.1033, "step": 292 }, { "epoch": 3.9096989966555182, "grad_norm": 0.7987121343612671, "learning_rate": 0.00018723025583982205, "loss": 3.9311, "step": 293 }, { "epoch": 3.9230769230769234, "grad_norm": 0.9041138291358948, "learning_rate": 0.00018718576195773083, "loss": 4.0252, "step": 294 }, { "epoch": 3.936454849498328, "grad_norm": 1.0002484321594238, "learning_rate": 0.0001871412680756396, "loss": 4.4605, "step": 295 }, { "epoch": 3.9498327759197327, "grad_norm": 0.9991098046302795, "learning_rate": 0.0001870967741935484, "loss": 4.1528, "step": 296 }, { "epoch": 3.9632107023411374, "grad_norm": 1.2179397344589233, "learning_rate": 0.00018705228031145718, "loss": 4.5224, "step": 297 }, { "epoch": 3.976588628762542, "grad_norm": 0.8279774785041809, "learning_rate": 0.00018700778642936596, "loss": 3.9464, "step": 298 }, { "epoch": 3.9899665551839467, "grad_norm": 0.8012803792953491, "learning_rate": 0.00018696329254727477, "loss": 4.0139, "step": 299 }, { "epoch": 4.0, "grad_norm": 0.9700272083282471, "learning_rate": 0.00018691879866518355, "loss": 3.8306, "step": 300 }, { "epoch": 4.013377926421405, "grad_norm": 0.7136749625205994, "learning_rate": 0.00018687430478309233, "loss": 3.9253, "step": 301 }, { "epoch": 4.026755852842809, "grad_norm": 0.7885096669197083, "learning_rate": 0.00018682981090100111, "loss": 3.927, "step": 302 }, { "epoch": 4.040133779264214, "grad_norm": 0.7801666855812073, "learning_rate": 0.00018678531701890992, "loss": 3.6482, "step": 303 }, { "epoch": 4.053511705685619, "grad_norm": 0.7857955098152161, "learning_rate": 0.00018674082313681868, "loss": 4.0665, "step": 304 }, { "epoch": 4.066889632107023, "grad_norm": 0.707421064376831, "learning_rate": 0.0001866963292547275, "loss": 3.9142, "step": 305 }, { "epoch": 4.080267558528428, "grad_norm": 0.7936912775039673, "learning_rate": 0.00018665183537263627, "loss": 4.1227, "step": 306 }, { "epoch": 4.093645484949833, "grad_norm": 0.8899754881858826, "learning_rate": 0.00018660734149054505, "loss": 3.7661, "step": 307 }, { "epoch": 4.107023411371237, "grad_norm": 0.7760347723960876, "learning_rate": 0.00018656284760845384, "loss": 3.8921, "step": 308 }, { "epoch": 4.120401337792642, "grad_norm": 0.8672968745231628, "learning_rate": 0.00018651835372636265, "loss": 3.6037, "step": 309 }, { "epoch": 4.133779264214047, "grad_norm": 0.8046863675117493, "learning_rate": 0.0001864738598442714, "loss": 3.9117, "step": 310 }, { "epoch": 4.147157190635451, "grad_norm": 0.9172897934913635, "learning_rate": 0.0001864293659621802, "loss": 3.7229, "step": 311 }, { "epoch": 4.160535117056856, "grad_norm": 0.9616653919219971, "learning_rate": 0.000186384872080089, "loss": 3.8851, "step": 312 }, { "epoch": 4.173913043478261, "grad_norm": 0.9659278988838196, "learning_rate": 0.0001863403781979978, "loss": 4.005, "step": 313 }, { "epoch": 4.187290969899665, "grad_norm": 0.9171205163002014, "learning_rate": 0.00018629588431590656, "loss": 3.8634, "step": 314 }, { "epoch": 4.20066889632107, "grad_norm": 0.9968683123588562, "learning_rate": 0.00018625139043381537, "loss": 3.7321, "step": 315 }, { "epoch": 4.214046822742475, "grad_norm": 0.8762083053588867, "learning_rate": 0.00018620689655172415, "loss": 3.931, "step": 316 }, { "epoch": 4.2274247491638794, "grad_norm": 0.9815887212753296, "learning_rate": 0.00018616240266963293, "loss": 3.9975, "step": 317 }, { "epoch": 4.240802675585284, "grad_norm": 1.0065505504608154, "learning_rate": 0.00018611790878754171, "loss": 3.8364, "step": 318 }, { "epoch": 4.254180602006689, "grad_norm": 0.9785431623458862, "learning_rate": 0.00018607341490545052, "loss": 3.8822, "step": 319 }, { "epoch": 4.2675585284280935, "grad_norm": 1.077799677848816, "learning_rate": 0.00018602892102335928, "loss": 3.8299, "step": 320 }, { "epoch": 4.280936454849498, "grad_norm": 0.8109619617462158, "learning_rate": 0.0001859844271412681, "loss": 3.8096, "step": 321 }, { "epoch": 4.294314381270903, "grad_norm": 0.967856764793396, "learning_rate": 0.00018593993325917687, "loss": 3.8639, "step": 322 }, { "epoch": 4.3076923076923075, "grad_norm": 0.8657905459403992, "learning_rate": 0.00018589543937708568, "loss": 3.7556, "step": 323 }, { "epoch": 4.321070234113712, "grad_norm": 0.9641517400741577, "learning_rate": 0.00018585094549499444, "loss": 3.9702, "step": 324 }, { "epoch": 4.334448160535117, "grad_norm": 0.9664435982704163, "learning_rate": 0.00018580645161290325, "loss": 3.8754, "step": 325 }, { "epoch": 4.3478260869565215, "grad_norm": 0.8322617411613464, "learning_rate": 0.00018576195773081203, "loss": 3.83, "step": 326 }, { "epoch": 4.361204013377926, "grad_norm": 1.0363450050354004, "learning_rate": 0.0001857174638487208, "loss": 3.9825, "step": 327 }, { "epoch": 4.374581939799331, "grad_norm": 1.0125840902328491, "learning_rate": 0.0001856729699666296, "loss": 3.6525, "step": 328 }, { "epoch": 4.3879598662207355, "grad_norm": 0.9922601580619812, "learning_rate": 0.0001856284760845384, "loss": 4.2373, "step": 329 }, { "epoch": 4.40133779264214, "grad_norm": 0.9070426225662231, "learning_rate": 0.00018558398220244716, "loss": 3.9623, "step": 330 }, { "epoch": 4.414715719063545, "grad_norm": 0.9369637370109558, "learning_rate": 0.00018553948832035597, "loss": 3.9297, "step": 331 }, { "epoch": 4.4280936454849495, "grad_norm": 1.108876347541809, "learning_rate": 0.00018549499443826475, "loss": 3.7325, "step": 332 }, { "epoch": 4.441471571906354, "grad_norm": 0.9405660629272461, "learning_rate": 0.00018545050055617356, "loss": 3.8615, "step": 333 }, { "epoch": 4.454849498327759, "grad_norm": 0.9730128645896912, "learning_rate": 0.00018540600667408231, "loss": 4.1794, "step": 334 }, { "epoch": 4.468227424749164, "grad_norm": 0.9341335892677307, "learning_rate": 0.00018536151279199112, "loss": 3.9422, "step": 335 }, { "epoch": 4.481605351170568, "grad_norm": 0.9262625575065613, "learning_rate": 0.0001853170189098999, "loss": 3.9819, "step": 336 }, { "epoch": 4.494983277591973, "grad_norm": 1.0419141054153442, "learning_rate": 0.00018527252502780866, "loss": 3.7481, "step": 337 }, { "epoch": 4.508361204013378, "grad_norm": 0.8986826539039612, "learning_rate": 0.00018522803114571747, "loss": 4.1195, "step": 338 }, { "epoch": 4.521739130434782, "grad_norm": 0.9502431154251099, "learning_rate": 0.00018518353726362625, "loss": 3.8521, "step": 339 }, { "epoch": 4.535117056856187, "grad_norm": 0.8936267495155334, "learning_rate": 0.00018513904338153504, "loss": 3.647, "step": 340 }, { "epoch": 4.548494983277592, "grad_norm": 0.8870158195495605, "learning_rate": 0.00018509454949944382, "loss": 3.7624, "step": 341 }, { "epoch": 4.561872909698996, "grad_norm": 0.9030978679656982, "learning_rate": 0.00018505005561735263, "loss": 3.8018, "step": 342 }, { "epoch": 4.575250836120401, "grad_norm": 0.8690946698188782, "learning_rate": 0.0001850055617352614, "loss": 3.6183, "step": 343 }, { "epoch": 4.588628762541806, "grad_norm": 0.9812071323394775, "learning_rate": 0.0001849610678531702, "loss": 4.1231, "step": 344 }, { "epoch": 4.602006688963211, "grad_norm": 0.9404383301734924, "learning_rate": 0.00018491657397107897, "loss": 3.9645, "step": 345 }, { "epoch": 4.615384615384615, "grad_norm": 1.0422123670578003, "learning_rate": 0.00018487208008898778, "loss": 3.9031, "step": 346 }, { "epoch": 4.6287625418060205, "grad_norm": 0.9838129281997681, "learning_rate": 0.00018482758620689654, "loss": 3.9985, "step": 347 }, { "epoch": 4.642140468227424, "grad_norm": 0.9232532978057861, "learning_rate": 0.00018478309232480535, "loss": 4.0343, "step": 348 }, { "epoch": 4.65551839464883, "grad_norm": 0.9242956042289734, "learning_rate": 0.00018473859844271413, "loss": 4.0669, "step": 349 }, { "epoch": 4.668896321070234, "grad_norm": 0.919269859790802, "learning_rate": 0.0001846941045606229, "loss": 4.0549, "step": 350 }, { "epoch": 4.682274247491639, "grad_norm": 0.93565833568573, "learning_rate": 0.0001846496106785317, "loss": 4.1306, "step": 351 }, { "epoch": 4.695652173913043, "grad_norm": 0.9001899361610413, "learning_rate": 0.0001846051167964405, "loss": 3.8916, "step": 352 }, { "epoch": 4.709030100334449, "grad_norm": 0.8896821737289429, "learning_rate": 0.0001845606229143493, "loss": 3.8377, "step": 353 }, { "epoch": 4.722408026755852, "grad_norm": 1.0137807130813599, "learning_rate": 0.00018451612903225807, "loss": 3.9923, "step": 354 }, { "epoch": 4.735785953177258, "grad_norm": 1.075823426246643, "learning_rate": 0.00018447163515016685, "loss": 4.0706, "step": 355 }, { "epoch": 4.749163879598662, "grad_norm": 1.0076895952224731, "learning_rate": 0.00018442714126807566, "loss": 4.0759, "step": 356 }, { "epoch": 4.762541806020067, "grad_norm": 0.9387428164482117, "learning_rate": 0.00018438264738598442, "loss": 3.6959, "step": 357 }, { "epoch": 4.775919732441472, "grad_norm": 0.8920648097991943, "learning_rate": 0.00018433815350389323, "loss": 3.9213, "step": 358 }, { "epoch": 4.789297658862877, "grad_norm": 1.0252491235733032, "learning_rate": 0.000184293659621802, "loss": 3.9118, "step": 359 }, { "epoch": 4.802675585284281, "grad_norm": 1.0382707118988037, "learning_rate": 0.0001842491657397108, "loss": 4.0172, "step": 360 }, { "epoch": 4.816053511705686, "grad_norm": 1.07838773727417, "learning_rate": 0.00018420467185761957, "loss": 3.8531, "step": 361 }, { "epoch": 4.829431438127091, "grad_norm": 0.9974546432495117, "learning_rate": 0.00018416017797552838, "loss": 4.0387, "step": 362 }, { "epoch": 4.842809364548495, "grad_norm": 1.024491548538208, "learning_rate": 0.00018411568409343717, "loss": 3.9504, "step": 363 }, { "epoch": 4.8561872909699, "grad_norm": 0.9236369132995605, "learning_rate": 0.00018407119021134595, "loss": 3.7119, "step": 364 }, { "epoch": 4.869565217391305, "grad_norm": 0.935644268989563, "learning_rate": 0.00018402669632925473, "loss": 4.0077, "step": 365 }, { "epoch": 4.882943143812709, "grad_norm": 0.9328681230545044, "learning_rate": 0.00018398220244716354, "loss": 3.9133, "step": 366 }, { "epoch": 4.896321070234114, "grad_norm": 0.9596607089042664, "learning_rate": 0.0001839377085650723, "loss": 3.8003, "step": 367 }, { "epoch": 4.909698996655519, "grad_norm": 0.9878052473068237, "learning_rate": 0.0001838932146829811, "loss": 3.8805, "step": 368 }, { "epoch": 4.923076923076923, "grad_norm": 1.00381600856781, "learning_rate": 0.0001838487208008899, "loss": 4.0264, "step": 369 }, { "epoch": 4.936454849498328, "grad_norm": 1.024754524230957, "learning_rate": 0.00018380422691879867, "loss": 3.7291, "step": 370 }, { "epoch": 4.949832775919733, "grad_norm": 0.9670823812484741, "learning_rate": 0.00018375973303670745, "loss": 3.9418, "step": 371 }, { "epoch": 4.963210702341137, "grad_norm": 0.9736581444740295, "learning_rate": 0.00018371523915461626, "loss": 3.8813, "step": 372 }, { "epoch": 4.976588628762542, "grad_norm": 0.9752672910690308, "learning_rate": 0.00018367074527252504, "loss": 3.6717, "step": 373 }, { "epoch": 4.989966555183947, "grad_norm": 1.1268304586410522, "learning_rate": 0.00018362625139043383, "loss": 3.9782, "step": 374 }, { "epoch": 5.0, "grad_norm": 1.7933701276779175, "learning_rate": 0.0001835817575083426, "loss": 3.001, "step": 375 }, { "epoch": 5.013377926421405, "grad_norm": 0.8035010099411011, "learning_rate": 0.00018353726362625142, "loss": 3.7943, "step": 376 }, { "epoch": 5.026755852842809, "grad_norm": 0.8016420006752014, "learning_rate": 0.00018349276974416017, "loss": 3.7454, "step": 377 }, { "epoch": 5.040133779264214, "grad_norm": 0.6844643354415894, "learning_rate": 0.00018344827586206896, "loss": 3.699, "step": 378 }, { "epoch": 5.053511705685619, "grad_norm": 0.8649943470954895, "learning_rate": 0.00018340378197997777, "loss": 3.7197, "step": 379 }, { "epoch": 5.066889632107023, "grad_norm": 0.9685015678405762, "learning_rate": 0.00018335928809788655, "loss": 3.6952, "step": 380 }, { "epoch": 5.080267558528428, "grad_norm": 0.8728330135345459, "learning_rate": 0.00018331479421579533, "loss": 3.7164, "step": 381 }, { "epoch": 5.093645484949833, "grad_norm": 0.962504506111145, "learning_rate": 0.0001832703003337041, "loss": 3.6123, "step": 382 }, { "epoch": 5.107023411371237, "grad_norm": 0.9194462895393372, "learning_rate": 0.00018322580645161292, "loss": 3.275, "step": 383 }, { "epoch": 5.120401337792642, "grad_norm": 0.9851329326629639, "learning_rate": 0.00018318131256952168, "loss": 3.6222, "step": 384 }, { "epoch": 5.133779264214047, "grad_norm": 1.0702580213546753, "learning_rate": 0.0001831368186874305, "loss": 3.8728, "step": 385 }, { "epoch": 5.147157190635451, "grad_norm": 1.3237228393554688, "learning_rate": 0.00018309232480533927, "loss": 3.8948, "step": 386 }, { "epoch": 5.160535117056856, "grad_norm": 1.0076218843460083, "learning_rate": 0.00018304783092324805, "loss": 3.8894, "step": 387 }, { "epoch": 5.173913043478261, "grad_norm": 1.084722876548767, "learning_rate": 0.00018300333704115683, "loss": 3.7398, "step": 388 }, { "epoch": 5.187290969899665, "grad_norm": 0.9112711548805237, "learning_rate": 0.00018295884315906564, "loss": 3.5901, "step": 389 }, { "epoch": 5.20066889632107, "grad_norm": 0.9451406002044678, "learning_rate": 0.00018291434927697443, "loss": 3.6313, "step": 390 }, { "epoch": 5.214046822742475, "grad_norm": 0.8901047706604004, "learning_rate": 0.0001828698553948832, "loss": 3.3191, "step": 391 }, { "epoch": 5.2274247491638794, "grad_norm": 0.9838565587997437, "learning_rate": 0.000182825361512792, "loss": 3.882, "step": 392 }, { "epoch": 5.240802675585284, "grad_norm": 0.9839156866073608, "learning_rate": 0.0001827808676307008, "loss": 3.6068, "step": 393 }, { "epoch": 5.254180602006689, "grad_norm": 0.9328583478927612, "learning_rate": 0.00018273637374860956, "loss": 3.6856, "step": 394 }, { "epoch": 5.2675585284280935, "grad_norm": 0.8705796003341675, "learning_rate": 0.00018269187986651837, "loss": 3.7282, "step": 395 }, { "epoch": 5.280936454849498, "grad_norm": 0.9675374031066895, "learning_rate": 0.00018264738598442715, "loss": 3.6588, "step": 396 }, { "epoch": 5.294314381270903, "grad_norm": 1.145280361175537, "learning_rate": 0.00018260289210233593, "loss": 3.8843, "step": 397 }, { "epoch": 5.3076923076923075, "grad_norm": 0.9769694805145264, "learning_rate": 0.0001825583982202447, "loss": 3.7207, "step": 398 }, { "epoch": 5.321070234113712, "grad_norm": 0.9277816414833069, "learning_rate": 0.00018251390433815352, "loss": 3.712, "step": 399 }, { "epoch": 5.334448160535117, "grad_norm": 1.1015180349349976, "learning_rate": 0.0001824694104560623, "loss": 3.7941, "step": 400 }, { "epoch": 5.3478260869565215, "grad_norm": 1.2234200239181519, "learning_rate": 0.0001824249165739711, "loss": 3.6559, "step": 401 }, { "epoch": 5.361204013377926, "grad_norm": 0.9358471035957336, "learning_rate": 0.00018238042269187987, "loss": 3.7665, "step": 402 }, { "epoch": 5.374581939799331, "grad_norm": 0.8287034630775452, "learning_rate": 0.00018233592880978868, "loss": 3.8265, "step": 403 }, { "epoch": 5.3879598662207355, "grad_norm": 1.0219204425811768, "learning_rate": 0.00018229143492769743, "loss": 3.6829, "step": 404 }, { "epoch": 5.40133779264214, "grad_norm": 1.0601041316986084, "learning_rate": 0.00018224694104560624, "loss": 3.5879, "step": 405 }, { "epoch": 5.414715719063545, "grad_norm": 1.2221566438674927, "learning_rate": 0.00018220244716351503, "loss": 3.6023, "step": 406 }, { "epoch": 5.4280936454849495, "grad_norm": 0.9589087963104248, "learning_rate": 0.0001821579532814238, "loss": 3.9109, "step": 407 }, { "epoch": 5.441471571906354, "grad_norm": 1.088295340538025, "learning_rate": 0.0001821134593993326, "loss": 3.5695, "step": 408 }, { "epoch": 5.454849498327759, "grad_norm": 1.1284915208816528, "learning_rate": 0.0001820689655172414, "loss": 3.6004, "step": 409 }, { "epoch": 5.468227424749164, "grad_norm": 1.0108689069747925, "learning_rate": 0.00018202447163515018, "loss": 3.7797, "step": 410 }, { "epoch": 5.481605351170568, "grad_norm": 0.8550918102264404, "learning_rate": 0.00018197997775305896, "loss": 3.5758, "step": 411 }, { "epoch": 5.494983277591973, "grad_norm": 0.8765145540237427, "learning_rate": 0.00018193548387096775, "loss": 3.7625, "step": 412 }, { "epoch": 5.508361204013378, "grad_norm": 1.0253541469573975, "learning_rate": 0.00018189098998887656, "loss": 3.6033, "step": 413 }, { "epoch": 5.521739130434782, "grad_norm": 1.0475622415542603, "learning_rate": 0.0001818464961067853, "loss": 3.813, "step": 414 }, { "epoch": 5.535117056856187, "grad_norm": 1.053133249282837, "learning_rate": 0.00018180200222469412, "loss": 3.4779, "step": 415 }, { "epoch": 5.548494983277592, "grad_norm": 1.0151216983795166, "learning_rate": 0.0001817575083426029, "loss": 3.9038, "step": 416 }, { "epoch": 5.561872909698996, "grad_norm": 1.4666434526443481, "learning_rate": 0.00018171301446051169, "loss": 3.4735, "step": 417 }, { "epoch": 5.575250836120401, "grad_norm": 1.1043344736099243, "learning_rate": 0.00018166852057842047, "loss": 3.7449, "step": 418 }, { "epoch": 5.588628762541806, "grad_norm": 0.900745153427124, "learning_rate": 0.00018162402669632925, "loss": 3.7401, "step": 419 }, { "epoch": 5.602006688963211, "grad_norm": 0.9771101474761963, "learning_rate": 0.00018157953281423806, "loss": 3.8328, "step": 420 }, { "epoch": 5.615384615384615, "grad_norm": 0.9099516272544861, "learning_rate": 0.00018153503893214682, "loss": 3.6245, "step": 421 }, { "epoch": 5.6287625418060205, "grad_norm": 0.9844585657119751, "learning_rate": 0.00018149054505005563, "loss": 3.5776, "step": 422 }, { "epoch": 5.642140468227424, "grad_norm": 1.0481154918670654, "learning_rate": 0.0001814460511679644, "loss": 3.6304, "step": 423 }, { "epoch": 5.65551839464883, "grad_norm": 0.9971081614494324, "learning_rate": 0.0001814015572858732, "loss": 3.7863, "step": 424 }, { "epoch": 5.668896321070234, "grad_norm": 0.9247872829437256, "learning_rate": 0.00018135706340378197, "loss": 3.7319, "step": 425 }, { "epoch": 5.682274247491639, "grad_norm": 0.9895725846290588, "learning_rate": 0.00018131256952169078, "loss": 3.78, "step": 426 }, { "epoch": 5.695652173913043, "grad_norm": 1.0847641229629517, "learning_rate": 0.00018126807563959956, "loss": 3.8662, "step": 427 }, { "epoch": 5.709030100334449, "grad_norm": 0.986259937286377, "learning_rate": 0.00018122358175750835, "loss": 3.5621, "step": 428 }, { "epoch": 5.722408026755852, "grad_norm": 0.9166681170463562, "learning_rate": 0.00018117908787541713, "loss": 3.6153, "step": 429 }, { "epoch": 5.735785953177258, "grad_norm": 1.1331177949905396, "learning_rate": 0.00018113459399332594, "loss": 3.5976, "step": 430 }, { "epoch": 5.749163879598662, "grad_norm": 0.8743540644645691, "learning_rate": 0.0001810901001112347, "loss": 3.2511, "step": 431 }, { "epoch": 5.762541806020067, "grad_norm": 1.0700207948684692, "learning_rate": 0.0001810456062291435, "loss": 3.7634, "step": 432 }, { "epoch": 5.775919732441472, "grad_norm": 0.9412694573402405, "learning_rate": 0.00018100111234705229, "loss": 3.6264, "step": 433 }, { "epoch": 5.789297658862877, "grad_norm": 1.0398496389389038, "learning_rate": 0.00018095661846496107, "loss": 3.744, "step": 434 }, { "epoch": 5.802675585284281, "grad_norm": 0.9605004787445068, "learning_rate": 0.00018091212458286985, "loss": 3.5532, "step": 435 }, { "epoch": 5.816053511705686, "grad_norm": 1.0449095964431763, "learning_rate": 0.00018086763070077866, "loss": 4.0611, "step": 436 }, { "epoch": 5.829431438127091, "grad_norm": 0.9342606663703918, "learning_rate": 0.00018082313681868744, "loss": 3.9957, "step": 437 }, { "epoch": 5.842809364548495, "grad_norm": 0.9687880873680115, "learning_rate": 0.00018077864293659622, "loss": 3.9299, "step": 438 }, { "epoch": 5.8561872909699, "grad_norm": 1.1390576362609863, "learning_rate": 0.000180734149054505, "loss": 3.6552, "step": 439 }, { "epoch": 5.869565217391305, "grad_norm": 0.9280988574028015, "learning_rate": 0.00018068965517241382, "loss": 3.7828, "step": 440 }, { "epoch": 5.882943143812709, "grad_norm": 1.2928193807601929, "learning_rate": 0.00018064516129032257, "loss": 3.6292, "step": 441 }, { "epoch": 5.896321070234114, "grad_norm": 1.0959875583648682, "learning_rate": 0.00018060066740823138, "loss": 3.4293, "step": 442 }, { "epoch": 5.909698996655519, "grad_norm": 1.0713289976119995, "learning_rate": 0.00018055617352614016, "loss": 3.7767, "step": 443 }, { "epoch": 5.923076923076923, "grad_norm": 0.9309440851211548, "learning_rate": 0.00018051167964404895, "loss": 3.5473, "step": 444 }, { "epoch": 5.936454849498328, "grad_norm": 1.0999056100845337, "learning_rate": 0.00018046718576195773, "loss": 3.9694, "step": 445 }, { "epoch": 5.949832775919733, "grad_norm": 1.1073781251907349, "learning_rate": 0.00018042269187986654, "loss": 3.6882, "step": 446 }, { "epoch": 5.963210702341137, "grad_norm": 1.0430257320404053, "learning_rate": 0.00018037819799777532, "loss": 3.4009, "step": 447 }, { "epoch": 5.976588628762542, "grad_norm": 1.1132690906524658, "learning_rate": 0.0001803337041156841, "loss": 3.8832, "step": 448 }, { "epoch": 5.989966555183947, "grad_norm": 1.0147771835327148, "learning_rate": 0.00018028921023359289, "loss": 3.7117, "step": 449 }, { "epoch": 6.0, "grad_norm": 1.458959698677063, "learning_rate": 0.0001802447163515017, "loss": 3.5745, "step": 450 }, { "epoch": 6.013377926421405, "grad_norm": 0.8363592028617859, "learning_rate": 0.00018020022246941045, "loss": 3.5835, "step": 451 }, { "epoch": 6.026755852842809, "grad_norm": 0.8071937561035156, "learning_rate": 0.00018015572858731926, "loss": 3.5923, "step": 452 }, { "epoch": 6.040133779264214, "grad_norm": 0.7746313214302063, "learning_rate": 0.00018011123470522804, "loss": 3.5688, "step": 453 }, { "epoch": 6.053511705685619, "grad_norm": 0.689179539680481, "learning_rate": 0.00018006674082313682, "loss": 3.412, "step": 454 }, { "epoch": 6.066889632107023, "grad_norm": 0.8438050746917725, "learning_rate": 0.0001800222469410456, "loss": 3.403, "step": 455 }, { "epoch": 6.080267558528428, "grad_norm": 0.7670062780380249, "learning_rate": 0.00017997775305895442, "loss": 3.5029, "step": 456 }, { "epoch": 6.093645484949833, "grad_norm": 0.8185870051383972, "learning_rate": 0.0001799332591768632, "loss": 3.4584, "step": 457 }, { "epoch": 6.107023411371237, "grad_norm": 0.9618543386459351, "learning_rate": 0.00017988876529477198, "loss": 3.6538, "step": 458 }, { "epoch": 6.120401337792642, "grad_norm": 0.959724485874176, "learning_rate": 0.00017984427141268076, "loss": 3.5284, "step": 459 }, { "epoch": 6.133779264214047, "grad_norm": 0.8044765591621399, "learning_rate": 0.00017979977753058955, "loss": 3.2198, "step": 460 }, { "epoch": 6.147157190635451, "grad_norm": 0.8287092447280884, "learning_rate": 0.00017975528364849833, "loss": 3.4977, "step": 461 }, { "epoch": 6.160535117056856, "grad_norm": 0.8855329155921936, "learning_rate": 0.0001797107897664071, "loss": 3.5008, "step": 462 }, { "epoch": 6.173913043478261, "grad_norm": 0.8839483857154846, "learning_rate": 0.00017966629588431592, "loss": 3.6135, "step": 463 }, { "epoch": 6.187290969899665, "grad_norm": 0.963446319103241, "learning_rate": 0.0001796218020022247, "loss": 3.6156, "step": 464 }, { "epoch": 6.20066889632107, "grad_norm": 0.896743655204773, "learning_rate": 0.00017957730812013348, "loss": 3.6623, "step": 465 }, { "epoch": 6.214046822742475, "grad_norm": 0.9268617033958435, "learning_rate": 0.00017953281423804227, "loss": 3.4343, "step": 466 }, { "epoch": 6.2274247491638794, "grad_norm": 0.8335449695587158, "learning_rate": 0.00017948832035595108, "loss": 3.5716, "step": 467 }, { "epoch": 6.240802675585284, "grad_norm": 0.7771849036216736, "learning_rate": 0.00017944382647385983, "loss": 3.5191, "step": 468 }, { "epoch": 6.254180602006689, "grad_norm": 0.9157488346099854, "learning_rate": 0.00017939933259176864, "loss": 3.5583, "step": 469 }, { "epoch": 6.2675585284280935, "grad_norm": 0.9348477721214294, "learning_rate": 0.00017935483870967742, "loss": 3.3137, "step": 470 }, { "epoch": 6.280936454849498, "grad_norm": 0.8791135549545288, "learning_rate": 0.0001793103448275862, "loss": 3.5111, "step": 471 }, { "epoch": 6.294314381270903, "grad_norm": 0.9963672757148743, "learning_rate": 0.000179265850945495, "loss": 3.7518, "step": 472 }, { "epoch": 6.3076923076923075, "grad_norm": 0.9291539192199707, "learning_rate": 0.0001792213570634038, "loss": 3.4524, "step": 473 }, { "epoch": 6.321070234113712, "grad_norm": 0.9349279403686523, "learning_rate": 0.00017917686318131258, "loss": 3.4753, "step": 474 }, { "epoch": 6.334448160535117, "grad_norm": 0.8984476327896118, "learning_rate": 0.00017913236929922136, "loss": 3.7325, "step": 475 }, { "epoch": 6.3478260869565215, "grad_norm": 0.8452139496803284, "learning_rate": 0.00017908787541713015, "loss": 3.8021, "step": 476 }, { "epoch": 6.361204013377926, "grad_norm": 0.9418376088142395, "learning_rate": 0.00017904338153503895, "loss": 3.7426, "step": 477 }, { "epoch": 6.374581939799331, "grad_norm": 1.0661097764968872, "learning_rate": 0.0001789988876529477, "loss": 3.7556, "step": 478 }, { "epoch": 6.3879598662207355, "grad_norm": 0.9645984768867493, "learning_rate": 0.00017895439377085652, "loss": 3.3353, "step": 479 }, { "epoch": 6.40133779264214, "grad_norm": 0.9243470430374146, "learning_rate": 0.0001789098998887653, "loss": 3.5729, "step": 480 }, { "epoch": 6.414715719063545, "grad_norm": 0.885061502456665, "learning_rate": 0.00017886540600667408, "loss": 3.5699, "step": 481 }, { "epoch": 6.4280936454849495, "grad_norm": 0.9025402069091797, "learning_rate": 0.00017882091212458287, "loss": 3.4532, "step": 482 }, { "epoch": 6.441471571906354, "grad_norm": 0.9760842323303223, "learning_rate": 0.00017877641824249168, "loss": 3.7222, "step": 483 }, { "epoch": 6.454849498327759, "grad_norm": 1.2709609270095825, "learning_rate": 0.00017873192436040046, "loss": 3.756, "step": 484 }, { "epoch": 6.468227424749164, "grad_norm": 0.904513955116272, "learning_rate": 0.00017868743047830924, "loss": 3.657, "step": 485 }, { "epoch": 6.481605351170568, "grad_norm": 1.158915400505066, "learning_rate": 0.00017864293659621802, "loss": 3.5897, "step": 486 }, { "epoch": 6.494983277591973, "grad_norm": 0.9457879066467285, "learning_rate": 0.00017859844271412683, "loss": 3.4394, "step": 487 }, { "epoch": 6.508361204013378, "grad_norm": 1.04762601852417, "learning_rate": 0.0001785539488320356, "loss": 3.39, "step": 488 }, { "epoch": 6.521739130434782, "grad_norm": 0.9370948076248169, "learning_rate": 0.0001785094549499444, "loss": 3.4816, "step": 489 }, { "epoch": 6.535117056856187, "grad_norm": 1.1307988166809082, "learning_rate": 0.00017846496106785318, "loss": 3.5079, "step": 490 }, { "epoch": 6.548494983277592, "grad_norm": 1.0025054216384888, "learning_rate": 0.00017842046718576196, "loss": 3.3447, "step": 491 }, { "epoch": 6.561872909698996, "grad_norm": 0.9893412590026855, "learning_rate": 0.00017837597330367074, "loss": 3.4698, "step": 492 }, { "epoch": 6.575250836120401, "grad_norm": 1.0476347208023071, "learning_rate": 0.00017833147942157955, "loss": 3.4955, "step": 493 }, { "epoch": 6.588628762541806, "grad_norm": 1.0508891344070435, "learning_rate": 0.00017828698553948834, "loss": 3.6568, "step": 494 }, { "epoch": 6.602006688963211, "grad_norm": 1.0397465229034424, "learning_rate": 0.00017824249165739712, "loss": 3.5087, "step": 495 }, { "epoch": 6.615384615384615, "grad_norm": 0.9884181022644043, "learning_rate": 0.0001781979977753059, "loss": 3.6778, "step": 496 }, { "epoch": 6.6287625418060205, "grad_norm": 1.1187562942504883, "learning_rate": 0.0001781535038932147, "loss": 3.4345, "step": 497 }, { "epoch": 6.642140468227424, "grad_norm": 1.1133880615234375, "learning_rate": 0.00017810901001112347, "loss": 3.4959, "step": 498 }, { "epoch": 6.65551839464883, "grad_norm": 0.9368589520454407, "learning_rate": 0.00017806451612903228, "loss": 3.628, "step": 499 }, { "epoch": 6.668896321070234, "grad_norm": 1.0427212715148926, "learning_rate": 0.00017802002224694106, "loss": 3.6073, "step": 500 }, { "epoch": 6.682274247491639, "grad_norm": 0.9564261436462402, "learning_rate": 0.00017797552836484984, "loss": 3.4406, "step": 501 }, { "epoch": 6.695652173913043, "grad_norm": 0.9754629135131836, "learning_rate": 0.00017793103448275862, "loss": 3.7025, "step": 502 }, { "epoch": 6.709030100334449, "grad_norm": 0.9669683575630188, "learning_rate": 0.0001778865406006674, "loss": 3.5872, "step": 503 }, { "epoch": 6.722408026755852, "grad_norm": 0.959619402885437, "learning_rate": 0.00017784204671857621, "loss": 3.5124, "step": 504 }, { "epoch": 6.735785953177258, "grad_norm": 0.981737494468689, "learning_rate": 0.00017779755283648497, "loss": 3.4038, "step": 505 }, { "epoch": 6.749163879598662, "grad_norm": 0.9372640252113342, "learning_rate": 0.00017775305895439378, "loss": 3.6725, "step": 506 }, { "epoch": 6.762541806020067, "grad_norm": 0.947066605091095, "learning_rate": 0.00017770856507230256, "loss": 3.807, "step": 507 }, { "epoch": 6.775919732441472, "grad_norm": 0.7659755349159241, "learning_rate": 0.00017766407119021134, "loss": 3.0411, "step": 508 }, { "epoch": 6.789297658862877, "grad_norm": 1.0432168245315552, "learning_rate": 0.00017761957730812013, "loss": 3.6259, "step": 509 }, { "epoch": 6.802675585284281, "grad_norm": 1.0104693174362183, "learning_rate": 0.00017757508342602894, "loss": 3.5971, "step": 510 }, { "epoch": 6.816053511705686, "grad_norm": 0.9007440805435181, "learning_rate": 0.00017753058954393772, "loss": 3.5894, "step": 511 }, { "epoch": 6.829431438127091, "grad_norm": 0.8829946517944336, "learning_rate": 0.0001774860956618465, "loss": 3.6094, "step": 512 }, { "epoch": 6.842809364548495, "grad_norm": 0.9823127388954163, "learning_rate": 0.00017744160177975528, "loss": 3.6568, "step": 513 }, { "epoch": 6.8561872909699, "grad_norm": 1.0247899293899536, "learning_rate": 0.0001773971078976641, "loss": 3.259, "step": 514 }, { "epoch": 6.869565217391305, "grad_norm": 0.9435336589813232, "learning_rate": 0.00017735261401557285, "loss": 3.4978, "step": 515 }, { "epoch": 6.882943143812709, "grad_norm": 1.0135424137115479, "learning_rate": 0.00017730812013348166, "loss": 3.4765, "step": 516 }, { "epoch": 6.896321070234114, "grad_norm": 1.1327738761901855, "learning_rate": 0.00017726362625139044, "loss": 3.6927, "step": 517 }, { "epoch": 6.909698996655519, "grad_norm": 0.9335159659385681, "learning_rate": 0.00017721913236929922, "loss": 3.7278, "step": 518 }, { "epoch": 6.923076923076923, "grad_norm": 0.8229056000709534, "learning_rate": 0.000177174638487208, "loss": 3.549, "step": 519 }, { "epoch": 6.936454849498328, "grad_norm": 0.8996124267578125, "learning_rate": 0.00017713014460511681, "loss": 3.5093, "step": 520 }, { "epoch": 6.949832775919733, "grad_norm": 0.9984102845191956, "learning_rate": 0.0001770856507230256, "loss": 3.4772, "step": 521 }, { "epoch": 6.963210702341137, "grad_norm": 0.9136049747467041, "learning_rate": 0.00017704115684093438, "loss": 3.6487, "step": 522 }, { "epoch": 6.976588628762542, "grad_norm": 0.9707899689674377, "learning_rate": 0.00017699666295884316, "loss": 3.7471, "step": 523 }, { "epoch": 6.989966555183947, "grad_norm": 0.919865071773529, "learning_rate": 0.00017695216907675197, "loss": 3.8013, "step": 524 }, { "epoch": 7.0, "grad_norm": 1.125541090965271, "learning_rate": 0.00017690767519466073, "loss": 3.4869, "step": 525 }, { "epoch": 7.013377926421405, "grad_norm": 0.8512810468673706, "learning_rate": 0.00017686318131256954, "loss": 3.3334, "step": 526 }, { "epoch": 7.026755852842809, "grad_norm": 0.8450623750686646, "learning_rate": 0.00017681868743047832, "loss": 3.5769, "step": 527 }, { "epoch": 7.040133779264214, "grad_norm": 0.8526298403739929, "learning_rate": 0.0001767741935483871, "loss": 3.4546, "step": 528 }, { "epoch": 7.053511705685619, "grad_norm": 0.75905442237854, "learning_rate": 0.00017672969966629588, "loss": 3.4228, "step": 529 }, { "epoch": 7.066889632107023, "grad_norm": 0.8442811965942383, "learning_rate": 0.0001766852057842047, "loss": 3.5766, "step": 530 }, { "epoch": 7.080267558528428, "grad_norm": 0.9584814310073853, "learning_rate": 0.00017664071190211347, "loss": 3.5312, "step": 531 }, { "epoch": 7.093645484949833, "grad_norm": 0.9741052985191345, "learning_rate": 0.00017659621802002226, "loss": 3.4877, "step": 532 }, { "epoch": 7.107023411371237, "grad_norm": 0.8638135194778442, "learning_rate": 0.00017655172413793104, "loss": 3.5701, "step": 533 }, { "epoch": 7.120401337792642, "grad_norm": 1.0128440856933594, "learning_rate": 0.00017650723025583985, "loss": 3.3629, "step": 534 }, { "epoch": 7.133779264214047, "grad_norm": 0.9763593673706055, "learning_rate": 0.0001764627363737486, "loss": 3.5651, "step": 535 }, { "epoch": 7.147157190635451, "grad_norm": 0.8706293702125549, "learning_rate": 0.00017641824249165741, "loss": 3.454, "step": 536 }, { "epoch": 7.160535117056856, "grad_norm": 0.9227468967437744, "learning_rate": 0.0001763737486095662, "loss": 3.5528, "step": 537 }, { "epoch": 7.173913043478261, "grad_norm": 0.7493206262588501, "learning_rate": 0.00017632925472747498, "loss": 3.2662, "step": 538 }, { "epoch": 7.187290969899665, "grad_norm": 0.8414123058319092, "learning_rate": 0.00017628476084538376, "loss": 3.3864, "step": 539 }, { "epoch": 7.20066889632107, "grad_norm": 0.8352764248847961, "learning_rate": 0.00017624026696329257, "loss": 3.2407, "step": 540 }, { "epoch": 7.214046822742475, "grad_norm": 0.7413480281829834, "learning_rate": 0.00017619577308120135, "loss": 3.3989, "step": 541 }, { "epoch": 7.2274247491638794, "grad_norm": 0.7661281228065491, "learning_rate": 0.00017615127919911014, "loss": 3.3792, "step": 542 }, { "epoch": 7.240802675585284, "grad_norm": 0.86900395154953, "learning_rate": 0.00017610678531701892, "loss": 3.2191, "step": 543 }, { "epoch": 7.254180602006689, "grad_norm": 0.8536344170570374, "learning_rate": 0.0001760622914349277, "loss": 3.3366, "step": 544 }, { "epoch": 7.2675585284280935, "grad_norm": 0.8729544878005981, "learning_rate": 0.00017601779755283648, "loss": 3.4976, "step": 545 }, { "epoch": 7.280936454849498, "grad_norm": 0.8263023495674133, "learning_rate": 0.00017597330367074526, "loss": 3.4102, "step": 546 }, { "epoch": 7.294314381270903, "grad_norm": 0.748373806476593, "learning_rate": 0.00017592880978865407, "loss": 3.332, "step": 547 }, { "epoch": 7.3076923076923075, "grad_norm": 0.7606791853904724, "learning_rate": 0.00017588431590656286, "loss": 3.361, "step": 548 }, { "epoch": 7.321070234113712, "grad_norm": 0.9155070781707764, "learning_rate": 0.00017583982202447164, "loss": 3.657, "step": 549 }, { "epoch": 7.334448160535117, "grad_norm": 0.7440597414970398, "learning_rate": 0.00017579532814238042, "loss": 3.335, "step": 550 }, { "epoch": 7.3478260869565215, "grad_norm": 0.8781002759933472, "learning_rate": 0.00017575083426028923, "loss": 3.5579, "step": 551 }, { "epoch": 7.361204013377926, "grad_norm": 0.7886701822280884, "learning_rate": 0.00017570634037819799, "loss": 3.4636, "step": 552 }, { "epoch": 7.374581939799331, "grad_norm": 0.8931376934051514, "learning_rate": 0.0001756618464961068, "loss": 3.4375, "step": 553 }, { "epoch": 7.3879598662207355, "grad_norm": 0.7599623799324036, "learning_rate": 0.00017561735261401558, "loss": 3.6551, "step": 554 }, { "epoch": 7.40133779264214, "grad_norm": 0.7692762613296509, "learning_rate": 0.00017557285873192436, "loss": 3.6373, "step": 555 }, { "epoch": 7.414715719063545, "grad_norm": 0.8861828446388245, "learning_rate": 0.00017552836484983314, "loss": 3.4791, "step": 556 }, { "epoch": 7.4280936454849495, "grad_norm": 0.9560372829437256, "learning_rate": 0.00017548387096774195, "loss": 3.7291, "step": 557 }, { "epoch": 7.441471571906354, "grad_norm": 0.8745344281196594, "learning_rate": 0.00017543937708565073, "loss": 3.3071, "step": 558 }, { "epoch": 7.454849498327759, "grad_norm": 0.8178285360336304, "learning_rate": 0.00017539488320355952, "loss": 3.4738, "step": 559 }, { "epoch": 7.468227424749164, "grad_norm": 0.8611259460449219, "learning_rate": 0.0001753503893214683, "loss": 3.25, "step": 560 }, { "epoch": 7.481605351170568, "grad_norm": 0.8623505234718323, "learning_rate": 0.0001753058954393771, "loss": 3.3701, "step": 561 }, { "epoch": 7.494983277591973, "grad_norm": 0.76930171251297, "learning_rate": 0.00017526140155728586, "loss": 3.332, "step": 562 }, { "epoch": 7.508361204013378, "grad_norm": 0.8986758589744568, "learning_rate": 0.00017521690767519467, "loss": 3.3927, "step": 563 }, { "epoch": 7.521739130434782, "grad_norm": 0.9844257831573486, "learning_rate": 0.00017517241379310346, "loss": 3.5664, "step": 564 }, { "epoch": 7.535117056856187, "grad_norm": 0.983921229839325, "learning_rate": 0.00017512791991101224, "loss": 3.3888, "step": 565 }, { "epoch": 7.548494983277592, "grad_norm": 0.8052308559417725, "learning_rate": 0.00017508342602892102, "loss": 3.4809, "step": 566 }, { "epoch": 7.561872909698996, "grad_norm": 0.7996425032615662, "learning_rate": 0.00017503893214682983, "loss": 3.4793, "step": 567 }, { "epoch": 7.575250836120401, "grad_norm": 0.8453391194343567, "learning_rate": 0.0001749944382647386, "loss": 3.3199, "step": 568 }, { "epoch": 7.588628762541806, "grad_norm": 0.8720147013664246, "learning_rate": 0.0001749499443826474, "loss": 3.5612, "step": 569 }, { "epoch": 7.602006688963211, "grad_norm": 0.9093672633171082, "learning_rate": 0.00017490545050055618, "loss": 3.0509, "step": 570 }, { "epoch": 7.615384615384615, "grad_norm": 0.8936579823493958, "learning_rate": 0.000174860956618465, "loss": 3.4408, "step": 571 }, { "epoch": 7.6287625418060205, "grad_norm": 0.7683162689208984, "learning_rate": 0.00017481646273637374, "loss": 3.3536, "step": 572 }, { "epoch": 7.642140468227424, "grad_norm": 0.7943581342697144, "learning_rate": 0.00017477196885428255, "loss": 3.4542, "step": 573 }, { "epoch": 7.65551839464883, "grad_norm": 0.8183353543281555, "learning_rate": 0.00017472747497219133, "loss": 3.603, "step": 574 }, { "epoch": 7.668896321070234, "grad_norm": 0.7816463708877563, "learning_rate": 0.00017468298109010012, "loss": 3.7388, "step": 575 }, { "epoch": 7.682274247491639, "grad_norm": 0.8167930245399475, "learning_rate": 0.0001746384872080089, "loss": 3.7743, "step": 576 }, { "epoch": 7.695652173913043, "grad_norm": 0.832392156124115, "learning_rate": 0.0001745939933259177, "loss": 3.7488, "step": 577 }, { "epoch": 7.709030100334449, "grad_norm": 0.9362333416938782, "learning_rate": 0.0001745494994438265, "loss": 3.5722, "step": 578 }, { "epoch": 7.722408026755852, "grad_norm": 1.0247248411178589, "learning_rate": 0.00017450500556173527, "loss": 3.2048, "step": 579 }, { "epoch": 7.735785953177258, "grad_norm": 0.8833767175674438, "learning_rate": 0.00017446051167964406, "loss": 3.389, "step": 580 }, { "epoch": 7.749163879598662, "grad_norm": 0.8344758749008179, "learning_rate": 0.00017441601779755287, "loss": 3.5264, "step": 581 }, { "epoch": 7.762541806020067, "grad_norm": 0.9771448373794556, "learning_rate": 0.00017437152391546162, "loss": 3.041, "step": 582 }, { "epoch": 7.775919732441472, "grad_norm": 0.8279567956924438, "learning_rate": 0.00017432703003337043, "loss": 3.6273, "step": 583 }, { "epoch": 7.789297658862877, "grad_norm": 0.957206130027771, "learning_rate": 0.0001742825361512792, "loss": 3.3831, "step": 584 }, { "epoch": 7.802675585284281, "grad_norm": 0.860619843006134, "learning_rate": 0.000174238042269188, "loss": 3.4566, "step": 585 }, { "epoch": 7.816053511705686, "grad_norm": 0.8725448846817017, "learning_rate": 0.00017419354838709678, "loss": 3.6594, "step": 586 }, { "epoch": 7.829431438127091, "grad_norm": 0.8343111276626587, "learning_rate": 0.00017414905450500556, "loss": 3.3423, "step": 587 }, { "epoch": 7.842809364548495, "grad_norm": 0.9043267965316772, "learning_rate": 0.00017410456062291437, "loss": 3.221, "step": 588 }, { "epoch": 7.8561872909699, "grad_norm": 0.9563114643096924, "learning_rate": 0.00017406006674082312, "loss": 3.5143, "step": 589 }, { "epoch": 7.869565217391305, "grad_norm": 0.9726302027702332, "learning_rate": 0.00017401557285873193, "loss": 3.4373, "step": 590 }, { "epoch": 7.882943143812709, "grad_norm": 0.9203178882598877, "learning_rate": 0.00017397107897664072, "loss": 3.6014, "step": 591 }, { "epoch": 7.896321070234114, "grad_norm": 0.9120233654975891, "learning_rate": 0.0001739265850945495, "loss": 3.1429, "step": 592 }, { "epoch": 7.909698996655519, "grad_norm": 0.7576518058776855, "learning_rate": 0.00017388209121245828, "loss": 3.2065, "step": 593 }, { "epoch": 7.923076923076923, "grad_norm": 0.9629240036010742, "learning_rate": 0.0001738375973303671, "loss": 3.4788, "step": 594 }, { "epoch": 7.936454849498328, "grad_norm": 0.8390881419181824, "learning_rate": 0.00017379310344827587, "loss": 3.2857, "step": 595 }, { "epoch": 7.949832775919733, "grad_norm": 0.8708979487419128, "learning_rate": 0.00017374860956618466, "loss": 3.3321, "step": 596 }, { "epoch": 7.963210702341137, "grad_norm": 0.7076835632324219, "learning_rate": 0.00017370411568409344, "loss": 3.5905, "step": 597 }, { "epoch": 7.976588628762542, "grad_norm": 1.016526222229004, "learning_rate": 0.00017365962180200225, "loss": 3.4053, "step": 598 }, { "epoch": 7.989966555183947, "grad_norm": 0.7592278718948364, "learning_rate": 0.000173615127919911, "loss": 3.3968, "step": 599 }, { "epoch": 8.0, "grad_norm": 1.0106462240219116, "learning_rate": 0.0001735706340378198, "loss": 3.3722, "step": 600 }, { "epoch": 8.013377926421406, "grad_norm": 0.740808367729187, "learning_rate": 0.0001735261401557286, "loss": 3.394, "step": 601 }, { "epoch": 8.02675585284281, "grad_norm": 0.6732498407363892, "learning_rate": 0.00017348164627363738, "loss": 3.297, "step": 602 }, { "epoch": 8.040133779264215, "grad_norm": 0.8319197297096252, "learning_rate": 0.00017343715239154616, "loss": 3.2898, "step": 603 }, { "epoch": 8.053511705685619, "grad_norm": 0.7834349870681763, "learning_rate": 0.00017339265850945497, "loss": 3.2341, "step": 604 }, { "epoch": 8.066889632107024, "grad_norm": 0.705737292766571, "learning_rate": 0.00017334816462736375, "loss": 3.3429, "step": 605 }, { "epoch": 8.080267558528428, "grad_norm": 0.8270958065986633, "learning_rate": 0.00017330367074527253, "loss": 3.0458, "step": 606 }, { "epoch": 8.093645484949834, "grad_norm": 0.7254801392555237, "learning_rate": 0.00017325917686318132, "loss": 3.5143, "step": 607 }, { "epoch": 8.107023411371237, "grad_norm": 0.8450751900672913, "learning_rate": 0.00017321468298109013, "loss": 3.1507, "step": 608 }, { "epoch": 8.120401337792643, "grad_norm": 0.7936837673187256, "learning_rate": 0.00017317018909899888, "loss": 3.3979, "step": 609 }, { "epoch": 8.133779264214047, "grad_norm": 0.6496401429176331, "learning_rate": 0.0001731256952169077, "loss": 3.3613, "step": 610 }, { "epoch": 8.147157190635452, "grad_norm": 0.8721235990524292, "learning_rate": 0.00017308120133481647, "loss": 3.4299, "step": 611 }, { "epoch": 8.160535117056856, "grad_norm": 0.7671874761581421, "learning_rate": 0.00017303670745272525, "loss": 3.3333, "step": 612 }, { "epoch": 8.173913043478262, "grad_norm": 0.6427144408226013, "learning_rate": 0.00017299221357063404, "loss": 3.3304, "step": 613 }, { "epoch": 8.187290969899665, "grad_norm": 0.7999966144561768, "learning_rate": 0.00017294771968854285, "loss": 3.4016, "step": 614 }, { "epoch": 8.200668896321071, "grad_norm": 0.8216206431388855, "learning_rate": 0.00017290322580645163, "loss": 3.1724, "step": 615 }, { "epoch": 8.214046822742475, "grad_norm": 0.7364024519920349, "learning_rate": 0.0001728587319243604, "loss": 3.34, "step": 616 }, { "epoch": 8.22742474916388, "grad_norm": 0.7688239812850952, "learning_rate": 0.0001728142380422692, "loss": 3.2114, "step": 617 }, { "epoch": 8.240802675585284, "grad_norm": 0.8786870837211609, "learning_rate": 0.000172769744160178, "loss": 3.378, "step": 618 }, { "epoch": 8.25418060200669, "grad_norm": 0.9048855900764465, "learning_rate": 0.00017272525027808676, "loss": 3.1801, "step": 619 }, { "epoch": 8.267558528428093, "grad_norm": 0.657189130783081, "learning_rate": 0.00017268075639599557, "loss": 3.1389, "step": 620 }, { "epoch": 8.280936454849499, "grad_norm": 0.8015987873077393, "learning_rate": 0.00017263626251390435, "loss": 3.4621, "step": 621 }, { "epoch": 8.294314381270903, "grad_norm": 0.8232793807983398, "learning_rate": 0.00017259176863181313, "loss": 3.1763, "step": 622 }, { "epoch": 8.307692307692308, "grad_norm": 0.7447130680084229, "learning_rate": 0.00017254727474972192, "loss": 3.1266, "step": 623 }, { "epoch": 8.321070234113712, "grad_norm": 0.7649840116500854, "learning_rate": 0.00017250278086763072, "loss": 3.1959, "step": 624 }, { "epoch": 8.334448160535118, "grad_norm": 0.7119699120521545, "learning_rate": 0.0001724582869855395, "loss": 3.5626, "step": 625 }, { "epoch": 8.347826086956522, "grad_norm": 0.8238518834114075, "learning_rate": 0.00017241379310344826, "loss": 3.1873, "step": 626 }, { "epoch": 8.361204013377927, "grad_norm": 0.8248497843742371, "learning_rate": 0.00017236929922135707, "loss": 3.5686, "step": 627 }, { "epoch": 8.37458193979933, "grad_norm": 0.8704475164413452, "learning_rate": 0.00017232480533926585, "loss": 3.3195, "step": 628 }, { "epoch": 8.387959866220736, "grad_norm": 0.8160929083824158, "learning_rate": 0.00017228031145717464, "loss": 3.3808, "step": 629 }, { "epoch": 8.40133779264214, "grad_norm": 0.8537085652351379, "learning_rate": 0.00017223581757508342, "loss": 3.3638, "step": 630 }, { "epoch": 8.414715719063546, "grad_norm": 0.876519501209259, "learning_rate": 0.00017219132369299223, "loss": 3.2019, "step": 631 }, { "epoch": 8.42809364548495, "grad_norm": 0.6573703289031982, "learning_rate": 0.000172146829810901, "loss": 3.4998, "step": 632 }, { "epoch": 8.441471571906355, "grad_norm": 0.8822937607765198, "learning_rate": 0.0001721023359288098, "loss": 3.4281, "step": 633 }, { "epoch": 8.454849498327759, "grad_norm": 0.764872670173645, "learning_rate": 0.00017205784204671858, "loss": 3.3693, "step": 634 }, { "epoch": 8.468227424749164, "grad_norm": 0.7492384910583496, "learning_rate": 0.00017201334816462739, "loss": 3.5672, "step": 635 }, { "epoch": 8.481605351170568, "grad_norm": 0.8037416934967041, "learning_rate": 0.00017196885428253614, "loss": 3.3804, "step": 636 }, { "epoch": 8.494983277591974, "grad_norm": 0.8380945324897766, "learning_rate": 0.00017192436040044495, "loss": 3.2272, "step": 637 }, { "epoch": 8.508361204013378, "grad_norm": 0.8467932939529419, "learning_rate": 0.00017187986651835373, "loss": 3.2649, "step": 638 }, { "epoch": 8.521739130434783, "grad_norm": 0.751542866230011, "learning_rate": 0.00017183537263626252, "loss": 3.4135, "step": 639 }, { "epoch": 8.535117056856187, "grad_norm": 0.7618190050125122, "learning_rate": 0.0001717908787541713, "loss": 3.483, "step": 640 }, { "epoch": 8.548494983277592, "grad_norm": 0.9661890864372253, "learning_rate": 0.0001717463848720801, "loss": 3.2201, "step": 641 }, { "epoch": 8.561872909698996, "grad_norm": 0.8655393719673157, "learning_rate": 0.0001717018909899889, "loss": 3.4142, "step": 642 }, { "epoch": 8.575250836120402, "grad_norm": 0.796047031879425, "learning_rate": 0.00017165739710789767, "loss": 3.3558, "step": 643 }, { "epoch": 8.588628762541806, "grad_norm": 1.0098161697387695, "learning_rate": 0.00017161290322580645, "loss": 3.553, "step": 644 }, { "epoch": 8.602006688963211, "grad_norm": 1.1880302429199219, "learning_rate": 0.00017156840934371526, "loss": 3.3581, "step": 645 }, { "epoch": 8.615384615384615, "grad_norm": 0.9361609220504761, "learning_rate": 0.00017152391546162402, "loss": 3.4543, "step": 646 }, { "epoch": 8.62876254180602, "grad_norm": 0.8794479966163635, "learning_rate": 0.00017147942157953283, "loss": 3.3654, "step": 647 }, { "epoch": 8.642140468227424, "grad_norm": 0.9263080954551697, "learning_rate": 0.0001714349276974416, "loss": 3.4376, "step": 648 }, { "epoch": 8.65551839464883, "grad_norm": 1.0015815496444702, "learning_rate": 0.0001713904338153504, "loss": 3.3533, "step": 649 }, { "epoch": 8.668896321070234, "grad_norm": 0.8525484204292297, "learning_rate": 0.00017134593993325918, "loss": 3.3897, "step": 650 }, { "epoch": 8.68227424749164, "grad_norm": 0.7196484804153442, "learning_rate": 0.00017130144605116799, "loss": 3.2428, "step": 651 }, { "epoch": 8.695652173913043, "grad_norm": 0.8779593706130981, "learning_rate": 0.00017125695216907677, "loss": 3.5471, "step": 652 }, { "epoch": 8.709030100334449, "grad_norm": 0.9256909489631653, "learning_rate": 0.00017121245828698555, "loss": 3.1776, "step": 653 }, { "epoch": 8.722408026755852, "grad_norm": 0.7774620652198792, "learning_rate": 0.00017116796440489433, "loss": 3.4836, "step": 654 }, { "epoch": 8.735785953177258, "grad_norm": 0.8112596273422241, "learning_rate": 0.00017112347052280314, "loss": 3.5194, "step": 655 }, { "epoch": 8.749163879598662, "grad_norm": 0.7350602746009827, "learning_rate": 0.0001710789766407119, "loss": 3.2248, "step": 656 }, { "epoch": 8.762541806020067, "grad_norm": 0.8231781125068665, "learning_rate": 0.0001710344827586207, "loss": 3.4659, "step": 657 }, { "epoch": 8.775919732441471, "grad_norm": 0.8921564221382141, "learning_rate": 0.0001709899888765295, "loss": 3.2712, "step": 658 }, { "epoch": 8.789297658862877, "grad_norm": 0.8921830058097839, "learning_rate": 0.00017094549499443827, "loss": 3.5071, "step": 659 }, { "epoch": 8.80267558528428, "grad_norm": 0.7809077501296997, "learning_rate": 0.00017090100111234705, "loss": 3.6639, "step": 660 }, { "epoch": 8.816053511705686, "grad_norm": 0.9431234002113342, "learning_rate": 0.00017085650723025586, "loss": 3.2795, "step": 661 }, { "epoch": 8.82943143812709, "grad_norm": 0.9707314968109131, "learning_rate": 0.00017081201334816465, "loss": 3.3395, "step": 662 }, { "epoch": 8.842809364548495, "grad_norm": 0.7547470331192017, "learning_rate": 0.00017076751946607343, "loss": 3.5316, "step": 663 }, { "epoch": 8.856187290969899, "grad_norm": 0.8989250659942627, "learning_rate": 0.0001707230255839822, "loss": 3.4029, "step": 664 }, { "epoch": 8.869565217391305, "grad_norm": 1.0237400531768799, "learning_rate": 0.00017067853170189102, "loss": 3.5014, "step": 665 }, { "epoch": 8.882943143812708, "grad_norm": 0.7289263010025024, "learning_rate": 0.00017063403781979978, "loss": 3.4211, "step": 666 }, { "epoch": 8.896321070234114, "grad_norm": 0.7978695034980774, "learning_rate": 0.00017058954393770856, "loss": 3.5421, "step": 667 }, { "epoch": 8.909698996655518, "grad_norm": 0.7401835918426514, "learning_rate": 0.00017054505005561737, "loss": 3.3419, "step": 668 }, { "epoch": 8.923076923076923, "grad_norm": 0.8952983617782593, "learning_rate": 0.00017050055617352615, "loss": 3.1322, "step": 669 }, { "epoch": 8.936454849498327, "grad_norm": 0.6922047734260559, "learning_rate": 0.00017045606229143493, "loss": 3.5872, "step": 670 }, { "epoch": 8.949832775919733, "grad_norm": 0.8618977665901184, "learning_rate": 0.00017041156840934371, "loss": 3.1366, "step": 671 }, { "epoch": 8.963210702341136, "grad_norm": 0.7933799624443054, "learning_rate": 0.00017036707452725252, "loss": 3.3108, "step": 672 }, { "epoch": 8.976588628762542, "grad_norm": 0.718401312828064, "learning_rate": 0.00017032258064516128, "loss": 3.3771, "step": 673 }, { "epoch": 8.989966555183946, "grad_norm": 0.8096804618835449, "learning_rate": 0.0001702780867630701, "loss": 3.3225, "step": 674 }, { "epoch": 9.0, "grad_norm": 1.0055694580078125, "learning_rate": 0.00017023359288097887, "loss": 3.5545, "step": 675 }, { "epoch": 9.013377926421406, "grad_norm": 0.710986852645874, "learning_rate": 0.00017018909899888765, "loss": 3.3333, "step": 676 }, { "epoch": 9.02675585284281, "grad_norm": 0.672132134437561, "learning_rate": 0.00017014460511679644, "loss": 2.9995, "step": 677 }, { "epoch": 9.040133779264215, "grad_norm": 0.6752933263778687, "learning_rate": 0.00017010011123470525, "loss": 3.3571, "step": 678 }, { "epoch": 9.053511705685619, "grad_norm": 0.6553521156311035, "learning_rate": 0.00017005561735261403, "loss": 3.3407, "step": 679 }, { "epoch": 9.066889632107024, "grad_norm": 0.7492311596870422, "learning_rate": 0.0001700111234705228, "loss": 3.325, "step": 680 }, { "epoch": 9.080267558528428, "grad_norm": 0.736139714717865, "learning_rate": 0.0001699666295884316, "loss": 3.2626, "step": 681 }, { "epoch": 9.093645484949834, "grad_norm": 0.7131486535072327, "learning_rate": 0.0001699221357063404, "loss": 3.2612, "step": 682 }, { "epoch": 9.107023411371237, "grad_norm": 0.7037603855133057, "learning_rate": 0.00016987764182424916, "loss": 3.2418, "step": 683 }, { "epoch": 9.120401337792643, "grad_norm": 0.685518205165863, "learning_rate": 0.00016983314794215797, "loss": 3.4854, "step": 684 }, { "epoch": 9.133779264214047, "grad_norm": 0.6050254106521606, "learning_rate": 0.00016978865406006675, "loss": 3.2312, "step": 685 }, { "epoch": 9.147157190635452, "grad_norm": 0.6932830810546875, "learning_rate": 0.00016974416017797553, "loss": 3.4634, "step": 686 }, { "epoch": 9.160535117056856, "grad_norm": 0.7055158615112305, "learning_rate": 0.00016969966629588431, "loss": 3.1408, "step": 687 }, { "epoch": 9.173913043478262, "grad_norm": 0.6887643337249756, "learning_rate": 0.00016965517241379312, "loss": 3.0697, "step": 688 }, { "epoch": 9.187290969899665, "grad_norm": 0.7201237082481384, "learning_rate": 0.0001696106785317019, "loss": 3.303, "step": 689 }, { "epoch": 9.200668896321071, "grad_norm": 0.6617894768714905, "learning_rate": 0.0001695661846496107, "loss": 3.4846, "step": 690 }, { "epoch": 9.214046822742475, "grad_norm": 0.8979818224906921, "learning_rate": 0.00016952169076751947, "loss": 3.1898, "step": 691 }, { "epoch": 9.22742474916388, "grad_norm": 0.9507981538772583, "learning_rate": 0.00016947719688542828, "loss": 3.4748, "step": 692 }, { "epoch": 9.240802675585284, "grad_norm": 0.7935391068458557, "learning_rate": 0.00016943270300333704, "loss": 3.3661, "step": 693 }, { "epoch": 9.25418060200669, "grad_norm": 0.7437114715576172, "learning_rate": 0.00016938820912124584, "loss": 3.4407, "step": 694 }, { "epoch": 9.267558528428093, "grad_norm": 0.680610179901123, "learning_rate": 0.00016934371523915463, "loss": 3.3135, "step": 695 }, { "epoch": 9.280936454849499, "grad_norm": 0.846716582775116, "learning_rate": 0.0001692992213570634, "loss": 3.3923, "step": 696 }, { "epoch": 9.294314381270903, "grad_norm": 0.9567786455154419, "learning_rate": 0.0001692547274749722, "loss": 3.1405, "step": 697 }, { "epoch": 9.307692307692308, "grad_norm": 0.7509344816207886, "learning_rate": 0.000169210233592881, "loss": 3.4031, "step": 698 }, { "epoch": 9.321070234113712, "grad_norm": 0.8118243217468262, "learning_rate": 0.00016916573971078978, "loss": 3.317, "step": 699 }, { "epoch": 9.334448160535118, "grad_norm": 0.7445617318153381, "learning_rate": 0.00016912124582869857, "loss": 3.3989, "step": 700 }, { "epoch": 9.347826086956522, "grad_norm": 0.7520869970321655, "learning_rate": 0.00016907675194660735, "loss": 3.08, "step": 701 }, { "epoch": 9.361204013377927, "grad_norm": 0.7466426491737366, "learning_rate": 0.00016903225806451616, "loss": 3.3338, "step": 702 }, { "epoch": 9.37458193979933, "grad_norm": 0.7595514059066772, "learning_rate": 0.0001689877641824249, "loss": 3.08, "step": 703 }, { "epoch": 9.387959866220736, "grad_norm": 0.713771641254425, "learning_rate": 0.00016894327030033372, "loss": 3.236, "step": 704 }, { "epoch": 9.40133779264214, "grad_norm": 0.670863687992096, "learning_rate": 0.0001688987764182425, "loss": 3.4491, "step": 705 }, { "epoch": 9.414715719063546, "grad_norm": 0.8842789530754089, "learning_rate": 0.0001688542825361513, "loss": 3.3444, "step": 706 }, { "epoch": 9.42809364548495, "grad_norm": 0.8298172950744629, "learning_rate": 0.00016880978865406007, "loss": 3.3818, "step": 707 }, { "epoch": 9.441471571906355, "grad_norm": 0.7407504320144653, "learning_rate": 0.00016876529477196885, "loss": 3.2294, "step": 708 }, { "epoch": 9.454849498327759, "grad_norm": 0.6642070412635803, "learning_rate": 0.00016872080088987766, "loss": 3.3009, "step": 709 }, { "epoch": 9.468227424749164, "grad_norm": 0.7627503275871277, "learning_rate": 0.00016867630700778642, "loss": 3.3486, "step": 710 }, { "epoch": 9.481605351170568, "grad_norm": 0.7307603359222412, "learning_rate": 0.00016863181312569523, "loss": 3.0257, "step": 711 }, { "epoch": 9.494983277591974, "grad_norm": 0.7932866215705872, "learning_rate": 0.000168587319243604, "loss": 3.4191, "step": 712 }, { "epoch": 9.508361204013378, "grad_norm": 0.7457575798034668, "learning_rate": 0.0001685428253615128, "loss": 3.2283, "step": 713 }, { "epoch": 9.521739130434783, "grad_norm": 0.6718200445175171, "learning_rate": 0.00016849833147942157, "loss": 3.4569, "step": 714 }, { "epoch": 9.535117056856187, "grad_norm": 0.8189072608947754, "learning_rate": 0.00016845383759733038, "loss": 3.2585, "step": 715 }, { "epoch": 9.548494983277592, "grad_norm": 0.6895336508750916, "learning_rate": 0.00016840934371523917, "loss": 3.2417, "step": 716 }, { "epoch": 9.561872909698996, "grad_norm": 0.723173975944519, "learning_rate": 0.00016836484983314795, "loss": 3.3243, "step": 717 }, { "epoch": 9.575250836120402, "grad_norm": 0.8354344964027405, "learning_rate": 0.00016832035595105673, "loss": 3.3585, "step": 718 }, { "epoch": 9.588628762541806, "grad_norm": 0.6736294031143188, "learning_rate": 0.00016827586206896554, "loss": 3.188, "step": 719 }, { "epoch": 9.602006688963211, "grad_norm": 0.7790263295173645, "learning_rate": 0.0001682313681868743, "loss": 3.3171, "step": 720 }, { "epoch": 9.615384615384615, "grad_norm": 0.6426937580108643, "learning_rate": 0.0001681868743047831, "loss": 3.0854, "step": 721 }, { "epoch": 9.62876254180602, "grad_norm": 0.7029106020927429, "learning_rate": 0.0001681423804226919, "loss": 3.3629, "step": 722 }, { "epoch": 9.642140468227424, "grad_norm": 0.8353022933006287, "learning_rate": 0.00016809788654060067, "loss": 3.3715, "step": 723 }, { "epoch": 9.65551839464883, "grad_norm": 0.8578335642814636, "learning_rate": 0.00016805339265850945, "loss": 3.3554, "step": 724 }, { "epoch": 9.668896321070234, "grad_norm": 0.6998556852340698, "learning_rate": 0.00016800889877641826, "loss": 3.3043, "step": 725 }, { "epoch": 9.68227424749164, "grad_norm": 0.7134855389595032, "learning_rate": 0.00016796440489432704, "loss": 3.5856, "step": 726 }, { "epoch": 9.695652173913043, "grad_norm": 0.6636050939559937, "learning_rate": 0.00016791991101223583, "loss": 3.2156, "step": 727 }, { "epoch": 9.709030100334449, "grad_norm": 0.7757130861282349, "learning_rate": 0.0001678754171301446, "loss": 3.4974, "step": 728 }, { "epoch": 9.722408026755852, "grad_norm": 0.770648181438446, "learning_rate": 0.00016783092324805342, "loss": 3.3251, "step": 729 }, { "epoch": 9.735785953177258, "grad_norm": 0.7728201746940613, "learning_rate": 0.00016778642936596217, "loss": 3.2666, "step": 730 }, { "epoch": 9.749163879598662, "grad_norm": 0.8277239203453064, "learning_rate": 0.00016774193548387098, "loss": 3.3867, "step": 731 }, { "epoch": 9.762541806020067, "grad_norm": 0.6534886360168457, "learning_rate": 0.00016769744160177977, "loss": 3.175, "step": 732 }, { "epoch": 9.775919732441471, "grad_norm": 0.8508428335189819, "learning_rate": 0.00016765294771968855, "loss": 3.2084, "step": 733 }, { "epoch": 9.789297658862877, "grad_norm": 0.7656721472740173, "learning_rate": 0.00016760845383759733, "loss": 3.1426, "step": 734 }, { "epoch": 9.80267558528428, "grad_norm": 0.9495553970336914, "learning_rate": 0.00016756395995550614, "loss": 3.1623, "step": 735 }, { "epoch": 9.816053511705686, "grad_norm": 0.7998641729354858, "learning_rate": 0.00016751946607341492, "loss": 3.3893, "step": 736 }, { "epoch": 9.82943143812709, "grad_norm": 0.8124551177024841, "learning_rate": 0.0001674749721913237, "loss": 3.2012, "step": 737 }, { "epoch": 9.842809364548495, "grad_norm": 0.6332049369812012, "learning_rate": 0.0001674304783092325, "loss": 3.3384, "step": 738 }, { "epoch": 9.856187290969899, "grad_norm": 0.7114555835723877, "learning_rate": 0.0001673859844271413, "loss": 3.0802, "step": 739 }, { "epoch": 9.869565217391305, "grad_norm": 0.7175182700157166, "learning_rate": 0.00016734149054505005, "loss": 3.2572, "step": 740 }, { "epoch": 9.882943143812708, "grad_norm": 0.7724816799163818, "learning_rate": 0.00016729699666295886, "loss": 3.1078, "step": 741 }, { "epoch": 9.896321070234114, "grad_norm": 0.7834901213645935, "learning_rate": 0.00016725250278086764, "loss": 3.2513, "step": 742 }, { "epoch": 9.909698996655518, "grad_norm": 0.663495659828186, "learning_rate": 0.00016720800889877643, "loss": 3.327, "step": 743 }, { "epoch": 9.923076923076923, "grad_norm": 0.7828975319862366, "learning_rate": 0.0001671635150166852, "loss": 3.609, "step": 744 }, { "epoch": 9.936454849498327, "grad_norm": 0.6747825145721436, "learning_rate": 0.00016711902113459402, "loss": 3.4479, "step": 745 }, { "epoch": 9.949832775919733, "grad_norm": 0.7816379070281982, "learning_rate": 0.0001670745272525028, "loss": 3.2369, "step": 746 }, { "epoch": 9.963210702341136, "grad_norm": 0.7011098265647888, "learning_rate": 0.00016703003337041158, "loss": 2.9103, "step": 747 }, { "epoch": 9.976588628762542, "grad_norm": 0.7165176868438721, "learning_rate": 0.00016698553948832036, "loss": 3.1669, "step": 748 }, { "epoch": 9.989966555183946, "grad_norm": 0.766315758228302, "learning_rate": 0.00016694104560622915, "loss": 3.1138, "step": 749 }, { "epoch": 10.0, "grad_norm": 0.876315176486969, "learning_rate": 0.00016689655172413793, "loss": 3.4851, "step": 750 }, { "epoch": 10.013377926421406, "grad_norm": 0.807686984539032, "learning_rate": 0.0001668520578420467, "loss": 3.193, "step": 751 }, { "epoch": 10.02675585284281, "grad_norm": 0.7085704803466797, "learning_rate": 0.00016680756395995552, "loss": 3.4797, "step": 752 }, { "epoch": 10.040133779264215, "grad_norm": 0.7119605541229248, "learning_rate": 0.0001667630700778643, "loss": 3.1713, "step": 753 }, { "epoch": 10.053511705685619, "grad_norm": 0.6569423675537109, "learning_rate": 0.00016671857619577309, "loss": 3.1661, "step": 754 }, { "epoch": 10.066889632107024, "grad_norm": 0.8173550367355347, "learning_rate": 0.00016667408231368187, "loss": 2.8467, "step": 755 }, { "epoch": 10.080267558528428, "grad_norm": 0.7261365056037903, "learning_rate": 0.00016662958843159068, "loss": 3.3679, "step": 756 }, { "epoch": 10.093645484949834, "grad_norm": 0.7997227311134338, "learning_rate": 0.00016658509454949943, "loss": 3.0985, "step": 757 }, { "epoch": 10.107023411371237, "grad_norm": 0.653391420841217, "learning_rate": 0.00016654060066740824, "loss": 3.2156, "step": 758 }, { "epoch": 10.120401337792643, "grad_norm": 0.6799002289772034, "learning_rate": 0.00016649610678531703, "loss": 3.3302, "step": 759 }, { "epoch": 10.133779264214047, "grad_norm": 0.6444498896598816, "learning_rate": 0.0001664516129032258, "loss": 3.2813, "step": 760 }, { "epoch": 10.147157190635452, "grad_norm": 1.064769983291626, "learning_rate": 0.0001664071190211346, "loss": 3.1852, "step": 761 }, { "epoch": 10.160535117056856, "grad_norm": 0.6534339189529419, "learning_rate": 0.0001663626251390434, "loss": 3.1563, "step": 762 }, { "epoch": 10.173913043478262, "grad_norm": 0.6909127235412598, "learning_rate": 0.00016631813125695218, "loss": 3.2728, "step": 763 }, { "epoch": 10.187290969899665, "grad_norm": 0.6549767851829529, "learning_rate": 0.00016627363737486096, "loss": 3.0491, "step": 764 }, { "epoch": 10.200668896321071, "grad_norm": 0.678054928779602, "learning_rate": 0.00016622914349276975, "loss": 3.4807, "step": 765 }, { "epoch": 10.214046822742475, "grad_norm": 0.613358199596405, "learning_rate": 0.00016618464961067856, "loss": 3.1746, "step": 766 }, { "epoch": 10.22742474916388, "grad_norm": 0.6624737977981567, "learning_rate": 0.0001661401557285873, "loss": 2.8528, "step": 767 }, { "epoch": 10.240802675585284, "grad_norm": 0.65067458152771, "learning_rate": 0.00016609566184649612, "loss": 3.1843, "step": 768 }, { "epoch": 10.25418060200669, "grad_norm": 0.6192435622215271, "learning_rate": 0.0001660511679644049, "loss": 3.3162, "step": 769 }, { "epoch": 10.267558528428093, "grad_norm": 0.6456341743469238, "learning_rate": 0.00016600667408231369, "loss": 3.2302, "step": 770 }, { "epoch": 10.280936454849499, "grad_norm": 2.357724189758301, "learning_rate": 0.00016596218020022247, "loss": 3.2741, "step": 771 }, { "epoch": 10.294314381270903, "grad_norm": 0.6833475828170776, "learning_rate": 0.00016591768631813128, "loss": 3.1516, "step": 772 }, { "epoch": 10.307692307692308, "grad_norm": 0.5557199716567993, "learning_rate": 0.00016587319243604006, "loss": 3.281, "step": 773 }, { "epoch": 10.321070234113712, "grad_norm": 0.6617905497550964, "learning_rate": 0.00016582869855394884, "loss": 3.375, "step": 774 }, { "epoch": 10.334448160535118, "grad_norm": 0.5671921372413635, "learning_rate": 0.00016578420467185762, "loss": 3.4335, "step": 775 }, { "epoch": 10.347826086956522, "grad_norm": 0.8487278819084167, "learning_rate": 0.00016573971078976643, "loss": 3.143, "step": 776 }, { "epoch": 10.361204013377927, "grad_norm": 0.6489982604980469, "learning_rate": 0.0001656952169076752, "loss": 3.3258, "step": 777 }, { "epoch": 10.37458193979933, "grad_norm": 0.8773537278175354, "learning_rate": 0.000165650723025584, "loss": 3.0466, "step": 778 }, { "epoch": 10.387959866220736, "grad_norm": 0.5961865782737732, "learning_rate": 0.00016560622914349278, "loss": 3.3417, "step": 779 }, { "epoch": 10.40133779264214, "grad_norm": 0.6149600148200989, "learning_rate": 0.00016556173526140156, "loss": 3.0622, "step": 780 }, { "epoch": 10.414715719063546, "grad_norm": 0.7591158151626587, "learning_rate": 0.00016551724137931035, "loss": 3.2078, "step": 781 }, { "epoch": 10.42809364548495, "grad_norm": 0.7915151119232178, "learning_rate": 0.00016547274749721916, "loss": 3.3082, "step": 782 }, { "epoch": 10.441471571906355, "grad_norm": 0.8709903359413147, "learning_rate": 0.00016542825361512794, "loss": 3.3073, "step": 783 }, { "epoch": 10.454849498327759, "grad_norm": 0.6593959331512451, "learning_rate": 0.00016538375973303672, "loss": 3.1574, "step": 784 }, { "epoch": 10.468227424749164, "grad_norm": 0.8101013898849487, "learning_rate": 0.0001653392658509455, "loss": 3.3631, "step": 785 }, { "epoch": 10.481605351170568, "grad_norm": 0.8200273513793945, "learning_rate": 0.0001652947719688543, "loss": 3.0447, "step": 786 }, { "epoch": 10.494983277591974, "grad_norm": 0.7304090857505798, "learning_rate": 0.00016525027808676307, "loss": 3.3382, "step": 787 }, { "epoch": 10.508361204013378, "grad_norm": 0.7059088349342346, "learning_rate": 0.00016520578420467188, "loss": 3.085, "step": 788 }, { "epoch": 10.521739130434783, "grad_norm": 0.6664522886276245, "learning_rate": 0.00016516129032258066, "loss": 3.3198, "step": 789 }, { "epoch": 10.535117056856187, "grad_norm": 0.9230799078941345, "learning_rate": 0.00016511679644048944, "loss": 3.2502, "step": 790 }, { "epoch": 10.548494983277592, "grad_norm": 0.6974027752876282, "learning_rate": 0.00016507230255839822, "loss": 3.2432, "step": 791 }, { "epoch": 10.561872909698996, "grad_norm": 0.7186788320541382, "learning_rate": 0.000165027808676307, "loss": 3.2273, "step": 792 }, { "epoch": 10.575250836120402, "grad_norm": 0.6168047189712524, "learning_rate": 0.00016498331479421582, "loss": 3.2319, "step": 793 }, { "epoch": 10.588628762541806, "grad_norm": 0.6219142079353333, "learning_rate": 0.00016493882091212457, "loss": 2.9733, "step": 794 }, { "epoch": 10.602006688963211, "grad_norm": 0.573359489440918, "learning_rate": 0.00016489432703003338, "loss": 3.214, "step": 795 }, { "epoch": 10.615384615384615, "grad_norm": 0.678263783454895, "learning_rate": 0.00016484983314794216, "loss": 3.2331, "step": 796 }, { "epoch": 10.62876254180602, "grad_norm": 0.6593761444091797, "learning_rate": 0.00016480533926585095, "loss": 3.3605, "step": 797 }, { "epoch": 10.642140468227424, "grad_norm": 0.8732627034187317, "learning_rate": 0.00016476084538375973, "loss": 3.3531, "step": 798 }, { "epoch": 10.65551839464883, "grad_norm": 0.7198925614356995, "learning_rate": 0.00016471635150166854, "loss": 3.2927, "step": 799 }, { "epoch": 10.668896321070234, "grad_norm": 0.7275107502937317, "learning_rate": 0.00016467185761957732, "loss": 3.3695, "step": 800 }, { "epoch": 10.68227424749164, "grad_norm": 0.7077828049659729, "learning_rate": 0.0001646273637374861, "loss": 2.846, "step": 801 }, { "epoch": 10.695652173913043, "grad_norm": 0.7579251527786255, "learning_rate": 0.00016458286985539488, "loss": 3.1917, "step": 802 }, { "epoch": 10.709030100334449, "grad_norm": 0.7607265114784241, "learning_rate": 0.0001645383759733037, "loss": 3.2919, "step": 803 }, { "epoch": 10.722408026755852, "grad_norm": 0.7122685313224792, "learning_rate": 0.00016449388209121245, "loss": 3.7108, "step": 804 }, { "epoch": 10.735785953177258, "grad_norm": 0.7256726622581482, "learning_rate": 0.00016444938820912126, "loss": 3.3209, "step": 805 }, { "epoch": 10.749163879598662, "grad_norm": 0.7903631925582886, "learning_rate": 0.00016440489432703004, "loss": 3.398, "step": 806 }, { "epoch": 10.762541806020067, "grad_norm": 1.78204345703125, "learning_rate": 0.00016436040044493882, "loss": 3.1118, "step": 807 }, { "epoch": 10.775919732441471, "grad_norm": 0.7647016644477844, "learning_rate": 0.0001643159065628476, "loss": 3.1889, "step": 808 }, { "epoch": 10.789297658862877, "grad_norm": 0.8039811253547668, "learning_rate": 0.00016427141268075642, "loss": 3.0104, "step": 809 }, { "epoch": 10.80267558528428, "grad_norm": 0.6011155843734741, "learning_rate": 0.0001642269187986652, "loss": 3.0988, "step": 810 }, { "epoch": 10.816053511705686, "grad_norm": 0.8137276768684387, "learning_rate": 0.00016418242491657398, "loss": 3.4308, "step": 811 }, { "epoch": 10.82943143812709, "grad_norm": 0.6501771807670593, "learning_rate": 0.00016413793103448276, "loss": 3.1405, "step": 812 }, { "epoch": 10.842809364548495, "grad_norm": 0.678032636642456, "learning_rate": 0.00016409343715239157, "loss": 3.2243, "step": 813 }, { "epoch": 10.856187290969899, "grad_norm": 0.6830305457115173, "learning_rate": 0.00016404894327030033, "loss": 3.2685, "step": 814 }, { "epoch": 10.869565217391305, "grad_norm": 0.7482068538665771, "learning_rate": 0.00016400444938820914, "loss": 3.3363, "step": 815 }, { "epoch": 10.882943143812708, "grad_norm": 0.6592227816581726, "learning_rate": 0.00016395995550611792, "loss": 3.2914, "step": 816 }, { "epoch": 10.896321070234114, "grad_norm": 0.7520759105682373, "learning_rate": 0.0001639154616240267, "loss": 3.1371, "step": 817 }, { "epoch": 10.909698996655518, "grad_norm": 0.6802201271057129, "learning_rate": 0.00016387096774193548, "loss": 3.2925, "step": 818 }, { "epoch": 10.923076923076923, "grad_norm": 0.7528939247131348, "learning_rate": 0.0001638264738598443, "loss": 3.2147, "step": 819 }, { "epoch": 10.936454849498327, "grad_norm": 0.7070727348327637, "learning_rate": 0.00016378197997775308, "loss": 3.2649, "step": 820 }, { "epoch": 10.949832775919733, "grad_norm": 0.6121620535850525, "learning_rate": 0.00016373748609566186, "loss": 3.3999, "step": 821 }, { "epoch": 10.963210702341136, "grad_norm": 0.7355679273605347, "learning_rate": 0.00016369299221357064, "loss": 3.2561, "step": 822 }, { "epoch": 10.976588628762542, "grad_norm": 0.7294445037841797, "learning_rate": 0.00016364849833147945, "loss": 2.7019, "step": 823 }, { "epoch": 10.989966555183946, "grad_norm": 0.8628729581832886, "learning_rate": 0.0001636040044493882, "loss": 3.3655, "step": 824 }, { "epoch": 11.0, "grad_norm": 0.8784325122833252, "learning_rate": 0.00016355951056729702, "loss": 3.504, "step": 825 }, { "epoch": 11.013377926421406, "grad_norm": 0.6880869269371033, "learning_rate": 0.0001635150166852058, "loss": 3.1878, "step": 826 }, { "epoch": 11.02675585284281, "grad_norm": 0.5625393390655518, "learning_rate": 0.00016347052280311458, "loss": 3.2757, "step": 827 }, { "epoch": 11.040133779264215, "grad_norm": 0.5854038596153259, "learning_rate": 0.00016342602892102336, "loss": 3.0994, "step": 828 }, { "epoch": 11.053511705685619, "grad_norm": 0.6682130098342896, "learning_rate": 0.00016338153503893217, "loss": 3.1935, "step": 829 }, { "epoch": 11.066889632107024, "grad_norm": 0.6216278076171875, "learning_rate": 0.00016333704115684095, "loss": 3.1273, "step": 830 }, { "epoch": 11.080267558528428, "grad_norm": 0.61285001039505, "learning_rate": 0.0001632925472747497, "loss": 3.2377, "step": 831 }, { "epoch": 11.093645484949834, "grad_norm": 0.6559188365936279, "learning_rate": 0.00016324805339265852, "loss": 3.1084, "step": 832 }, { "epoch": 11.107023411371237, "grad_norm": 0.6322848200798035, "learning_rate": 0.0001632035595105673, "loss": 3.3178, "step": 833 }, { "epoch": 11.120401337792643, "grad_norm": 0.6306194067001343, "learning_rate": 0.00016315906562847608, "loss": 3.2012, "step": 834 }, { "epoch": 11.133779264214047, "grad_norm": 0.6923161149024963, "learning_rate": 0.00016311457174638487, "loss": 2.9328, "step": 835 }, { "epoch": 11.147157190635452, "grad_norm": 0.6900002360343933, "learning_rate": 0.00016307007786429368, "loss": 3.1436, "step": 836 }, { "epoch": 11.160535117056856, "grad_norm": 0.817669153213501, "learning_rate": 0.00016302558398220246, "loss": 3.0935, "step": 837 }, { "epoch": 11.173913043478262, "grad_norm": 0.7544119954109192, "learning_rate": 0.00016298109010011124, "loss": 3.0646, "step": 838 }, { "epoch": 11.187290969899665, "grad_norm": 0.7996231913566589, "learning_rate": 0.00016293659621802002, "loss": 3.1975, "step": 839 }, { "epoch": 11.200668896321071, "grad_norm": 0.6186792850494385, "learning_rate": 0.00016289210233592883, "loss": 3.31, "step": 840 }, { "epoch": 11.214046822742475, "grad_norm": 0.6926666498184204, "learning_rate": 0.0001628476084538376, "loss": 3.0765, "step": 841 }, { "epoch": 11.22742474916388, "grad_norm": 0.7475588917732239, "learning_rate": 0.0001628031145717464, "loss": 3.1743, "step": 842 }, { "epoch": 11.240802675585284, "grad_norm": 0.5520989298820496, "learning_rate": 0.00016275862068965518, "loss": 3.4243, "step": 843 }, { "epoch": 11.25418060200669, "grad_norm": 0.6556730270385742, "learning_rate": 0.00016271412680756396, "loss": 3.3293, "step": 844 }, { "epoch": 11.267558528428093, "grad_norm": 0.6509199738502502, "learning_rate": 0.00016266963292547274, "loss": 2.999, "step": 845 }, { "epoch": 11.280936454849499, "grad_norm": 0.6254273653030396, "learning_rate": 0.00016262513904338155, "loss": 3.1869, "step": 846 }, { "epoch": 11.294314381270903, "grad_norm": 0.7454530000686646, "learning_rate": 0.00016258064516129034, "loss": 3.3694, "step": 847 }, { "epoch": 11.307692307692308, "grad_norm": 0.7563592791557312, "learning_rate": 0.00016253615127919912, "loss": 3.0057, "step": 848 }, { "epoch": 11.321070234113712, "grad_norm": 0.6986783742904663, "learning_rate": 0.0001624916573971079, "loss": 2.9893, "step": 849 }, { "epoch": 11.334448160535118, "grad_norm": 0.7260631322860718, "learning_rate": 0.0001624471635150167, "loss": 3.1733, "step": 850 }, { "epoch": 11.347826086956522, "grad_norm": 0.7522863745689392, "learning_rate": 0.00016240266963292547, "loss": 2.9829, "step": 851 }, { "epoch": 11.361204013377927, "grad_norm": 0.7290140986442566, "learning_rate": 0.00016235817575083428, "loss": 3.1887, "step": 852 }, { "epoch": 11.37458193979933, "grad_norm": 0.6470169425010681, "learning_rate": 0.00016231368186874306, "loss": 3.2537, "step": 853 }, { "epoch": 11.387959866220736, "grad_norm": 0.863742470741272, "learning_rate": 0.00016226918798665184, "loss": 3.3443, "step": 854 }, { "epoch": 11.40133779264214, "grad_norm": 0.7363939881324768, "learning_rate": 0.00016222469410456062, "loss": 3.3653, "step": 855 }, { "epoch": 11.414715719063546, "grad_norm": 0.6548926830291748, "learning_rate": 0.00016218020022246943, "loss": 3.0373, "step": 856 }, { "epoch": 11.42809364548495, "grad_norm": 0.8087872862815857, "learning_rate": 0.00016213570634037821, "loss": 3.0118, "step": 857 }, { "epoch": 11.441471571906355, "grad_norm": 0.677811324596405, "learning_rate": 0.000162091212458287, "loss": 3.0339, "step": 858 }, { "epoch": 11.454849498327759, "grad_norm": 0.6907945275306702, "learning_rate": 0.00016204671857619578, "loss": 2.9496, "step": 859 }, { "epoch": 11.468227424749164, "grad_norm": 0.6940027475357056, "learning_rate": 0.0001620022246941046, "loss": 3.2825, "step": 860 }, { "epoch": 11.481605351170568, "grad_norm": 0.7132136225700378, "learning_rate": 0.00016195773081201334, "loss": 3.1271, "step": 861 }, { "epoch": 11.494983277591974, "grad_norm": 0.5997372269630432, "learning_rate": 0.00016191323692992215, "loss": 3.1292, "step": 862 }, { "epoch": 11.508361204013378, "grad_norm": 0.6468494534492493, "learning_rate": 0.00016186874304783094, "loss": 3.32, "step": 863 }, { "epoch": 11.521739130434783, "grad_norm": 0.5792532563209534, "learning_rate": 0.00016182424916573972, "loss": 3.3657, "step": 864 }, { "epoch": 11.535117056856187, "grad_norm": 0.8242068290710449, "learning_rate": 0.0001617797552836485, "loss": 3.0094, "step": 865 }, { "epoch": 11.548494983277592, "grad_norm": 0.9260333776473999, "learning_rate": 0.0001617352614015573, "loss": 3.1123, "step": 866 }, { "epoch": 11.561872909698996, "grad_norm": 0.6337956786155701, "learning_rate": 0.0001616907675194661, "loss": 3.1761, "step": 867 }, { "epoch": 11.575250836120402, "grad_norm": 0.6010364294052124, "learning_rate": 0.00016164627363737487, "loss": 3.2987, "step": 868 }, { "epoch": 11.588628762541806, "grad_norm": 0.7492111921310425, "learning_rate": 0.00016160177975528366, "loss": 2.9085, "step": 869 }, { "epoch": 11.602006688963211, "grad_norm": 0.6329553127288818, "learning_rate": 0.00016155728587319247, "loss": 3.1841, "step": 870 }, { "epoch": 11.615384615384615, "grad_norm": 0.768527626991272, "learning_rate": 0.00016151279199110122, "loss": 3.0061, "step": 871 }, { "epoch": 11.62876254180602, "grad_norm": 0.6333640813827515, "learning_rate": 0.00016146829810901003, "loss": 3.3046, "step": 872 }, { "epoch": 11.642140468227424, "grad_norm": 0.7457571625709534, "learning_rate": 0.00016142380422691881, "loss": 3.2728, "step": 873 }, { "epoch": 11.65551839464883, "grad_norm": 0.6389586925506592, "learning_rate": 0.0001613793103448276, "loss": 3.419, "step": 874 }, { "epoch": 11.668896321070234, "grad_norm": 0.8885436058044434, "learning_rate": 0.00016133481646273638, "loss": 3.0938, "step": 875 }, { "epoch": 11.68227424749164, "grad_norm": 0.7936431765556335, "learning_rate": 0.00016129032258064516, "loss": 3.305, "step": 876 }, { "epoch": 11.695652173913043, "grad_norm": 0.6133994460105896, "learning_rate": 0.00016124582869855397, "loss": 3.3474, "step": 877 }, { "epoch": 11.709030100334449, "grad_norm": 0.6638192534446716, "learning_rate": 0.00016120133481646273, "loss": 3.1418, "step": 878 }, { "epoch": 11.722408026755852, "grad_norm": 0.6820496320724487, "learning_rate": 0.00016115684093437154, "loss": 3.1421, "step": 879 }, { "epoch": 11.735785953177258, "grad_norm": 0.6057732105255127, "learning_rate": 0.00016111234705228032, "loss": 3.091, "step": 880 }, { "epoch": 11.749163879598662, "grad_norm": 0.6267048716545105, "learning_rate": 0.0001610678531701891, "loss": 3.1289, "step": 881 }, { "epoch": 11.762541806020067, "grad_norm": 0.6822847723960876, "learning_rate": 0.00016102335928809788, "loss": 3.157, "step": 882 }, { "epoch": 11.775919732441471, "grad_norm": 0.6809714436531067, "learning_rate": 0.0001609788654060067, "loss": 3.0806, "step": 883 }, { "epoch": 11.789297658862877, "grad_norm": 0.5546092391014099, "learning_rate": 0.00016093437152391547, "loss": 3.1853, "step": 884 }, { "epoch": 11.80267558528428, "grad_norm": 0.7375029921531677, "learning_rate": 0.00016088987764182426, "loss": 3.1287, "step": 885 }, { "epoch": 11.816053511705686, "grad_norm": 0.6246840953826904, "learning_rate": 0.00016084538375973304, "loss": 3.1331, "step": 886 }, { "epoch": 11.82943143812709, "grad_norm": 0.6088026762008667, "learning_rate": 0.00016080088987764185, "loss": 3.3781, "step": 887 }, { "epoch": 11.842809364548495, "grad_norm": 0.7996237874031067, "learning_rate": 0.0001607563959955506, "loss": 3.0161, "step": 888 }, { "epoch": 11.856187290969899, "grad_norm": 0.6221441626548767, "learning_rate": 0.0001607119021134594, "loss": 3.1491, "step": 889 }, { "epoch": 11.869565217391305, "grad_norm": 0.6276041269302368, "learning_rate": 0.0001606674082313682, "loss": 3.2575, "step": 890 }, { "epoch": 11.882943143812708, "grad_norm": 0.6394500136375427, "learning_rate": 0.00016062291434927698, "loss": 3.3437, "step": 891 }, { "epoch": 11.896321070234114, "grad_norm": 0.7674509286880493, "learning_rate": 0.00016057842046718576, "loss": 3.1995, "step": 892 }, { "epoch": 11.909698996655518, "grad_norm": 0.7502215504646301, "learning_rate": 0.00016053392658509457, "loss": 3.3129, "step": 893 }, { "epoch": 11.923076923076923, "grad_norm": 0.6078189611434937, "learning_rate": 0.00016048943270300335, "loss": 3.1623, "step": 894 }, { "epoch": 11.936454849498327, "grad_norm": 0.6113708019256592, "learning_rate": 0.00016044493882091213, "loss": 3.6063, "step": 895 }, { "epoch": 11.949832775919733, "grad_norm": 0.6606878638267517, "learning_rate": 0.00016040044493882092, "loss": 3.3216, "step": 896 }, { "epoch": 11.963210702341136, "grad_norm": 0.7055956125259399, "learning_rate": 0.00016035595105672973, "loss": 3.2006, "step": 897 }, { "epoch": 11.976588628762542, "grad_norm": 0.7424116730690002, "learning_rate": 0.00016031145717463848, "loss": 3.1298, "step": 898 }, { "epoch": 11.989966555183946, "grad_norm": 0.6675695180892944, "learning_rate": 0.0001602669632925473, "loss": 3.1116, "step": 899 }, { "epoch": 12.0, "grad_norm": 0.9356181621551514, "learning_rate": 0.00016022246941045607, "loss": 3.2461, "step": 900 }, { "epoch": 12.013377926421406, "grad_norm": 0.8539507985115051, "learning_rate": 0.00016017797552836486, "loss": 3.0671, "step": 901 }, { "epoch": 12.02675585284281, "grad_norm": 0.573266327381134, "learning_rate": 0.00016013348164627364, "loss": 3.0789, "step": 902 }, { "epoch": 12.040133779264215, "grad_norm": 0.5849746465682983, "learning_rate": 0.00016008898776418245, "loss": 3.1623, "step": 903 }, { "epoch": 12.053511705685619, "grad_norm": 0.6523334980010986, "learning_rate": 0.00016004449388209123, "loss": 3.0243, "step": 904 }, { "epoch": 12.066889632107024, "grad_norm": 0.6428223848342896, "learning_rate": 0.00016, "loss": 3.1241, "step": 905 }, { "epoch": 12.080267558528428, "grad_norm": 0.5881937742233276, "learning_rate": 0.0001599555061179088, "loss": 3.1744, "step": 906 }, { "epoch": 12.093645484949834, "grad_norm": 0.7523583173751831, "learning_rate": 0.0001599110122358176, "loss": 3.0821, "step": 907 }, { "epoch": 12.107023411371237, "grad_norm": 0.6120390295982361, "learning_rate": 0.00015986651835372636, "loss": 3.16, "step": 908 }, { "epoch": 12.120401337792643, "grad_norm": 0.6486253142356873, "learning_rate": 0.00015982202447163517, "loss": 3.1402, "step": 909 }, { "epoch": 12.133779264214047, "grad_norm": 0.6322839260101318, "learning_rate": 0.00015977753058954395, "loss": 3.1325, "step": 910 }, { "epoch": 12.147157190635452, "grad_norm": 0.5858875513076782, "learning_rate": 0.00015973303670745273, "loss": 3.1705, "step": 911 }, { "epoch": 12.160535117056856, "grad_norm": 0.6495780348777771, "learning_rate": 0.00015968854282536152, "loss": 3.1977, "step": 912 }, { "epoch": 12.173913043478262, "grad_norm": 0.6483474969863892, "learning_rate": 0.00015964404894327033, "loss": 2.9101, "step": 913 }, { "epoch": 12.187290969899665, "grad_norm": 0.6021110415458679, "learning_rate": 0.0001595995550611791, "loss": 3.2106, "step": 914 }, { "epoch": 12.200668896321071, "grad_norm": 0.5101630687713623, "learning_rate": 0.00015955506117908786, "loss": 3.1316, "step": 915 }, { "epoch": 12.214046822742475, "grad_norm": 0.6226193904876709, "learning_rate": 0.00015951056729699667, "loss": 3.0897, "step": 916 }, { "epoch": 12.22742474916388, "grad_norm": 0.6268473267555237, "learning_rate": 0.00015946607341490546, "loss": 3.2036, "step": 917 }, { "epoch": 12.240802675585284, "grad_norm": 0.7825391292572021, "learning_rate": 0.00015942157953281424, "loss": 3.0082, "step": 918 }, { "epoch": 12.25418060200669, "grad_norm": 0.7881148457527161, "learning_rate": 0.00015937708565072302, "loss": 3.0779, "step": 919 }, { "epoch": 12.267558528428093, "grad_norm": 0.6725586652755737, "learning_rate": 0.00015933259176863183, "loss": 3.04, "step": 920 }, { "epoch": 12.280936454849499, "grad_norm": 0.5831689238548279, "learning_rate": 0.0001592880978865406, "loss": 3.3319, "step": 921 }, { "epoch": 12.294314381270903, "grad_norm": 0.6057907342910767, "learning_rate": 0.0001592436040044494, "loss": 3.0869, "step": 922 }, { "epoch": 12.307692307692308, "grad_norm": 0.771857500076294, "learning_rate": 0.00015919911012235818, "loss": 3.1472, "step": 923 }, { "epoch": 12.321070234113712, "grad_norm": 0.7447528839111328, "learning_rate": 0.000159154616240267, "loss": 3.174, "step": 924 }, { "epoch": 12.334448160535118, "grad_norm": 0.5772632956504822, "learning_rate": 0.00015911012235817574, "loss": 3.1767, "step": 925 }, { "epoch": 12.347826086956522, "grad_norm": 0.6952618956565857, "learning_rate": 0.00015906562847608455, "loss": 3.1597, "step": 926 }, { "epoch": 12.361204013377927, "grad_norm": 0.600922703742981, "learning_rate": 0.00015902113459399333, "loss": 3.3612, "step": 927 }, { "epoch": 12.37458193979933, "grad_norm": 0.7571472525596619, "learning_rate": 0.00015897664071190212, "loss": 2.9405, "step": 928 }, { "epoch": 12.387959866220736, "grad_norm": 0.9343985915184021, "learning_rate": 0.0001589321468298109, "loss": 3.2886, "step": 929 }, { "epoch": 12.40133779264214, "grad_norm": 0.7046729922294617, "learning_rate": 0.0001588876529477197, "loss": 3.4421, "step": 930 }, { "epoch": 12.414715719063546, "grad_norm": 0.7591777443885803, "learning_rate": 0.0001588431590656285, "loss": 3.005, "step": 931 }, { "epoch": 12.42809364548495, "grad_norm": 0.6508903503417969, "learning_rate": 0.00015879866518353727, "loss": 2.8554, "step": 932 }, { "epoch": 12.441471571906355, "grad_norm": 0.6557784676551819, "learning_rate": 0.00015875417130144606, "loss": 3.3268, "step": 933 }, { "epoch": 12.454849498327759, "grad_norm": 0.6941578984260559, "learning_rate": 0.00015870967741935487, "loss": 3.2088, "step": 934 }, { "epoch": 12.468227424749164, "grad_norm": 0.6824263334274292, "learning_rate": 0.00015866518353726362, "loss": 3.0897, "step": 935 }, { "epoch": 12.481605351170568, "grad_norm": 0.7324599027633667, "learning_rate": 0.00015862068965517243, "loss": 3.1443, "step": 936 }, { "epoch": 12.494983277591974, "grad_norm": 0.577022135257721, "learning_rate": 0.0001585761957730812, "loss": 3.0896, "step": 937 }, { "epoch": 12.508361204013378, "grad_norm": 0.6165060997009277, "learning_rate": 0.00015853170189099, "loss": 2.7546, "step": 938 }, { "epoch": 12.521739130434783, "grad_norm": 0.561906635761261, "learning_rate": 0.00015848720800889878, "loss": 3.4192, "step": 939 }, { "epoch": 12.535117056856187, "grad_norm": 0.5894923806190491, "learning_rate": 0.0001584427141268076, "loss": 3.0388, "step": 940 }, { "epoch": 12.548494983277592, "grad_norm": 0.6261674761772156, "learning_rate": 0.00015839822024471637, "loss": 3.0705, "step": 941 }, { "epoch": 12.561872909698996, "grad_norm": 0.695101261138916, "learning_rate": 0.00015835372636262515, "loss": 3.1684, "step": 942 }, { "epoch": 12.575250836120402, "grad_norm": 0.6176817417144775, "learning_rate": 0.00015830923248053393, "loss": 3.0708, "step": 943 }, { "epoch": 12.588628762541806, "grad_norm": 0.6548507213592529, "learning_rate": 0.00015826473859844274, "loss": 3.1569, "step": 944 }, { "epoch": 12.602006688963211, "grad_norm": 0.6046382188796997, "learning_rate": 0.0001582202447163515, "loss": 3.2479, "step": 945 }, { "epoch": 12.615384615384615, "grad_norm": 0.7103912234306335, "learning_rate": 0.0001581757508342603, "loss": 3.127, "step": 946 }, { "epoch": 12.62876254180602, "grad_norm": 0.7131765484809875, "learning_rate": 0.0001581312569521691, "loss": 3.0975, "step": 947 }, { "epoch": 12.642140468227424, "grad_norm": 0.6442859768867493, "learning_rate": 0.00015808676307007787, "loss": 3.2885, "step": 948 }, { "epoch": 12.65551839464883, "grad_norm": 0.6430050134658813, "learning_rate": 0.00015804226918798666, "loss": 3.0397, "step": 949 }, { "epoch": 12.668896321070234, "grad_norm": 0.6894303560256958, "learning_rate": 0.00015799777530589546, "loss": 3.0756, "step": 950 }, { "epoch": 12.68227424749164, "grad_norm": 0.7319600582122803, "learning_rate": 0.00015795328142380425, "loss": 3.0457, "step": 951 }, { "epoch": 12.695652173913043, "grad_norm": 0.6445140838623047, "learning_rate": 0.00015790878754171303, "loss": 2.9828, "step": 952 }, { "epoch": 12.709030100334449, "grad_norm": 0.7522070407867432, "learning_rate": 0.0001578642936596218, "loss": 2.8942, "step": 953 }, { "epoch": 12.722408026755852, "grad_norm": 0.7962691783905029, "learning_rate": 0.00015781979977753062, "loss": 3.0985, "step": 954 }, { "epoch": 12.735785953177258, "grad_norm": 0.6391687393188477, "learning_rate": 0.00015777530589543938, "loss": 3.1752, "step": 955 }, { "epoch": 12.749163879598662, "grad_norm": 0.7632976174354553, "learning_rate": 0.00015773081201334816, "loss": 3.3505, "step": 956 }, { "epoch": 12.762541806020067, "grad_norm": 0.7491022944450378, "learning_rate": 0.00015768631813125697, "loss": 3.0721, "step": 957 }, { "epoch": 12.775919732441471, "grad_norm": 0.6163421273231506, "learning_rate": 0.00015764182424916572, "loss": 3.3242, "step": 958 }, { "epoch": 12.789297658862877, "grad_norm": 0.6831198334693909, "learning_rate": 0.00015759733036707453, "loss": 3.1409, "step": 959 }, { "epoch": 12.80267558528428, "grad_norm": 0.812300980091095, "learning_rate": 0.00015755283648498332, "loss": 2.9606, "step": 960 }, { "epoch": 12.816053511705686, "grad_norm": 0.6904334425926208, "learning_rate": 0.00015750834260289213, "loss": 3.0398, "step": 961 }, { "epoch": 12.82943143812709, "grad_norm": 0.6349720358848572, "learning_rate": 0.00015746384872080088, "loss": 3.1033, "step": 962 }, { "epoch": 12.842809364548495, "grad_norm": 0.6837566494941711, "learning_rate": 0.0001574193548387097, "loss": 3.2353, "step": 963 }, { "epoch": 12.856187290969899, "grad_norm": 0.5852749943733215, "learning_rate": 0.00015737486095661847, "loss": 3.0972, "step": 964 }, { "epoch": 12.869565217391305, "grad_norm": 0.6641372442245483, "learning_rate": 0.00015733036707452725, "loss": 3.2243, "step": 965 }, { "epoch": 12.882943143812708, "grad_norm": 0.6613900065422058, "learning_rate": 0.00015728587319243604, "loss": 3.1263, "step": 966 }, { "epoch": 12.896321070234114, "grad_norm": 0.6126120090484619, "learning_rate": 0.00015724137931034485, "loss": 3.069, "step": 967 }, { "epoch": 12.909698996655518, "grad_norm": 0.6764604449272156, "learning_rate": 0.0001571968854282536, "loss": 3.1397, "step": 968 }, { "epoch": 12.923076923076923, "grad_norm": 0.6447578072547913, "learning_rate": 0.0001571523915461624, "loss": 3.1839, "step": 969 }, { "epoch": 12.936454849498327, "grad_norm": 0.5872016549110413, "learning_rate": 0.0001571078976640712, "loss": 3.3144, "step": 970 }, { "epoch": 12.949832775919733, "grad_norm": 0.626276969909668, "learning_rate": 0.00015706340378198, "loss": 3.1295, "step": 971 }, { "epoch": 12.963210702341136, "grad_norm": 0.6829231381416321, "learning_rate": 0.00015701890989988876, "loss": 3.2261, "step": 972 }, { "epoch": 12.976588628762542, "grad_norm": 0.6197345852851868, "learning_rate": 0.00015697441601779757, "loss": 3.1117, "step": 973 }, { "epoch": 12.989966555183946, "grad_norm": 0.6137062907218933, "learning_rate": 0.00015692992213570635, "loss": 3.1548, "step": 974 }, { "epoch": 13.0, "grad_norm": 0.7483121752738953, "learning_rate": 0.00015688542825361513, "loss": 3.4944, "step": 975 }, { "epoch": 13.013377926421406, "grad_norm": 0.6102525591850281, "learning_rate": 0.00015684093437152392, "loss": 3.1403, "step": 976 }, { "epoch": 13.02675585284281, "grad_norm": 0.7258747220039368, "learning_rate": 0.00015679644048943272, "loss": 2.8474, "step": 977 }, { "epoch": 13.040133779264215, "grad_norm": 0.6891087293624878, "learning_rate": 0.00015675194660734148, "loss": 2.9722, "step": 978 }, { "epoch": 13.053511705685619, "grad_norm": 0.6320910453796387, "learning_rate": 0.0001567074527252503, "loss": 2.9807, "step": 979 }, { "epoch": 13.066889632107024, "grad_norm": 0.8684266209602356, "learning_rate": 0.00015666295884315907, "loss": 2.8448, "step": 980 }, { "epoch": 13.080267558528428, "grad_norm": 0.7126099467277527, "learning_rate": 0.00015661846496106788, "loss": 3.1915, "step": 981 }, { "epoch": 13.093645484949834, "grad_norm": 0.7001529335975647, "learning_rate": 0.00015657397107897664, "loss": 3.2017, "step": 982 }, { "epoch": 13.107023411371237, "grad_norm": 0.7901191711425781, "learning_rate": 0.00015652947719688545, "loss": 3.1273, "step": 983 }, { "epoch": 13.120401337792643, "grad_norm": 0.5769410729408264, "learning_rate": 0.00015648498331479423, "loss": 2.8582, "step": 984 }, { "epoch": 13.133779264214047, "grad_norm": 0.5969700813293457, "learning_rate": 0.000156440489432703, "loss": 3.118, "step": 985 }, { "epoch": 13.147157190635452, "grad_norm": 0.5789377093315125, "learning_rate": 0.0001563959955506118, "loss": 2.8121, "step": 986 }, { "epoch": 13.160535117056856, "grad_norm": 0.7945278882980347, "learning_rate": 0.0001563515016685206, "loss": 2.9212, "step": 987 }, { "epoch": 13.173913043478262, "grad_norm": 0.60884690284729, "learning_rate": 0.00015630700778642936, "loss": 3.1993, "step": 988 }, { "epoch": 13.187290969899665, "grad_norm": 0.616142213344574, "learning_rate": 0.00015626251390433817, "loss": 3.107, "step": 989 }, { "epoch": 13.200668896321071, "grad_norm": 0.6428812146186829, "learning_rate": 0.00015621802002224695, "loss": 3.1401, "step": 990 }, { "epoch": 13.214046822742475, "grad_norm": 0.5723693370819092, "learning_rate": 0.00015617352614015576, "loss": 3.0474, "step": 991 }, { "epoch": 13.22742474916388, "grad_norm": 0.5820907950401306, "learning_rate": 0.00015612903225806451, "loss": 3.2211, "step": 992 }, { "epoch": 13.240802675585284, "grad_norm": 0.555957555770874, "learning_rate": 0.00015608453837597332, "loss": 3.1306, "step": 993 }, { "epoch": 13.25418060200669, "grad_norm": 0.528698205947876, "learning_rate": 0.0001560400444938821, "loss": 3.0989, "step": 994 }, { "epoch": 13.267558528428093, "grad_norm": 0.5959749817848206, "learning_rate": 0.0001559955506117909, "loss": 2.9128, "step": 995 }, { "epoch": 13.280936454849499, "grad_norm": 0.6702240705490112, "learning_rate": 0.00015595105672969967, "loss": 2.9969, "step": 996 }, { "epoch": 13.294314381270903, "grad_norm": 0.5363825559616089, "learning_rate": 0.00015590656284760845, "loss": 3.2027, "step": 997 }, { "epoch": 13.307692307692308, "grad_norm": 0.6402661204338074, "learning_rate": 0.00015586206896551724, "loss": 3.2733, "step": 998 }, { "epoch": 13.321070234113712, "grad_norm": 0.700517475605011, "learning_rate": 0.00015581757508342602, "loss": 3.2629, "step": 999 }, { "epoch": 13.334448160535118, "grad_norm": 0.5500949621200562, "learning_rate": 0.00015577308120133483, "loss": 3.172, "step": 1000 }, { "epoch": 13.347826086956522, "grad_norm": 0.6199147701263428, "learning_rate": 0.0001557285873192436, "loss": 3.0556, "step": 1001 }, { "epoch": 13.361204013377927, "grad_norm": 0.5900529026985168, "learning_rate": 0.0001556840934371524, "loss": 3.1248, "step": 1002 }, { "epoch": 13.37458193979933, "grad_norm": 0.7272413372993469, "learning_rate": 0.00015563959955506118, "loss": 3.1142, "step": 1003 }, { "epoch": 13.387959866220736, "grad_norm": 0.6461951732635498, "learning_rate": 0.00015559510567296998, "loss": 3.1145, "step": 1004 }, { "epoch": 13.40133779264214, "grad_norm": 0.5750373005867004, "learning_rate": 0.00015555061179087874, "loss": 3.2117, "step": 1005 }, { "epoch": 13.414715719063546, "grad_norm": 0.6486302614212036, "learning_rate": 0.00015550611790878755, "loss": 2.9018, "step": 1006 }, { "epoch": 13.42809364548495, "grad_norm": 0.6897476315498352, "learning_rate": 0.00015546162402669633, "loss": 3.2987, "step": 1007 }, { "epoch": 13.441471571906355, "grad_norm": 0.5997576713562012, "learning_rate": 0.00015541713014460511, "loss": 2.974, "step": 1008 }, { "epoch": 13.454849498327759, "grad_norm": 0.6484793424606323, "learning_rate": 0.0001553726362625139, "loss": 2.9827, "step": 1009 }, { "epoch": 13.468227424749164, "grad_norm": 0.562312126159668, "learning_rate": 0.0001553281423804227, "loss": 3.2243, "step": 1010 }, { "epoch": 13.481605351170568, "grad_norm": 0.7257137298583984, "learning_rate": 0.0001552836484983315, "loss": 3.1081, "step": 1011 }, { "epoch": 13.494983277591974, "grad_norm": 0.7201404571533203, "learning_rate": 0.00015523915461624027, "loss": 3.1468, "step": 1012 }, { "epoch": 13.508361204013378, "grad_norm": 0.666539192199707, "learning_rate": 0.00015519466073414905, "loss": 3.0081, "step": 1013 }, { "epoch": 13.521739130434783, "grad_norm": 0.6867642998695374, "learning_rate": 0.00015515016685205786, "loss": 2.7889, "step": 1014 }, { "epoch": 13.535117056856187, "grad_norm": 0.5799785256385803, "learning_rate": 0.00015510567296996662, "loss": 3.2995, "step": 1015 }, { "epoch": 13.548494983277592, "grad_norm": 0.6155371069908142, "learning_rate": 0.00015506117908787543, "loss": 3.2227, "step": 1016 }, { "epoch": 13.561872909698996, "grad_norm": 0.7604040503501892, "learning_rate": 0.0001550166852057842, "loss": 3.0384, "step": 1017 }, { "epoch": 13.575250836120402, "grad_norm": 0.8445917963981628, "learning_rate": 0.000154972191323693, "loss": 2.9566, "step": 1018 }, { "epoch": 13.588628762541806, "grad_norm": 0.7978566288948059, "learning_rate": 0.00015492769744160177, "loss": 3.0175, "step": 1019 }, { "epoch": 13.602006688963211, "grad_norm": 0.5899437069892883, "learning_rate": 0.00015488320355951058, "loss": 3.2418, "step": 1020 }, { "epoch": 13.615384615384615, "grad_norm": 0.7204627990722656, "learning_rate": 0.00015483870967741937, "loss": 3.1568, "step": 1021 }, { "epoch": 13.62876254180602, "grad_norm": 0.6504855155944824, "learning_rate": 0.00015479421579532815, "loss": 2.8952, "step": 1022 }, { "epoch": 13.642140468227424, "grad_norm": 0.8101251125335693, "learning_rate": 0.00015474972191323693, "loss": 3.1975, "step": 1023 }, { "epoch": 13.65551839464883, "grad_norm": 0.6161416172981262, "learning_rate": 0.00015470522803114574, "loss": 3.1565, "step": 1024 }, { "epoch": 13.668896321070234, "grad_norm": 0.6131258606910706, "learning_rate": 0.0001546607341490545, "loss": 3.0382, "step": 1025 }, { "epoch": 13.68227424749164, "grad_norm": 0.8008583784103394, "learning_rate": 0.0001546162402669633, "loss": 2.995, "step": 1026 }, { "epoch": 13.695652173913043, "grad_norm": 0.7101227045059204, "learning_rate": 0.0001545717463848721, "loss": 3.0704, "step": 1027 }, { "epoch": 13.709030100334449, "grad_norm": 0.7988458275794983, "learning_rate": 0.00015452725250278087, "loss": 3.1424, "step": 1028 }, { "epoch": 13.722408026755852, "grad_norm": 0.6013655662536621, "learning_rate": 0.00015448275862068965, "loss": 2.9986, "step": 1029 }, { "epoch": 13.735785953177258, "grad_norm": 0.6368236541748047, "learning_rate": 0.00015443826473859846, "loss": 3.0193, "step": 1030 }, { "epoch": 13.749163879598662, "grad_norm": 0.8222694396972656, "learning_rate": 0.00015439377085650724, "loss": 3.0888, "step": 1031 }, { "epoch": 13.762541806020067, "grad_norm": 0.7270404696464539, "learning_rate": 0.00015434927697441603, "loss": 3.0929, "step": 1032 }, { "epoch": 13.775919732441471, "grad_norm": 0.7292355298995972, "learning_rate": 0.0001543047830923248, "loss": 2.7676, "step": 1033 }, { "epoch": 13.789297658862877, "grad_norm": 0.6662157773971558, "learning_rate": 0.00015426028921023362, "loss": 3.1984, "step": 1034 }, { "epoch": 13.80267558528428, "grad_norm": 0.6350163817405701, "learning_rate": 0.00015421579532814237, "loss": 3.3042, "step": 1035 }, { "epoch": 13.816053511705686, "grad_norm": 0.5999907851219177, "learning_rate": 0.00015417130144605118, "loss": 3.0983, "step": 1036 }, { "epoch": 13.82943143812709, "grad_norm": 0.5942257642745972, "learning_rate": 0.00015412680756395997, "loss": 3.0474, "step": 1037 }, { "epoch": 13.842809364548495, "grad_norm": 0.662589430809021, "learning_rate": 0.00015408231368186875, "loss": 2.9251, "step": 1038 }, { "epoch": 13.856187290969899, "grad_norm": 0.5817089080810547, "learning_rate": 0.00015403781979977753, "loss": 3.0716, "step": 1039 }, { "epoch": 13.869565217391305, "grad_norm": 0.6019257307052612, "learning_rate": 0.0001539933259176863, "loss": 3.1754, "step": 1040 }, { "epoch": 13.882943143812708, "grad_norm": 0.6301860213279724, "learning_rate": 0.00015394883203559512, "loss": 3.2066, "step": 1041 }, { "epoch": 13.896321070234114, "grad_norm": 0.6468888521194458, "learning_rate": 0.00015390433815350388, "loss": 3.3001, "step": 1042 }, { "epoch": 13.909698996655518, "grad_norm": 0.6510801911354065, "learning_rate": 0.0001538598442714127, "loss": 3.2198, "step": 1043 }, { "epoch": 13.923076923076923, "grad_norm": 0.5692014694213867, "learning_rate": 0.00015381535038932147, "loss": 3.3257, "step": 1044 }, { "epoch": 13.936454849498327, "grad_norm": 0.594219982624054, "learning_rate": 0.00015377085650723025, "loss": 2.9918, "step": 1045 }, { "epoch": 13.949832775919733, "grad_norm": 0.6501769423484802, "learning_rate": 0.00015372636262513903, "loss": 2.9653, "step": 1046 }, { "epoch": 13.963210702341136, "grad_norm": 0.6310623288154602, "learning_rate": 0.00015368186874304784, "loss": 3.19, "step": 1047 }, { "epoch": 13.976588628762542, "grad_norm": 0.5795436501502991, "learning_rate": 0.00015363737486095663, "loss": 3.272, "step": 1048 }, { "epoch": 13.989966555183946, "grad_norm": 0.5421392917633057, "learning_rate": 0.0001535928809788654, "loss": 3.2109, "step": 1049 }, { "epoch": 14.0, "grad_norm": 0.833959698677063, "learning_rate": 0.0001535483870967742, "loss": 3.3017, "step": 1050 }, { "epoch": 14.013377926421406, "grad_norm": 0.6856208443641663, "learning_rate": 0.000153503893214683, "loss": 3.0784, "step": 1051 }, { "epoch": 14.02675585284281, "grad_norm": 0.5841811895370483, "learning_rate": 0.00015345939933259176, "loss": 2.848, "step": 1052 }, { "epoch": 14.040133779264215, "grad_norm": 0.557906985282898, "learning_rate": 0.00015341490545050057, "loss": 3.1564, "step": 1053 }, { "epoch": 14.053511705685619, "grad_norm": 0.5468619465827942, "learning_rate": 0.00015337041156840935, "loss": 3.1237, "step": 1054 }, { "epoch": 14.066889632107024, "grad_norm": 0.7213225364685059, "learning_rate": 0.00015332591768631813, "loss": 2.8993, "step": 1055 }, { "epoch": 14.080267558528428, "grad_norm": 0.7413175106048584, "learning_rate": 0.0001532814238042269, "loss": 3.0028, "step": 1056 }, { "epoch": 14.093645484949834, "grad_norm": 0.6072244644165039, "learning_rate": 0.00015323692992213572, "loss": 2.8534, "step": 1057 }, { "epoch": 14.107023411371237, "grad_norm": 0.683262288570404, "learning_rate": 0.0001531924360400445, "loss": 2.8982, "step": 1058 }, { "epoch": 14.120401337792643, "grad_norm": 0.5880157351493835, "learning_rate": 0.0001531479421579533, "loss": 2.9567, "step": 1059 }, { "epoch": 14.133779264214047, "grad_norm": 0.7519298195838928, "learning_rate": 0.00015310344827586207, "loss": 2.9615, "step": 1060 }, { "epoch": 14.147157190635452, "grad_norm": 0.7747945189476013, "learning_rate": 0.00015305895439377088, "loss": 2.7823, "step": 1061 }, { "epoch": 14.160535117056856, "grad_norm": 0.6560395956039429, "learning_rate": 0.00015301446051167963, "loss": 3.1791, "step": 1062 }, { "epoch": 14.173913043478262, "grad_norm": 0.6388076543807983, "learning_rate": 0.00015296996662958844, "loss": 2.9868, "step": 1063 }, { "epoch": 14.187290969899665, "grad_norm": 0.7349525690078735, "learning_rate": 0.00015292547274749723, "loss": 2.9291, "step": 1064 }, { "epoch": 14.200668896321071, "grad_norm": 0.7184433341026306, "learning_rate": 0.000152880978865406, "loss": 2.7889, "step": 1065 }, { "epoch": 14.214046822742475, "grad_norm": 0.6776930093765259, "learning_rate": 0.0001528364849833148, "loss": 3.1572, "step": 1066 }, { "epoch": 14.22742474916388, "grad_norm": 0.818756103515625, "learning_rate": 0.0001527919911012236, "loss": 2.767, "step": 1067 }, { "epoch": 14.240802675585284, "grad_norm": 0.6005066633224487, "learning_rate": 0.00015274749721913238, "loss": 3.1796, "step": 1068 }, { "epoch": 14.25418060200669, "grad_norm": 0.6367926001548767, "learning_rate": 0.00015270300333704117, "loss": 2.9999, "step": 1069 }, { "epoch": 14.267558528428093, "grad_norm": 0.6823679208755493, "learning_rate": 0.00015265850945494995, "loss": 2.7663, "step": 1070 }, { "epoch": 14.280936454849499, "grad_norm": 0.6238808631896973, "learning_rate": 0.00015261401557285876, "loss": 3.1908, "step": 1071 }, { "epoch": 14.294314381270903, "grad_norm": 0.6983721256256104, "learning_rate": 0.0001525695216907675, "loss": 3.0264, "step": 1072 }, { "epoch": 14.307692307692308, "grad_norm": 0.7568501234054565, "learning_rate": 0.00015252502780867632, "loss": 3.0474, "step": 1073 }, { "epoch": 14.321070234113712, "grad_norm": 0.6250051259994507, "learning_rate": 0.0001524805339265851, "loss": 3.114, "step": 1074 }, { "epoch": 14.334448160535118, "grad_norm": 0.5907386541366577, "learning_rate": 0.0001524360400444939, "loss": 3.1823, "step": 1075 }, { "epoch": 14.347826086956522, "grad_norm": 0.6719332337379456, "learning_rate": 0.00015239154616240267, "loss": 2.994, "step": 1076 }, { "epoch": 14.361204013377927, "grad_norm": 0.5911534428596497, "learning_rate": 0.00015234705228031148, "loss": 3.4178, "step": 1077 }, { "epoch": 14.37458193979933, "grad_norm": 0.8071689009666443, "learning_rate": 0.00015230255839822026, "loss": 2.9633, "step": 1078 }, { "epoch": 14.387959866220736, "grad_norm": 0.5957038998603821, "learning_rate": 0.00015225806451612902, "loss": 3.1126, "step": 1079 }, { "epoch": 14.40133779264214, "grad_norm": 0.6604459285736084, "learning_rate": 0.00015221357063403783, "loss": 2.9193, "step": 1080 }, { "epoch": 14.414715719063546, "grad_norm": 0.626081109046936, "learning_rate": 0.0001521690767519466, "loss": 2.9321, "step": 1081 }, { "epoch": 14.42809364548495, "grad_norm": 0.5767174959182739, "learning_rate": 0.0001521245828698554, "loss": 3.1518, "step": 1082 }, { "epoch": 14.441471571906355, "grad_norm": 0.6444874405860901, "learning_rate": 0.00015208008898776417, "loss": 3.1948, "step": 1083 }, { "epoch": 14.454849498327759, "grad_norm": 0.668171763420105, "learning_rate": 0.00015203559510567298, "loss": 3.016, "step": 1084 }, { "epoch": 14.468227424749164, "grad_norm": 0.6998944878578186, "learning_rate": 0.00015199110122358176, "loss": 2.9558, "step": 1085 }, { "epoch": 14.481605351170568, "grad_norm": 0.5896235704421997, "learning_rate": 0.00015194660734149055, "loss": 3.0997, "step": 1086 }, { "epoch": 14.494983277591974, "grad_norm": 0.6724826097488403, "learning_rate": 0.00015190211345939933, "loss": 2.9369, "step": 1087 }, { "epoch": 14.508361204013378, "grad_norm": 0.5710486769676208, "learning_rate": 0.00015185761957730814, "loss": 3.4724, "step": 1088 }, { "epoch": 14.521739130434783, "grad_norm": 0.9997962117195129, "learning_rate": 0.0001518131256952169, "loss": 2.9058, "step": 1089 }, { "epoch": 14.535117056856187, "grad_norm": 0.668074905872345, "learning_rate": 0.0001517686318131257, "loss": 3.2401, "step": 1090 }, { "epoch": 14.548494983277592, "grad_norm": 0.6180433630943298, "learning_rate": 0.00015172413793103449, "loss": 2.8975, "step": 1091 }, { "epoch": 14.561872909698996, "grad_norm": 0.6412661075592041, "learning_rate": 0.00015167964404894327, "loss": 2.9882, "step": 1092 }, { "epoch": 14.575250836120402, "grad_norm": 0.715288519859314, "learning_rate": 0.00015163515016685205, "loss": 3.4004, "step": 1093 }, { "epoch": 14.588628762541806, "grad_norm": 0.689164400100708, "learning_rate": 0.00015159065628476086, "loss": 3.0705, "step": 1094 }, { "epoch": 14.602006688963211, "grad_norm": 0.7713497281074524, "learning_rate": 0.00015154616240266964, "loss": 3.159, "step": 1095 }, { "epoch": 14.615384615384615, "grad_norm": 0.6725841164588928, "learning_rate": 0.00015150166852057843, "loss": 2.8938, "step": 1096 }, { "epoch": 14.62876254180602, "grad_norm": 0.658108651638031, "learning_rate": 0.0001514571746384872, "loss": 2.8747, "step": 1097 }, { "epoch": 14.642140468227424, "grad_norm": 0.5711888074874878, "learning_rate": 0.00015141268075639602, "loss": 2.8989, "step": 1098 }, { "epoch": 14.65551839464883, "grad_norm": 0.6184161305427551, "learning_rate": 0.00015136818687430477, "loss": 3.0904, "step": 1099 }, { "epoch": 14.668896321070234, "grad_norm": 0.5937799215316772, "learning_rate": 0.00015132369299221358, "loss": 3.1637, "step": 1100 }, { "epoch": 14.68227424749164, "grad_norm": 0.591673731803894, "learning_rate": 0.00015127919911012236, "loss": 2.9547, "step": 1101 }, { "epoch": 14.695652173913043, "grad_norm": 0.7317401170730591, "learning_rate": 0.00015123470522803115, "loss": 3.2043, "step": 1102 }, { "epoch": 14.709030100334449, "grad_norm": 0.5784003734588623, "learning_rate": 0.00015119021134593993, "loss": 3.102, "step": 1103 }, { "epoch": 14.722408026755852, "grad_norm": 0.7077385187149048, "learning_rate": 0.00015114571746384874, "loss": 3.011, "step": 1104 }, { "epoch": 14.735785953177258, "grad_norm": 0.6472675204277039, "learning_rate": 0.00015110122358175752, "loss": 3.2075, "step": 1105 }, { "epoch": 14.749163879598662, "grad_norm": 0.6789306998252869, "learning_rate": 0.0001510567296996663, "loss": 2.9458, "step": 1106 }, { "epoch": 14.762541806020067, "grad_norm": 0.6602732539176941, "learning_rate": 0.00015101223581757509, "loss": 2.9941, "step": 1107 }, { "epoch": 14.775919732441471, "grad_norm": 0.7484832406044006, "learning_rate": 0.0001509677419354839, "loss": 3.0358, "step": 1108 }, { "epoch": 14.789297658862877, "grad_norm": 0.704139769077301, "learning_rate": 0.00015092324805339265, "loss": 3.1006, "step": 1109 }, { "epoch": 14.80267558528428, "grad_norm": 0.6545978784561157, "learning_rate": 0.00015087875417130146, "loss": 3.0369, "step": 1110 }, { "epoch": 14.816053511705686, "grad_norm": 0.5718163847923279, "learning_rate": 0.00015083426028921024, "loss": 3.1683, "step": 1111 }, { "epoch": 14.82943143812709, "grad_norm": 0.5773367285728455, "learning_rate": 0.00015078976640711902, "loss": 3.2753, "step": 1112 }, { "epoch": 14.842809364548495, "grad_norm": 0.6617185473442078, "learning_rate": 0.0001507452725250278, "loss": 2.9713, "step": 1113 }, { "epoch": 14.856187290969899, "grad_norm": 0.6748194098472595, "learning_rate": 0.00015070077864293662, "loss": 3.0961, "step": 1114 }, { "epoch": 14.869565217391305, "grad_norm": 0.6942034959793091, "learning_rate": 0.0001506562847608454, "loss": 3.0778, "step": 1115 }, { "epoch": 14.882943143812708, "grad_norm": 1.0203640460968018, "learning_rate": 0.00015061179087875418, "loss": 3.0705, "step": 1116 }, { "epoch": 14.896321070234114, "grad_norm": 0.5746601223945618, "learning_rate": 0.00015056729699666296, "loss": 3.1204, "step": 1117 }, { "epoch": 14.909698996655518, "grad_norm": 0.7374005317687988, "learning_rate": 0.00015052280311457177, "loss": 3.1289, "step": 1118 }, { "epoch": 14.923076923076923, "grad_norm": 0.5524411201477051, "learning_rate": 0.00015047830923248053, "loss": 3.2795, "step": 1119 }, { "epoch": 14.936454849498327, "grad_norm": 0.7024741768836975, "learning_rate": 0.0001504338153503893, "loss": 3.0675, "step": 1120 }, { "epoch": 14.949832775919733, "grad_norm": 0.7431137561798096, "learning_rate": 0.00015038932146829812, "loss": 3.1222, "step": 1121 }, { "epoch": 14.963210702341136, "grad_norm": 0.6568113565444946, "learning_rate": 0.0001503448275862069, "loss": 3.1523, "step": 1122 }, { "epoch": 14.976588628762542, "grad_norm": 0.6193330883979797, "learning_rate": 0.00015030033370411569, "loss": 3.1632, "step": 1123 }, { "epoch": 14.989966555183946, "grad_norm": 0.6371363401412964, "learning_rate": 0.00015025583982202447, "loss": 3.0525, "step": 1124 }, { "epoch": 15.0, "grad_norm": 0.652542233467102, "learning_rate": 0.00015021134593993328, "loss": 3.1846, "step": 1125 }, { "epoch": 15.013377926421406, "grad_norm": 0.6337831616401672, "learning_rate": 0.00015016685205784203, "loss": 2.9292, "step": 1126 }, { "epoch": 15.02675585284281, "grad_norm": 0.85350501537323, "learning_rate": 0.00015012235817575084, "loss": 2.913, "step": 1127 }, { "epoch": 15.040133779264215, "grad_norm": 0.6439313888549805, "learning_rate": 0.00015007786429365962, "loss": 3.0809, "step": 1128 }, { "epoch": 15.053511705685619, "grad_norm": 0.5232247114181519, "learning_rate": 0.0001500333704115684, "loss": 3.1348, "step": 1129 }, { "epoch": 15.066889632107024, "grad_norm": 0.6082741618156433, "learning_rate": 0.0001499888765294772, "loss": 2.9282, "step": 1130 }, { "epoch": 15.080267558528428, "grad_norm": 0.5736444592475891, "learning_rate": 0.000149944382647386, "loss": 2.9891, "step": 1131 }, { "epoch": 15.093645484949834, "grad_norm": 0.7732790112495422, "learning_rate": 0.00014989988876529478, "loss": 3.0461, "step": 1132 }, { "epoch": 15.107023411371237, "grad_norm": 0.618357241153717, "learning_rate": 0.00014985539488320356, "loss": 3.0296, "step": 1133 }, { "epoch": 15.120401337792643, "grad_norm": 0.7245836853981018, "learning_rate": 0.00014981090100111235, "loss": 3.0078, "step": 1134 }, { "epoch": 15.133779264214047, "grad_norm": 0.6738787293434143, "learning_rate": 0.00014976640711902116, "loss": 3.1171, "step": 1135 }, { "epoch": 15.147157190635452, "grad_norm": 0.5802761316299438, "learning_rate": 0.0001497219132369299, "loss": 3.1403, "step": 1136 }, { "epoch": 15.160535117056856, "grad_norm": 0.5941367745399475, "learning_rate": 0.00014967741935483872, "loss": 2.9149, "step": 1137 }, { "epoch": 15.173913043478262, "grad_norm": 0.8884940147399902, "learning_rate": 0.0001496329254727475, "loss": 2.8983, "step": 1138 }, { "epoch": 15.187290969899665, "grad_norm": 0.7229192852973938, "learning_rate": 0.00014958843159065628, "loss": 2.9774, "step": 1139 }, { "epoch": 15.200668896321071, "grad_norm": 0.6714467406272888, "learning_rate": 0.00014954393770856507, "loss": 2.9219, "step": 1140 }, { "epoch": 15.214046822742475, "grad_norm": 0.6785704493522644, "learning_rate": 0.00014949944382647388, "loss": 3.0205, "step": 1141 }, { "epoch": 15.22742474916388, "grad_norm": 0.6349677443504333, "learning_rate": 0.00014945494994438266, "loss": 3.1601, "step": 1142 }, { "epoch": 15.240802675585284, "grad_norm": 0.557123064994812, "learning_rate": 0.00014941045606229144, "loss": 2.6297, "step": 1143 }, { "epoch": 15.25418060200669, "grad_norm": 0.6714944243431091, "learning_rate": 0.00014936596218020022, "loss": 2.7951, "step": 1144 }, { "epoch": 15.267558528428093, "grad_norm": 0.6747463345527649, "learning_rate": 0.00014932146829810903, "loss": 2.7909, "step": 1145 }, { "epoch": 15.280936454849499, "grad_norm": 0.5717387199401855, "learning_rate": 0.0001492769744160178, "loss": 3.2896, "step": 1146 }, { "epoch": 15.294314381270903, "grad_norm": 0.6589123010635376, "learning_rate": 0.0001492324805339266, "loss": 2.8332, "step": 1147 }, { "epoch": 15.307692307692308, "grad_norm": 0.6273646950721741, "learning_rate": 0.00014918798665183538, "loss": 3.0084, "step": 1148 }, { "epoch": 15.321070234113712, "grad_norm": 0.6551377773284912, "learning_rate": 0.00014914349276974416, "loss": 2.8147, "step": 1149 }, { "epoch": 15.334448160535118, "grad_norm": 0.6751659512519836, "learning_rate": 0.00014909899888765295, "loss": 3.1345, "step": 1150 }, { "epoch": 15.347826086956522, "grad_norm": 0.677094042301178, "learning_rate": 0.00014905450500556175, "loss": 3.1958, "step": 1151 }, { "epoch": 15.361204013377927, "grad_norm": 0.6613426804542542, "learning_rate": 0.00014901001112347054, "loss": 3.081, "step": 1152 }, { "epoch": 15.37458193979933, "grad_norm": 0.7645783424377441, "learning_rate": 0.00014896551724137932, "loss": 2.8799, "step": 1153 }, { "epoch": 15.387959866220736, "grad_norm": 0.5698953866958618, "learning_rate": 0.0001489210233592881, "loss": 3.1691, "step": 1154 }, { "epoch": 15.40133779264214, "grad_norm": 0.6581351161003113, "learning_rate": 0.0001488765294771969, "loss": 3.2365, "step": 1155 }, { "epoch": 15.414715719063546, "grad_norm": 0.7809271812438965, "learning_rate": 0.00014883203559510567, "loss": 2.833, "step": 1156 }, { "epoch": 15.42809364548495, "grad_norm": 0.6226280927658081, "learning_rate": 0.00014878754171301448, "loss": 3.1502, "step": 1157 }, { "epoch": 15.441471571906355, "grad_norm": 0.5494824051856995, "learning_rate": 0.00014874304783092326, "loss": 3.2195, "step": 1158 }, { "epoch": 15.454849498327759, "grad_norm": 0.5729116797447205, "learning_rate": 0.00014869855394883204, "loss": 2.9463, "step": 1159 }, { "epoch": 15.468227424749164, "grad_norm": 0.6673750877380371, "learning_rate": 0.00014865406006674082, "loss": 3.1514, "step": 1160 }, { "epoch": 15.481605351170568, "grad_norm": 0.6746686697006226, "learning_rate": 0.0001486095661846496, "loss": 3.0088, "step": 1161 }, { "epoch": 15.494983277591974, "grad_norm": 0.6898564100265503, "learning_rate": 0.00014856507230255842, "loss": 2.8468, "step": 1162 }, { "epoch": 15.508361204013378, "grad_norm": 0.7262438535690308, "learning_rate": 0.00014852057842046717, "loss": 2.8017, "step": 1163 }, { "epoch": 15.521739130434783, "grad_norm": 0.6878666877746582, "learning_rate": 0.00014847608453837598, "loss": 2.9486, "step": 1164 }, { "epoch": 15.535117056856187, "grad_norm": 0.6375080347061157, "learning_rate": 0.00014843159065628476, "loss": 3.0757, "step": 1165 }, { "epoch": 15.548494983277592, "grad_norm": 0.6540268063545227, "learning_rate": 0.00014838709677419355, "loss": 3.1262, "step": 1166 }, { "epoch": 15.561872909698996, "grad_norm": 0.6036689877510071, "learning_rate": 0.00014834260289210233, "loss": 3.0607, "step": 1167 }, { "epoch": 15.575250836120402, "grad_norm": 0.5899893641471863, "learning_rate": 0.00014829810901001114, "loss": 3.1222, "step": 1168 }, { "epoch": 15.588628762541806, "grad_norm": 0.7268028259277344, "learning_rate": 0.00014825361512791992, "loss": 3.1777, "step": 1169 }, { "epoch": 15.602006688963211, "grad_norm": 0.6990141272544861, "learning_rate": 0.0001482091212458287, "loss": 3.2142, "step": 1170 }, { "epoch": 15.615384615384615, "grad_norm": 0.6009657382965088, "learning_rate": 0.00014816462736373748, "loss": 3.141, "step": 1171 }, { "epoch": 15.62876254180602, "grad_norm": 0.6287830471992493, "learning_rate": 0.0001481201334816463, "loss": 3.0621, "step": 1172 }, { "epoch": 15.642140468227424, "grad_norm": 0.6720128655433655, "learning_rate": 0.00014807563959955505, "loss": 3.0353, "step": 1173 }, { "epoch": 15.65551839464883, "grad_norm": 0.6694427132606506, "learning_rate": 0.00014803114571746386, "loss": 3.0171, "step": 1174 }, { "epoch": 15.668896321070234, "grad_norm": 0.5630237460136414, "learning_rate": 0.00014798665183537264, "loss": 3.1444, "step": 1175 }, { "epoch": 15.68227424749164, "grad_norm": 0.7139558792114258, "learning_rate": 0.00014794215795328142, "loss": 3.0012, "step": 1176 }, { "epoch": 15.695652173913043, "grad_norm": 0.6374551057815552, "learning_rate": 0.0001478976640711902, "loss": 2.9123, "step": 1177 }, { "epoch": 15.709030100334449, "grad_norm": 0.5957819223403931, "learning_rate": 0.00014785317018909902, "loss": 3.09, "step": 1178 }, { "epoch": 15.722408026755852, "grad_norm": 0.6083621382713318, "learning_rate": 0.0001478086763070078, "loss": 3.0231, "step": 1179 }, { "epoch": 15.735785953177258, "grad_norm": 0.6169192790985107, "learning_rate": 0.00014776418242491658, "loss": 2.9863, "step": 1180 }, { "epoch": 15.749163879598662, "grad_norm": 0.6058081984519958, "learning_rate": 0.00014771968854282536, "loss": 3.0261, "step": 1181 }, { "epoch": 15.762541806020067, "grad_norm": 0.5816760659217834, "learning_rate": 0.00014767519466073417, "loss": 3.1593, "step": 1182 }, { "epoch": 15.775919732441471, "grad_norm": 0.6246895790100098, "learning_rate": 0.00014763070077864293, "loss": 3.1029, "step": 1183 }, { "epoch": 15.789297658862877, "grad_norm": 0.56280517578125, "learning_rate": 0.00014758620689655174, "loss": 2.9778, "step": 1184 }, { "epoch": 15.80267558528428, "grad_norm": 0.5743212699890137, "learning_rate": 0.00014754171301446052, "loss": 2.8799, "step": 1185 }, { "epoch": 15.816053511705686, "grad_norm": 0.6163922548294067, "learning_rate": 0.0001474972191323693, "loss": 3.0226, "step": 1186 }, { "epoch": 15.82943143812709, "grad_norm": 0.5892409682273865, "learning_rate": 0.00014745272525027808, "loss": 3.1167, "step": 1187 }, { "epoch": 15.842809364548495, "grad_norm": 0.7977785468101501, "learning_rate": 0.0001474082313681869, "loss": 2.8427, "step": 1188 }, { "epoch": 15.856187290969899, "grad_norm": 0.7396023273468018, "learning_rate": 0.00014736373748609568, "loss": 2.7809, "step": 1189 }, { "epoch": 15.869565217391305, "grad_norm": 0.58844393491745, "learning_rate": 0.00014731924360400446, "loss": 3.0624, "step": 1190 }, { "epoch": 15.882943143812708, "grad_norm": 0.6903204321861267, "learning_rate": 0.00014727474972191324, "loss": 3.1246, "step": 1191 }, { "epoch": 15.896321070234114, "grad_norm": 0.5902391672134399, "learning_rate": 0.00014723025583982205, "loss": 3.0505, "step": 1192 }, { "epoch": 15.909698996655518, "grad_norm": 0.575752317905426, "learning_rate": 0.0001471857619577308, "loss": 2.8508, "step": 1193 }, { "epoch": 15.923076923076923, "grad_norm": 0.7248224020004272, "learning_rate": 0.00014714126807563961, "loss": 3.1438, "step": 1194 }, { "epoch": 15.936454849498327, "grad_norm": 0.5669791102409363, "learning_rate": 0.0001470967741935484, "loss": 3.1765, "step": 1195 }, { "epoch": 15.949832775919733, "grad_norm": 0.6656806468963623, "learning_rate": 0.00014705228031145718, "loss": 3.1456, "step": 1196 }, { "epoch": 15.963210702341136, "grad_norm": 0.6073266863822937, "learning_rate": 0.00014700778642936596, "loss": 3.1836, "step": 1197 }, { "epoch": 15.976588628762542, "grad_norm": 0.8209658861160278, "learning_rate": 0.00014696329254727477, "loss": 2.8457, "step": 1198 }, { "epoch": 15.989966555183946, "grad_norm": 0.6495081186294556, "learning_rate": 0.00014691879866518355, "loss": 3.0161, "step": 1199 }, { "epoch": 16.0, "grad_norm": 0.7522635459899902, "learning_rate": 0.00014687430478309234, "loss": 2.8293, "step": 1200 }, { "epoch": 16.013377926421406, "grad_norm": 0.8024417161941528, "learning_rate": 0.00014682981090100112, "loss": 2.909, "step": 1201 }, { "epoch": 16.02675585284281, "grad_norm": 0.7147983908653259, "learning_rate": 0.0001467853170189099, "loss": 2.7664, "step": 1202 }, { "epoch": 16.040133779264213, "grad_norm": 0.602427065372467, "learning_rate": 0.00014674082313681868, "loss": 2.8189, "step": 1203 }, { "epoch": 16.05351170568562, "grad_norm": 0.7274264097213745, "learning_rate": 0.00014669632925472747, "loss": 2.9957, "step": 1204 }, { "epoch": 16.066889632107024, "grad_norm": 0.6042147278785706, "learning_rate": 0.00014665183537263628, "loss": 2.8925, "step": 1205 }, { "epoch": 16.08026755852843, "grad_norm": 0.592339813709259, "learning_rate": 0.00014660734149054506, "loss": 3.1463, "step": 1206 }, { "epoch": 16.093645484949832, "grad_norm": 0.6099987030029297, "learning_rate": 0.00014656284760845384, "loss": 3.2264, "step": 1207 }, { "epoch": 16.107023411371237, "grad_norm": 0.9324999451637268, "learning_rate": 0.00014651835372636262, "loss": 2.916, "step": 1208 }, { "epoch": 16.120401337792643, "grad_norm": 0.6000798344612122, "learning_rate": 0.00014647385984427143, "loss": 2.9242, "step": 1209 }, { "epoch": 16.13377926421405, "grad_norm": 0.609199047088623, "learning_rate": 0.0001464293659621802, "loss": 2.9405, "step": 1210 }, { "epoch": 16.14715719063545, "grad_norm": 0.6789796948432922, "learning_rate": 0.000146384872080089, "loss": 3.003, "step": 1211 }, { "epoch": 16.160535117056856, "grad_norm": 0.6567651629447937, "learning_rate": 0.00014634037819799778, "loss": 2.8733, "step": 1212 }, { "epoch": 16.17391304347826, "grad_norm": 0.5860549211502075, "learning_rate": 0.00014629588431590656, "loss": 3.1429, "step": 1213 }, { "epoch": 16.187290969899667, "grad_norm": 0.6623414158821106, "learning_rate": 0.00014625139043381534, "loss": 3.1741, "step": 1214 }, { "epoch": 16.20066889632107, "grad_norm": 0.8366180062294006, "learning_rate": 0.00014620689655172415, "loss": 2.8627, "step": 1215 }, { "epoch": 16.214046822742475, "grad_norm": 0.616780698299408, "learning_rate": 0.00014616240266963294, "loss": 2.7619, "step": 1216 }, { "epoch": 16.22742474916388, "grad_norm": 0.6345306634902954, "learning_rate": 0.00014611790878754172, "loss": 2.9104, "step": 1217 }, { "epoch": 16.240802675585286, "grad_norm": 0.6326844096183777, "learning_rate": 0.0001460734149054505, "loss": 2.8453, "step": 1218 }, { "epoch": 16.254180602006688, "grad_norm": 0.5441793203353882, "learning_rate": 0.0001460289210233593, "loss": 3.2924, "step": 1219 }, { "epoch": 16.267558528428093, "grad_norm": 0.604637086391449, "learning_rate": 0.00014598442714126807, "loss": 3.0362, "step": 1220 }, { "epoch": 16.2809364548495, "grad_norm": 0.6100621819496155, "learning_rate": 0.00014593993325917687, "loss": 2.917, "step": 1221 }, { "epoch": 16.294314381270905, "grad_norm": 0.6323224902153015, "learning_rate": 0.00014589543937708566, "loss": 3.144, "step": 1222 }, { "epoch": 16.307692307692307, "grad_norm": 0.595485270023346, "learning_rate": 0.00014585094549499444, "loss": 2.8869, "step": 1223 }, { "epoch": 16.321070234113712, "grad_norm": 0.6350538730621338, "learning_rate": 0.00014580645161290322, "loss": 2.7253, "step": 1224 }, { "epoch": 16.334448160535118, "grad_norm": 0.5804395079612732, "learning_rate": 0.00014576195773081203, "loss": 3.1523, "step": 1225 }, { "epoch": 16.347826086956523, "grad_norm": 0.5905717015266418, "learning_rate": 0.00014571746384872081, "loss": 3.1419, "step": 1226 }, { "epoch": 16.361204013377925, "grad_norm": 0.6824894547462463, "learning_rate": 0.0001456729699666296, "loss": 2.8953, "step": 1227 }, { "epoch": 16.37458193979933, "grad_norm": 0.5840978622436523, "learning_rate": 0.00014562847608453838, "loss": 3.1229, "step": 1228 }, { "epoch": 16.387959866220736, "grad_norm": 0.7102469801902771, "learning_rate": 0.0001455839822024472, "loss": 2.6571, "step": 1229 }, { "epoch": 16.401337792642142, "grad_norm": 0.6148349046707153, "learning_rate": 0.00014553948832035594, "loss": 3.0084, "step": 1230 }, { "epoch": 16.414715719063544, "grad_norm": 0.60859215259552, "learning_rate": 0.00014549499443826475, "loss": 3.1348, "step": 1231 }, { "epoch": 16.42809364548495, "grad_norm": 0.6059513688087463, "learning_rate": 0.00014545050055617354, "loss": 2.9678, "step": 1232 }, { "epoch": 16.441471571906355, "grad_norm": 0.6012231707572937, "learning_rate": 0.00014540600667408232, "loss": 2.9279, "step": 1233 }, { "epoch": 16.45484949832776, "grad_norm": 0.6185587644577026, "learning_rate": 0.0001453615127919911, "loss": 2.9922, "step": 1234 }, { "epoch": 16.468227424749163, "grad_norm": 0.5989127159118652, "learning_rate": 0.0001453170189098999, "loss": 3.0736, "step": 1235 }, { "epoch": 16.48160535117057, "grad_norm": 0.673633337020874, "learning_rate": 0.0001452725250278087, "loss": 2.9703, "step": 1236 }, { "epoch": 16.494983277591974, "grad_norm": 0.6815900206565857, "learning_rate": 0.00014522803114571747, "loss": 3.0171, "step": 1237 }, { "epoch": 16.50836120401338, "grad_norm": 0.7687232494354248, "learning_rate": 0.00014518353726362626, "loss": 2.8988, "step": 1238 }, { "epoch": 16.52173913043478, "grad_norm": 0.7992755174636841, "learning_rate": 0.00014513904338153507, "loss": 3.0347, "step": 1239 }, { "epoch": 16.535117056856187, "grad_norm": 0.8673639893531799, "learning_rate": 0.00014509454949944382, "loss": 2.9654, "step": 1240 }, { "epoch": 16.548494983277592, "grad_norm": 0.6200671792030334, "learning_rate": 0.00014505005561735263, "loss": 3.1075, "step": 1241 }, { "epoch": 16.561872909698998, "grad_norm": 0.8055624961853027, "learning_rate": 0.0001450055617352614, "loss": 2.877, "step": 1242 }, { "epoch": 16.5752508361204, "grad_norm": 0.6428245902061462, "learning_rate": 0.0001449610678531702, "loss": 3.0326, "step": 1243 }, { "epoch": 16.588628762541806, "grad_norm": 0.844804584980011, "learning_rate": 0.00014491657397107898, "loss": 2.8093, "step": 1244 }, { "epoch": 16.60200668896321, "grad_norm": 0.5699613690376282, "learning_rate": 0.00014487208008898776, "loss": 3.1914, "step": 1245 }, { "epoch": 16.615384615384617, "grad_norm": 0.6638582348823547, "learning_rate": 0.00014482758620689657, "loss": 2.7484, "step": 1246 }, { "epoch": 16.62876254180602, "grad_norm": 0.7390914559364319, "learning_rate": 0.00014478309232480533, "loss": 3.0355, "step": 1247 }, { "epoch": 16.642140468227424, "grad_norm": 0.6177923083305359, "learning_rate": 0.00014473859844271413, "loss": 3.0067, "step": 1248 }, { "epoch": 16.65551839464883, "grad_norm": 0.6234062314033508, "learning_rate": 0.00014469410456062292, "loss": 2.8153, "step": 1249 }, { "epoch": 16.668896321070235, "grad_norm": 0.8505418300628662, "learning_rate": 0.0001446496106785317, "loss": 2.8292, "step": 1250 }, { "epoch": 16.682274247491637, "grad_norm": 0.8339266180992126, "learning_rate": 0.00014460511679644048, "loss": 2.8072, "step": 1251 }, { "epoch": 16.695652173913043, "grad_norm": 0.5782635807991028, "learning_rate": 0.0001445606229143493, "loss": 3.1079, "step": 1252 }, { "epoch": 16.70903010033445, "grad_norm": 0.687126874923706, "learning_rate": 0.00014451612903225807, "loss": 3.1401, "step": 1253 }, { "epoch": 16.722408026755854, "grad_norm": 0.7313762307167053, "learning_rate": 0.00014447163515016686, "loss": 2.9167, "step": 1254 }, { "epoch": 16.735785953177256, "grad_norm": 0.8815247416496277, "learning_rate": 0.00014442714126807564, "loss": 3.0294, "step": 1255 }, { "epoch": 16.74916387959866, "grad_norm": 0.7636277675628662, "learning_rate": 0.00014438264738598445, "loss": 3.0758, "step": 1256 }, { "epoch": 16.762541806020067, "grad_norm": 0.5961578488349915, "learning_rate": 0.0001443381535038932, "loss": 3.1027, "step": 1257 }, { "epoch": 16.775919732441473, "grad_norm": 0.6840028762817383, "learning_rate": 0.000144293659621802, "loss": 2.9192, "step": 1258 }, { "epoch": 16.789297658862875, "grad_norm": 0.7895340323448181, "learning_rate": 0.0001442491657397108, "loss": 2.8772, "step": 1259 }, { "epoch": 16.80267558528428, "grad_norm": 0.8516091704368591, "learning_rate": 0.00014420467185761958, "loss": 2.9168, "step": 1260 }, { "epoch": 16.816053511705686, "grad_norm": 0.6745076179504395, "learning_rate": 0.00014416017797552836, "loss": 3.1352, "step": 1261 }, { "epoch": 16.82943143812709, "grad_norm": 0.6744667887687683, "learning_rate": 0.00014411568409343717, "loss": 2.8439, "step": 1262 }, { "epoch": 16.842809364548494, "grad_norm": 0.6307089924812317, "learning_rate": 0.00014407119021134595, "loss": 3.2054, "step": 1263 }, { "epoch": 16.8561872909699, "grad_norm": 0.6480753421783447, "learning_rate": 0.00014402669632925473, "loss": 3.039, "step": 1264 }, { "epoch": 16.869565217391305, "grad_norm": 0.6143667697906494, "learning_rate": 0.00014398220244716352, "loss": 2.9475, "step": 1265 }, { "epoch": 16.88294314381271, "grad_norm": 0.6289299130439758, "learning_rate": 0.00014393770856507233, "loss": 3.1437, "step": 1266 }, { "epoch": 16.896321070234112, "grad_norm": 0.6618160009384155, "learning_rate": 0.00014389321468298108, "loss": 2.8641, "step": 1267 }, { "epoch": 16.909698996655518, "grad_norm": 0.6053375601768494, "learning_rate": 0.0001438487208008899, "loss": 2.9129, "step": 1268 }, { "epoch": 16.923076923076923, "grad_norm": 0.5706185102462769, "learning_rate": 0.00014380422691879867, "loss": 3.005, "step": 1269 }, { "epoch": 16.93645484949833, "grad_norm": 0.6779253482818604, "learning_rate": 0.00014375973303670746, "loss": 3.1071, "step": 1270 }, { "epoch": 16.94983277591973, "grad_norm": 0.6679616570472717, "learning_rate": 0.00014371523915461624, "loss": 3.1792, "step": 1271 }, { "epoch": 16.963210702341136, "grad_norm": 0.6018584966659546, "learning_rate": 0.00014367074527252505, "loss": 2.9947, "step": 1272 }, { "epoch": 16.976588628762542, "grad_norm": 0.6106094717979431, "learning_rate": 0.00014362625139043383, "loss": 3.1965, "step": 1273 }, { "epoch": 16.989966555183948, "grad_norm": 0.5486257672309875, "learning_rate": 0.0001435817575083426, "loss": 3.0975, "step": 1274 }, { "epoch": 17.0, "grad_norm": 0.6516265273094177, "learning_rate": 0.0001435372636262514, "loss": 3.1064, "step": 1275 }, { "epoch": 17.013377926421406, "grad_norm": 0.6434454917907715, "learning_rate": 0.0001434927697441602, "loss": 2.8903, "step": 1276 }, { "epoch": 17.02675585284281, "grad_norm": 0.6237186193466187, "learning_rate": 0.00014344827586206896, "loss": 3.135, "step": 1277 }, { "epoch": 17.040133779264213, "grad_norm": 0.5920026302337646, "learning_rate": 0.00014340378197997777, "loss": 2.8612, "step": 1278 }, { "epoch": 17.05351170568562, "grad_norm": 0.6545232534408569, "learning_rate": 0.00014335928809788655, "loss": 2.8934, "step": 1279 }, { "epoch": 17.066889632107024, "grad_norm": 0.7839710116386414, "learning_rate": 0.00014331479421579533, "loss": 2.965, "step": 1280 }, { "epoch": 17.08026755852843, "grad_norm": 0.6448978781700134, "learning_rate": 0.00014327030033370412, "loss": 2.9343, "step": 1281 }, { "epoch": 17.093645484949832, "grad_norm": 0.5713958144187927, "learning_rate": 0.00014322580645161293, "loss": 3.1566, "step": 1282 }, { "epoch": 17.107023411371237, "grad_norm": 0.666409969329834, "learning_rate": 0.0001431813125695217, "loss": 2.7705, "step": 1283 }, { "epoch": 17.120401337792643, "grad_norm": 0.6068354249000549, "learning_rate": 0.0001431368186874305, "loss": 3.088, "step": 1284 }, { "epoch": 17.13377926421405, "grad_norm": 0.8292580246925354, "learning_rate": 0.00014309232480533927, "loss": 2.8939, "step": 1285 }, { "epoch": 17.14715719063545, "grad_norm": 0.6789494156837463, "learning_rate": 0.00014304783092324806, "loss": 3.2317, "step": 1286 }, { "epoch": 17.160535117056856, "grad_norm": 0.6030963063240051, "learning_rate": 0.00014300333704115684, "loss": 2.9248, "step": 1287 }, { "epoch": 17.17391304347826, "grad_norm": 0.7090041041374207, "learning_rate": 0.00014295884315906562, "loss": 2.9097, "step": 1288 }, { "epoch": 17.187290969899667, "grad_norm": 0.5750879645347595, "learning_rate": 0.00014291434927697443, "loss": 2.9585, "step": 1289 }, { "epoch": 17.20066889632107, "grad_norm": 0.6379792094230652, "learning_rate": 0.0001428698553948832, "loss": 2.794, "step": 1290 }, { "epoch": 17.214046822742475, "grad_norm": 0.6736400127410889, "learning_rate": 0.000142825361512792, "loss": 2.8812, "step": 1291 }, { "epoch": 17.22742474916388, "grad_norm": 0.6580933332443237, "learning_rate": 0.00014278086763070078, "loss": 2.8029, "step": 1292 }, { "epoch": 17.240802675585286, "grad_norm": 0.6550527215003967, "learning_rate": 0.00014273637374860959, "loss": 3.1135, "step": 1293 }, { "epoch": 17.254180602006688, "grad_norm": 0.6616796255111694, "learning_rate": 0.00014269187986651834, "loss": 2.8916, "step": 1294 }, { "epoch": 17.267558528428093, "grad_norm": 0.7247623801231384, "learning_rate": 0.00014264738598442715, "loss": 2.8482, "step": 1295 }, { "epoch": 17.2809364548495, "grad_norm": 0.7138639092445374, "learning_rate": 0.00014260289210233593, "loss": 2.9283, "step": 1296 }, { "epoch": 17.294314381270905, "grad_norm": 0.6413894891738892, "learning_rate": 0.00014255839822024472, "loss": 3.0273, "step": 1297 }, { "epoch": 17.307692307692307, "grad_norm": 0.6106882095336914, "learning_rate": 0.0001425139043381535, "loss": 3.062, "step": 1298 }, { "epoch": 17.321070234113712, "grad_norm": 0.6762199997901917, "learning_rate": 0.0001424694104560623, "loss": 2.7864, "step": 1299 }, { "epoch": 17.334448160535118, "grad_norm": 0.65083909034729, "learning_rate": 0.0001424249165739711, "loss": 3.115, "step": 1300 }, { "epoch": 17.347826086956523, "grad_norm": 0.7381249666213989, "learning_rate": 0.00014238042269187987, "loss": 2.6981, "step": 1301 }, { "epoch": 17.361204013377925, "grad_norm": 0.5674475431442261, "learning_rate": 0.00014233592880978865, "loss": 2.8646, "step": 1302 }, { "epoch": 17.37458193979933, "grad_norm": 0.6201330423355103, "learning_rate": 0.00014229143492769746, "loss": 3.207, "step": 1303 }, { "epoch": 17.387959866220736, "grad_norm": 0.7004446983337402, "learning_rate": 0.00014224694104560622, "loss": 2.7623, "step": 1304 }, { "epoch": 17.401337792642142, "grad_norm": 0.7278717756271362, "learning_rate": 0.00014220244716351503, "loss": 2.7967, "step": 1305 }, { "epoch": 17.414715719063544, "grad_norm": 0.6384133696556091, "learning_rate": 0.0001421579532814238, "loss": 2.7542, "step": 1306 }, { "epoch": 17.42809364548495, "grad_norm": 0.6443619132041931, "learning_rate": 0.0001421134593993326, "loss": 2.9376, "step": 1307 }, { "epoch": 17.441471571906355, "grad_norm": 0.6889017224311829, "learning_rate": 0.00014206896551724138, "loss": 2.8868, "step": 1308 }, { "epoch": 17.45484949832776, "grad_norm": 0.624864935874939, "learning_rate": 0.00014202447163515019, "loss": 3.0459, "step": 1309 }, { "epoch": 17.468227424749163, "grad_norm": 0.5964120626449585, "learning_rate": 0.00014197997775305897, "loss": 2.8604, "step": 1310 }, { "epoch": 17.48160535117057, "grad_norm": 0.5584320425987244, "learning_rate": 0.00014193548387096775, "loss": 3.1267, "step": 1311 }, { "epoch": 17.494983277591974, "grad_norm": 0.6566155552864075, "learning_rate": 0.00014189098998887653, "loss": 2.7059, "step": 1312 }, { "epoch": 17.50836120401338, "grad_norm": 0.7166509032249451, "learning_rate": 0.00014184649610678534, "loss": 3.1076, "step": 1313 }, { "epoch": 17.52173913043478, "grad_norm": 0.6076642870903015, "learning_rate": 0.0001418020022246941, "loss": 3.1121, "step": 1314 }, { "epoch": 17.535117056856187, "grad_norm": 0.6293672323226929, "learning_rate": 0.0001417575083426029, "loss": 3.0719, "step": 1315 }, { "epoch": 17.548494983277592, "grad_norm": 0.6696231365203857, "learning_rate": 0.0001417130144605117, "loss": 2.4281, "step": 1316 }, { "epoch": 17.561872909698998, "grad_norm": 0.7928171157836914, "learning_rate": 0.00014166852057842047, "loss": 2.8602, "step": 1317 }, { "epoch": 17.5752508361204, "grad_norm": 0.5897494554519653, "learning_rate": 0.00014162402669632925, "loss": 3.1556, "step": 1318 }, { "epoch": 17.588628762541806, "grad_norm": 0.6028451323509216, "learning_rate": 0.00014157953281423806, "loss": 3.1228, "step": 1319 }, { "epoch": 17.60200668896321, "grad_norm": 0.6237207651138306, "learning_rate": 0.00014153503893214685, "loss": 2.9691, "step": 1320 }, { "epoch": 17.615384615384617, "grad_norm": 0.6401494741439819, "learning_rate": 0.00014149054505005563, "loss": 3.0659, "step": 1321 }, { "epoch": 17.62876254180602, "grad_norm": 0.7098166942596436, "learning_rate": 0.0001414460511679644, "loss": 2.9606, "step": 1322 }, { "epoch": 17.642140468227424, "grad_norm": 0.6416228413581848, "learning_rate": 0.00014140155728587322, "loss": 3.1056, "step": 1323 }, { "epoch": 17.65551839464883, "grad_norm": 0.7303211092948914, "learning_rate": 0.00014135706340378198, "loss": 2.8604, "step": 1324 }, { "epoch": 17.668896321070235, "grad_norm": 0.64544677734375, "learning_rate": 0.00014131256952169079, "loss": 2.6751, "step": 1325 }, { "epoch": 17.682274247491637, "grad_norm": 0.6870211362838745, "learning_rate": 0.00014126807563959957, "loss": 2.9802, "step": 1326 }, { "epoch": 17.695652173913043, "grad_norm": 0.6570687294006348, "learning_rate": 0.00014122358175750835, "loss": 3.0496, "step": 1327 }, { "epoch": 17.70903010033445, "grad_norm": 0.7057302594184875, "learning_rate": 0.00014117908787541713, "loss": 2.845, "step": 1328 }, { "epoch": 17.722408026755854, "grad_norm": 0.8613574504852295, "learning_rate": 0.00014113459399332591, "loss": 3.0314, "step": 1329 }, { "epoch": 17.735785953177256, "grad_norm": 0.7408957481384277, "learning_rate": 0.00014109010011123472, "loss": 2.8651, "step": 1330 }, { "epoch": 17.74916387959866, "grad_norm": 0.6553664803504944, "learning_rate": 0.00014104560622914348, "loss": 2.9647, "step": 1331 }, { "epoch": 17.762541806020067, "grad_norm": 0.5991332530975342, "learning_rate": 0.0001410011123470523, "loss": 2.9796, "step": 1332 }, { "epoch": 17.775919732441473, "grad_norm": 0.6124044060707092, "learning_rate": 0.00014095661846496107, "loss": 3.0682, "step": 1333 }, { "epoch": 17.789297658862875, "grad_norm": 0.5788628458976746, "learning_rate": 0.00014091212458286985, "loss": 3.0357, "step": 1334 }, { "epoch": 17.80267558528428, "grad_norm": 0.6785842776298523, "learning_rate": 0.00014086763070077864, "loss": 3.1128, "step": 1335 }, { "epoch": 17.816053511705686, "grad_norm": 0.5994388461112976, "learning_rate": 0.00014082313681868745, "loss": 2.9087, "step": 1336 }, { "epoch": 17.82943143812709, "grad_norm": 0.6150069236755371, "learning_rate": 0.00014077864293659623, "loss": 2.9488, "step": 1337 }, { "epoch": 17.842809364548494, "grad_norm": 0.6211126446723938, "learning_rate": 0.000140734149054505, "loss": 2.8006, "step": 1338 }, { "epoch": 17.8561872909699, "grad_norm": 0.6093603372573853, "learning_rate": 0.0001406896551724138, "loss": 3.0013, "step": 1339 }, { "epoch": 17.869565217391305, "grad_norm": 0.6861109137535095, "learning_rate": 0.0001406451612903226, "loss": 2.9423, "step": 1340 }, { "epoch": 17.88294314381271, "grad_norm": 0.6148517727851868, "learning_rate": 0.00014060066740823136, "loss": 2.8471, "step": 1341 }, { "epoch": 17.896321070234112, "grad_norm": 0.8285694718360901, "learning_rate": 0.00014055617352614017, "loss": 3.055, "step": 1342 }, { "epoch": 17.909698996655518, "grad_norm": 0.5955973863601685, "learning_rate": 0.00014051167964404895, "loss": 3.0088, "step": 1343 }, { "epoch": 17.923076923076923, "grad_norm": 0.6020825505256653, "learning_rate": 0.00014046718576195773, "loss": 3.1445, "step": 1344 }, { "epoch": 17.93645484949833, "grad_norm": 0.6115384101867676, "learning_rate": 0.00014042269187986651, "loss": 3.0889, "step": 1345 }, { "epoch": 17.94983277591973, "grad_norm": 0.6469634175300598, "learning_rate": 0.00014037819799777532, "loss": 3.1916, "step": 1346 }, { "epoch": 17.963210702341136, "grad_norm": 0.6653386354446411, "learning_rate": 0.0001403337041156841, "loss": 2.8805, "step": 1347 }, { "epoch": 17.976588628762542, "grad_norm": 0.6167243719100952, "learning_rate": 0.0001402892102335929, "loss": 2.9819, "step": 1348 }, { "epoch": 17.989966555183948, "grad_norm": 0.6281883716583252, "learning_rate": 0.00014024471635150167, "loss": 3.1109, "step": 1349 }, { "epoch": 18.0, "grad_norm": 0.6747295260429382, "learning_rate": 0.00014020022246941048, "loss": 2.8772, "step": 1350 }, { "epoch": 18.013377926421406, "grad_norm": 0.5834116339683533, "learning_rate": 0.00014015572858731924, "loss": 2.9399, "step": 1351 }, { "epoch": 18.02675585284281, "grad_norm": 0.620858371257782, "learning_rate": 0.00014011123470522805, "loss": 2.7351, "step": 1352 }, { "epoch": 18.040133779264213, "grad_norm": 0.5407689809799194, "learning_rate": 0.00014006674082313683, "loss": 2.8329, "step": 1353 }, { "epoch": 18.05351170568562, "grad_norm": 0.6045056581497192, "learning_rate": 0.0001400222469410456, "loss": 2.7382, "step": 1354 }, { "epoch": 18.066889632107024, "grad_norm": 0.5433570146560669, "learning_rate": 0.0001399777530589544, "loss": 2.9154, "step": 1355 }, { "epoch": 18.08026755852843, "grad_norm": 0.6174083352088928, "learning_rate": 0.0001399332591768632, "loss": 2.7438, "step": 1356 }, { "epoch": 18.093645484949832, "grad_norm": 0.6720690727233887, "learning_rate": 0.00013988876529477198, "loss": 3.1324, "step": 1357 }, { "epoch": 18.107023411371237, "grad_norm": 0.648423433303833, "learning_rate": 0.00013984427141268077, "loss": 2.7802, "step": 1358 }, { "epoch": 18.120401337792643, "grad_norm": 0.6625978350639343, "learning_rate": 0.00013979977753058955, "loss": 2.9152, "step": 1359 }, { "epoch": 18.13377926421405, "grad_norm": 0.5362007021903992, "learning_rate": 0.00013975528364849836, "loss": 2.9552, "step": 1360 }, { "epoch": 18.14715719063545, "grad_norm": 0.6275555491447449, "learning_rate": 0.00013971078976640711, "loss": 3.0742, "step": 1361 }, { "epoch": 18.160535117056856, "grad_norm": 0.5755884647369385, "learning_rate": 0.00013966629588431592, "loss": 2.8705, "step": 1362 }, { "epoch": 18.17391304347826, "grad_norm": 0.5092719793319702, "learning_rate": 0.0001396218020022247, "loss": 2.9684, "step": 1363 }, { "epoch": 18.187290969899667, "grad_norm": 0.7400075197219849, "learning_rate": 0.0001395773081201335, "loss": 2.9726, "step": 1364 }, { "epoch": 18.20066889632107, "grad_norm": 0.6478124260902405, "learning_rate": 0.00013953281423804227, "loss": 2.7427, "step": 1365 }, { "epoch": 18.214046822742475, "grad_norm": 0.6313418745994568, "learning_rate": 0.00013948832035595108, "loss": 2.7713, "step": 1366 }, { "epoch": 18.22742474916388, "grad_norm": 0.5571421980857849, "learning_rate": 0.00013944382647385986, "loss": 2.9221, "step": 1367 }, { "epoch": 18.240802675585286, "grad_norm": 0.5346395373344421, "learning_rate": 0.00013939933259176862, "loss": 2.9842, "step": 1368 }, { "epoch": 18.254180602006688, "grad_norm": 0.5828048586845398, "learning_rate": 0.00013935483870967743, "loss": 2.8683, "step": 1369 }, { "epoch": 18.267558528428093, "grad_norm": 0.6446037888526917, "learning_rate": 0.0001393103448275862, "loss": 2.8, "step": 1370 }, { "epoch": 18.2809364548495, "grad_norm": 0.612689197063446, "learning_rate": 0.000139265850945495, "loss": 3.0642, "step": 1371 }, { "epoch": 18.294314381270905, "grad_norm": 0.5511941909790039, "learning_rate": 0.00013922135706340377, "loss": 3.0633, "step": 1372 }, { "epoch": 18.307692307692307, "grad_norm": 0.7538149356842041, "learning_rate": 0.00013917686318131258, "loss": 2.7224, "step": 1373 }, { "epoch": 18.321070234113712, "grad_norm": 0.6194874048233032, "learning_rate": 0.00013913236929922137, "loss": 2.9357, "step": 1374 }, { "epoch": 18.334448160535118, "grad_norm": 0.5770833492279053, "learning_rate": 0.00013908787541713015, "loss": 2.8585, "step": 1375 }, { "epoch": 18.347826086956523, "grad_norm": 0.609080970287323, "learning_rate": 0.00013904338153503893, "loss": 2.7763, "step": 1376 }, { "epoch": 18.361204013377925, "grad_norm": 0.5578462481498718, "learning_rate": 0.00013899888765294774, "loss": 3.0324, "step": 1377 }, { "epoch": 18.37458193979933, "grad_norm": 0.5949610471725464, "learning_rate": 0.0001389543937708565, "loss": 2.7332, "step": 1378 }, { "epoch": 18.387959866220736, "grad_norm": 0.6248785257339478, "learning_rate": 0.0001389098998887653, "loss": 2.7355, "step": 1379 }, { "epoch": 18.401337792642142, "grad_norm": 0.606239378452301, "learning_rate": 0.0001388654060066741, "loss": 2.9883, "step": 1380 }, { "epoch": 18.414715719063544, "grad_norm": 0.6222496628761292, "learning_rate": 0.00013882091212458287, "loss": 2.7384, "step": 1381 }, { "epoch": 18.42809364548495, "grad_norm": 0.6253412365913391, "learning_rate": 0.00013877641824249165, "loss": 2.7555, "step": 1382 }, { "epoch": 18.441471571906355, "grad_norm": 0.6204626560211182, "learning_rate": 0.00013873192436040046, "loss": 2.7279, "step": 1383 }, { "epoch": 18.45484949832776, "grad_norm": 0.7254919409751892, "learning_rate": 0.00013868743047830924, "loss": 2.9053, "step": 1384 }, { "epoch": 18.468227424749163, "grad_norm": 0.6207154393196106, "learning_rate": 0.00013864293659621803, "loss": 3.0648, "step": 1385 }, { "epoch": 18.48160535117057, "grad_norm": 0.6959066390991211, "learning_rate": 0.0001385984427141268, "loss": 2.9641, "step": 1386 }, { "epoch": 18.494983277591974, "grad_norm": 0.6345707774162292, "learning_rate": 0.00013855394883203562, "loss": 2.9452, "step": 1387 }, { "epoch": 18.50836120401338, "grad_norm": 0.5806639790534973, "learning_rate": 0.00013850945494994437, "loss": 2.9375, "step": 1388 }, { "epoch": 18.52173913043478, "grad_norm": 0.6498666405677795, "learning_rate": 0.00013846496106785318, "loss": 2.867, "step": 1389 }, { "epoch": 18.535117056856187, "grad_norm": 0.629264771938324, "learning_rate": 0.00013842046718576197, "loss": 2.822, "step": 1390 }, { "epoch": 18.548494983277592, "grad_norm": 0.6734644174575806, "learning_rate": 0.00013837597330367075, "loss": 2.9065, "step": 1391 }, { "epoch": 18.561872909698998, "grad_norm": 0.5705899000167847, "learning_rate": 0.00013833147942157953, "loss": 2.9909, "step": 1392 }, { "epoch": 18.5752508361204, "grad_norm": 0.6786744594573975, "learning_rate": 0.00013828698553948834, "loss": 3.0667, "step": 1393 }, { "epoch": 18.588628762541806, "grad_norm": 0.6044118404388428, "learning_rate": 0.00013824249165739712, "loss": 2.8345, "step": 1394 }, { "epoch": 18.60200668896321, "grad_norm": 0.5928333401679993, "learning_rate": 0.0001381979977753059, "loss": 2.974, "step": 1395 }, { "epoch": 18.615384615384617, "grad_norm": 0.636883556842804, "learning_rate": 0.0001381535038932147, "loss": 3.1034, "step": 1396 }, { "epoch": 18.62876254180602, "grad_norm": 0.6029159426689148, "learning_rate": 0.0001381090100111235, "loss": 3.0303, "step": 1397 }, { "epoch": 18.642140468227424, "grad_norm": 0.6479122638702393, "learning_rate": 0.00013806451612903225, "loss": 3.1782, "step": 1398 }, { "epoch": 18.65551839464883, "grad_norm": 0.6547753810882568, "learning_rate": 0.00013802002224694106, "loss": 3.0594, "step": 1399 }, { "epoch": 18.668896321070235, "grad_norm": 0.6506614089012146, "learning_rate": 0.00013797552836484984, "loss": 3.0955, "step": 1400 }, { "epoch": 18.682274247491637, "grad_norm": 0.6073411107063293, "learning_rate": 0.00013793103448275863, "loss": 2.7452, "step": 1401 }, { "epoch": 18.695652173913043, "grad_norm": 0.6307429075241089, "learning_rate": 0.0001378865406006674, "loss": 2.8329, "step": 1402 }, { "epoch": 18.70903010033445, "grad_norm": 0.7205286026000977, "learning_rate": 0.00013784204671857622, "loss": 2.8228, "step": 1403 }, { "epoch": 18.722408026755854, "grad_norm": 0.6154866814613342, "learning_rate": 0.000137797552836485, "loss": 2.7081, "step": 1404 }, { "epoch": 18.735785953177256, "grad_norm": 0.548799455165863, "learning_rate": 0.00013775305895439378, "loss": 2.9336, "step": 1405 }, { "epoch": 18.74916387959866, "grad_norm": 0.5723371505737305, "learning_rate": 0.00013770856507230257, "loss": 2.9661, "step": 1406 }, { "epoch": 18.762541806020067, "grad_norm": 0.5917731523513794, "learning_rate": 0.00013766407119021137, "loss": 2.8458, "step": 1407 }, { "epoch": 18.775919732441473, "grad_norm": 0.6160814166069031, "learning_rate": 0.00013761957730812013, "loss": 2.8453, "step": 1408 }, { "epoch": 18.789297658862875, "grad_norm": 0.5995500683784485, "learning_rate": 0.0001375750834260289, "loss": 3.1361, "step": 1409 }, { "epoch": 18.80267558528428, "grad_norm": 0.5574924945831299, "learning_rate": 0.00013753058954393772, "loss": 2.9808, "step": 1410 }, { "epoch": 18.816053511705686, "grad_norm": 0.6444510221481323, "learning_rate": 0.0001374860956618465, "loss": 2.9204, "step": 1411 }, { "epoch": 18.82943143812709, "grad_norm": 0.6153264045715332, "learning_rate": 0.0001374416017797553, "loss": 3.036, "step": 1412 }, { "epoch": 18.842809364548494, "grad_norm": 0.5951060652732849, "learning_rate": 0.00013739710789766407, "loss": 3.1012, "step": 1413 }, { "epoch": 18.8561872909699, "grad_norm": 0.5688861012458801, "learning_rate": 0.00013735261401557288, "loss": 3.1568, "step": 1414 }, { "epoch": 18.869565217391305, "grad_norm": 0.6256094574928284, "learning_rate": 0.00013730812013348163, "loss": 2.7721, "step": 1415 }, { "epoch": 18.88294314381271, "grad_norm": 0.646250307559967, "learning_rate": 0.00013726362625139044, "loss": 3.0091, "step": 1416 }, { "epoch": 18.896321070234112, "grad_norm": 0.6805879473686218, "learning_rate": 0.00013721913236929923, "loss": 2.8397, "step": 1417 }, { "epoch": 18.909698996655518, "grad_norm": 0.6164728999137878, "learning_rate": 0.000137174638487208, "loss": 2.8318, "step": 1418 }, { "epoch": 18.923076923076923, "grad_norm": 0.6298549771308899, "learning_rate": 0.0001371301446051168, "loss": 2.8493, "step": 1419 }, { "epoch": 18.93645484949833, "grad_norm": 0.5760109424591064, "learning_rate": 0.0001370856507230256, "loss": 3.1736, "step": 1420 }, { "epoch": 18.94983277591973, "grad_norm": 0.6126035451889038, "learning_rate": 0.00013704115684093438, "loss": 3.1313, "step": 1421 }, { "epoch": 18.963210702341136, "grad_norm": 0.6092283129692078, "learning_rate": 0.00013699666295884316, "loss": 3.0601, "step": 1422 }, { "epoch": 18.976588628762542, "grad_norm": 0.6506980657577515, "learning_rate": 0.00013695216907675195, "loss": 2.7787, "step": 1423 }, { "epoch": 18.989966555183948, "grad_norm": 0.6060482263565063, "learning_rate": 0.00013690767519466076, "loss": 3.062, "step": 1424 }, { "epoch": 19.0, "grad_norm": 0.7881284952163696, "learning_rate": 0.0001368631813125695, "loss": 2.9882, "step": 1425 }, { "epoch": 19.013377926421406, "grad_norm": 0.5459823608398438, "learning_rate": 0.00013681868743047832, "loss": 2.9312, "step": 1426 }, { "epoch": 19.02675585284281, "grad_norm": 0.71108078956604, "learning_rate": 0.0001367741935483871, "loss": 2.6003, "step": 1427 }, { "epoch": 19.040133779264213, "grad_norm": 0.6824572682380676, "learning_rate": 0.00013672969966629589, "loss": 2.8354, "step": 1428 }, { "epoch": 19.05351170568562, "grad_norm": 0.607400119304657, "learning_rate": 0.00013668520578420467, "loss": 2.95, "step": 1429 }, { "epoch": 19.066889632107024, "grad_norm": 0.5925526022911072, "learning_rate": 0.00013664071190211348, "loss": 2.7101, "step": 1430 }, { "epoch": 19.08026755852843, "grad_norm": 0.6208476424217224, "learning_rate": 0.00013659621802002226, "loss": 2.7759, "step": 1431 }, { "epoch": 19.093645484949832, "grad_norm": 0.6047778725624084, "learning_rate": 0.00013655172413793104, "loss": 3.0377, "step": 1432 }, { "epoch": 19.107023411371237, "grad_norm": 0.5979378819465637, "learning_rate": 0.00013650723025583983, "loss": 2.8671, "step": 1433 }, { "epoch": 19.120401337792643, "grad_norm": 0.6338753700256348, "learning_rate": 0.00013646273637374863, "loss": 2.7869, "step": 1434 }, { "epoch": 19.13377926421405, "grad_norm": 0.6263737678527832, "learning_rate": 0.0001364182424916574, "loss": 2.6591, "step": 1435 }, { "epoch": 19.14715719063545, "grad_norm": 0.646990180015564, "learning_rate": 0.0001363737486095662, "loss": 2.896, "step": 1436 }, { "epoch": 19.160535117056856, "grad_norm": 0.5691138505935669, "learning_rate": 0.00013632925472747498, "loss": 2.8774, "step": 1437 }, { "epoch": 19.17391304347826, "grad_norm": 0.5838844180107117, "learning_rate": 0.00013628476084538376, "loss": 2.9722, "step": 1438 }, { "epoch": 19.187290969899667, "grad_norm": 0.6154463887214661, "learning_rate": 0.00013624026696329255, "loss": 3.0143, "step": 1439 }, { "epoch": 19.20066889632107, "grad_norm": 0.6480549573898315, "learning_rate": 0.00013619577308120136, "loss": 2.8118, "step": 1440 }, { "epoch": 19.214046822742475, "grad_norm": 0.7675592303276062, "learning_rate": 0.00013615127919911014, "loss": 2.865, "step": 1441 }, { "epoch": 19.22742474916388, "grad_norm": 0.7231382131576538, "learning_rate": 0.00013610678531701892, "loss": 2.6942, "step": 1442 }, { "epoch": 19.240802675585286, "grad_norm": 0.6359425187110901, "learning_rate": 0.0001360622914349277, "loss": 2.5976, "step": 1443 }, { "epoch": 19.254180602006688, "grad_norm": 0.6486908793449402, "learning_rate": 0.0001360177975528365, "loss": 2.9494, "step": 1444 }, { "epoch": 19.267558528428093, "grad_norm": 0.5930846929550171, "learning_rate": 0.00013597330367074527, "loss": 3.1295, "step": 1445 }, { "epoch": 19.2809364548495, "grad_norm": 0.6988996267318726, "learning_rate": 0.00013592880978865408, "loss": 2.9627, "step": 1446 }, { "epoch": 19.294314381270905, "grad_norm": 0.5971337556838989, "learning_rate": 0.00013588431590656286, "loss": 2.9636, "step": 1447 }, { "epoch": 19.307692307692307, "grad_norm": 0.6479155421257019, "learning_rate": 0.00013583982202447164, "loss": 2.6575, "step": 1448 }, { "epoch": 19.321070234113712, "grad_norm": 0.6771759986877441, "learning_rate": 0.00013579532814238043, "loss": 3.0667, "step": 1449 }, { "epoch": 19.334448160535118, "grad_norm": 0.5785907506942749, "learning_rate": 0.0001357508342602892, "loss": 2.8839, "step": 1450 }, { "epoch": 19.347826086956523, "grad_norm": 0.6315357089042664, "learning_rate": 0.00013570634037819802, "loss": 2.6925, "step": 1451 }, { "epoch": 19.361204013377925, "grad_norm": 0.6047807931900024, "learning_rate": 0.00013566184649610677, "loss": 2.7948, "step": 1452 }, { "epoch": 19.37458193979933, "grad_norm": 0.6079906225204468, "learning_rate": 0.00013561735261401558, "loss": 3.1798, "step": 1453 }, { "epoch": 19.387959866220736, "grad_norm": 0.5820274353027344, "learning_rate": 0.00013557285873192436, "loss": 3.1159, "step": 1454 }, { "epoch": 19.401337792642142, "grad_norm": 0.562022864818573, "learning_rate": 0.00013552836484983315, "loss": 2.9869, "step": 1455 }, { "epoch": 19.414715719063544, "grad_norm": 0.6663182973861694, "learning_rate": 0.00013548387096774193, "loss": 2.8038, "step": 1456 }, { "epoch": 19.42809364548495, "grad_norm": 0.7092719078063965, "learning_rate": 0.00013543937708565074, "loss": 3.0159, "step": 1457 }, { "epoch": 19.441471571906355, "grad_norm": 0.5990714430809021, "learning_rate": 0.00013539488320355952, "loss": 2.8858, "step": 1458 }, { "epoch": 19.45484949832776, "grad_norm": 0.600847065448761, "learning_rate": 0.0001353503893214683, "loss": 2.9352, "step": 1459 }, { "epoch": 19.468227424749163, "grad_norm": 0.6393849849700928, "learning_rate": 0.00013530589543937709, "loss": 2.6911, "step": 1460 }, { "epoch": 19.48160535117057, "grad_norm": 0.6170421838760376, "learning_rate": 0.0001352614015572859, "loss": 2.749, "step": 1461 }, { "epoch": 19.494983277591974, "grad_norm": 0.53690105676651, "learning_rate": 0.00013521690767519465, "loss": 2.9516, "step": 1462 }, { "epoch": 19.50836120401338, "grad_norm": 0.5976501703262329, "learning_rate": 0.00013517241379310346, "loss": 2.9996, "step": 1463 }, { "epoch": 19.52173913043478, "grad_norm": 0.5892135500907898, "learning_rate": 0.00013512791991101224, "loss": 3.2039, "step": 1464 }, { "epoch": 19.535117056856187, "grad_norm": 0.65968918800354, "learning_rate": 0.00013508342602892102, "loss": 2.5897, "step": 1465 }, { "epoch": 19.548494983277592, "grad_norm": 0.591454267501831, "learning_rate": 0.0001350389321468298, "loss": 3.0104, "step": 1466 }, { "epoch": 19.561872909698998, "grad_norm": 0.6272184252738953, "learning_rate": 0.00013499443826473862, "loss": 2.7266, "step": 1467 }, { "epoch": 19.5752508361204, "grad_norm": 0.6142420172691345, "learning_rate": 0.0001349499443826474, "loss": 2.8358, "step": 1468 }, { "epoch": 19.588628762541806, "grad_norm": 0.6268441677093506, "learning_rate": 0.00013490545050055618, "loss": 3.0196, "step": 1469 }, { "epoch": 19.60200668896321, "grad_norm": 0.6512436866760254, "learning_rate": 0.00013486095661846496, "loss": 2.9558, "step": 1470 }, { "epoch": 19.615384615384617, "grad_norm": 0.5983771681785583, "learning_rate": 0.00013481646273637377, "loss": 2.9958, "step": 1471 }, { "epoch": 19.62876254180602, "grad_norm": 0.6994190216064453, "learning_rate": 0.00013477196885428253, "loss": 2.8019, "step": 1472 }, { "epoch": 19.642140468227424, "grad_norm": 0.5878567695617676, "learning_rate": 0.00013472747497219134, "loss": 2.7007, "step": 1473 }, { "epoch": 19.65551839464883, "grad_norm": 0.6140199303627014, "learning_rate": 0.00013468298109010012, "loss": 2.9212, "step": 1474 }, { "epoch": 19.668896321070235, "grad_norm": 0.648714542388916, "learning_rate": 0.0001346384872080089, "loss": 3.0053, "step": 1475 }, { "epoch": 19.682274247491637, "grad_norm": 0.5991750359535217, "learning_rate": 0.00013459399332591769, "loss": 2.9129, "step": 1476 }, { "epoch": 19.695652173913043, "grad_norm": 0.5538223385810852, "learning_rate": 0.0001345494994438265, "loss": 2.9097, "step": 1477 }, { "epoch": 19.70903010033445, "grad_norm": 0.5864409804344177, "learning_rate": 0.00013450500556173528, "loss": 2.9348, "step": 1478 }, { "epoch": 19.722408026755854, "grad_norm": 0.6004533767700195, "learning_rate": 0.00013446051167964406, "loss": 2.8845, "step": 1479 }, { "epoch": 19.735785953177256, "grad_norm": 0.6316581964492798, "learning_rate": 0.00013441601779755284, "loss": 2.8619, "step": 1480 }, { "epoch": 19.74916387959866, "grad_norm": 0.593138575553894, "learning_rate": 0.00013437152391546165, "loss": 3.041, "step": 1481 }, { "epoch": 19.762541806020067, "grad_norm": 0.5826678276062012, "learning_rate": 0.0001343270300333704, "loss": 2.7432, "step": 1482 }, { "epoch": 19.775919732441473, "grad_norm": 0.6341697573661804, "learning_rate": 0.00013428253615127922, "loss": 2.9755, "step": 1483 }, { "epoch": 19.789297658862875, "grad_norm": 0.5894901156425476, "learning_rate": 0.000134238042269188, "loss": 2.9754, "step": 1484 }, { "epoch": 19.80267558528428, "grad_norm": 0.5840655565261841, "learning_rate": 0.00013419354838709678, "loss": 2.9959, "step": 1485 }, { "epoch": 19.816053511705686, "grad_norm": 0.6006319522857666, "learning_rate": 0.00013414905450500556, "loss": 3.0196, "step": 1486 }, { "epoch": 19.82943143812709, "grad_norm": 0.5647453665733337, "learning_rate": 0.00013410456062291437, "loss": 2.9662, "step": 1487 }, { "epoch": 19.842809364548494, "grad_norm": 0.6583006978034973, "learning_rate": 0.00013406006674082316, "loss": 2.8719, "step": 1488 }, { "epoch": 19.8561872909699, "grad_norm": 0.6041131615638733, "learning_rate": 0.00013401557285873194, "loss": 3.0019, "step": 1489 }, { "epoch": 19.869565217391305, "grad_norm": 0.5524600148200989, "learning_rate": 0.00013397107897664072, "loss": 2.7409, "step": 1490 }, { "epoch": 19.88294314381271, "grad_norm": 0.6532869338989258, "learning_rate": 0.0001339265850945495, "loss": 2.9336, "step": 1491 }, { "epoch": 19.896321070234112, "grad_norm": 0.6459875106811523, "learning_rate": 0.00013388209121245828, "loss": 2.9572, "step": 1492 }, { "epoch": 19.909698996655518, "grad_norm": 0.6051417589187622, "learning_rate": 0.00013383759733036707, "loss": 2.864, "step": 1493 }, { "epoch": 19.923076923076923, "grad_norm": 0.6565695405006409, "learning_rate": 0.00013379310344827588, "loss": 2.865, "step": 1494 }, { "epoch": 19.93645484949833, "grad_norm": 0.6118014454841614, "learning_rate": 0.00013374860956618466, "loss": 2.7594, "step": 1495 }, { "epoch": 19.94983277591973, "grad_norm": 0.6801209449768066, "learning_rate": 0.00013370411568409344, "loss": 2.7532, "step": 1496 }, { "epoch": 19.963210702341136, "grad_norm": 0.5785267353057861, "learning_rate": 0.00013365962180200222, "loss": 3.0618, "step": 1497 }, { "epoch": 19.976588628762542, "grad_norm": 0.6344903707504272, "learning_rate": 0.00013361512791991103, "loss": 2.7908, "step": 1498 }, { "epoch": 19.989966555183948, "grad_norm": 0.6073011159896851, "learning_rate": 0.0001335706340378198, "loss": 3.0699, "step": 1499 }, { "epoch": 20.0, "grad_norm": 0.6989748477935791, "learning_rate": 0.0001335261401557286, "loss": 2.7932, "step": 1500 }, { "epoch": 20.013377926421406, "grad_norm": 0.5897710919380188, "learning_rate": 0.00013348164627363738, "loss": 3.0379, "step": 1501 }, { "epoch": 20.02675585284281, "grad_norm": 0.5845353603363037, "learning_rate": 0.00013343715239154616, "loss": 2.7123, "step": 1502 }, { "epoch": 20.040133779264213, "grad_norm": 0.5720913410186768, "learning_rate": 0.00013339265850945495, "loss": 3.0196, "step": 1503 }, { "epoch": 20.05351170568562, "grad_norm": 0.7616726756095886, "learning_rate": 0.00013334816462736375, "loss": 2.6719, "step": 1504 }, { "epoch": 20.066889632107024, "grad_norm": 0.6211048364639282, "learning_rate": 0.00013330367074527254, "loss": 2.8263, "step": 1505 }, { "epoch": 20.08026755852843, "grad_norm": 0.6016023755073547, "learning_rate": 0.00013325917686318132, "loss": 2.7226, "step": 1506 }, { "epoch": 20.093645484949832, "grad_norm": 0.6265879273414612, "learning_rate": 0.0001332146829810901, "loss": 2.8597, "step": 1507 }, { "epoch": 20.107023411371237, "grad_norm": 0.6129719614982605, "learning_rate": 0.0001331701890989989, "loss": 2.9449, "step": 1508 }, { "epoch": 20.120401337792643, "grad_norm": 0.6351513266563416, "learning_rate": 0.00013312569521690767, "loss": 2.9768, "step": 1509 }, { "epoch": 20.13377926421405, "grad_norm": 0.5772795677185059, "learning_rate": 0.00013308120133481648, "loss": 3.0424, "step": 1510 }, { "epoch": 20.14715719063545, "grad_norm": 0.600697934627533, "learning_rate": 0.00013303670745272526, "loss": 2.793, "step": 1511 }, { "epoch": 20.160535117056856, "grad_norm": 0.5418747663497925, "learning_rate": 0.00013299221357063404, "loss": 2.8706, "step": 1512 }, { "epoch": 20.17391304347826, "grad_norm": 0.5500038862228394, "learning_rate": 0.00013294771968854282, "loss": 2.7352, "step": 1513 }, { "epoch": 20.187290969899667, "grad_norm": 0.5478414297103882, "learning_rate": 0.00013290322580645163, "loss": 2.9113, "step": 1514 }, { "epoch": 20.20066889632107, "grad_norm": 0.5382576584815979, "learning_rate": 0.00013285873192436042, "loss": 2.8276, "step": 1515 }, { "epoch": 20.214046822742475, "grad_norm": 0.6003616452217102, "learning_rate": 0.0001328142380422692, "loss": 2.9112, "step": 1516 }, { "epoch": 20.22742474916388, "grad_norm": 0.6674323678016663, "learning_rate": 0.00013276974416017798, "loss": 2.7382, "step": 1517 }, { "epoch": 20.240802675585286, "grad_norm": 0.591314435005188, "learning_rate": 0.0001327252502780868, "loss": 2.8003, "step": 1518 }, { "epoch": 20.254180602006688, "grad_norm": 0.5268637537956238, "learning_rate": 0.00013268075639599554, "loss": 2.7134, "step": 1519 }, { "epoch": 20.267558528428093, "grad_norm": 0.5217694640159607, "learning_rate": 0.00013263626251390435, "loss": 2.6157, "step": 1520 }, { "epoch": 20.2809364548495, "grad_norm": 0.6171165704727173, "learning_rate": 0.00013259176863181314, "loss": 2.8738, "step": 1521 }, { "epoch": 20.294314381270905, "grad_norm": 0.5457054972648621, "learning_rate": 0.00013254727474972192, "loss": 2.6106, "step": 1522 }, { "epoch": 20.307692307692307, "grad_norm": 0.6596150994300842, "learning_rate": 0.0001325027808676307, "loss": 2.9974, "step": 1523 }, { "epoch": 20.321070234113712, "grad_norm": 0.7236288785934448, "learning_rate": 0.0001324582869855395, "loss": 2.8669, "step": 1524 }, { "epoch": 20.334448160535118, "grad_norm": 0.6390851736068726, "learning_rate": 0.0001324137931034483, "loss": 2.909, "step": 1525 }, { "epoch": 20.347826086956523, "grad_norm": 0.6010439991950989, "learning_rate": 0.00013236929922135708, "loss": 2.9567, "step": 1526 }, { "epoch": 20.361204013377925, "grad_norm": 0.5825399160385132, "learning_rate": 0.00013232480533926586, "loss": 2.7688, "step": 1527 }, { "epoch": 20.37458193979933, "grad_norm": 0.6000121831893921, "learning_rate": 0.00013228031145717467, "loss": 3.0515, "step": 1528 }, { "epoch": 20.387959866220736, "grad_norm": 0.5775492787361145, "learning_rate": 0.00013223581757508342, "loss": 2.8958, "step": 1529 }, { "epoch": 20.401337792642142, "grad_norm": 0.6193161010742188, "learning_rate": 0.00013219132369299223, "loss": 2.9648, "step": 1530 }, { "epoch": 20.414715719063544, "grad_norm": 0.7751132249832153, "learning_rate": 0.00013214682981090101, "loss": 2.677, "step": 1531 }, { "epoch": 20.42809364548495, "grad_norm": 0.6269053220748901, "learning_rate": 0.0001321023359288098, "loss": 2.9483, "step": 1532 }, { "epoch": 20.441471571906355, "grad_norm": 0.5793836116790771, "learning_rate": 0.00013205784204671858, "loss": 2.8025, "step": 1533 }, { "epoch": 20.45484949832776, "grad_norm": 0.5609649419784546, "learning_rate": 0.00013201334816462736, "loss": 2.8104, "step": 1534 }, { "epoch": 20.468227424749163, "grad_norm": 0.6055050492286682, "learning_rate": 0.00013196885428253617, "loss": 2.8649, "step": 1535 }, { "epoch": 20.48160535117057, "grad_norm": 0.5952653884887695, "learning_rate": 0.00013192436040044493, "loss": 2.6815, "step": 1536 }, { "epoch": 20.494983277591974, "grad_norm": 0.5716878175735474, "learning_rate": 0.00013187986651835374, "loss": 2.637, "step": 1537 }, { "epoch": 20.50836120401338, "grad_norm": 0.5684781074523926, "learning_rate": 0.00013183537263626252, "loss": 2.8601, "step": 1538 }, { "epoch": 20.52173913043478, "grad_norm": 0.5603588223457336, "learning_rate": 0.0001317908787541713, "loss": 2.9173, "step": 1539 }, { "epoch": 20.535117056856187, "grad_norm": 0.6022379398345947, "learning_rate": 0.00013174638487208008, "loss": 2.873, "step": 1540 }, { "epoch": 20.548494983277592, "grad_norm": 0.6124081611633301, "learning_rate": 0.0001317018909899889, "loss": 3.05, "step": 1541 }, { "epoch": 20.561872909698998, "grad_norm": 0.5928300023078918, "learning_rate": 0.00013165739710789768, "loss": 2.7411, "step": 1542 }, { "epoch": 20.5752508361204, "grad_norm": 0.5931242108345032, "learning_rate": 0.00013161290322580646, "loss": 3.0032, "step": 1543 }, { "epoch": 20.588628762541806, "grad_norm": 0.7123291492462158, "learning_rate": 0.00013156840934371524, "loss": 2.6485, "step": 1544 }, { "epoch": 20.60200668896321, "grad_norm": 0.565980851650238, "learning_rate": 0.00013152391546162405, "loss": 2.7143, "step": 1545 }, { "epoch": 20.615384615384617, "grad_norm": 0.6504361033439636, "learning_rate": 0.0001314794215795328, "loss": 2.6285, "step": 1546 }, { "epoch": 20.62876254180602, "grad_norm": 0.6558433175086975, "learning_rate": 0.00013143492769744161, "loss": 3.0403, "step": 1547 }, { "epoch": 20.642140468227424, "grad_norm": 0.6159015893936157, "learning_rate": 0.0001313904338153504, "loss": 2.8836, "step": 1548 }, { "epoch": 20.65551839464883, "grad_norm": 0.6061471104621887, "learning_rate": 0.00013134593993325918, "loss": 2.9076, "step": 1549 }, { "epoch": 20.668896321070235, "grad_norm": 0.6891756057739258, "learning_rate": 0.00013130144605116796, "loss": 3.0326, "step": 1550 }, { "epoch": 20.682274247491637, "grad_norm": 0.5920016765594482, "learning_rate": 0.00013125695216907677, "loss": 2.949, "step": 1551 }, { "epoch": 20.695652173913043, "grad_norm": 0.7947055697441101, "learning_rate": 0.00013121245828698555, "loss": 2.7796, "step": 1552 }, { "epoch": 20.70903010033445, "grad_norm": 0.6409865021705627, "learning_rate": 0.00013116796440489434, "loss": 2.9103, "step": 1553 }, { "epoch": 20.722408026755854, "grad_norm": 0.5864197611808777, "learning_rate": 0.00013112347052280312, "loss": 3.0493, "step": 1554 }, { "epoch": 20.735785953177256, "grad_norm": 0.6567059755325317, "learning_rate": 0.00013107897664071193, "loss": 2.8359, "step": 1555 }, { "epoch": 20.74916387959866, "grad_norm": 0.5808839201927185, "learning_rate": 0.00013103448275862068, "loss": 2.9595, "step": 1556 }, { "epoch": 20.762541806020067, "grad_norm": 0.6176792979240417, "learning_rate": 0.0001309899888765295, "loss": 2.6975, "step": 1557 }, { "epoch": 20.775919732441473, "grad_norm": 0.5763049721717834, "learning_rate": 0.00013094549499443827, "loss": 2.9752, "step": 1558 }, { "epoch": 20.789297658862875, "grad_norm": 0.5877807140350342, "learning_rate": 0.00013090100111234706, "loss": 2.7214, "step": 1559 }, { "epoch": 20.80267558528428, "grad_norm": 0.6624999642372131, "learning_rate": 0.00013085650723025584, "loss": 2.8642, "step": 1560 }, { "epoch": 20.816053511705686, "grad_norm": 0.7411154508590698, "learning_rate": 0.00013081201334816465, "loss": 2.7526, "step": 1561 }, { "epoch": 20.82943143812709, "grad_norm": 0.6246466636657715, "learning_rate": 0.00013076751946607343, "loss": 3.0441, "step": 1562 }, { "epoch": 20.842809364548494, "grad_norm": 0.5515304803848267, "learning_rate": 0.00013072302558398221, "loss": 2.7211, "step": 1563 }, { "epoch": 20.8561872909699, "grad_norm": 0.5537006258964539, "learning_rate": 0.000130678531701891, "loss": 2.9619, "step": 1564 }, { "epoch": 20.869565217391305, "grad_norm": 0.6326210498809814, "learning_rate": 0.0001306340378197998, "loss": 3.2433, "step": 1565 }, { "epoch": 20.88294314381271, "grad_norm": 0.6463941335678101, "learning_rate": 0.00013058954393770856, "loss": 2.7263, "step": 1566 }, { "epoch": 20.896321070234112, "grad_norm": 0.6212389469146729, "learning_rate": 0.00013054505005561737, "loss": 2.9775, "step": 1567 }, { "epoch": 20.909698996655518, "grad_norm": 0.5878139734268188, "learning_rate": 0.00013050055617352615, "loss": 2.7427, "step": 1568 }, { "epoch": 20.923076923076923, "grad_norm": 0.5821187496185303, "learning_rate": 0.00013045606229143494, "loss": 2.9058, "step": 1569 }, { "epoch": 20.93645484949833, "grad_norm": 0.6192734837532043, "learning_rate": 0.00013041156840934372, "loss": 2.9744, "step": 1570 }, { "epoch": 20.94983277591973, "grad_norm": 0.5540494322776794, "learning_rate": 0.00013036707452725253, "loss": 2.8299, "step": 1571 }, { "epoch": 20.963210702341136, "grad_norm": 0.6620298624038696, "learning_rate": 0.0001303225806451613, "loss": 2.8291, "step": 1572 }, { "epoch": 20.976588628762542, "grad_norm": 0.7161823511123657, "learning_rate": 0.0001302780867630701, "loss": 2.9829, "step": 1573 }, { "epoch": 20.989966555183948, "grad_norm": 1.1449205875396729, "learning_rate": 0.00013023359288097887, "loss": 2.8807, "step": 1574 }, { "epoch": 21.0, "grad_norm": 0.6721773743629456, "learning_rate": 0.00013018909899888766, "loss": 2.8398, "step": 1575 }, { "epoch": 21.013377926421406, "grad_norm": 0.5506662130355835, "learning_rate": 0.00013014460511679644, "loss": 2.6858, "step": 1576 }, { "epoch": 21.02675585284281, "grad_norm": 0.5651558041572571, "learning_rate": 0.00013010011123470522, "loss": 2.8692, "step": 1577 }, { "epoch": 21.040133779264213, "grad_norm": 0.5379538536071777, "learning_rate": 0.00013005561735261403, "loss": 2.8221, "step": 1578 }, { "epoch": 21.05351170568562, "grad_norm": 0.5756275653839111, "learning_rate": 0.0001300111234705228, "loss": 2.5469, "step": 1579 }, { "epoch": 21.066889632107024, "grad_norm": 0.672498881816864, "learning_rate": 0.0001299666295884316, "loss": 2.825, "step": 1580 }, { "epoch": 21.08026755852843, "grad_norm": 0.5217440128326416, "learning_rate": 0.00012992213570634038, "loss": 2.732, "step": 1581 }, { "epoch": 21.093645484949832, "grad_norm": 0.6326786875724792, "learning_rate": 0.0001298776418242492, "loss": 2.6537, "step": 1582 }, { "epoch": 21.107023411371237, "grad_norm": 0.6899091005325317, "learning_rate": 0.00012983314794215794, "loss": 2.7133, "step": 1583 }, { "epoch": 21.120401337792643, "grad_norm": 0.604834794998169, "learning_rate": 0.00012978865406006675, "loss": 2.9341, "step": 1584 }, { "epoch": 21.13377926421405, "grad_norm": 0.7540157437324524, "learning_rate": 0.00012974416017797553, "loss": 2.8224, "step": 1585 }, { "epoch": 21.14715719063545, "grad_norm": 0.6161774396896362, "learning_rate": 0.00012969966629588432, "loss": 2.9968, "step": 1586 }, { "epoch": 21.160535117056856, "grad_norm": 0.618781328201294, "learning_rate": 0.0001296551724137931, "loss": 2.8074, "step": 1587 }, { "epoch": 21.17391304347826, "grad_norm": 0.6708270907402039, "learning_rate": 0.0001296106785317019, "loss": 2.9327, "step": 1588 }, { "epoch": 21.187290969899667, "grad_norm": 0.6362271904945374, "learning_rate": 0.0001295661846496107, "loss": 2.46, "step": 1589 }, { "epoch": 21.20066889632107, "grad_norm": 0.6039800047874451, "learning_rate": 0.00012952169076751947, "loss": 2.7728, "step": 1590 }, { "epoch": 21.214046822742475, "grad_norm": 0.6119550466537476, "learning_rate": 0.00012947719688542826, "loss": 2.7123, "step": 1591 }, { "epoch": 21.22742474916388, "grad_norm": 0.8122309446334839, "learning_rate": 0.00012943270300333707, "loss": 2.7424, "step": 1592 }, { "epoch": 21.240802675585286, "grad_norm": 0.5658085346221924, "learning_rate": 0.00012938820912124582, "loss": 2.4553, "step": 1593 }, { "epoch": 21.254180602006688, "grad_norm": 0.7365813851356506, "learning_rate": 0.00012934371523915463, "loss": 2.9437, "step": 1594 }, { "epoch": 21.267558528428093, "grad_norm": 0.5746303796768188, "learning_rate": 0.0001292992213570634, "loss": 2.8262, "step": 1595 }, { "epoch": 21.2809364548495, "grad_norm": 0.5708132386207581, "learning_rate": 0.0001292547274749722, "loss": 2.9298, "step": 1596 }, { "epoch": 21.294314381270905, "grad_norm": 0.6003934741020203, "learning_rate": 0.00012921023359288098, "loss": 2.7943, "step": 1597 }, { "epoch": 21.307692307692307, "grad_norm": 0.5722339749336243, "learning_rate": 0.0001291657397107898, "loss": 3.0547, "step": 1598 }, { "epoch": 21.321070234113712, "grad_norm": 0.6287402510643005, "learning_rate": 0.00012912124582869857, "loss": 2.8814, "step": 1599 }, { "epoch": 21.334448160535118, "grad_norm": 0.6594595313072205, "learning_rate": 0.00012907675194660735, "loss": 2.7927, "step": 1600 }, { "epoch": 21.347826086956523, "grad_norm": 0.6678686141967773, "learning_rate": 0.00012903225806451613, "loss": 2.7981, "step": 1601 }, { "epoch": 21.361204013377925, "grad_norm": 0.6103408336639404, "learning_rate": 0.00012898776418242494, "loss": 2.9009, "step": 1602 }, { "epoch": 21.37458193979933, "grad_norm": 0.5791922807693481, "learning_rate": 0.0001289432703003337, "loss": 2.6689, "step": 1603 }, { "epoch": 21.387959866220736, "grad_norm": 0.5776186585426331, "learning_rate": 0.0001288987764182425, "loss": 2.903, "step": 1604 }, { "epoch": 21.401337792642142, "grad_norm": 0.5746363401412964, "learning_rate": 0.0001288542825361513, "loss": 2.832, "step": 1605 }, { "epoch": 21.414715719063544, "grad_norm": 0.5495322346687317, "learning_rate": 0.00012880978865406007, "loss": 2.7305, "step": 1606 }, { "epoch": 21.42809364548495, "grad_norm": 0.6944610476493835, "learning_rate": 0.00012876529477196886, "loss": 2.7534, "step": 1607 }, { "epoch": 21.441471571906355, "grad_norm": 0.6640026569366455, "learning_rate": 0.00012872080088987767, "loss": 2.814, "step": 1608 }, { "epoch": 21.45484949832776, "grad_norm": 0.7469935417175293, "learning_rate": 0.00012867630700778642, "loss": 2.8523, "step": 1609 }, { "epoch": 21.468227424749163, "grad_norm": 0.600739061832428, "learning_rate": 0.00012863181312569523, "loss": 3.0074, "step": 1610 }, { "epoch": 21.48160535117057, "grad_norm": 0.6701086759567261, "learning_rate": 0.000128587319243604, "loss": 2.6693, "step": 1611 }, { "epoch": 21.494983277591974, "grad_norm": 0.6365100145339966, "learning_rate": 0.00012854282536151282, "loss": 2.9558, "step": 1612 }, { "epoch": 21.50836120401338, "grad_norm": 0.6170171499252319, "learning_rate": 0.00012849833147942158, "loss": 2.605, "step": 1613 }, { "epoch": 21.52173913043478, "grad_norm": 0.5899895429611206, "learning_rate": 0.0001284538375973304, "loss": 2.7816, "step": 1614 }, { "epoch": 21.535117056856187, "grad_norm": 0.6815734505653381, "learning_rate": 0.00012840934371523917, "loss": 2.6663, "step": 1615 }, { "epoch": 21.548494983277592, "grad_norm": 0.6620442271232605, "learning_rate": 0.00012836484983314792, "loss": 2.9333, "step": 1616 }, { "epoch": 21.561872909698998, "grad_norm": 0.6552561521530151, "learning_rate": 0.00012832035595105673, "loss": 2.6833, "step": 1617 }, { "epoch": 21.5752508361204, "grad_norm": 0.6288586854934692, "learning_rate": 0.00012827586206896552, "loss": 2.959, "step": 1618 }, { "epoch": 21.588628762541806, "grad_norm": 0.6969814300537109, "learning_rate": 0.0001282313681868743, "loss": 2.7129, "step": 1619 }, { "epoch": 21.60200668896321, "grad_norm": 0.5918965339660645, "learning_rate": 0.00012818687430478308, "loss": 2.9057, "step": 1620 }, { "epoch": 21.615384615384617, "grad_norm": 0.5739157199859619, "learning_rate": 0.0001281423804226919, "loss": 2.9275, "step": 1621 }, { "epoch": 21.62876254180602, "grad_norm": 0.6649355888366699, "learning_rate": 0.00012809788654060067, "loss": 2.7629, "step": 1622 }, { "epoch": 21.642140468227424, "grad_norm": 0.6449686884880066, "learning_rate": 0.00012805339265850946, "loss": 2.9173, "step": 1623 }, { "epoch": 21.65551839464883, "grad_norm": 0.5940249562263489, "learning_rate": 0.00012800889877641824, "loss": 2.875, "step": 1624 }, { "epoch": 21.668896321070235, "grad_norm": 0.8104184865951538, "learning_rate": 0.00012796440489432705, "loss": 2.7381, "step": 1625 }, { "epoch": 21.682274247491637, "grad_norm": 0.6892338395118713, "learning_rate": 0.0001279199110122358, "loss": 2.7516, "step": 1626 }, { "epoch": 21.695652173913043, "grad_norm": 0.5888476967811584, "learning_rate": 0.0001278754171301446, "loss": 2.9234, "step": 1627 }, { "epoch": 21.70903010033445, "grad_norm": 0.6148846745491028, "learning_rate": 0.0001278309232480534, "loss": 3.0824, "step": 1628 }, { "epoch": 21.722408026755854, "grad_norm": 0.6559250354766846, "learning_rate": 0.00012778642936596218, "loss": 3.0316, "step": 1629 }, { "epoch": 21.735785953177256, "grad_norm": 0.5679346919059753, "learning_rate": 0.00012774193548387096, "loss": 3.0012, "step": 1630 }, { "epoch": 21.74916387959866, "grad_norm": 0.6776624917984009, "learning_rate": 0.00012769744160177977, "loss": 2.8331, "step": 1631 }, { "epoch": 21.762541806020067, "grad_norm": 0.6982457637786865, "learning_rate": 0.00012765294771968855, "loss": 2.8175, "step": 1632 }, { "epoch": 21.775919732441473, "grad_norm": 0.6834619045257568, "learning_rate": 0.00012760845383759733, "loss": 3.1136, "step": 1633 }, { "epoch": 21.789297658862875, "grad_norm": 0.6460130214691162, "learning_rate": 0.00012756395995550612, "loss": 2.7737, "step": 1634 }, { "epoch": 21.80267558528428, "grad_norm": 0.6818533539772034, "learning_rate": 0.00012751946607341493, "loss": 2.5013, "step": 1635 }, { "epoch": 21.816053511705686, "grad_norm": 0.7875826954841614, "learning_rate": 0.00012747497219132368, "loss": 2.9702, "step": 1636 }, { "epoch": 21.82943143812709, "grad_norm": 0.5845924019813538, "learning_rate": 0.0001274304783092325, "loss": 2.8592, "step": 1637 }, { "epoch": 21.842809364548494, "grad_norm": 0.6136084794998169, "learning_rate": 0.00012738598442714127, "loss": 2.8456, "step": 1638 }, { "epoch": 21.8561872909699, "grad_norm": 0.7345831394195557, "learning_rate": 0.00012734149054505005, "loss": 2.9846, "step": 1639 }, { "epoch": 21.869565217391305, "grad_norm": 0.5709069967269897, "learning_rate": 0.00012729699666295884, "loss": 2.9736, "step": 1640 }, { "epoch": 21.88294314381271, "grad_norm": 0.7338367104530334, "learning_rate": 0.00012725250278086765, "loss": 2.8215, "step": 1641 }, { "epoch": 21.896321070234112, "grad_norm": 0.6029645800590515, "learning_rate": 0.00012720800889877643, "loss": 2.7419, "step": 1642 }, { "epoch": 21.909698996655518, "grad_norm": 0.7492797374725342, "learning_rate": 0.0001271635150166852, "loss": 2.8256, "step": 1643 }, { "epoch": 21.923076923076923, "grad_norm": 0.6877094507217407, "learning_rate": 0.000127119021134594, "loss": 2.8241, "step": 1644 }, { "epoch": 21.93645484949833, "grad_norm": 0.6263371109962463, "learning_rate": 0.0001270745272525028, "loss": 2.8459, "step": 1645 }, { "epoch": 21.94983277591973, "grad_norm": 0.5821470618247986, "learning_rate": 0.00012703003337041156, "loss": 3.0513, "step": 1646 }, { "epoch": 21.963210702341136, "grad_norm": 0.5494033098220825, "learning_rate": 0.00012698553948832037, "loss": 3.0951, "step": 1647 }, { "epoch": 21.976588628762542, "grad_norm": 0.5838212370872498, "learning_rate": 0.00012694104560622915, "loss": 3.0089, "step": 1648 }, { "epoch": 21.989966555183948, "grad_norm": 0.7115112543106079, "learning_rate": 0.00012689655172413793, "loss": 2.9137, "step": 1649 }, { "epoch": 22.0, "grad_norm": 0.7208350896835327, "learning_rate": 0.00012685205784204672, "loss": 2.8793, "step": 1650 }, { "epoch": 22.013377926421406, "grad_norm": 0.912466824054718, "learning_rate": 0.00012680756395995552, "loss": 2.7563, "step": 1651 }, { "epoch": 22.02675585284281, "grad_norm": 0.7778460383415222, "learning_rate": 0.0001267630700778643, "loss": 2.7859, "step": 1652 }, { "epoch": 22.040133779264213, "grad_norm": 0.6658245921134949, "learning_rate": 0.0001267185761957731, "loss": 2.8353, "step": 1653 }, { "epoch": 22.05351170568562, "grad_norm": 0.5711331963539124, "learning_rate": 0.00012667408231368187, "loss": 2.7731, "step": 1654 }, { "epoch": 22.066889632107024, "grad_norm": 0.6227294206619263, "learning_rate": 0.00012662958843159068, "loss": 2.6841, "step": 1655 }, { "epoch": 22.08026755852843, "grad_norm": 0.6221343874931335, "learning_rate": 0.00012658509454949944, "loss": 2.6303, "step": 1656 }, { "epoch": 22.093645484949832, "grad_norm": 0.6461536288261414, "learning_rate": 0.00012654060066740822, "loss": 2.7591, "step": 1657 }, { "epoch": 22.107023411371237, "grad_norm": 0.9253934025764465, "learning_rate": 0.00012649610678531703, "loss": 2.7619, "step": 1658 }, { "epoch": 22.120401337792643, "grad_norm": 1.0310887098312378, "learning_rate": 0.0001264516129032258, "loss": 2.7614, "step": 1659 }, { "epoch": 22.13377926421405, "grad_norm": 0.682077944278717, "learning_rate": 0.0001264071190211346, "loss": 2.9013, "step": 1660 }, { "epoch": 22.14715719063545, "grad_norm": 0.6438060998916626, "learning_rate": 0.00012636262513904338, "loss": 2.8715, "step": 1661 }, { "epoch": 22.160535117056856, "grad_norm": 0.5626085996627808, "learning_rate": 0.00012631813125695219, "loss": 2.7459, "step": 1662 }, { "epoch": 22.17391304347826, "grad_norm": 0.6211214065551758, "learning_rate": 0.00012627363737486094, "loss": 2.9223, "step": 1663 }, { "epoch": 22.187290969899667, "grad_norm": 0.5843247175216675, "learning_rate": 0.00012622914349276975, "loss": 3.0176, "step": 1664 }, { "epoch": 22.20066889632107, "grad_norm": 0.6780831813812256, "learning_rate": 0.00012618464961067853, "loss": 3.0641, "step": 1665 }, { "epoch": 22.214046822742475, "grad_norm": 0.6507664918899536, "learning_rate": 0.00012614015572858731, "loss": 2.9043, "step": 1666 }, { "epoch": 22.22742474916388, "grad_norm": 0.6472734212875366, "learning_rate": 0.0001260956618464961, "loss": 2.678, "step": 1667 }, { "epoch": 22.240802675585286, "grad_norm": 0.5920576453208923, "learning_rate": 0.0001260511679644049, "loss": 2.9801, "step": 1668 }, { "epoch": 22.254180602006688, "grad_norm": 0.5785727500915527, "learning_rate": 0.0001260066740823137, "loss": 2.6949, "step": 1669 }, { "epoch": 22.267558528428093, "grad_norm": 0.8260320425033569, "learning_rate": 0.00012596218020022247, "loss": 2.939, "step": 1670 }, { "epoch": 22.2809364548495, "grad_norm": 0.6697145700454712, "learning_rate": 0.00012591768631813125, "loss": 2.6997, "step": 1671 }, { "epoch": 22.294314381270905, "grad_norm": 0.6255594491958618, "learning_rate": 0.00012587319243604006, "loss": 2.7875, "step": 1672 }, { "epoch": 22.307692307692307, "grad_norm": 0.707083523273468, "learning_rate": 0.00012582869855394882, "loss": 2.8843, "step": 1673 }, { "epoch": 22.321070234113712, "grad_norm": 0.5888648629188538, "learning_rate": 0.00012578420467185763, "loss": 2.8862, "step": 1674 }, { "epoch": 22.334448160535118, "grad_norm": 0.6249898672103882, "learning_rate": 0.0001257397107897664, "loss": 2.6108, "step": 1675 }, { "epoch": 22.347826086956523, "grad_norm": 0.6932422518730164, "learning_rate": 0.0001256952169076752, "loss": 2.8393, "step": 1676 }, { "epoch": 22.361204013377925, "grad_norm": 0.6037236452102661, "learning_rate": 0.00012565072302558398, "loss": 2.6517, "step": 1677 }, { "epoch": 22.37458193979933, "grad_norm": 0.5549351572990417, "learning_rate": 0.00012560622914349278, "loss": 2.7725, "step": 1678 }, { "epoch": 22.387959866220736, "grad_norm": 0.6309815645217896, "learning_rate": 0.00012556173526140157, "loss": 2.7539, "step": 1679 }, { "epoch": 22.401337792642142, "grad_norm": 0.6618108153343201, "learning_rate": 0.00012551724137931035, "loss": 2.9342, "step": 1680 }, { "epoch": 22.414715719063544, "grad_norm": 0.7427678108215332, "learning_rate": 0.00012547274749721913, "loss": 2.9707, "step": 1681 }, { "epoch": 22.42809364548495, "grad_norm": 0.6226203441619873, "learning_rate": 0.00012542825361512794, "loss": 2.728, "step": 1682 }, { "epoch": 22.441471571906355, "grad_norm": 0.6432873606681824, "learning_rate": 0.0001253837597330367, "loss": 2.6271, "step": 1683 }, { "epoch": 22.45484949832776, "grad_norm": 0.618368923664093, "learning_rate": 0.0001253392658509455, "loss": 2.7364, "step": 1684 }, { "epoch": 22.468227424749163, "grad_norm": 0.601243257522583, "learning_rate": 0.0001252947719688543, "loss": 2.7071, "step": 1685 }, { "epoch": 22.48160535117057, "grad_norm": 0.5549631714820862, "learning_rate": 0.00012525027808676307, "loss": 2.7703, "step": 1686 }, { "epoch": 22.494983277591974, "grad_norm": 0.8125869035720825, "learning_rate": 0.00012520578420467185, "loss": 2.6798, "step": 1687 }, { "epoch": 22.50836120401338, "grad_norm": 0.5712392330169678, "learning_rate": 0.00012516129032258066, "loss": 2.7694, "step": 1688 }, { "epoch": 22.52173913043478, "grad_norm": 0.5955860614776611, "learning_rate": 0.00012511679644048945, "loss": 2.8128, "step": 1689 }, { "epoch": 22.535117056856187, "grad_norm": 0.6090961694717407, "learning_rate": 0.00012507230255839823, "loss": 2.9096, "step": 1690 }, { "epoch": 22.548494983277592, "grad_norm": 0.6430270671844482, "learning_rate": 0.000125027808676307, "loss": 2.8046, "step": 1691 }, { "epoch": 22.561872909698998, "grad_norm": 0.6325036883354187, "learning_rate": 0.00012498331479421582, "loss": 2.9619, "step": 1692 }, { "epoch": 22.5752508361204, "grad_norm": 0.5997435450553894, "learning_rate": 0.00012493882091212458, "loss": 2.9809, "step": 1693 }, { "epoch": 22.588628762541806, "grad_norm": 0.6140502095222473, "learning_rate": 0.00012489432703003338, "loss": 2.9622, "step": 1694 }, { "epoch": 22.60200668896321, "grad_norm": 0.5908461809158325, "learning_rate": 0.00012484983314794217, "loss": 2.8282, "step": 1695 }, { "epoch": 22.615384615384617, "grad_norm": 0.5987250208854675, "learning_rate": 0.00012480533926585095, "loss": 2.8239, "step": 1696 }, { "epoch": 22.62876254180602, "grad_norm": 0.6643062233924866, "learning_rate": 0.00012476084538375973, "loss": 2.8677, "step": 1697 }, { "epoch": 22.642140468227424, "grad_norm": 0.640986442565918, "learning_rate": 0.00012471635150166851, "loss": 2.6359, "step": 1698 }, { "epoch": 22.65551839464883, "grad_norm": 0.6315325498580933, "learning_rate": 0.00012467185761957732, "loss": 2.7919, "step": 1699 }, { "epoch": 22.668896321070235, "grad_norm": 0.6384916305541992, "learning_rate": 0.00012462736373748608, "loss": 3.0353, "step": 1700 }, { "epoch": 22.682274247491637, "grad_norm": 0.6934499144554138, "learning_rate": 0.0001245828698553949, "loss": 2.8694, "step": 1701 }, { "epoch": 22.695652173913043, "grad_norm": 0.6261676549911499, "learning_rate": 0.00012453837597330367, "loss": 2.6893, "step": 1702 }, { "epoch": 22.70903010033445, "grad_norm": 0.6069616079330444, "learning_rate": 0.00012449388209121245, "loss": 2.9214, "step": 1703 }, { "epoch": 22.722408026755854, "grad_norm": 0.6354973316192627, "learning_rate": 0.00012444938820912124, "loss": 2.754, "step": 1704 }, { "epoch": 22.735785953177256, "grad_norm": 0.7385901808738708, "learning_rate": 0.00012440489432703004, "loss": 2.7633, "step": 1705 }, { "epoch": 22.74916387959866, "grad_norm": 0.680008053779602, "learning_rate": 0.00012436040044493883, "loss": 2.851, "step": 1706 }, { "epoch": 22.762541806020067, "grad_norm": 0.5894846320152283, "learning_rate": 0.0001243159065628476, "loss": 2.8797, "step": 1707 }, { "epoch": 22.775919732441473, "grad_norm": 0.5705847144126892, "learning_rate": 0.0001242714126807564, "loss": 2.7471, "step": 1708 }, { "epoch": 22.789297658862875, "grad_norm": 0.6021112203598022, "learning_rate": 0.0001242269187986652, "loss": 3.0716, "step": 1709 }, { "epoch": 22.80267558528428, "grad_norm": 0.63507080078125, "learning_rate": 0.00012418242491657396, "loss": 2.8873, "step": 1710 }, { "epoch": 22.816053511705686, "grad_norm": 0.6159088015556335, "learning_rate": 0.00012413793103448277, "loss": 2.5226, "step": 1711 }, { "epoch": 22.82943143812709, "grad_norm": 0.6099200248718262, "learning_rate": 0.00012409343715239155, "loss": 2.9937, "step": 1712 }, { "epoch": 22.842809364548494, "grad_norm": 0.5942363142967224, "learning_rate": 0.00012404894327030033, "loss": 2.5113, "step": 1713 }, { "epoch": 22.8561872909699, "grad_norm": 0.6209394335746765, "learning_rate": 0.00012400444938820911, "loss": 3.0766, "step": 1714 }, { "epoch": 22.869565217391305, "grad_norm": 0.6850425601005554, "learning_rate": 0.00012395995550611792, "loss": 2.8807, "step": 1715 }, { "epoch": 22.88294314381271, "grad_norm": 0.5698277354240417, "learning_rate": 0.0001239154616240267, "loss": 2.6821, "step": 1716 }, { "epoch": 22.896321070234112, "grad_norm": 0.7140303254127502, "learning_rate": 0.0001238709677419355, "loss": 2.7047, "step": 1717 }, { "epoch": 22.909698996655518, "grad_norm": 0.6129325032234192, "learning_rate": 0.00012382647385984427, "loss": 2.705, "step": 1718 }, { "epoch": 22.923076923076923, "grad_norm": 0.6409560441970825, "learning_rate": 0.00012378197997775308, "loss": 2.7126, "step": 1719 }, { "epoch": 22.93645484949833, "grad_norm": 0.5590237975120544, "learning_rate": 0.00012373748609566184, "loss": 2.7945, "step": 1720 }, { "epoch": 22.94983277591973, "grad_norm": 0.5763843655586243, "learning_rate": 0.00012369299221357064, "loss": 2.6802, "step": 1721 }, { "epoch": 22.963210702341136, "grad_norm": 0.6224062442779541, "learning_rate": 0.00012364849833147943, "loss": 2.7035, "step": 1722 }, { "epoch": 22.976588628762542, "grad_norm": 0.5638879537582397, "learning_rate": 0.0001236040044493882, "loss": 2.8259, "step": 1723 }, { "epoch": 22.989966555183948, "grad_norm": 0.6214390993118286, "learning_rate": 0.000123559510567297, "loss": 2.9997, "step": 1724 }, { "epoch": 23.0, "grad_norm": 0.6950054168701172, "learning_rate": 0.0001235150166852058, "loss": 2.7497, "step": 1725 }, { "epoch": 23.013377926421406, "grad_norm": 0.67231285572052, "learning_rate": 0.00012347052280311458, "loss": 2.859, "step": 1726 }, { "epoch": 23.02675585284281, "grad_norm": 0.6401329040527344, "learning_rate": 0.00012342602892102337, "loss": 2.7056, "step": 1727 }, { "epoch": 23.040133779264213, "grad_norm": 0.6234462261199951, "learning_rate": 0.00012338153503893215, "loss": 2.8271, "step": 1728 }, { "epoch": 23.05351170568562, "grad_norm": 0.6482179164886475, "learning_rate": 0.00012333704115684096, "loss": 2.898, "step": 1729 }, { "epoch": 23.066889632107024, "grad_norm": 0.6077515482902527, "learning_rate": 0.0001232925472747497, "loss": 2.6865, "step": 1730 }, { "epoch": 23.08026755852843, "grad_norm": 0.5866036415100098, "learning_rate": 0.00012324805339265852, "loss": 2.9001, "step": 1731 }, { "epoch": 23.093645484949832, "grad_norm": 0.6499370336532593, "learning_rate": 0.0001232035595105673, "loss": 2.8718, "step": 1732 }, { "epoch": 23.107023411371237, "grad_norm": 0.6188756227493286, "learning_rate": 0.0001231590656284761, "loss": 2.7928, "step": 1733 }, { "epoch": 23.120401337792643, "grad_norm": 0.5990676879882812, "learning_rate": 0.00012311457174638487, "loss": 2.7918, "step": 1734 }, { "epoch": 23.13377926421405, "grad_norm": 0.5592599511146545, "learning_rate": 0.00012307007786429368, "loss": 2.7948, "step": 1735 }, { "epoch": 23.14715719063545, "grad_norm": 0.5751471519470215, "learning_rate": 0.00012302558398220246, "loss": 2.6472, "step": 1736 }, { "epoch": 23.160535117056856, "grad_norm": 0.6300846338272095, "learning_rate": 0.00012298109010011124, "loss": 2.7491, "step": 1737 }, { "epoch": 23.17391304347826, "grad_norm": 0.5754296779632568, "learning_rate": 0.00012293659621802003, "loss": 2.7557, "step": 1738 }, { "epoch": 23.187290969899667, "grad_norm": 0.6483306884765625, "learning_rate": 0.0001228921023359288, "loss": 2.776, "step": 1739 }, { "epoch": 23.20066889632107, "grad_norm": 0.5819816589355469, "learning_rate": 0.0001228476084538376, "loss": 2.9192, "step": 1740 }, { "epoch": 23.214046822742475, "grad_norm": 0.6570101380348206, "learning_rate": 0.00012280311457174637, "loss": 2.8571, "step": 1741 }, { "epoch": 23.22742474916388, "grad_norm": 0.6435232162475586, "learning_rate": 0.00012275862068965518, "loss": 2.6134, "step": 1742 }, { "epoch": 23.240802675585286, "grad_norm": 0.5817757844924927, "learning_rate": 0.00012271412680756397, "loss": 2.8384, "step": 1743 }, { "epoch": 23.254180602006688, "grad_norm": 0.5564932823181152, "learning_rate": 0.00012266963292547275, "loss": 2.4606, "step": 1744 }, { "epoch": 23.267558528428093, "grad_norm": 0.6028578281402588, "learning_rate": 0.00012262513904338153, "loss": 2.824, "step": 1745 }, { "epoch": 23.2809364548495, "grad_norm": 0.5562644600868225, "learning_rate": 0.00012258064516129034, "loss": 2.8195, "step": 1746 }, { "epoch": 23.294314381270905, "grad_norm": 0.6838886141777039, "learning_rate": 0.0001225361512791991, "loss": 2.8453, "step": 1747 }, { "epoch": 23.307692307692307, "grad_norm": 0.6102856993675232, "learning_rate": 0.0001224916573971079, "loss": 2.8331, "step": 1748 }, { "epoch": 23.321070234113712, "grad_norm": 0.5895605087280273, "learning_rate": 0.0001224471635150167, "loss": 2.7479, "step": 1749 }, { "epoch": 23.334448160535118, "grad_norm": 0.7318586111068726, "learning_rate": 0.00012240266963292547, "loss": 2.7201, "step": 1750 }, { "epoch": 23.347826086956523, "grad_norm": 0.6560878157615662, "learning_rate": 0.00012235817575083425, "loss": 2.931, "step": 1751 }, { "epoch": 23.361204013377925, "grad_norm": 0.5879004001617432, "learning_rate": 0.00012231368186874306, "loss": 2.8607, "step": 1752 }, { "epoch": 23.37458193979933, "grad_norm": 0.5916743278503418, "learning_rate": 0.00012226918798665184, "loss": 2.6923, "step": 1753 }, { "epoch": 23.387959866220736, "grad_norm": 0.5569814443588257, "learning_rate": 0.00012222469410456063, "loss": 2.634, "step": 1754 }, { "epoch": 23.401337792642142, "grad_norm": 0.7285422682762146, "learning_rate": 0.0001221802002224694, "loss": 2.7135, "step": 1755 }, { "epoch": 23.414715719063544, "grad_norm": 0.7323710918426514, "learning_rate": 0.00012213570634037822, "loss": 2.6855, "step": 1756 }, { "epoch": 23.42809364548495, "grad_norm": 0.65497887134552, "learning_rate": 0.00012209121245828697, "loss": 2.8777, "step": 1757 }, { "epoch": 23.441471571906355, "grad_norm": 0.5919074416160583, "learning_rate": 0.00012204671857619578, "loss": 2.657, "step": 1758 }, { "epoch": 23.45484949832776, "grad_norm": 0.6749523282051086, "learning_rate": 0.00012200222469410457, "loss": 2.8142, "step": 1759 }, { "epoch": 23.468227424749163, "grad_norm": 0.6859252452850342, "learning_rate": 0.00012195773081201336, "loss": 2.8572, "step": 1760 }, { "epoch": 23.48160535117057, "grad_norm": 0.5956023931503296, "learning_rate": 0.00012191323692992213, "loss": 2.8385, "step": 1761 }, { "epoch": 23.494983277591974, "grad_norm": 0.684101939201355, "learning_rate": 0.00012186874304783094, "loss": 2.4779, "step": 1762 }, { "epoch": 23.50836120401338, "grad_norm": 0.726864755153656, "learning_rate": 0.00012182424916573971, "loss": 2.8523, "step": 1763 }, { "epoch": 23.52173913043478, "grad_norm": 0.5874570608139038, "learning_rate": 0.00012177975528364852, "loss": 2.7866, "step": 1764 }, { "epoch": 23.535117056856187, "grad_norm": 0.6077170968055725, "learning_rate": 0.00012173526140155729, "loss": 2.711, "step": 1765 }, { "epoch": 23.548494983277592, "grad_norm": 0.6854214668273926, "learning_rate": 0.00012169076751946608, "loss": 2.655, "step": 1766 }, { "epoch": 23.561872909698998, "grad_norm": 0.6409576535224915, "learning_rate": 0.00012164627363737486, "loss": 2.7309, "step": 1767 }, { "epoch": 23.5752508361204, "grad_norm": 0.6241947412490845, "learning_rate": 0.00012160177975528366, "loss": 2.67, "step": 1768 }, { "epoch": 23.588628762541806, "grad_norm": 0.5962851643562317, "learning_rate": 0.00012155728587319244, "loss": 2.6147, "step": 1769 }, { "epoch": 23.60200668896321, "grad_norm": 0.6183756589889526, "learning_rate": 0.00012151279199110124, "loss": 2.9772, "step": 1770 }, { "epoch": 23.615384615384617, "grad_norm": 0.6092197299003601, "learning_rate": 0.00012146829810901001, "loss": 2.7146, "step": 1771 }, { "epoch": 23.62876254180602, "grad_norm": 0.6660104990005493, "learning_rate": 0.00012142380422691882, "loss": 2.6339, "step": 1772 }, { "epoch": 23.642140468227424, "grad_norm": 0.9198834896087646, "learning_rate": 0.00012137931034482759, "loss": 2.7188, "step": 1773 }, { "epoch": 23.65551839464883, "grad_norm": 0.7053755521774292, "learning_rate": 0.0001213348164627364, "loss": 2.7484, "step": 1774 }, { "epoch": 23.668896321070235, "grad_norm": 0.7443736791610718, "learning_rate": 0.00012129032258064516, "loss": 2.6805, "step": 1775 }, { "epoch": 23.682274247491637, "grad_norm": 0.579801082611084, "learning_rate": 0.00012124582869855396, "loss": 2.6467, "step": 1776 }, { "epoch": 23.695652173913043, "grad_norm": 0.6014502644538879, "learning_rate": 0.00012120133481646274, "loss": 2.5816, "step": 1777 }, { "epoch": 23.70903010033445, "grad_norm": 0.6318315863609314, "learning_rate": 0.00012115684093437154, "loss": 2.8137, "step": 1778 }, { "epoch": 23.722408026755854, "grad_norm": 0.6172413229942322, "learning_rate": 0.00012111234705228032, "loss": 2.7789, "step": 1779 }, { "epoch": 23.735785953177256, "grad_norm": 0.6184534430503845, "learning_rate": 0.00012106785317018909, "loss": 2.6802, "step": 1780 }, { "epoch": 23.74916387959866, "grad_norm": 0.6380288600921631, "learning_rate": 0.00012102335928809789, "loss": 2.8755, "step": 1781 }, { "epoch": 23.762541806020067, "grad_norm": 0.5941389799118042, "learning_rate": 0.00012097886540600667, "loss": 2.945, "step": 1782 }, { "epoch": 23.775919732441473, "grad_norm": 0.6913108825683594, "learning_rate": 0.00012093437152391546, "loss": 2.7036, "step": 1783 }, { "epoch": 23.789297658862875, "grad_norm": 0.563119113445282, "learning_rate": 0.00012088987764182425, "loss": 2.6608, "step": 1784 }, { "epoch": 23.80267558528428, "grad_norm": 0.6387828588485718, "learning_rate": 0.00012084538375973304, "loss": 2.8933, "step": 1785 }, { "epoch": 23.816053511705686, "grad_norm": 0.7530612945556641, "learning_rate": 0.00012080088987764183, "loss": 2.8399, "step": 1786 }, { "epoch": 23.82943143812709, "grad_norm": 0.6401646733283997, "learning_rate": 0.00012075639599555062, "loss": 2.8757, "step": 1787 }, { "epoch": 23.842809364548494, "grad_norm": 0.7403398752212524, "learning_rate": 0.00012071190211345939, "loss": 2.629, "step": 1788 }, { "epoch": 23.8561872909699, "grad_norm": 0.6479887366294861, "learning_rate": 0.0001206674082313682, "loss": 2.9069, "step": 1789 }, { "epoch": 23.869565217391305, "grad_norm": 0.588141679763794, "learning_rate": 0.00012062291434927697, "loss": 2.732, "step": 1790 }, { "epoch": 23.88294314381271, "grad_norm": 0.6330631971359253, "learning_rate": 0.00012057842046718576, "loss": 2.9348, "step": 1791 }, { "epoch": 23.896321070234112, "grad_norm": 0.6657344698905945, "learning_rate": 0.00012053392658509455, "loss": 3.0499, "step": 1792 }, { "epoch": 23.909698996655518, "grad_norm": 0.6816025972366333, "learning_rate": 0.00012048943270300334, "loss": 2.9961, "step": 1793 }, { "epoch": 23.923076923076923, "grad_norm": 0.6501593589782715, "learning_rate": 0.00012044493882091212, "loss": 2.8555, "step": 1794 }, { "epoch": 23.93645484949833, "grad_norm": 0.6157960295677185, "learning_rate": 0.00012040044493882092, "loss": 2.8133, "step": 1795 }, { "epoch": 23.94983277591973, "grad_norm": 0.5868191719055176, "learning_rate": 0.0001203559510567297, "loss": 2.7887, "step": 1796 }, { "epoch": 23.963210702341136, "grad_norm": 0.6586911678314209, "learning_rate": 0.0001203114571746385, "loss": 3.0916, "step": 1797 }, { "epoch": 23.976588628762542, "grad_norm": 0.6382162570953369, "learning_rate": 0.00012026696329254727, "loss": 2.7469, "step": 1798 }, { "epoch": 23.989966555183948, "grad_norm": 0.6481708288192749, "learning_rate": 0.00012022246941045608, "loss": 2.9216, "step": 1799 }, { "epoch": 24.0, "grad_norm": 0.7272862792015076, "learning_rate": 0.00012017797552836485, "loss": 2.686, "step": 1800 }, { "epoch": 24.013377926421406, "grad_norm": 0.743366003036499, "learning_rate": 0.00012013348164627364, "loss": 2.6791, "step": 1801 }, { "epoch": 24.02675585284281, "grad_norm": 0.6428125500679016, "learning_rate": 0.00012008898776418242, "loss": 2.782, "step": 1802 }, { "epoch": 24.040133779264213, "grad_norm": 0.6210600137710571, "learning_rate": 0.00012004449388209122, "loss": 2.9352, "step": 1803 }, { "epoch": 24.05351170568562, "grad_norm": 0.9589283466339111, "learning_rate": 0.00012, "loss": 2.7686, "step": 1804 }, { "epoch": 24.066889632107024, "grad_norm": 0.5503745675086975, "learning_rate": 0.0001199555061179088, "loss": 2.9241, "step": 1805 }, { "epoch": 24.08026755852843, "grad_norm": 0.6018356084823608, "learning_rate": 0.00011991101223581758, "loss": 2.7512, "step": 1806 }, { "epoch": 24.093645484949832, "grad_norm": 0.7458929419517517, "learning_rate": 0.00011986651835372638, "loss": 2.5795, "step": 1807 }, { "epoch": 24.107023411371237, "grad_norm": 0.705740213394165, "learning_rate": 0.00011982202447163515, "loss": 2.8537, "step": 1808 }, { "epoch": 24.120401337792643, "grad_norm": 0.6299166679382324, "learning_rate": 0.00011977753058954396, "loss": 2.7815, "step": 1809 }, { "epoch": 24.13377926421405, "grad_norm": 1.6271346807479858, "learning_rate": 0.00011973303670745272, "loss": 2.6373, "step": 1810 }, { "epoch": 24.14715719063545, "grad_norm": 0.6286007165908813, "learning_rate": 0.00011968854282536152, "loss": 2.5587, "step": 1811 }, { "epoch": 24.160535117056856, "grad_norm": 0.6259005069732666, "learning_rate": 0.0001196440489432703, "loss": 2.6414, "step": 1812 }, { "epoch": 24.17391304347826, "grad_norm": 0.6058273911476135, "learning_rate": 0.0001195995550611791, "loss": 2.8571, "step": 1813 }, { "epoch": 24.187290969899667, "grad_norm": 0.6144312620162964, "learning_rate": 0.00011955506117908788, "loss": 2.7777, "step": 1814 }, { "epoch": 24.20066889632107, "grad_norm": 0.7302254438400269, "learning_rate": 0.00011951056729699668, "loss": 2.6298, "step": 1815 }, { "epoch": 24.214046822742475, "grad_norm": 0.7253098487854004, "learning_rate": 0.00011946607341490546, "loss": 2.9066, "step": 1816 }, { "epoch": 24.22742474916388, "grad_norm": 0.7629284858703613, "learning_rate": 0.00011942157953281426, "loss": 2.5391, "step": 1817 }, { "epoch": 24.240802675585286, "grad_norm": 0.6672796607017517, "learning_rate": 0.00011937708565072302, "loss": 2.8014, "step": 1818 }, { "epoch": 24.254180602006688, "grad_norm": 0.6049585938453674, "learning_rate": 0.00011933259176863183, "loss": 2.5539, "step": 1819 }, { "epoch": 24.267558528428093, "grad_norm": 0.5817275047302246, "learning_rate": 0.0001192880978865406, "loss": 2.5771, "step": 1820 }, { "epoch": 24.2809364548495, "grad_norm": 0.6785464882850647, "learning_rate": 0.00011924360400444938, "loss": 2.8043, "step": 1821 }, { "epoch": 24.294314381270905, "grad_norm": 0.6705557107925415, "learning_rate": 0.00011919911012235818, "loss": 2.7126, "step": 1822 }, { "epoch": 24.307692307692307, "grad_norm": 0.6267027854919434, "learning_rate": 0.00011915461624026696, "loss": 2.8739, "step": 1823 }, { "epoch": 24.321070234113712, "grad_norm": 0.6952410340309143, "learning_rate": 0.00011911012235817576, "loss": 2.8402, "step": 1824 }, { "epoch": 24.334448160535118, "grad_norm": 0.6892711520195007, "learning_rate": 0.00011906562847608453, "loss": 2.7314, "step": 1825 }, { "epoch": 24.347826086956523, "grad_norm": 0.5816205739974976, "learning_rate": 0.00011902113459399334, "loss": 2.6115, "step": 1826 }, { "epoch": 24.361204013377925, "grad_norm": 0.6549028158187866, "learning_rate": 0.0001189766407119021, "loss": 2.7503, "step": 1827 }, { "epoch": 24.37458193979933, "grad_norm": 0.5902572870254517, "learning_rate": 0.0001189321468298109, "loss": 2.7549, "step": 1828 }, { "epoch": 24.387959866220736, "grad_norm": 0.8412529230117798, "learning_rate": 0.00011888765294771968, "loss": 2.9654, "step": 1829 }, { "epoch": 24.401337792642142, "grad_norm": 0.5785757303237915, "learning_rate": 0.00011884315906562848, "loss": 2.8101, "step": 1830 }, { "epoch": 24.414715719063544, "grad_norm": 0.5835216045379639, "learning_rate": 0.00011879866518353726, "loss": 2.5368, "step": 1831 }, { "epoch": 24.42809364548495, "grad_norm": 0.6589221954345703, "learning_rate": 0.00011875417130144606, "loss": 2.7917, "step": 1832 }, { "epoch": 24.441471571906355, "grad_norm": 0.6370606422424316, "learning_rate": 0.00011870967741935484, "loss": 2.7514, "step": 1833 }, { "epoch": 24.45484949832776, "grad_norm": 0.644980788230896, "learning_rate": 0.00011866518353726364, "loss": 2.6756, "step": 1834 }, { "epoch": 24.468227424749163, "grad_norm": 0.7281529307365417, "learning_rate": 0.0001186206896551724, "loss": 2.5308, "step": 1835 }, { "epoch": 24.48160535117057, "grad_norm": 0.5669592618942261, "learning_rate": 0.00011857619577308122, "loss": 2.7197, "step": 1836 }, { "epoch": 24.494983277591974, "grad_norm": 0.7093492150306702, "learning_rate": 0.00011853170189098998, "loss": 2.9053, "step": 1837 }, { "epoch": 24.50836120401338, "grad_norm": 0.5949118733406067, "learning_rate": 0.00011848720800889878, "loss": 2.8594, "step": 1838 }, { "epoch": 24.52173913043478, "grad_norm": 0.5725157260894775, "learning_rate": 0.00011844271412680756, "loss": 2.5906, "step": 1839 }, { "epoch": 24.535117056856187, "grad_norm": 0.7810790538787842, "learning_rate": 0.00011839822024471636, "loss": 2.7852, "step": 1840 }, { "epoch": 24.548494983277592, "grad_norm": 0.6285178661346436, "learning_rate": 0.00011835372636262514, "loss": 2.7201, "step": 1841 }, { "epoch": 24.561872909698998, "grad_norm": 0.6262674927711487, "learning_rate": 0.00011830923248053394, "loss": 2.7249, "step": 1842 }, { "epoch": 24.5752508361204, "grad_norm": 0.5526009202003479, "learning_rate": 0.00011826473859844272, "loss": 2.6668, "step": 1843 }, { "epoch": 24.588628762541806, "grad_norm": 0.6295923590660095, "learning_rate": 0.00011822024471635152, "loss": 2.8788, "step": 1844 }, { "epoch": 24.60200668896321, "grad_norm": 0.6965957283973694, "learning_rate": 0.00011817575083426028, "loss": 2.8676, "step": 1845 }, { "epoch": 24.615384615384617, "grad_norm": 0.6246476173400879, "learning_rate": 0.0001181312569521691, "loss": 2.8326, "step": 1846 }, { "epoch": 24.62876254180602, "grad_norm": 0.6312415599822998, "learning_rate": 0.00011808676307007786, "loss": 2.5748, "step": 1847 }, { "epoch": 24.642140468227424, "grad_norm": 0.7262862920761108, "learning_rate": 0.00011804226918798666, "loss": 2.922, "step": 1848 }, { "epoch": 24.65551839464883, "grad_norm": 0.6640278697013855, "learning_rate": 0.00011799777530589544, "loss": 2.7494, "step": 1849 }, { "epoch": 24.668896321070235, "grad_norm": 0.6154195070266724, "learning_rate": 0.00011795328142380424, "loss": 2.6168, "step": 1850 }, { "epoch": 24.682274247491637, "grad_norm": 0.6020949482917786, "learning_rate": 0.00011790878754171302, "loss": 2.7566, "step": 1851 }, { "epoch": 24.695652173913043, "grad_norm": 0.5775970816612244, "learning_rate": 0.00011786429365962182, "loss": 2.6862, "step": 1852 }, { "epoch": 24.70903010033445, "grad_norm": 0.6687362194061279, "learning_rate": 0.0001178197997775306, "loss": 2.554, "step": 1853 }, { "epoch": 24.722408026755854, "grad_norm": 0.6184269785881042, "learning_rate": 0.0001177753058954394, "loss": 2.9353, "step": 1854 }, { "epoch": 24.735785953177256, "grad_norm": 0.6340799331665039, "learning_rate": 0.00011773081201334816, "loss": 2.857, "step": 1855 }, { "epoch": 24.74916387959866, "grad_norm": 0.8216782808303833, "learning_rate": 0.00011768631813125697, "loss": 2.6295, "step": 1856 }, { "epoch": 24.762541806020067, "grad_norm": 0.5764971375465393, "learning_rate": 0.00011764182424916574, "loss": 2.8431, "step": 1857 }, { "epoch": 24.775919732441473, "grad_norm": 0.6233363747596741, "learning_rate": 0.00011759733036707454, "loss": 2.6867, "step": 1858 }, { "epoch": 24.789297658862875, "grad_norm": 0.6669492721557617, "learning_rate": 0.00011755283648498332, "loss": 2.5405, "step": 1859 }, { "epoch": 24.80267558528428, "grad_norm": 0.6156982183456421, "learning_rate": 0.00011750834260289212, "loss": 2.8071, "step": 1860 }, { "epoch": 24.816053511705686, "grad_norm": 0.6210219860076904, "learning_rate": 0.0001174638487208009, "loss": 2.8308, "step": 1861 }, { "epoch": 24.82943143812709, "grad_norm": 0.6786196231842041, "learning_rate": 0.00011741935483870967, "loss": 2.9039, "step": 1862 }, { "epoch": 24.842809364548494, "grad_norm": 0.6467169523239136, "learning_rate": 0.00011737486095661848, "loss": 2.6601, "step": 1863 }, { "epoch": 24.8561872909699, "grad_norm": 0.6734811663627625, "learning_rate": 0.00011733036707452724, "loss": 2.8614, "step": 1864 }, { "epoch": 24.869565217391305, "grad_norm": 0.6223945021629333, "learning_rate": 0.00011728587319243604, "loss": 2.9397, "step": 1865 }, { "epoch": 24.88294314381271, "grad_norm": 0.6042306423187256, "learning_rate": 0.00011724137931034482, "loss": 2.8984, "step": 1866 }, { "epoch": 24.896321070234112, "grad_norm": 0.945598840713501, "learning_rate": 0.00011719688542825362, "loss": 2.6882, "step": 1867 }, { "epoch": 24.909698996655518, "grad_norm": 0.6739409565925598, "learning_rate": 0.0001171523915461624, "loss": 2.6285, "step": 1868 }, { "epoch": 24.923076923076923, "grad_norm": 0.6158167719841003, "learning_rate": 0.0001171078976640712, "loss": 2.8019, "step": 1869 }, { "epoch": 24.93645484949833, "grad_norm": 0.5984783172607422, "learning_rate": 0.00011706340378197998, "loss": 2.785, "step": 1870 }, { "epoch": 24.94983277591973, "grad_norm": 0.594236433506012, "learning_rate": 0.00011701890989988878, "loss": 3.0126, "step": 1871 }, { "epoch": 24.963210702341136, "grad_norm": 0.6268919706344604, "learning_rate": 0.00011697441601779754, "loss": 2.7908, "step": 1872 }, { "epoch": 24.976588628762542, "grad_norm": 0.6119821071624756, "learning_rate": 0.00011692992213570635, "loss": 2.5886, "step": 1873 }, { "epoch": 24.989966555183948, "grad_norm": 0.8935804963111877, "learning_rate": 0.00011688542825361512, "loss": 2.9702, "step": 1874 }, { "epoch": 25.0, "grad_norm": 0.6583001613616943, "learning_rate": 0.00011684093437152392, "loss": 2.7691, "step": 1875 }, { "epoch": 25.013377926421406, "grad_norm": 0.5952125787734985, "learning_rate": 0.0001167964404894327, "loss": 2.6709, "step": 1876 }, { "epoch": 25.02675585284281, "grad_norm": 0.6740133762359619, "learning_rate": 0.0001167519466073415, "loss": 2.6548, "step": 1877 }, { "epoch": 25.040133779264213, "grad_norm": 0.6665695309638977, "learning_rate": 0.00011670745272525028, "loss": 2.6141, "step": 1878 }, { "epoch": 25.05351170568562, "grad_norm": 0.698957085609436, "learning_rate": 0.00011666295884315908, "loss": 2.7752, "step": 1879 }, { "epoch": 25.066889632107024, "grad_norm": 0.7277541160583496, "learning_rate": 0.00011661846496106786, "loss": 2.8876, "step": 1880 }, { "epoch": 25.08026755852843, "grad_norm": 0.5987577438354492, "learning_rate": 0.00011657397107897665, "loss": 2.5601, "step": 1881 }, { "epoch": 25.093645484949832, "grad_norm": 0.5897870063781738, "learning_rate": 0.00011652947719688542, "loss": 2.5854, "step": 1882 }, { "epoch": 25.107023411371237, "grad_norm": 0.573273241519928, "learning_rate": 0.00011648498331479423, "loss": 2.7141, "step": 1883 }, { "epoch": 25.120401337792643, "grad_norm": 0.6173187494277954, "learning_rate": 0.000116440489432703, "loss": 2.6697, "step": 1884 }, { "epoch": 25.13377926421405, "grad_norm": 0.5728760361671448, "learning_rate": 0.0001163959955506118, "loss": 2.7058, "step": 1885 }, { "epoch": 25.14715719063545, "grad_norm": 0.543645441532135, "learning_rate": 0.00011635150166852058, "loss": 2.441, "step": 1886 }, { "epoch": 25.160535117056856, "grad_norm": 0.6582958698272705, "learning_rate": 0.00011630700778642938, "loss": 2.8462, "step": 1887 }, { "epoch": 25.17391304347826, "grad_norm": 0.6733880043029785, "learning_rate": 0.00011626251390433816, "loss": 2.7294, "step": 1888 }, { "epoch": 25.187290969899667, "grad_norm": 0.8249802589416504, "learning_rate": 0.00011621802002224695, "loss": 2.7738, "step": 1889 }, { "epoch": 25.20066889632107, "grad_norm": 0.6004601120948792, "learning_rate": 0.00011617352614015574, "loss": 2.6992, "step": 1890 }, { "epoch": 25.214046822742475, "grad_norm": 0.6188962459564209, "learning_rate": 0.00011612903225806453, "loss": 2.5715, "step": 1891 }, { "epoch": 25.22742474916388, "grad_norm": 0.6831389665603638, "learning_rate": 0.0001160845383759733, "loss": 2.8777, "step": 1892 }, { "epoch": 25.240802675585286, "grad_norm": 0.5578925013542175, "learning_rate": 0.00011604004449388211, "loss": 2.4875, "step": 1893 }, { "epoch": 25.254180602006688, "grad_norm": 0.5995929837226868, "learning_rate": 0.00011599555061179088, "loss": 2.5166, "step": 1894 }, { "epoch": 25.267558528428093, "grad_norm": 0.6817669868469238, "learning_rate": 0.00011595105672969967, "loss": 2.7628, "step": 1895 }, { "epoch": 25.2809364548495, "grad_norm": 0.671890139579773, "learning_rate": 0.00011590656284760846, "loss": 2.5406, "step": 1896 }, { "epoch": 25.294314381270905, "grad_norm": 0.6167321801185608, "learning_rate": 0.00011586206896551725, "loss": 2.8789, "step": 1897 }, { "epoch": 25.307692307692307, "grad_norm": 0.6145638227462769, "learning_rate": 0.00011581757508342604, "loss": 2.7629, "step": 1898 }, { "epoch": 25.321070234113712, "grad_norm": 0.6117168068885803, "learning_rate": 0.00011577308120133483, "loss": 2.7048, "step": 1899 }, { "epoch": 25.334448160535118, "grad_norm": 0.6903496384620667, "learning_rate": 0.00011572858731924361, "loss": 2.9096, "step": 1900 }, { "epoch": 25.347826086956523, "grad_norm": 0.5942792296409607, "learning_rate": 0.00011568409343715241, "loss": 2.4525, "step": 1901 }, { "epoch": 25.361204013377925, "grad_norm": 0.5714770555496216, "learning_rate": 0.00011563959955506118, "loss": 2.6605, "step": 1902 }, { "epoch": 25.37458193979933, "grad_norm": 1.0252799987792969, "learning_rate": 0.00011559510567296996, "loss": 2.4599, "step": 1903 }, { "epoch": 25.387959866220736, "grad_norm": 0.6459619998931885, "learning_rate": 0.00011555061179087876, "loss": 2.657, "step": 1904 }, { "epoch": 25.401337792642142, "grad_norm": 0.6513925790786743, "learning_rate": 0.00011550611790878754, "loss": 2.6491, "step": 1905 }, { "epoch": 25.414715719063544, "grad_norm": 0.838390588760376, "learning_rate": 0.00011546162402669634, "loss": 2.8909, "step": 1906 }, { "epoch": 25.42809364548495, "grad_norm": 0.6873288750648499, "learning_rate": 0.00011541713014460512, "loss": 2.9252, "step": 1907 }, { "epoch": 25.441471571906355, "grad_norm": 0.6325615644454956, "learning_rate": 0.00011537263626251391, "loss": 2.8784, "step": 1908 }, { "epoch": 25.45484949832776, "grad_norm": 0.7683016061782837, "learning_rate": 0.00011532814238042268, "loss": 2.7951, "step": 1909 }, { "epoch": 25.468227424749163, "grad_norm": 0.6581037044525146, "learning_rate": 0.00011528364849833149, "loss": 2.7674, "step": 1910 }, { "epoch": 25.48160535117057, "grad_norm": 0.6761939525604248, "learning_rate": 0.00011523915461624026, "loss": 2.7315, "step": 1911 }, { "epoch": 25.494983277591974, "grad_norm": 0.621744692325592, "learning_rate": 0.00011519466073414906, "loss": 2.6977, "step": 1912 }, { "epoch": 25.50836120401338, "grad_norm": 0.5744712352752686, "learning_rate": 0.00011515016685205784, "loss": 2.6568, "step": 1913 }, { "epoch": 25.52173913043478, "grad_norm": 0.6587238907814026, "learning_rate": 0.00011510567296996664, "loss": 3.0222, "step": 1914 }, { "epoch": 25.535117056856187, "grad_norm": 0.6338929533958435, "learning_rate": 0.00011506117908787542, "loss": 2.4611, "step": 1915 }, { "epoch": 25.548494983277592, "grad_norm": 0.6468850374221802, "learning_rate": 0.00011501668520578421, "loss": 2.5532, "step": 1916 }, { "epoch": 25.561872909698998, "grad_norm": 0.6900503635406494, "learning_rate": 0.000114972191323693, "loss": 2.7291, "step": 1917 }, { "epoch": 25.5752508361204, "grad_norm": 0.9081000089645386, "learning_rate": 0.00011492769744160179, "loss": 2.528, "step": 1918 }, { "epoch": 25.588628762541806, "grad_norm": 0.6108705401420593, "learning_rate": 0.00011488320355951056, "loss": 2.7942, "step": 1919 }, { "epoch": 25.60200668896321, "grad_norm": 0.5745365023612976, "learning_rate": 0.00011483870967741937, "loss": 2.7273, "step": 1920 }, { "epoch": 25.615384615384617, "grad_norm": 0.7215850949287415, "learning_rate": 0.00011479421579532814, "loss": 2.85, "step": 1921 }, { "epoch": 25.62876254180602, "grad_norm": 0.6535587310791016, "learning_rate": 0.00011474972191323693, "loss": 2.7488, "step": 1922 }, { "epoch": 25.642140468227424, "grad_norm": 0.6285836696624756, "learning_rate": 0.00011470522803114572, "loss": 2.6874, "step": 1923 }, { "epoch": 25.65551839464883, "grad_norm": 0.652277410030365, "learning_rate": 0.00011466073414905451, "loss": 2.889, "step": 1924 }, { "epoch": 25.668896321070235, "grad_norm": 0.6526362299919128, "learning_rate": 0.0001146162402669633, "loss": 2.8342, "step": 1925 }, { "epoch": 25.682274247491637, "grad_norm": 0.6387808918952942, "learning_rate": 0.00011457174638487209, "loss": 2.6713, "step": 1926 }, { "epoch": 25.695652173913043, "grad_norm": 0.5775367617607117, "learning_rate": 0.00011452725250278087, "loss": 2.6848, "step": 1927 }, { "epoch": 25.70903010033445, "grad_norm": 0.7041560411453247, "learning_rate": 0.00011448275862068967, "loss": 2.7532, "step": 1928 }, { "epoch": 25.722408026755854, "grad_norm": 0.6833249926567078, "learning_rate": 0.00011443826473859844, "loss": 2.8257, "step": 1929 }, { "epoch": 25.735785953177256, "grad_norm": 0.6916714310646057, "learning_rate": 0.00011439377085650725, "loss": 2.5491, "step": 1930 }, { "epoch": 25.74916387959866, "grad_norm": 0.8915151357650757, "learning_rate": 0.00011434927697441602, "loss": 2.8361, "step": 1931 }, { "epoch": 25.762541806020067, "grad_norm": 0.6278401613235474, "learning_rate": 0.00011430478309232481, "loss": 2.8022, "step": 1932 }, { "epoch": 25.775919732441473, "grad_norm": 0.6929000020027161, "learning_rate": 0.0001142602892102336, "loss": 2.8938, "step": 1933 }, { "epoch": 25.789297658862875, "grad_norm": 0.6363662481307983, "learning_rate": 0.00011421579532814239, "loss": 2.8736, "step": 1934 }, { "epoch": 25.80267558528428, "grad_norm": 0.6596313118934631, "learning_rate": 0.00011417130144605117, "loss": 2.9864, "step": 1935 }, { "epoch": 25.816053511705686, "grad_norm": 0.9694854617118835, "learning_rate": 0.00011412680756395997, "loss": 2.7362, "step": 1936 }, { "epoch": 25.82943143812709, "grad_norm": 0.721339225769043, "learning_rate": 0.00011408231368186875, "loss": 2.9919, "step": 1937 }, { "epoch": 25.842809364548494, "grad_norm": 0.6164969205856323, "learning_rate": 0.00011403781979977755, "loss": 3.0446, "step": 1938 }, { "epoch": 25.8561872909699, "grad_norm": 0.8240790367126465, "learning_rate": 0.00011399332591768632, "loss": 2.7113, "step": 1939 }, { "epoch": 25.869565217391305, "grad_norm": 0.865219235420227, "learning_rate": 0.00011394883203559513, "loss": 2.6692, "step": 1940 }, { "epoch": 25.88294314381271, "grad_norm": 0.6014530062675476, "learning_rate": 0.0001139043381535039, "loss": 2.7527, "step": 1941 }, { "epoch": 25.896321070234112, "grad_norm": 0.6110913157463074, "learning_rate": 0.00011385984427141269, "loss": 2.6213, "step": 1942 }, { "epoch": 25.909698996655518, "grad_norm": 0.6967377662658691, "learning_rate": 0.00011381535038932147, "loss": 3.0013, "step": 1943 }, { "epoch": 25.923076923076923, "grad_norm": 0.6235397458076477, "learning_rate": 0.00011377085650723027, "loss": 2.8256, "step": 1944 }, { "epoch": 25.93645484949833, "grad_norm": 0.6463499665260315, "learning_rate": 0.00011372636262513905, "loss": 2.6573, "step": 1945 }, { "epoch": 25.94983277591973, "grad_norm": 0.7898368239402771, "learning_rate": 0.00011368186874304782, "loss": 2.6484, "step": 1946 }, { "epoch": 25.963210702341136, "grad_norm": 0.6130072474479675, "learning_rate": 0.00011363737486095663, "loss": 2.6912, "step": 1947 }, { "epoch": 25.976588628762542, "grad_norm": 0.6541060209274292, "learning_rate": 0.0001135928809788654, "loss": 2.789, "step": 1948 }, { "epoch": 25.989966555183948, "grad_norm": 0.6722179651260376, "learning_rate": 0.0001135483870967742, "loss": 2.7197, "step": 1949 }, { "epoch": 26.0, "grad_norm": 0.6884087920188904, "learning_rate": 0.00011350389321468298, "loss": 2.83, "step": 1950 }, { "epoch": 26.013377926421406, "grad_norm": 0.5991981029510498, "learning_rate": 0.00011345939933259177, "loss": 2.8456, "step": 1951 }, { "epoch": 26.02675585284281, "grad_norm": 0.660169243812561, "learning_rate": 0.00011341490545050056, "loss": 2.8782, "step": 1952 }, { "epoch": 26.040133779264213, "grad_norm": 0.651779294013977, "learning_rate": 0.00011337041156840935, "loss": 2.7286, "step": 1953 }, { "epoch": 26.05351170568562, "grad_norm": 0.6962876915931702, "learning_rate": 0.00011332591768631813, "loss": 2.5009, "step": 1954 }, { "epoch": 26.066889632107024, "grad_norm": 0.6752526164054871, "learning_rate": 0.00011328142380422693, "loss": 2.6137, "step": 1955 }, { "epoch": 26.08026755852843, "grad_norm": 0.6335856914520264, "learning_rate": 0.0001132369299221357, "loss": 2.4744, "step": 1956 }, { "epoch": 26.093645484949832, "grad_norm": 0.6627519726753235, "learning_rate": 0.00011319243604004451, "loss": 2.9633, "step": 1957 }, { "epoch": 26.107023411371237, "grad_norm": 0.5948102474212646, "learning_rate": 0.00011314794215795328, "loss": 2.6333, "step": 1958 }, { "epoch": 26.120401337792643, "grad_norm": 0.5609325766563416, "learning_rate": 0.00011310344827586207, "loss": 2.5427, "step": 1959 }, { "epoch": 26.13377926421405, "grad_norm": 0.6280467510223389, "learning_rate": 0.00011305895439377086, "loss": 2.6838, "step": 1960 }, { "epoch": 26.14715719063545, "grad_norm": 0.5900541543960571, "learning_rate": 0.00011301446051167965, "loss": 2.8057, "step": 1961 }, { "epoch": 26.160535117056856, "grad_norm": 0.6185488700866699, "learning_rate": 0.00011296996662958843, "loss": 2.7778, "step": 1962 }, { "epoch": 26.17391304347826, "grad_norm": 0.6149762272834778, "learning_rate": 0.00011292547274749723, "loss": 2.7384, "step": 1963 }, { "epoch": 26.187290969899667, "grad_norm": 0.5975783467292786, "learning_rate": 0.00011288097886540601, "loss": 2.6957, "step": 1964 }, { "epoch": 26.20066889632107, "grad_norm": 0.7946596741676331, "learning_rate": 0.00011283648498331481, "loss": 2.6071, "step": 1965 }, { "epoch": 26.214046822742475, "grad_norm": 0.5987370014190674, "learning_rate": 0.00011279199110122358, "loss": 2.5812, "step": 1966 }, { "epoch": 26.22742474916388, "grad_norm": 0.7613934278488159, "learning_rate": 0.00011274749721913239, "loss": 2.7886, "step": 1967 }, { "epoch": 26.240802675585286, "grad_norm": 0.6035755276679993, "learning_rate": 0.00011270300333704116, "loss": 2.4709, "step": 1968 }, { "epoch": 26.254180602006688, "grad_norm": 0.9545117616653442, "learning_rate": 0.00011265850945494995, "loss": 2.6039, "step": 1969 }, { "epoch": 26.267558528428093, "grad_norm": 0.5982191562652588, "learning_rate": 0.00011261401557285873, "loss": 2.664, "step": 1970 }, { "epoch": 26.2809364548495, "grad_norm": 0.6070300340652466, "learning_rate": 0.00011256952169076753, "loss": 2.671, "step": 1971 }, { "epoch": 26.294314381270905, "grad_norm": 0.6093387603759766, "learning_rate": 0.00011252502780867631, "loss": 2.6441, "step": 1972 }, { "epoch": 26.307692307692307, "grad_norm": 0.6499563455581665, "learning_rate": 0.00011248053392658511, "loss": 2.6851, "step": 1973 }, { "epoch": 26.321070234113712, "grad_norm": 0.6848227977752686, "learning_rate": 0.00011243604004449389, "loss": 2.6448, "step": 1974 }, { "epoch": 26.334448160535118, "grad_norm": 0.6483498811721802, "learning_rate": 0.00011239154616240269, "loss": 2.5361, "step": 1975 }, { "epoch": 26.347826086956523, "grad_norm": 0.6412089467048645, "learning_rate": 0.00011234705228031146, "loss": 2.6625, "step": 1976 }, { "epoch": 26.361204013377925, "grad_norm": 0.6891600489616394, "learning_rate": 0.00011230255839822026, "loss": 2.6901, "step": 1977 }, { "epoch": 26.37458193979933, "grad_norm": 0.606422483921051, "learning_rate": 0.00011225806451612903, "loss": 2.7778, "step": 1978 }, { "epoch": 26.387959866220736, "grad_norm": 0.6754788756370544, "learning_rate": 0.00011221357063403783, "loss": 2.7757, "step": 1979 }, { "epoch": 26.401337792642142, "grad_norm": 0.6534073352813721, "learning_rate": 0.00011216907675194661, "loss": 2.7682, "step": 1980 }, { "epoch": 26.414715719063544, "grad_norm": 0.6333116888999939, "learning_rate": 0.00011212458286985541, "loss": 2.739, "step": 1981 }, { "epoch": 26.42809364548495, "grad_norm": 0.6784356236457825, "learning_rate": 0.00011208008898776419, "loss": 2.499, "step": 1982 }, { "epoch": 26.441471571906355, "grad_norm": 0.6247740983963013, "learning_rate": 0.00011203559510567299, "loss": 2.8451, "step": 1983 }, { "epoch": 26.45484949832776, "grad_norm": 0.6405763030052185, "learning_rate": 0.00011199110122358177, "loss": 2.7237, "step": 1984 }, { "epoch": 26.468227424749163, "grad_norm": 0.7398512959480286, "learning_rate": 0.00011194660734149056, "loss": 2.6931, "step": 1985 }, { "epoch": 26.48160535117057, "grad_norm": 0.6067454814910889, "learning_rate": 0.00011190211345939933, "loss": 2.8153, "step": 1986 }, { "epoch": 26.494983277591974, "grad_norm": 0.5860223770141602, "learning_rate": 0.00011185761957730812, "loss": 2.9522, "step": 1987 }, { "epoch": 26.50836120401338, "grad_norm": 0.6325815320014954, "learning_rate": 0.00011181312569521691, "loss": 2.7294, "step": 1988 }, { "epoch": 26.52173913043478, "grad_norm": 0.6049959063529968, "learning_rate": 0.0001117686318131257, "loss": 2.7989, "step": 1989 }, { "epoch": 26.535117056856187, "grad_norm": 0.5589177012443542, "learning_rate": 0.00011172413793103449, "loss": 2.4431, "step": 1990 }, { "epoch": 26.548494983277592, "grad_norm": 0.6467594504356384, "learning_rate": 0.00011167964404894327, "loss": 2.7166, "step": 1991 }, { "epoch": 26.561872909698998, "grad_norm": 0.7082380056381226, "learning_rate": 0.00011163515016685207, "loss": 2.7768, "step": 1992 }, { "epoch": 26.5752508361204, "grad_norm": 0.7438964247703552, "learning_rate": 0.00011159065628476084, "loss": 2.8655, "step": 1993 }, { "epoch": 26.588628762541806, "grad_norm": 0.7755714654922485, "learning_rate": 0.00011154616240266965, "loss": 2.6956, "step": 1994 }, { "epoch": 26.60200668896321, "grad_norm": 0.576988160610199, "learning_rate": 0.00011150166852057842, "loss": 2.5831, "step": 1995 }, { "epoch": 26.615384615384617, "grad_norm": 0.6044856309890747, "learning_rate": 0.00011145717463848721, "loss": 2.6834, "step": 1996 }, { "epoch": 26.62876254180602, "grad_norm": 0.5846719741821289, "learning_rate": 0.000111412680756396, "loss": 2.5553, "step": 1997 }, { "epoch": 26.642140468227424, "grad_norm": 0.6511263251304626, "learning_rate": 0.00011136818687430479, "loss": 2.5404, "step": 1998 }, { "epoch": 26.65551839464883, "grad_norm": 0.6784057021141052, "learning_rate": 0.00011132369299221357, "loss": 2.5896, "step": 1999 }, { "epoch": 26.668896321070235, "grad_norm": 0.7156029939651489, "learning_rate": 0.00011127919911012237, "loss": 2.6683, "step": 2000 }, { "epoch": 26.682274247491637, "grad_norm": 1.5313527584075928, "learning_rate": 0.00011123470522803115, "loss": 3.1002, "step": 2001 }, { "epoch": 26.695652173913043, "grad_norm": 1.570999264717102, "learning_rate": 0.00011119021134593995, "loss": 3.0022, "step": 2002 }, { "epoch": 26.70903010033445, "grad_norm": 1.672279953956604, "learning_rate": 0.00011114571746384872, "loss": 2.9443, "step": 2003 }, { "epoch": 26.722408026755854, "grad_norm": 1.3200056552886963, "learning_rate": 0.00011110122358175752, "loss": 3.3101, "step": 2004 }, { "epoch": 26.735785953177256, "grad_norm": 1.9621714353561401, "learning_rate": 0.0001110567296996663, "loss": 3.0294, "step": 2005 }, { "epoch": 26.74916387959866, "grad_norm": 1.5567591190338135, "learning_rate": 0.00011101223581757509, "loss": 3.1361, "step": 2006 }, { "epoch": 26.762541806020067, "grad_norm": 1.6681092977523804, "learning_rate": 0.00011096774193548387, "loss": 3.2709, "step": 2007 }, { "epoch": 26.775919732441473, "grad_norm": 1.461212158203125, "learning_rate": 0.00011092324805339267, "loss": 3.0585, "step": 2008 }, { "epoch": 26.789297658862875, "grad_norm": 1.4997388124465942, "learning_rate": 0.00011087875417130145, "loss": 3.0182, "step": 2009 }, { "epoch": 26.80267558528428, "grad_norm": 1.3930811882019043, "learning_rate": 0.00011083426028921025, "loss": 2.9796, "step": 2010 }, { "epoch": 26.816053511705686, "grad_norm": 1.3426967859268188, "learning_rate": 0.00011078976640711903, "loss": 3.2029, "step": 2011 }, { "epoch": 26.82943143812709, "grad_norm": 1.7054353952407837, "learning_rate": 0.00011074527252502782, "loss": 3.0889, "step": 2012 }, { "epoch": 26.842809364548494, "grad_norm": 1.6260415315628052, "learning_rate": 0.00011070077864293659, "loss": 3.2461, "step": 2013 }, { "epoch": 26.8561872909699, "grad_norm": 1.7708278894424438, "learning_rate": 0.0001106562847608454, "loss": 3.1737, "step": 2014 }, { "epoch": 26.869565217391305, "grad_norm": 1.4107633829116821, "learning_rate": 0.00011061179087875417, "loss": 3.0006, "step": 2015 }, { "epoch": 26.88294314381271, "grad_norm": 1.929655909538269, "learning_rate": 0.00011056729699666297, "loss": 3.0805, "step": 2016 }, { "epoch": 26.896321070234112, "grad_norm": 1.835286021232605, "learning_rate": 0.00011052280311457175, "loss": 3.0995, "step": 2017 }, { "epoch": 26.909698996655518, "grad_norm": 1.9293112754821777, "learning_rate": 0.00011047830923248055, "loss": 3.1714, "step": 2018 }, { "epoch": 26.923076923076923, "grad_norm": 1.5640552043914795, "learning_rate": 0.00011043381535038933, "loss": 3.1381, "step": 2019 }, { "epoch": 26.93645484949833, "grad_norm": 1.7890485525131226, "learning_rate": 0.00011038932146829812, "loss": 3.0785, "step": 2020 }, { "epoch": 26.94983277591973, "grad_norm": 1.3167777061462402, "learning_rate": 0.0001103448275862069, "loss": 3.2041, "step": 2021 }, { "epoch": 26.963210702341136, "grad_norm": 1.3006356954574585, "learning_rate": 0.0001103003337041157, "loss": 2.6744, "step": 2022 }, { "epoch": 26.976588628762542, "grad_norm": 1.3111026287078857, "learning_rate": 0.00011025583982202447, "loss": 2.9279, "step": 2023 }, { "epoch": 26.989966555183948, "grad_norm": 1.9163484573364258, "learning_rate": 0.00011021134593993328, "loss": 3.0983, "step": 2024 }, { "epoch": 27.013377926421406, "grad_norm": 2.121852159500122, "learning_rate": 0.00011016685205784205, "loss": 6.1272, "step": 2025 }, { "epoch": 27.02675585284281, "grad_norm": 1.2503304481506348, "learning_rate": 0.00011012235817575085, "loss": 2.8418, "step": 2026 }, { "epoch": 27.040133779264213, "grad_norm": 1.2677263021469116, "learning_rate": 0.00011007786429365963, "loss": 3.166, "step": 2027 }, { "epoch": 27.05351170568562, "grad_norm": 1.1323914527893066, "learning_rate": 0.00011003337041156841, "loss": 2.881, "step": 2028 }, { "epoch": 27.066889632107024, "grad_norm": 1.32306969165802, "learning_rate": 0.0001099888765294772, "loss": 3.1736, "step": 2029 }, { "epoch": 27.08026755852843, "grad_norm": 1.1854408979415894, "learning_rate": 0.00010994438264738598, "loss": 3.0644, "step": 2030 }, { "epoch": 27.093645484949832, "grad_norm": 1.3617606163024902, "learning_rate": 0.00010989988876529478, "loss": 3.0845, "step": 2031 }, { "epoch": 27.107023411371237, "grad_norm": 1.3771755695343018, "learning_rate": 0.00010985539488320355, "loss": 3.215, "step": 2032 }, { "epoch": 27.120401337792643, "grad_norm": 1.4441111087799072, "learning_rate": 0.00010981090100111235, "loss": 3.1639, "step": 2033 }, { "epoch": 27.13377926421405, "grad_norm": 1.4004138708114624, "learning_rate": 0.00010976640711902113, "loss": 3.2066, "step": 2034 }, { "epoch": 27.14715719063545, "grad_norm": 1.3546315431594849, "learning_rate": 0.00010972191323692993, "loss": 2.8154, "step": 2035 }, { "epoch": 27.160535117056856, "grad_norm": 1.122201681137085, "learning_rate": 0.00010967741935483871, "loss": 2.8852, "step": 2036 }, { "epoch": 27.17391304347826, "grad_norm": 1.4592616558074951, "learning_rate": 0.0001096329254727475, "loss": 2.9766, "step": 2037 }, { "epoch": 27.187290969899667, "grad_norm": 1.1791727542877197, "learning_rate": 0.00010958843159065629, "loss": 2.8115, "step": 2038 }, { "epoch": 27.20066889632107, "grad_norm": 1.3347991704940796, "learning_rate": 0.00010954393770856508, "loss": 2.839, "step": 2039 }, { "epoch": 27.214046822742475, "grad_norm": 1.6515034437179565, "learning_rate": 0.00010949944382647385, "loss": 3.2088, "step": 2040 }, { "epoch": 27.22742474916388, "grad_norm": 1.4179280996322632, "learning_rate": 0.00010945494994438266, "loss": 3.0364, "step": 2041 }, { "epoch": 27.240802675585286, "grad_norm": 1.5073821544647217, "learning_rate": 0.00010941045606229143, "loss": 2.9634, "step": 2042 }, { "epoch": 27.254180602006688, "grad_norm": 1.3612418174743652, "learning_rate": 0.00010936596218020023, "loss": 3.0204, "step": 2043 }, { "epoch": 27.267558528428093, "grad_norm": 1.3515981435775757, "learning_rate": 0.00010932146829810901, "loss": 2.7506, "step": 2044 }, { "epoch": 27.2809364548495, "grad_norm": 1.394405722618103, "learning_rate": 0.0001092769744160178, "loss": 2.8979, "step": 2045 }, { "epoch": 27.294314381270905, "grad_norm": 1.4995383024215698, "learning_rate": 0.00010923248053392659, "loss": 3.2501, "step": 2046 }, { "epoch": 27.307692307692307, "grad_norm": 1.4658801555633545, "learning_rate": 0.00010918798665183538, "loss": 3.1901, "step": 2047 }, { "epoch": 27.321070234113712, "grad_norm": 1.4346429109573364, "learning_rate": 0.00010914349276974417, "loss": 2.9835, "step": 2048 }, { "epoch": 27.334448160535118, "grad_norm": 1.455994963645935, "learning_rate": 0.00010909899888765296, "loss": 3.0354, "step": 2049 }, { "epoch": 27.347826086956523, "grad_norm": 1.2409014701843262, "learning_rate": 0.00010905450500556173, "loss": 3.1903, "step": 2050 }, { "epoch": 27.361204013377925, "grad_norm": 1.417441487312317, "learning_rate": 0.00010901001112347054, "loss": 2.9081, "step": 2051 }, { "epoch": 27.37458193979933, "grad_norm": 1.4116290807724, "learning_rate": 0.00010896551724137931, "loss": 2.8466, "step": 2052 }, { "epoch": 27.387959866220736, "grad_norm": 1.58635675907135, "learning_rate": 0.0001089210233592881, "loss": 3.134, "step": 2053 }, { "epoch": 27.401337792642142, "grad_norm": 1.4947638511657715, "learning_rate": 0.00010887652947719689, "loss": 2.9585, "step": 2054 }, { "epoch": 27.414715719063544, "grad_norm": 1.5971535444259644, "learning_rate": 0.00010883203559510568, "loss": 2.954, "step": 2055 }, { "epoch": 27.42809364548495, "grad_norm": 1.5000510215759277, "learning_rate": 0.00010878754171301447, "loss": 3.1699, "step": 2056 }, { "epoch": 27.441471571906355, "grad_norm": 1.3633960485458374, "learning_rate": 0.00010874304783092326, "loss": 3.0109, "step": 2057 }, { "epoch": 27.45484949832776, "grad_norm": 1.2857588529586792, "learning_rate": 0.00010869855394883204, "loss": 3.1932, "step": 2058 }, { "epoch": 27.468227424749163, "grad_norm": 1.6654231548309326, "learning_rate": 0.00010865406006674084, "loss": 3.2411, "step": 2059 }, { "epoch": 27.48160535117057, "grad_norm": 1.398990511894226, "learning_rate": 0.00010860956618464961, "loss": 3.2183, "step": 2060 }, { "epoch": 27.494983277591974, "grad_norm": 1.4811097383499146, "learning_rate": 0.00010856507230255842, "loss": 2.9565, "step": 2061 }, { "epoch": 27.50836120401338, "grad_norm": 1.5817798376083374, "learning_rate": 0.00010852057842046719, "loss": 3.0652, "step": 2062 }, { "epoch": 27.52173913043478, "grad_norm": 1.2948859930038452, "learning_rate": 0.00010847608453837598, "loss": 2.8978, "step": 2063 }, { "epoch": 27.535117056856187, "grad_norm": 1.3314701318740845, "learning_rate": 0.00010843159065628477, "loss": 2.8764, "step": 2064 }, { "epoch": 27.548494983277592, "grad_norm": 1.531958818435669, "learning_rate": 0.00010838709677419356, "loss": 3.3317, "step": 2065 }, { "epoch": 27.561872909698998, "grad_norm": 1.5060667991638184, "learning_rate": 0.00010834260289210234, "loss": 3.0724, "step": 2066 }, { "epoch": 27.5752508361204, "grad_norm": 1.4489496946334839, "learning_rate": 0.00010829810901001114, "loss": 3.0841, "step": 2067 }, { "epoch": 27.588628762541806, "grad_norm": 1.4431540966033936, "learning_rate": 0.00010825361512791992, "loss": 3.145, "step": 2068 }, { "epoch": 27.60200668896321, "grad_norm": 1.6104519367218018, "learning_rate": 0.00010820912124582869, "loss": 3.0226, "step": 2069 }, { "epoch": 27.615384615384617, "grad_norm": 1.249314546585083, "learning_rate": 0.00010816462736373749, "loss": 3.1273, "step": 2070 }, { "epoch": 27.62876254180602, "grad_norm": 1.1928060054779053, "learning_rate": 0.00010812013348164627, "loss": 2.8877, "step": 2071 }, { "epoch": 27.642140468227424, "grad_norm": 1.5014128684997559, "learning_rate": 0.00010807563959955507, "loss": 3.4129, "step": 2072 }, { "epoch": 27.65551839464883, "grad_norm": 1.344773530960083, "learning_rate": 0.00010803114571746385, "loss": 3.1112, "step": 2073 }, { "epoch": 27.668896321070235, "grad_norm": 1.616158366203308, "learning_rate": 0.00010798665183537264, "loss": 3.1799, "step": 2074 }, { "epoch": 27.682274247491637, "grad_norm": 1.4087249040603638, "learning_rate": 0.00010794215795328141, "loss": 2.9315, "step": 2075 }, { "epoch": 27.695652173913043, "grad_norm": 1.48402738571167, "learning_rate": 0.00010789766407119022, "loss": 3.1755, "step": 2076 }, { "epoch": 27.70903010033445, "grad_norm": 1.3316876888275146, "learning_rate": 0.00010785317018909899, "loss": 3.087, "step": 2077 }, { "epoch": 27.722408026755854, "grad_norm": 1.3765549659729004, "learning_rate": 0.0001078086763070078, "loss": 3.2595, "step": 2078 }, { "epoch": 27.735785953177256, "grad_norm": 1.646952748298645, "learning_rate": 0.00010776418242491657, "loss": 3.0627, "step": 2079 }, { "epoch": 27.74916387959866, "grad_norm": 1.2064934968948364, "learning_rate": 0.00010771968854282537, "loss": 3.0569, "step": 2080 }, { "epoch": 27.762541806020067, "grad_norm": 1.3027973175048828, "learning_rate": 0.00010767519466073415, "loss": 3.0246, "step": 2081 }, { "epoch": 27.775919732441473, "grad_norm": 1.4610519409179688, "learning_rate": 0.00010763070077864294, "loss": 3.0123, "step": 2082 }, { "epoch": 27.789297658862875, "grad_norm": 1.376183032989502, "learning_rate": 0.00010758620689655173, "loss": 3.1312, "step": 2083 }, { "epoch": 27.80267558528428, "grad_norm": 1.3715664148330688, "learning_rate": 0.00010754171301446052, "loss": 3.0104, "step": 2084 }, { "epoch": 27.816053511705686, "grad_norm": 1.2951513528823853, "learning_rate": 0.00010749721913236929, "loss": 3.0353, "step": 2085 }, { "epoch": 27.82943143812709, "grad_norm": 1.1614545583724976, "learning_rate": 0.0001074527252502781, "loss": 2.8977, "step": 2086 }, { "epoch": 27.842809364548494, "grad_norm": 1.3948044776916504, "learning_rate": 0.00010740823136818687, "loss": 2.8611, "step": 2087 }, { "epoch": 27.8561872909699, "grad_norm": 1.3053209781646729, "learning_rate": 0.00010736373748609568, "loss": 2.7965, "step": 2088 }, { "epoch": 27.869565217391305, "grad_norm": 1.289396047592163, "learning_rate": 0.00010731924360400445, "loss": 2.8663, "step": 2089 }, { "epoch": 27.88294314381271, "grad_norm": 1.4188008308410645, "learning_rate": 0.00010727474972191324, "loss": 3.0139, "step": 2090 }, { "epoch": 27.896321070234112, "grad_norm": 1.5196731090545654, "learning_rate": 0.00010723025583982203, "loss": 2.9593, "step": 2091 }, { "epoch": 27.909698996655518, "grad_norm": 1.5437039136886597, "learning_rate": 0.00010718576195773082, "loss": 2.8009, "step": 2092 }, { "epoch": 27.923076923076923, "grad_norm": 1.4248530864715576, "learning_rate": 0.0001071412680756396, "loss": 2.8469, "step": 2093 }, { "epoch": 27.93645484949833, "grad_norm": 1.4018962383270264, "learning_rate": 0.0001070967741935484, "loss": 3.0897, "step": 2094 }, { "epoch": 27.94983277591973, "grad_norm": 1.2483067512512207, "learning_rate": 0.00010705228031145717, "loss": 3.1738, "step": 2095 }, { "epoch": 27.963210702341136, "grad_norm": 1.25924813747406, "learning_rate": 0.00010700778642936598, "loss": 3.263, "step": 2096 }, { "epoch": 27.976588628762542, "grad_norm": 1.2528305053710938, "learning_rate": 0.00010696329254727475, "loss": 3.0374, "step": 2097 }, { "epoch": 27.989966555183948, "grad_norm": 1.7555912733078003, "learning_rate": 0.00010691879866518356, "loss": 3.1906, "step": 2098 }, { "epoch": 28.0, "grad_norm": 1.7080843448638916, "learning_rate": 0.00010687430478309233, "loss": 2.9748, "step": 2099 }, { "epoch": 28.013377926421406, "grad_norm": 1.120409369468689, "learning_rate": 0.00010682981090100112, "loss": 2.9385, "step": 2100 }, { "epoch": 28.02675585284281, "grad_norm": 0.8793032169342041, "learning_rate": 0.0001067853170189099, "loss": 2.9205, "step": 2101 }, { "epoch": 28.040133779264213, "grad_norm": 1.018744707107544, "learning_rate": 0.0001067408231368187, "loss": 3.1447, "step": 2102 }, { "epoch": 28.05351170568562, "grad_norm": 0.9645024538040161, "learning_rate": 0.00010669632925472748, "loss": 2.783, "step": 2103 }, { "epoch": 28.066889632107024, "grad_norm": 1.0723059177398682, "learning_rate": 0.00010665183537263628, "loss": 2.9627, "step": 2104 }, { "epoch": 28.08026755852843, "grad_norm": 1.0304062366485596, "learning_rate": 0.00010660734149054505, "loss": 2.8118, "step": 2105 }, { "epoch": 28.093645484949832, "grad_norm": 1.0894114971160889, "learning_rate": 0.00010656284760845386, "loss": 2.9643, "step": 2106 }, { "epoch": 28.107023411371237, "grad_norm": 1.1615228652954102, "learning_rate": 0.00010651835372636263, "loss": 2.9402, "step": 2107 }, { "epoch": 28.120401337792643, "grad_norm": 1.066375732421875, "learning_rate": 0.00010647385984427144, "loss": 2.8733, "step": 2108 }, { "epoch": 28.13377926421405, "grad_norm": 1.1981687545776367, "learning_rate": 0.0001064293659621802, "loss": 2.9301, "step": 2109 }, { "epoch": 28.14715719063545, "grad_norm": 1.0867598056793213, "learning_rate": 0.00010638487208008899, "loss": 3.1676, "step": 2110 }, { "epoch": 28.160535117056856, "grad_norm": 1.156256079673767, "learning_rate": 0.00010634037819799778, "loss": 2.9555, "step": 2111 }, { "epoch": 28.17391304347826, "grad_norm": 0.9931294322013855, "learning_rate": 0.00010629588431590655, "loss": 2.7119, "step": 2112 }, { "epoch": 28.187290969899667, "grad_norm": 0.9383543729782104, "learning_rate": 0.00010625139043381536, "loss": 2.9251, "step": 2113 }, { "epoch": 28.20066889632107, "grad_norm": 0.859805166721344, "learning_rate": 0.00010620689655172413, "loss": 2.8734, "step": 2114 }, { "epoch": 28.214046822742475, "grad_norm": 1.081536889076233, "learning_rate": 0.00010616240266963293, "loss": 3.0356, "step": 2115 }, { "epoch": 28.22742474916388, "grad_norm": 1.132004976272583, "learning_rate": 0.00010611790878754171, "loss": 3.1751, "step": 2116 }, { "epoch": 28.240802675585286, "grad_norm": 1.0406700372695923, "learning_rate": 0.0001060734149054505, "loss": 2.913, "step": 2117 }, { "epoch": 28.254180602006688, "grad_norm": 1.4362869262695312, "learning_rate": 0.00010602892102335929, "loss": 3.0823, "step": 2118 }, { "epoch": 28.267558528428093, "grad_norm": 1.0609610080718994, "learning_rate": 0.00010598442714126808, "loss": 3.0693, "step": 2119 }, { "epoch": 28.2809364548495, "grad_norm": 1.0952329635620117, "learning_rate": 0.00010593993325917686, "loss": 2.9953, "step": 2120 }, { "epoch": 28.294314381270905, "grad_norm": 1.0358316898345947, "learning_rate": 0.00010589543937708566, "loss": 2.7737, "step": 2121 }, { "epoch": 28.307692307692307, "grad_norm": 1.1332812309265137, "learning_rate": 0.00010585094549499443, "loss": 2.9874, "step": 2122 }, { "epoch": 28.321070234113712, "grad_norm": 1.0737558603286743, "learning_rate": 0.00010580645161290324, "loss": 3.0547, "step": 2123 }, { "epoch": 28.334448160535118, "grad_norm": 1.030110478401184, "learning_rate": 0.00010576195773081201, "loss": 2.9786, "step": 2124 }, { "epoch": 28.347826086956523, "grad_norm": 1.09152352809906, "learning_rate": 0.0001057174638487208, "loss": 2.6407, "step": 2125 }, { "epoch": 28.361204013377925, "grad_norm": 0.9880086183547974, "learning_rate": 0.00010567296996662959, "loss": 3.1367, "step": 2126 }, { "epoch": 28.37458193979933, "grad_norm": 1.1486655473709106, "learning_rate": 0.00010562847608453838, "loss": 3.1705, "step": 2127 }, { "epoch": 28.387959866220736, "grad_norm": 1.199541449546814, "learning_rate": 0.00010558398220244716, "loss": 3.0254, "step": 2128 }, { "epoch": 28.401337792642142, "grad_norm": 0.9470932483673096, "learning_rate": 0.00010553948832035596, "loss": 3.1296, "step": 2129 }, { "epoch": 28.414715719063544, "grad_norm": 1.1546344757080078, "learning_rate": 0.00010549499443826474, "loss": 2.9981, "step": 2130 }, { "epoch": 28.42809364548495, "grad_norm": 1.0525349378585815, "learning_rate": 0.00010545050055617354, "loss": 2.884, "step": 2131 }, { "epoch": 28.441471571906355, "grad_norm": 0.8965997695922852, "learning_rate": 0.00010540600667408231, "loss": 3.1666, "step": 2132 }, { "epoch": 28.45484949832776, "grad_norm": 1.0740344524383545, "learning_rate": 0.00010536151279199112, "loss": 2.9477, "step": 2133 }, { "epoch": 28.468227424749163, "grad_norm": 1.0058174133300781, "learning_rate": 0.00010531701890989989, "loss": 2.9355, "step": 2134 }, { "epoch": 28.48160535117057, "grad_norm": 1.0790162086486816, "learning_rate": 0.00010527252502780868, "loss": 2.8728, "step": 2135 }, { "epoch": 28.494983277591974, "grad_norm": 1.0231183767318726, "learning_rate": 0.00010522803114571746, "loss": 2.6208, "step": 2136 }, { "epoch": 28.50836120401338, "grad_norm": 1.1527316570281982, "learning_rate": 0.00010518353726362626, "loss": 3.0255, "step": 2137 }, { "epoch": 28.52173913043478, "grad_norm": 1.0380228757858276, "learning_rate": 0.00010513904338153504, "loss": 3.1382, "step": 2138 }, { "epoch": 28.535117056856187, "grad_norm": 1.1528582572937012, "learning_rate": 0.00010509454949944384, "loss": 2.9921, "step": 2139 }, { "epoch": 28.548494983277592, "grad_norm": 1.0610437393188477, "learning_rate": 0.00010505005561735262, "loss": 2.832, "step": 2140 }, { "epoch": 28.561872909698998, "grad_norm": 1.1252180337905884, "learning_rate": 0.00010500556173526142, "loss": 3.0318, "step": 2141 }, { "epoch": 28.5752508361204, "grad_norm": 0.910602331161499, "learning_rate": 0.00010496106785317019, "loss": 3.0174, "step": 2142 }, { "epoch": 28.588628762541806, "grad_norm": 1.1615829467773438, "learning_rate": 0.000104916573971079, "loss": 3.1465, "step": 2143 }, { "epoch": 28.60200668896321, "grad_norm": 1.0431264638900757, "learning_rate": 0.00010487208008898776, "loss": 2.9406, "step": 2144 }, { "epoch": 28.615384615384617, "grad_norm": 1.1530067920684814, "learning_rate": 0.00010482758620689656, "loss": 2.7672, "step": 2145 }, { "epoch": 28.62876254180602, "grad_norm": 1.0598475933074951, "learning_rate": 0.00010478309232480534, "loss": 3.0477, "step": 2146 }, { "epoch": 28.642140468227424, "grad_norm": 0.9107276797294617, "learning_rate": 0.00010473859844271414, "loss": 2.949, "step": 2147 }, { "epoch": 28.65551839464883, "grad_norm": 1.0739660263061523, "learning_rate": 0.00010469410456062292, "loss": 3.0195, "step": 2148 }, { "epoch": 28.668896321070235, "grad_norm": 1.0200115442276, "learning_rate": 0.00010464961067853172, "loss": 2.9086, "step": 2149 }, { "epoch": 28.682274247491637, "grad_norm": 0.9945818185806274, "learning_rate": 0.0001046051167964405, "loss": 3.3103, "step": 2150 }, { "epoch": 28.695652173913043, "grad_norm": 1.231123447418213, "learning_rate": 0.00010456062291434927, "loss": 2.9266, "step": 2151 }, { "epoch": 28.70903010033445, "grad_norm": 1.02529776096344, "learning_rate": 0.00010451612903225806, "loss": 2.8796, "step": 2152 }, { "epoch": 28.722408026755854, "grad_norm": 1.1735590696334839, "learning_rate": 0.00010447163515016685, "loss": 2.8582, "step": 2153 }, { "epoch": 28.735785953177256, "grad_norm": 1.0260053873062134, "learning_rate": 0.00010442714126807564, "loss": 3.1459, "step": 2154 }, { "epoch": 28.74916387959866, "grad_norm": 1.0473296642303467, "learning_rate": 0.00010438264738598442, "loss": 3.1804, "step": 2155 }, { "epoch": 28.762541806020067, "grad_norm": 1.0932636260986328, "learning_rate": 0.00010433815350389322, "loss": 2.8114, "step": 2156 }, { "epoch": 28.775919732441473, "grad_norm": 1.115605115890503, "learning_rate": 0.000104293659621802, "loss": 2.925, "step": 2157 }, { "epoch": 28.789297658862875, "grad_norm": 1.0479992628097534, "learning_rate": 0.0001042491657397108, "loss": 3.0461, "step": 2158 }, { "epoch": 28.80267558528428, "grad_norm": 0.9241504669189453, "learning_rate": 0.00010420467185761957, "loss": 3.0755, "step": 2159 }, { "epoch": 28.816053511705686, "grad_norm": 0.9609296917915344, "learning_rate": 0.00010416017797552838, "loss": 3.077, "step": 2160 }, { "epoch": 28.82943143812709, "grad_norm": 0.9561198949813843, "learning_rate": 0.00010411568409343715, "loss": 2.9041, "step": 2161 }, { "epoch": 28.842809364548494, "grad_norm": 1.1552175283432007, "learning_rate": 0.00010407119021134594, "loss": 2.8253, "step": 2162 }, { "epoch": 28.8561872909699, "grad_norm": 0.8945892453193665, "learning_rate": 0.00010402669632925472, "loss": 2.9228, "step": 2163 }, { "epoch": 28.869565217391305, "grad_norm": 1.0247336626052856, "learning_rate": 0.00010398220244716352, "loss": 2.6202, "step": 2164 }, { "epoch": 28.88294314381271, "grad_norm": 1.0500261783599854, "learning_rate": 0.0001039377085650723, "loss": 2.777, "step": 2165 }, { "epoch": 28.896321070234112, "grad_norm": 1.0250235795974731, "learning_rate": 0.0001038932146829811, "loss": 3.0606, "step": 2166 }, { "epoch": 28.909698996655518, "grad_norm": 1.0018290281295776, "learning_rate": 0.00010384872080088988, "loss": 3.0099, "step": 2167 }, { "epoch": 28.923076923076923, "grad_norm": 0.8957269191741943, "learning_rate": 0.00010380422691879868, "loss": 2.7081, "step": 2168 }, { "epoch": 28.93645484949833, "grad_norm": 1.060746431350708, "learning_rate": 0.00010375973303670745, "loss": 2.9771, "step": 2169 }, { "epoch": 28.94983277591973, "grad_norm": 0.9782920479774475, "learning_rate": 0.00010371523915461626, "loss": 3.035, "step": 2170 }, { "epoch": 28.963210702341136, "grad_norm": 0.9972925186157227, "learning_rate": 0.00010367074527252502, "loss": 3.0531, "step": 2171 }, { "epoch": 28.976588628762542, "grad_norm": 1.1463640928268433, "learning_rate": 0.00010362625139043382, "loss": 2.9485, "step": 2172 }, { "epoch": 28.989966555183948, "grad_norm": 1.0142980813980103, "learning_rate": 0.0001035817575083426, "loss": 2.8852, "step": 2173 }, { "epoch": 29.0, "grad_norm": 1.347244381904602, "learning_rate": 0.0001035372636262514, "loss": 2.898, "step": 2174 }, { "epoch": 29.013377926421406, "grad_norm": 0.9205636978149414, "learning_rate": 0.00010349276974416018, "loss": 2.7324, "step": 2175 }, { "epoch": 29.02675585284281, "grad_norm": 0.9123748540878296, "learning_rate": 0.00010344827586206898, "loss": 2.943, "step": 2176 }, { "epoch": 29.040133779264213, "grad_norm": 0.8594069480895996, "learning_rate": 0.00010340378197997776, "loss": 2.8531, "step": 2177 }, { "epoch": 29.05351170568562, "grad_norm": 0.7439792156219482, "learning_rate": 0.00010335928809788655, "loss": 2.521, "step": 2178 }, { "epoch": 29.066889632107024, "grad_norm": 0.7489947080612183, "learning_rate": 0.00010331479421579532, "loss": 2.9317, "step": 2179 }, { "epoch": 29.08026755852843, "grad_norm": 0.8311702013015747, "learning_rate": 0.00010327030033370413, "loss": 2.7137, "step": 2180 }, { "epoch": 29.093645484949832, "grad_norm": 0.9763099551200867, "learning_rate": 0.0001032258064516129, "loss": 2.7502, "step": 2181 }, { "epoch": 29.107023411371237, "grad_norm": 0.8277449607849121, "learning_rate": 0.0001031813125695217, "loss": 2.9792, "step": 2182 }, { "epoch": 29.120401337792643, "grad_norm": 0.7491191029548645, "learning_rate": 0.00010313681868743048, "loss": 2.8479, "step": 2183 }, { "epoch": 29.13377926421405, "grad_norm": 0.7314183115959167, "learning_rate": 0.00010309232480533928, "loss": 2.8962, "step": 2184 }, { "epoch": 29.14715719063545, "grad_norm": 0.8334509134292603, "learning_rate": 0.00010304783092324806, "loss": 2.86, "step": 2185 }, { "epoch": 29.160535117056856, "grad_norm": 0.9736613631248474, "learning_rate": 0.00010300333704115685, "loss": 2.9289, "step": 2186 }, { "epoch": 29.17391304347826, "grad_norm": 0.9118865132331848, "learning_rate": 0.00010295884315906564, "loss": 3.0865, "step": 2187 }, { "epoch": 29.187290969899667, "grad_norm": 0.992799699306488, "learning_rate": 0.00010291434927697443, "loss": 2.7811, "step": 2188 }, { "epoch": 29.20066889632107, "grad_norm": 1.0415345430374146, "learning_rate": 0.0001028698553948832, "loss": 2.7707, "step": 2189 }, { "epoch": 29.214046822742475, "grad_norm": 0.7599477171897888, "learning_rate": 0.00010282536151279201, "loss": 2.8162, "step": 2190 }, { "epoch": 29.22742474916388, "grad_norm": 1.17939293384552, "learning_rate": 0.00010278086763070078, "loss": 2.7939, "step": 2191 }, { "epoch": 29.240802675585286, "grad_norm": 0.8793815970420837, "learning_rate": 0.00010273637374860956, "loss": 2.7704, "step": 2192 }, { "epoch": 29.254180602006688, "grad_norm": 0.838680624961853, "learning_rate": 0.00010269187986651836, "loss": 2.8513, "step": 2193 }, { "epoch": 29.267558528428093, "grad_norm": 0.8918489217758179, "learning_rate": 0.00010264738598442714, "loss": 2.7748, "step": 2194 }, { "epoch": 29.2809364548495, "grad_norm": 0.7513167858123779, "learning_rate": 0.00010260289210233594, "loss": 2.8151, "step": 2195 }, { "epoch": 29.294314381270905, "grad_norm": 0.7921515703201294, "learning_rate": 0.0001025583982202447, "loss": 2.8004, "step": 2196 }, { "epoch": 29.307692307692307, "grad_norm": 0.7862170934677124, "learning_rate": 0.00010251390433815352, "loss": 2.9749, "step": 2197 }, { "epoch": 29.321070234113712, "grad_norm": 0.8512644171714783, "learning_rate": 0.00010246941045606228, "loss": 2.9936, "step": 2198 }, { "epoch": 29.334448160535118, "grad_norm": 0.9074665307998657, "learning_rate": 0.00010242491657397108, "loss": 2.814, "step": 2199 }, { "epoch": 29.347826086956523, "grad_norm": 0.9875670075416565, "learning_rate": 0.00010238042269187986, "loss": 2.9862, "step": 2200 }, { "epoch": 29.361204013377925, "grad_norm": 0.7445062398910522, "learning_rate": 0.00010233592880978866, "loss": 2.8233, "step": 2201 }, { "epoch": 29.37458193979933, "grad_norm": 0.8388944268226624, "learning_rate": 0.00010229143492769744, "loss": 2.8983, "step": 2202 }, { "epoch": 29.387959866220736, "grad_norm": 0.7882921099662781, "learning_rate": 0.00010224694104560624, "loss": 2.9521, "step": 2203 }, { "epoch": 29.401337792642142, "grad_norm": 0.8031525015830994, "learning_rate": 0.00010220244716351502, "loss": 2.8722, "step": 2204 }, { "epoch": 29.414715719063544, "grad_norm": 0.8128193616867065, "learning_rate": 0.00010215795328142381, "loss": 2.972, "step": 2205 }, { "epoch": 29.42809364548495, "grad_norm": 1.153869867324829, "learning_rate": 0.00010211345939933258, "loss": 2.8711, "step": 2206 }, { "epoch": 29.441471571906355, "grad_norm": 0.8942824602127075, "learning_rate": 0.0001020689655172414, "loss": 3.2308, "step": 2207 }, { "epoch": 29.45484949832776, "grad_norm": 0.8985738158226013, "learning_rate": 0.00010202447163515016, "loss": 2.8042, "step": 2208 }, { "epoch": 29.468227424749163, "grad_norm": 0.8202070593833923, "learning_rate": 0.00010197997775305896, "loss": 2.7193, "step": 2209 }, { "epoch": 29.48160535117057, "grad_norm": 0.7752913236618042, "learning_rate": 0.00010193548387096774, "loss": 2.8162, "step": 2210 }, { "epoch": 29.494983277591974, "grad_norm": 0.8399960994720459, "learning_rate": 0.00010189098998887654, "loss": 3.0488, "step": 2211 }, { "epoch": 29.50836120401338, "grad_norm": 0.9991989731788635, "learning_rate": 0.00010184649610678532, "loss": 2.8647, "step": 2212 }, { "epoch": 29.52173913043478, "grad_norm": 0.8456683158874512, "learning_rate": 0.00010180200222469411, "loss": 2.9635, "step": 2213 }, { "epoch": 29.535117056856187, "grad_norm": 0.7978901863098145, "learning_rate": 0.0001017575083426029, "loss": 2.8927, "step": 2214 }, { "epoch": 29.548494983277592, "grad_norm": 0.8560264706611633, "learning_rate": 0.00010171301446051169, "loss": 2.9403, "step": 2215 }, { "epoch": 29.561872909698998, "grad_norm": 0.7215510010719299, "learning_rate": 0.00010166852057842046, "loss": 2.7367, "step": 2216 }, { "epoch": 29.5752508361204, "grad_norm": 0.8395666480064392, "learning_rate": 0.00010162402669632927, "loss": 2.9857, "step": 2217 }, { "epoch": 29.588628762541806, "grad_norm": 0.9163089394569397, "learning_rate": 0.00010157953281423804, "loss": 2.9605, "step": 2218 }, { "epoch": 29.60200668896321, "grad_norm": 0.8586741089820862, "learning_rate": 0.00010153503893214684, "loss": 2.9543, "step": 2219 }, { "epoch": 29.615384615384617, "grad_norm": 0.7774202227592468, "learning_rate": 0.00010149054505005562, "loss": 3.0245, "step": 2220 }, { "epoch": 29.62876254180602, "grad_norm": 0.8288631439208984, "learning_rate": 0.00010144605116796441, "loss": 2.975, "step": 2221 }, { "epoch": 29.642140468227424, "grad_norm": 0.7886912226676941, "learning_rate": 0.0001014015572858732, "loss": 2.6679, "step": 2222 }, { "epoch": 29.65551839464883, "grad_norm": 0.7964534759521484, "learning_rate": 0.00010135706340378199, "loss": 2.9989, "step": 2223 }, { "epoch": 29.668896321070235, "grad_norm": 1.0234196186065674, "learning_rate": 0.00010131256952169078, "loss": 3.2245, "step": 2224 }, { "epoch": 29.682274247491637, "grad_norm": 0.8414390683174133, "learning_rate": 0.00010126807563959957, "loss": 3.0525, "step": 2225 }, { "epoch": 29.695652173913043, "grad_norm": 0.8034384250640869, "learning_rate": 0.00010122358175750834, "loss": 2.9012, "step": 2226 }, { "epoch": 29.70903010033445, "grad_norm": 0.7711749076843262, "learning_rate": 0.00010117908787541715, "loss": 2.7548, "step": 2227 }, { "epoch": 29.722408026755854, "grad_norm": 0.9712610840797424, "learning_rate": 0.00010113459399332592, "loss": 3.1162, "step": 2228 }, { "epoch": 29.735785953177256, "grad_norm": 0.8966991305351257, "learning_rate": 0.00010109010011123471, "loss": 3.0883, "step": 2229 }, { "epoch": 29.74916387959866, "grad_norm": 0.8569051027297974, "learning_rate": 0.0001010456062291435, "loss": 2.8909, "step": 2230 }, { "epoch": 29.762541806020067, "grad_norm": 0.9149661660194397, "learning_rate": 0.00010100111234705229, "loss": 2.7559, "step": 2231 }, { "epoch": 29.775919732441473, "grad_norm": 1.1889090538024902, "learning_rate": 0.00010095661846496107, "loss": 2.7872, "step": 2232 }, { "epoch": 29.789297658862875, "grad_norm": 0.8464928865432739, "learning_rate": 0.00010091212458286984, "loss": 2.8496, "step": 2233 }, { "epoch": 29.80267558528428, "grad_norm": 0.8210122585296631, "learning_rate": 0.00010086763070077865, "loss": 3.0782, "step": 2234 }, { "epoch": 29.816053511705686, "grad_norm": 0.7978618741035461, "learning_rate": 0.00010082313681868742, "loss": 2.9633, "step": 2235 }, { "epoch": 29.82943143812709, "grad_norm": 0.7863582968711853, "learning_rate": 0.00010077864293659622, "loss": 2.9536, "step": 2236 }, { "epoch": 29.842809364548494, "grad_norm": 0.9451448917388916, "learning_rate": 0.000100734149054505, "loss": 2.945, "step": 2237 }, { "epoch": 29.8561872909699, "grad_norm": 0.9478015303611755, "learning_rate": 0.0001006896551724138, "loss": 2.8408, "step": 2238 }, { "epoch": 29.869565217391305, "grad_norm": 0.804568886756897, "learning_rate": 0.00010064516129032258, "loss": 3.093, "step": 2239 }, { "epoch": 29.88294314381271, "grad_norm": 0.8370556235313416, "learning_rate": 0.00010060066740823137, "loss": 3.0561, "step": 2240 }, { "epoch": 29.896321070234112, "grad_norm": 0.9402264952659607, "learning_rate": 0.00010055617352614016, "loss": 3.0024, "step": 2241 }, { "epoch": 29.909698996655518, "grad_norm": 0.7789230942726135, "learning_rate": 0.00010051167964404895, "loss": 2.8388, "step": 2242 }, { "epoch": 29.923076923076923, "grad_norm": 0.8228744268417358, "learning_rate": 0.00010046718576195772, "loss": 3.1335, "step": 2243 }, { "epoch": 29.93645484949833, "grad_norm": 0.9612992405891418, "learning_rate": 0.00010042269187986653, "loss": 3.0355, "step": 2244 }, { "epoch": 29.94983277591973, "grad_norm": 0.989894688129425, "learning_rate": 0.0001003781979977753, "loss": 2.6879, "step": 2245 }, { "epoch": 29.963210702341136, "grad_norm": 0.7333267331123352, "learning_rate": 0.0001003337041156841, "loss": 3.004, "step": 2246 }, { "epoch": 29.976588628762542, "grad_norm": 0.802925705909729, "learning_rate": 0.00010028921023359288, "loss": 3.0526, "step": 2247 }, { "epoch": 29.989966555183948, "grad_norm": 0.8313626646995544, "learning_rate": 0.00010024471635150167, "loss": 2.554, "step": 2248 }, { "epoch": 30.0, "grad_norm": 1.1626179218292236, "learning_rate": 0.00010020022246941046, "loss": 3.0765, "step": 2249 }, { "epoch": 30.013377926421406, "grad_norm": 0.6676865220069885, "learning_rate": 0.00010015572858731925, "loss": 2.8693, "step": 2250 }, { "epoch": 30.02675585284281, "grad_norm": 0.719235897064209, "learning_rate": 0.00010011123470522804, "loss": 2.8247, "step": 2251 }, { "epoch": 30.040133779264213, "grad_norm": 0.8199754953384399, "learning_rate": 0.00010006674082313683, "loss": 3.0088, "step": 2252 }, { "epoch": 30.05351170568562, "grad_norm": 0.7587671279907227, "learning_rate": 0.0001000222469410456, "loss": 3.118, "step": 2253 }, { "epoch": 30.066889632107024, "grad_norm": 0.7287217974662781, "learning_rate": 9.99777530589544e-05, "loss": 2.6612, "step": 2254 }, { "epoch": 30.08026755852843, "grad_norm": 0.6484748125076294, "learning_rate": 9.993325917686318e-05, "loss": 3.0679, "step": 2255 }, { "epoch": 30.093645484949832, "grad_norm": 0.6975134015083313, "learning_rate": 9.988876529477197e-05, "loss": 2.9633, "step": 2256 }, { "epoch": 30.107023411371237, "grad_norm": 0.7293218970298767, "learning_rate": 9.984427141268076e-05, "loss": 2.9374, "step": 2257 }, { "epoch": 30.120401337792643, "grad_norm": 0.8367610573768616, "learning_rate": 9.979977753058954e-05, "loss": 2.8749, "step": 2258 }, { "epoch": 30.13377926421405, "grad_norm": 0.6851879954338074, "learning_rate": 9.975528364849834e-05, "loss": 2.4921, "step": 2259 }, { "epoch": 30.14715719063545, "grad_norm": 0.6916420459747314, "learning_rate": 9.971078976640712e-05, "loss": 2.6425, "step": 2260 }, { "epoch": 30.160535117056856, "grad_norm": 0.6615001559257507, "learning_rate": 9.966629588431591e-05, "loss": 2.6696, "step": 2261 }, { "epoch": 30.17391304347826, "grad_norm": 0.7402496933937073, "learning_rate": 9.96218020022247e-05, "loss": 2.7479, "step": 2262 }, { "epoch": 30.187290969899667, "grad_norm": 0.6824116110801697, "learning_rate": 9.957730812013348e-05, "loss": 2.427, "step": 2263 }, { "epoch": 30.20066889632107, "grad_norm": 0.6979151964187622, "learning_rate": 9.953281423804227e-05, "loss": 3.0677, "step": 2264 }, { "epoch": 30.214046822742475, "grad_norm": 0.7541396617889404, "learning_rate": 9.948832035595106e-05, "loss": 2.9202, "step": 2265 }, { "epoch": 30.22742474916388, "grad_norm": 0.671625554561615, "learning_rate": 9.944382647385985e-05, "loss": 2.6953, "step": 2266 }, { "epoch": 30.240802675585286, "grad_norm": 0.6994143128395081, "learning_rate": 9.939933259176863e-05, "loss": 2.877, "step": 2267 }, { "epoch": 30.254180602006688, "grad_norm": 0.7016029953956604, "learning_rate": 9.935483870967742e-05, "loss": 2.7883, "step": 2268 }, { "epoch": 30.267558528428093, "grad_norm": 0.7190927267074585, "learning_rate": 9.931034482758621e-05, "loss": 2.9066, "step": 2269 }, { "epoch": 30.2809364548495, "grad_norm": 0.8793424963951111, "learning_rate": 9.9265850945495e-05, "loss": 2.7433, "step": 2270 }, { "epoch": 30.294314381270905, "grad_norm": 0.7914022207260132, "learning_rate": 9.922135706340379e-05, "loss": 2.6283, "step": 2271 }, { "epoch": 30.307692307692307, "grad_norm": 0.7672899961471558, "learning_rate": 9.917686318131257e-05, "loss": 3.0767, "step": 2272 }, { "epoch": 30.321070234113712, "grad_norm": 0.7170990109443665, "learning_rate": 9.913236929922136e-05, "loss": 2.9666, "step": 2273 }, { "epoch": 30.334448160535118, "grad_norm": 0.7215063571929932, "learning_rate": 9.908787541713015e-05, "loss": 2.9646, "step": 2274 }, { "epoch": 30.347826086956523, "grad_norm": 0.6509780287742615, "learning_rate": 9.904338153503893e-05, "loss": 2.9134, "step": 2275 }, { "epoch": 30.361204013377925, "grad_norm": 0.6361657977104187, "learning_rate": 9.899888765294773e-05, "loss": 2.6955, "step": 2276 }, { "epoch": 30.37458193979933, "grad_norm": 0.6722748279571533, "learning_rate": 9.895439377085651e-05, "loss": 3.1694, "step": 2277 }, { "epoch": 30.387959866220736, "grad_norm": 0.796759307384491, "learning_rate": 9.89098998887653e-05, "loss": 2.9863, "step": 2278 }, { "epoch": 30.401337792642142, "grad_norm": 0.8303789496421814, "learning_rate": 9.886540600667409e-05, "loss": 2.6192, "step": 2279 }, { "epoch": 30.414715719063544, "grad_norm": 0.7565797567367554, "learning_rate": 9.882091212458287e-05, "loss": 2.8709, "step": 2280 }, { "epoch": 30.42809364548495, "grad_norm": 0.7000163197517395, "learning_rate": 9.877641824249167e-05, "loss": 2.9017, "step": 2281 }, { "epoch": 30.441471571906355, "grad_norm": 0.7144603729248047, "learning_rate": 9.873192436040045e-05, "loss": 2.9685, "step": 2282 }, { "epoch": 30.45484949832776, "grad_norm": 0.8616060018539429, "learning_rate": 9.868743047830923e-05, "loss": 2.554, "step": 2283 }, { "epoch": 30.468227424749163, "grad_norm": 0.6722779273986816, "learning_rate": 9.864293659621803e-05, "loss": 2.7132, "step": 2284 }, { "epoch": 30.48160535117057, "grad_norm": 0.8574376702308655, "learning_rate": 9.859844271412681e-05, "loss": 2.8295, "step": 2285 }, { "epoch": 30.494983277591974, "grad_norm": 0.6491283178329468, "learning_rate": 9.855394883203561e-05, "loss": 2.6042, "step": 2286 }, { "epoch": 30.50836120401338, "grad_norm": 0.677865743637085, "learning_rate": 9.850945494994439e-05, "loss": 2.9783, "step": 2287 }, { "epoch": 30.52173913043478, "grad_norm": 0.6842278242111206, "learning_rate": 9.846496106785317e-05, "loss": 2.8781, "step": 2288 }, { "epoch": 30.535117056856187, "grad_norm": 0.7441837787628174, "learning_rate": 9.842046718576197e-05, "loss": 2.73, "step": 2289 }, { "epoch": 30.548494983277592, "grad_norm": 0.8213462233543396, "learning_rate": 9.837597330367075e-05, "loss": 3.0742, "step": 2290 }, { "epoch": 30.561872909698998, "grad_norm": 0.7320848703384399, "learning_rate": 9.833147942157955e-05, "loss": 3.0144, "step": 2291 }, { "epoch": 30.5752508361204, "grad_norm": 0.6924611330032349, "learning_rate": 9.828698553948833e-05, "loss": 2.825, "step": 2292 }, { "epoch": 30.588628762541806, "grad_norm": 0.6979595422744751, "learning_rate": 9.824249165739711e-05, "loss": 2.5886, "step": 2293 }, { "epoch": 30.60200668896321, "grad_norm": 0.772278368473053, "learning_rate": 9.819799777530591e-05, "loss": 2.9113, "step": 2294 }, { "epoch": 30.615384615384617, "grad_norm": 0.7430849075317383, "learning_rate": 9.815350389321468e-05, "loss": 2.8571, "step": 2295 }, { "epoch": 30.62876254180602, "grad_norm": 0.7554115056991577, "learning_rate": 9.810901001112347e-05, "loss": 2.8272, "step": 2296 }, { "epoch": 30.642140468227424, "grad_norm": 0.8022170662879944, "learning_rate": 9.806451612903226e-05, "loss": 2.8113, "step": 2297 }, { "epoch": 30.65551839464883, "grad_norm": 0.7076969146728516, "learning_rate": 9.802002224694105e-05, "loss": 2.8282, "step": 2298 }, { "epoch": 30.668896321070235, "grad_norm": 0.8672876954078674, "learning_rate": 9.797552836484983e-05, "loss": 2.8059, "step": 2299 }, { "epoch": 30.682274247491637, "grad_norm": 0.7063407301902771, "learning_rate": 9.793103448275862e-05, "loss": 3.0516, "step": 2300 }, { "epoch": 30.695652173913043, "grad_norm": 0.6833634376525879, "learning_rate": 9.788654060066741e-05, "loss": 3.0762, "step": 2301 }, { "epoch": 30.70903010033445, "grad_norm": 0.7486119270324707, "learning_rate": 9.78420467185762e-05, "loss": 2.7423, "step": 2302 }, { "epoch": 30.722408026755854, "grad_norm": 0.7409520149230957, "learning_rate": 9.779755283648499e-05, "loss": 2.9579, "step": 2303 }, { "epoch": 30.735785953177256, "grad_norm": 0.7436200380325317, "learning_rate": 9.775305895439377e-05, "loss": 2.796, "step": 2304 }, { "epoch": 30.74916387959866, "grad_norm": 0.7009103298187256, "learning_rate": 9.770856507230256e-05, "loss": 2.9099, "step": 2305 }, { "epoch": 30.762541806020067, "grad_norm": 0.7946734428405762, "learning_rate": 9.766407119021135e-05, "loss": 3.014, "step": 2306 }, { "epoch": 30.775919732441473, "grad_norm": 0.7147461175918579, "learning_rate": 9.761957730812013e-05, "loss": 2.5407, "step": 2307 }, { "epoch": 30.789297658862875, "grad_norm": 0.7347055673599243, "learning_rate": 9.757508342602893e-05, "loss": 3.0734, "step": 2308 }, { "epoch": 30.80267558528428, "grad_norm": 0.7037932872772217, "learning_rate": 9.753058954393771e-05, "loss": 2.8854, "step": 2309 }, { "epoch": 30.816053511705686, "grad_norm": 0.6475211977958679, "learning_rate": 9.74860956618465e-05, "loss": 2.8844, "step": 2310 }, { "epoch": 30.82943143812709, "grad_norm": 0.7679370045661926, "learning_rate": 9.744160177975529e-05, "loss": 2.8667, "step": 2311 }, { "epoch": 30.842809364548494, "grad_norm": 0.7334820032119751, "learning_rate": 9.739710789766407e-05, "loss": 2.8936, "step": 2312 }, { "epoch": 30.8561872909699, "grad_norm": 0.6336019039154053, "learning_rate": 9.735261401557287e-05, "loss": 2.7476, "step": 2313 }, { "epoch": 30.869565217391305, "grad_norm": 0.6566322445869446, "learning_rate": 9.730812013348165e-05, "loss": 2.8259, "step": 2314 }, { "epoch": 30.88294314381271, "grad_norm": 0.6635501384735107, "learning_rate": 9.726362625139043e-05, "loss": 2.7668, "step": 2315 }, { "epoch": 30.896321070234112, "grad_norm": 0.7602748274803162, "learning_rate": 9.721913236929923e-05, "loss": 2.8645, "step": 2316 }, { "epoch": 30.909698996655518, "grad_norm": 0.8196545243263245, "learning_rate": 9.717463848720801e-05, "loss": 2.8413, "step": 2317 }, { "epoch": 30.923076923076923, "grad_norm": 0.6762588620185852, "learning_rate": 9.713014460511681e-05, "loss": 2.6832, "step": 2318 }, { "epoch": 30.93645484949833, "grad_norm": 0.665920078754425, "learning_rate": 9.708565072302559e-05, "loss": 3.0145, "step": 2319 }, { "epoch": 30.94983277591973, "grad_norm": 0.6335712671279907, "learning_rate": 9.704115684093437e-05, "loss": 2.9063, "step": 2320 }, { "epoch": 30.963210702341136, "grad_norm": 0.7016931176185608, "learning_rate": 9.699666295884317e-05, "loss": 2.7531, "step": 2321 }, { "epoch": 30.976588628762542, "grad_norm": 0.8611680865287781, "learning_rate": 9.695216907675195e-05, "loss": 2.9685, "step": 2322 }, { "epoch": 30.989966555183948, "grad_norm": 0.6724279522895813, "learning_rate": 9.690767519466075e-05, "loss": 2.8643, "step": 2323 }, { "epoch": 31.0, "grad_norm": 0.7072698473930359, "learning_rate": 9.686318131256953e-05, "loss": 2.8678, "step": 2324 }, { "epoch": 31.013377926421406, "grad_norm": 0.7127351760864258, "learning_rate": 9.681868743047831e-05, "loss": 2.7762, "step": 2325 }, { "epoch": 31.02675585284281, "grad_norm": 0.6769905686378479, "learning_rate": 9.677419354838711e-05, "loss": 2.3606, "step": 2326 }, { "epoch": 31.040133779264213, "grad_norm": 0.6301751732826233, "learning_rate": 9.672969966629589e-05, "loss": 2.8011, "step": 2327 }, { "epoch": 31.05351170568562, "grad_norm": 0.6441762447357178, "learning_rate": 9.668520578420469e-05, "loss": 2.902, "step": 2328 }, { "epoch": 31.066889632107024, "grad_norm": 0.6481513381004333, "learning_rate": 9.664071190211347e-05, "loss": 2.7218, "step": 2329 }, { "epoch": 31.08026755852843, "grad_norm": 0.5800172686576843, "learning_rate": 9.659621802002225e-05, "loss": 2.6825, "step": 2330 }, { "epoch": 31.093645484949832, "grad_norm": 0.574657142162323, "learning_rate": 9.655172413793105e-05, "loss": 3.0232, "step": 2331 }, { "epoch": 31.107023411371237, "grad_norm": 0.7543913125991821, "learning_rate": 9.650723025583983e-05, "loss": 2.8385, "step": 2332 }, { "epoch": 31.120401337792643, "grad_norm": 0.5946618914604187, "learning_rate": 9.646273637374862e-05, "loss": 2.9112, "step": 2333 }, { "epoch": 31.13377926421405, "grad_norm": 0.6467399001121521, "learning_rate": 9.641824249165741e-05, "loss": 2.6113, "step": 2334 }, { "epoch": 31.14715719063545, "grad_norm": 0.6605884432792664, "learning_rate": 9.637374860956619e-05, "loss": 2.6679, "step": 2335 }, { "epoch": 31.160535117056856, "grad_norm": 0.6213564276695251, "learning_rate": 9.632925472747497e-05, "loss": 2.7759, "step": 2336 }, { "epoch": 31.17391304347826, "grad_norm": 0.5951142907142639, "learning_rate": 9.628476084538375e-05, "loss": 2.7004, "step": 2337 }, { "epoch": 31.187290969899667, "grad_norm": 0.7088178396224976, "learning_rate": 9.624026696329255e-05, "loss": 2.6991, "step": 2338 }, { "epoch": 31.20066889632107, "grad_norm": 0.6114148497581482, "learning_rate": 9.619577308120133e-05, "loss": 2.8156, "step": 2339 }, { "epoch": 31.214046822742475, "grad_norm": 0.6434885859489441, "learning_rate": 9.615127919911013e-05, "loss": 2.7579, "step": 2340 }, { "epoch": 31.22742474916388, "grad_norm": 0.6285985708236694, "learning_rate": 9.610678531701891e-05, "loss": 2.748, "step": 2341 }, { "epoch": 31.240802675585286, "grad_norm": 0.6324411630630493, "learning_rate": 9.60622914349277e-05, "loss": 3.0152, "step": 2342 }, { "epoch": 31.254180602006688, "grad_norm": 0.6331474781036377, "learning_rate": 9.601779755283649e-05, "loss": 2.6819, "step": 2343 }, { "epoch": 31.267558528428093, "grad_norm": 0.6276957392692566, "learning_rate": 9.597330367074527e-05, "loss": 2.8349, "step": 2344 }, { "epoch": 31.2809364548495, "grad_norm": 0.6469705104827881, "learning_rate": 9.592880978865407e-05, "loss": 2.9038, "step": 2345 }, { "epoch": 31.294314381270905, "grad_norm": 0.6182751059532166, "learning_rate": 9.588431590656285e-05, "loss": 2.6157, "step": 2346 }, { "epoch": 31.307692307692307, "grad_norm": 0.6365599632263184, "learning_rate": 9.583982202447163e-05, "loss": 2.6948, "step": 2347 }, { "epoch": 31.321070234113712, "grad_norm": 0.6663181781768799, "learning_rate": 9.579532814238043e-05, "loss": 2.7856, "step": 2348 }, { "epoch": 31.334448160535118, "grad_norm": 0.6316637396812439, "learning_rate": 9.575083426028921e-05, "loss": 2.7797, "step": 2349 }, { "epoch": 31.347826086956523, "grad_norm": 0.6328778862953186, "learning_rate": 9.570634037819801e-05, "loss": 2.856, "step": 2350 }, { "epoch": 31.361204013377925, "grad_norm": 0.6759666204452515, "learning_rate": 9.566184649610679e-05, "loss": 2.8553, "step": 2351 }, { "epoch": 31.37458193979933, "grad_norm": 0.6212480664253235, "learning_rate": 9.561735261401557e-05, "loss": 2.9275, "step": 2352 }, { "epoch": 31.387959866220736, "grad_norm": 0.6709821224212646, "learning_rate": 9.557285873192437e-05, "loss": 2.6805, "step": 2353 }, { "epoch": 31.401337792642142, "grad_norm": 0.883128821849823, "learning_rate": 9.552836484983315e-05, "loss": 2.9176, "step": 2354 }, { "epoch": 31.414715719063544, "grad_norm": 0.6021890640258789, "learning_rate": 9.548387096774195e-05, "loss": 2.9855, "step": 2355 }, { "epoch": 31.42809364548495, "grad_norm": 0.6398679614067078, "learning_rate": 9.543937708565073e-05, "loss": 2.6652, "step": 2356 }, { "epoch": 31.441471571906355, "grad_norm": 0.6757022738456726, "learning_rate": 9.539488320355951e-05, "loss": 2.7315, "step": 2357 }, { "epoch": 31.45484949832776, "grad_norm": 0.6144642233848572, "learning_rate": 9.53503893214683e-05, "loss": 2.9645, "step": 2358 }, { "epoch": 31.468227424749163, "grad_norm": 0.6620282530784607, "learning_rate": 9.530589543937709e-05, "loss": 2.6869, "step": 2359 }, { "epoch": 31.48160535117057, "grad_norm": 0.7360846996307373, "learning_rate": 9.526140155728588e-05, "loss": 2.8083, "step": 2360 }, { "epoch": 31.494983277591974, "grad_norm": 0.6184893250465393, "learning_rate": 9.521690767519467e-05, "loss": 2.9699, "step": 2361 }, { "epoch": 31.50836120401338, "grad_norm": 0.5998436808586121, "learning_rate": 9.517241379310345e-05, "loss": 2.6591, "step": 2362 }, { "epoch": 31.52173913043478, "grad_norm": 0.6120555996894836, "learning_rate": 9.512791991101225e-05, "loss": 3.0051, "step": 2363 }, { "epoch": 31.535117056856187, "grad_norm": 0.6484112739562988, "learning_rate": 9.508342602892103e-05, "loss": 2.8998, "step": 2364 }, { "epoch": 31.548494983277592, "grad_norm": 0.7333430051803589, "learning_rate": 9.503893214682982e-05, "loss": 2.8552, "step": 2365 }, { "epoch": 31.561872909698998, "grad_norm": 0.6599973440170288, "learning_rate": 9.49944382647386e-05, "loss": 2.8119, "step": 2366 }, { "epoch": 31.5752508361204, "grad_norm": 0.6654923558235168, "learning_rate": 9.494994438264739e-05, "loss": 2.6518, "step": 2367 }, { "epoch": 31.588628762541806, "grad_norm": 0.6521298289299011, "learning_rate": 9.490545050055618e-05, "loss": 2.9758, "step": 2368 }, { "epoch": 31.60200668896321, "grad_norm": 0.6431354284286499, "learning_rate": 9.486095661846497e-05, "loss": 2.7054, "step": 2369 }, { "epoch": 31.615384615384617, "grad_norm": 0.7166205644607544, "learning_rate": 9.481646273637376e-05, "loss": 2.8292, "step": 2370 }, { "epoch": 31.62876254180602, "grad_norm": 0.6184803247451782, "learning_rate": 9.477196885428255e-05, "loss": 2.8888, "step": 2371 }, { "epoch": 31.642140468227424, "grad_norm": 0.6148157119750977, "learning_rate": 9.472747497219133e-05, "loss": 3.0765, "step": 2372 }, { "epoch": 31.65551839464883, "grad_norm": 0.5624946355819702, "learning_rate": 9.468298109010012e-05, "loss": 2.4355, "step": 2373 }, { "epoch": 31.668896321070235, "grad_norm": 0.6958364844322205, "learning_rate": 9.46384872080089e-05, "loss": 2.7913, "step": 2374 }, { "epoch": 31.682274247491637, "grad_norm": 0.5998165011405945, "learning_rate": 9.45939933259177e-05, "loss": 2.7362, "step": 2375 }, { "epoch": 31.695652173913043, "grad_norm": 0.7454515695571899, "learning_rate": 9.454949944382648e-05, "loss": 2.6624, "step": 2376 }, { "epoch": 31.70903010033445, "grad_norm": 0.7100658416748047, "learning_rate": 9.450500556173527e-05, "loss": 3.0165, "step": 2377 }, { "epoch": 31.722408026755854, "grad_norm": 0.6329564452171326, "learning_rate": 9.446051167964405e-05, "loss": 2.6719, "step": 2378 }, { "epoch": 31.735785953177256, "grad_norm": 0.6892597675323486, "learning_rate": 9.441601779755283e-05, "loss": 2.9397, "step": 2379 }, { "epoch": 31.74916387959866, "grad_norm": 0.7385604381561279, "learning_rate": 9.437152391546163e-05, "loss": 2.8576, "step": 2380 }, { "epoch": 31.762541806020067, "grad_norm": 0.6018693447113037, "learning_rate": 9.432703003337041e-05, "loss": 2.6954, "step": 2381 }, { "epoch": 31.775919732441473, "grad_norm": 0.8231506943702698, "learning_rate": 9.42825361512792e-05, "loss": 2.6043, "step": 2382 }, { "epoch": 31.789297658862875, "grad_norm": 0.6664673686027527, "learning_rate": 9.423804226918799e-05, "loss": 2.6166, "step": 2383 }, { "epoch": 31.80267558528428, "grad_norm": 0.6987932324409485, "learning_rate": 9.419354838709677e-05, "loss": 2.8913, "step": 2384 }, { "epoch": 31.816053511705686, "grad_norm": 0.6309788823127747, "learning_rate": 9.414905450500557e-05, "loss": 2.9431, "step": 2385 }, { "epoch": 31.82943143812709, "grad_norm": 0.6008022427558899, "learning_rate": 9.410456062291435e-05, "loss": 2.8104, "step": 2386 }, { "epoch": 31.842809364548494, "grad_norm": 0.5797815918922424, "learning_rate": 9.406006674082315e-05, "loss": 2.685, "step": 2387 }, { "epoch": 31.8561872909699, "grad_norm": 0.6194100379943848, "learning_rate": 9.401557285873193e-05, "loss": 2.9663, "step": 2388 }, { "epoch": 31.869565217391305, "grad_norm": 0.633310079574585, "learning_rate": 9.397107897664071e-05, "loss": 2.9512, "step": 2389 }, { "epoch": 31.88294314381271, "grad_norm": 0.6811971664428711, "learning_rate": 9.39265850945495e-05, "loss": 2.8803, "step": 2390 }, { "epoch": 31.896321070234112, "grad_norm": 0.6683096885681152, "learning_rate": 9.388209121245829e-05, "loss": 2.8334, "step": 2391 }, { "epoch": 31.909698996655518, "grad_norm": 0.6561327576637268, "learning_rate": 9.383759733036708e-05, "loss": 2.7997, "step": 2392 }, { "epoch": 31.923076923076923, "grad_norm": 0.7540359497070312, "learning_rate": 9.379310344827587e-05, "loss": 2.9634, "step": 2393 }, { "epoch": 31.93645484949833, "grad_norm": 0.6072854399681091, "learning_rate": 9.374860956618465e-05, "loss": 2.8317, "step": 2394 }, { "epoch": 31.94983277591973, "grad_norm": 0.6614176630973816, "learning_rate": 9.370411568409344e-05, "loss": 2.9773, "step": 2395 }, { "epoch": 31.963210702341136, "grad_norm": 0.6928074955940247, "learning_rate": 9.365962180200223e-05, "loss": 2.8665, "step": 2396 }, { "epoch": 31.976588628762542, "grad_norm": 0.644250750541687, "learning_rate": 9.361512791991102e-05, "loss": 2.7206, "step": 2397 }, { "epoch": 31.989966555183948, "grad_norm": 0.6529290080070496, "learning_rate": 9.35706340378198e-05, "loss": 2.7764, "step": 2398 }, { "epoch": 32.0, "grad_norm": 0.7077373266220093, "learning_rate": 9.352614015572859e-05, "loss": 2.9435, "step": 2399 }, { "epoch": 32.013377926421406, "grad_norm": 0.614982545375824, "learning_rate": 9.348164627363738e-05, "loss": 2.9358, "step": 2400 }, { "epoch": 32.02675585284281, "grad_norm": 0.6013137102127075, "learning_rate": 9.343715239154617e-05, "loss": 2.7346, "step": 2401 }, { "epoch": 32.04013377926422, "grad_norm": 0.653290867805481, "learning_rate": 9.339265850945496e-05, "loss": 2.9841, "step": 2402 }, { "epoch": 32.05351170568562, "grad_norm": 0.6256197094917297, "learning_rate": 9.334816462736374e-05, "loss": 2.8465, "step": 2403 }, { "epoch": 32.06688963210702, "grad_norm": 0.5798116326332092, "learning_rate": 9.330367074527253e-05, "loss": 2.5661, "step": 2404 }, { "epoch": 32.080267558528426, "grad_norm": 0.6489708423614502, "learning_rate": 9.325917686318132e-05, "loss": 2.8186, "step": 2405 }, { "epoch": 32.09364548494983, "grad_norm": 0.6225152015686035, "learning_rate": 9.32146829810901e-05, "loss": 2.6598, "step": 2406 }, { "epoch": 32.10702341137124, "grad_norm": 0.6490108966827393, "learning_rate": 9.31701890989989e-05, "loss": 2.7589, "step": 2407 }, { "epoch": 32.12040133779264, "grad_norm": 0.5600370764732361, "learning_rate": 9.312569521690768e-05, "loss": 2.6324, "step": 2408 }, { "epoch": 32.13377926421405, "grad_norm": 0.6141876578330994, "learning_rate": 9.308120133481647e-05, "loss": 2.7371, "step": 2409 }, { "epoch": 32.147157190635454, "grad_norm": 0.5927343368530273, "learning_rate": 9.303670745272526e-05, "loss": 2.9498, "step": 2410 }, { "epoch": 32.16053511705686, "grad_norm": 0.686123788356781, "learning_rate": 9.299221357063404e-05, "loss": 2.698, "step": 2411 }, { "epoch": 32.17391304347826, "grad_norm": 0.6357244253158569, "learning_rate": 9.294771968854284e-05, "loss": 2.7431, "step": 2412 }, { "epoch": 32.187290969899664, "grad_norm": 0.6475897431373596, "learning_rate": 9.290322580645162e-05, "loss": 2.7808, "step": 2413 }, { "epoch": 32.20066889632107, "grad_norm": 0.5825409293174744, "learning_rate": 9.28587319243604e-05, "loss": 2.6867, "step": 2414 }, { "epoch": 32.214046822742475, "grad_norm": 0.6462129354476929, "learning_rate": 9.28142380422692e-05, "loss": 2.5075, "step": 2415 }, { "epoch": 32.22742474916388, "grad_norm": 0.6217963099479675, "learning_rate": 9.276974416017798e-05, "loss": 3.1276, "step": 2416 }, { "epoch": 32.240802675585286, "grad_norm": 1.1910638809204102, "learning_rate": 9.272525027808678e-05, "loss": 2.7246, "step": 2417 }, { "epoch": 32.25418060200669, "grad_norm": 0.6929856538772583, "learning_rate": 9.268075639599556e-05, "loss": 2.9576, "step": 2418 }, { "epoch": 32.2675585284281, "grad_norm": 0.6331362128257751, "learning_rate": 9.263626251390433e-05, "loss": 2.7267, "step": 2419 }, { "epoch": 32.280936454849495, "grad_norm": 0.6223586797714233, "learning_rate": 9.259176863181313e-05, "loss": 2.5904, "step": 2420 }, { "epoch": 32.2943143812709, "grad_norm": 0.6478082537651062, "learning_rate": 9.254727474972191e-05, "loss": 2.7874, "step": 2421 }, { "epoch": 32.30769230769231, "grad_norm": 0.6407091617584229, "learning_rate": 9.25027808676307e-05, "loss": 2.7926, "step": 2422 }, { "epoch": 32.32107023411371, "grad_norm": 0.5770341753959656, "learning_rate": 9.245828698553949e-05, "loss": 2.9566, "step": 2423 }, { "epoch": 32.33444816053512, "grad_norm": 0.6170133948326111, "learning_rate": 9.241379310344827e-05, "loss": 2.8071, "step": 2424 }, { "epoch": 32.34782608695652, "grad_norm": 0.6636959910392761, "learning_rate": 9.236929922135707e-05, "loss": 2.8502, "step": 2425 }, { "epoch": 32.36120401337793, "grad_norm": 0.5865075588226318, "learning_rate": 9.232480533926585e-05, "loss": 2.9213, "step": 2426 }, { "epoch": 32.374581939799334, "grad_norm": 0.6461585760116577, "learning_rate": 9.228031145717464e-05, "loss": 2.7773, "step": 2427 }, { "epoch": 32.38795986622073, "grad_norm": 0.6047717928886414, "learning_rate": 9.223581757508343e-05, "loss": 2.7859, "step": 2428 }, { "epoch": 32.40133779264214, "grad_norm": 0.6110160946846008, "learning_rate": 9.219132369299221e-05, "loss": 3.0866, "step": 2429 }, { "epoch": 32.414715719063544, "grad_norm": 0.6165122389793396, "learning_rate": 9.2146829810901e-05, "loss": 2.5561, "step": 2430 }, { "epoch": 32.42809364548495, "grad_norm": 0.6536113023757935, "learning_rate": 9.210233592880979e-05, "loss": 2.6207, "step": 2431 }, { "epoch": 32.441471571906355, "grad_norm": 0.6606733202934265, "learning_rate": 9.205784204671858e-05, "loss": 2.8492, "step": 2432 }, { "epoch": 32.45484949832776, "grad_norm": 0.5955353379249573, "learning_rate": 9.201334816462737e-05, "loss": 2.517, "step": 2433 }, { "epoch": 32.468227424749166, "grad_norm": 0.6918089985847473, "learning_rate": 9.196885428253615e-05, "loss": 2.7207, "step": 2434 }, { "epoch": 32.48160535117057, "grad_norm": 0.6295514702796936, "learning_rate": 9.192436040044494e-05, "loss": 2.8519, "step": 2435 }, { "epoch": 32.49498327759197, "grad_norm": 0.5654889345169067, "learning_rate": 9.187986651835373e-05, "loss": 2.6741, "step": 2436 }, { "epoch": 32.508361204013376, "grad_norm": 0.6156261563301086, "learning_rate": 9.183537263626252e-05, "loss": 2.8508, "step": 2437 }, { "epoch": 32.52173913043478, "grad_norm": 0.578255832195282, "learning_rate": 9.17908787541713e-05, "loss": 2.7162, "step": 2438 }, { "epoch": 32.53511705685619, "grad_norm": 0.6073182225227356, "learning_rate": 9.174638487208009e-05, "loss": 2.9915, "step": 2439 }, { "epoch": 32.54849498327759, "grad_norm": 0.6045039892196655, "learning_rate": 9.170189098998888e-05, "loss": 2.7507, "step": 2440 }, { "epoch": 32.561872909699, "grad_norm": 0.6131772398948669, "learning_rate": 9.165739710789767e-05, "loss": 2.8352, "step": 2441 }, { "epoch": 32.575250836120404, "grad_norm": 0.5804582834243774, "learning_rate": 9.161290322580646e-05, "loss": 2.844, "step": 2442 }, { "epoch": 32.58862876254181, "grad_norm": 0.6363027095794678, "learning_rate": 9.156840934371524e-05, "loss": 2.7884, "step": 2443 }, { "epoch": 32.60200668896321, "grad_norm": 0.599229097366333, "learning_rate": 9.152391546162403e-05, "loss": 2.5051, "step": 2444 }, { "epoch": 32.61538461538461, "grad_norm": 0.6206624507904053, "learning_rate": 9.147942157953282e-05, "loss": 2.9053, "step": 2445 }, { "epoch": 32.62876254180602, "grad_norm": 0.6836562752723694, "learning_rate": 9.14349276974416e-05, "loss": 2.8332, "step": 2446 }, { "epoch": 32.642140468227424, "grad_norm": 0.6113333702087402, "learning_rate": 9.13904338153504e-05, "loss": 2.8872, "step": 2447 }, { "epoch": 32.65551839464883, "grad_norm": 0.5945793986320496, "learning_rate": 9.134593993325918e-05, "loss": 2.7071, "step": 2448 }, { "epoch": 32.668896321070235, "grad_norm": 0.647544264793396, "learning_rate": 9.130144605116796e-05, "loss": 2.7016, "step": 2449 }, { "epoch": 32.68227424749164, "grad_norm": 0.5891870856285095, "learning_rate": 9.125695216907676e-05, "loss": 2.7178, "step": 2450 }, { "epoch": 32.69565217391305, "grad_norm": 0.5718060731887817, "learning_rate": 9.121245828698554e-05, "loss": 2.5202, "step": 2451 }, { "epoch": 32.709030100334445, "grad_norm": 0.6582256555557251, "learning_rate": 9.116796440489434e-05, "loss": 2.9955, "step": 2452 }, { "epoch": 32.72240802675585, "grad_norm": 0.6060263514518738, "learning_rate": 9.112347052280312e-05, "loss": 2.7279, "step": 2453 }, { "epoch": 32.735785953177256, "grad_norm": 0.6045027375221252, "learning_rate": 9.10789766407119e-05, "loss": 2.7305, "step": 2454 }, { "epoch": 32.74916387959866, "grad_norm": 0.5832952857017517, "learning_rate": 9.10344827586207e-05, "loss": 2.8703, "step": 2455 }, { "epoch": 32.76254180602007, "grad_norm": 0.5899837613105774, "learning_rate": 9.098998887652948e-05, "loss": 2.5243, "step": 2456 }, { "epoch": 32.77591973244147, "grad_norm": 0.6895177960395813, "learning_rate": 9.094549499443828e-05, "loss": 2.7974, "step": 2457 }, { "epoch": 32.78929765886288, "grad_norm": 0.5995526909828186, "learning_rate": 9.090100111234706e-05, "loss": 2.6908, "step": 2458 }, { "epoch": 32.802675585284284, "grad_norm": 0.592761754989624, "learning_rate": 9.085650723025584e-05, "loss": 2.7314, "step": 2459 }, { "epoch": 32.81605351170568, "grad_norm": 0.5758498311042786, "learning_rate": 9.081201334816463e-05, "loss": 2.5282, "step": 2460 }, { "epoch": 32.82943143812709, "grad_norm": 0.6093677282333374, "learning_rate": 9.076751946607341e-05, "loss": 2.9135, "step": 2461 }, { "epoch": 32.84280936454849, "grad_norm": 0.617378830909729, "learning_rate": 9.07230255839822e-05, "loss": 2.6457, "step": 2462 }, { "epoch": 32.8561872909699, "grad_norm": 0.5618733763694763, "learning_rate": 9.067853170189099e-05, "loss": 2.4018, "step": 2463 }, { "epoch": 32.869565217391305, "grad_norm": 0.6228212714195251, "learning_rate": 9.063403781979978e-05, "loss": 2.7449, "step": 2464 }, { "epoch": 32.88294314381271, "grad_norm": 0.6647076606750488, "learning_rate": 9.058954393770856e-05, "loss": 2.7208, "step": 2465 }, { "epoch": 32.896321070234116, "grad_norm": 0.6458041071891785, "learning_rate": 9.054505005561735e-05, "loss": 2.9473, "step": 2466 }, { "epoch": 32.90969899665552, "grad_norm": 0.5941137671470642, "learning_rate": 9.050055617352614e-05, "loss": 2.807, "step": 2467 }, { "epoch": 32.92307692307692, "grad_norm": 0.7032221555709839, "learning_rate": 9.045606229143493e-05, "loss": 2.7777, "step": 2468 }, { "epoch": 32.936454849498325, "grad_norm": 0.5803418755531311, "learning_rate": 9.041156840934372e-05, "loss": 2.7822, "step": 2469 }, { "epoch": 32.94983277591973, "grad_norm": 0.5971820950508118, "learning_rate": 9.03670745272525e-05, "loss": 2.8719, "step": 2470 }, { "epoch": 32.96321070234114, "grad_norm": 0.6287712454795837, "learning_rate": 9.032258064516129e-05, "loss": 2.9398, "step": 2471 }, { "epoch": 32.97658862876254, "grad_norm": 0.5914057493209839, "learning_rate": 9.027808676307008e-05, "loss": 2.8867, "step": 2472 }, { "epoch": 32.98996655518395, "grad_norm": 0.5863922834396362, "learning_rate": 9.023359288097886e-05, "loss": 2.8591, "step": 2473 }, { "epoch": 33.0, "grad_norm": 0.7483989000320435, "learning_rate": 9.018909899888766e-05, "loss": 2.6477, "step": 2474 }, { "epoch": 33.013377926421406, "grad_norm": 0.6205896139144897, "learning_rate": 9.014460511679644e-05, "loss": 2.5337, "step": 2475 }, { "epoch": 33.02675585284281, "grad_norm": 0.5723957419395447, "learning_rate": 9.010011123470522e-05, "loss": 2.5932, "step": 2476 }, { "epoch": 33.04013377926422, "grad_norm": 0.5575253963470459, "learning_rate": 9.005561735261402e-05, "loss": 2.6566, "step": 2477 }, { "epoch": 33.05351170568562, "grad_norm": 0.5518794655799866, "learning_rate": 9.00111234705228e-05, "loss": 2.6017, "step": 2478 }, { "epoch": 33.06688963210702, "grad_norm": 0.5580832958221436, "learning_rate": 8.99666295884316e-05, "loss": 2.5346, "step": 2479 }, { "epoch": 33.080267558528426, "grad_norm": 0.5831477046012878, "learning_rate": 8.992213570634038e-05, "loss": 2.4491, "step": 2480 }, { "epoch": 33.09364548494983, "grad_norm": 0.5773186683654785, "learning_rate": 8.987764182424916e-05, "loss": 2.6822, "step": 2481 }, { "epoch": 33.10702341137124, "grad_norm": 0.63813316822052, "learning_rate": 8.983314794215796e-05, "loss": 2.6537, "step": 2482 }, { "epoch": 33.12040133779264, "grad_norm": 0.6081259250640869, "learning_rate": 8.978865406006674e-05, "loss": 2.6482, "step": 2483 }, { "epoch": 33.13377926421405, "grad_norm": 0.5737481117248535, "learning_rate": 8.974416017797554e-05, "loss": 2.6858, "step": 2484 }, { "epoch": 33.147157190635454, "grad_norm": 0.6300746202468872, "learning_rate": 8.969966629588432e-05, "loss": 2.7791, "step": 2485 }, { "epoch": 33.16053511705686, "grad_norm": 0.6003954410552979, "learning_rate": 8.96551724137931e-05, "loss": 2.8088, "step": 2486 }, { "epoch": 33.17391304347826, "grad_norm": 0.5618523955345154, "learning_rate": 8.96106785317019e-05, "loss": 2.6345, "step": 2487 }, { "epoch": 33.187290969899664, "grad_norm": 0.6398965120315552, "learning_rate": 8.956618464961068e-05, "loss": 2.7798, "step": 2488 }, { "epoch": 33.20066889632107, "grad_norm": 0.6220288276672363, "learning_rate": 8.952169076751948e-05, "loss": 2.8217, "step": 2489 }, { "epoch": 33.214046822742475, "grad_norm": 0.6096678376197815, "learning_rate": 8.947719688542826e-05, "loss": 2.6255, "step": 2490 }, { "epoch": 33.22742474916388, "grad_norm": 0.5997774600982666, "learning_rate": 8.943270300333704e-05, "loss": 2.8361, "step": 2491 }, { "epoch": 33.240802675585286, "grad_norm": 0.6017264127731323, "learning_rate": 8.938820912124584e-05, "loss": 2.7003, "step": 2492 }, { "epoch": 33.25418060200669, "grad_norm": 0.6213170289993286, "learning_rate": 8.934371523915462e-05, "loss": 2.8992, "step": 2493 }, { "epoch": 33.2675585284281, "grad_norm": 0.6049240231513977, "learning_rate": 8.929922135706342e-05, "loss": 2.79, "step": 2494 }, { "epoch": 33.280936454849495, "grad_norm": 0.6120469570159912, "learning_rate": 8.92547274749722e-05, "loss": 2.7695, "step": 2495 }, { "epoch": 33.2943143812709, "grad_norm": 0.5764347314834595, "learning_rate": 8.921023359288098e-05, "loss": 2.5934, "step": 2496 }, { "epoch": 33.30769230769231, "grad_norm": 0.5952526926994324, "learning_rate": 8.916573971078978e-05, "loss": 2.8907, "step": 2497 }, { "epoch": 33.32107023411371, "grad_norm": 0.5542176961898804, "learning_rate": 8.912124582869856e-05, "loss": 2.6922, "step": 2498 }, { "epoch": 33.33444816053512, "grad_norm": 0.622314453125, "learning_rate": 8.907675194660736e-05, "loss": 2.7322, "step": 2499 }, { "epoch": 33.34782608695652, "grad_norm": 0.5831202864646912, "learning_rate": 8.903225806451614e-05, "loss": 2.766, "step": 2500 }, { "epoch": 33.36120401337793, "grad_norm": 0.6198363900184631, "learning_rate": 8.898776418242492e-05, "loss": 3.0431, "step": 2501 }, { "epoch": 33.374581939799334, "grad_norm": 0.6069219708442688, "learning_rate": 8.89432703003337e-05, "loss": 2.8924, "step": 2502 }, { "epoch": 33.38795986622073, "grad_norm": 0.6229302287101746, "learning_rate": 8.889877641824249e-05, "loss": 2.807, "step": 2503 }, { "epoch": 33.40133779264214, "grad_norm": 0.6178746819496155, "learning_rate": 8.885428253615128e-05, "loss": 2.7833, "step": 2504 }, { "epoch": 33.414715719063544, "grad_norm": 0.5880036950111389, "learning_rate": 8.880978865406006e-05, "loss": 2.7688, "step": 2505 }, { "epoch": 33.42809364548495, "grad_norm": 0.6083427667617798, "learning_rate": 8.876529477196886e-05, "loss": 2.5995, "step": 2506 }, { "epoch": 33.441471571906355, "grad_norm": 0.5846229195594788, "learning_rate": 8.872080088987764e-05, "loss": 2.7198, "step": 2507 }, { "epoch": 33.45484949832776, "grad_norm": 0.602645754814148, "learning_rate": 8.867630700778642e-05, "loss": 2.8395, "step": 2508 }, { "epoch": 33.468227424749166, "grad_norm": 0.5731549859046936, "learning_rate": 8.863181312569522e-05, "loss": 2.6083, "step": 2509 }, { "epoch": 33.48160535117057, "grad_norm": 0.6011414527893066, "learning_rate": 8.8587319243604e-05, "loss": 2.6036, "step": 2510 }, { "epoch": 33.49498327759197, "grad_norm": 0.5945054292678833, "learning_rate": 8.85428253615128e-05, "loss": 2.8461, "step": 2511 }, { "epoch": 33.508361204013376, "grad_norm": 0.6700778007507324, "learning_rate": 8.849833147942158e-05, "loss": 2.9723, "step": 2512 }, { "epoch": 33.52173913043478, "grad_norm": 0.5661737322807312, "learning_rate": 8.845383759733036e-05, "loss": 2.5432, "step": 2513 }, { "epoch": 33.53511705685619, "grad_norm": 0.615665078163147, "learning_rate": 8.840934371523916e-05, "loss": 2.5919, "step": 2514 }, { "epoch": 33.54849498327759, "grad_norm": 0.6195909380912781, "learning_rate": 8.836484983314794e-05, "loss": 2.6273, "step": 2515 }, { "epoch": 33.561872909699, "grad_norm": 0.6008119583129883, "learning_rate": 8.832035595105674e-05, "loss": 2.6099, "step": 2516 }, { "epoch": 33.575250836120404, "grad_norm": 0.6525030732154846, "learning_rate": 8.827586206896552e-05, "loss": 2.7715, "step": 2517 }, { "epoch": 33.58862876254181, "grad_norm": 0.6317800879478455, "learning_rate": 8.82313681868743e-05, "loss": 3.1264, "step": 2518 }, { "epoch": 33.60200668896321, "grad_norm": 0.5794599056243896, "learning_rate": 8.81868743047831e-05, "loss": 2.5186, "step": 2519 }, { "epoch": 33.61538461538461, "grad_norm": 0.633000373840332, "learning_rate": 8.814238042269188e-05, "loss": 2.7709, "step": 2520 }, { "epoch": 33.62876254180602, "grad_norm": 0.6132048964500427, "learning_rate": 8.809788654060068e-05, "loss": 2.6311, "step": 2521 }, { "epoch": 33.642140468227424, "grad_norm": 0.6357754468917847, "learning_rate": 8.805339265850946e-05, "loss": 2.8635, "step": 2522 }, { "epoch": 33.65551839464883, "grad_norm": 0.5678791403770447, "learning_rate": 8.800889877641824e-05, "loss": 2.773, "step": 2523 }, { "epoch": 33.668896321070235, "grad_norm": 0.5938114523887634, "learning_rate": 8.796440489432704e-05, "loss": 2.6978, "step": 2524 }, { "epoch": 33.68227424749164, "grad_norm": 0.6006829142570496, "learning_rate": 8.791991101223582e-05, "loss": 2.8147, "step": 2525 }, { "epoch": 33.69565217391305, "grad_norm": 0.6094152331352234, "learning_rate": 8.787541713014462e-05, "loss": 2.9597, "step": 2526 }, { "epoch": 33.709030100334445, "grad_norm": 0.6261194944381714, "learning_rate": 8.78309232480534e-05, "loss": 2.7116, "step": 2527 }, { "epoch": 33.72240802675585, "grad_norm": 0.6670746803283691, "learning_rate": 8.778642936596218e-05, "loss": 2.8481, "step": 2528 }, { "epoch": 33.735785953177256, "grad_norm": 0.5976547598838806, "learning_rate": 8.774193548387098e-05, "loss": 2.7368, "step": 2529 }, { "epoch": 33.74916387959866, "grad_norm": 0.5903379321098328, "learning_rate": 8.769744160177976e-05, "loss": 2.784, "step": 2530 }, { "epoch": 33.76254180602007, "grad_norm": 0.604728102684021, "learning_rate": 8.765294771968855e-05, "loss": 2.7636, "step": 2531 }, { "epoch": 33.77591973244147, "grad_norm": 0.6338250041007996, "learning_rate": 8.760845383759734e-05, "loss": 2.8111, "step": 2532 }, { "epoch": 33.78929765886288, "grad_norm": 0.6076844334602356, "learning_rate": 8.756395995550612e-05, "loss": 2.7592, "step": 2533 }, { "epoch": 33.802675585284284, "grad_norm": 0.6152723431587219, "learning_rate": 8.751946607341492e-05, "loss": 2.889, "step": 2534 }, { "epoch": 33.81605351170568, "grad_norm": 0.5518035292625427, "learning_rate": 8.74749721913237e-05, "loss": 2.5681, "step": 2535 }, { "epoch": 33.82943143812709, "grad_norm": 0.624555766582489, "learning_rate": 8.74304783092325e-05, "loss": 2.6651, "step": 2536 }, { "epoch": 33.84280936454849, "grad_norm": 0.6052064895629883, "learning_rate": 8.738598442714128e-05, "loss": 2.6292, "step": 2537 }, { "epoch": 33.8561872909699, "grad_norm": 0.6039239168167114, "learning_rate": 8.734149054505006e-05, "loss": 2.9519, "step": 2538 }, { "epoch": 33.869565217391305, "grad_norm": 0.633802056312561, "learning_rate": 8.729699666295885e-05, "loss": 2.8169, "step": 2539 }, { "epoch": 33.88294314381271, "grad_norm": 0.6555929780006409, "learning_rate": 8.725250278086764e-05, "loss": 2.7988, "step": 2540 }, { "epoch": 33.896321070234116, "grad_norm": 0.6146313548088074, "learning_rate": 8.720800889877643e-05, "loss": 2.7496, "step": 2541 }, { "epoch": 33.90969899665552, "grad_norm": 0.5777727365493774, "learning_rate": 8.716351501668522e-05, "loss": 2.7106, "step": 2542 }, { "epoch": 33.92307692307692, "grad_norm": 0.6095665097236633, "learning_rate": 8.7119021134594e-05, "loss": 2.8617, "step": 2543 }, { "epoch": 33.936454849498325, "grad_norm": 0.5426962375640869, "learning_rate": 8.707452725250278e-05, "loss": 2.7434, "step": 2544 }, { "epoch": 33.94983277591973, "grad_norm": 0.5962931513786316, "learning_rate": 8.703003337041156e-05, "loss": 2.6328, "step": 2545 }, { "epoch": 33.96321070234114, "grad_norm": 0.609198808670044, "learning_rate": 8.698553948832036e-05, "loss": 3.0128, "step": 2546 }, { "epoch": 33.97658862876254, "grad_norm": 0.5592045783996582, "learning_rate": 8.694104560622914e-05, "loss": 2.4845, "step": 2547 }, { "epoch": 33.98996655518395, "grad_norm": 0.6074299812316895, "learning_rate": 8.689655172413794e-05, "loss": 2.76, "step": 2548 }, { "epoch": 34.0, "grad_norm": 0.7298697233200073, "learning_rate": 8.685205784204672e-05, "loss": 2.7132, "step": 2549 }, { "epoch": 34.013377926421406, "grad_norm": 0.5669110417366028, "learning_rate": 8.68075639599555e-05, "loss": 2.6124, "step": 2550 }, { "epoch": 34.02675585284281, "grad_norm": 0.6375609636306763, "learning_rate": 8.67630700778643e-05, "loss": 2.8555, "step": 2551 }, { "epoch": 34.04013377926422, "grad_norm": 0.6021872162818909, "learning_rate": 8.671857619577308e-05, "loss": 2.6396, "step": 2552 }, { "epoch": 34.05351170568562, "grad_norm": 0.5895376801490784, "learning_rate": 8.667408231368188e-05, "loss": 2.7079, "step": 2553 }, { "epoch": 34.06688963210702, "grad_norm": 0.5841491222381592, "learning_rate": 8.662958843159066e-05, "loss": 2.5644, "step": 2554 }, { "epoch": 34.080267558528426, "grad_norm": 0.6124491691589355, "learning_rate": 8.658509454949944e-05, "loss": 2.8281, "step": 2555 }, { "epoch": 34.09364548494983, "grad_norm": 0.5991553664207458, "learning_rate": 8.654060066740824e-05, "loss": 2.7442, "step": 2556 }, { "epoch": 34.10702341137124, "grad_norm": 0.5902360081672668, "learning_rate": 8.649610678531702e-05, "loss": 2.6759, "step": 2557 }, { "epoch": 34.12040133779264, "grad_norm": 0.590696394443512, "learning_rate": 8.645161290322581e-05, "loss": 2.6966, "step": 2558 }, { "epoch": 34.13377926421405, "grad_norm": 0.5867067575454712, "learning_rate": 8.64071190211346e-05, "loss": 2.7264, "step": 2559 }, { "epoch": 34.147157190635454, "grad_norm": 0.6447772979736328, "learning_rate": 8.636262513904338e-05, "loss": 2.7659, "step": 2560 }, { "epoch": 34.16053511705686, "grad_norm": 0.6006119251251221, "learning_rate": 8.631813125695218e-05, "loss": 2.7641, "step": 2561 }, { "epoch": 34.17391304347826, "grad_norm": 0.593132495880127, "learning_rate": 8.627363737486096e-05, "loss": 2.6671, "step": 2562 }, { "epoch": 34.187290969899664, "grad_norm": 0.5959640145301819, "learning_rate": 8.622914349276975e-05, "loss": 2.7752, "step": 2563 }, { "epoch": 34.20066889632107, "grad_norm": 0.6102313995361328, "learning_rate": 8.618464961067854e-05, "loss": 2.8268, "step": 2564 }, { "epoch": 34.214046822742475, "grad_norm": 0.6111622452735901, "learning_rate": 8.614015572858732e-05, "loss": 2.5703, "step": 2565 }, { "epoch": 34.22742474916388, "grad_norm": 0.5814309120178223, "learning_rate": 8.609566184649611e-05, "loss": 2.5823, "step": 2566 }, { "epoch": 34.240802675585286, "grad_norm": 0.5712332129478455, "learning_rate": 8.60511679644049e-05, "loss": 2.3881, "step": 2567 }, { "epoch": 34.25418060200669, "grad_norm": 0.5990560054779053, "learning_rate": 8.600667408231369e-05, "loss": 2.6209, "step": 2568 }, { "epoch": 34.2675585284281, "grad_norm": 0.6006229519844055, "learning_rate": 8.596218020022248e-05, "loss": 2.6623, "step": 2569 }, { "epoch": 34.280936454849495, "grad_norm": 0.607763946056366, "learning_rate": 8.591768631813126e-05, "loss": 2.7403, "step": 2570 }, { "epoch": 34.2943143812709, "grad_norm": 0.6283872127532959, "learning_rate": 8.587319243604005e-05, "loss": 2.9186, "step": 2571 }, { "epoch": 34.30769230769231, "grad_norm": 0.6027917861938477, "learning_rate": 8.582869855394884e-05, "loss": 2.7604, "step": 2572 }, { "epoch": 34.32107023411371, "grad_norm": 0.617796778678894, "learning_rate": 8.578420467185763e-05, "loss": 2.2285, "step": 2573 }, { "epoch": 34.33444816053512, "grad_norm": 0.6242703199386597, "learning_rate": 8.573971078976641e-05, "loss": 2.7596, "step": 2574 }, { "epoch": 34.34782608695652, "grad_norm": 0.6676938533782959, "learning_rate": 8.56952169076752e-05, "loss": 2.8307, "step": 2575 }, { "epoch": 34.36120401337793, "grad_norm": 0.616120457649231, "learning_rate": 8.565072302558399e-05, "loss": 2.562, "step": 2576 }, { "epoch": 34.374581939799334, "grad_norm": 0.6003774404525757, "learning_rate": 8.560622914349277e-05, "loss": 2.913, "step": 2577 }, { "epoch": 34.38795986622073, "grad_norm": 0.615688145160675, "learning_rate": 8.556173526140157e-05, "loss": 2.707, "step": 2578 }, { "epoch": 34.40133779264214, "grad_norm": 0.6033686995506287, "learning_rate": 8.551724137931035e-05, "loss": 2.8834, "step": 2579 }, { "epoch": 34.414715719063544, "grad_norm": 0.5940545201301575, "learning_rate": 8.547274749721914e-05, "loss": 2.5229, "step": 2580 }, { "epoch": 34.42809364548495, "grad_norm": 0.587794303894043, "learning_rate": 8.542825361512793e-05, "loss": 2.4714, "step": 2581 }, { "epoch": 34.441471571906355, "grad_norm": 0.6004595160484314, "learning_rate": 8.538375973303671e-05, "loss": 2.7798, "step": 2582 }, { "epoch": 34.45484949832776, "grad_norm": 0.5780850648880005, "learning_rate": 8.533926585094551e-05, "loss": 2.5977, "step": 2583 }, { "epoch": 34.468227424749166, "grad_norm": 0.6270626187324524, "learning_rate": 8.529477196885428e-05, "loss": 2.8688, "step": 2584 }, { "epoch": 34.48160535117057, "grad_norm": 0.5620662569999695, "learning_rate": 8.525027808676307e-05, "loss": 2.5738, "step": 2585 }, { "epoch": 34.49498327759197, "grad_norm": 0.6294083595275879, "learning_rate": 8.520578420467186e-05, "loss": 2.9486, "step": 2586 }, { "epoch": 34.508361204013376, "grad_norm": 0.596815824508667, "learning_rate": 8.516129032258064e-05, "loss": 2.7196, "step": 2587 }, { "epoch": 34.52173913043478, "grad_norm": 0.5836464166641235, "learning_rate": 8.511679644048944e-05, "loss": 2.7812, "step": 2588 }, { "epoch": 34.53511705685619, "grad_norm": 0.6028749942779541, "learning_rate": 8.507230255839822e-05, "loss": 2.7632, "step": 2589 }, { "epoch": 34.54849498327759, "grad_norm": 0.602236807346344, "learning_rate": 8.502780867630701e-05, "loss": 2.7839, "step": 2590 }, { "epoch": 34.561872909699, "grad_norm": 0.6275736093521118, "learning_rate": 8.49833147942158e-05, "loss": 2.7257, "step": 2591 }, { "epoch": 34.575250836120404, "grad_norm": 0.5825592875480652, "learning_rate": 8.493882091212458e-05, "loss": 2.6373, "step": 2592 }, { "epoch": 34.58862876254181, "grad_norm": 0.6081156134605408, "learning_rate": 8.489432703003337e-05, "loss": 2.7701, "step": 2593 }, { "epoch": 34.60200668896321, "grad_norm": 0.5824214220046997, "learning_rate": 8.484983314794216e-05, "loss": 2.7086, "step": 2594 }, { "epoch": 34.61538461538461, "grad_norm": 0.5969799160957336, "learning_rate": 8.480533926585095e-05, "loss": 2.6757, "step": 2595 }, { "epoch": 34.62876254180602, "grad_norm": 0.6191763877868652, "learning_rate": 8.476084538375974e-05, "loss": 2.8289, "step": 2596 }, { "epoch": 34.642140468227424, "grad_norm": 0.570646345615387, "learning_rate": 8.471635150166852e-05, "loss": 2.6874, "step": 2597 }, { "epoch": 34.65551839464883, "grad_norm": 0.6193318963050842, "learning_rate": 8.467185761957731e-05, "loss": 2.7932, "step": 2598 }, { "epoch": 34.668896321070235, "grad_norm": 0.607635498046875, "learning_rate": 8.46273637374861e-05, "loss": 2.7382, "step": 2599 }, { "epoch": 34.68227424749164, "grad_norm": 0.5707620978355408, "learning_rate": 8.458286985539489e-05, "loss": 2.5729, "step": 2600 }, { "epoch": 34.69565217391305, "grad_norm": 0.5609964728355408, "learning_rate": 8.453837597330367e-05, "loss": 2.6938, "step": 2601 }, { "epoch": 34.709030100334445, "grad_norm": 0.6459850072860718, "learning_rate": 8.449388209121246e-05, "loss": 2.7604, "step": 2602 }, { "epoch": 34.72240802675585, "grad_norm": 0.5685142874717712, "learning_rate": 8.444938820912125e-05, "loss": 2.6518, "step": 2603 }, { "epoch": 34.735785953177256, "grad_norm": 0.5800818204879761, "learning_rate": 8.440489432703003e-05, "loss": 2.6193, "step": 2604 }, { "epoch": 34.74916387959866, "grad_norm": 0.6133848428726196, "learning_rate": 8.436040044493883e-05, "loss": 2.8524, "step": 2605 }, { "epoch": 34.76254180602007, "grad_norm": 0.5716528296470642, "learning_rate": 8.431590656284761e-05, "loss": 2.9175, "step": 2606 }, { "epoch": 34.77591973244147, "grad_norm": 0.5581215023994446, "learning_rate": 8.42714126807564e-05, "loss": 2.6181, "step": 2607 }, { "epoch": 34.78929765886288, "grad_norm": 0.5720329284667969, "learning_rate": 8.422691879866519e-05, "loss": 2.6401, "step": 2608 }, { "epoch": 34.802675585284284, "grad_norm": 0.6074360609054565, "learning_rate": 8.418242491657397e-05, "loss": 2.8911, "step": 2609 }, { "epoch": 34.81605351170568, "grad_norm": 0.5685964226722717, "learning_rate": 8.413793103448277e-05, "loss": 2.868, "step": 2610 }, { "epoch": 34.82943143812709, "grad_norm": 0.5762805938720703, "learning_rate": 8.409343715239155e-05, "loss": 2.7641, "step": 2611 }, { "epoch": 34.84280936454849, "grad_norm": 0.6570479869842529, "learning_rate": 8.404894327030033e-05, "loss": 2.8734, "step": 2612 }, { "epoch": 34.8561872909699, "grad_norm": 0.6143054366111755, "learning_rate": 8.400444938820913e-05, "loss": 2.7792, "step": 2613 }, { "epoch": 34.869565217391305, "grad_norm": 0.6144394278526306, "learning_rate": 8.395995550611791e-05, "loss": 2.8023, "step": 2614 }, { "epoch": 34.88294314381271, "grad_norm": 0.6102853417396545, "learning_rate": 8.391546162402671e-05, "loss": 2.7508, "step": 2615 }, { "epoch": 34.896321070234116, "grad_norm": 0.6034586429595947, "learning_rate": 8.387096774193549e-05, "loss": 2.5528, "step": 2616 }, { "epoch": 34.90969899665552, "grad_norm": 0.6032466292381287, "learning_rate": 8.382647385984427e-05, "loss": 2.7771, "step": 2617 }, { "epoch": 34.92307692307692, "grad_norm": 0.6024323105812073, "learning_rate": 8.378197997775307e-05, "loss": 2.5514, "step": 2618 }, { "epoch": 34.936454849498325, "grad_norm": 0.6762076616287231, "learning_rate": 8.373748609566185e-05, "loss": 2.8336, "step": 2619 }, { "epoch": 34.94983277591973, "grad_norm": 0.5701456069946289, "learning_rate": 8.369299221357065e-05, "loss": 2.4598, "step": 2620 }, { "epoch": 34.96321070234114, "grad_norm": 0.5878699421882629, "learning_rate": 8.364849833147943e-05, "loss": 2.6511, "step": 2621 }, { "epoch": 34.97658862876254, "grad_norm": 0.6187490820884705, "learning_rate": 8.360400444938821e-05, "loss": 2.6226, "step": 2622 }, { "epoch": 34.98996655518395, "grad_norm": 0.5954105854034424, "learning_rate": 8.355951056729701e-05, "loss": 2.7115, "step": 2623 }, { "epoch": 35.0, "grad_norm": 0.7193446755409241, "learning_rate": 8.351501668520579e-05, "loss": 2.7859, "step": 2624 }, { "epoch": 35.013377926421406, "grad_norm": 0.6013773083686829, "learning_rate": 8.347052280311457e-05, "loss": 2.8222, "step": 2625 }, { "epoch": 35.02675585284281, "grad_norm": 0.594046950340271, "learning_rate": 8.342602892102336e-05, "loss": 2.6455, "step": 2626 }, { "epoch": 35.04013377926422, "grad_norm": 0.5741755962371826, "learning_rate": 8.338153503893215e-05, "loss": 2.6076, "step": 2627 }, { "epoch": 35.05351170568562, "grad_norm": 0.6248610019683838, "learning_rate": 8.333704115684093e-05, "loss": 2.8714, "step": 2628 }, { "epoch": 35.06688963210702, "grad_norm": 0.6038011312484741, "learning_rate": 8.329254727474972e-05, "loss": 2.6117, "step": 2629 }, { "epoch": 35.080267558528426, "grad_norm": 0.618163526058197, "learning_rate": 8.324805339265851e-05, "loss": 2.8792, "step": 2630 }, { "epoch": 35.09364548494983, "grad_norm": 0.5919619798660278, "learning_rate": 8.32035595105673e-05, "loss": 2.7317, "step": 2631 }, { "epoch": 35.10702341137124, "grad_norm": 0.5996441841125488, "learning_rate": 8.315906562847609e-05, "loss": 2.6743, "step": 2632 }, { "epoch": 35.12040133779264, "grad_norm": 0.5751325488090515, "learning_rate": 8.311457174638487e-05, "loss": 2.7814, "step": 2633 }, { "epoch": 35.13377926421405, "grad_norm": 0.5554592609405518, "learning_rate": 8.307007786429366e-05, "loss": 2.4874, "step": 2634 }, { "epoch": 35.147157190635454, "grad_norm": 0.5908927321434021, "learning_rate": 8.302558398220245e-05, "loss": 2.6838, "step": 2635 }, { "epoch": 35.16053511705686, "grad_norm": 0.6261016130447388, "learning_rate": 8.298109010011123e-05, "loss": 2.8281, "step": 2636 }, { "epoch": 35.17391304347826, "grad_norm": 0.5825657844543457, "learning_rate": 8.293659621802003e-05, "loss": 2.806, "step": 2637 }, { "epoch": 35.187290969899664, "grad_norm": 0.5951602458953857, "learning_rate": 8.289210233592881e-05, "loss": 2.728, "step": 2638 }, { "epoch": 35.20066889632107, "grad_norm": 0.6045755743980408, "learning_rate": 8.28476084538376e-05, "loss": 2.5632, "step": 2639 }, { "epoch": 35.214046822742475, "grad_norm": 0.5793343782424927, "learning_rate": 8.280311457174639e-05, "loss": 2.4912, "step": 2640 }, { "epoch": 35.22742474916388, "grad_norm": 0.5778194665908813, "learning_rate": 8.275862068965517e-05, "loss": 2.649, "step": 2641 }, { "epoch": 35.240802675585286, "grad_norm": 0.5981440544128418, "learning_rate": 8.271412680756397e-05, "loss": 2.7538, "step": 2642 }, { "epoch": 35.25418060200669, "grad_norm": 0.601005494594574, "learning_rate": 8.266963292547275e-05, "loss": 2.5793, "step": 2643 }, { "epoch": 35.2675585284281, "grad_norm": 0.5964012742042542, "learning_rate": 8.262513904338153e-05, "loss": 2.7611, "step": 2644 }, { "epoch": 35.280936454849495, "grad_norm": 0.603792130947113, "learning_rate": 8.258064516129033e-05, "loss": 2.5607, "step": 2645 }, { "epoch": 35.2943143812709, "grad_norm": 0.6139290928840637, "learning_rate": 8.253615127919911e-05, "loss": 2.7616, "step": 2646 }, { "epoch": 35.30769230769231, "grad_norm": 0.5915001034736633, "learning_rate": 8.249165739710791e-05, "loss": 2.7107, "step": 2647 }, { "epoch": 35.32107023411371, "grad_norm": 0.6058906316757202, "learning_rate": 8.244716351501669e-05, "loss": 2.7078, "step": 2648 }, { "epoch": 35.33444816053512, "grad_norm": 0.6192322969436646, "learning_rate": 8.240266963292547e-05, "loss": 2.6736, "step": 2649 }, { "epoch": 35.34782608695652, "grad_norm": 0.6301470994949341, "learning_rate": 8.235817575083427e-05, "loss": 2.6024, "step": 2650 }, { "epoch": 35.36120401337793, "grad_norm": 0.59424889087677, "learning_rate": 8.231368186874305e-05, "loss": 2.7615, "step": 2651 }, { "epoch": 35.374581939799334, "grad_norm": 0.5878474712371826, "learning_rate": 8.226918798665185e-05, "loss": 2.7163, "step": 2652 }, { "epoch": 35.38795986622073, "grad_norm": 0.6637581586837769, "learning_rate": 8.222469410456063e-05, "loss": 2.3804, "step": 2653 }, { "epoch": 35.40133779264214, "grad_norm": 0.6142652630805969, "learning_rate": 8.218020022246941e-05, "loss": 2.8305, "step": 2654 }, { "epoch": 35.414715719063544, "grad_norm": 0.59095698595047, "learning_rate": 8.213570634037821e-05, "loss": 2.7017, "step": 2655 }, { "epoch": 35.42809364548495, "grad_norm": 0.6011471748352051, "learning_rate": 8.209121245828699e-05, "loss": 2.5261, "step": 2656 }, { "epoch": 35.441471571906355, "grad_norm": 0.6142131686210632, "learning_rate": 8.204671857619579e-05, "loss": 2.8304, "step": 2657 }, { "epoch": 35.45484949832776, "grad_norm": 0.5916325449943542, "learning_rate": 8.200222469410457e-05, "loss": 2.5681, "step": 2658 }, { "epoch": 35.468227424749166, "grad_norm": 0.6357722282409668, "learning_rate": 8.195773081201335e-05, "loss": 2.6106, "step": 2659 }, { "epoch": 35.48160535117057, "grad_norm": 0.6173760890960693, "learning_rate": 8.191323692992215e-05, "loss": 2.644, "step": 2660 }, { "epoch": 35.49498327759197, "grad_norm": 0.6166900396347046, "learning_rate": 8.186874304783093e-05, "loss": 2.6629, "step": 2661 }, { "epoch": 35.508361204013376, "grad_norm": 0.593647837638855, "learning_rate": 8.182424916573973e-05, "loss": 2.722, "step": 2662 }, { "epoch": 35.52173913043478, "grad_norm": 0.6336297988891602, "learning_rate": 8.177975528364851e-05, "loss": 2.7934, "step": 2663 }, { "epoch": 35.53511705685619, "grad_norm": 0.6457211971282959, "learning_rate": 8.173526140155729e-05, "loss": 2.5204, "step": 2664 }, { "epoch": 35.54849498327759, "grad_norm": 0.5988196730613708, "learning_rate": 8.169076751946609e-05, "loss": 2.8071, "step": 2665 }, { "epoch": 35.561872909699, "grad_norm": 0.6295507550239563, "learning_rate": 8.164627363737485e-05, "loss": 2.6807, "step": 2666 }, { "epoch": 35.575250836120404, "grad_norm": 0.5988739728927612, "learning_rate": 8.160177975528365e-05, "loss": 2.5091, "step": 2667 }, { "epoch": 35.58862876254181, "grad_norm": 0.5924574732780457, "learning_rate": 8.155728587319243e-05, "loss": 2.5552, "step": 2668 }, { "epoch": 35.60200668896321, "grad_norm": 0.6240471005439758, "learning_rate": 8.151279199110123e-05, "loss": 2.8841, "step": 2669 }, { "epoch": 35.61538461538461, "grad_norm": 0.5939052700996399, "learning_rate": 8.146829810901001e-05, "loss": 2.4867, "step": 2670 }, { "epoch": 35.62876254180602, "grad_norm": 0.6398495435714722, "learning_rate": 8.14238042269188e-05, "loss": 2.801, "step": 2671 }, { "epoch": 35.642140468227424, "grad_norm": 0.6106517910957336, "learning_rate": 8.137931034482759e-05, "loss": 2.7849, "step": 2672 }, { "epoch": 35.65551839464883, "grad_norm": 0.5723669528961182, "learning_rate": 8.133481646273637e-05, "loss": 2.6566, "step": 2673 }, { "epoch": 35.668896321070235, "grad_norm": 0.616449236869812, "learning_rate": 8.129032258064517e-05, "loss": 2.823, "step": 2674 }, { "epoch": 35.68227424749164, "grad_norm": 0.6001511812210083, "learning_rate": 8.124582869855395e-05, "loss": 2.6833, "step": 2675 }, { "epoch": 35.69565217391305, "grad_norm": 0.5830066800117493, "learning_rate": 8.120133481646273e-05, "loss": 2.8018, "step": 2676 }, { "epoch": 35.709030100334445, "grad_norm": 0.5999500155448914, "learning_rate": 8.115684093437153e-05, "loss": 2.7339, "step": 2677 }, { "epoch": 35.72240802675585, "grad_norm": 0.6584502458572388, "learning_rate": 8.111234705228031e-05, "loss": 2.8347, "step": 2678 }, { "epoch": 35.735785953177256, "grad_norm": 0.6561383008956909, "learning_rate": 8.106785317018911e-05, "loss": 2.6747, "step": 2679 }, { "epoch": 35.74916387959866, "grad_norm": 0.6040836572647095, "learning_rate": 8.102335928809789e-05, "loss": 2.713, "step": 2680 }, { "epoch": 35.76254180602007, "grad_norm": 0.6353011131286621, "learning_rate": 8.097886540600667e-05, "loss": 2.51, "step": 2681 }, { "epoch": 35.77591973244147, "grad_norm": 0.5977175235748291, "learning_rate": 8.093437152391547e-05, "loss": 2.7502, "step": 2682 }, { "epoch": 35.78929765886288, "grad_norm": 0.6166728734970093, "learning_rate": 8.088987764182425e-05, "loss": 2.8618, "step": 2683 }, { "epoch": 35.802675585284284, "grad_norm": 0.5676133036613464, "learning_rate": 8.084538375973305e-05, "loss": 2.6979, "step": 2684 }, { "epoch": 35.81605351170568, "grad_norm": 0.6121795773506165, "learning_rate": 8.080088987764183e-05, "loss": 2.7341, "step": 2685 }, { "epoch": 35.82943143812709, "grad_norm": 0.6005775332450867, "learning_rate": 8.075639599555061e-05, "loss": 2.6737, "step": 2686 }, { "epoch": 35.84280936454849, "grad_norm": 0.5900837182998657, "learning_rate": 8.071190211345941e-05, "loss": 2.7243, "step": 2687 }, { "epoch": 35.8561872909699, "grad_norm": 0.5916823744773865, "learning_rate": 8.066740823136819e-05, "loss": 2.5637, "step": 2688 }, { "epoch": 35.869565217391305, "grad_norm": 0.6083678603172302, "learning_rate": 8.062291434927699e-05, "loss": 2.7644, "step": 2689 }, { "epoch": 35.88294314381271, "grad_norm": 0.5921754837036133, "learning_rate": 8.057842046718577e-05, "loss": 2.6599, "step": 2690 }, { "epoch": 35.896321070234116, "grad_norm": 0.6174843907356262, "learning_rate": 8.053392658509455e-05, "loss": 2.6216, "step": 2691 }, { "epoch": 35.90969899665552, "grad_norm": 0.5929603576660156, "learning_rate": 8.048943270300335e-05, "loss": 2.5201, "step": 2692 }, { "epoch": 35.92307692307692, "grad_norm": 0.6305480599403381, "learning_rate": 8.044493882091213e-05, "loss": 2.726, "step": 2693 }, { "epoch": 35.936454849498325, "grad_norm": 0.662198543548584, "learning_rate": 8.040044493882092e-05, "loss": 2.7794, "step": 2694 }, { "epoch": 35.94983277591973, "grad_norm": 0.5887805223464966, "learning_rate": 8.03559510567297e-05, "loss": 2.5348, "step": 2695 }, { "epoch": 35.96321070234114, "grad_norm": 0.5848047137260437, "learning_rate": 8.031145717463849e-05, "loss": 2.4549, "step": 2696 }, { "epoch": 35.97658862876254, "grad_norm": 0.5758711695671082, "learning_rate": 8.026696329254729e-05, "loss": 2.5325, "step": 2697 }, { "epoch": 35.98996655518395, "grad_norm": 0.6385326981544495, "learning_rate": 8.022246941045607e-05, "loss": 2.7417, "step": 2698 }, { "epoch": 36.0, "grad_norm": 0.7033568024635315, "learning_rate": 8.017797552836486e-05, "loss": 2.6844, "step": 2699 }, { "epoch": 36.013377926421406, "grad_norm": 0.5869501233100891, "learning_rate": 8.013348164627365e-05, "loss": 2.5899, "step": 2700 }, { "epoch": 36.02675585284281, "grad_norm": 0.6519103050231934, "learning_rate": 8.008898776418243e-05, "loss": 2.8153, "step": 2701 }, { "epoch": 36.04013377926422, "grad_norm": 0.5899248123168945, "learning_rate": 8.004449388209122e-05, "loss": 2.8749, "step": 2702 }, { "epoch": 36.05351170568562, "grad_norm": 0.6234768629074097, "learning_rate": 8e-05, "loss": 2.5244, "step": 2703 }, { "epoch": 36.06688963210702, "grad_norm": 0.5662122368812561, "learning_rate": 7.99555061179088e-05, "loss": 2.4421, "step": 2704 }, { "epoch": 36.080267558528426, "grad_norm": 0.584144651889801, "learning_rate": 7.991101223581758e-05, "loss": 2.7261, "step": 2705 }, { "epoch": 36.09364548494983, "grad_norm": 0.6016060709953308, "learning_rate": 7.986651835372637e-05, "loss": 2.607, "step": 2706 }, { "epoch": 36.10702341137124, "grad_norm": 0.6164819002151489, "learning_rate": 7.982202447163516e-05, "loss": 2.7332, "step": 2707 }, { "epoch": 36.12040133779264, "grad_norm": 0.6115155816078186, "learning_rate": 7.977753058954393e-05, "loss": 2.706, "step": 2708 }, { "epoch": 36.13377926421405, "grad_norm": 0.6102612018585205, "learning_rate": 7.973303670745273e-05, "loss": 2.8179, "step": 2709 }, { "epoch": 36.147157190635454, "grad_norm": 0.5910987257957458, "learning_rate": 7.968854282536151e-05, "loss": 2.6354, "step": 2710 }, { "epoch": 36.16053511705686, "grad_norm": 0.6355498433113098, "learning_rate": 7.96440489432703e-05, "loss": 2.705, "step": 2711 }, { "epoch": 36.17391304347826, "grad_norm": 0.6213639378547668, "learning_rate": 7.959955506117909e-05, "loss": 2.481, "step": 2712 }, { "epoch": 36.187290969899664, "grad_norm": 0.587666392326355, "learning_rate": 7.955506117908787e-05, "loss": 2.6295, "step": 2713 }, { "epoch": 36.20066889632107, "grad_norm": 0.5778639316558838, "learning_rate": 7.951056729699667e-05, "loss": 2.5795, "step": 2714 }, { "epoch": 36.214046822742475, "grad_norm": 0.6050285696983337, "learning_rate": 7.946607341490545e-05, "loss": 2.671, "step": 2715 }, { "epoch": 36.22742474916388, "grad_norm": 0.6501610279083252, "learning_rate": 7.942157953281425e-05, "loss": 2.7712, "step": 2716 }, { "epoch": 36.240802675585286, "grad_norm": 0.6189882159233093, "learning_rate": 7.937708565072303e-05, "loss": 2.5324, "step": 2717 }, { "epoch": 36.25418060200669, "grad_norm": 0.6189801096916199, "learning_rate": 7.933259176863181e-05, "loss": 2.5716, "step": 2718 }, { "epoch": 36.2675585284281, "grad_norm": 0.617070734500885, "learning_rate": 7.92880978865406e-05, "loss": 2.7485, "step": 2719 }, { "epoch": 36.280936454849495, "grad_norm": 0.6042205095291138, "learning_rate": 7.924360400444939e-05, "loss": 2.4323, "step": 2720 }, { "epoch": 36.2943143812709, "grad_norm": 0.6032358407974243, "learning_rate": 7.919911012235818e-05, "loss": 2.6852, "step": 2721 }, { "epoch": 36.30769230769231, "grad_norm": 0.5686265230178833, "learning_rate": 7.915461624026697e-05, "loss": 2.551, "step": 2722 }, { "epoch": 36.32107023411371, "grad_norm": 0.5876219868659973, "learning_rate": 7.911012235817575e-05, "loss": 2.3279, "step": 2723 }, { "epoch": 36.33444816053512, "grad_norm": 0.624603807926178, "learning_rate": 7.906562847608455e-05, "loss": 2.6303, "step": 2724 }, { "epoch": 36.34782608695652, "grad_norm": 0.5873613953590393, "learning_rate": 7.902113459399333e-05, "loss": 2.7109, "step": 2725 }, { "epoch": 36.36120401337793, "grad_norm": 0.6027836203575134, "learning_rate": 7.897664071190212e-05, "loss": 2.6051, "step": 2726 }, { "epoch": 36.374581939799334, "grad_norm": 0.5907491445541382, "learning_rate": 7.89321468298109e-05, "loss": 2.5136, "step": 2727 }, { "epoch": 36.38795986622073, "grad_norm": 0.6281634569168091, "learning_rate": 7.888765294771969e-05, "loss": 2.6751, "step": 2728 }, { "epoch": 36.40133779264214, "grad_norm": 0.6197676658630371, "learning_rate": 7.884315906562848e-05, "loss": 2.6621, "step": 2729 }, { "epoch": 36.414715719063544, "grad_norm": 0.6759832501411438, "learning_rate": 7.879866518353727e-05, "loss": 2.9938, "step": 2730 }, { "epoch": 36.42809364548495, "grad_norm": 0.6368512511253357, "learning_rate": 7.875417130144606e-05, "loss": 2.8629, "step": 2731 }, { "epoch": 36.441471571906355, "grad_norm": 0.5914281606674194, "learning_rate": 7.870967741935484e-05, "loss": 2.6094, "step": 2732 }, { "epoch": 36.45484949832776, "grad_norm": 0.60133957862854, "learning_rate": 7.866518353726363e-05, "loss": 2.7982, "step": 2733 }, { "epoch": 36.468227424749166, "grad_norm": 0.621593177318573, "learning_rate": 7.862068965517242e-05, "loss": 2.595, "step": 2734 }, { "epoch": 36.48160535117057, "grad_norm": 0.6033498048782349, "learning_rate": 7.85761957730812e-05, "loss": 2.7714, "step": 2735 }, { "epoch": 36.49498327759197, "grad_norm": 0.6060223579406738, "learning_rate": 7.853170189099e-05, "loss": 2.5214, "step": 2736 }, { "epoch": 36.508361204013376, "grad_norm": 0.6156677007675171, "learning_rate": 7.848720800889878e-05, "loss": 2.7125, "step": 2737 }, { "epoch": 36.52173913043478, "grad_norm": 0.611400306224823, "learning_rate": 7.844271412680757e-05, "loss": 2.8178, "step": 2738 }, { "epoch": 36.53511705685619, "grad_norm": 0.6582430601119995, "learning_rate": 7.839822024471636e-05, "loss": 2.9621, "step": 2739 }, { "epoch": 36.54849498327759, "grad_norm": 0.6210145950317383, "learning_rate": 7.835372636262514e-05, "loss": 2.6619, "step": 2740 }, { "epoch": 36.561872909699, "grad_norm": 0.6173349022865295, "learning_rate": 7.830923248053394e-05, "loss": 2.6075, "step": 2741 }, { "epoch": 36.575250836120404, "grad_norm": 0.5770074129104614, "learning_rate": 7.826473859844272e-05, "loss": 2.5834, "step": 2742 }, { "epoch": 36.58862876254181, "grad_norm": 0.6317800283432007, "learning_rate": 7.82202447163515e-05, "loss": 2.6134, "step": 2743 }, { "epoch": 36.60200668896321, "grad_norm": 0.6122284531593323, "learning_rate": 7.81757508342603e-05, "loss": 2.7218, "step": 2744 }, { "epoch": 36.61538461538461, "grad_norm": 0.5796928405761719, "learning_rate": 7.813125695216908e-05, "loss": 2.6459, "step": 2745 }, { "epoch": 36.62876254180602, "grad_norm": 0.5904363393783569, "learning_rate": 7.808676307007788e-05, "loss": 2.5216, "step": 2746 }, { "epoch": 36.642140468227424, "grad_norm": 0.6074969172477722, "learning_rate": 7.804226918798666e-05, "loss": 2.5776, "step": 2747 }, { "epoch": 36.65551839464883, "grad_norm": 0.6080595850944519, "learning_rate": 7.799777530589544e-05, "loss": 2.7651, "step": 2748 }, { "epoch": 36.668896321070235, "grad_norm": 0.5953335762023926, "learning_rate": 7.795328142380423e-05, "loss": 2.5139, "step": 2749 }, { "epoch": 36.68227424749164, "grad_norm": 0.6192560195922852, "learning_rate": 7.790878754171301e-05, "loss": 2.5091, "step": 2750 }, { "epoch": 36.69565217391305, "grad_norm": 0.6019810438156128, "learning_rate": 7.78642936596218e-05, "loss": 2.7217, "step": 2751 }, { "epoch": 36.709030100334445, "grad_norm": 0.5785876512527466, "learning_rate": 7.781979977753059e-05, "loss": 2.4336, "step": 2752 }, { "epoch": 36.72240802675585, "grad_norm": 0.6099944114685059, "learning_rate": 7.777530589543937e-05, "loss": 2.6614, "step": 2753 }, { "epoch": 36.735785953177256, "grad_norm": 0.6206619143486023, "learning_rate": 7.773081201334817e-05, "loss": 2.7906, "step": 2754 }, { "epoch": 36.74916387959866, "grad_norm": 0.5885736346244812, "learning_rate": 7.768631813125695e-05, "loss": 2.4836, "step": 2755 }, { "epoch": 36.76254180602007, "grad_norm": 0.6022318005561829, "learning_rate": 7.764182424916574e-05, "loss": 2.6954, "step": 2756 }, { "epoch": 36.77591973244147, "grad_norm": 0.6479224562644958, "learning_rate": 7.759733036707453e-05, "loss": 2.6576, "step": 2757 }, { "epoch": 36.78929765886288, "grad_norm": 0.6288067102432251, "learning_rate": 7.755283648498331e-05, "loss": 2.6265, "step": 2758 }, { "epoch": 36.802675585284284, "grad_norm": 0.6241925358772278, "learning_rate": 7.75083426028921e-05, "loss": 2.7554, "step": 2759 }, { "epoch": 36.81605351170568, "grad_norm": 0.6327086687088013, "learning_rate": 7.746384872080089e-05, "loss": 2.6145, "step": 2760 }, { "epoch": 36.82943143812709, "grad_norm": 0.6177266836166382, "learning_rate": 7.741935483870968e-05, "loss": 2.8887, "step": 2761 }, { "epoch": 36.84280936454849, "grad_norm": 0.6014055013656616, "learning_rate": 7.737486095661847e-05, "loss": 2.6512, "step": 2762 }, { "epoch": 36.8561872909699, "grad_norm": 0.5890874266624451, "learning_rate": 7.733036707452725e-05, "loss": 2.4959, "step": 2763 }, { "epoch": 36.869565217391305, "grad_norm": 0.5938498973846436, "learning_rate": 7.728587319243604e-05, "loss": 2.6364, "step": 2764 }, { "epoch": 36.88294314381271, "grad_norm": 0.587141752243042, "learning_rate": 7.724137931034483e-05, "loss": 2.804, "step": 2765 }, { "epoch": 36.896321070234116, "grad_norm": 0.5674299597740173, "learning_rate": 7.719688542825362e-05, "loss": 2.5209, "step": 2766 }, { "epoch": 36.90969899665552, "grad_norm": 0.616888165473938, "learning_rate": 7.71523915461624e-05, "loss": 2.6253, "step": 2767 }, { "epoch": 36.92307692307692, "grad_norm": 0.6040933132171631, "learning_rate": 7.710789766407119e-05, "loss": 2.6645, "step": 2768 }, { "epoch": 36.936454849498325, "grad_norm": 0.5765551328659058, "learning_rate": 7.706340378197998e-05, "loss": 2.5598, "step": 2769 }, { "epoch": 36.94983277591973, "grad_norm": 0.598349928855896, "learning_rate": 7.701890989988877e-05, "loss": 2.6193, "step": 2770 }, { "epoch": 36.96321070234114, "grad_norm": 0.5944122076034546, "learning_rate": 7.697441601779756e-05, "loss": 2.7833, "step": 2771 }, { "epoch": 36.97658862876254, "grad_norm": 0.6495612263679504, "learning_rate": 7.692992213570634e-05, "loss": 2.8149, "step": 2772 }, { "epoch": 36.98996655518395, "grad_norm": 0.6290954351425171, "learning_rate": 7.688542825361513e-05, "loss": 2.8039, "step": 2773 }, { "epoch": 37.0, "grad_norm": 0.7225489020347595, "learning_rate": 7.684093437152392e-05, "loss": 2.6676, "step": 2774 }, { "epoch": 37.013377926421406, "grad_norm": 0.5684818029403687, "learning_rate": 7.67964404894327e-05, "loss": 2.5921, "step": 2775 }, { "epoch": 37.02675585284281, "grad_norm": 0.6261118054389954, "learning_rate": 7.67519466073415e-05, "loss": 2.6034, "step": 2776 }, { "epoch": 37.04013377926422, "grad_norm": 0.5897383689880371, "learning_rate": 7.670745272525028e-05, "loss": 2.6218, "step": 2777 }, { "epoch": 37.05351170568562, "grad_norm": 0.6284380555152893, "learning_rate": 7.666295884315907e-05, "loss": 2.8228, "step": 2778 }, { "epoch": 37.06688963210702, "grad_norm": 0.6081552505493164, "learning_rate": 7.661846496106786e-05, "loss": 2.4883, "step": 2779 }, { "epoch": 37.080267558528426, "grad_norm": 0.5826512575149536, "learning_rate": 7.657397107897664e-05, "loss": 2.288, "step": 2780 }, { "epoch": 37.09364548494983, "grad_norm": 0.609711766242981, "learning_rate": 7.652947719688544e-05, "loss": 2.7462, "step": 2781 }, { "epoch": 37.10702341137124, "grad_norm": 0.6180025339126587, "learning_rate": 7.648498331479422e-05, "loss": 2.5538, "step": 2782 }, { "epoch": 37.12040133779264, "grad_norm": 0.5980042815208435, "learning_rate": 7.6440489432703e-05, "loss": 2.5066, "step": 2783 }, { "epoch": 37.13377926421405, "grad_norm": 0.5932002067565918, "learning_rate": 7.63959955506118e-05, "loss": 2.5456, "step": 2784 }, { "epoch": 37.147157190635454, "grad_norm": 0.5952025055885315, "learning_rate": 7.635150166852058e-05, "loss": 2.5584, "step": 2785 }, { "epoch": 37.16053511705686, "grad_norm": 0.6259213089942932, "learning_rate": 7.630700778642938e-05, "loss": 2.7745, "step": 2786 }, { "epoch": 37.17391304347826, "grad_norm": 0.620085597038269, "learning_rate": 7.626251390433816e-05, "loss": 2.7472, "step": 2787 }, { "epoch": 37.187290969899664, "grad_norm": 0.5972263216972351, "learning_rate": 7.621802002224694e-05, "loss": 2.4507, "step": 2788 }, { "epoch": 37.20066889632107, "grad_norm": 0.6285966634750366, "learning_rate": 7.617352614015574e-05, "loss": 2.6729, "step": 2789 }, { "epoch": 37.214046822742475, "grad_norm": 0.6250035762786865, "learning_rate": 7.612903225806451e-05, "loss": 2.7339, "step": 2790 }, { "epoch": 37.22742474916388, "grad_norm": 0.6274656653404236, "learning_rate": 7.60845383759733e-05, "loss": 2.4538, "step": 2791 }, { "epoch": 37.240802675585286, "grad_norm": 0.632949948310852, "learning_rate": 7.604004449388209e-05, "loss": 2.6448, "step": 2792 }, { "epoch": 37.25418060200669, "grad_norm": 0.5869743824005127, "learning_rate": 7.599555061179088e-05, "loss": 2.6065, "step": 2793 }, { "epoch": 37.2675585284281, "grad_norm": 0.6650564670562744, "learning_rate": 7.595105672969966e-05, "loss": 2.6521, "step": 2794 }, { "epoch": 37.280936454849495, "grad_norm": 0.597977340221405, "learning_rate": 7.590656284760845e-05, "loss": 2.7055, "step": 2795 }, { "epoch": 37.2943143812709, "grad_norm": 0.6276383996009827, "learning_rate": 7.586206896551724e-05, "loss": 2.6584, "step": 2796 }, { "epoch": 37.30769230769231, "grad_norm": 0.637657105922699, "learning_rate": 7.581757508342603e-05, "loss": 2.763, "step": 2797 }, { "epoch": 37.32107023411371, "grad_norm": 0.5994833707809448, "learning_rate": 7.577308120133482e-05, "loss": 2.6043, "step": 2798 }, { "epoch": 37.33444816053512, "grad_norm": 0.6213527321815491, "learning_rate": 7.57285873192436e-05, "loss": 2.7679, "step": 2799 }, { "epoch": 37.34782608695652, "grad_norm": 0.6137163639068604, "learning_rate": 7.568409343715239e-05, "loss": 2.6287, "step": 2800 }, { "epoch": 37.36120401337793, "grad_norm": 0.6083935499191284, "learning_rate": 7.563959955506118e-05, "loss": 2.6636, "step": 2801 }, { "epoch": 37.374581939799334, "grad_norm": 0.6070682406425476, "learning_rate": 7.559510567296996e-05, "loss": 2.5606, "step": 2802 }, { "epoch": 37.38795986622073, "grad_norm": 0.6027565002441406, "learning_rate": 7.555061179087876e-05, "loss": 2.55, "step": 2803 }, { "epoch": 37.40133779264214, "grad_norm": 0.6037593483924866, "learning_rate": 7.550611790878754e-05, "loss": 2.4951, "step": 2804 }, { "epoch": 37.414715719063544, "grad_norm": 0.588081955909729, "learning_rate": 7.546162402669633e-05, "loss": 2.4888, "step": 2805 }, { "epoch": 37.42809364548495, "grad_norm": 0.640067994594574, "learning_rate": 7.541713014460512e-05, "loss": 2.6508, "step": 2806 }, { "epoch": 37.441471571906355, "grad_norm": 0.6123721599578857, "learning_rate": 7.53726362625139e-05, "loss": 2.6542, "step": 2807 }, { "epoch": 37.45484949832776, "grad_norm": 0.6153370141983032, "learning_rate": 7.53281423804227e-05, "loss": 2.5986, "step": 2808 }, { "epoch": 37.468227424749166, "grad_norm": 0.6331472396850586, "learning_rate": 7.528364849833148e-05, "loss": 2.5883, "step": 2809 }, { "epoch": 37.48160535117057, "grad_norm": 0.6108154654502869, "learning_rate": 7.523915461624026e-05, "loss": 2.5254, "step": 2810 }, { "epoch": 37.49498327759197, "grad_norm": 0.6167747974395752, "learning_rate": 7.519466073414906e-05, "loss": 2.5088, "step": 2811 }, { "epoch": 37.508361204013376, "grad_norm": 0.6372016668319702, "learning_rate": 7.515016685205784e-05, "loss": 2.6996, "step": 2812 }, { "epoch": 37.52173913043478, "grad_norm": 0.5725007057189941, "learning_rate": 7.510567296996664e-05, "loss": 2.5294, "step": 2813 }, { "epoch": 37.53511705685619, "grad_norm": 0.5692083835601807, "learning_rate": 7.506117908787542e-05, "loss": 2.5158, "step": 2814 }, { "epoch": 37.54849498327759, "grad_norm": 0.6351197361946106, "learning_rate": 7.50166852057842e-05, "loss": 2.7666, "step": 2815 }, { "epoch": 37.561872909699, "grad_norm": 0.5791570544242859, "learning_rate": 7.4972191323693e-05, "loss": 2.4728, "step": 2816 }, { "epoch": 37.575250836120404, "grad_norm": 0.6294588446617126, "learning_rate": 7.492769744160178e-05, "loss": 2.657, "step": 2817 }, { "epoch": 37.58862876254181, "grad_norm": 0.6071592569351196, "learning_rate": 7.488320355951058e-05, "loss": 2.7995, "step": 2818 }, { "epoch": 37.60200668896321, "grad_norm": 0.6079913377761841, "learning_rate": 7.483870967741936e-05, "loss": 2.8394, "step": 2819 }, { "epoch": 37.61538461538461, "grad_norm": 0.5867500901222229, "learning_rate": 7.479421579532814e-05, "loss": 2.413, "step": 2820 }, { "epoch": 37.62876254180602, "grad_norm": 0.6049195528030396, "learning_rate": 7.474972191323694e-05, "loss": 2.64, "step": 2821 }, { "epoch": 37.642140468227424, "grad_norm": 0.5787717700004578, "learning_rate": 7.470522803114572e-05, "loss": 2.5029, "step": 2822 }, { "epoch": 37.65551839464883, "grad_norm": 0.6378308534622192, "learning_rate": 7.466073414905452e-05, "loss": 2.6937, "step": 2823 }, { "epoch": 37.668896321070235, "grad_norm": 0.6016746163368225, "learning_rate": 7.46162402669633e-05, "loss": 2.477, "step": 2824 }, { "epoch": 37.68227424749164, "grad_norm": 0.5864558219909668, "learning_rate": 7.457174638487208e-05, "loss": 2.7177, "step": 2825 }, { "epoch": 37.69565217391305, "grad_norm": 0.6352173686027527, "learning_rate": 7.452725250278088e-05, "loss": 2.73, "step": 2826 }, { "epoch": 37.709030100334445, "grad_norm": 0.6432989239692688, "learning_rate": 7.448275862068966e-05, "loss": 2.5062, "step": 2827 }, { "epoch": 37.72240802675585, "grad_norm": 0.6061347723007202, "learning_rate": 7.443826473859846e-05, "loss": 2.5776, "step": 2828 }, { "epoch": 37.735785953177256, "grad_norm": 0.6208527684211731, "learning_rate": 7.439377085650724e-05, "loss": 2.4125, "step": 2829 }, { "epoch": 37.74916387959866, "grad_norm": 0.6052190661430359, "learning_rate": 7.434927697441602e-05, "loss": 2.5331, "step": 2830 }, { "epoch": 37.76254180602007, "grad_norm": 0.6361626982688904, "learning_rate": 7.43047830923248e-05, "loss": 2.7675, "step": 2831 }, { "epoch": 37.77591973244147, "grad_norm": 0.6102336645126343, "learning_rate": 7.426028921023359e-05, "loss": 2.3707, "step": 2832 }, { "epoch": 37.78929765886288, "grad_norm": 0.6248254776000977, "learning_rate": 7.421579532814238e-05, "loss": 2.8015, "step": 2833 }, { "epoch": 37.802675585284284, "grad_norm": 0.6449552178382874, "learning_rate": 7.417130144605116e-05, "loss": 2.7157, "step": 2834 }, { "epoch": 37.81605351170568, "grad_norm": 0.6328997611999512, "learning_rate": 7.412680756395996e-05, "loss": 2.8754, "step": 2835 }, { "epoch": 37.82943143812709, "grad_norm": 0.617190957069397, "learning_rate": 7.408231368186874e-05, "loss": 2.8825, "step": 2836 }, { "epoch": 37.84280936454849, "grad_norm": 0.6431034207344055, "learning_rate": 7.403781979977752e-05, "loss": 2.877, "step": 2837 }, { "epoch": 37.8561872909699, "grad_norm": 0.6334032416343689, "learning_rate": 7.399332591768632e-05, "loss": 2.5787, "step": 2838 }, { "epoch": 37.869565217391305, "grad_norm": 0.599069356918335, "learning_rate": 7.39488320355951e-05, "loss": 2.6774, "step": 2839 }, { "epoch": 37.88294314381271, "grad_norm": 0.6226022243499756, "learning_rate": 7.39043381535039e-05, "loss": 2.7821, "step": 2840 }, { "epoch": 37.896321070234116, "grad_norm": 0.6168680191040039, "learning_rate": 7.385984427141268e-05, "loss": 2.6552, "step": 2841 }, { "epoch": 37.90969899665552, "grad_norm": 0.5987963080406189, "learning_rate": 7.381535038932146e-05, "loss": 2.6139, "step": 2842 }, { "epoch": 37.92307692307692, "grad_norm": 0.5855274796485901, "learning_rate": 7.377085650723026e-05, "loss": 2.687, "step": 2843 }, { "epoch": 37.936454849498325, "grad_norm": 0.6431803703308105, "learning_rate": 7.372636262513904e-05, "loss": 2.7411, "step": 2844 }, { "epoch": 37.94983277591973, "grad_norm": 0.6025601029396057, "learning_rate": 7.368186874304784e-05, "loss": 2.6054, "step": 2845 }, { "epoch": 37.96321070234114, "grad_norm": 0.6150123476982117, "learning_rate": 7.363737486095662e-05, "loss": 2.7483, "step": 2846 }, { "epoch": 37.97658862876254, "grad_norm": 0.5857491493225098, "learning_rate": 7.35928809788654e-05, "loss": 2.68, "step": 2847 }, { "epoch": 37.98996655518395, "grad_norm": 0.6244483590126038, "learning_rate": 7.35483870967742e-05, "loss": 2.7171, "step": 2848 }, { "epoch": 38.0, "grad_norm": 0.7437806725502014, "learning_rate": 7.350389321468298e-05, "loss": 2.5646, "step": 2849 }, { "epoch": 38.013377926421406, "grad_norm": 0.6323840618133545, "learning_rate": 7.345939933259178e-05, "loss": 2.7457, "step": 2850 }, { "epoch": 38.02675585284281, "grad_norm": 0.6000533103942871, "learning_rate": 7.341490545050056e-05, "loss": 2.6829, "step": 2851 }, { "epoch": 38.04013377926422, "grad_norm": 0.621135950088501, "learning_rate": 7.337041156840934e-05, "loss": 2.795, "step": 2852 }, { "epoch": 38.05351170568562, "grad_norm": 0.6256486773490906, "learning_rate": 7.332591768631814e-05, "loss": 2.6762, "step": 2853 }, { "epoch": 38.06688963210702, "grad_norm": 0.6535592079162598, "learning_rate": 7.328142380422692e-05, "loss": 2.8339, "step": 2854 }, { "epoch": 38.080267558528426, "grad_norm": 0.6324830055236816, "learning_rate": 7.323692992213572e-05, "loss": 2.6129, "step": 2855 }, { "epoch": 38.09364548494983, "grad_norm": 0.5966958403587341, "learning_rate": 7.31924360400445e-05, "loss": 2.3738, "step": 2856 }, { "epoch": 38.10702341137124, "grad_norm": 0.6118017435073853, "learning_rate": 7.314794215795328e-05, "loss": 2.5778, "step": 2857 }, { "epoch": 38.12040133779264, "grad_norm": 0.5761687159538269, "learning_rate": 7.310344827586208e-05, "loss": 2.347, "step": 2858 }, { "epoch": 38.13377926421405, "grad_norm": 0.6576936841011047, "learning_rate": 7.305895439377086e-05, "loss": 2.6811, "step": 2859 }, { "epoch": 38.147157190635454, "grad_norm": 0.6086276173591614, "learning_rate": 7.301446051167965e-05, "loss": 2.713, "step": 2860 }, { "epoch": 38.16053511705686, "grad_norm": 0.5824201107025146, "learning_rate": 7.296996662958844e-05, "loss": 2.457, "step": 2861 }, { "epoch": 38.17391304347826, "grad_norm": 0.6397709250450134, "learning_rate": 7.292547274749722e-05, "loss": 2.5384, "step": 2862 }, { "epoch": 38.187290969899664, "grad_norm": 0.6120484471321106, "learning_rate": 7.288097886540602e-05, "loss": 2.5742, "step": 2863 }, { "epoch": 38.20066889632107, "grad_norm": 0.6424990892410278, "learning_rate": 7.28364849833148e-05, "loss": 2.8879, "step": 2864 }, { "epoch": 38.214046822742475, "grad_norm": 0.610888659954071, "learning_rate": 7.27919911012236e-05, "loss": 2.5114, "step": 2865 }, { "epoch": 38.22742474916388, "grad_norm": 0.5722895264625549, "learning_rate": 7.274749721913238e-05, "loss": 2.1089, "step": 2866 }, { "epoch": 38.240802675585286, "grad_norm": 0.6000377535820007, "learning_rate": 7.270300333704116e-05, "loss": 2.3571, "step": 2867 }, { "epoch": 38.25418060200669, "grad_norm": 0.6441863775253296, "learning_rate": 7.265850945494995e-05, "loss": 2.828, "step": 2868 }, { "epoch": 38.2675585284281, "grad_norm": 0.6954602003097534, "learning_rate": 7.261401557285874e-05, "loss": 2.586, "step": 2869 }, { "epoch": 38.280936454849495, "grad_norm": 0.6308737397193909, "learning_rate": 7.256952169076753e-05, "loss": 2.558, "step": 2870 }, { "epoch": 38.2943143812709, "grad_norm": 0.5915984511375427, "learning_rate": 7.252502780867632e-05, "loss": 2.6369, "step": 2871 }, { "epoch": 38.30769230769231, "grad_norm": 0.5993978381156921, "learning_rate": 7.24805339265851e-05, "loss": 2.5857, "step": 2872 }, { "epoch": 38.32107023411371, "grad_norm": 0.5899291038513184, "learning_rate": 7.243604004449388e-05, "loss": 2.396, "step": 2873 }, { "epoch": 38.33444816053512, "grad_norm": 0.6214590668678284, "learning_rate": 7.239154616240266e-05, "loss": 2.3691, "step": 2874 }, { "epoch": 38.34782608695652, "grad_norm": 0.5765502452850342, "learning_rate": 7.234705228031146e-05, "loss": 2.5417, "step": 2875 }, { "epoch": 38.36120401337793, "grad_norm": 0.6192215085029602, "learning_rate": 7.230255839822024e-05, "loss": 2.459, "step": 2876 }, { "epoch": 38.374581939799334, "grad_norm": 0.6413353085517883, "learning_rate": 7.225806451612904e-05, "loss": 2.5345, "step": 2877 }, { "epoch": 38.38795986622073, "grad_norm": 0.6169536709785461, "learning_rate": 7.221357063403782e-05, "loss": 2.5972, "step": 2878 }, { "epoch": 38.40133779264214, "grad_norm": 0.6331331133842468, "learning_rate": 7.21690767519466e-05, "loss": 2.625, "step": 2879 }, { "epoch": 38.414715719063544, "grad_norm": 0.6282365918159485, "learning_rate": 7.21245828698554e-05, "loss": 2.6334, "step": 2880 }, { "epoch": 38.42809364548495, "grad_norm": 0.6211024522781372, "learning_rate": 7.208008898776418e-05, "loss": 2.4833, "step": 2881 }, { "epoch": 38.441471571906355, "grad_norm": 0.6600573062896729, "learning_rate": 7.203559510567298e-05, "loss": 2.7515, "step": 2882 }, { "epoch": 38.45484949832776, "grad_norm": 0.6234501004219055, "learning_rate": 7.199110122358176e-05, "loss": 2.4914, "step": 2883 }, { "epoch": 38.468227424749166, "grad_norm": 0.6104183197021484, "learning_rate": 7.194660734149054e-05, "loss": 2.6202, "step": 2884 }, { "epoch": 38.48160535117057, "grad_norm": 0.6139323711395264, "learning_rate": 7.190211345939934e-05, "loss": 2.6918, "step": 2885 }, { "epoch": 38.49498327759197, "grad_norm": 0.6672762632369995, "learning_rate": 7.185761957730812e-05, "loss": 2.7207, "step": 2886 }, { "epoch": 38.508361204013376, "grad_norm": 0.636197030544281, "learning_rate": 7.181312569521691e-05, "loss": 2.8393, "step": 2887 }, { "epoch": 38.52173913043478, "grad_norm": 0.6464013457298279, "learning_rate": 7.17686318131257e-05, "loss": 2.6601, "step": 2888 }, { "epoch": 38.53511705685619, "grad_norm": 0.5882225632667542, "learning_rate": 7.172413793103448e-05, "loss": 2.4206, "step": 2889 }, { "epoch": 38.54849498327759, "grad_norm": 0.6114091873168945, "learning_rate": 7.167964404894328e-05, "loss": 2.6474, "step": 2890 }, { "epoch": 38.561872909699, "grad_norm": 0.5976521968841553, "learning_rate": 7.163515016685206e-05, "loss": 2.5393, "step": 2891 }, { "epoch": 38.575250836120404, "grad_norm": 0.6302757859230042, "learning_rate": 7.159065628476085e-05, "loss": 2.5687, "step": 2892 }, { "epoch": 38.58862876254181, "grad_norm": 0.6598967909812927, "learning_rate": 7.154616240266964e-05, "loss": 2.8124, "step": 2893 }, { "epoch": 38.60200668896321, "grad_norm": 0.6214116215705872, "learning_rate": 7.150166852057842e-05, "loss": 2.6531, "step": 2894 }, { "epoch": 38.61538461538461, "grad_norm": 0.6264208555221558, "learning_rate": 7.145717463848721e-05, "loss": 2.6821, "step": 2895 }, { "epoch": 38.62876254180602, "grad_norm": 0.647375226020813, "learning_rate": 7.1412680756396e-05, "loss": 2.5769, "step": 2896 }, { "epoch": 38.642140468227424, "grad_norm": 0.6418511271476746, "learning_rate": 7.136818687430479e-05, "loss": 2.682, "step": 2897 }, { "epoch": 38.65551839464883, "grad_norm": 0.6308450102806091, "learning_rate": 7.132369299221358e-05, "loss": 2.6151, "step": 2898 }, { "epoch": 38.668896321070235, "grad_norm": 0.6675850749015808, "learning_rate": 7.127919911012236e-05, "loss": 2.7805, "step": 2899 }, { "epoch": 38.68227424749164, "grad_norm": 0.5915642380714417, "learning_rate": 7.123470522803115e-05, "loss": 2.6036, "step": 2900 }, { "epoch": 38.69565217391305, "grad_norm": 0.6425783038139343, "learning_rate": 7.119021134593994e-05, "loss": 2.805, "step": 2901 }, { "epoch": 38.709030100334445, "grad_norm": 0.613692045211792, "learning_rate": 7.114571746384873e-05, "loss": 2.5263, "step": 2902 }, { "epoch": 38.72240802675585, "grad_norm": 0.7906155586242676, "learning_rate": 7.110122358175751e-05, "loss": 2.7032, "step": 2903 }, { "epoch": 38.735785953177256, "grad_norm": 0.6317926645278931, "learning_rate": 7.10567296996663e-05, "loss": 2.7303, "step": 2904 }, { "epoch": 38.74916387959866, "grad_norm": 0.6563540101051331, "learning_rate": 7.101223581757509e-05, "loss": 2.6913, "step": 2905 }, { "epoch": 38.76254180602007, "grad_norm": 0.6583380699157715, "learning_rate": 7.096774193548388e-05, "loss": 2.8355, "step": 2906 }, { "epoch": 38.77591973244147, "grad_norm": 0.6026883125305176, "learning_rate": 7.092324805339267e-05, "loss": 2.4306, "step": 2907 }, { "epoch": 38.78929765886288, "grad_norm": 0.6352461576461792, "learning_rate": 7.087875417130145e-05, "loss": 2.7963, "step": 2908 }, { "epoch": 38.802675585284284, "grad_norm": 0.6036055684089661, "learning_rate": 7.083426028921024e-05, "loss": 2.6946, "step": 2909 }, { "epoch": 38.81605351170568, "grad_norm": 0.5911431312561035, "learning_rate": 7.078976640711903e-05, "loss": 2.5936, "step": 2910 }, { "epoch": 38.82943143812709, "grad_norm": 0.6125901341438293, "learning_rate": 7.074527252502781e-05, "loss": 2.4901, "step": 2911 }, { "epoch": 38.84280936454849, "grad_norm": 0.600537896156311, "learning_rate": 7.070077864293661e-05, "loss": 2.6603, "step": 2912 }, { "epoch": 38.8561872909699, "grad_norm": 0.6200169324874878, "learning_rate": 7.065628476084539e-05, "loss": 2.629, "step": 2913 }, { "epoch": 38.869565217391305, "grad_norm": 0.5789446830749512, "learning_rate": 7.061179087875418e-05, "loss": 2.3878, "step": 2914 }, { "epoch": 38.88294314381271, "grad_norm": 0.6316003799438477, "learning_rate": 7.056729699666296e-05, "loss": 2.4585, "step": 2915 }, { "epoch": 38.896321070234116, "grad_norm": 0.6021048426628113, "learning_rate": 7.052280311457174e-05, "loss": 2.4609, "step": 2916 }, { "epoch": 38.90969899665552, "grad_norm": 0.6088526844978333, "learning_rate": 7.047830923248054e-05, "loss": 2.6102, "step": 2917 }, { "epoch": 38.92307692307692, "grad_norm": 0.658087432384491, "learning_rate": 7.043381535038932e-05, "loss": 2.6058, "step": 2918 }, { "epoch": 38.936454849498325, "grad_norm": 0.6708016991615295, "learning_rate": 7.038932146829811e-05, "loss": 2.9315, "step": 2919 }, { "epoch": 38.94983277591973, "grad_norm": 0.6475014686584473, "learning_rate": 7.03448275862069e-05, "loss": 2.8692, "step": 2920 }, { "epoch": 38.96321070234114, "grad_norm": 0.6264966726303101, "learning_rate": 7.030033370411568e-05, "loss": 2.7866, "step": 2921 }, { "epoch": 38.97658862876254, "grad_norm": 0.6225918531417847, "learning_rate": 7.025583982202447e-05, "loss": 2.4795, "step": 2922 }, { "epoch": 38.98996655518395, "grad_norm": 0.6052890419960022, "learning_rate": 7.021134593993326e-05, "loss": 2.2833, "step": 2923 }, { "epoch": 39.0, "grad_norm": 0.695241391658783, "learning_rate": 7.016685205784205e-05, "loss": 2.4251, "step": 2924 }, { "epoch": 39.013377926421406, "grad_norm": 0.5891976952552795, "learning_rate": 7.012235817575084e-05, "loss": 2.561, "step": 2925 }, { "epoch": 39.02675585284281, "grad_norm": 0.6551567316055298, "learning_rate": 7.007786429365962e-05, "loss": 2.4206, "step": 2926 }, { "epoch": 39.04013377926422, "grad_norm": 0.5852656364440918, "learning_rate": 7.003337041156841e-05, "loss": 2.4159, "step": 2927 }, { "epoch": 39.05351170568562, "grad_norm": 0.5813770294189453, "learning_rate": 6.99888765294772e-05, "loss": 2.5426, "step": 2928 }, { "epoch": 39.06688963210702, "grad_norm": 0.5840299725532532, "learning_rate": 6.994438264738599e-05, "loss": 2.5013, "step": 2929 }, { "epoch": 39.080267558528426, "grad_norm": 0.5940836071968079, "learning_rate": 6.989988876529477e-05, "loss": 2.4776, "step": 2930 }, { "epoch": 39.09364548494983, "grad_norm": 0.6290647387504578, "learning_rate": 6.985539488320356e-05, "loss": 2.6289, "step": 2931 }, { "epoch": 39.10702341137124, "grad_norm": 0.6070288419723511, "learning_rate": 6.981090100111235e-05, "loss": 2.3952, "step": 2932 }, { "epoch": 39.12040133779264, "grad_norm": 0.6416279077529907, "learning_rate": 6.976640711902114e-05, "loss": 2.8001, "step": 2933 }, { "epoch": 39.13377926421405, "grad_norm": 0.6238275170326233, "learning_rate": 6.972191323692993e-05, "loss": 2.5927, "step": 2934 }, { "epoch": 39.147157190635454, "grad_norm": 0.6179841756820679, "learning_rate": 6.967741935483871e-05, "loss": 2.4691, "step": 2935 }, { "epoch": 39.16053511705686, "grad_norm": 0.5916646122932434, "learning_rate": 6.96329254727475e-05, "loss": 2.4734, "step": 2936 }, { "epoch": 39.17391304347826, "grad_norm": 0.6340898275375366, "learning_rate": 6.958843159065629e-05, "loss": 2.6764, "step": 2937 }, { "epoch": 39.187290969899664, "grad_norm": 0.6154446005821228, "learning_rate": 6.954393770856507e-05, "loss": 2.666, "step": 2938 }, { "epoch": 39.20066889632107, "grad_norm": 0.6166310906410217, "learning_rate": 6.949944382647387e-05, "loss": 2.5198, "step": 2939 }, { "epoch": 39.214046822742475, "grad_norm": 0.6393052935600281, "learning_rate": 6.945494994438265e-05, "loss": 2.6016, "step": 2940 }, { "epoch": 39.22742474916388, "grad_norm": 0.6041097640991211, "learning_rate": 6.941045606229144e-05, "loss": 2.462, "step": 2941 }, { "epoch": 39.240802675585286, "grad_norm": 0.6157374382019043, "learning_rate": 6.936596218020023e-05, "loss": 2.5844, "step": 2942 }, { "epoch": 39.25418060200669, "grad_norm": 0.641880452632904, "learning_rate": 6.932146829810901e-05, "loss": 2.7159, "step": 2943 }, { "epoch": 39.2675585284281, "grad_norm": 0.6598557233810425, "learning_rate": 6.927697441601781e-05, "loss": 2.5649, "step": 2944 }, { "epoch": 39.280936454849495, "grad_norm": 0.6511268615722656, "learning_rate": 6.923248053392659e-05, "loss": 2.7623, "step": 2945 }, { "epoch": 39.2943143812709, "grad_norm": 0.6559281945228577, "learning_rate": 6.918798665183537e-05, "loss": 2.3524, "step": 2946 }, { "epoch": 39.30769230769231, "grad_norm": 0.601424515247345, "learning_rate": 6.914349276974417e-05, "loss": 2.2888, "step": 2947 }, { "epoch": 39.32107023411371, "grad_norm": 0.6525271534919739, "learning_rate": 6.909899888765295e-05, "loss": 2.4266, "step": 2948 }, { "epoch": 39.33444816053512, "grad_norm": 0.6467943787574768, "learning_rate": 6.905450500556175e-05, "loss": 2.654, "step": 2949 }, { "epoch": 39.34782608695652, "grad_norm": 0.6527394652366638, "learning_rate": 6.901001112347053e-05, "loss": 2.6237, "step": 2950 }, { "epoch": 39.36120401337793, "grad_norm": 0.615578830242157, "learning_rate": 6.896551724137931e-05, "loss": 2.6382, "step": 2951 }, { "epoch": 39.374581939799334, "grad_norm": 0.6257203221321106, "learning_rate": 6.892102335928811e-05, "loss": 2.8014, "step": 2952 }, { "epoch": 39.38795986622073, "grad_norm": 0.6732933521270752, "learning_rate": 6.887652947719689e-05, "loss": 2.7796, "step": 2953 }, { "epoch": 39.40133779264214, "grad_norm": 0.5996230840682983, "learning_rate": 6.883203559510569e-05, "loss": 2.4852, "step": 2954 }, { "epoch": 39.414715719063544, "grad_norm": 0.6640390157699585, "learning_rate": 6.878754171301446e-05, "loss": 2.4979, "step": 2955 }, { "epoch": 39.42809364548495, "grad_norm": 0.6220453381538391, "learning_rate": 6.874304783092325e-05, "loss": 2.4849, "step": 2956 }, { "epoch": 39.441471571906355, "grad_norm": 0.6283277273178101, "learning_rate": 6.869855394883203e-05, "loss": 2.5193, "step": 2957 }, { "epoch": 39.45484949832776, "grad_norm": 0.6696292757987976, "learning_rate": 6.865406006674082e-05, "loss": 2.6485, "step": 2958 }, { "epoch": 39.468227424749166, "grad_norm": 0.6544603705406189, "learning_rate": 6.860956618464961e-05, "loss": 2.5956, "step": 2959 }, { "epoch": 39.48160535117057, "grad_norm": 0.6541810631752014, "learning_rate": 6.85650723025584e-05, "loss": 2.502, "step": 2960 }, { "epoch": 39.49498327759197, "grad_norm": 0.6799680590629578, "learning_rate": 6.852057842046719e-05, "loss": 2.7545, "step": 2961 }, { "epoch": 39.508361204013376, "grad_norm": 0.6350213289260864, "learning_rate": 6.847608453837597e-05, "loss": 2.707, "step": 2962 }, { "epoch": 39.52173913043478, "grad_norm": 0.5974067449569702, "learning_rate": 6.843159065628476e-05, "loss": 2.5572, "step": 2963 }, { "epoch": 39.53511705685619, "grad_norm": 0.5829086899757385, "learning_rate": 6.838709677419355e-05, "loss": 2.5003, "step": 2964 }, { "epoch": 39.54849498327759, "grad_norm": 0.617396891117096, "learning_rate": 6.834260289210233e-05, "loss": 2.5413, "step": 2965 }, { "epoch": 39.561872909699, "grad_norm": 0.6405021548271179, "learning_rate": 6.829810901001113e-05, "loss": 2.4371, "step": 2966 }, { "epoch": 39.575250836120404, "grad_norm": 0.6281189322471619, "learning_rate": 6.825361512791991e-05, "loss": 2.6484, "step": 2967 }, { "epoch": 39.58862876254181, "grad_norm": 0.7368245720863342, "learning_rate": 6.82091212458287e-05, "loss": 2.5467, "step": 2968 }, { "epoch": 39.60200668896321, "grad_norm": 0.6534647941589355, "learning_rate": 6.816462736373749e-05, "loss": 2.7322, "step": 2969 }, { "epoch": 39.61538461538461, "grad_norm": 0.821247935295105, "learning_rate": 6.812013348164627e-05, "loss": 2.4748, "step": 2970 }, { "epoch": 39.62876254180602, "grad_norm": 0.6398321390151978, "learning_rate": 6.807563959955507e-05, "loss": 2.6597, "step": 2971 }, { "epoch": 39.642140468227424, "grad_norm": 0.6529011130332947, "learning_rate": 6.803114571746385e-05, "loss": 2.5417, "step": 2972 }, { "epoch": 39.65551839464883, "grad_norm": 0.6198508143424988, "learning_rate": 6.798665183537263e-05, "loss": 2.4684, "step": 2973 }, { "epoch": 39.668896321070235, "grad_norm": 0.6332361698150635, "learning_rate": 6.794215795328143e-05, "loss": 2.4961, "step": 2974 }, { "epoch": 39.68227424749164, "grad_norm": 0.6088337898254395, "learning_rate": 6.789766407119021e-05, "loss": 2.6169, "step": 2975 }, { "epoch": 39.69565217391305, "grad_norm": 0.6401640176773071, "learning_rate": 6.785317018909901e-05, "loss": 2.4942, "step": 2976 }, { "epoch": 39.709030100334445, "grad_norm": 0.6432260870933533, "learning_rate": 6.780867630700779e-05, "loss": 2.4886, "step": 2977 }, { "epoch": 39.72240802675585, "grad_norm": 0.6184273362159729, "learning_rate": 6.776418242491657e-05, "loss": 2.6445, "step": 2978 }, { "epoch": 39.735785953177256, "grad_norm": 0.6584762334823608, "learning_rate": 6.771968854282537e-05, "loss": 2.5727, "step": 2979 }, { "epoch": 39.74916387959866, "grad_norm": 0.577503502368927, "learning_rate": 6.767519466073415e-05, "loss": 2.3277, "step": 2980 }, { "epoch": 39.76254180602007, "grad_norm": 0.62856525182724, "learning_rate": 6.763070077864295e-05, "loss": 2.7912, "step": 2981 }, { "epoch": 39.77591973244147, "grad_norm": 0.6405977606773376, "learning_rate": 6.758620689655173e-05, "loss": 2.6165, "step": 2982 }, { "epoch": 39.78929765886288, "grad_norm": 0.5938370823860168, "learning_rate": 6.754171301446051e-05, "loss": 2.6061, "step": 2983 }, { "epoch": 39.802675585284284, "grad_norm": 0.6175585985183716, "learning_rate": 6.749721913236931e-05, "loss": 2.4875, "step": 2984 }, { "epoch": 39.81605351170568, "grad_norm": 0.7778329253196716, "learning_rate": 6.745272525027809e-05, "loss": 2.8471, "step": 2985 }, { "epoch": 39.82943143812709, "grad_norm": 0.6055701375007629, "learning_rate": 6.740823136818689e-05, "loss": 2.4975, "step": 2986 }, { "epoch": 39.84280936454849, "grad_norm": 0.68842613697052, "learning_rate": 6.736373748609567e-05, "loss": 2.7126, "step": 2987 }, { "epoch": 39.8561872909699, "grad_norm": 0.6329212784767151, "learning_rate": 6.731924360400445e-05, "loss": 2.6741, "step": 2988 }, { "epoch": 39.869565217391305, "grad_norm": 0.6239383220672607, "learning_rate": 6.727474972191325e-05, "loss": 2.6681, "step": 2989 }, { "epoch": 39.88294314381271, "grad_norm": 0.6401740908622742, "learning_rate": 6.723025583982203e-05, "loss": 2.7272, "step": 2990 }, { "epoch": 39.896321070234116, "grad_norm": 0.6988689303398132, "learning_rate": 6.718576195773083e-05, "loss": 2.7339, "step": 2991 }, { "epoch": 39.90969899665552, "grad_norm": 0.6372131705284119, "learning_rate": 6.714126807563961e-05, "loss": 2.4379, "step": 2992 }, { "epoch": 39.92307692307692, "grad_norm": 0.6818649172782898, "learning_rate": 6.709677419354839e-05, "loss": 2.8075, "step": 2993 }, { "epoch": 39.936454849498325, "grad_norm": 0.653063952922821, "learning_rate": 6.705228031145719e-05, "loss": 2.5813, "step": 2994 }, { "epoch": 39.94983277591973, "grad_norm": 0.657010018825531, "learning_rate": 6.700778642936597e-05, "loss": 2.7227, "step": 2995 }, { "epoch": 39.96321070234114, "grad_norm": 0.6535374522209167, "learning_rate": 6.696329254727475e-05, "loss": 2.7369, "step": 2996 }, { "epoch": 39.97658862876254, "grad_norm": 0.6069753766059875, "learning_rate": 6.691879866518353e-05, "loss": 2.5248, "step": 2997 }, { "epoch": 39.98996655518395, "grad_norm": 0.6699077486991882, "learning_rate": 6.687430478309233e-05, "loss": 2.6428, "step": 2998 }, { "epoch": 40.0, "grad_norm": 0.7744498252868652, "learning_rate": 6.682981090100111e-05, "loss": 2.7199, "step": 2999 }, { "epoch": 40.013377926421406, "grad_norm": 0.6341271996498108, "learning_rate": 6.67853170189099e-05, "loss": 2.5324, "step": 3000 } ], "logging_steps": 1, "max_steps": 4500, "num_input_tokens_seen": 0, "num_train_epochs": 60, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.283862643111936e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }