{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 57.971014492753625, "eval_steps": 500, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.028985507246376812, "grad_norm": 3.0505027770996094, "learning_rate": 9e-07, "loss": 1.5657, "step": 10 }, { "epoch": 0.057971014492753624, "grad_norm": 2.080113410949707, "learning_rate": 1.9e-06, "loss": 1.5118, "step": 20 }, { "epoch": 0.08695652173913043, "grad_norm": 2.4925858974456787, "learning_rate": 2.9e-06, "loss": 1.2433, "step": 30 }, { "epoch": 0.11594202898550725, "grad_norm": 1.7392232418060303, "learning_rate": 3.9e-06, "loss": 1.3809, "step": 40 }, { "epoch": 0.14492753623188406, "grad_norm": 1.8013620376586914, "learning_rate": 4.9000000000000005e-06, "loss": 1.3118, "step": 50 }, { "epoch": 0.17391304347826086, "grad_norm": 1.5780786275863647, "learning_rate": 5.9e-06, "loss": 1.1046, "step": 60 }, { "epoch": 0.2028985507246377, "grad_norm": 1.2937341928482056, "learning_rate": 6.900000000000001e-06, "loss": 1.2428, "step": 70 }, { "epoch": 0.2318840579710145, "grad_norm": 1.147234559059143, "learning_rate": 7.9e-06, "loss": 1.2453, "step": 80 }, { "epoch": 0.2608695652173913, "grad_norm": 0.7600051760673523, "learning_rate": 8.9e-06, "loss": 0.6767, "step": 90 }, { "epoch": 0.2898550724637681, "grad_norm": 0.8683933615684509, "learning_rate": 9.900000000000002e-06, "loss": 1.0279, "step": 100 }, { "epoch": 0.3188405797101449, "grad_norm": 0.6988456845283508, "learning_rate": 1.09e-05, "loss": 0.857, "step": 110 }, { "epoch": 0.34782608695652173, "grad_norm": 0.8881454467773438, "learning_rate": 1.19e-05, "loss": 0.8046, "step": 120 }, { "epoch": 0.37681159420289856, "grad_norm": 0.40000322461128235, "learning_rate": 1.29e-05, "loss": 0.774, "step": 130 }, { "epoch": 0.4057971014492754, "grad_norm": 1.2142903804779053, "learning_rate": 1.3900000000000002e-05, "loss": 0.5516, "step": 140 }, { "epoch": 0.43478260869565216, "grad_norm": 0.9134606719017029, "learning_rate": 1.49e-05, "loss": 0.7689, "step": 150 }, { "epoch": 0.463768115942029, "grad_norm": 0.978635311126709, "learning_rate": 1.59e-05, "loss": 0.7034, "step": 160 }, { "epoch": 0.4927536231884058, "grad_norm": 0.9544720649719238, "learning_rate": 1.69e-05, "loss": 0.5249, "step": 170 }, { "epoch": 0.5217391304347826, "grad_norm": 0.750441312789917, "learning_rate": 1.79e-05, "loss": 0.5178, "step": 180 }, { "epoch": 0.5507246376811594, "grad_norm": 0.8242844939231873, "learning_rate": 1.8900000000000002e-05, "loss": 0.4664, "step": 190 }, { "epoch": 0.5797101449275363, "grad_norm": 0.9814381003379822, "learning_rate": 1.9900000000000003e-05, "loss": 0.6898, "step": 200 }, { "epoch": 0.6086956521739131, "grad_norm": 0.7581349611282349, "learning_rate": 2.09e-05, "loss": 0.5289, "step": 210 }, { "epoch": 0.6376811594202898, "grad_norm": 0.9911883473396301, "learning_rate": 2.19e-05, "loss": 0.4249, "step": 220 }, { "epoch": 0.6666666666666666, "grad_norm": 0.9439306855201721, "learning_rate": 2.29e-05, "loss": 0.4135, "step": 230 }, { "epoch": 0.6956521739130435, "grad_norm": 1.3483587503433228, "learning_rate": 2.39e-05, "loss": 0.3891, "step": 240 }, { "epoch": 0.7246376811594203, "grad_norm": 0.819063663482666, "learning_rate": 2.4900000000000002e-05, "loss": 0.4549, "step": 250 }, { "epoch": 0.7536231884057971, "grad_norm": 0.8383818864822388, "learning_rate": 2.5900000000000003e-05, "loss": 0.4251, "step": 260 }, { "epoch": 0.782608695652174, "grad_norm": 0.9047835469245911, "learning_rate": 2.6900000000000003e-05, "loss": 0.4383, "step": 270 }, { "epoch": 0.8115942028985508, "grad_norm": 0.7909944653511047, "learning_rate": 2.7900000000000004e-05, "loss": 0.3907, "step": 280 }, { "epoch": 0.8405797101449275, "grad_norm": 0.8012731075286865, "learning_rate": 2.8899999999999998e-05, "loss": 0.4489, "step": 290 }, { "epoch": 0.8695652173913043, "grad_norm": 1.4028682708740234, "learning_rate": 2.9900000000000002e-05, "loss": 0.3037, "step": 300 }, { "epoch": 0.8985507246376812, "grad_norm": 1.488762617111206, "learning_rate": 3.09e-05, "loss": 0.3911, "step": 310 }, { "epoch": 0.927536231884058, "grad_norm": 0.7830433249473572, "learning_rate": 3.19e-05, "loss": 0.3919, "step": 320 }, { "epoch": 0.9565217391304348, "grad_norm": 1.5407651662826538, "learning_rate": 3.29e-05, "loss": 0.3686, "step": 330 }, { "epoch": 0.9855072463768116, "grad_norm": 0.9575673937797546, "learning_rate": 3.3900000000000004e-05, "loss": 0.3367, "step": 340 }, { "epoch": 1.0144927536231885, "grad_norm": 1.3226127624511719, "learning_rate": 3.49e-05, "loss": 0.3767, "step": 350 }, { "epoch": 1.0434782608695652, "grad_norm": 1.4169162511825562, "learning_rate": 3.59e-05, "loss": 0.3338, "step": 360 }, { "epoch": 1.0724637681159421, "grad_norm": 1.7206474542617798, "learning_rate": 3.69e-05, "loss": 0.3345, "step": 370 }, { "epoch": 1.1014492753623188, "grad_norm": 1.4332363605499268, "learning_rate": 3.79e-05, "loss": 0.3272, "step": 380 }, { "epoch": 1.1304347826086956, "grad_norm": 1.1724469661712646, "learning_rate": 3.8900000000000004e-05, "loss": 0.2866, "step": 390 }, { "epoch": 1.1594202898550725, "grad_norm": 0.83205646276474, "learning_rate": 3.99e-05, "loss": 0.356, "step": 400 }, { "epoch": 1.1884057971014492, "grad_norm": 1.1642824411392212, "learning_rate": 4.09e-05, "loss": 0.2858, "step": 410 }, { "epoch": 1.2173913043478262, "grad_norm": 1.0703731775283813, "learning_rate": 4.19e-05, "loss": 0.3091, "step": 420 }, { "epoch": 1.2463768115942029, "grad_norm": 0.886146605014801, "learning_rate": 4.29e-05, "loss": 0.3163, "step": 430 }, { "epoch": 1.2753623188405796, "grad_norm": 0.8630309104919434, "learning_rate": 4.39e-05, "loss": 0.2843, "step": 440 }, { "epoch": 1.3043478260869565, "grad_norm": 0.8399680256843567, "learning_rate": 4.49e-05, "loss": 0.2451, "step": 450 }, { "epoch": 1.3333333333333333, "grad_norm": 1.4553627967834473, "learning_rate": 4.5900000000000004e-05, "loss": 0.2888, "step": 460 }, { "epoch": 1.3623188405797102, "grad_norm": 1.8121979236602783, "learning_rate": 4.69e-05, "loss": 0.257, "step": 470 }, { "epoch": 1.391304347826087, "grad_norm": 1.165885329246521, "learning_rate": 4.79e-05, "loss": 0.2579, "step": 480 }, { "epoch": 1.4202898550724639, "grad_norm": 0.8950861096382141, "learning_rate": 4.89e-05, "loss": 0.279, "step": 490 }, { "epoch": 1.4492753623188406, "grad_norm": 0.8436807990074158, "learning_rate": 4.99e-05, "loss": 0.2503, "step": 500 }, { "epoch": 1.4782608695652173, "grad_norm": 0.5035578012466431, "learning_rate": 5.0900000000000004e-05, "loss": 0.2177, "step": 510 }, { "epoch": 1.5072463768115942, "grad_norm": 1.0943514108657837, "learning_rate": 5.19e-05, "loss": 0.3226, "step": 520 }, { "epoch": 1.5362318840579712, "grad_norm": 0.7721551060676575, "learning_rate": 5.2900000000000005e-05, "loss": 0.2258, "step": 530 }, { "epoch": 1.5652173913043477, "grad_norm": 1.0129557847976685, "learning_rate": 5.390000000000001e-05, "loss": 0.299, "step": 540 }, { "epoch": 1.5942028985507246, "grad_norm": 1.014032006263733, "learning_rate": 5.4900000000000006e-05, "loss": 0.2733, "step": 550 }, { "epoch": 1.6231884057971016, "grad_norm": 1.73903489112854, "learning_rate": 5.590000000000001e-05, "loss": 0.2611, "step": 560 }, { "epoch": 1.6521739130434783, "grad_norm": 2.070592164993286, "learning_rate": 5.69e-05, "loss": 0.2514, "step": 570 }, { "epoch": 1.681159420289855, "grad_norm": 1.2096529006958008, "learning_rate": 5.79e-05, "loss": 0.2635, "step": 580 }, { "epoch": 1.710144927536232, "grad_norm": 0.9375045895576477, "learning_rate": 5.89e-05, "loss": 0.2542, "step": 590 }, { "epoch": 1.7391304347826086, "grad_norm": 0.8468955755233765, "learning_rate": 5.99e-05, "loss": 0.2525, "step": 600 }, { "epoch": 1.7681159420289854, "grad_norm": 0.9298123717308044, "learning_rate": 6.09e-05, "loss": 0.2511, "step": 610 }, { "epoch": 1.7971014492753623, "grad_norm": 0.8824529647827148, "learning_rate": 6.19e-05, "loss": 0.2373, "step": 620 }, { "epoch": 1.8260869565217392, "grad_norm": 0.7265031337738037, "learning_rate": 6.29e-05, "loss": 0.2139, "step": 630 }, { "epoch": 1.855072463768116, "grad_norm": 1.0328197479248047, "learning_rate": 6.390000000000001e-05, "loss": 0.2141, "step": 640 }, { "epoch": 1.8840579710144927, "grad_norm": 0.5020371079444885, "learning_rate": 6.49e-05, "loss": 0.2348, "step": 650 }, { "epoch": 1.9130434782608696, "grad_norm": 0.7637607455253601, "learning_rate": 6.59e-05, "loss": 0.2097, "step": 660 }, { "epoch": 1.9420289855072463, "grad_norm": 0.8997554779052734, "learning_rate": 6.690000000000001e-05, "loss": 0.2228, "step": 670 }, { "epoch": 1.971014492753623, "grad_norm": 0.7784063816070557, "learning_rate": 6.790000000000001e-05, "loss": 0.2412, "step": 680 }, { "epoch": 2.0, "grad_norm": 0.9886015057563782, "learning_rate": 6.89e-05, "loss": 0.2263, "step": 690 }, { "epoch": 2.028985507246377, "grad_norm": 0.7861230969429016, "learning_rate": 6.99e-05, "loss": 0.2281, "step": 700 }, { "epoch": 2.0579710144927534, "grad_norm": 0.6980922222137451, "learning_rate": 7.09e-05, "loss": 0.2295, "step": 710 }, { "epoch": 2.0869565217391304, "grad_norm": 0.9516819715499878, "learning_rate": 7.19e-05, "loss": 0.2485, "step": 720 }, { "epoch": 2.1159420289855073, "grad_norm": 0.7088673710823059, "learning_rate": 7.29e-05, "loss": 0.2256, "step": 730 }, { "epoch": 2.1449275362318843, "grad_norm": 0.8767524361610413, "learning_rate": 7.390000000000001e-05, "loss": 0.2105, "step": 740 }, { "epoch": 2.1739130434782608, "grad_norm": 0.5966852903366089, "learning_rate": 7.49e-05, "loss": 0.2192, "step": 750 }, { "epoch": 2.2028985507246377, "grad_norm": 0.7955141067504883, "learning_rate": 7.59e-05, "loss": 0.2166, "step": 760 }, { "epoch": 2.2318840579710146, "grad_norm": 0.8298391699790955, "learning_rate": 7.69e-05, "loss": 0.2583, "step": 770 }, { "epoch": 2.260869565217391, "grad_norm": 1.2403712272644043, "learning_rate": 7.790000000000001e-05, "loss": 0.2428, "step": 780 }, { "epoch": 2.289855072463768, "grad_norm": 1.2657474279403687, "learning_rate": 7.890000000000001e-05, "loss": 0.2025, "step": 790 }, { "epoch": 2.318840579710145, "grad_norm": 1.0565385818481445, "learning_rate": 7.99e-05, "loss": 0.1949, "step": 800 }, { "epoch": 2.3478260869565215, "grad_norm": 1.0542415380477905, "learning_rate": 8.090000000000001e-05, "loss": 0.2692, "step": 810 }, { "epoch": 2.3768115942028984, "grad_norm": 0.6383161544799805, "learning_rate": 8.19e-05, "loss": 0.221, "step": 820 }, { "epoch": 2.4057971014492754, "grad_norm": 0.8400139808654785, "learning_rate": 8.29e-05, "loss": 0.1894, "step": 830 }, { "epoch": 2.4347826086956523, "grad_norm": 0.9511343240737915, "learning_rate": 8.39e-05, "loss": 0.2402, "step": 840 }, { "epoch": 2.463768115942029, "grad_norm": 1.1040838956832886, "learning_rate": 8.49e-05, "loss": 0.1974, "step": 850 }, { "epoch": 2.4927536231884058, "grad_norm": 0.8064889311790466, "learning_rate": 8.59e-05, "loss": 0.2312, "step": 860 }, { "epoch": 2.5217391304347827, "grad_norm": 0.7647086381912231, "learning_rate": 8.69e-05, "loss": 0.1977, "step": 870 }, { "epoch": 2.550724637681159, "grad_norm": 0.8380846977233887, "learning_rate": 8.790000000000001e-05, "loss": 0.2233, "step": 880 }, { "epoch": 2.579710144927536, "grad_norm": 0.48276486992836, "learning_rate": 8.89e-05, "loss": 0.1741, "step": 890 }, { "epoch": 2.608695652173913, "grad_norm": 0.7797939777374268, "learning_rate": 8.99e-05, "loss": 0.1951, "step": 900 }, { "epoch": 2.63768115942029, "grad_norm": 0.8178322911262512, "learning_rate": 9.090000000000001e-05, "loss": 0.2087, "step": 910 }, { "epoch": 2.6666666666666665, "grad_norm": 0.8776262998580933, "learning_rate": 9.190000000000001e-05, "loss": 0.1914, "step": 920 }, { "epoch": 2.6956521739130435, "grad_norm": 0.46826550364494324, "learning_rate": 9.290000000000001e-05, "loss": 0.1902, "step": 930 }, { "epoch": 2.7246376811594204, "grad_norm": 0.9637788534164429, "learning_rate": 9.39e-05, "loss": 0.2052, "step": 940 }, { "epoch": 2.753623188405797, "grad_norm": 1.1427522897720337, "learning_rate": 9.49e-05, "loss": 0.2212, "step": 950 }, { "epoch": 2.782608695652174, "grad_norm": 0.6853973865509033, "learning_rate": 9.59e-05, "loss": 0.1921, "step": 960 }, { "epoch": 2.8115942028985508, "grad_norm": 0.8581897020339966, "learning_rate": 9.69e-05, "loss": 0.2215, "step": 970 }, { "epoch": 2.8405797101449277, "grad_norm": 0.9988269805908203, "learning_rate": 9.790000000000001e-05, "loss": 0.1838, "step": 980 }, { "epoch": 2.869565217391304, "grad_norm": 0.3667157292366028, "learning_rate": 9.89e-05, "loss": 0.156, "step": 990 }, { "epoch": 2.898550724637681, "grad_norm": 0.8472527265548706, "learning_rate": 9.99e-05, "loss": 0.1936, "step": 1000 }, { "epoch": 2.927536231884058, "grad_norm": 0.6419370770454407, "learning_rate": 9.999994463727085e-05, "loss": 0.1723, "step": 1010 }, { "epoch": 2.9565217391304346, "grad_norm": 1.061924934387207, "learning_rate": 9.999975326009292e-05, "loss": 0.24, "step": 1020 }, { "epoch": 2.9855072463768115, "grad_norm": 0.44797030091285706, "learning_rate": 9.999942518549879e-05, "loss": 0.1526, "step": 1030 }, { "epoch": 3.0144927536231885, "grad_norm": 0.5258593559265137, "learning_rate": 9.999896041438544e-05, "loss": 0.2082, "step": 1040 }, { "epoch": 3.0434782608695654, "grad_norm": 0.7527342438697815, "learning_rate": 9.999835894802353e-05, "loss": 0.166, "step": 1050 }, { "epoch": 3.072463768115942, "grad_norm": 0.8045316934585571, "learning_rate": 9.999762078805743e-05, "loss": 0.1526, "step": 1060 }, { "epoch": 3.101449275362319, "grad_norm": 0.6621928215026855, "learning_rate": 9.999674593650526e-05, "loss": 0.1965, "step": 1070 }, { "epoch": 3.130434782608696, "grad_norm": 0.6621638536453247, "learning_rate": 9.99957343957588e-05, "loss": 0.1575, "step": 1080 }, { "epoch": 3.1594202898550723, "grad_norm": 0.6635481715202332, "learning_rate": 9.99945861685836e-05, "loss": 0.1943, "step": 1090 }, { "epoch": 3.1884057971014492, "grad_norm": 1.0563372373580933, "learning_rate": 9.999330125811884e-05, "loss": 0.1864, "step": 1100 }, { "epoch": 3.217391304347826, "grad_norm": 0.7428378462791443, "learning_rate": 9.999187966787744e-05, "loss": 0.2003, "step": 1110 }, { "epoch": 3.246376811594203, "grad_norm": 0.6000686287879944, "learning_rate": 9.999032140174595e-05, "loss": 0.1587, "step": 1120 }, { "epoch": 3.2753623188405796, "grad_norm": 0.8239452838897705, "learning_rate": 9.998862646398464e-05, "loss": 0.1838, "step": 1130 }, { "epoch": 3.3043478260869565, "grad_norm": 0.6900084018707275, "learning_rate": 9.998679485922739e-05, "loss": 0.2002, "step": 1140 }, { "epoch": 3.3333333333333335, "grad_norm": 1.6132053136825562, "learning_rate": 9.998482659248174e-05, "loss": 0.2293, "step": 1150 }, { "epoch": 3.36231884057971, "grad_norm": 0.6084638237953186, "learning_rate": 9.998272166912883e-05, "loss": 0.1645, "step": 1160 }, { "epoch": 3.391304347826087, "grad_norm": 0.5943679809570312, "learning_rate": 9.998048009492347e-05, "loss": 0.1763, "step": 1170 }, { "epoch": 3.420289855072464, "grad_norm": 0.5672821998596191, "learning_rate": 9.997810187599403e-05, "loss": 0.1679, "step": 1180 }, { "epoch": 3.449275362318841, "grad_norm": 1.185848593711853, "learning_rate": 9.997558701884249e-05, "loss": 0.2152, "step": 1190 }, { "epoch": 3.4782608695652173, "grad_norm": 0.7329660058021545, "learning_rate": 9.997293553034433e-05, "loss": 0.1943, "step": 1200 }, { "epoch": 3.5072463768115942, "grad_norm": 0.6363108158111572, "learning_rate": 9.997014741774866e-05, "loss": 0.1579, "step": 1210 }, { "epoch": 3.536231884057971, "grad_norm": 1.2481898069381714, "learning_rate": 9.996722268867803e-05, "loss": 0.1869, "step": 1220 }, { "epoch": 3.5652173913043477, "grad_norm": 0.8098170757293701, "learning_rate": 9.996416135112858e-05, "loss": 0.2126, "step": 1230 }, { "epoch": 3.5942028985507246, "grad_norm": 0.6532134413719177, "learning_rate": 9.996096341346988e-05, "loss": 0.2359, "step": 1240 }, { "epoch": 3.6231884057971016, "grad_norm": 0.774456262588501, "learning_rate": 9.995762888444495e-05, "loss": 0.2043, "step": 1250 }, { "epoch": 3.6521739130434785, "grad_norm": 0.7362341284751892, "learning_rate": 9.995415777317027e-05, "loss": 0.1705, "step": 1260 }, { "epoch": 3.681159420289855, "grad_norm": 0.6909469366073608, "learning_rate": 9.995055008913574e-05, "loss": 0.1981, "step": 1270 }, { "epoch": 3.710144927536232, "grad_norm": 0.5451234579086304, "learning_rate": 9.994680584220463e-05, "loss": 0.1705, "step": 1280 }, { "epoch": 3.7391304347826084, "grad_norm": 0.7192392945289612, "learning_rate": 9.994292504261355e-05, "loss": 0.1707, "step": 1290 }, { "epoch": 3.7681159420289854, "grad_norm": 0.5111631751060486, "learning_rate": 9.993890770097247e-05, "loss": 0.2049, "step": 1300 }, { "epoch": 3.7971014492753623, "grad_norm": 0.5530916452407837, "learning_rate": 9.993475382826467e-05, "loss": 0.1931, "step": 1310 }, { "epoch": 3.8260869565217392, "grad_norm": 0.4613671898841858, "learning_rate": 9.993046343584664e-05, "loss": 0.1553, "step": 1320 }, { "epoch": 3.855072463768116, "grad_norm": 0.5719594359397888, "learning_rate": 9.992603653544816e-05, "loss": 0.1865, "step": 1330 }, { "epoch": 3.8840579710144927, "grad_norm": 0.6633929014205933, "learning_rate": 9.992147313917222e-05, "loss": 0.1901, "step": 1340 }, { "epoch": 3.9130434782608696, "grad_norm": 0.3168647587299347, "learning_rate": 9.991677325949497e-05, "loss": 0.1871, "step": 1350 }, { "epoch": 3.942028985507246, "grad_norm": 0.35858315229415894, "learning_rate": 9.991193690926568e-05, "loss": 0.1533, "step": 1360 }, { "epoch": 3.971014492753623, "grad_norm": 0.35452893376350403, "learning_rate": 9.990696410170678e-05, "loss": 0.1843, "step": 1370 }, { "epoch": 4.0, "grad_norm": 1.4836504459381104, "learning_rate": 9.990185485041371e-05, "loss": 0.1691, "step": 1380 }, { "epoch": 4.028985507246377, "grad_norm": 0.7394298315048218, "learning_rate": 9.989660916935498e-05, "loss": 0.1648, "step": 1390 }, { "epoch": 4.057971014492754, "grad_norm": 0.8527777791023254, "learning_rate": 9.989122707287208e-05, "loss": 0.1741, "step": 1400 }, { "epoch": 4.086956521739131, "grad_norm": 0.6024882197380066, "learning_rate": 9.988570857567945e-05, "loss": 0.1863, "step": 1410 }, { "epoch": 4.115942028985507, "grad_norm": 0.6260817050933838, "learning_rate": 9.988005369286446e-05, "loss": 0.1815, "step": 1420 }, { "epoch": 4.144927536231884, "grad_norm": 0.9622341394424438, "learning_rate": 9.987426243988734e-05, "loss": 0.1698, "step": 1430 }, { "epoch": 4.173913043478261, "grad_norm": 0.5575575232505798, "learning_rate": 9.986833483258114e-05, "loss": 0.1753, "step": 1440 }, { "epoch": 4.202898550724638, "grad_norm": 0.24518761038780212, "learning_rate": 9.986227088715173e-05, "loss": 0.16, "step": 1450 }, { "epoch": 4.231884057971015, "grad_norm": 0.5677102208137512, "learning_rate": 9.98560706201777e-05, "loss": 0.1746, "step": 1460 }, { "epoch": 4.260869565217392, "grad_norm": 0.35185858607292175, "learning_rate": 9.984973404861036e-05, "loss": 0.152, "step": 1470 }, { "epoch": 4.2898550724637685, "grad_norm": 0.5845288038253784, "learning_rate": 9.984326118977361e-05, "loss": 0.1458, "step": 1480 }, { "epoch": 4.318840579710145, "grad_norm": 0.5872308611869812, "learning_rate": 9.983665206136406e-05, "loss": 0.1783, "step": 1490 }, { "epoch": 4.3478260869565215, "grad_norm": 0.6161956787109375, "learning_rate": 9.982990668145075e-05, "loss": 0.1617, "step": 1500 }, { "epoch": 4.3768115942028984, "grad_norm": 0.48462975025177, "learning_rate": 9.982302506847534e-05, "loss": 0.1544, "step": 1510 }, { "epoch": 4.405797101449275, "grad_norm": 0.43805649876594543, "learning_rate": 9.981600724125189e-05, "loss": 0.1632, "step": 1520 }, { "epoch": 4.434782608695652, "grad_norm": 0.6712663173675537, "learning_rate": 9.980885321896685e-05, "loss": 0.1681, "step": 1530 }, { "epoch": 4.463768115942029, "grad_norm": 0.46296727657318115, "learning_rate": 9.980156302117905e-05, "loss": 0.147, "step": 1540 }, { "epoch": 4.492753623188406, "grad_norm": 0.47002753615379333, "learning_rate": 9.979413666781963e-05, "loss": 0.1285, "step": 1550 }, { "epoch": 4.521739130434782, "grad_norm": 0.508978009223938, "learning_rate": 9.978657417919193e-05, "loss": 0.1611, "step": 1560 }, { "epoch": 4.550724637681159, "grad_norm": 0.5047881007194519, "learning_rate": 9.977887557597153e-05, "loss": 0.169, "step": 1570 }, { "epoch": 4.579710144927536, "grad_norm": 0.5661750435829163, "learning_rate": 9.97710408792061e-05, "loss": 0.1745, "step": 1580 }, { "epoch": 4.608695652173913, "grad_norm": 0.33027854561805725, "learning_rate": 9.976307011031542e-05, "loss": 0.1515, "step": 1590 }, { "epoch": 4.63768115942029, "grad_norm": 0.5191190838813782, "learning_rate": 9.975496329109126e-05, "loss": 0.1812, "step": 1600 }, { "epoch": 4.666666666666667, "grad_norm": 0.6009054183959961, "learning_rate": 9.974672044369732e-05, "loss": 0.154, "step": 1610 }, { "epoch": 4.695652173913043, "grad_norm": 0.83514004945755, "learning_rate": 9.97383415906693e-05, "loss": 0.1915, "step": 1620 }, { "epoch": 4.72463768115942, "grad_norm": 0.7153990864753723, "learning_rate": 9.97298267549146e-05, "loss": 0.151, "step": 1630 }, { "epoch": 4.753623188405797, "grad_norm": 0.5760650634765625, "learning_rate": 9.972117595971249e-05, "loss": 0.1613, "step": 1640 }, { "epoch": 4.782608695652174, "grad_norm": 0.46681898832321167, "learning_rate": 9.971238922871391e-05, "loss": 0.1547, "step": 1650 }, { "epoch": 4.811594202898551, "grad_norm": 0.6712074875831604, "learning_rate": 9.970346658594142e-05, "loss": 0.1693, "step": 1660 }, { "epoch": 4.840579710144928, "grad_norm": 0.41927066445350647, "learning_rate": 9.969440805578923e-05, "loss": 0.1537, "step": 1670 }, { "epoch": 4.869565217391305, "grad_norm": 0.718482255935669, "learning_rate": 9.968521366302298e-05, "loss": 0.1503, "step": 1680 }, { "epoch": 4.898550724637682, "grad_norm": 0.41100355982780457, "learning_rate": 9.967588343277981e-05, "loss": 0.131, "step": 1690 }, { "epoch": 4.927536231884058, "grad_norm": 0.6164652705192566, "learning_rate": 9.966641739056818e-05, "loss": 0.1633, "step": 1700 }, { "epoch": 4.956521739130435, "grad_norm": 0.6644942760467529, "learning_rate": 9.965681556226793e-05, "loss": 0.1686, "step": 1710 }, { "epoch": 4.9855072463768115, "grad_norm": 0.6024698615074158, "learning_rate": 9.964707797413006e-05, "loss": 0.1629, "step": 1720 }, { "epoch": 5.0144927536231885, "grad_norm": 0.37680429220199585, "learning_rate": 9.963720465277679e-05, "loss": 0.1634, "step": 1730 }, { "epoch": 5.043478260869565, "grad_norm": 0.6451659798622131, "learning_rate": 9.96271956252014e-05, "loss": 0.1613, "step": 1740 }, { "epoch": 5.072463768115942, "grad_norm": 0.28793832659721375, "learning_rate": 9.961705091876816e-05, "loss": 0.1589, "step": 1750 }, { "epoch": 5.101449275362318, "grad_norm": 0.59237140417099, "learning_rate": 9.960677056121235e-05, "loss": 0.1607, "step": 1760 }, { "epoch": 5.130434782608695, "grad_norm": 0.47422319650650024, "learning_rate": 9.959635458064005e-05, "loss": 0.1916, "step": 1770 }, { "epoch": 5.159420289855072, "grad_norm": 0.681136965751648, "learning_rate": 9.958580300552815e-05, "loss": 0.1624, "step": 1780 }, { "epoch": 5.188405797101449, "grad_norm": 0.6878365874290466, "learning_rate": 9.957511586472426e-05, "loss": 0.1762, "step": 1790 }, { "epoch": 5.217391304347826, "grad_norm": 0.5597853064537048, "learning_rate": 9.956429318744662e-05, "loss": 0.1648, "step": 1800 }, { "epoch": 5.246376811594203, "grad_norm": 0.5032410621643066, "learning_rate": 9.955333500328404e-05, "loss": 0.1482, "step": 1810 }, { "epoch": 5.27536231884058, "grad_norm": 0.6717603802680969, "learning_rate": 9.95422413421957e-05, "loss": 0.1815, "step": 1820 }, { "epoch": 5.304347826086957, "grad_norm": 0.5992377400398254, "learning_rate": 9.953101223451133e-05, "loss": 0.1551, "step": 1830 }, { "epoch": 5.333333333333333, "grad_norm": 0.35043808817863464, "learning_rate": 9.951964771093085e-05, "loss": 0.1493, "step": 1840 }, { "epoch": 5.36231884057971, "grad_norm": 0.49411511421203613, "learning_rate": 9.950814780252442e-05, "loss": 0.1561, "step": 1850 }, { "epoch": 5.391304347826087, "grad_norm": 0.5951570868492126, "learning_rate": 9.949651254073236e-05, "loss": 0.1675, "step": 1860 }, { "epoch": 5.420289855072464, "grad_norm": 0.6489980220794678, "learning_rate": 9.948474195736504e-05, "loss": 0.1579, "step": 1870 }, { "epoch": 5.449275362318841, "grad_norm": 0.5115748047828674, "learning_rate": 9.947283608460277e-05, "loss": 0.1999, "step": 1880 }, { "epoch": 5.478260869565218, "grad_norm": 0.4821164906024933, "learning_rate": 9.946079495499577e-05, "loss": 0.1695, "step": 1890 }, { "epoch": 5.507246376811594, "grad_norm": 0.40529024600982666, "learning_rate": 9.944861860146401e-05, "loss": 0.1764, "step": 1900 }, { "epoch": 5.536231884057971, "grad_norm": 0.46906864643096924, "learning_rate": 9.943630705729719e-05, "loss": 0.1572, "step": 1910 }, { "epoch": 5.565217391304348, "grad_norm": 0.34866201877593994, "learning_rate": 9.942386035615459e-05, "loss": 0.1155, "step": 1920 }, { "epoch": 5.594202898550725, "grad_norm": 0.6494722962379456, "learning_rate": 9.941127853206503e-05, "loss": 0.1588, "step": 1930 }, { "epoch": 5.6231884057971016, "grad_norm": 0.4848741292953491, "learning_rate": 9.939856161942673e-05, "loss": 0.1489, "step": 1940 }, { "epoch": 5.6521739130434785, "grad_norm": 0.5746407508850098, "learning_rate": 9.938570965300724e-05, "loss": 0.1503, "step": 1950 }, { "epoch": 5.681159420289855, "grad_norm": 0.6178921461105347, "learning_rate": 9.937272266794335e-05, "loss": 0.1297, "step": 1960 }, { "epoch": 5.710144927536232, "grad_norm": 0.48752641677856445, "learning_rate": 9.935960069974096e-05, "loss": 0.1125, "step": 1970 }, { "epoch": 5.739130434782608, "grad_norm": 0.4455469846725464, "learning_rate": 9.934634378427506e-05, "loss": 0.1523, "step": 1980 }, { "epoch": 5.768115942028985, "grad_norm": 0.8876426219940186, "learning_rate": 9.933295195778954e-05, "loss": 0.1284, "step": 1990 }, { "epoch": 5.797101449275362, "grad_norm": 0.5639053583145142, "learning_rate": 9.931942525689715e-05, "loss": 0.1557, "step": 2000 }, { "epoch": 5.826086956521739, "grad_norm": 0.5348621606826782, "learning_rate": 9.930576371857936e-05, "loss": 0.1416, "step": 2010 }, { "epoch": 5.855072463768116, "grad_norm": 0.4637743830680847, "learning_rate": 9.929196738018629e-05, "loss": 0.1387, "step": 2020 }, { "epoch": 5.884057971014493, "grad_norm": 0.7224751114845276, "learning_rate": 9.927803627943662e-05, "loss": 0.1483, "step": 2030 }, { "epoch": 5.913043478260869, "grad_norm": 0.4575344920158386, "learning_rate": 9.926397045441744e-05, "loss": 0.1525, "step": 2040 }, { "epoch": 5.942028985507246, "grad_norm": 0.4177353084087372, "learning_rate": 9.924976994358417e-05, "loss": 0.137, "step": 2050 }, { "epoch": 5.971014492753623, "grad_norm": 0.5887998938560486, "learning_rate": 9.923543478576048e-05, "loss": 0.1799, "step": 2060 }, { "epoch": 6.0, "grad_norm": 0.6577372550964355, "learning_rate": 9.922096502013813e-05, "loss": 0.1675, "step": 2070 }, { "epoch": 6.028985507246377, "grad_norm": 0.6861566305160522, "learning_rate": 9.92063606862769e-05, "loss": 0.143, "step": 2080 }, { "epoch": 6.057971014492754, "grad_norm": 0.5720553994178772, "learning_rate": 9.919162182410453e-05, "loss": 0.1264, "step": 2090 }, { "epoch": 6.086956521739131, "grad_norm": 0.6558146476745605, "learning_rate": 9.917674847391645e-05, "loss": 0.1398, "step": 2100 }, { "epoch": 6.115942028985507, "grad_norm": 0.4062115252017975, "learning_rate": 9.916174067637584e-05, "loss": 0.1402, "step": 2110 }, { "epoch": 6.144927536231884, "grad_norm": 0.5962466597557068, "learning_rate": 9.914659847251348e-05, "loss": 0.1459, "step": 2120 }, { "epoch": 6.173913043478261, "grad_norm": 0.5116047263145447, "learning_rate": 9.913132190372753e-05, "loss": 0.1502, "step": 2130 }, { "epoch": 6.202898550724638, "grad_norm": 0.6019411683082581, "learning_rate": 9.911591101178359e-05, "loss": 0.1373, "step": 2140 }, { "epoch": 6.231884057971015, "grad_norm": 0.7383087873458862, "learning_rate": 9.910036583881443e-05, "loss": 0.1614, "step": 2150 }, { "epoch": 6.260869565217392, "grad_norm": 0.6318684816360474, "learning_rate": 9.908468642731995e-05, "loss": 0.1571, "step": 2160 }, { "epoch": 6.2898550724637685, "grad_norm": 0.4686439633369446, "learning_rate": 9.906887282016707e-05, "loss": 0.1431, "step": 2170 }, { "epoch": 6.318840579710145, "grad_norm": 0.5213261842727661, "learning_rate": 9.90529250605896e-05, "loss": 0.1661, "step": 2180 }, { "epoch": 6.3478260869565215, "grad_norm": 0.5317389369010925, "learning_rate": 9.903684319218809e-05, "loss": 0.1251, "step": 2190 }, { "epoch": 6.3768115942028984, "grad_norm": 0.4725372791290283, "learning_rate": 9.902062725892976e-05, "loss": 0.1367, "step": 2200 }, { "epoch": 6.405797101449275, "grad_norm": 0.5488022565841675, "learning_rate": 9.900427730514834e-05, "loss": 0.1295, "step": 2210 }, { "epoch": 6.434782608695652, "grad_norm": 0.402173787355423, "learning_rate": 9.8987793375544e-05, "loss": 0.1478, "step": 2220 }, { "epoch": 6.463768115942029, "grad_norm": 0.6250830292701721, "learning_rate": 9.897117551518318e-05, "loss": 0.1516, "step": 2230 }, { "epoch": 6.492753623188406, "grad_norm": 0.4163563549518585, "learning_rate": 9.895442376949844e-05, "loss": 0.1209, "step": 2240 }, { "epoch": 6.521739130434782, "grad_norm": 0.709176778793335, "learning_rate": 9.893753818428845e-05, "loss": 0.1412, "step": 2250 }, { "epoch": 6.550724637681159, "grad_norm": 0.526637077331543, "learning_rate": 9.892051880571773e-05, "loss": 0.1622, "step": 2260 }, { "epoch": 6.579710144927536, "grad_norm": 0.5909827351570129, "learning_rate": 9.890336568031663e-05, "loss": 0.156, "step": 2270 }, { "epoch": 6.608695652173913, "grad_norm": 0.6670017838478088, "learning_rate": 9.888607885498113e-05, "loss": 0.1487, "step": 2280 }, { "epoch": 6.63768115942029, "grad_norm": 0.6181092858314514, "learning_rate": 9.886865837697275e-05, "loss": 0.151, "step": 2290 }, { "epoch": 6.666666666666667, "grad_norm": 0.4304220378398895, "learning_rate": 9.88511042939184e-05, "loss": 0.1463, "step": 2300 }, { "epoch": 6.695652173913043, "grad_norm": 0.40652596950531006, "learning_rate": 9.883341665381028e-05, "loss": 0.1495, "step": 2310 }, { "epoch": 6.72463768115942, "grad_norm": 0.43385979533195496, "learning_rate": 9.881559550500575e-05, "loss": 0.1357, "step": 2320 }, { "epoch": 6.753623188405797, "grad_norm": 0.4716493487358093, "learning_rate": 9.879764089622712e-05, "loss": 0.1589, "step": 2330 }, { "epoch": 6.782608695652174, "grad_norm": 0.4198303520679474, "learning_rate": 9.87795528765616e-05, "loss": 0.1314, "step": 2340 }, { "epoch": 6.811594202898551, "grad_norm": 0.5235840082168579, "learning_rate": 9.876133149546118e-05, "loss": 0.1525, "step": 2350 }, { "epoch": 6.840579710144928, "grad_norm": 0.3913216292858124, "learning_rate": 9.874297680274238e-05, "loss": 0.1571, "step": 2360 }, { "epoch": 6.869565217391305, "grad_norm": 0.38975727558135986, "learning_rate": 9.872448884858624e-05, "loss": 0.1561, "step": 2370 }, { "epoch": 6.898550724637682, "grad_norm": 0.2768588662147522, "learning_rate": 9.870586768353815e-05, "loss": 0.1152, "step": 2380 }, { "epoch": 6.927536231884058, "grad_norm": 0.48241758346557617, "learning_rate": 9.868711335850764e-05, "loss": 0.1588, "step": 2390 }, { "epoch": 6.956521739130435, "grad_norm": 0.4768286347389221, "learning_rate": 9.866822592476833e-05, "loss": 0.1518, "step": 2400 }, { "epoch": 6.9855072463768115, "grad_norm": 0.5642341375350952, "learning_rate": 9.86492054339577e-05, "loss": 0.1345, "step": 2410 }, { "epoch": 7.0144927536231885, "grad_norm": 0.4740188717842102, "learning_rate": 9.863005193807711e-05, "loss": 0.1148, "step": 2420 }, { "epoch": 7.043478260869565, "grad_norm": 0.3090324103832245, "learning_rate": 9.861076548949143e-05, "loss": 0.1197, "step": 2430 }, { "epoch": 7.072463768115942, "grad_norm": 0.4523588716983795, "learning_rate": 9.859134614092912e-05, "loss": 0.1443, "step": 2440 }, { "epoch": 7.101449275362318, "grad_norm": 0.539725124835968, "learning_rate": 9.857179394548191e-05, "loss": 0.1371, "step": 2450 }, { "epoch": 7.130434782608695, "grad_norm": 0.5571834444999695, "learning_rate": 9.855210895660477e-05, "loss": 0.1456, "step": 2460 }, { "epoch": 7.159420289855072, "grad_norm": 0.4227403402328491, "learning_rate": 9.853229122811568e-05, "loss": 0.1377, "step": 2470 }, { "epoch": 7.188405797101449, "grad_norm": 0.4217086434364319, "learning_rate": 9.851234081419559e-05, "loss": 0.1331, "step": 2480 }, { "epoch": 7.217391304347826, "grad_norm": 0.47015127539634705, "learning_rate": 9.849225776938814e-05, "loss": 0.1382, "step": 2490 }, { "epoch": 7.246376811594203, "grad_norm": 0.6300743818283081, "learning_rate": 9.847204214859964e-05, "loss": 0.1437, "step": 2500 }, { "epoch": 7.27536231884058, "grad_norm": 0.49502405524253845, "learning_rate": 9.845169400709879e-05, "loss": 0.1415, "step": 2510 }, { "epoch": 7.304347826086957, "grad_norm": 0.5468514561653137, "learning_rate": 9.843121340051664e-05, "loss": 0.1363, "step": 2520 }, { "epoch": 7.333333333333333, "grad_norm": 0.5560225248336792, "learning_rate": 9.841060038484641e-05, "loss": 0.14, "step": 2530 }, { "epoch": 7.36231884057971, "grad_norm": 0.6520473957061768, "learning_rate": 9.838985501644328e-05, "loss": 0.1538, "step": 2540 }, { "epoch": 7.391304347826087, "grad_norm": 0.71478271484375, "learning_rate": 9.83689773520243e-05, "loss": 0.1521, "step": 2550 }, { "epoch": 7.420289855072464, "grad_norm": 0.41255566477775574, "learning_rate": 9.834796744866819e-05, "loss": 0.1469, "step": 2560 }, { "epoch": 7.449275362318841, "grad_norm": 0.41565924882888794, "learning_rate": 9.832682536381525e-05, "loss": 0.1522, "step": 2570 }, { "epoch": 7.478260869565218, "grad_norm": 0.6504526138305664, "learning_rate": 9.830555115526711e-05, "loss": 0.1318, "step": 2580 }, { "epoch": 7.507246376811594, "grad_norm": 0.3729122281074524, "learning_rate": 9.828414488118667e-05, "loss": 0.108, "step": 2590 }, { "epoch": 7.536231884057971, "grad_norm": 0.6625639796257019, "learning_rate": 9.826260660009785e-05, "loss": 0.1773, "step": 2600 }, { "epoch": 7.565217391304348, "grad_norm": 1.0479519367218018, "learning_rate": 9.824093637088547e-05, "loss": 0.1384, "step": 2610 }, { "epoch": 7.594202898550725, "grad_norm": 0.4728688597679138, "learning_rate": 9.821913425279514e-05, "loss": 0.144, "step": 2620 }, { "epoch": 7.6231884057971016, "grad_norm": 0.5890956521034241, "learning_rate": 9.8197200305433e-05, "loss": 0.1556, "step": 2630 }, { "epoch": 7.6521739130434785, "grad_norm": 0.5349107384681702, "learning_rate": 9.817513458876564e-05, "loss": 0.1333, "step": 2640 }, { "epoch": 7.681159420289855, "grad_norm": 0.3802502155303955, "learning_rate": 9.815293716311987e-05, "loss": 0.1366, "step": 2650 }, { "epoch": 7.710144927536232, "grad_norm": 0.539300262928009, "learning_rate": 9.813060808918262e-05, "loss": 0.1531, "step": 2660 }, { "epoch": 7.739130434782608, "grad_norm": 0.45709091424942017, "learning_rate": 9.810814742800069e-05, "loss": 0.1543, "step": 2670 }, { "epoch": 7.768115942028985, "grad_norm": 0.44815441966056824, "learning_rate": 9.808555524098074e-05, "loss": 0.1281, "step": 2680 }, { "epoch": 7.797101449275362, "grad_norm": 0.45325276255607605, "learning_rate": 9.806283158988887e-05, "loss": 0.136, "step": 2690 }, { "epoch": 7.826086956521739, "grad_norm": 0.41119185090065, "learning_rate": 9.803997653685072e-05, "loss": 0.1382, "step": 2700 }, { "epoch": 7.855072463768116, "grad_norm": 0.5879584550857544, "learning_rate": 9.801699014435112e-05, "loss": 0.1433, "step": 2710 }, { "epoch": 7.884057971014493, "grad_norm": 0.3625235855579376, "learning_rate": 9.799387247523398e-05, "loss": 0.127, "step": 2720 }, { "epoch": 7.913043478260869, "grad_norm": 0.6583592891693115, "learning_rate": 9.797062359270215e-05, "loss": 0.16, "step": 2730 }, { "epoch": 7.942028985507246, "grad_norm": 0.3526526689529419, "learning_rate": 9.794724356031715e-05, "loss": 0.1129, "step": 2740 }, { "epoch": 7.971014492753623, "grad_norm": 0.4039490818977356, "learning_rate": 9.792373244199913e-05, "loss": 0.145, "step": 2750 }, { "epoch": 8.0, "grad_norm": 0.9839149117469788, "learning_rate": 9.790009030202658e-05, "loss": 0.1548, "step": 2760 }, { "epoch": 8.028985507246377, "grad_norm": 0.5473302602767944, "learning_rate": 9.78763172050362e-05, "loss": 0.1357, "step": 2770 }, { "epoch": 8.057971014492754, "grad_norm": 0.4842037260532379, "learning_rate": 9.785241321602274e-05, "loss": 0.1599, "step": 2780 }, { "epoch": 8.08695652173913, "grad_norm": 0.6084038615226746, "learning_rate": 9.782837840033879e-05, "loss": 0.1236, "step": 2790 }, { "epoch": 8.115942028985508, "grad_norm": 0.5223290324211121, "learning_rate": 9.780421282369461e-05, "loss": 0.1185, "step": 2800 }, { "epoch": 8.144927536231885, "grad_norm": 0.49084579944610596, "learning_rate": 9.777991655215797e-05, "loss": 0.1335, "step": 2810 }, { "epoch": 8.173913043478262, "grad_norm": 0.5133453607559204, "learning_rate": 9.775548965215394e-05, "loss": 0.143, "step": 2820 }, { "epoch": 8.202898550724637, "grad_norm": 0.5703955292701721, "learning_rate": 9.773093219046474e-05, "loss": 0.1714, "step": 2830 }, { "epoch": 8.231884057971014, "grad_norm": 0.3753199279308319, "learning_rate": 9.770624423422954e-05, "loss": 0.1514, "step": 2840 }, { "epoch": 8.26086956521739, "grad_norm": 0.3518688678741455, "learning_rate": 9.768142585094426e-05, "loss": 0.1448, "step": 2850 }, { "epoch": 8.289855072463768, "grad_norm": 0.5194658041000366, "learning_rate": 9.765647710846142e-05, "loss": 0.1319, "step": 2860 }, { "epoch": 8.318840579710145, "grad_norm": 0.4543875455856323, "learning_rate": 9.763139807498991e-05, "loss": 0.1525, "step": 2870 }, { "epoch": 8.347826086956522, "grad_norm": 0.5964239239692688, "learning_rate": 9.760618881909487e-05, "loss": 0.1428, "step": 2880 }, { "epoch": 8.376811594202898, "grad_norm": 0.31862547993659973, "learning_rate": 9.758084940969744e-05, "loss": 0.1424, "step": 2890 }, { "epoch": 8.405797101449275, "grad_norm": 0.5183411836624146, "learning_rate": 9.755537991607459e-05, "loss": 0.1235, "step": 2900 }, { "epoch": 8.434782608695652, "grad_norm": 0.5497164130210876, "learning_rate": 9.752978040785895e-05, "loss": 0.1226, "step": 2910 }, { "epoch": 8.46376811594203, "grad_norm": 0.5015374422073364, "learning_rate": 9.750405095503859e-05, "loss": 0.126, "step": 2920 }, { "epoch": 8.492753623188406, "grad_norm": 0.3834163546562195, "learning_rate": 9.747819162795686e-05, "loss": 0.1299, "step": 2930 }, { "epoch": 8.521739130434783, "grad_norm": 0.4107052981853485, "learning_rate": 9.745220249731217e-05, "loss": 0.1399, "step": 2940 }, { "epoch": 8.55072463768116, "grad_norm": 0.6754370331764221, "learning_rate": 9.742608363415781e-05, "loss": 0.1369, "step": 2950 }, { "epoch": 8.579710144927537, "grad_norm": 0.38062620162963867, "learning_rate": 9.739983510990176e-05, "loss": 0.1303, "step": 2960 }, { "epoch": 8.608695652173914, "grad_norm": 0.5319868326187134, "learning_rate": 9.737345699630647e-05, "loss": 0.1393, "step": 2970 }, { "epoch": 8.63768115942029, "grad_norm": 0.28532159328460693, "learning_rate": 9.734694936548869e-05, "loss": 0.1368, "step": 2980 }, { "epoch": 8.666666666666666, "grad_norm": 0.6283175945281982, "learning_rate": 9.732031228991932e-05, "loss": 0.137, "step": 2990 }, { "epoch": 8.695652173913043, "grad_norm": 0.4746125042438507, "learning_rate": 9.729354584242302e-05, "loss": 0.1409, "step": 3000 }, { "epoch": 8.72463768115942, "grad_norm": 0.6005597114562988, "learning_rate": 9.726665009617832e-05, "loss": 0.1407, "step": 3010 }, { "epoch": 8.753623188405797, "grad_norm": 0.4808926284313202, "learning_rate": 9.723962512471714e-05, "loss": 0.1552, "step": 3020 }, { "epoch": 8.782608695652174, "grad_norm": 0.5887641310691833, "learning_rate": 9.72124710019247e-05, "loss": 0.1336, "step": 3030 }, { "epoch": 8.81159420289855, "grad_norm": 0.34358280897140503, "learning_rate": 9.718518780203934e-05, "loss": 0.1367, "step": 3040 }, { "epoch": 8.840579710144928, "grad_norm": 0.4416921138763428, "learning_rate": 9.715777559965228e-05, "loss": 0.1232, "step": 3050 }, { "epoch": 8.869565217391305, "grad_norm": 0.6384701132774353, "learning_rate": 9.713023446970746e-05, "loss": 0.1429, "step": 3060 }, { "epoch": 8.898550724637682, "grad_norm": 0.5382649302482605, "learning_rate": 9.710256448750126e-05, "loss": 0.1606, "step": 3070 }, { "epoch": 8.927536231884059, "grad_norm": 0.3950713276863098, "learning_rate": 9.707476572868235e-05, "loss": 0.131, "step": 3080 }, { "epoch": 8.956521739130435, "grad_norm": 0.38749822974205017, "learning_rate": 9.704683826925149e-05, "loss": 0.1158, "step": 3090 }, { "epoch": 8.985507246376812, "grad_norm": 0.4517150819301605, "learning_rate": 9.701878218556129e-05, "loss": 0.166, "step": 3100 }, { "epoch": 9.014492753623188, "grad_norm": 0.47911375761032104, "learning_rate": 9.699059755431598e-05, "loss": 0.1177, "step": 3110 }, { "epoch": 9.043478260869565, "grad_norm": 0.2541674077510834, "learning_rate": 9.696228445257132e-05, "loss": 0.1254, "step": 3120 }, { "epoch": 9.072463768115941, "grad_norm": 0.498009592294693, "learning_rate": 9.693384295773419e-05, "loss": 0.1603, "step": 3130 }, { "epoch": 9.101449275362318, "grad_norm": 0.443220317363739, "learning_rate": 9.690527314756259e-05, "loss": 0.1382, "step": 3140 }, { "epoch": 9.130434782608695, "grad_norm": 0.32711514830589294, "learning_rate": 9.687657510016527e-05, "loss": 0.1351, "step": 3150 }, { "epoch": 9.159420289855072, "grad_norm": 0.4041106402873993, "learning_rate": 9.684774889400161e-05, "loss": 0.132, "step": 3160 }, { "epoch": 9.18840579710145, "grad_norm": 0.3735228180885315, "learning_rate": 9.681879460788135e-05, "loss": 0.1204, "step": 3170 }, { "epoch": 9.217391304347826, "grad_norm": 0.4736388921737671, "learning_rate": 9.67897123209644e-05, "loss": 0.1156, "step": 3180 }, { "epoch": 9.246376811594203, "grad_norm": 0.39969536662101746, "learning_rate": 9.676050211276062e-05, "loss": 0.1488, "step": 3190 }, { "epoch": 9.27536231884058, "grad_norm": 0.5019108057022095, "learning_rate": 9.673116406312962e-05, "loss": 0.1351, "step": 3200 }, { "epoch": 9.304347826086957, "grad_norm": 0.45118093490600586, "learning_rate": 9.67016982522805e-05, "loss": 0.1263, "step": 3210 }, { "epoch": 9.333333333333334, "grad_norm": 0.5472857356071472, "learning_rate": 9.667210476077164e-05, "loss": 0.1648, "step": 3220 }, { "epoch": 9.36231884057971, "grad_norm": 0.32493582367897034, "learning_rate": 9.664238366951055e-05, "loss": 0.1309, "step": 3230 }, { "epoch": 9.391304347826088, "grad_norm": 0.7096918821334839, "learning_rate": 9.661253505975355e-05, "loss": 0.1383, "step": 3240 }, { "epoch": 9.420289855072463, "grad_norm": 0.5345839858055115, "learning_rate": 9.658255901310557e-05, "loss": 0.1198, "step": 3250 }, { "epoch": 9.44927536231884, "grad_norm": 0.5087151527404785, "learning_rate": 9.655245561152e-05, "loss": 0.1199, "step": 3260 }, { "epoch": 9.478260869565217, "grad_norm": 0.2939687967300415, "learning_rate": 9.65222249372984e-05, "loss": 0.1342, "step": 3270 }, { "epoch": 9.507246376811594, "grad_norm": 0.3696477711200714, "learning_rate": 9.649186707309026e-05, "loss": 0.1361, "step": 3280 }, { "epoch": 9.53623188405797, "grad_norm": 0.4263698160648346, "learning_rate": 9.646138210189283e-05, "loss": 0.1453, "step": 3290 }, { "epoch": 9.565217391304348, "grad_norm": 0.40898415446281433, "learning_rate": 9.643077010705087e-05, "loss": 0.112, "step": 3300 }, { "epoch": 9.594202898550725, "grad_norm": 0.37168997526168823, "learning_rate": 9.640003117225637e-05, "loss": 0.1338, "step": 3310 }, { "epoch": 9.623188405797102, "grad_norm": 0.4604577124118805, "learning_rate": 9.636916538154846e-05, "loss": 0.1511, "step": 3320 }, { "epoch": 9.652173913043478, "grad_norm": 0.5092346668243408, "learning_rate": 9.633817281931296e-05, "loss": 0.1197, "step": 3330 }, { "epoch": 9.681159420289855, "grad_norm": 0.43370747566223145, "learning_rate": 9.630705357028242e-05, "loss": 0.144, "step": 3340 }, { "epoch": 9.710144927536232, "grad_norm": 0.4658154249191284, "learning_rate": 9.627580771953563e-05, "loss": 0.1453, "step": 3350 }, { "epoch": 9.73913043478261, "grad_norm": 0.4420405924320221, "learning_rate": 9.624443535249759e-05, "loss": 0.1331, "step": 3360 }, { "epoch": 9.768115942028986, "grad_norm": 0.4711594879627228, "learning_rate": 9.621293655493913e-05, "loss": 0.1204, "step": 3370 }, { "epoch": 9.797101449275363, "grad_norm": 0.2817968428134918, "learning_rate": 9.618131141297675e-05, "loss": 0.1309, "step": 3380 }, { "epoch": 9.826086956521738, "grad_norm": 0.3537946343421936, "learning_rate": 9.614956001307242e-05, "loss": 0.1464, "step": 3390 }, { "epoch": 9.855072463768115, "grad_norm": 0.30007612705230713, "learning_rate": 9.611768244203321e-05, "loss": 0.1186, "step": 3400 }, { "epoch": 9.884057971014492, "grad_norm": 0.41064971685409546, "learning_rate": 9.60856787870112e-05, "loss": 0.1263, "step": 3410 }, { "epoch": 9.91304347826087, "grad_norm": 0.4655996263027191, "learning_rate": 9.605354913550318e-05, "loss": 0.1514, "step": 3420 }, { "epoch": 9.942028985507246, "grad_norm": 0.5630468726158142, "learning_rate": 9.602129357535037e-05, "loss": 0.1315, "step": 3430 }, { "epoch": 9.971014492753623, "grad_norm": 0.7113257646560669, "learning_rate": 9.598891219473825e-05, "loss": 0.1179, "step": 3440 }, { "epoch": 10.0, "grad_norm": 0.7581853866577148, "learning_rate": 9.595640508219625e-05, "loss": 0.1434, "step": 3450 }, { "epoch": 10.028985507246377, "grad_norm": 0.6476505994796753, "learning_rate": 9.592377232659761e-05, "loss": 0.1276, "step": 3460 }, { "epoch": 10.057971014492754, "grad_norm": 0.4075034260749817, "learning_rate": 9.589101401715904e-05, "loss": 0.142, "step": 3470 }, { "epoch": 10.08695652173913, "grad_norm": 0.7294759154319763, "learning_rate": 9.585813024344045e-05, "loss": 0.1464, "step": 3480 }, { "epoch": 10.115942028985508, "grad_norm": 0.3397752642631531, "learning_rate": 9.58251210953449e-05, "loss": 0.1374, "step": 3490 }, { "epoch": 10.144927536231885, "grad_norm": 0.4181293547153473, "learning_rate": 9.579198666311809e-05, "loss": 0.1442, "step": 3500 }, { "epoch": 10.173913043478262, "grad_norm": 0.45683369040489197, "learning_rate": 9.575872703734832e-05, "loss": 0.142, "step": 3510 }, { "epoch": 10.202898550724637, "grad_norm": 0.37618064880371094, "learning_rate": 9.572534230896611e-05, "loss": 0.1256, "step": 3520 }, { "epoch": 10.231884057971014, "grad_norm": 0.581132709980011, "learning_rate": 9.569183256924403e-05, "loss": 0.1547, "step": 3530 }, { "epoch": 10.26086956521739, "grad_norm": 0.35314807295799255, "learning_rate": 9.565819790979646e-05, "loss": 0.119, "step": 3540 }, { "epoch": 10.289855072463768, "grad_norm": 0.43084269762039185, "learning_rate": 9.562443842257925e-05, "loss": 0.1155, "step": 3550 }, { "epoch": 10.318840579710145, "grad_norm": 0.37022560834884644, "learning_rate": 9.559055419988956e-05, "loss": 0.1609, "step": 3560 }, { "epoch": 10.347826086956522, "grad_norm": 0.2883586883544922, "learning_rate": 9.555654533436557e-05, "loss": 0.1052, "step": 3570 }, { "epoch": 10.376811594202898, "grad_norm": 0.5148602724075317, "learning_rate": 9.552241191898621e-05, "loss": 0.1423, "step": 3580 }, { "epoch": 10.405797101449275, "grad_norm": 0.4749770164489746, "learning_rate": 9.548815404707092e-05, "loss": 0.1194, "step": 3590 }, { "epoch": 10.434782608695652, "grad_norm": 0.4021095335483551, "learning_rate": 9.545377181227942e-05, "loss": 0.124, "step": 3600 }, { "epoch": 10.46376811594203, "grad_norm": 0.30841973423957825, "learning_rate": 9.541926530861145e-05, "loss": 0.1195, "step": 3610 }, { "epoch": 10.492753623188406, "grad_norm": 0.3576466739177704, "learning_rate": 9.538463463040645e-05, "loss": 0.1169, "step": 3620 }, { "epoch": 10.521739130434783, "grad_norm": 0.5112766027450562, "learning_rate": 9.534987987234337e-05, "loss": 0.1283, "step": 3630 }, { "epoch": 10.55072463768116, "grad_norm": 0.27624791860580444, "learning_rate": 9.53150011294404e-05, "loss": 0.1042, "step": 3640 }, { "epoch": 10.579710144927537, "grad_norm": 0.4616936445236206, "learning_rate": 9.527999849705471e-05, "loss": 0.1214, "step": 3650 }, { "epoch": 10.608695652173914, "grad_norm": 0.2872353792190552, "learning_rate": 9.524487207088213e-05, "loss": 0.1272, "step": 3660 }, { "epoch": 10.63768115942029, "grad_norm": 0.3924836218357086, "learning_rate": 9.520962194695698e-05, "loss": 0.1152, "step": 3670 }, { "epoch": 10.666666666666666, "grad_norm": 0.5018351078033447, "learning_rate": 9.517424822165175e-05, "loss": 0.1425, "step": 3680 }, { "epoch": 10.695652173913043, "grad_norm": 0.4114161431789398, "learning_rate": 9.513875099167685e-05, "loss": 0.1287, "step": 3690 }, { "epoch": 10.72463768115942, "grad_norm": 0.8867626190185547, "learning_rate": 9.510313035408035e-05, "loss": 0.1243, "step": 3700 }, { "epoch": 10.753623188405797, "grad_norm": 0.48639723658561707, "learning_rate": 9.506738640624775e-05, "loss": 0.1244, "step": 3710 }, { "epoch": 10.782608695652174, "grad_norm": 0.5300337672233582, "learning_rate": 9.50315192459016e-05, "loss": 0.1339, "step": 3720 }, { "epoch": 10.81159420289855, "grad_norm": 0.4277614951133728, "learning_rate": 9.499552897110136e-05, "loss": 0.148, "step": 3730 }, { "epoch": 10.840579710144928, "grad_norm": 0.41532713174819946, "learning_rate": 9.495941568024304e-05, "loss": 0.1276, "step": 3740 }, { "epoch": 10.869565217391305, "grad_norm": 0.37189435958862305, "learning_rate": 9.492317947205904e-05, "loss": 0.1215, "step": 3750 }, { "epoch": 10.898550724637682, "grad_norm": 0.4247940182685852, "learning_rate": 9.488682044561775e-05, "loss": 0.1248, "step": 3760 }, { "epoch": 10.927536231884059, "grad_norm": 0.4739855229854584, "learning_rate": 9.485033870032335e-05, "loss": 0.1156, "step": 3770 }, { "epoch": 10.956521739130435, "grad_norm": 0.275510311126709, "learning_rate": 9.481373433591556e-05, "loss": 0.129, "step": 3780 }, { "epoch": 10.985507246376812, "grad_norm": 0.4555635154247284, "learning_rate": 9.47770074524693e-05, "loss": 0.1377, "step": 3790 }, { "epoch": 11.014492753623188, "grad_norm": 0.4588840901851654, "learning_rate": 9.474015815039446e-05, "loss": 0.1352, "step": 3800 }, { "epoch": 11.043478260869565, "grad_norm": 0.27891016006469727, "learning_rate": 9.470318653043565e-05, "loss": 0.1242, "step": 3810 }, { "epoch": 11.072463768115941, "grad_norm": 0.34980854392051697, "learning_rate": 9.466609269367185e-05, "loss": 0.1303, "step": 3820 }, { "epoch": 11.101449275362318, "grad_norm": 0.4605090022087097, "learning_rate": 9.46288767415162e-05, "loss": 0.1186, "step": 3830 }, { "epoch": 11.130434782608695, "grad_norm": 0.2761806845664978, "learning_rate": 9.459153877571567e-05, "loss": 0.1285, "step": 3840 }, { "epoch": 11.159420289855072, "grad_norm": 0.4459534287452698, "learning_rate": 9.455407889835087e-05, "loss": 0.1129, "step": 3850 }, { "epoch": 11.18840579710145, "grad_norm": 0.40482795238494873, "learning_rate": 9.451649721183564e-05, "loss": 0.1553, "step": 3860 }, { "epoch": 11.217391304347826, "grad_norm": 0.596967875957489, "learning_rate": 9.447879381891692e-05, "loss": 0.1389, "step": 3870 }, { "epoch": 11.246376811594203, "grad_norm": 0.4592018127441406, "learning_rate": 9.444096882267428e-05, "loss": 0.1375, "step": 3880 }, { "epoch": 11.27536231884058, "grad_norm": 0.4663671851158142, "learning_rate": 9.440302232651988e-05, "loss": 0.1164, "step": 3890 }, { "epoch": 11.304347826086957, "grad_norm": 0.42845603823661804, "learning_rate": 9.436495443419795e-05, "loss": 0.1206, "step": 3900 }, { "epoch": 11.333333333333334, "grad_norm": 0.39661505818367004, "learning_rate": 9.432676524978466e-05, "loss": 0.1007, "step": 3910 }, { "epoch": 11.36231884057971, "grad_norm": 0.3809431195259094, "learning_rate": 9.42884548776878e-05, "loss": 0.147, "step": 3920 }, { "epoch": 11.391304347826088, "grad_norm": 0.3601577877998352, "learning_rate": 9.425002342264646e-05, "loss": 0.1223, "step": 3930 }, { "epoch": 11.420289855072463, "grad_norm": 0.4095447063446045, "learning_rate": 9.421147098973077e-05, "loss": 0.1101, "step": 3940 }, { "epoch": 11.44927536231884, "grad_norm": 0.43890243768692017, "learning_rate": 9.41727976843416e-05, "loss": 0.1257, "step": 3950 }, { "epoch": 11.478260869565217, "grad_norm": 0.31772735714912415, "learning_rate": 9.413400361221029e-05, "loss": 0.1126, "step": 3960 }, { "epoch": 11.507246376811594, "grad_norm": 0.3342031240463257, "learning_rate": 9.409508887939835e-05, "loss": 0.1275, "step": 3970 }, { "epoch": 11.53623188405797, "grad_norm": 0.3726749122142792, "learning_rate": 9.40560535922972e-05, "loss": 0.1108, "step": 3980 }, { "epoch": 11.565217391304348, "grad_norm": 0.4039180278778076, "learning_rate": 9.40168978576278e-05, "loss": 0.1288, "step": 3990 }, { "epoch": 11.594202898550725, "grad_norm": 0.4435559809207916, "learning_rate": 9.397762178244043e-05, "loss": 0.1298, "step": 4000 }, { "epoch": 11.623188405797102, "grad_norm": 0.48986756801605225, "learning_rate": 9.393822547411439e-05, "loss": 0.1584, "step": 4010 }, { "epoch": 11.652173913043478, "grad_norm": 0.33243680000305176, "learning_rate": 9.389870904035769e-05, "loss": 0.1322, "step": 4020 }, { "epoch": 11.681159420289855, "grad_norm": 0.27870336174964905, "learning_rate": 9.385907258920672e-05, "loss": 0.1187, "step": 4030 }, { "epoch": 11.710144927536232, "grad_norm": 0.4363289773464203, "learning_rate": 9.381931622902607e-05, "loss": 0.1322, "step": 4040 }, { "epoch": 11.73913043478261, "grad_norm": 0.39369621872901917, "learning_rate": 9.377944006850807e-05, "loss": 0.1221, "step": 4050 }, { "epoch": 11.768115942028986, "grad_norm": 0.4057519733905792, "learning_rate": 9.373944421667265e-05, "loss": 0.1439, "step": 4060 }, { "epoch": 11.797101449275363, "grad_norm": 0.4745919406414032, "learning_rate": 9.369932878286691e-05, "loss": 0.1367, "step": 4070 }, { "epoch": 11.826086956521738, "grad_norm": 0.5527012944221497, "learning_rate": 9.365909387676494e-05, "loss": 0.1388, "step": 4080 }, { "epoch": 11.855072463768115, "grad_norm": 0.4839910566806793, "learning_rate": 9.361873960836744e-05, "loss": 0.1204, "step": 4090 }, { "epoch": 11.884057971014492, "grad_norm": 0.4102983772754669, "learning_rate": 9.357826608800142e-05, "loss": 0.1202, "step": 4100 }, { "epoch": 11.91304347826087, "grad_norm": 0.382380872964859, "learning_rate": 9.353767342631994e-05, "loss": 0.1247, "step": 4110 }, { "epoch": 11.942028985507246, "grad_norm": 0.384352445602417, "learning_rate": 9.34969617343018e-05, "loss": 0.1364, "step": 4120 }, { "epoch": 11.971014492753623, "grad_norm": 0.46882691979408264, "learning_rate": 9.345613112325122e-05, "loss": 0.1298, "step": 4130 }, { "epoch": 12.0, "grad_norm": 0.986838698387146, "learning_rate": 9.34151817047975e-05, "loss": 0.1259, "step": 4140 }, { "epoch": 12.028985507246377, "grad_norm": 0.41458386182785034, "learning_rate": 9.33741135908948e-05, "loss": 0.1174, "step": 4150 }, { "epoch": 12.057971014492754, "grad_norm": 0.5669786930084229, "learning_rate": 9.33329268938218e-05, "loss": 0.1255, "step": 4160 }, { "epoch": 12.08695652173913, "grad_norm": 0.418151319026947, "learning_rate": 9.329162172618132e-05, "loss": 0.1223, "step": 4170 }, { "epoch": 12.115942028985508, "grad_norm": 0.4376254677772522, "learning_rate": 9.325019820090013e-05, "loss": 0.1284, "step": 4180 }, { "epoch": 12.144927536231885, "grad_norm": 0.5084844827651978, "learning_rate": 9.320865643122855e-05, "loss": 0.1225, "step": 4190 }, { "epoch": 12.173913043478262, "grad_norm": 0.3965758979320526, "learning_rate": 9.316699653074023e-05, "loss": 0.1332, "step": 4200 }, { "epoch": 12.202898550724637, "grad_norm": 0.39621663093566895, "learning_rate": 9.312521861333172e-05, "loss": 0.1141, "step": 4210 }, { "epoch": 12.231884057971014, "grad_norm": 0.4206016957759857, "learning_rate": 9.308332279322224e-05, "loss": 0.1282, "step": 4220 }, { "epoch": 12.26086956521739, "grad_norm": 0.34612345695495605, "learning_rate": 9.304130918495338e-05, "loss": 0.1067, "step": 4230 }, { "epoch": 12.289855072463768, "grad_norm": 0.5288470387458801, "learning_rate": 9.299917790338874e-05, "loss": 0.1364, "step": 4240 }, { "epoch": 12.318840579710145, "grad_norm": 0.49290069937705994, "learning_rate": 9.295692906371363e-05, "loss": 0.1348, "step": 4250 }, { "epoch": 12.347826086956522, "grad_norm": 0.2043665647506714, "learning_rate": 9.291456278143476e-05, "loss": 0.1127, "step": 4260 }, { "epoch": 12.376811594202898, "grad_norm": 0.6402058005332947, "learning_rate": 9.287207917237994e-05, "loss": 0.1295, "step": 4270 }, { "epoch": 12.405797101449275, "grad_norm": 0.29695194959640503, "learning_rate": 9.282947835269773e-05, "loss": 0.102, "step": 4280 }, { "epoch": 12.434782608695652, "grad_norm": 0.3424241244792938, "learning_rate": 9.278676043885715e-05, "loss": 0.1275, "step": 4290 }, { "epoch": 12.46376811594203, "grad_norm": 0.4913289546966553, "learning_rate": 9.274392554764733e-05, "loss": 0.1413, "step": 4300 }, { "epoch": 12.492753623188406, "grad_norm": 0.3618018627166748, "learning_rate": 9.270097379617723e-05, "loss": 0.1103, "step": 4310 }, { "epoch": 12.521739130434783, "grad_norm": 0.42373889684677124, "learning_rate": 9.26579053018753e-05, "loss": 0.1198, "step": 4320 }, { "epoch": 12.55072463768116, "grad_norm": 0.3397703170776367, "learning_rate": 9.261472018248918e-05, "loss": 0.1132, "step": 4330 }, { "epoch": 12.579710144927537, "grad_norm": 0.4344271719455719, "learning_rate": 9.25714185560853e-05, "loss": 0.1173, "step": 4340 }, { "epoch": 12.608695652173914, "grad_norm": 0.4063388705253601, "learning_rate": 9.252800054104868e-05, "loss": 0.108, "step": 4350 }, { "epoch": 12.63768115942029, "grad_norm": 0.3664158880710602, "learning_rate": 9.248446625608252e-05, "loss": 0.1152, "step": 4360 }, { "epoch": 12.666666666666666, "grad_norm": 0.35261791944503784, "learning_rate": 9.244081582020789e-05, "loss": 0.1277, "step": 4370 }, { "epoch": 12.695652173913043, "grad_norm": 0.4147641062736511, "learning_rate": 9.239704935276339e-05, "loss": 0.1108, "step": 4380 }, { "epoch": 12.72463768115942, "grad_norm": 0.5231832265853882, "learning_rate": 9.235316697340489e-05, "loss": 0.1287, "step": 4390 }, { "epoch": 12.753623188405797, "grad_norm": 0.40551823377609253, "learning_rate": 9.230916880210512e-05, "loss": 0.1171, "step": 4400 }, { "epoch": 12.782608695652174, "grad_norm": 0.27819085121154785, "learning_rate": 9.226505495915342e-05, "loss": 0.1384, "step": 4410 }, { "epoch": 12.81159420289855, "grad_norm": 0.40564286708831787, "learning_rate": 9.222082556515536e-05, "loss": 0.1157, "step": 4420 }, { "epoch": 12.840579710144928, "grad_norm": 0.4431588351726532, "learning_rate": 9.217648074103242e-05, "loss": 0.1224, "step": 4430 }, { "epoch": 12.869565217391305, "grad_norm": 0.34970754384994507, "learning_rate": 9.213202060802161e-05, "loss": 0.1189, "step": 4440 }, { "epoch": 12.898550724637682, "grad_norm": 0.29916661977767944, "learning_rate": 9.208744528767528e-05, "loss": 0.1139, "step": 4450 }, { "epoch": 12.927536231884059, "grad_norm": 0.3757326304912567, "learning_rate": 9.204275490186064e-05, "loss": 0.1073, "step": 4460 }, { "epoch": 12.956521739130435, "grad_norm": 0.43750470876693726, "learning_rate": 9.199794957275949e-05, "loss": 0.1354, "step": 4470 }, { "epoch": 12.985507246376812, "grad_norm": 0.3462923467159271, "learning_rate": 9.19530294228679e-05, "loss": 0.109, "step": 4480 }, { "epoch": 13.014492753623188, "grad_norm": 0.23552751541137695, "learning_rate": 9.190799457499583e-05, "loss": 0.1315, "step": 4490 }, { "epoch": 13.043478260869565, "grad_norm": 0.44175973534584045, "learning_rate": 9.186284515226686e-05, "loss": 0.1313, "step": 4500 }, { "epoch": 13.072463768115941, "grad_norm": 0.43847179412841797, "learning_rate": 9.181758127811777e-05, "loss": 0.1329, "step": 4510 }, { "epoch": 13.101449275362318, "grad_norm": 0.31816014647483826, "learning_rate": 9.177220307629825e-05, "loss": 0.1265, "step": 4520 }, { "epoch": 13.130434782608695, "grad_norm": 0.4455469846725464, "learning_rate": 9.172671067087059e-05, "loss": 0.1069, "step": 4530 }, { "epoch": 13.159420289855072, "grad_norm": 0.2768830358982086, "learning_rate": 9.16811041862093e-05, "loss": 0.1166, "step": 4540 }, { "epoch": 13.18840579710145, "grad_norm": 0.39586612582206726, "learning_rate": 9.163538374700076e-05, "loss": 0.1239, "step": 4550 }, { "epoch": 13.217391304347826, "grad_norm": 0.6842658519744873, "learning_rate": 9.158954947824287e-05, "loss": 0.1196, "step": 4560 }, { "epoch": 13.246376811594203, "grad_norm": 0.3051077127456665, "learning_rate": 9.154360150524482e-05, "loss": 0.1277, "step": 4570 }, { "epoch": 13.27536231884058, "grad_norm": 0.32419049739837646, "learning_rate": 9.14975399536266e-05, "loss": 0.1328, "step": 4580 }, { "epoch": 13.304347826086957, "grad_norm": 0.49009594321250916, "learning_rate": 9.14513649493187e-05, "loss": 0.11, "step": 4590 }, { "epoch": 13.333333333333334, "grad_norm": 0.41023188829421997, "learning_rate": 9.140507661856187e-05, "loss": 0.1204, "step": 4600 }, { "epoch": 13.36231884057971, "grad_norm": 0.27681684494018555, "learning_rate": 9.135867508790661e-05, "loss": 0.127, "step": 4610 }, { "epoch": 13.391304347826088, "grad_norm": 0.33429259061813354, "learning_rate": 9.131216048421291e-05, "loss": 0.1056, "step": 4620 }, { "epoch": 13.420289855072463, "grad_norm": 0.3825032114982605, "learning_rate": 9.126553293464998e-05, "loss": 0.1296, "step": 4630 }, { "epoch": 13.44927536231884, "grad_norm": 0.28926411271095276, "learning_rate": 9.121879256669572e-05, "loss": 0.1088, "step": 4640 }, { "epoch": 13.478260869565217, "grad_norm": 0.24572978913784027, "learning_rate": 9.117193950813652e-05, "loss": 0.1068, "step": 4650 }, { "epoch": 13.507246376811594, "grad_norm": 0.462626576423645, "learning_rate": 9.112497388706685e-05, "loss": 0.1119, "step": 4660 }, { "epoch": 13.53623188405797, "grad_norm": 0.4677536189556122, "learning_rate": 9.10778958318889e-05, "loss": 0.1113, "step": 4670 }, { "epoch": 13.565217391304348, "grad_norm": 0.3768196105957031, "learning_rate": 9.103070547131232e-05, "loss": 0.111, "step": 4680 }, { "epoch": 13.594202898550725, "grad_norm": 0.28670257329940796, "learning_rate": 9.098340293435375e-05, "loss": 0.1007, "step": 4690 }, { "epoch": 13.623188405797102, "grad_norm": 0.3326264023780823, "learning_rate": 9.093598835033649e-05, "loss": 0.1417, "step": 4700 }, { "epoch": 13.652173913043478, "grad_norm": 0.4190509021282196, "learning_rate": 9.088846184889021e-05, "loss": 0.1094, "step": 4710 }, { "epoch": 13.681159420289855, "grad_norm": 0.48827919363975525, "learning_rate": 9.084082355995057e-05, "loss": 0.1145, "step": 4720 }, { "epoch": 13.710144927536232, "grad_norm": 0.42035019397735596, "learning_rate": 9.079307361375882e-05, "loss": 0.1408, "step": 4730 }, { "epoch": 13.73913043478261, "grad_norm": 0.35590943694114685, "learning_rate": 9.074521214086149e-05, "loss": 0.1125, "step": 4740 }, { "epoch": 13.768115942028986, "grad_norm": 0.3481467068195343, "learning_rate": 9.069723927211001e-05, "loss": 0.1306, "step": 4750 }, { "epoch": 13.797101449275363, "grad_norm": 0.5402430891990662, "learning_rate": 9.064915513866037e-05, "loss": 0.131, "step": 4760 }, { "epoch": 13.826086956521738, "grad_norm": 0.4278501272201538, "learning_rate": 9.060095987197279e-05, "loss": 0.1275, "step": 4770 }, { "epoch": 13.855072463768115, "grad_norm": 0.27769970893859863, "learning_rate": 9.055265360381126e-05, "loss": 0.1186, "step": 4780 }, { "epoch": 13.884057971014492, "grad_norm": 0.258645236492157, "learning_rate": 9.050423646624326e-05, "loss": 0.1288, "step": 4790 }, { "epoch": 13.91304347826087, "grad_norm": 0.39688029885292053, "learning_rate": 9.045570859163943e-05, "loss": 0.1174, "step": 4800 }, { "epoch": 13.942028985507246, "grad_norm": 0.4738856554031372, "learning_rate": 9.04070701126731e-05, "loss": 0.1179, "step": 4810 }, { "epoch": 13.971014492753623, "grad_norm": 0.4535987675189972, "learning_rate": 9.035832116232001e-05, "loss": 0.123, "step": 4820 }, { "epoch": 14.0, "grad_norm": 0.941590428352356, "learning_rate": 9.030946187385796e-05, "loss": 0.1209, "step": 4830 }, { "epoch": 14.028985507246377, "grad_norm": 0.5170802474021912, "learning_rate": 9.026049238086635e-05, "loss": 0.1152, "step": 4840 }, { "epoch": 14.057971014492754, "grad_norm": 0.2910565435886383, "learning_rate": 9.021141281722591e-05, "loss": 0.1163, "step": 4850 }, { "epoch": 14.08695652173913, "grad_norm": 0.4192429482936859, "learning_rate": 9.01622233171183e-05, "loss": 0.0962, "step": 4860 }, { "epoch": 14.115942028985508, "grad_norm": 0.3841983377933502, "learning_rate": 9.011292401502574e-05, "loss": 0.122, "step": 4870 }, { "epoch": 14.144927536231885, "grad_norm": 0.42944851517677307, "learning_rate": 9.006351504573063e-05, "loss": 0.1251, "step": 4880 }, { "epoch": 14.173913043478262, "grad_norm": 0.3717688322067261, "learning_rate": 9.001399654431519e-05, "loss": 0.1096, "step": 4890 }, { "epoch": 14.202898550724637, "grad_norm": 0.37804022431373596, "learning_rate": 8.996436864616116e-05, "loss": 0.1209, "step": 4900 }, { "epoch": 14.231884057971014, "grad_norm": 0.3129970133304596, "learning_rate": 8.991463148694925e-05, "loss": 0.1097, "step": 4910 }, { "epoch": 14.26086956521739, "grad_norm": 0.40859848260879517, "learning_rate": 8.986478520265902e-05, "loss": 0.1214, "step": 4920 }, { "epoch": 14.289855072463768, "grad_norm": 0.3709128797054291, "learning_rate": 8.981482992956827e-05, "loss": 0.1264, "step": 4930 }, { "epoch": 14.318840579710145, "grad_norm": 0.3855811059474945, "learning_rate": 8.976476580425282e-05, "loss": 0.1113, "step": 4940 }, { "epoch": 14.347826086956522, "grad_norm": 0.28712448477745056, "learning_rate": 8.971459296358606e-05, "loss": 0.0821, "step": 4950 }, { "epoch": 14.376811594202898, "grad_norm": 0.48466065526008606, "learning_rate": 8.966431154473864e-05, "loss": 0.1493, "step": 4960 }, { "epoch": 14.405797101449275, "grad_norm": 0.30625486373901367, "learning_rate": 8.961392168517803e-05, "loss": 0.1163, "step": 4970 }, { "epoch": 14.434782608695652, "grad_norm": 0.32612621784210205, "learning_rate": 8.956342352266821e-05, "loss": 0.1294, "step": 4980 }, { "epoch": 14.46376811594203, "grad_norm": 0.39533373713493347, "learning_rate": 8.95128171952692e-05, "loss": 0.1141, "step": 4990 }, { "epoch": 14.492753623188406, "grad_norm": 0.5708385109901428, "learning_rate": 8.946210284133676e-05, "loss": 0.1435, "step": 5000 }, { "epoch": 14.521739130434783, "grad_norm": 0.41702768206596375, "learning_rate": 8.941128059952201e-05, "loss": 0.1244, "step": 5010 }, { "epoch": 14.55072463768116, "grad_norm": 0.581706702709198, "learning_rate": 8.936035060877102e-05, "loss": 0.1013, "step": 5020 }, { "epoch": 14.579710144927537, "grad_norm": 0.508090078830719, "learning_rate": 8.930931300832443e-05, "loss": 0.0987, "step": 5030 }, { "epoch": 14.608695652173914, "grad_norm": 0.32675766944885254, "learning_rate": 8.925816793771711e-05, "loss": 0.1173, "step": 5040 }, { "epoch": 14.63768115942029, "grad_norm": 0.4030362069606781, "learning_rate": 8.92069155367777e-05, "loss": 0.1054, "step": 5050 }, { "epoch": 14.666666666666666, "grad_norm": 0.4901740550994873, "learning_rate": 8.915555594562834e-05, "loss": 0.1197, "step": 5060 }, { "epoch": 14.695652173913043, "grad_norm": 0.43186917901039124, "learning_rate": 8.910408930468416e-05, "loss": 0.1146, "step": 5070 }, { "epoch": 14.72463768115942, "grad_norm": 0.3401460647583008, "learning_rate": 8.905251575465303e-05, "loss": 0.1237, "step": 5080 }, { "epoch": 14.753623188405797, "grad_norm": 0.2620072662830353, "learning_rate": 8.900083543653502e-05, "loss": 0.123, "step": 5090 }, { "epoch": 14.782608695652174, "grad_norm": 0.3774551451206207, "learning_rate": 8.894904849162218e-05, "loss": 0.1237, "step": 5100 }, { "epoch": 14.81159420289855, "grad_norm": 0.4038746654987335, "learning_rate": 8.889715506149802e-05, "loss": 0.115, "step": 5110 }, { "epoch": 14.840579710144928, "grad_norm": 0.4395363926887512, "learning_rate": 8.884515528803722e-05, "loss": 0.1139, "step": 5120 }, { "epoch": 14.869565217391305, "grad_norm": 0.34769847989082336, "learning_rate": 8.879304931340517e-05, "loss": 0.1211, "step": 5130 }, { "epoch": 14.898550724637682, "grad_norm": 0.3238866925239563, "learning_rate": 8.874083728005759e-05, "loss": 0.1181, "step": 5140 }, { "epoch": 14.927536231884059, "grad_norm": 0.43937593698501587, "learning_rate": 8.868851933074021e-05, "loss": 0.1232, "step": 5150 }, { "epoch": 14.956521739130435, "grad_norm": 0.4402833580970764, "learning_rate": 8.863609560848829e-05, "loss": 0.1365, "step": 5160 }, { "epoch": 14.985507246376812, "grad_norm": 0.6102784276008606, "learning_rate": 8.85835662566263e-05, "loss": 0.1248, "step": 5170 }, { "epoch": 15.014492753623188, "grad_norm": 0.28894439339637756, "learning_rate": 8.853093141876747e-05, "loss": 0.1016, "step": 5180 }, { "epoch": 15.043478260869565, "grad_norm": 0.2645789086818695, "learning_rate": 8.847819123881343e-05, "loss": 0.1256, "step": 5190 }, { "epoch": 15.072463768115941, "grad_norm": 0.38724544644355774, "learning_rate": 8.842534586095383e-05, "loss": 0.1432, "step": 5200 }, { "epoch": 15.101449275362318, "grad_norm": 0.2536871135234833, "learning_rate": 8.837239542966593e-05, "loss": 0.1033, "step": 5210 }, { "epoch": 15.130434782608695, "grad_norm": 0.337372750043869, "learning_rate": 8.831934008971417e-05, "loss": 0.1231, "step": 5220 }, { "epoch": 15.159420289855072, "grad_norm": 0.3590666353702545, "learning_rate": 8.826617998614982e-05, "loss": 0.109, "step": 5230 }, { "epoch": 15.18840579710145, "grad_norm": 0.37052637338638306, "learning_rate": 8.821291526431056e-05, "loss": 0.1001, "step": 5240 }, { "epoch": 15.217391304347826, "grad_norm": 0.5083751082420349, "learning_rate": 8.815954606982015e-05, "loss": 0.1224, "step": 5250 }, { "epoch": 15.246376811594203, "grad_norm": 0.3430265486240387, "learning_rate": 8.810607254858789e-05, "loss": 0.1201, "step": 5260 }, { "epoch": 15.27536231884058, "grad_norm": 0.6075800061225891, "learning_rate": 8.805249484680838e-05, "loss": 0.1281, "step": 5270 }, { "epoch": 15.304347826086957, "grad_norm": 0.6015037894248962, "learning_rate": 8.799881311096096e-05, "loss": 0.1337, "step": 5280 }, { "epoch": 15.333333333333334, "grad_norm": 0.3478599786758423, "learning_rate": 8.794502748780949e-05, "loss": 0.1363, "step": 5290 }, { "epoch": 15.36231884057971, "grad_norm": 0.39971593022346497, "learning_rate": 8.78911381244018e-05, "loss": 0.1015, "step": 5300 }, { "epoch": 15.391304347826088, "grad_norm": 0.38049763441085815, "learning_rate": 8.783714516806933e-05, "loss": 0.1209, "step": 5310 }, { "epoch": 15.420289855072463, "grad_norm": 0.33554980158805847, "learning_rate": 8.77830487664268e-05, "loss": 0.1077, "step": 5320 }, { "epoch": 15.44927536231884, "grad_norm": 0.2598898410797119, "learning_rate": 8.772884906737167e-05, "loss": 0.1056, "step": 5330 }, { "epoch": 15.478260869565217, "grad_norm": 0.30635103583335876, "learning_rate": 8.767454621908387e-05, "loss": 0.1182, "step": 5340 }, { "epoch": 15.507246376811594, "grad_norm": 0.31595268845558167, "learning_rate": 8.76201403700253e-05, "loss": 0.0987, "step": 5350 }, { "epoch": 15.53623188405797, "grad_norm": 0.4669897258281708, "learning_rate": 8.756563166893949e-05, "loss": 0.1093, "step": 5360 }, { "epoch": 15.565217391304348, "grad_norm": 0.41924533247947693, "learning_rate": 8.751102026485113e-05, "loss": 0.0981, "step": 5370 }, { "epoch": 15.594202898550725, "grad_norm": 0.3114607334136963, "learning_rate": 8.745630630706571e-05, "loss": 0.1265, "step": 5380 }, { "epoch": 15.623188405797102, "grad_norm": 0.33994221687316895, "learning_rate": 8.740148994516912e-05, "loss": 0.1061, "step": 5390 }, { "epoch": 15.652173913043478, "grad_norm": 0.5424929857254028, "learning_rate": 8.73465713290272e-05, "loss": 0.1112, "step": 5400 }, { "epoch": 15.681159420289855, "grad_norm": 0.4351734519004822, "learning_rate": 8.729155060878533e-05, "loss": 0.1043, "step": 5410 }, { "epoch": 15.710144927536232, "grad_norm": 0.33228495717048645, "learning_rate": 8.723642793486809e-05, "loss": 0.1257, "step": 5420 }, { "epoch": 15.73913043478261, "grad_norm": 0.4116186201572418, "learning_rate": 8.718120345797873e-05, "loss": 0.1102, "step": 5430 }, { "epoch": 15.768115942028986, "grad_norm": 0.38537874817848206, "learning_rate": 8.712587732909889e-05, "loss": 0.1315, "step": 5440 }, { "epoch": 15.797101449275363, "grad_norm": 0.2920888066291809, "learning_rate": 8.707044969948806e-05, "loss": 0.1393, "step": 5450 }, { "epoch": 15.826086956521738, "grad_norm": 0.3017374277114868, "learning_rate": 8.701492072068329e-05, "loss": 0.1181, "step": 5460 }, { "epoch": 15.855072463768115, "grad_norm": 0.3454197645187378, "learning_rate": 8.695929054449869e-05, "loss": 0.1144, "step": 5470 }, { "epoch": 15.884057971014492, "grad_norm": 0.3054383099079132, "learning_rate": 8.690355932302501e-05, "loss": 0.1149, "step": 5480 }, { "epoch": 15.91304347826087, "grad_norm": 0.6223363280296326, "learning_rate": 8.684772720862931e-05, "loss": 0.1138, "step": 5490 }, { "epoch": 15.942028985507246, "grad_norm": 0.33070531487464905, "learning_rate": 8.679179435395446e-05, "loss": 0.1074, "step": 5500 }, { "epoch": 15.971014492753623, "grad_norm": 0.3179458677768707, "learning_rate": 8.673576091191874e-05, "loss": 0.109, "step": 5510 }, { "epoch": 16.0, "grad_norm": 0.2908962070941925, "learning_rate": 8.667962703571541e-05, "loss": 0.0997, "step": 5520 }, { "epoch": 16.028985507246375, "grad_norm": 0.45387428998947144, "learning_rate": 8.662339287881238e-05, "loss": 0.1132, "step": 5530 }, { "epoch": 16.057971014492754, "grad_norm": 0.34306666254997253, "learning_rate": 8.656705859495169e-05, "loss": 0.099, "step": 5540 }, { "epoch": 16.08695652173913, "grad_norm": 0.317571222782135, "learning_rate": 8.651062433814912e-05, "loss": 0.1142, "step": 5550 }, { "epoch": 16.115942028985508, "grad_norm": 0.2807283103466034, "learning_rate": 8.645409026269375e-05, "loss": 0.1085, "step": 5560 }, { "epoch": 16.144927536231883, "grad_norm": 0.48772743344306946, "learning_rate": 8.639745652314759e-05, "loss": 0.1222, "step": 5570 }, { "epoch": 16.17391304347826, "grad_norm": 0.3181246519088745, "learning_rate": 8.634072327434515e-05, "loss": 0.1113, "step": 5580 }, { "epoch": 16.202898550724637, "grad_norm": 0.28259527683258057, "learning_rate": 8.628389067139294e-05, "loss": 0.0973, "step": 5590 }, { "epoch": 16.231884057971016, "grad_norm": 0.21859432756900787, "learning_rate": 8.622695886966911e-05, "loss": 0.105, "step": 5600 }, { "epoch": 16.26086956521739, "grad_norm": 0.38870155811309814, "learning_rate": 8.616992802482308e-05, "loss": 0.1054, "step": 5610 }, { "epoch": 16.28985507246377, "grad_norm": 0.30381137132644653, "learning_rate": 8.611279829277496e-05, "loss": 0.1095, "step": 5620 }, { "epoch": 16.318840579710145, "grad_norm": 0.33329617977142334, "learning_rate": 8.605556982971528e-05, "loss": 0.0896, "step": 5630 }, { "epoch": 16.347826086956523, "grad_norm": 0.3171881437301636, "learning_rate": 8.599824279210447e-05, "loss": 0.1097, "step": 5640 }, { "epoch": 16.3768115942029, "grad_norm": 0.36195775866508484, "learning_rate": 8.594081733667243e-05, "loss": 0.1088, "step": 5650 }, { "epoch": 16.405797101449274, "grad_norm": 0.3968923091888428, "learning_rate": 8.58832936204182e-05, "loss": 0.0995, "step": 5660 }, { "epoch": 16.434782608695652, "grad_norm": 0.515150249004364, "learning_rate": 8.582567180060942e-05, "loss": 0.107, "step": 5670 }, { "epoch": 16.463768115942027, "grad_norm": 0.4465225040912628, "learning_rate": 8.576795203478194e-05, "loss": 0.123, "step": 5680 }, { "epoch": 16.492753623188406, "grad_norm": 0.27907755970954895, "learning_rate": 8.571013448073939e-05, "loss": 0.1023, "step": 5690 }, { "epoch": 16.52173913043478, "grad_norm": 0.4790158271789551, "learning_rate": 8.565221929655275e-05, "loss": 0.1154, "step": 5700 }, { "epoch": 16.55072463768116, "grad_norm": 0.5309686660766602, "learning_rate": 8.559420664055992e-05, "loss": 0.1308, "step": 5710 }, { "epoch": 16.579710144927535, "grad_norm": 0.36980125308036804, "learning_rate": 8.553609667136532e-05, "loss": 0.1177, "step": 5720 }, { "epoch": 16.608695652173914, "grad_norm": 0.33945196866989136, "learning_rate": 8.547788954783936e-05, "loss": 0.1511, "step": 5730 }, { "epoch": 16.63768115942029, "grad_norm": 0.26327815651893616, "learning_rate": 8.541958542911808e-05, "loss": 0.1238, "step": 5740 }, { "epoch": 16.666666666666668, "grad_norm": 0.351123571395874, "learning_rate": 8.536118447460275e-05, "loss": 0.0927, "step": 5750 }, { "epoch": 16.695652173913043, "grad_norm": 0.3815719187259674, "learning_rate": 8.530268684395932e-05, "loss": 0.1071, "step": 5760 }, { "epoch": 16.72463768115942, "grad_norm": 0.4307467043399811, "learning_rate": 8.524409269711807e-05, "loss": 0.1098, "step": 5770 }, { "epoch": 16.753623188405797, "grad_norm": 0.33247116208076477, "learning_rate": 8.51854021942732e-05, "loss": 0.121, "step": 5780 }, { "epoch": 16.782608695652176, "grad_norm": 0.29645007848739624, "learning_rate": 8.512661549588227e-05, "loss": 0.0983, "step": 5790 }, { "epoch": 16.81159420289855, "grad_norm": 0.3584914207458496, "learning_rate": 8.506773276266588e-05, "loss": 0.1093, "step": 5800 }, { "epoch": 16.840579710144926, "grad_norm": 0.4944992959499359, "learning_rate": 8.500875415560721e-05, "loss": 0.12, "step": 5810 }, { "epoch": 16.869565217391305, "grad_norm": 0.3007963001728058, "learning_rate": 8.494967983595144e-05, "loss": 0.1165, "step": 5820 }, { "epoch": 16.89855072463768, "grad_norm": 0.2885436415672302, "learning_rate": 8.489050996520558e-05, "loss": 0.1295, "step": 5830 }, { "epoch": 16.92753623188406, "grad_norm": 0.45604297518730164, "learning_rate": 8.483124470513775e-05, "loss": 0.1038, "step": 5840 }, { "epoch": 16.956521739130434, "grad_norm": 0.34743809700012207, "learning_rate": 8.477188421777692e-05, "loss": 0.1114, "step": 5850 }, { "epoch": 16.985507246376812, "grad_norm": 0.3643774390220642, "learning_rate": 8.47124286654124e-05, "loss": 0.1207, "step": 5860 }, { "epoch": 17.014492753623188, "grad_norm": 0.40412595868110657, "learning_rate": 8.465287821059341e-05, "loss": 0.1355, "step": 5870 }, { "epoch": 17.043478260869566, "grad_norm": 0.27604588866233826, "learning_rate": 8.45932330161286e-05, "loss": 0.1053, "step": 5880 }, { "epoch": 17.07246376811594, "grad_norm": 0.472373366355896, "learning_rate": 8.453349324508567e-05, "loss": 0.1032, "step": 5890 }, { "epoch": 17.10144927536232, "grad_norm": 0.4434383511543274, "learning_rate": 8.447365906079088e-05, "loss": 0.1033, "step": 5900 }, { "epoch": 17.130434782608695, "grad_norm": 0.38238826394081116, "learning_rate": 8.441373062682856e-05, "loss": 0.1056, "step": 5910 }, { "epoch": 17.159420289855074, "grad_norm": 0.34880152344703674, "learning_rate": 8.43537081070408e-05, "loss": 0.0964, "step": 5920 }, { "epoch": 17.18840579710145, "grad_norm": 0.46405625343322754, "learning_rate": 8.429359166552689e-05, "loss": 0.1363, "step": 5930 }, { "epoch": 17.217391304347824, "grad_norm": 0.35732561349868774, "learning_rate": 8.423338146664284e-05, "loss": 0.1046, "step": 5940 }, { "epoch": 17.246376811594203, "grad_norm": 0.3758239150047302, "learning_rate": 8.417307767500107e-05, "loss": 0.0963, "step": 5950 }, { "epoch": 17.27536231884058, "grad_norm": 0.40921303629875183, "learning_rate": 8.411268045546983e-05, "loss": 0.12, "step": 5960 }, { "epoch": 17.304347826086957, "grad_norm": 0.30313900113105774, "learning_rate": 8.405218997317281e-05, "loss": 0.1213, "step": 5970 }, { "epoch": 17.333333333333332, "grad_norm": 0.27081504464149475, "learning_rate": 8.399160639348869e-05, "loss": 0.106, "step": 5980 }, { "epoch": 17.36231884057971, "grad_norm": 0.32741713523864746, "learning_rate": 8.393092988205065e-05, "loss": 0.089, "step": 5990 }, { "epoch": 17.391304347826086, "grad_norm": 0.350293904542923, "learning_rate": 8.387016060474597e-05, "loss": 0.1074, "step": 6000 }, { "epoch": 17.420289855072465, "grad_norm": 0.46384942531585693, "learning_rate": 8.380929872771551e-05, "loss": 0.1151, "step": 6010 }, { "epoch": 17.44927536231884, "grad_norm": 0.38906311988830566, "learning_rate": 8.374834441735335e-05, "loss": 0.0944, "step": 6020 }, { "epoch": 17.47826086956522, "grad_norm": 0.4116496443748474, "learning_rate": 8.368729784030622e-05, "loss": 0.1009, "step": 6030 }, { "epoch": 17.507246376811594, "grad_norm": 0.36326107382774353, "learning_rate": 8.362615916347315e-05, "loss": 0.117, "step": 6040 }, { "epoch": 17.536231884057973, "grad_norm": 0.4073273539543152, "learning_rate": 8.356492855400493e-05, "loss": 0.1196, "step": 6050 }, { "epoch": 17.565217391304348, "grad_norm": 0.3261200189590454, "learning_rate": 8.350360617930371e-05, "loss": 0.0887, "step": 6060 }, { "epoch": 17.594202898550726, "grad_norm": 0.2686854302883148, "learning_rate": 8.344219220702255e-05, "loss": 0.103, "step": 6070 }, { "epoch": 17.6231884057971, "grad_norm": 0.30324316024780273, "learning_rate": 8.338068680506485e-05, "loss": 0.1244, "step": 6080 }, { "epoch": 17.652173913043477, "grad_norm": 0.3971955180168152, "learning_rate": 8.33190901415841e-05, "loss": 0.1114, "step": 6090 }, { "epoch": 17.681159420289855, "grad_norm": 0.23621766269207, "learning_rate": 8.325740238498317e-05, "loss": 0.1151, "step": 6100 }, { "epoch": 17.71014492753623, "grad_norm": 0.3847745954990387, "learning_rate": 8.319562370391406e-05, "loss": 0.1067, "step": 6110 }, { "epoch": 17.73913043478261, "grad_norm": 0.29595401883125305, "learning_rate": 8.31337542672773e-05, "loss": 0.0946, "step": 6120 }, { "epoch": 17.768115942028984, "grad_norm": 0.36179453134536743, "learning_rate": 8.307179424422158e-05, "loss": 0.1058, "step": 6130 }, { "epoch": 17.797101449275363, "grad_norm": 0.28337523341178894, "learning_rate": 8.300974380414327e-05, "loss": 0.0932, "step": 6140 }, { "epoch": 17.82608695652174, "grad_norm": 0.3820880651473999, "learning_rate": 8.294760311668586e-05, "loss": 0.1179, "step": 6150 }, { "epoch": 17.855072463768117, "grad_norm": 0.19762246310710907, "learning_rate": 8.288537235173961e-05, "loss": 0.1321, "step": 6160 }, { "epoch": 17.884057971014492, "grad_norm": 0.4157634973526001, "learning_rate": 8.282305167944108e-05, "loss": 0.1054, "step": 6170 }, { "epoch": 17.91304347826087, "grad_norm": 0.4183441698551178, "learning_rate": 8.276064127017262e-05, "loss": 0.11, "step": 6180 }, { "epoch": 17.942028985507246, "grad_norm": 0.41134294867515564, "learning_rate": 8.269814129456189e-05, "loss": 0.0956, "step": 6190 }, { "epoch": 17.971014492753625, "grad_norm": 0.4862001836299896, "learning_rate": 8.263555192348143e-05, "loss": 0.1139, "step": 6200 }, { "epoch": 18.0, "grad_norm": 0.4908180236816406, "learning_rate": 8.257287332804819e-05, "loss": 0.094, "step": 6210 }, { "epoch": 18.028985507246375, "grad_norm": 0.37922942638397217, "learning_rate": 8.251010567962307e-05, "loss": 0.1156, "step": 6220 }, { "epoch": 18.057971014492754, "grad_norm": 0.4405559003353119, "learning_rate": 8.244724914981041e-05, "loss": 0.1219, "step": 6230 }, { "epoch": 18.08695652173913, "grad_norm": 0.3812404274940491, "learning_rate": 8.238430391045757e-05, "loss": 0.1019, "step": 6240 }, { "epoch": 18.115942028985508, "grad_norm": 0.4499792754650116, "learning_rate": 8.232127013365445e-05, "loss": 0.0975, "step": 6250 }, { "epoch": 18.144927536231883, "grad_norm": 0.5252455472946167, "learning_rate": 8.225814799173295e-05, "loss": 0.1159, "step": 6260 }, { "epoch": 18.17391304347826, "grad_norm": 0.44270288944244385, "learning_rate": 8.219493765726663e-05, "loss": 0.1103, "step": 6270 }, { "epoch": 18.202898550724637, "grad_norm": 0.2811424136161804, "learning_rate": 8.21316393030701e-05, "loss": 0.1243, "step": 6280 }, { "epoch": 18.231884057971016, "grad_norm": 0.4186232388019562, "learning_rate": 8.206825310219865e-05, "loss": 0.1174, "step": 6290 }, { "epoch": 18.26086956521739, "grad_norm": 0.2894100844860077, "learning_rate": 8.200477922794776e-05, "loss": 0.1061, "step": 6300 }, { "epoch": 18.28985507246377, "grad_norm": 0.3997898995876312, "learning_rate": 8.194121785385256e-05, "loss": 0.1153, "step": 6310 }, { "epoch": 18.318840579710145, "grad_norm": 0.2798968255519867, "learning_rate": 8.187756915368741e-05, "loss": 0.0976, "step": 6320 }, { "epoch": 18.347826086956523, "grad_norm": 0.3138371407985687, "learning_rate": 8.181383330146544e-05, "loss": 0.1135, "step": 6330 }, { "epoch": 18.3768115942029, "grad_norm": 0.3996056914329529, "learning_rate": 8.175001047143804e-05, "loss": 0.1135, "step": 6340 }, { "epoch": 18.405797101449274, "grad_norm": 0.39477649331092834, "learning_rate": 8.168610083809438e-05, "loss": 0.1052, "step": 6350 }, { "epoch": 18.434782608695652, "grad_norm": 0.35394132137298584, "learning_rate": 8.162210457616095e-05, "loss": 0.0962, "step": 6360 }, { "epoch": 18.463768115942027, "grad_norm": 0.28887924551963806, "learning_rate": 8.155802186060109e-05, "loss": 0.1019, "step": 6370 }, { "epoch": 18.492753623188406, "grad_norm": 0.272850900888443, "learning_rate": 8.149385286661453e-05, "loss": 0.127, "step": 6380 }, { "epoch": 18.52173913043478, "grad_norm": 0.5005936026573181, "learning_rate": 8.14295977696368e-05, "loss": 0.1174, "step": 6390 }, { "epoch": 18.55072463768116, "grad_norm": 0.38646388053894043, "learning_rate": 8.13652567453389e-05, "loss": 0.1069, "step": 6400 }, { "epoch": 18.579710144927535, "grad_norm": 0.3873347043991089, "learning_rate": 8.130082996962676e-05, "loss": 0.1235, "step": 6410 }, { "epoch": 18.608695652173914, "grad_norm": 0.33470475673675537, "learning_rate": 8.123631761864068e-05, "loss": 0.1031, "step": 6420 }, { "epoch": 18.63768115942029, "grad_norm": 0.4067200720310211, "learning_rate": 8.1171719868755e-05, "loss": 0.1193, "step": 6430 }, { "epoch": 18.666666666666668, "grad_norm": 0.34601399302482605, "learning_rate": 8.110703689657748e-05, "loss": 0.0968, "step": 6440 }, { "epoch": 18.695652173913043, "grad_norm": 0.4860565662384033, "learning_rate": 8.104226887894892e-05, "loss": 0.1008, "step": 6450 }, { "epoch": 18.72463768115942, "grad_norm": 0.51555997133255, "learning_rate": 8.097741599294257e-05, "loss": 0.1193, "step": 6460 }, { "epoch": 18.753623188405797, "grad_norm": 0.36039650440216064, "learning_rate": 8.091247841586378e-05, "loss": 0.1029, "step": 6470 }, { "epoch": 18.782608695652176, "grad_norm": 0.315164178609848, "learning_rate": 8.084745632524939e-05, "loss": 0.0904, "step": 6480 }, { "epoch": 18.81159420289855, "grad_norm": 0.3153921365737915, "learning_rate": 8.07823498988673e-05, "loss": 0.1038, "step": 6490 }, { "epoch": 18.840579710144926, "grad_norm": 0.3011777102947235, "learning_rate": 8.071715931471602e-05, "loss": 0.1245, "step": 6500 }, { "epoch": 18.869565217391305, "grad_norm": 0.3314365744590759, "learning_rate": 8.06518847510241e-05, "loss": 0.1001, "step": 6510 }, { "epoch": 18.89855072463768, "grad_norm": 0.3859410881996155, "learning_rate": 8.058652638624971e-05, "loss": 0.1122, "step": 6520 }, { "epoch": 18.92753623188406, "grad_norm": 0.3356384336948395, "learning_rate": 8.052108439908013e-05, "loss": 0.1144, "step": 6530 }, { "epoch": 18.956521739130434, "grad_norm": 0.35082948207855225, "learning_rate": 8.045555896843125e-05, "loss": 0.1079, "step": 6540 }, { "epoch": 18.985507246376812, "grad_norm": 0.5260385274887085, "learning_rate": 8.03899502734471e-05, "loss": 0.1046, "step": 6550 }, { "epoch": 19.014492753623188, "grad_norm": 0.3151768445968628, "learning_rate": 8.032425849349931e-05, "loss": 0.105, "step": 6560 }, { "epoch": 19.043478260869566, "grad_norm": 0.362244188785553, "learning_rate": 8.025848380818674e-05, "loss": 0.1005, "step": 6570 }, { "epoch": 19.07246376811594, "grad_norm": 0.41462433338165283, "learning_rate": 8.019262639733487e-05, "loss": 0.1198, "step": 6580 }, { "epoch": 19.10144927536232, "grad_norm": 0.40146404504776, "learning_rate": 8.012668644099531e-05, "loss": 0.0886, "step": 6590 }, { "epoch": 19.130434782608695, "grad_norm": 0.39127445220947266, "learning_rate": 8.006066411944542e-05, "loss": 0.0906, "step": 6600 }, { "epoch": 19.159420289855074, "grad_norm": 0.42401593923568726, "learning_rate": 7.999455961318769e-05, "loss": 0.1092, "step": 6610 }, { "epoch": 19.18840579710145, "grad_norm": 0.49508869647979736, "learning_rate": 7.992837310294932e-05, "loss": 0.1114, "step": 6620 }, { "epoch": 19.217391304347824, "grad_norm": 0.4461759328842163, "learning_rate": 7.986210476968167e-05, "loss": 0.1251, "step": 6630 }, { "epoch": 19.246376811594203, "grad_norm": 0.41962409019470215, "learning_rate": 7.97957547945599e-05, "loss": 0.0899, "step": 6640 }, { "epoch": 19.27536231884058, "grad_norm": 0.3262649178504944, "learning_rate": 7.972932335898226e-05, "loss": 0.0868, "step": 6650 }, { "epoch": 19.304347826086957, "grad_norm": 0.3167392611503601, "learning_rate": 7.966281064456975e-05, "loss": 0.1008, "step": 6660 }, { "epoch": 19.333333333333332, "grad_norm": 0.35801073908805847, "learning_rate": 7.959621683316563e-05, "loss": 0.0931, "step": 6670 }, { "epoch": 19.36231884057971, "grad_norm": 0.31944307684898376, "learning_rate": 7.952954210683481e-05, "loss": 0.1218, "step": 6680 }, { "epoch": 19.391304347826086, "grad_norm": 0.3943234384059906, "learning_rate": 7.946278664786345e-05, "loss": 0.1194, "step": 6690 }, { "epoch": 19.420289855072465, "grad_norm": 0.46412956714630127, "learning_rate": 7.939595063875842e-05, "loss": 0.0976, "step": 6700 }, { "epoch": 19.44927536231884, "grad_norm": 0.2884758412837982, "learning_rate": 7.932903426224683e-05, "loss": 0.1143, "step": 6710 }, { "epoch": 19.47826086956522, "grad_norm": 0.24540093541145325, "learning_rate": 7.926203770127552e-05, "loss": 0.096, "step": 6720 }, { "epoch": 19.507246376811594, "grad_norm": 0.40125906467437744, "learning_rate": 7.919496113901046e-05, "loss": 0.0998, "step": 6730 }, { "epoch": 19.536231884057973, "grad_norm": 0.366150438785553, "learning_rate": 7.912780475883649e-05, "loss": 0.1104, "step": 6740 }, { "epoch": 19.565217391304348, "grad_norm": 0.28204023838043213, "learning_rate": 7.906056874435652e-05, "loss": 0.0938, "step": 6750 }, { "epoch": 19.594202898550726, "grad_norm": 0.39345285296440125, "learning_rate": 7.899325327939131e-05, "loss": 0.0835, "step": 6760 }, { "epoch": 19.6231884057971, "grad_norm": 0.43696972727775574, "learning_rate": 7.892585854797872e-05, "loss": 0.1175, "step": 6770 }, { "epoch": 19.652173913043477, "grad_norm": 0.5411075949668884, "learning_rate": 7.88583847343734e-05, "loss": 0.1016, "step": 6780 }, { "epoch": 19.681159420289855, "grad_norm": 0.41619637608528137, "learning_rate": 7.879083202304616e-05, "loss": 0.0956, "step": 6790 }, { "epoch": 19.71014492753623, "grad_norm": 0.24727730453014374, "learning_rate": 7.872320059868355e-05, "loss": 0.102, "step": 6800 }, { "epoch": 19.73913043478261, "grad_norm": 0.2794191539287567, "learning_rate": 7.865549064618729e-05, "loss": 0.1155, "step": 6810 }, { "epoch": 19.768115942028984, "grad_norm": 0.4851526618003845, "learning_rate": 7.858770235067381e-05, "loss": 0.1132, "step": 6820 }, { "epoch": 19.797101449275363, "grad_norm": 0.38266780972480774, "learning_rate": 7.851983589747374e-05, "loss": 0.1163, "step": 6830 }, { "epoch": 19.82608695652174, "grad_norm": 0.31224480271339417, "learning_rate": 7.845189147213133e-05, "loss": 0.1113, "step": 6840 }, { "epoch": 19.855072463768117, "grad_norm": 0.26829686760902405, "learning_rate": 7.838386926040407e-05, "loss": 0.0976, "step": 6850 }, { "epoch": 19.884057971014492, "grad_norm": 0.46314260363578796, "learning_rate": 7.83157694482621e-05, "loss": 0.1108, "step": 6860 }, { "epoch": 19.91304347826087, "grad_norm": 0.3253716230392456, "learning_rate": 7.824759222188768e-05, "loss": 0.1163, "step": 6870 }, { "epoch": 19.942028985507246, "grad_norm": 0.22694610059261322, "learning_rate": 7.817933776767478e-05, "loss": 0.0966, "step": 6880 }, { "epoch": 19.971014492753625, "grad_norm": 0.424565851688385, "learning_rate": 7.811100627222842e-05, "loss": 0.1106, "step": 6890 }, { "epoch": 20.0, "grad_norm": 0.7507173418998718, "learning_rate": 7.804259792236435e-05, "loss": 0.1034, "step": 6900 }, { "epoch": 20.028985507246375, "grad_norm": 0.3750985264778137, "learning_rate": 7.797411290510835e-05, "loss": 0.1016, "step": 6910 }, { "epoch": 20.057971014492754, "grad_norm": 0.2645069658756256, "learning_rate": 7.790555140769586e-05, "loss": 0.1106, "step": 6920 }, { "epoch": 20.08695652173913, "grad_norm": 0.473185658454895, "learning_rate": 7.78369136175714e-05, "loss": 0.0891, "step": 6930 }, { "epoch": 20.115942028985508, "grad_norm": 0.3787136673927307, "learning_rate": 7.776819972238806e-05, "loss": 0.1062, "step": 6940 }, { "epoch": 20.144927536231883, "grad_norm": 0.20881570875644684, "learning_rate": 7.7699409910007e-05, "loss": 0.0942, "step": 6950 }, { "epoch": 20.17391304347826, "grad_norm": 0.36029985547065735, "learning_rate": 7.763054436849694e-05, "loss": 0.1139, "step": 6960 }, { "epoch": 20.202898550724637, "grad_norm": 0.3305976986885071, "learning_rate": 7.756160328613364e-05, "loss": 0.107, "step": 6970 }, { "epoch": 20.231884057971016, "grad_norm": 0.35445329546928406, "learning_rate": 7.749258685139942e-05, "loss": 0.0989, "step": 6980 }, { "epoch": 20.26086956521739, "grad_norm": 0.39083990454673767, "learning_rate": 7.742349525298253e-05, "loss": 0.1105, "step": 6990 }, { "epoch": 20.28985507246377, "grad_norm": 0.2868945896625519, "learning_rate": 7.735432867977679e-05, "loss": 0.0906, "step": 7000 }, { "epoch": 20.318840579710145, "grad_norm": 0.3364221155643463, "learning_rate": 7.728508732088096e-05, "loss": 0.1045, "step": 7010 }, { "epoch": 20.347826086956523, "grad_norm": 0.27390968799591064, "learning_rate": 7.721577136559825e-05, "loss": 0.0983, "step": 7020 }, { "epoch": 20.3768115942029, "grad_norm": 0.30403977632522583, "learning_rate": 7.714638100343588e-05, "loss": 0.0966, "step": 7030 }, { "epoch": 20.405797101449274, "grad_norm": 0.476375937461853, "learning_rate": 7.707691642410444e-05, "loss": 0.1009, "step": 7040 }, { "epoch": 20.434782608695652, "grad_norm": 0.39029183983802795, "learning_rate": 7.70073778175174e-05, "loss": 0.0917, "step": 7050 }, { "epoch": 20.463768115942027, "grad_norm": 0.29170235991477966, "learning_rate": 7.69377653737907e-05, "loss": 0.093, "step": 7060 }, { "epoch": 20.492753623188406, "grad_norm": 0.3420107364654541, "learning_rate": 7.686807928324209e-05, "loss": 0.0887, "step": 7070 }, { "epoch": 20.52173913043478, "grad_norm": 0.46952491998672485, "learning_rate": 7.679831973639065e-05, "loss": 0.1035, "step": 7080 }, { "epoch": 20.55072463768116, "grad_norm": 0.3527598977088928, "learning_rate": 7.672848692395637e-05, "loss": 0.1112, "step": 7090 }, { "epoch": 20.579710144927535, "grad_norm": 0.35709843039512634, "learning_rate": 7.665858103685944e-05, "loss": 0.1215, "step": 7100 }, { "epoch": 20.608695652173914, "grad_norm": 0.24026872217655182, "learning_rate": 7.658860226621991e-05, "loss": 0.1099, "step": 7110 }, { "epoch": 20.63768115942029, "grad_norm": 0.3530397117137909, "learning_rate": 7.651855080335708e-05, "loss": 0.1098, "step": 7120 }, { "epoch": 20.666666666666668, "grad_norm": 0.36064979434013367, "learning_rate": 7.644842683978896e-05, "loss": 0.0866, "step": 7130 }, { "epoch": 20.695652173913043, "grad_norm": 0.32545793056488037, "learning_rate": 7.63782305672318e-05, "loss": 0.0964, "step": 7140 }, { "epoch": 20.72463768115942, "grad_norm": 0.41704756021499634, "learning_rate": 7.63079621775995e-05, "loss": 0.1204, "step": 7150 }, { "epoch": 20.753623188405797, "grad_norm": 0.2773784399032593, "learning_rate": 7.623762186300319e-05, "loss": 0.1231, "step": 7160 }, { "epoch": 20.782608695652176, "grad_norm": 0.314942866563797, "learning_rate": 7.616720981575057e-05, "loss": 0.1082, "step": 7170 }, { "epoch": 20.81159420289855, "grad_norm": 0.28883859515190125, "learning_rate": 7.609672622834552e-05, "loss": 0.1026, "step": 7180 }, { "epoch": 20.840579710144926, "grad_norm": 0.3052704334259033, "learning_rate": 7.602617129348747e-05, "loss": 0.1078, "step": 7190 }, { "epoch": 20.869565217391305, "grad_norm": 0.40418189764022827, "learning_rate": 7.595554520407088e-05, "loss": 0.1008, "step": 7200 }, { "epoch": 20.89855072463768, "grad_norm": 0.547788143157959, "learning_rate": 7.588484815318484e-05, "loss": 0.1019, "step": 7210 }, { "epoch": 20.92753623188406, "grad_norm": 0.29421624541282654, "learning_rate": 7.581408033411234e-05, "loss": 0.0984, "step": 7220 }, { "epoch": 20.956521739130434, "grad_norm": 0.2703758478164673, "learning_rate": 7.574324194032995e-05, "loss": 0.105, "step": 7230 }, { "epoch": 20.985507246376812, "grad_norm": 0.3435475528240204, "learning_rate": 7.567233316550705e-05, "loss": 0.1285, "step": 7240 }, { "epoch": 21.014492753623188, "grad_norm": 0.33567100763320923, "learning_rate": 7.560135420350562e-05, "loss": 0.0879, "step": 7250 }, { "epoch": 21.043478260869566, "grad_norm": 0.28695228695869446, "learning_rate": 7.553030524837935e-05, "loss": 0.0969, "step": 7260 }, { "epoch": 21.07246376811594, "grad_norm": 0.41675615310668945, "learning_rate": 7.545918649437341e-05, "loss": 0.1269, "step": 7270 }, { "epoch": 21.10144927536232, "grad_norm": 0.501548171043396, "learning_rate": 7.538799813592377e-05, "loss": 0.1074, "step": 7280 }, { "epoch": 21.130434782608695, "grad_norm": 0.3565016984939575, "learning_rate": 7.531674036765662e-05, "loss": 0.1011, "step": 7290 }, { "epoch": 21.159420289855074, "grad_norm": 0.4155751168727875, "learning_rate": 7.524541338438807e-05, "loss": 0.1139, "step": 7300 }, { "epoch": 21.18840579710145, "grad_norm": 0.21688665449619293, "learning_rate": 7.517401738112328e-05, "loss": 0.0914, "step": 7310 }, { "epoch": 21.217391304347824, "grad_norm": 0.28088539838790894, "learning_rate": 7.510255255305628e-05, "loss": 0.1125, "step": 7320 }, { "epoch": 21.246376811594203, "grad_norm": 0.3268051743507385, "learning_rate": 7.503101909556911e-05, "loss": 0.0877, "step": 7330 }, { "epoch": 21.27536231884058, "grad_norm": 0.34027546644210815, "learning_rate": 7.495941720423154e-05, "loss": 0.1026, "step": 7340 }, { "epoch": 21.304347826086957, "grad_norm": 0.28073224425315857, "learning_rate": 7.488774707480042e-05, "loss": 0.0913, "step": 7350 }, { "epoch": 21.333333333333332, "grad_norm": 0.37974223494529724, "learning_rate": 7.481600890321911e-05, "loss": 0.1041, "step": 7360 }, { "epoch": 21.36231884057971, "grad_norm": 0.32456913590431213, "learning_rate": 7.474420288561708e-05, "loss": 0.1021, "step": 7370 }, { "epoch": 21.391304347826086, "grad_norm": 0.3720680773258209, "learning_rate": 7.467232921830921e-05, "loss": 0.0958, "step": 7380 }, { "epoch": 21.420289855072465, "grad_norm": 0.3506243824958801, "learning_rate": 7.460038809779537e-05, "loss": 0.1062, "step": 7390 }, { "epoch": 21.44927536231884, "grad_norm": 0.33805230259895325, "learning_rate": 7.452837972075983e-05, "loss": 0.0969, "step": 7400 }, { "epoch": 21.47826086956522, "grad_norm": 0.260945200920105, "learning_rate": 7.445630428407074e-05, "loss": 0.0879, "step": 7410 }, { "epoch": 21.507246376811594, "grad_norm": 0.3978862762451172, "learning_rate": 7.43841619847796e-05, "loss": 0.0979, "step": 7420 }, { "epoch": 21.536231884057973, "grad_norm": 0.3000033497810364, "learning_rate": 7.431195302012072e-05, "loss": 0.1194, "step": 7430 }, { "epoch": 21.565217391304348, "grad_norm": 0.30280905961990356, "learning_rate": 7.423967758751061e-05, "loss": 0.0883, "step": 7440 }, { "epoch": 21.594202898550726, "grad_norm": 0.26231029629707336, "learning_rate": 7.416733588454758e-05, "loss": 0.0773, "step": 7450 }, { "epoch": 21.6231884057971, "grad_norm": 0.3744719922542572, "learning_rate": 7.409492810901106e-05, "loss": 0.1047, "step": 7460 }, { "epoch": 21.652173913043477, "grad_norm": 0.27250558137893677, "learning_rate": 7.402245445886116e-05, "loss": 0.0996, "step": 7470 }, { "epoch": 21.681159420289855, "grad_norm": 0.44744259119033813, "learning_rate": 7.394991513223806e-05, "loss": 0.1119, "step": 7480 }, { "epoch": 21.71014492753623, "grad_norm": 0.41720837354660034, "learning_rate": 7.38773103274615e-05, "loss": 0.1009, "step": 7490 }, { "epoch": 21.73913043478261, "grad_norm": 0.4026874303817749, "learning_rate": 7.380464024303028e-05, "loss": 0.0899, "step": 7500 }, { "epoch": 21.768115942028984, "grad_norm": 0.39172300696372986, "learning_rate": 7.373190507762162e-05, "loss": 0.101, "step": 7510 }, { "epoch": 21.797101449275363, "grad_norm": 0.3168098032474518, "learning_rate": 7.365910503009066e-05, "loss": 0.074, "step": 7520 }, { "epoch": 21.82608695652174, "grad_norm": 0.28811538219451904, "learning_rate": 7.358624029946996e-05, "loss": 0.0993, "step": 7530 }, { "epoch": 21.855072463768117, "grad_norm": 0.4034368693828583, "learning_rate": 7.351331108496893e-05, "loss": 0.115, "step": 7540 }, { "epoch": 21.884057971014492, "grad_norm": 0.3656509220600128, "learning_rate": 7.344031758597325e-05, "loss": 0.112, "step": 7550 }, { "epoch": 21.91304347826087, "grad_norm": 0.4922838807106018, "learning_rate": 7.336726000204435e-05, "loss": 0.0972, "step": 7560 }, { "epoch": 21.942028985507246, "grad_norm": 0.3745553195476532, "learning_rate": 7.32941385329189e-05, "loss": 0.1027, "step": 7570 }, { "epoch": 21.971014492753625, "grad_norm": 0.39149320125579834, "learning_rate": 7.322095337850816e-05, "loss": 0.1151, "step": 7580 }, { "epoch": 22.0, "grad_norm": 0.8151898384094238, "learning_rate": 7.314770473889758e-05, "loss": 0.1026, "step": 7590 }, { "epoch": 22.028985507246375, "grad_norm": 0.4377081096172333, "learning_rate": 7.307439281434615e-05, "loss": 0.0823, "step": 7600 }, { "epoch": 22.057971014492754, "grad_norm": 0.35784757137298584, "learning_rate": 7.300101780528585e-05, "loss": 0.11, "step": 7610 }, { "epoch": 22.08695652173913, "grad_norm": 0.25670677423477173, "learning_rate": 7.292757991232117e-05, "loss": 0.1015, "step": 7620 }, { "epoch": 22.115942028985508, "grad_norm": 0.35505029559135437, "learning_rate": 7.285407933622848e-05, "loss": 0.1097, "step": 7630 }, { "epoch": 22.144927536231883, "grad_norm": 0.27871453762054443, "learning_rate": 7.278051627795557e-05, "loss": 0.0951, "step": 7640 }, { "epoch": 22.17391304347826, "grad_norm": 0.4752453565597534, "learning_rate": 7.270689093862105e-05, "loss": 0.1036, "step": 7650 }, { "epoch": 22.202898550724637, "grad_norm": 0.5493319630622864, "learning_rate": 7.263320351951374e-05, "loss": 0.1031, "step": 7660 }, { "epoch": 22.231884057971016, "grad_norm": 0.43251800537109375, "learning_rate": 7.255945422209227e-05, "loss": 0.0984, "step": 7670 }, { "epoch": 22.26086956521739, "grad_norm": 0.4127131998538971, "learning_rate": 7.248564324798437e-05, "loss": 0.0907, "step": 7680 }, { "epoch": 22.28985507246377, "grad_norm": 0.28903988003730774, "learning_rate": 7.241177079898644e-05, "loss": 0.086, "step": 7690 }, { "epoch": 22.318840579710145, "grad_norm": 0.35488802194595337, "learning_rate": 7.233783707706295e-05, "loss": 0.1017, "step": 7700 }, { "epoch": 22.347826086956523, "grad_norm": 0.2337232232093811, "learning_rate": 7.226384228434586e-05, "loss": 0.0888, "step": 7710 }, { "epoch": 22.3768115942029, "grad_norm": 0.2909092307090759, "learning_rate": 7.21897866231341e-05, "loss": 0.1124, "step": 7720 }, { "epoch": 22.405797101449274, "grad_norm": 0.3277481496334076, "learning_rate": 7.211567029589303e-05, "loss": 0.1086, "step": 7730 }, { "epoch": 22.434782608695652, "grad_norm": 0.2835393249988556, "learning_rate": 7.204149350525387e-05, "loss": 0.1085, "step": 7740 }, { "epoch": 22.463768115942027, "grad_norm": 0.3101160526275635, "learning_rate": 7.196725645401309e-05, "loss": 0.0959, "step": 7750 }, { "epoch": 22.492753623188406, "grad_norm": 0.42514339089393616, "learning_rate": 7.1892959345132e-05, "loss": 0.1108, "step": 7760 }, { "epoch": 22.52173913043478, "grad_norm": 0.39696502685546875, "learning_rate": 7.181860238173605e-05, "loss": 0.1352, "step": 7770 }, { "epoch": 22.55072463768116, "grad_norm": 0.43896979093551636, "learning_rate": 7.174418576711432e-05, "loss": 0.0971, "step": 7780 }, { "epoch": 22.579710144927535, "grad_norm": 0.47712811827659607, "learning_rate": 7.1669709704719e-05, "loss": 0.0877, "step": 7790 }, { "epoch": 22.608695652173914, "grad_norm": 0.3447103202342987, "learning_rate": 7.159517439816481e-05, "loss": 0.0971, "step": 7800 }, { "epoch": 22.63768115942029, "grad_norm": 0.39140835404396057, "learning_rate": 7.152058005122842e-05, "loss": 0.0885, "step": 7810 }, { "epoch": 22.666666666666668, "grad_norm": 0.28053638339042664, "learning_rate": 7.144592686784793e-05, "loss": 0.0945, "step": 7820 }, { "epoch": 22.695652173913043, "grad_norm": 0.3110656142234802, "learning_rate": 7.137121505212229e-05, "loss": 0.1094, "step": 7830 }, { "epoch": 22.72463768115942, "grad_norm": 0.38985612988471985, "learning_rate": 7.129644480831077e-05, "loss": 0.0794, "step": 7840 }, { "epoch": 22.753623188405797, "grad_norm": 0.49533525109291077, "learning_rate": 7.122161634083234e-05, "loss": 0.1002, "step": 7850 }, { "epoch": 22.782608695652176, "grad_norm": 0.43202659487724304, "learning_rate": 7.114672985426516e-05, "loss": 0.0962, "step": 7860 }, { "epoch": 22.81159420289855, "grad_norm": 0.4098835587501526, "learning_rate": 7.107178555334606e-05, "loss": 0.1022, "step": 7870 }, { "epoch": 22.840579710144926, "grad_norm": 0.39185699820518494, "learning_rate": 7.099678364296989e-05, "loss": 0.0911, "step": 7880 }, { "epoch": 22.869565217391305, "grad_norm": 0.38475117087364197, "learning_rate": 7.0921724328189e-05, "loss": 0.1046, "step": 7890 }, { "epoch": 22.89855072463768, "grad_norm": 0.3477749824523926, "learning_rate": 7.084660781421268e-05, "loss": 0.1047, "step": 7900 }, { "epoch": 22.92753623188406, "grad_norm": 0.32388657331466675, "learning_rate": 7.077143430640662e-05, "loss": 0.1111, "step": 7910 }, { "epoch": 22.956521739130434, "grad_norm": 0.28074944019317627, "learning_rate": 7.069620401029232e-05, "loss": 0.0952, "step": 7920 }, { "epoch": 22.985507246376812, "grad_norm": 0.4655712842941284, "learning_rate": 7.062091713154655e-05, "loss": 0.119, "step": 7930 }, { "epoch": 23.014492753623188, "grad_norm": 0.3347054719924927, "learning_rate": 7.054557387600075e-05, "loss": 0.1116, "step": 7940 }, { "epoch": 23.043478260869566, "grad_norm": 0.3056691288948059, "learning_rate": 7.04701744496405e-05, "loss": 0.0995, "step": 7950 }, { "epoch": 23.07246376811594, "grad_norm": 0.29524263739585876, "learning_rate": 7.039471905860495e-05, "loss": 0.0923, "step": 7960 }, { "epoch": 23.10144927536232, "grad_norm": 0.3292746841907501, "learning_rate": 7.031920790918628e-05, "loss": 0.102, "step": 7970 }, { "epoch": 23.130434782608695, "grad_norm": 0.3358573019504547, "learning_rate": 7.024364120782906e-05, "loss": 0.0919, "step": 7980 }, { "epoch": 23.159420289855074, "grad_norm": 0.4067601263523102, "learning_rate": 7.016801916112978e-05, "loss": 0.093, "step": 7990 }, { "epoch": 23.18840579710145, "grad_norm": 0.3560484051704407, "learning_rate": 7.009234197583623e-05, "loss": 0.1045, "step": 8000 }, { "epoch": 23.217391304347824, "grad_norm": 0.4393708407878876, "learning_rate": 7.001660985884692e-05, "loss": 0.0963, "step": 8010 }, { "epoch": 23.246376811594203, "grad_norm": 0.5236015915870667, "learning_rate": 6.994082301721063e-05, "loss": 0.1136, "step": 8020 }, { "epoch": 23.27536231884058, "grad_norm": 0.4401554465293884, "learning_rate": 6.986498165812563e-05, "loss": 0.0955, "step": 8030 }, { "epoch": 23.304347826086957, "grad_norm": 0.30517131090164185, "learning_rate": 6.978908598893932e-05, "loss": 0.0939, "step": 8040 }, { "epoch": 23.333333333333332, "grad_norm": 0.3316713869571686, "learning_rate": 6.971313621714756e-05, "loss": 0.0912, "step": 8050 }, { "epoch": 23.36231884057971, "grad_norm": 0.388837069272995, "learning_rate": 6.96371325503941e-05, "loss": 0.1064, "step": 8060 }, { "epoch": 23.391304347826086, "grad_norm": 0.2927514612674713, "learning_rate": 6.956107519647014e-05, "loss": 0.1115, "step": 8070 }, { "epoch": 23.420289855072465, "grad_norm": 0.4804588556289673, "learning_rate": 6.94849643633135e-05, "loss": 0.1035, "step": 8080 }, { "epoch": 23.44927536231884, "grad_norm": 0.32680946588516235, "learning_rate": 6.940880025900834e-05, "loss": 0.0984, "step": 8090 }, { "epoch": 23.47826086956522, "grad_norm": 0.392529159784317, "learning_rate": 6.933258309178438e-05, "loss": 0.1002, "step": 8100 }, { "epoch": 23.507246376811594, "grad_norm": 0.451831579208374, "learning_rate": 6.925631307001646e-05, "loss": 0.094, "step": 8110 }, { "epoch": 23.536231884057973, "grad_norm": 0.314008504152298, "learning_rate": 6.91799904022239e-05, "loss": 0.084, "step": 8120 }, { "epoch": 23.565217391304348, "grad_norm": 0.33511435985565186, "learning_rate": 6.910361529706997e-05, "loss": 0.0823, "step": 8130 }, { "epoch": 23.594202898550726, "grad_norm": 0.36016684770584106, "learning_rate": 6.902718796336131e-05, "loss": 0.1009, "step": 8140 }, { "epoch": 23.6231884057971, "grad_norm": 0.29572927951812744, "learning_rate": 6.895070861004729e-05, "loss": 0.1142, "step": 8150 }, { "epoch": 23.652173913043477, "grad_norm": 0.3417483866214752, "learning_rate": 6.887417744621956e-05, "loss": 0.1075, "step": 8160 }, { "epoch": 23.681159420289855, "grad_norm": 0.3952733278274536, "learning_rate": 6.87975946811114e-05, "loss": 0.0899, "step": 8170 }, { "epoch": 23.71014492753623, "grad_norm": 0.4189750552177429, "learning_rate": 6.872096052409718e-05, "loss": 0.0903, "step": 8180 }, { "epoch": 23.73913043478261, "grad_norm": 0.3113269805908203, "learning_rate": 6.864427518469174e-05, "loss": 0.098, "step": 8190 }, { "epoch": 23.768115942028984, "grad_norm": 0.29072266817092896, "learning_rate": 6.856753887254986e-05, "loss": 0.0961, "step": 8200 }, { "epoch": 23.797101449275363, "grad_norm": 0.31375062465667725, "learning_rate": 6.849075179746572e-05, "loss": 0.1149, "step": 8210 }, { "epoch": 23.82608695652174, "grad_norm": 0.3995482325553894, "learning_rate": 6.841391416937221e-05, "loss": 0.0941, "step": 8220 }, { "epoch": 23.855072463768117, "grad_norm": 0.4191873371601105, "learning_rate": 6.833702619834053e-05, "loss": 0.1021, "step": 8230 }, { "epoch": 23.884057971014492, "grad_norm": 0.3322891891002655, "learning_rate": 6.82600880945794e-05, "loss": 0.1022, "step": 8240 }, { "epoch": 23.91304347826087, "grad_norm": 0.37546929717063904, "learning_rate": 6.818310006843468e-05, "loss": 0.1051, "step": 8250 }, { "epoch": 23.942028985507246, "grad_norm": 0.4086068868637085, "learning_rate": 6.810606233038868e-05, "loss": 0.115, "step": 8260 }, { "epoch": 23.971014492753625, "grad_norm": 0.3562030494213104, "learning_rate": 6.802897509105966e-05, "loss": 0.094, "step": 8270 }, { "epoch": 24.0, "grad_norm": 0.8567777276039124, "learning_rate": 6.79518385612012e-05, "loss": 0.1065, "step": 8280 }, { "epoch": 24.028985507246375, "grad_norm": 0.6033879518508911, "learning_rate": 6.787465295170157e-05, "loss": 0.1118, "step": 8290 }, { "epoch": 24.057971014492754, "grad_norm": 0.4588029384613037, "learning_rate": 6.779741847358332e-05, "loss": 0.1, "step": 8300 }, { "epoch": 24.08695652173913, "grad_norm": 0.5255804657936096, "learning_rate": 6.772013533800256e-05, "loss": 0.1236, "step": 8310 }, { "epoch": 24.115942028985508, "grad_norm": 0.4105243980884552, "learning_rate": 6.764280375624843e-05, "loss": 0.1017, "step": 8320 }, { "epoch": 24.144927536231883, "grad_norm": 0.27915531396865845, "learning_rate": 6.756542393974252e-05, "loss": 0.1041, "step": 8330 }, { "epoch": 24.17391304347826, "grad_norm": 0.24191172420978546, "learning_rate": 6.748799610003828e-05, "loss": 0.0786, "step": 8340 }, { "epoch": 24.202898550724637, "grad_norm": 0.3112497329711914, "learning_rate": 6.741052044882048e-05, "loss": 0.108, "step": 8350 }, { "epoch": 24.231884057971016, "grad_norm": 0.42907461524009705, "learning_rate": 6.73329971979046e-05, "loss": 0.1122, "step": 8360 }, { "epoch": 24.26086956521739, "grad_norm": 0.39249175786972046, "learning_rate": 6.725542655923625e-05, "loss": 0.1023, "step": 8370 }, { "epoch": 24.28985507246377, "grad_norm": 0.3118097186088562, "learning_rate": 6.717780874489057e-05, "loss": 0.0926, "step": 8380 }, { "epoch": 24.318840579710145, "grad_norm": 0.4916854500770569, "learning_rate": 6.710014396707172e-05, "loss": 0.1157, "step": 8390 }, { "epoch": 24.347826086956523, "grad_norm": 0.30792731046676636, "learning_rate": 6.702243243811221e-05, "loss": 0.103, "step": 8400 }, { "epoch": 24.3768115942029, "grad_norm": 0.438876748085022, "learning_rate": 6.694467437047244e-05, "loss": 0.1035, "step": 8410 }, { "epoch": 24.405797101449274, "grad_norm": 0.4188535511493683, "learning_rate": 6.686686997673997e-05, "loss": 0.094, "step": 8420 }, { "epoch": 24.434782608695652, "grad_norm": 0.35675615072250366, "learning_rate": 6.678901946962903e-05, "loss": 0.0814, "step": 8430 }, { "epoch": 24.463768115942027, "grad_norm": 0.3431568145751953, "learning_rate": 6.671112306197996e-05, "loss": 0.0866, "step": 8440 }, { "epoch": 24.492753623188406, "grad_norm": 0.35794076323509216, "learning_rate": 6.663318096675854e-05, "loss": 0.1105, "step": 8450 }, { "epoch": 24.52173913043478, "grad_norm": 0.41272181272506714, "learning_rate": 6.655519339705552e-05, "loss": 0.0897, "step": 8460 }, { "epoch": 24.55072463768116, "grad_norm": 0.32626742124557495, "learning_rate": 6.647716056608588e-05, "loss": 0.0956, "step": 8470 }, { "epoch": 24.579710144927535, "grad_norm": 0.3472849130630493, "learning_rate": 6.639908268718843e-05, "loss": 0.0892, "step": 8480 }, { "epoch": 24.608695652173914, "grad_norm": 0.40613627433776855, "learning_rate": 6.632095997382514e-05, "loss": 0.0968, "step": 8490 }, { "epoch": 24.63768115942029, "grad_norm": 0.367157518863678, "learning_rate": 6.624279263958047e-05, "loss": 0.0907, "step": 8500 }, { "epoch": 24.666666666666668, "grad_norm": 0.3079644441604614, "learning_rate": 6.616458089816097e-05, "loss": 0.1052, "step": 8510 }, { "epoch": 24.695652173913043, "grad_norm": 0.29992493987083435, "learning_rate": 6.608632496339454e-05, "loss": 0.0841, "step": 8520 }, { "epoch": 24.72463768115942, "grad_norm": 0.4213980734348297, "learning_rate": 6.600802504922988e-05, "loss": 0.1172, "step": 8530 }, { "epoch": 24.753623188405797, "grad_norm": 0.42781922221183777, "learning_rate": 6.592968136973604e-05, "loss": 0.1114, "step": 8540 }, { "epoch": 24.782608695652176, "grad_norm": 0.3830643892288208, "learning_rate": 6.585129413910159e-05, "loss": 0.0979, "step": 8550 }, { "epoch": 24.81159420289855, "grad_norm": 0.33465775847435, "learning_rate": 6.577286357163424e-05, "loss": 0.1, "step": 8560 }, { "epoch": 24.840579710144926, "grad_norm": 0.4267924427986145, "learning_rate": 6.569438988176018e-05, "loss": 0.0926, "step": 8570 }, { "epoch": 24.869565217391305, "grad_norm": 0.3853652775287628, "learning_rate": 6.561587328402347e-05, "loss": 0.0893, "step": 8580 }, { "epoch": 24.89855072463768, "grad_norm": 0.3373638689517975, "learning_rate": 6.553731399308549e-05, "loss": 0.091, "step": 8590 }, { "epoch": 24.92753623188406, "grad_norm": 0.38048073649406433, "learning_rate": 6.545871222372436e-05, "loss": 0.0821, "step": 8600 }, { "epoch": 24.956521739130434, "grad_norm": 0.30811434984207153, "learning_rate": 6.538006819083426e-05, "loss": 0.0819, "step": 8610 }, { "epoch": 24.985507246376812, "grad_norm": 0.34115296602249146, "learning_rate": 6.530138210942505e-05, "loss": 0.1111, "step": 8620 }, { "epoch": 25.014492753623188, "grad_norm": 0.23322944343090057, "learning_rate": 6.522265419462141e-05, "loss": 0.0829, "step": 8630 }, { "epoch": 25.043478260869566, "grad_norm": 0.4348927438259125, "learning_rate": 6.514388466166248e-05, "loss": 0.0903, "step": 8640 }, { "epoch": 25.07246376811594, "grad_norm": 0.42378875613212585, "learning_rate": 6.506507372590119e-05, "loss": 0.0964, "step": 8650 }, { "epoch": 25.10144927536232, "grad_norm": 0.4011875092983246, "learning_rate": 6.498622160280355e-05, "loss": 0.0741, "step": 8660 }, { "epoch": 25.130434782608695, "grad_norm": 0.2823712229728699, "learning_rate": 6.490732850794832e-05, "loss": 0.0806, "step": 8670 }, { "epoch": 25.159420289855074, "grad_norm": 0.40900862216949463, "learning_rate": 6.482839465702616e-05, "loss": 0.0823, "step": 8680 }, { "epoch": 25.18840579710145, "grad_norm": 0.31911250948905945, "learning_rate": 6.474942026583923e-05, "loss": 0.0901, "step": 8690 }, { "epoch": 25.217391304347824, "grad_norm": 0.42689287662506104, "learning_rate": 6.467040555030052e-05, "loss": 0.1052, "step": 8700 }, { "epoch": 25.246376811594203, "grad_norm": 0.4792473018169403, "learning_rate": 6.459135072643321e-05, "loss": 0.1032, "step": 8710 }, { "epoch": 25.27536231884058, "grad_norm": 0.39220404624938965, "learning_rate": 6.451225601037019e-05, "loss": 0.0866, "step": 8720 }, { "epoch": 25.304347826086957, "grad_norm": 0.3560490012168884, "learning_rate": 6.443312161835338e-05, "loss": 0.0755, "step": 8730 }, { "epoch": 25.333333333333332, "grad_norm": 0.36267733573913574, "learning_rate": 6.43539477667332e-05, "loss": 0.1098, "step": 8740 }, { "epoch": 25.36231884057971, "grad_norm": 0.31615450978279114, "learning_rate": 6.427473467196793e-05, "loss": 0.1033, "step": 8750 }, { "epoch": 25.391304347826086, "grad_norm": 0.4009799659252167, "learning_rate": 6.419548255062315e-05, "loss": 0.0904, "step": 8760 }, { "epoch": 25.420289855072465, "grad_norm": 0.6001753211021423, "learning_rate": 6.411619161937112e-05, "loss": 0.0924, "step": 8770 }, { "epoch": 25.44927536231884, "grad_norm": 0.2777409851551056, "learning_rate": 6.403686209499022e-05, "loss": 0.0911, "step": 8780 }, { "epoch": 25.47826086956522, "grad_norm": 0.25605079531669617, "learning_rate": 6.395749419436437e-05, "loss": 0.1018, "step": 8790 }, { "epoch": 25.507246376811594, "grad_norm": 0.3277330696582794, "learning_rate": 6.387808813448234e-05, "loss": 0.09, "step": 8800 }, { "epoch": 25.536231884057973, "grad_norm": 0.22537122666835785, "learning_rate": 6.37986441324373e-05, "loss": 0.0835, "step": 8810 }, { "epoch": 25.565217391304348, "grad_norm": 0.427733838558197, "learning_rate": 6.37191624054261e-05, "loss": 0.0812, "step": 8820 }, { "epoch": 25.594202898550726, "grad_norm": 0.20617811381816864, "learning_rate": 6.363964317074872e-05, "loss": 0.0638, "step": 8830 }, { "epoch": 25.6231884057971, "grad_norm": 0.32776620984077454, "learning_rate": 6.356008664580776e-05, "loss": 0.0969, "step": 8840 }, { "epoch": 25.652173913043477, "grad_norm": 0.49045297503471375, "learning_rate": 6.348049304810771e-05, "loss": 0.0872, "step": 8850 }, { "epoch": 25.681159420289855, "grad_norm": 0.25885435938835144, "learning_rate": 6.340086259525442e-05, "loss": 0.0876, "step": 8860 }, { "epoch": 25.71014492753623, "grad_norm": 0.23793990910053253, "learning_rate": 6.332119550495448e-05, "loss": 0.0994, "step": 8870 }, { "epoch": 25.73913043478261, "grad_norm": 0.5432042479515076, "learning_rate": 6.324149199501473e-05, "loss": 0.0867, "step": 8880 }, { "epoch": 25.768115942028984, "grad_norm": 0.21161885559558868, "learning_rate": 6.316175228334146e-05, "loss": 0.0952, "step": 8890 }, { "epoch": 25.797101449275363, "grad_norm": 0.5830066204071045, "learning_rate": 6.308197658794003e-05, "loss": 0.1331, "step": 8900 }, { "epoch": 25.82608695652174, "grad_norm": 0.4310133159160614, "learning_rate": 6.300216512691417e-05, "loss": 0.1281, "step": 8910 }, { "epoch": 25.855072463768117, "grad_norm": 0.48247355222702026, "learning_rate": 6.292231811846532e-05, "loss": 0.0932, "step": 8920 }, { "epoch": 25.884057971014492, "grad_norm": 0.31100866198539734, "learning_rate": 6.284243578089217e-05, "loss": 0.0934, "step": 8930 }, { "epoch": 25.91304347826087, "grad_norm": 0.6842138171195984, "learning_rate": 6.276251833258999e-05, "loss": 0.0881, "step": 8940 }, { "epoch": 25.942028985507246, "grad_norm": 0.31012195348739624, "learning_rate": 6.268256599205003e-05, "loss": 0.1034, "step": 8950 }, { "epoch": 25.971014492753625, "grad_norm": 0.4207480847835541, "learning_rate": 6.260257897785892e-05, "loss": 0.1123, "step": 8960 }, { "epoch": 26.0, "grad_norm": 0.4856835901737213, "learning_rate": 6.252255750869811e-05, "loss": 0.0968, "step": 8970 }, { "epoch": 26.028985507246375, "grad_norm": 0.34793731570243835, "learning_rate": 6.244250180334325e-05, "loss": 0.0958, "step": 8980 }, { "epoch": 26.057971014492754, "grad_norm": 0.36127743124961853, "learning_rate": 6.236241208066356e-05, "loss": 0.0995, "step": 8990 }, { "epoch": 26.08695652173913, "grad_norm": 0.3173960745334625, "learning_rate": 6.228228855962133e-05, "loss": 0.089, "step": 9000 }, { "epoch": 26.115942028985508, "grad_norm": 0.45852774381637573, "learning_rate": 6.220213145927115e-05, "loss": 0.1077, "step": 9010 }, { "epoch": 26.144927536231883, "grad_norm": 0.3714202344417572, "learning_rate": 6.212194099875951e-05, "loss": 0.0766, "step": 9020 }, { "epoch": 26.17391304347826, "grad_norm": 0.5989710688591003, "learning_rate": 6.204171739732405e-05, "loss": 0.0974, "step": 9030 }, { "epoch": 26.202898550724637, "grad_norm": 0.3582770824432373, "learning_rate": 6.196146087429303e-05, "loss": 0.1153, "step": 9040 }, { "epoch": 26.231884057971016, "grad_norm": 0.3930160105228424, "learning_rate": 6.188117164908474e-05, "loss": 0.1032, "step": 9050 }, { "epoch": 26.26086956521739, "grad_norm": 0.4734560549259186, "learning_rate": 6.180084994120684e-05, "loss": 0.0911, "step": 9060 }, { "epoch": 26.28985507246377, "grad_norm": 0.36610832810401917, "learning_rate": 6.17204959702558e-05, "loss": 0.0814, "step": 9070 }, { "epoch": 26.318840579710145, "grad_norm": 0.37070533633232117, "learning_rate": 6.164010995591635e-05, "loss": 0.0913, "step": 9080 }, { "epoch": 26.347826086956523, "grad_norm": 0.36717358231544495, "learning_rate": 6.155969211796076e-05, "loss": 0.1182, "step": 9090 }, { "epoch": 26.3768115942029, "grad_norm": 0.39474284648895264, "learning_rate": 6.147924267624829e-05, "loss": 0.0764, "step": 9100 }, { "epoch": 26.405797101449274, "grad_norm": 0.3292117118835449, "learning_rate": 6.13987618507247e-05, "loss": 0.0933, "step": 9110 }, { "epoch": 26.434782608695652, "grad_norm": 0.4586057662963867, "learning_rate": 6.131824986142147e-05, "loss": 0.104, "step": 9120 }, { "epoch": 26.463768115942027, "grad_norm": 0.4136529266834259, "learning_rate": 6.123770692845529e-05, "loss": 0.1009, "step": 9130 }, { "epoch": 26.492753623188406, "grad_norm": 0.21014559268951416, "learning_rate": 6.11571332720275e-05, "loss": 0.097, "step": 9140 }, { "epoch": 26.52173913043478, "grad_norm": 0.34362557530403137, "learning_rate": 6.107652911242336e-05, "loss": 0.0935, "step": 9150 }, { "epoch": 26.55072463768116, "grad_norm": 0.40612903237342834, "learning_rate": 6.0995894670011586e-05, "loss": 0.1103, "step": 9160 }, { "epoch": 26.579710144927535, "grad_norm": 0.5520173907279968, "learning_rate": 6.091523016524368e-05, "loss": 0.08, "step": 9170 }, { "epoch": 26.608695652173914, "grad_norm": 0.34539029002189636, "learning_rate": 6.083453581865328e-05, "loss": 0.081, "step": 9180 }, { "epoch": 26.63768115942029, "grad_norm": 0.2292974442243576, "learning_rate": 6.075381185085568e-05, "loss": 0.0913, "step": 9190 }, { "epoch": 26.666666666666668, "grad_norm": 0.530166506767273, "learning_rate": 6.067305848254709e-05, "loss": 0.1242, "step": 9200 }, { "epoch": 26.695652173913043, "grad_norm": 0.313507616519928, "learning_rate": 6.059227593450418e-05, "loss": 0.091, "step": 9210 }, { "epoch": 26.72463768115942, "grad_norm": 0.22776463627815247, "learning_rate": 6.051146442758333e-05, "loss": 0.0891, "step": 9220 }, { "epoch": 26.753623188405797, "grad_norm": 0.35936057567596436, "learning_rate": 6.043062418272012e-05, "loss": 0.0893, "step": 9230 }, { "epoch": 26.782608695652176, "grad_norm": 0.4251636564731598, "learning_rate": 6.0349755420928666e-05, "loss": 0.0899, "step": 9240 }, { "epoch": 26.81159420289855, "grad_norm": 0.420236736536026, "learning_rate": 6.0268858363301105e-05, "loss": 0.0914, "step": 9250 }, { "epoch": 26.840579710144926, "grad_norm": 0.4716984033584595, "learning_rate": 6.018793323100689e-05, "loss": 0.1019, "step": 9260 }, { "epoch": 26.869565217391305, "grad_norm": 0.2790106534957886, "learning_rate": 6.0106980245292255e-05, "loss": 0.0795, "step": 9270 }, { "epoch": 26.89855072463768, "grad_norm": 0.6252140402793884, "learning_rate": 6.002599962747957e-05, "loss": 0.0852, "step": 9280 }, { "epoch": 26.92753623188406, "grad_norm": 0.43576961755752563, "learning_rate": 5.994499159896673e-05, "loss": 0.0998, "step": 9290 }, { "epoch": 26.956521739130434, "grad_norm": 0.6333770751953125, "learning_rate": 5.9863956381226607e-05, "loss": 0.0915, "step": 9300 }, { "epoch": 26.985507246376812, "grad_norm": 0.5500407814979553, "learning_rate": 5.9782894195806394e-05, "loss": 0.104, "step": 9310 }, { "epoch": 27.014492753623188, "grad_norm": 0.44380778074264526, "learning_rate": 5.9701805264327004e-05, "loss": 0.0836, "step": 9320 }, { "epoch": 27.043478260869566, "grad_norm": 0.41339370608329773, "learning_rate": 5.96206898084825e-05, "loss": 0.0898, "step": 9330 }, { "epoch": 27.07246376811594, "grad_norm": 0.42062732577323914, "learning_rate": 5.953954805003942e-05, "loss": 0.1016, "step": 9340 }, { "epoch": 27.10144927536232, "grad_norm": 0.26764097809791565, "learning_rate": 5.945838021083623e-05, "loss": 0.0953, "step": 9350 }, { "epoch": 27.130434782608695, "grad_norm": 0.3174140453338623, "learning_rate": 5.9377186512782714e-05, "loss": 0.1038, "step": 9360 }, { "epoch": 27.159420289855074, "grad_norm": 0.5403830409049988, "learning_rate": 5.929596717785935e-05, "loss": 0.0998, "step": 9370 }, { "epoch": 27.18840579710145, "grad_norm": 0.29460418224334717, "learning_rate": 5.921472242811668e-05, "loss": 0.0998, "step": 9380 }, { "epoch": 27.217391304347824, "grad_norm": 0.3835254907608032, "learning_rate": 5.913345248567475e-05, "loss": 0.0895, "step": 9390 }, { "epoch": 27.246376811594203, "grad_norm": 0.26222512125968933, "learning_rate": 5.905215757272248e-05, "loss": 0.0829, "step": 9400 }, { "epoch": 27.27536231884058, "grad_norm": 0.3459964990615845, "learning_rate": 5.897083791151706e-05, "loss": 0.0762, "step": 9410 }, { "epoch": 27.304347826086957, "grad_norm": 0.4421097934246063, "learning_rate": 5.888949372438336e-05, "loss": 0.0809, "step": 9420 }, { "epoch": 27.333333333333332, "grad_norm": 0.3622925877571106, "learning_rate": 5.8808125233713255e-05, "loss": 0.0906, "step": 9430 }, { "epoch": 27.36231884057971, "grad_norm": 0.25134244561195374, "learning_rate": 5.872673266196509e-05, "loss": 0.0879, "step": 9440 }, { "epoch": 27.391304347826086, "grad_norm": 0.5268398523330688, "learning_rate": 5.864531623166305e-05, "loss": 0.0896, "step": 9450 }, { "epoch": 27.420289855072465, "grad_norm": 0.2773943841457367, "learning_rate": 5.856387616539656e-05, "loss": 0.0992, "step": 9460 }, { "epoch": 27.44927536231884, "grad_norm": 0.40491020679473877, "learning_rate": 5.848241268581967e-05, "loss": 0.1059, "step": 9470 }, { "epoch": 27.47826086956522, "grad_norm": 0.5842623710632324, "learning_rate": 5.840092601565037e-05, "loss": 0.0952, "step": 9480 }, { "epoch": 27.507246376811594, "grad_norm": 0.4927104115486145, "learning_rate": 5.8319416377670144e-05, "loss": 0.1123, "step": 9490 }, { "epoch": 27.536231884057973, "grad_norm": 0.29302486777305603, "learning_rate": 5.82378839947232e-05, "loss": 0.106, "step": 9500 }, { "epoch": 27.565217391304348, "grad_norm": 0.4573745131492615, "learning_rate": 5.815632908971599e-05, "loss": 0.0952, "step": 9510 }, { "epoch": 27.594202898550726, "grad_norm": 0.4357374906539917, "learning_rate": 5.80747518856165e-05, "loss": 0.0924, "step": 9520 }, { "epoch": 27.6231884057971, "grad_norm": 0.3612167537212372, "learning_rate": 5.799315260545367e-05, "loss": 0.1071, "step": 9530 }, { "epoch": 27.652173913043477, "grad_norm": 0.3312841057777405, "learning_rate": 5.791153147231686e-05, "loss": 0.1093, "step": 9540 }, { "epoch": 27.681159420289855, "grad_norm": 0.43029627203941345, "learning_rate": 5.782988870935509e-05, "loss": 0.0969, "step": 9550 }, { "epoch": 27.71014492753623, "grad_norm": 0.371330201625824, "learning_rate": 5.774822453977657e-05, "loss": 0.0935, "step": 9560 }, { "epoch": 27.73913043478261, "grad_norm": 0.35629457235336304, "learning_rate": 5.7666539186848036e-05, "loss": 0.0972, "step": 9570 }, { "epoch": 27.768115942028984, "grad_norm": 0.37646907567977905, "learning_rate": 5.758483287389411e-05, "loss": 0.0836, "step": 9580 }, { "epoch": 27.797101449275363, "grad_norm": 0.26607057452201843, "learning_rate": 5.7503105824296735e-05, "loss": 0.1109, "step": 9590 }, { "epoch": 27.82608695652174, "grad_norm": 0.3088560998439789, "learning_rate": 5.742135826149453e-05, "loss": 0.0888, "step": 9600 }, { "epoch": 27.855072463768117, "grad_norm": 0.2338147908449173, "learning_rate": 5.7339590408982223e-05, "loss": 0.0929, "step": 9610 }, { "epoch": 27.884057971014492, "grad_norm": 0.33873507380485535, "learning_rate": 5.725780249031e-05, "loss": 0.0971, "step": 9620 }, { "epoch": 27.91304347826087, "grad_norm": 0.2373759001493454, "learning_rate": 5.717599472908292e-05, "loss": 0.0844, "step": 9630 }, { "epoch": 27.942028985507246, "grad_norm": 0.36954036355018616, "learning_rate": 5.7094167348960237e-05, "loss": 0.0979, "step": 9640 }, { "epoch": 27.971014492753625, "grad_norm": 0.44296813011169434, "learning_rate": 5.7012320573654945e-05, "loss": 0.0953, "step": 9650 }, { "epoch": 28.0, "grad_norm": 0.5584344267845154, "learning_rate": 5.693045462693295e-05, "loss": 0.0984, "step": 9660 }, { "epoch": 28.028985507246375, "grad_norm": 0.2427714765071869, "learning_rate": 5.684856973261266e-05, "loss": 0.0809, "step": 9670 }, { "epoch": 28.057971014492754, "grad_norm": 0.33059096336364746, "learning_rate": 5.6766666114564215e-05, "loss": 0.0917, "step": 9680 }, { "epoch": 28.08695652173913, "grad_norm": 0.32649749517440796, "learning_rate": 5.668474399670899e-05, "loss": 0.0882, "step": 9690 }, { "epoch": 28.115942028985508, "grad_norm": 0.2927171289920807, "learning_rate": 5.660280360301896e-05, "loss": 0.0931, "step": 9700 }, { "epoch": 28.144927536231883, "grad_norm": 0.3866276443004608, "learning_rate": 5.652084515751599e-05, "loss": 0.1069, "step": 9710 }, { "epoch": 28.17391304347826, "grad_norm": 0.30153888463974, "learning_rate": 5.643886888427137e-05, "loss": 0.0833, "step": 9720 }, { "epoch": 28.202898550724637, "grad_norm": 0.4071616232395172, "learning_rate": 5.6356875007405074e-05, "loss": 0.0932, "step": 9730 }, { "epoch": 28.231884057971016, "grad_norm": 0.3093550503253937, "learning_rate": 5.627486375108525e-05, "loss": 0.0786, "step": 9740 }, { "epoch": 28.26086956521739, "grad_norm": 0.4130619168281555, "learning_rate": 5.619283533952754e-05, "loss": 0.1169, "step": 9750 }, { "epoch": 28.28985507246377, "grad_norm": 0.19634698331356049, "learning_rate": 5.6110789996994474e-05, "loss": 0.0808, "step": 9760 }, { "epoch": 28.318840579710145, "grad_norm": 0.27509117126464844, "learning_rate": 5.602872794779491e-05, "loss": 0.0818, "step": 9770 }, { "epoch": 28.347826086956523, "grad_norm": 0.36869877576828003, "learning_rate": 5.594664941628334e-05, "loss": 0.0808, "step": 9780 }, { "epoch": 28.3768115942029, "grad_norm": 0.28021812438964844, "learning_rate": 5.5864554626859324e-05, "loss": 0.086, "step": 9790 }, { "epoch": 28.405797101449274, "grad_norm": 0.26831555366516113, "learning_rate": 5.578244380396691e-05, "loss": 0.0951, "step": 9800 }, { "epoch": 28.434782608695652, "grad_norm": 0.3775530457496643, "learning_rate": 5.570031717209394e-05, "loss": 0.0837, "step": 9810 }, { "epoch": 28.463768115942027, "grad_norm": 0.24371632933616638, "learning_rate": 5.561817495577147e-05, "loss": 0.082, "step": 9820 }, { "epoch": 28.492753623188406, "grad_norm": 0.3440195620059967, "learning_rate": 5.5536017379573215e-05, "loss": 0.086, "step": 9830 }, { "epoch": 28.52173913043478, "grad_norm": 0.26935017108917236, "learning_rate": 5.545384466811483e-05, "loss": 0.0972, "step": 9840 }, { "epoch": 28.55072463768116, "grad_norm": 0.38084691762924194, "learning_rate": 5.5371657046053384e-05, "loss": 0.1017, "step": 9850 }, { "epoch": 28.579710144927535, "grad_norm": 0.290239155292511, "learning_rate": 5.528945473808669e-05, "loss": 0.0915, "step": 9860 }, { "epoch": 28.608695652173914, "grad_norm": 0.33482253551483154, "learning_rate": 5.520723796895272e-05, "loss": 0.0908, "step": 9870 }, { "epoch": 28.63768115942029, "grad_norm": 0.3747408986091614, "learning_rate": 5.512500696342897e-05, "loss": 0.0844, "step": 9880 }, { "epoch": 28.666666666666668, "grad_norm": 0.4802875816822052, "learning_rate": 5.504276194633188e-05, "loss": 0.078, "step": 9890 }, { "epoch": 28.695652173913043, "grad_norm": 0.27169641852378845, "learning_rate": 5.49605031425162e-05, "loss": 0.0952, "step": 9900 }, { "epoch": 28.72463768115942, "grad_norm": 0.4197971522808075, "learning_rate": 5.487823077687434e-05, "loss": 0.0876, "step": 9910 }, { "epoch": 28.753623188405797, "grad_norm": 0.37185847759246826, "learning_rate": 5.4795945074335806e-05, "loss": 0.1035, "step": 9920 }, { "epoch": 28.782608695652176, "grad_norm": 0.4602510929107666, "learning_rate": 5.471364625986657e-05, "loss": 0.1092, "step": 9930 }, { "epoch": 28.81159420289855, "grad_norm": 0.26933249831199646, "learning_rate": 5.463133455846845e-05, "loss": 0.0695, "step": 9940 }, { "epoch": 28.840579710144926, "grad_norm": 0.4972953796386719, "learning_rate": 5.4549010195178505e-05, "loss": 0.0927, "step": 9950 }, { "epoch": 28.869565217391305, "grad_norm": 0.33794844150543213, "learning_rate": 5.446667339506838e-05, "loss": 0.0836, "step": 9960 }, { "epoch": 28.89855072463768, "grad_norm": 0.4375225007534027, "learning_rate": 5.4384324383243756e-05, "loss": 0.0749, "step": 9970 }, { "epoch": 28.92753623188406, "grad_norm": 0.3220159411430359, "learning_rate": 5.430196338484368e-05, "loss": 0.1062, "step": 9980 }, { "epoch": 28.956521739130434, "grad_norm": 0.2979547381401062, "learning_rate": 5.4219590625039975e-05, "loss": 0.0926, "step": 9990 }, { "epoch": 28.985507246376812, "grad_norm": 0.3251277208328247, "learning_rate": 5.413720632903664e-05, "loss": 0.0753, "step": 10000 }, { "epoch": 29.014492753623188, "grad_norm": 0.5778645873069763, "learning_rate": 5.405481072206917e-05, "loss": 0.1194, "step": 10010 }, { "epoch": 29.043478260869566, "grad_norm": 0.5020672082901001, "learning_rate": 5.397240402940402e-05, "loss": 0.0799, "step": 10020 }, { "epoch": 29.07246376811594, "grad_norm": 0.27410048246383667, "learning_rate": 5.388998647633794e-05, "loss": 0.0948, "step": 10030 }, { "epoch": 29.10144927536232, "grad_norm": 0.42518341541290283, "learning_rate": 5.380755828819737e-05, "loss": 0.0803, "step": 10040 }, { "epoch": 29.130434782608695, "grad_norm": 0.4766830503940582, "learning_rate": 5.3725119690337846e-05, "loss": 0.1079, "step": 10050 }, { "epoch": 29.159420289855074, "grad_norm": 0.3795958161354065, "learning_rate": 5.3642670908143324e-05, "loss": 0.1114, "step": 10060 }, { "epoch": 29.18840579710145, "grad_norm": 0.2962549924850464, "learning_rate": 5.356021216702562e-05, "loss": 0.1028, "step": 10070 }, { "epoch": 29.217391304347824, "grad_norm": 0.5517275929450989, "learning_rate": 5.347774369242381e-05, "loss": 0.1054, "step": 10080 }, { "epoch": 29.246376811594203, "grad_norm": 0.26086458563804626, "learning_rate": 5.3395265709803545e-05, "loss": 0.1065, "step": 10090 }, { "epoch": 29.27536231884058, "grad_norm": 0.287026971578598, "learning_rate": 5.331277844465647e-05, "loss": 0.0849, "step": 10100 }, { "epoch": 29.304347826086957, "grad_norm": 0.23269617557525635, "learning_rate": 5.323028212249963e-05, "loss": 0.0786, "step": 10110 }, { "epoch": 29.333333333333332, "grad_norm": 0.384395569562912, "learning_rate": 5.314777696887481e-05, "loss": 0.0739, "step": 10120 }, { "epoch": 29.36231884057971, "grad_norm": 0.3276943266391754, "learning_rate": 5.306526320934796e-05, "loss": 0.0752, "step": 10130 }, { "epoch": 29.391304347826086, "grad_norm": 0.4074258506298065, "learning_rate": 5.298274106950854e-05, "loss": 0.0975, "step": 10140 }, { "epoch": 29.420289855072465, "grad_norm": 0.48793792724609375, "learning_rate": 5.290021077496893e-05, "loss": 0.088, "step": 10150 }, { "epoch": 29.44927536231884, "grad_norm": 0.3513041138648987, "learning_rate": 5.2817672551363816e-05, "loss": 0.1068, "step": 10160 }, { "epoch": 29.47826086956522, "grad_norm": 0.4190158247947693, "learning_rate": 5.273512662434952e-05, "loss": 0.0749, "step": 10170 }, { "epoch": 29.507246376811594, "grad_norm": 0.41182804107666016, "learning_rate": 5.265257321960349e-05, "loss": 0.0832, "step": 10180 }, { "epoch": 29.536231884057973, "grad_norm": 0.406429648399353, "learning_rate": 5.257001256282357e-05, "loss": 0.0894, "step": 10190 }, { "epoch": 29.565217391304348, "grad_norm": 0.3909933269023895, "learning_rate": 5.248744487972742e-05, "loss": 0.0981, "step": 10200 }, { "epoch": 29.594202898550726, "grad_norm": 0.45473939180374146, "learning_rate": 5.240487039605196e-05, "loss": 0.0875, "step": 10210 }, { "epoch": 29.6231884057971, "grad_norm": 0.3364003300666809, "learning_rate": 5.232228933755267e-05, "loss": 0.0938, "step": 10220 }, { "epoch": 29.652173913043477, "grad_norm": 0.40386608242988586, "learning_rate": 5.2239701930003006e-05, "loss": 0.0972, "step": 10230 }, { "epoch": 29.681159420289855, "grad_norm": 0.4128904342651367, "learning_rate": 5.215710839919379e-05, "loss": 0.085, "step": 10240 }, { "epoch": 29.71014492753623, "grad_norm": 0.4223697781562805, "learning_rate": 5.207450897093257e-05, "loss": 0.0874, "step": 10250 }, { "epoch": 29.73913043478261, "grad_norm": 0.4211285710334778, "learning_rate": 5.1991903871043046e-05, "loss": 0.103, "step": 10260 }, { "epoch": 29.768115942028984, "grad_norm": 0.5267713665962219, "learning_rate": 5.190929332536439e-05, "loss": 0.0863, "step": 10270 }, { "epoch": 29.797101449275363, "grad_norm": 0.275651752948761, "learning_rate": 5.182667755975071e-05, "loss": 0.0865, "step": 10280 }, { "epoch": 29.82608695652174, "grad_norm": 0.3134634792804718, "learning_rate": 5.1744056800070315e-05, "loss": 0.0766, "step": 10290 }, { "epoch": 29.855072463768117, "grad_norm": 0.39577049016952515, "learning_rate": 5.166143127220524e-05, "loss": 0.0986, "step": 10300 }, { "epoch": 29.884057971014492, "grad_norm": 0.3079846501350403, "learning_rate": 5.1578801202050485e-05, "loss": 0.0919, "step": 10310 }, { "epoch": 29.91304347826087, "grad_norm": 0.3528546690940857, "learning_rate": 5.149616681551355e-05, "loss": 0.1022, "step": 10320 }, { "epoch": 29.942028985507246, "grad_norm": 0.45117315649986267, "learning_rate": 5.141352833851367e-05, "loss": 0.0895, "step": 10330 }, { "epoch": 29.971014492753625, "grad_norm": 0.3826615810394287, "learning_rate": 5.1330885996981285e-05, "loss": 0.0746, "step": 10340 }, { "epoch": 30.0, "grad_norm": 0.5462546348571777, "learning_rate": 5.124824001685741e-05, "loss": 0.0873, "step": 10350 }, { "epoch": 30.028985507246375, "grad_norm": 0.38107597827911377, "learning_rate": 5.116559062409298e-05, "loss": 0.0957, "step": 10360 }, { "epoch": 30.057971014492754, "grad_norm": 0.30049923062324524, "learning_rate": 5.10829380446483e-05, "loss": 0.0765, "step": 10370 }, { "epoch": 30.08695652173913, "grad_norm": 0.3036685585975647, "learning_rate": 5.100028250449235e-05, "loss": 0.0828, "step": 10380 }, { "epoch": 30.115942028985508, "grad_norm": 0.3812878727912903, "learning_rate": 5.0917624229602234e-05, "loss": 0.0929, "step": 10390 }, { "epoch": 30.144927536231883, "grad_norm": 0.3203854262828827, "learning_rate": 5.0834963445962524e-05, "loss": 0.0942, "step": 10400 }, { "epoch": 30.17391304347826, "grad_norm": 0.2617367208003998, "learning_rate": 5.075230037956461e-05, "loss": 0.0991, "step": 10410 }, { "epoch": 30.202898550724637, "grad_norm": 0.39057719707489014, "learning_rate": 5.0669635256406213e-05, "loss": 0.0895, "step": 10420 }, { "epoch": 30.231884057971016, "grad_norm": 0.4870263636112213, "learning_rate": 5.058696830249058e-05, "loss": 0.0991, "step": 10430 }, { "epoch": 30.26086956521739, "grad_norm": 0.40686681866645813, "learning_rate": 5.050429974382602e-05, "loss": 0.0821, "step": 10440 }, { "epoch": 30.28985507246377, "grad_norm": 0.32615211606025696, "learning_rate": 5.042162980642523e-05, "loss": 0.1024, "step": 10450 }, { "epoch": 30.318840579710145, "grad_norm": 0.47428640723228455, "learning_rate": 5.033895871630462e-05, "loss": 0.0855, "step": 10460 }, { "epoch": 30.347826086956523, "grad_norm": 0.45208922028541565, "learning_rate": 5.025628669948386e-05, "loss": 0.0922, "step": 10470 }, { "epoch": 30.3768115942029, "grad_norm": 0.41651803255081177, "learning_rate": 5.017361398198502e-05, "loss": 0.0776, "step": 10480 }, { "epoch": 30.405797101449274, "grad_norm": 0.40021809935569763, "learning_rate": 5.009094078983221e-05, "loss": 0.0828, "step": 10490 }, { "epoch": 30.434782608695652, "grad_norm": 0.40372738242149353, "learning_rate": 5.000826734905073e-05, "loss": 0.0947, "step": 10500 }, { "epoch": 30.463768115942027, "grad_norm": 0.21877968311309814, "learning_rate": 4.9925593885666645e-05, "loss": 0.0826, "step": 10510 }, { "epoch": 30.492753623188406, "grad_norm": 0.32412436604499817, "learning_rate": 4.984292062570602e-05, "loss": 0.1022, "step": 10520 }, { "epoch": 30.52173913043478, "grad_norm": 0.3431316018104553, "learning_rate": 4.976024779519442e-05, "loss": 0.079, "step": 10530 }, { "epoch": 30.55072463768116, "grad_norm": 0.3585143983364105, "learning_rate": 4.9677575620156194e-05, "loss": 0.0885, "step": 10540 }, { "epoch": 30.579710144927535, "grad_norm": 0.3738825023174286, "learning_rate": 4.959490432661391e-05, "loss": 0.1003, "step": 10550 }, { "epoch": 30.608695652173914, "grad_norm": 0.45717304944992065, "learning_rate": 4.9512234140587726e-05, "loss": 0.0908, "step": 10560 }, { "epoch": 30.63768115942029, "grad_norm": 0.43018513917922974, "learning_rate": 4.942956528809477e-05, "loss": 0.0899, "step": 10570 }, { "epoch": 30.666666666666668, "grad_norm": 0.4122094511985779, "learning_rate": 4.934689799514854e-05, "loss": 0.0976, "step": 10580 }, { "epoch": 30.695652173913043, "grad_norm": 0.4348907172679901, "learning_rate": 4.926423248775827e-05, "loss": 0.0883, "step": 10590 }, { "epoch": 30.72463768115942, "grad_norm": 0.45371150970458984, "learning_rate": 4.918156899192826e-05, "loss": 0.1057, "step": 10600 }, { "epoch": 30.753623188405797, "grad_norm": 0.3190701901912689, "learning_rate": 4.909890773365738e-05, "loss": 0.0998, "step": 10610 }, { "epoch": 30.782608695652176, "grad_norm": 0.26156431436538696, "learning_rate": 4.9016248938938344e-05, "loss": 0.086, "step": 10620 }, { "epoch": 30.81159420289855, "grad_norm": 0.24229975044727325, "learning_rate": 4.8933592833757156e-05, "loss": 0.075, "step": 10630 }, { "epoch": 30.840579710144926, "grad_norm": 0.47945863008499146, "learning_rate": 4.8850939644092435e-05, "loss": 0.0942, "step": 10640 }, { "epoch": 30.869565217391305, "grad_norm": 0.42868760228157043, "learning_rate": 4.876828959591485e-05, "loss": 0.1054, "step": 10650 }, { "epoch": 30.89855072463768, "grad_norm": 0.5427827835083008, "learning_rate": 4.8685642915186474e-05, "loss": 0.0908, "step": 10660 }, { "epoch": 30.92753623188406, "grad_norm": 0.546563982963562, "learning_rate": 4.860299982786018e-05, "loss": 0.0944, "step": 10670 }, { "epoch": 30.956521739130434, "grad_norm": 0.3557523190975189, "learning_rate": 4.852036055987901e-05, "loss": 0.0871, "step": 10680 }, { "epoch": 30.985507246376812, "grad_norm": 0.50401771068573, "learning_rate": 4.843772533717558e-05, "loss": 0.0869, "step": 10690 }, { "epoch": 31.014492753623188, "grad_norm": 0.3340211808681488, "learning_rate": 4.835509438567142e-05, "loss": 0.0857, "step": 10700 }, { "epoch": 31.043478260869566, "grad_norm": 0.2681577205657959, "learning_rate": 4.827246793127639e-05, "loss": 0.0765, "step": 10710 }, { "epoch": 31.07246376811594, "grad_norm": 0.5375443696975708, "learning_rate": 4.818984619988807e-05, "loss": 0.0977, "step": 10720 }, { "epoch": 31.10144927536232, "grad_norm": 0.33494704961776733, "learning_rate": 4.810722941739115e-05, "loss": 0.0857, "step": 10730 }, { "epoch": 31.130434782608695, "grad_norm": 0.44509807229042053, "learning_rate": 4.8024617809656684e-05, "loss": 0.0814, "step": 10740 }, { "epoch": 31.159420289855074, "grad_norm": 0.42321598529815674, "learning_rate": 4.794201160254171e-05, "loss": 0.0832, "step": 10750 }, { "epoch": 31.18840579710145, "grad_norm": 0.41145583987236023, "learning_rate": 4.785941102188844e-05, "loss": 0.101, "step": 10760 }, { "epoch": 31.217391304347824, "grad_norm": 0.23340976238250732, "learning_rate": 4.7776816293523686e-05, "loss": 0.0987, "step": 10770 }, { "epoch": 31.246376811594203, "grad_norm": 0.355365514755249, "learning_rate": 4.769422764325832e-05, "loss": 0.1148, "step": 10780 }, { "epoch": 31.27536231884058, "grad_norm": 0.3993210792541504, "learning_rate": 4.76116452968865e-05, "loss": 0.0802, "step": 10790 }, { "epoch": 31.304347826086957, "grad_norm": 0.23266702890396118, "learning_rate": 4.752906948018525e-05, "loss": 0.0755, "step": 10800 }, { "epoch": 31.333333333333332, "grad_norm": 0.2677353024482727, "learning_rate": 4.7446500418913684e-05, "loss": 0.0754, "step": 10810 }, { "epoch": 31.36231884057971, "grad_norm": 0.39404717087745667, "learning_rate": 4.736393833881247e-05, "loss": 0.0813, "step": 10820 }, { "epoch": 31.391304347826086, "grad_norm": 0.39271312952041626, "learning_rate": 4.7281383465603194e-05, "loss": 0.0935, "step": 10830 }, { "epoch": 31.420289855072465, "grad_norm": 0.40351206064224243, "learning_rate": 4.71988360249877e-05, "loss": 0.0677, "step": 10840 }, { "epoch": 31.44927536231884, "grad_norm": 0.40005189180374146, "learning_rate": 4.7116296242647554e-05, "loss": 0.1069, "step": 10850 }, { "epoch": 31.47826086956522, "grad_norm": 0.3219447433948517, "learning_rate": 4.703376434424336e-05, "loss": 0.0806, "step": 10860 }, { "epoch": 31.507246376811594, "grad_norm": 0.4746580123901367, "learning_rate": 4.695124055541421e-05, "loss": 0.0851, "step": 10870 }, { "epoch": 31.536231884057973, "grad_norm": 0.3610043525695801, "learning_rate": 4.6868725101776934e-05, "loss": 0.1042, "step": 10880 }, { "epoch": 31.565217391304348, "grad_norm": 0.3520298898220062, "learning_rate": 4.678621820892567e-05, "loss": 0.0718, "step": 10890 }, { "epoch": 31.594202898550726, "grad_norm": 0.4144718050956726, "learning_rate": 4.670372010243111e-05, "loss": 0.0957, "step": 10900 }, { "epoch": 31.6231884057971, "grad_norm": 0.35748976469039917, "learning_rate": 4.662123100783992e-05, "loss": 0.0911, "step": 10910 }, { "epoch": 31.652173913043477, "grad_norm": 0.21652653813362122, "learning_rate": 4.653875115067415e-05, "loss": 0.0731, "step": 10920 }, { "epoch": 31.681159420289855, "grad_norm": 0.3723653256893158, "learning_rate": 4.6456280756430545e-05, "loss": 0.0888, "step": 10930 }, { "epoch": 31.71014492753623, "grad_norm": 0.4222668409347534, "learning_rate": 4.637382005058004e-05, "loss": 0.1013, "step": 10940 }, { "epoch": 31.73913043478261, "grad_norm": 0.4263753294944763, "learning_rate": 4.629136925856705e-05, "loss": 0.0847, "step": 10950 }, { "epoch": 31.768115942028984, "grad_norm": 0.3686303198337555, "learning_rate": 4.6208928605808895e-05, "loss": 0.0952, "step": 10960 }, { "epoch": 31.797101449275363, "grad_norm": 0.4002050459384918, "learning_rate": 4.612649831769519e-05, "loss": 0.0825, "step": 10970 }, { "epoch": 31.82608695652174, "grad_norm": 0.2441813200712204, "learning_rate": 4.604407861958715e-05, "loss": 0.0955, "step": 10980 }, { "epoch": 31.855072463768117, "grad_norm": 0.30742359161376953, "learning_rate": 4.5961669736817114e-05, "loss": 0.0881, "step": 10990 }, { "epoch": 31.884057971014492, "grad_norm": 0.23788172006607056, "learning_rate": 4.5879271894687814e-05, "loss": 0.0935, "step": 11000 }, { "epoch": 31.91304347826087, "grad_norm": 0.3632306158542633, "learning_rate": 4.5796885318471826e-05, "loss": 0.0848, "step": 11010 }, { "epoch": 31.942028985507246, "grad_norm": 0.39893579483032227, "learning_rate": 4.571451023341086e-05, "loss": 0.1019, "step": 11020 }, { "epoch": 31.971014492753625, "grad_norm": 0.4167952835559845, "learning_rate": 4.563214686471527e-05, "loss": 0.0897, "step": 11030 }, { "epoch": 32.0, "grad_norm": 0.3372804820537567, "learning_rate": 4.5549795437563365e-05, "loss": 0.0789, "step": 11040 }, { "epoch": 32.028985507246375, "grad_norm": 0.3572154939174652, "learning_rate": 4.546745617710081e-05, "loss": 0.0829, "step": 11050 }, { "epoch": 32.05797101449275, "grad_norm": 0.32375410199165344, "learning_rate": 4.5385129308440014e-05, "loss": 0.0787, "step": 11060 }, { "epoch": 32.08695652173913, "grad_norm": 0.3995456099510193, "learning_rate": 4.530281505665944e-05, "loss": 0.1026, "step": 11070 }, { "epoch": 32.11594202898551, "grad_norm": 0.4150542616844177, "learning_rate": 4.5220513646803134e-05, "loss": 0.0941, "step": 11080 }, { "epoch": 32.14492753623188, "grad_norm": 0.32070857286453247, "learning_rate": 4.513822530388003e-05, "loss": 0.0693, "step": 11090 }, { "epoch": 32.17391304347826, "grad_norm": 0.36070406436920166, "learning_rate": 4.5055950252863296e-05, "loss": 0.0719, "step": 11100 }, { "epoch": 32.20289855072464, "grad_norm": 0.4651089906692505, "learning_rate": 4.4973688718689803e-05, "loss": 0.0776, "step": 11110 }, { "epoch": 32.231884057971016, "grad_norm": 0.39821431040763855, "learning_rate": 4.4891440926259406e-05, "loss": 0.0979, "step": 11120 }, { "epoch": 32.26086956521739, "grad_norm": 0.3794202506542206, "learning_rate": 4.480920710043443e-05, "loss": 0.1005, "step": 11130 }, { "epoch": 32.289855072463766, "grad_norm": 0.5193749070167542, "learning_rate": 4.4726987466039044e-05, "loss": 0.0971, "step": 11140 }, { "epoch": 32.31884057971015, "grad_norm": 0.2910986840724945, "learning_rate": 4.46447822478586e-05, "loss": 0.079, "step": 11150 }, { "epoch": 32.34782608695652, "grad_norm": 0.3999570310115814, "learning_rate": 4.4562591670638974e-05, "loss": 0.0967, "step": 11160 }, { "epoch": 32.3768115942029, "grad_norm": 0.33184731006622314, "learning_rate": 4.4480415959086105e-05, "loss": 0.0931, "step": 11170 }, { "epoch": 32.405797101449274, "grad_norm": 0.3531089723110199, "learning_rate": 4.439825533786522e-05, "loss": 0.0847, "step": 11180 }, { "epoch": 32.43478260869565, "grad_norm": 0.45204806327819824, "learning_rate": 4.431611003160035e-05, "loss": 0.0856, "step": 11190 }, { "epoch": 32.46376811594203, "grad_norm": 0.328259140253067, "learning_rate": 4.4233980264873636e-05, "loss": 0.0916, "step": 11200 }, { "epoch": 32.492753623188406, "grad_norm": 0.30385860800743103, "learning_rate": 4.4151866262224684e-05, "loss": 0.0831, "step": 11210 }, { "epoch": 32.52173913043478, "grad_norm": 0.34350085258483887, "learning_rate": 4.406976824815006e-05, "loss": 0.0829, "step": 11220 }, { "epoch": 32.55072463768116, "grad_norm": 0.381274551153183, "learning_rate": 4.3987686447102595e-05, "loss": 0.0889, "step": 11230 }, { "epoch": 32.57971014492754, "grad_norm": 0.4919489920139313, "learning_rate": 4.3905621083490804e-05, "loss": 0.0786, "step": 11240 }, { "epoch": 32.608695652173914, "grad_norm": 0.4313332438468933, "learning_rate": 4.3823572381678286e-05, "loss": 0.0832, "step": 11250 }, { "epoch": 32.63768115942029, "grad_norm": 0.3867364823818207, "learning_rate": 4.374154056598301e-05, "loss": 0.0911, "step": 11260 }, { "epoch": 32.666666666666664, "grad_norm": 0.4290856719017029, "learning_rate": 4.3659525860676845e-05, "loss": 0.0818, "step": 11270 }, { "epoch": 32.69565217391305, "grad_norm": 0.2989586591720581, "learning_rate": 4.3577528489984854e-05, "loss": 0.0816, "step": 11280 }, { "epoch": 32.72463768115942, "grad_norm": 0.3265022039413452, "learning_rate": 4.349554867808476e-05, "loss": 0.077, "step": 11290 }, { "epoch": 32.7536231884058, "grad_norm": 0.5287574529647827, "learning_rate": 4.34135866491062e-05, "loss": 0.0736, "step": 11300 }, { "epoch": 32.78260869565217, "grad_norm": 0.4195975661277771, "learning_rate": 4.333164262713022e-05, "loss": 0.0734, "step": 11310 }, { "epoch": 32.81159420289855, "grad_norm": 0.27101531624794006, "learning_rate": 4.324971683618868e-05, "loss": 0.0776, "step": 11320 }, { "epoch": 32.84057971014493, "grad_norm": 0.28514423966407776, "learning_rate": 4.316780950026354e-05, "loss": 0.0958, "step": 11330 }, { "epoch": 32.869565217391305, "grad_norm": 0.45822855830192566, "learning_rate": 4.308592084328637e-05, "loss": 0.0972, "step": 11340 }, { "epoch": 32.89855072463768, "grad_norm": 0.4056869149208069, "learning_rate": 4.3004051089137576e-05, "loss": 0.0871, "step": 11350 }, { "epoch": 32.927536231884055, "grad_norm": 0.4822801649570465, "learning_rate": 4.292220046164597e-05, "loss": 0.0781, "step": 11360 }, { "epoch": 32.95652173913044, "grad_norm": 0.4903472661972046, "learning_rate": 4.2840369184588035e-05, "loss": 0.1022, "step": 11370 }, { "epoch": 32.98550724637681, "grad_norm": 0.2708165645599365, "learning_rate": 4.2758557481687345e-05, "loss": 0.0724, "step": 11380 }, { "epoch": 33.01449275362319, "grad_norm": 0.46164244413375854, "learning_rate": 4.267676557661403e-05, "loss": 0.063, "step": 11390 }, { "epoch": 33.04347826086956, "grad_norm": 0.3026619553565979, "learning_rate": 4.2594993692983955e-05, "loss": 0.0824, "step": 11400 }, { "epoch": 33.072463768115945, "grad_norm": 0.28057217597961426, "learning_rate": 4.251324205435837e-05, "loss": 0.089, "step": 11410 }, { "epoch": 33.10144927536232, "grad_norm": 0.18814432621002197, "learning_rate": 4.243151088424312e-05, "loss": 0.0838, "step": 11420 }, { "epoch": 33.130434782608695, "grad_norm": 0.3627355098724365, "learning_rate": 4.234980040608813e-05, "loss": 0.0754, "step": 11430 }, { "epoch": 33.15942028985507, "grad_norm": 0.3194730877876282, "learning_rate": 4.22681108432867e-05, "loss": 0.0857, "step": 11440 }, { "epoch": 33.18840579710145, "grad_norm": 0.387783020734787, "learning_rate": 4.2186442419174984e-05, "loss": 0.0851, "step": 11450 }, { "epoch": 33.21739130434783, "grad_norm": 0.34020793437957764, "learning_rate": 4.210479535703133e-05, "loss": 0.0821, "step": 11460 }, { "epoch": 33.2463768115942, "grad_norm": 0.48423564434051514, "learning_rate": 4.202316988007567e-05, "loss": 0.0985, "step": 11470 }, { "epoch": 33.27536231884058, "grad_norm": 0.4145282506942749, "learning_rate": 4.194156621146901e-05, "loss": 0.0704, "step": 11480 }, { "epoch": 33.30434782608695, "grad_norm": 0.602695643901825, "learning_rate": 4.1859984574312596e-05, "loss": 0.0846, "step": 11490 }, { "epoch": 33.333333333333336, "grad_norm": 0.23501792550086975, "learning_rate": 4.177842519164752e-05, "loss": 0.0817, "step": 11500 }, { "epoch": 33.36231884057971, "grad_norm": 0.43396809697151184, "learning_rate": 4.169688828645404e-05, "loss": 0.103, "step": 11510 }, { "epoch": 33.391304347826086, "grad_norm": 0.4772212505340576, "learning_rate": 4.161537408165092e-05, "loss": 0.0721, "step": 11520 }, { "epoch": 33.42028985507246, "grad_norm": 0.3769497573375702, "learning_rate": 4.1533882800094924e-05, "loss": 0.1031, "step": 11530 }, { "epoch": 33.44927536231884, "grad_norm": 0.4361927807331085, "learning_rate": 4.145241466458005e-05, "loss": 0.0785, "step": 11540 }, { "epoch": 33.47826086956522, "grad_norm": 0.47106435894966125, "learning_rate": 4.13709698978371e-05, "loss": 0.0805, "step": 11550 }, { "epoch": 33.507246376811594, "grad_norm": 0.19365593791007996, "learning_rate": 4.1289548722532944e-05, "loss": 0.0749, "step": 11560 }, { "epoch": 33.53623188405797, "grad_norm": 0.47546547651290894, "learning_rate": 4.120815136126999e-05, "loss": 0.0852, "step": 11570 }, { "epoch": 33.56521739130435, "grad_norm": 0.41180577874183655, "learning_rate": 4.112677803658548e-05, "loss": 0.0806, "step": 11580 }, { "epoch": 33.594202898550726, "grad_norm": 0.2787127196788788, "learning_rate": 4.1045428970951e-05, "loss": 0.0899, "step": 11590 }, { "epoch": 33.6231884057971, "grad_norm": 0.3546220660209656, "learning_rate": 4.0964104386771785e-05, "loss": 0.0813, "step": 11600 }, { "epoch": 33.65217391304348, "grad_norm": 0.4572994112968445, "learning_rate": 4.0882804506386144e-05, "loss": 0.09, "step": 11610 }, { "epoch": 33.68115942028985, "grad_norm": 0.33741870522499084, "learning_rate": 4.080152955206485e-05, "loss": 0.074, "step": 11620 }, { "epoch": 33.710144927536234, "grad_norm": 0.24237462878227234, "learning_rate": 4.0720279746010505e-05, "loss": 0.0767, "step": 11630 }, { "epoch": 33.73913043478261, "grad_norm": 0.37967872619628906, "learning_rate": 4.063905531035699e-05, "loss": 0.0715, "step": 11640 }, { "epoch": 33.768115942028984, "grad_norm": 0.25618433952331543, "learning_rate": 4.055785646716882e-05, "loss": 0.0743, "step": 11650 }, { "epoch": 33.79710144927536, "grad_norm": 0.3028956949710846, "learning_rate": 4.047668343844051e-05, "loss": 0.0948, "step": 11660 }, { "epoch": 33.82608695652174, "grad_norm": 0.28945979475975037, "learning_rate": 4.039553644609604e-05, "loss": 0.0783, "step": 11670 }, { "epoch": 33.85507246376812, "grad_norm": 0.4274953603744507, "learning_rate": 4.0314415711988176e-05, "loss": 0.0846, "step": 11680 }, { "epoch": 33.88405797101449, "grad_norm": 0.4359511137008667, "learning_rate": 4.023332145789792e-05, "loss": 0.0772, "step": 11690 }, { "epoch": 33.91304347826087, "grad_norm": 0.2297302633523941, "learning_rate": 4.015225390553385e-05, "loss": 0.0663, "step": 11700 }, { "epoch": 33.94202898550725, "grad_norm": 0.4641404449939728, "learning_rate": 4.007121327653158e-05, "loss": 0.0822, "step": 11710 }, { "epoch": 33.971014492753625, "grad_norm": 0.3523867726325989, "learning_rate": 3.9990199792453064e-05, "loss": 0.0897, "step": 11720 }, { "epoch": 34.0, "grad_norm": 0.6376750469207764, "learning_rate": 3.9909213674786103e-05, "loss": 0.082, "step": 11730 }, { "epoch": 34.028985507246375, "grad_norm": 0.4435945749282837, "learning_rate": 3.982825514494363e-05, "loss": 0.0849, "step": 11740 }, { "epoch": 34.05797101449275, "grad_norm": 0.3722585439682007, "learning_rate": 3.974732442426319e-05, "loss": 0.0991, "step": 11750 }, { "epoch": 34.08695652173913, "grad_norm": 0.4315265715122223, "learning_rate": 3.966642173400629e-05, "loss": 0.0878, "step": 11760 }, { "epoch": 34.11594202898551, "grad_norm": 0.45117440819740295, "learning_rate": 3.9585547295357764e-05, "loss": 0.0948, "step": 11770 }, { "epoch": 34.14492753623188, "grad_norm": 0.4286547005176544, "learning_rate": 3.950470132942526e-05, "loss": 0.082, "step": 11780 }, { "epoch": 34.17391304347826, "grad_norm": 0.34111738204956055, "learning_rate": 3.942388405723856e-05, "loss": 0.073, "step": 11790 }, { "epoch": 34.20289855072464, "grad_norm": 0.24257983267307281, "learning_rate": 3.9343095699749e-05, "loss": 0.0707, "step": 11800 }, { "epoch": 34.231884057971016, "grad_norm": 0.40664252638816833, "learning_rate": 3.9262336477828874e-05, "loss": 0.1052, "step": 11810 }, { "epoch": 34.26086956521739, "grad_norm": 0.2790059745311737, "learning_rate": 3.9181606612270794e-05, "loss": 0.0863, "step": 11820 }, { "epoch": 34.289855072463766, "grad_norm": 0.3338426351547241, "learning_rate": 3.910090632378713e-05, "loss": 0.1013, "step": 11830 }, { "epoch": 34.31884057971015, "grad_norm": 0.3759063184261322, "learning_rate": 3.90202358330094e-05, "loss": 0.089, "step": 11840 }, { "epoch": 34.34782608695652, "grad_norm": 0.3479987680912018, "learning_rate": 3.8939595360487656e-05, "loss": 0.0699, "step": 11850 }, { "epoch": 34.3768115942029, "grad_norm": 0.42943084239959717, "learning_rate": 3.885898512668984e-05, "loss": 0.1114, "step": 11860 }, { "epoch": 34.405797101449274, "grad_norm": 0.2323223501443863, "learning_rate": 3.877840535200127e-05, "loss": 0.0878, "step": 11870 }, { "epoch": 34.43478260869565, "grad_norm": 0.5184713006019592, "learning_rate": 3.869785625672397e-05, "loss": 0.0974, "step": 11880 }, { "epoch": 34.46376811594203, "grad_norm": 0.267502099275589, "learning_rate": 3.8617338061076094e-05, "loss": 0.0563, "step": 11890 }, { "epoch": 34.492753623188406, "grad_norm": 0.42632079124450684, "learning_rate": 3.853685098519132e-05, "loss": 0.0739, "step": 11900 }, { "epoch": 34.52173913043478, "grad_norm": 0.30418580770492554, "learning_rate": 3.845639524911823e-05, "loss": 0.0976, "step": 11910 }, { "epoch": 34.55072463768116, "grad_norm": 0.38783854246139526, "learning_rate": 3.837597107281974e-05, "loss": 0.0738, "step": 11920 }, { "epoch": 34.57971014492754, "grad_norm": 0.19843190908432007, "learning_rate": 3.829557867617247e-05, "loss": 0.0796, "step": 11930 }, { "epoch": 34.608695652173914, "grad_norm": 0.3146209120750427, "learning_rate": 3.821521827896618e-05, "loss": 0.0826, "step": 11940 }, { "epoch": 34.63768115942029, "grad_norm": 0.42972853779792786, "learning_rate": 3.81348901009031e-05, "loss": 0.0984, "step": 11950 }, { "epoch": 34.666666666666664, "grad_norm": 0.28957119584083557, "learning_rate": 3.805459436159741e-05, "loss": 0.0714, "step": 11960 }, { "epoch": 34.69565217391305, "grad_norm": 0.3170105814933777, "learning_rate": 3.797433128057461e-05, "loss": 0.0817, "step": 11970 }, { "epoch": 34.72463768115942, "grad_norm": 0.443141907453537, "learning_rate": 3.789410107727089e-05, "loss": 0.0931, "step": 11980 }, { "epoch": 34.7536231884058, "grad_norm": 0.4638511538505554, "learning_rate": 3.781390397103257e-05, "loss": 0.0917, "step": 11990 }, { "epoch": 34.78260869565217, "grad_norm": 0.5074764490127563, "learning_rate": 3.7733740181115455e-05, "loss": 0.0919, "step": 12000 }, { "epoch": 34.81159420289855, "grad_norm": 0.32013916969299316, "learning_rate": 3.7653609926684306e-05, "loss": 0.0784, "step": 12010 }, { "epoch": 34.84057971014493, "grad_norm": 0.29025906324386597, "learning_rate": 3.757351342681217e-05, "loss": 0.0751, "step": 12020 }, { "epoch": 34.869565217391305, "grad_norm": 0.33754485845565796, "learning_rate": 3.749345090047982e-05, "loss": 0.082, "step": 12030 }, { "epoch": 34.89855072463768, "grad_norm": 0.2577219307422638, "learning_rate": 3.741342256657515e-05, "loss": 0.083, "step": 12040 }, { "epoch": 34.927536231884055, "grad_norm": 0.4835989773273468, "learning_rate": 3.7333428643892567e-05, "loss": 0.096, "step": 12050 }, { "epoch": 34.95652173913044, "grad_norm": 0.5097367763519287, "learning_rate": 3.725346935113239e-05, "loss": 0.0939, "step": 12060 }, { "epoch": 34.98550724637681, "grad_norm": 0.47239720821380615, "learning_rate": 3.717354490690029e-05, "loss": 0.0732, "step": 12070 }, { "epoch": 35.01449275362319, "grad_norm": 0.36919161677360535, "learning_rate": 3.709365552970664e-05, "loss": 0.0824, "step": 12080 }, { "epoch": 35.04347826086956, "grad_norm": 0.3409859836101532, "learning_rate": 3.7013801437965945e-05, "loss": 0.0803, "step": 12090 }, { "epoch": 35.072463768115945, "grad_norm": 0.3615312874317169, "learning_rate": 3.693398284999623e-05, "loss": 0.0951, "step": 12100 }, { "epoch": 35.10144927536232, "grad_norm": 0.5234674215316772, "learning_rate": 3.6854199984018484e-05, "loss": 0.0834, "step": 12110 }, { "epoch": 35.130434782608695, "grad_norm": 0.2838694155216217, "learning_rate": 3.677445305815601e-05, "loss": 0.091, "step": 12120 }, { "epoch": 35.15942028985507, "grad_norm": 0.5254635810852051, "learning_rate": 3.669474229043387e-05, "loss": 0.0929, "step": 12130 }, { "epoch": 35.18840579710145, "grad_norm": 0.32632967829704285, "learning_rate": 3.6615067898778235e-05, "loss": 0.0873, "step": 12140 }, { "epoch": 35.21739130434783, "grad_norm": 0.3260731101036072, "learning_rate": 3.6535430101015866e-05, "loss": 0.054, "step": 12150 }, { "epoch": 35.2463768115942, "grad_norm": 0.3727055788040161, "learning_rate": 3.645582911487345e-05, "loss": 0.0738, "step": 12160 }, { "epoch": 35.27536231884058, "grad_norm": 0.27279332280158997, "learning_rate": 3.637626515797706e-05, "loss": 0.0718, "step": 12170 }, { "epoch": 35.30434782608695, "grad_norm": 0.4319758415222168, "learning_rate": 3.629673844785152e-05, "loss": 0.0754, "step": 12180 }, { "epoch": 35.333333333333336, "grad_norm": 0.49372681975364685, "learning_rate": 3.621724920191979e-05, "loss": 0.0778, "step": 12190 }, { "epoch": 35.36231884057971, "grad_norm": 0.27620404958724976, "learning_rate": 3.6137797637502444e-05, "loss": 0.0776, "step": 12200 }, { "epoch": 35.391304347826086, "grad_norm": 0.4745093286037445, "learning_rate": 3.6058383971817035e-05, "loss": 0.091, "step": 12210 }, { "epoch": 35.42028985507246, "grad_norm": 0.49664023518562317, "learning_rate": 3.59790084219775e-05, "loss": 0.0892, "step": 12220 }, { "epoch": 35.44927536231884, "grad_norm": 0.30979496240615845, "learning_rate": 3.589967120499353e-05, "loss": 0.074, "step": 12230 }, { "epoch": 35.47826086956522, "grad_norm": 0.460953950881958, "learning_rate": 3.5820372537770075e-05, "loss": 0.08, "step": 12240 }, { "epoch": 35.507246376811594, "grad_norm": 0.31548449397087097, "learning_rate": 3.5741112637106655e-05, "loss": 0.0892, "step": 12250 }, { "epoch": 35.53623188405797, "grad_norm": 0.38868752121925354, "learning_rate": 3.5661891719696804e-05, "loss": 0.0803, "step": 12260 }, { "epoch": 35.56521739130435, "grad_norm": 0.39552441239356995, "learning_rate": 3.5582710002127504e-05, "loss": 0.0709, "step": 12270 }, { "epoch": 35.594202898550726, "grad_norm": 0.3134962022304535, "learning_rate": 3.550356770087853e-05, "loss": 0.0835, "step": 12280 }, { "epoch": 35.6231884057971, "grad_norm": 0.42194268107414246, "learning_rate": 3.5424465032321914e-05, "loss": 0.076, "step": 12290 }, { "epoch": 35.65217391304348, "grad_norm": 0.44927000999450684, "learning_rate": 3.5345402212721335e-05, "loss": 0.1047, "step": 12300 }, { "epoch": 35.68115942028985, "grad_norm": 0.4046900272369385, "learning_rate": 3.526637945823152e-05, "loss": 0.0871, "step": 12310 }, { "epoch": 35.710144927536234, "grad_norm": 0.34118810296058655, "learning_rate": 3.518739698489767e-05, "loss": 0.076, "step": 12320 }, { "epoch": 35.73913043478261, "grad_norm": 0.1889665573835373, "learning_rate": 3.510845500865485e-05, "loss": 0.078, "step": 12330 }, { "epoch": 35.768115942028984, "grad_norm": 0.25734132528305054, "learning_rate": 3.502955374532739e-05, "loss": 0.0808, "step": 12340 }, { "epoch": 35.79710144927536, "grad_norm": 0.4329688549041748, "learning_rate": 3.495069341062836e-05, "loss": 0.0949, "step": 12350 }, { "epoch": 35.82608695652174, "grad_norm": 0.4507119655609131, "learning_rate": 3.4871874220158896e-05, "loss": 0.0868, "step": 12360 }, { "epoch": 35.85507246376812, "grad_norm": 0.42284590005874634, "learning_rate": 3.479309638940762e-05, "loss": 0.0928, "step": 12370 }, { "epoch": 35.88405797101449, "grad_norm": 0.31752341985702515, "learning_rate": 3.4714360133750146e-05, "loss": 0.0824, "step": 12380 }, { "epoch": 35.91304347826087, "grad_norm": 0.31320276856422424, "learning_rate": 3.463566566844839e-05, "loss": 0.0768, "step": 12390 }, { "epoch": 35.94202898550725, "grad_norm": 0.46019718050956726, "learning_rate": 3.4557013208650016e-05, "loss": 0.0783, "step": 12400 }, { "epoch": 35.971014492753625, "grad_norm": 0.3470844626426697, "learning_rate": 3.4478402969387857e-05, "loss": 0.0874, "step": 12410 }, { "epoch": 36.0, "grad_norm": 1.3857176303863525, "learning_rate": 3.4399835165579266e-05, "loss": 0.0754, "step": 12420 }, { "epoch": 36.028985507246375, "grad_norm": 0.3289060592651367, "learning_rate": 3.4321310012025645e-05, "loss": 0.0942, "step": 12430 }, { "epoch": 36.05797101449275, "grad_norm": 0.2967238426208496, "learning_rate": 3.424282772341176e-05, "loss": 0.076, "step": 12440 }, { "epoch": 36.08695652173913, "grad_norm": 0.3292827308177948, "learning_rate": 3.416438851430519e-05, "loss": 0.0995, "step": 12450 }, { "epoch": 36.11594202898551, "grad_norm": 0.3444810211658478, "learning_rate": 3.408599259915577e-05, "loss": 0.0739, "step": 12460 }, { "epoch": 36.14492753623188, "grad_norm": 0.40988513827323914, "learning_rate": 3.400764019229487e-05, "loss": 0.0793, "step": 12470 }, { "epoch": 36.17391304347826, "grad_norm": 0.37536290287971497, "learning_rate": 3.3929331507935035e-05, "loss": 0.0983, "step": 12480 }, { "epoch": 36.20289855072464, "grad_norm": 0.45117539167404175, "learning_rate": 3.3851066760169196e-05, "loss": 0.0981, "step": 12490 }, { "epoch": 36.231884057971016, "grad_norm": 0.4401688575744629, "learning_rate": 3.377284616297021e-05, "loss": 0.0702, "step": 12500 }, { "epoch": 36.26086956521739, "grad_norm": 0.24332067370414734, "learning_rate": 3.3694669930190166e-05, "loss": 0.0741, "step": 12510 }, { "epoch": 36.289855072463766, "grad_norm": 0.38454926013946533, "learning_rate": 3.36165382755599e-05, "loss": 0.0926, "step": 12520 }, { "epoch": 36.31884057971015, "grad_norm": 0.35665246844291687, "learning_rate": 3.35384514126884e-05, "loss": 0.0686, "step": 12530 }, { "epoch": 36.34782608695652, "grad_norm": 0.4824955463409424, "learning_rate": 3.3460409555062154e-05, "loss": 0.084, "step": 12540 }, { "epoch": 36.3768115942029, "grad_norm": 0.4470244348049164, "learning_rate": 3.3382412916044645e-05, "loss": 0.1034, "step": 12550 }, { "epoch": 36.405797101449274, "grad_norm": 0.3308650553226471, "learning_rate": 3.330446170887566e-05, "loss": 0.0708, "step": 12560 }, { "epoch": 36.43478260869565, "grad_norm": 0.2681847810745239, "learning_rate": 3.3226556146670834e-05, "loss": 0.0748, "step": 12570 }, { "epoch": 36.46376811594203, "grad_norm": 0.4676291048526764, "learning_rate": 3.314869644242102e-05, "loss": 0.0849, "step": 12580 }, { "epoch": 36.492753623188406, "grad_norm": 0.468152791261673, "learning_rate": 3.3070882808991674e-05, "loss": 0.0726, "step": 12590 }, { "epoch": 36.52173913043478, "grad_norm": 0.423662930727005, "learning_rate": 3.2993115459122305e-05, "loss": 0.0832, "step": 12600 }, { "epoch": 36.55072463768116, "grad_norm": 0.4952705502510071, "learning_rate": 3.2915394605425835e-05, "loss": 0.086, "step": 12610 }, { "epoch": 36.57971014492754, "grad_norm": 0.3361116945743561, "learning_rate": 3.283772046038816e-05, "loss": 0.0686, "step": 12620 }, { "epoch": 36.608695652173914, "grad_norm": 0.35378262400627136, "learning_rate": 3.276009323636739e-05, "loss": 0.0956, "step": 12630 }, { "epoch": 36.63768115942029, "grad_norm": 0.26826876401901245, "learning_rate": 3.268251314559344e-05, "loss": 0.0725, "step": 12640 }, { "epoch": 36.666666666666664, "grad_norm": 0.4471190571784973, "learning_rate": 3.2604980400167254e-05, "loss": 0.0886, "step": 12650 }, { "epoch": 36.69565217391305, "grad_norm": 0.26007452607154846, "learning_rate": 3.252749521206042e-05, "loss": 0.0736, "step": 12660 }, { "epoch": 36.72463768115942, "grad_norm": 0.3644675016403198, "learning_rate": 3.2450057793114494e-05, "loss": 0.0859, "step": 12670 }, { "epoch": 36.7536231884058, "grad_norm": 0.3555355966091156, "learning_rate": 3.2372668355040435e-05, "loss": 0.0952, "step": 12680 }, { "epoch": 36.78260869565217, "grad_norm": 0.3508759140968323, "learning_rate": 3.2295327109418005e-05, "loss": 0.0761, "step": 12690 }, { "epoch": 36.81159420289855, "grad_norm": 0.3372611999511719, "learning_rate": 3.221803426769518e-05, "loss": 0.1055, "step": 12700 }, { "epoch": 36.84057971014493, "grad_norm": 0.45002785325050354, "learning_rate": 3.214079004118768e-05, "loss": 0.0677, "step": 12710 }, { "epoch": 36.869565217391305, "grad_norm": 0.5220909118652344, "learning_rate": 3.2063594641078234e-05, "loss": 0.0679, "step": 12720 }, { "epoch": 36.89855072463768, "grad_norm": 0.33023321628570557, "learning_rate": 3.198644827841616e-05, "loss": 0.0854, "step": 12730 }, { "epoch": 36.927536231884055, "grad_norm": 0.37969428300857544, "learning_rate": 3.1909351164116654e-05, "loss": 0.0975, "step": 12740 }, { "epoch": 36.95652173913044, "grad_norm": 0.39646878838539124, "learning_rate": 3.183230350896026e-05, "loss": 0.0651, "step": 12750 }, { "epoch": 36.98550724637681, "grad_norm": 0.42903590202331543, "learning_rate": 3.1755305523592337e-05, "loss": 0.0964, "step": 12760 }, { "epoch": 37.01449275362319, "grad_norm": 0.3350338339805603, "learning_rate": 3.167835741852245e-05, "loss": 0.0747, "step": 12770 }, { "epoch": 37.04347826086956, "grad_norm": 0.5324596762657166, "learning_rate": 3.160145940412378e-05, "loss": 0.0865, "step": 12780 }, { "epoch": 37.072463768115945, "grad_norm": 0.5436109900474548, "learning_rate": 3.1524611690632545e-05, "loss": 0.0853, "step": 12790 }, { "epoch": 37.10144927536232, "grad_norm": 0.4058521091938019, "learning_rate": 3.144781448814746e-05, "loss": 0.0611, "step": 12800 }, { "epoch": 37.130434782608695, "grad_norm": 0.222909078001976, "learning_rate": 3.1371068006629145e-05, "loss": 0.0849, "step": 12810 }, { "epoch": 37.15942028985507, "grad_norm": 0.3150401711463928, "learning_rate": 3.129437245589956e-05, "loss": 0.0661, "step": 12820 }, { "epoch": 37.18840579710145, "grad_norm": 0.5720604062080383, "learning_rate": 3.121772804564143e-05, "loss": 0.1058, "step": 12830 }, { "epoch": 37.21739130434783, "grad_norm": 0.36148929595947266, "learning_rate": 3.11411349853976e-05, "loss": 0.0647, "step": 12840 }, { "epoch": 37.2463768115942, "grad_norm": 0.4873165190219879, "learning_rate": 3.10645934845706e-05, "loss": 0.0919, "step": 12850 }, { "epoch": 37.27536231884058, "grad_norm": 0.6560083627700806, "learning_rate": 3.098810375242196e-05, "loss": 0.0857, "step": 12860 }, { "epoch": 37.30434782608695, "grad_norm": 0.37037011981010437, "learning_rate": 3.0911665998071704e-05, "loss": 0.084, "step": 12870 }, { "epoch": 37.333333333333336, "grad_norm": 0.2736794650554657, "learning_rate": 3.083528043049774e-05, "loss": 0.0629, "step": 12880 }, { "epoch": 37.36231884057971, "grad_norm": 0.39787065982818604, "learning_rate": 3.0758947258535255e-05, "loss": 0.0937, "step": 12890 }, { "epoch": 37.391304347826086, "grad_norm": 0.2980014979839325, "learning_rate": 3.068266669087625e-05, "loss": 0.0747, "step": 12900 }, { "epoch": 37.42028985507246, "grad_norm": 0.38902172446250916, "learning_rate": 3.060643893606887e-05, "loss": 0.0922, "step": 12910 }, { "epoch": 37.44927536231884, "grad_norm": 0.412036269903183, "learning_rate": 3.053026420251693e-05, "loss": 0.0877, "step": 12920 }, { "epoch": 37.47826086956522, "grad_norm": 0.36954089999198914, "learning_rate": 3.0454142698479183e-05, "loss": 0.1029, "step": 12930 }, { "epoch": 37.507246376811594, "grad_norm": 0.521973192691803, "learning_rate": 3.0378074632068954e-05, "loss": 0.0682, "step": 12940 }, { "epoch": 37.53623188405797, "grad_norm": 0.2521456182003021, "learning_rate": 3.0302060211253408e-05, "loss": 0.07, "step": 12950 }, { "epoch": 37.56521739130435, "grad_norm": 0.4917527139186859, "learning_rate": 3.0226099643853073e-05, "loss": 0.0878, "step": 12960 }, { "epoch": 37.594202898550726, "grad_norm": 0.339530348777771, "learning_rate": 3.0150193137541283e-05, "loss": 0.069, "step": 12970 }, { "epoch": 37.6231884057971, "grad_norm": 0.3518831133842468, "learning_rate": 3.0074340899843467e-05, "loss": 0.0816, "step": 12980 }, { "epoch": 37.65217391304348, "grad_norm": 0.4143315553665161, "learning_rate": 2.999854313813677e-05, "loss": 0.0988, "step": 12990 }, { "epoch": 37.68115942028985, "grad_norm": 0.31359317898750305, "learning_rate": 2.9922800059649382e-05, "loss": 0.0671, "step": 13000 }, { "epoch": 37.710144927536234, "grad_norm": 0.49539920687675476, "learning_rate": 2.9847111871459976e-05, "loss": 0.0752, "step": 13010 }, { "epoch": 37.73913043478261, "grad_norm": 0.42408648133277893, "learning_rate": 2.977147878049721e-05, "loss": 0.0762, "step": 13020 }, { "epoch": 37.768115942028984, "grad_norm": 0.5186890959739685, "learning_rate": 2.9695900993539006e-05, "loss": 0.0895, "step": 13030 }, { "epoch": 37.79710144927536, "grad_norm": 0.46351712942123413, "learning_rate": 2.9620378717212183e-05, "loss": 0.1007, "step": 13040 }, { "epoch": 37.82608695652174, "grad_norm": 0.6148757934570312, "learning_rate": 2.9544912157991745e-05, "loss": 0.0661, "step": 13050 }, { "epoch": 37.85507246376812, "grad_norm": 0.43662676215171814, "learning_rate": 2.9469501522200405e-05, "loss": 0.0761, "step": 13060 }, { "epoch": 37.88405797101449, "grad_norm": 0.4326452910900116, "learning_rate": 2.9394147016007946e-05, "loss": 0.0965, "step": 13070 }, { "epoch": 37.91304347826087, "grad_norm": 0.5132485032081604, "learning_rate": 2.9318848845430702e-05, "loss": 0.0817, "step": 13080 }, { "epoch": 37.94202898550725, "grad_norm": 0.4048340618610382, "learning_rate": 2.9243607216331013e-05, "loss": 0.0867, "step": 13090 }, { "epoch": 37.971014492753625, "grad_norm": 0.5179027915000916, "learning_rate": 2.916842233441661e-05, "loss": 0.0914, "step": 13100 }, { "epoch": 38.0, "grad_norm": 0.6405589580535889, "learning_rate": 2.90932944052401e-05, "loss": 0.0758, "step": 13110 }, { "epoch": 38.028985507246375, "grad_norm": 0.3282417356967926, "learning_rate": 2.9018223634198354e-05, "loss": 0.0814, "step": 13120 }, { "epoch": 38.05797101449275, "grad_norm": 0.25214284658432007, "learning_rate": 2.8943210226532025e-05, "loss": 0.0662, "step": 13130 }, { "epoch": 38.08695652173913, "grad_norm": 0.6154152750968933, "learning_rate": 2.8868254387324857e-05, "loss": 0.0793, "step": 13140 }, { "epoch": 38.11594202898551, "grad_norm": 0.4001002907752991, "learning_rate": 2.8793356321503306e-05, "loss": 0.0851, "step": 13150 }, { "epoch": 38.14492753623188, "grad_norm": 0.2872644066810608, "learning_rate": 2.87185162338358e-05, "loss": 0.0664, "step": 13160 }, { "epoch": 38.17391304347826, "grad_norm": 0.385065495967865, "learning_rate": 2.8643734328932253e-05, "loss": 0.077, "step": 13170 }, { "epoch": 38.20289855072464, "grad_norm": 0.32745644450187683, "learning_rate": 2.856901081124359e-05, "loss": 0.0762, "step": 13180 }, { "epoch": 38.231884057971016, "grad_norm": 0.3578251004219055, "learning_rate": 2.8494345885061002e-05, "loss": 0.0873, "step": 13190 }, { "epoch": 38.26086956521739, "grad_norm": 0.4024776816368103, "learning_rate": 2.8419739754515616e-05, "loss": 0.0674, "step": 13200 }, { "epoch": 38.289855072463766, "grad_norm": 0.23126451671123505, "learning_rate": 2.8345192623577666e-05, "loss": 0.096, "step": 13210 }, { "epoch": 38.31884057971015, "grad_norm": 0.44609886407852173, "learning_rate": 2.8270704696056193e-05, "loss": 0.0924, "step": 13220 }, { "epoch": 38.34782608695652, "grad_norm": 0.28004297614097595, "learning_rate": 2.8196276175598367e-05, "loss": 0.0824, "step": 13230 }, { "epoch": 38.3768115942029, "grad_norm": 0.4256015419960022, "learning_rate": 2.8121907265688884e-05, "loss": 0.0793, "step": 13240 }, { "epoch": 38.405797101449274, "grad_norm": 0.28294479846954346, "learning_rate": 2.804759816964957e-05, "loss": 0.0757, "step": 13250 }, { "epoch": 38.43478260869565, "grad_norm": 0.36253151297569275, "learning_rate": 2.797334909063857e-05, "loss": 0.0638, "step": 13260 }, { "epoch": 38.46376811594203, "grad_norm": 0.3807222247123718, "learning_rate": 2.7899160231650056e-05, "loss": 0.0824, "step": 13270 }, { "epoch": 38.492753623188406, "grad_norm": 0.2997818887233734, "learning_rate": 2.7825031795513585e-05, "loss": 0.084, "step": 13280 }, { "epoch": 38.52173913043478, "grad_norm": 0.24102069437503815, "learning_rate": 2.775096398489341e-05, "loss": 0.0893, "step": 13290 }, { "epoch": 38.55072463768116, "grad_norm": 0.258094847202301, "learning_rate": 2.7676957002288163e-05, "loss": 0.0814, "step": 13300 }, { "epoch": 38.57971014492754, "grad_norm": 0.4139418303966522, "learning_rate": 2.760301105003003e-05, "loss": 0.0803, "step": 13310 }, { "epoch": 38.608695652173914, "grad_norm": 0.31138837337493896, "learning_rate": 2.752912633028446e-05, "loss": 0.0783, "step": 13320 }, { "epoch": 38.63768115942029, "grad_norm": 0.4925903379917145, "learning_rate": 2.7455303045049474e-05, "loss": 0.0839, "step": 13330 }, { "epoch": 38.666666666666664, "grad_norm": 0.3583664894104004, "learning_rate": 2.7381541396155098e-05, "loss": 0.071, "step": 13340 }, { "epoch": 38.69565217391305, "grad_norm": 0.28774356842041016, "learning_rate": 2.730784158526286e-05, "loss": 0.0875, "step": 13350 }, { "epoch": 38.72463768115942, "grad_norm": 0.43696558475494385, "learning_rate": 2.723420381386521e-05, "loss": 0.0782, "step": 13360 }, { "epoch": 38.7536231884058, "grad_norm": 0.3710800111293793, "learning_rate": 2.7160628283285018e-05, "loss": 0.0719, "step": 13370 }, { "epoch": 38.78260869565217, "grad_norm": 0.3696930408477783, "learning_rate": 2.7087115194675007e-05, "loss": 0.0656, "step": 13380 }, { "epoch": 38.81159420289855, "grad_norm": 0.3197194039821625, "learning_rate": 2.701366474901712e-05, "loss": 0.0755, "step": 13390 }, { "epoch": 38.84057971014493, "grad_norm": 0.3476333022117615, "learning_rate": 2.6940277147122085e-05, "loss": 0.0834, "step": 13400 }, { "epoch": 38.869565217391305, "grad_norm": 0.3637937307357788, "learning_rate": 2.686695258962878e-05, "loss": 0.0745, "step": 13410 }, { "epoch": 38.89855072463768, "grad_norm": 0.5231657028198242, "learning_rate": 2.679369127700375e-05, "loss": 0.0807, "step": 13420 }, { "epoch": 38.927536231884055, "grad_norm": 0.35336682200431824, "learning_rate": 2.672049340954067e-05, "loss": 0.072, "step": 13430 }, { "epoch": 38.95652173913044, "grad_norm": 0.5302248597145081, "learning_rate": 2.6647359187359676e-05, "loss": 0.0931, "step": 13440 }, { "epoch": 38.98550724637681, "grad_norm": 0.4057472348213196, "learning_rate": 2.6574288810406946e-05, "loss": 0.0808, "step": 13450 }, { "epoch": 39.01449275362319, "grad_norm": 0.40481290221214294, "learning_rate": 2.6501282478454083e-05, "loss": 0.0742, "step": 13460 }, { "epoch": 39.04347826086956, "grad_norm": 0.5995214581489563, "learning_rate": 2.6428340391097618e-05, "loss": 0.0842, "step": 13470 }, { "epoch": 39.072463768115945, "grad_norm": 0.46385887265205383, "learning_rate": 2.6355462747758485e-05, "loss": 0.0764, "step": 13480 }, { "epoch": 39.10144927536232, "grad_norm": 0.21818841993808746, "learning_rate": 2.6282649747681304e-05, "loss": 0.0689, "step": 13490 }, { "epoch": 39.130434782608695, "grad_norm": 0.24269723892211914, "learning_rate": 2.620990158993406e-05, "loss": 0.0674, "step": 13500 }, { "epoch": 39.15942028985507, "grad_norm": 0.18235942721366882, "learning_rate": 2.6137218473407477e-05, "loss": 0.0781, "step": 13510 }, { "epoch": 39.18840579710145, "grad_norm": 0.30598685145378113, "learning_rate": 2.606460059681436e-05, "loss": 0.0881, "step": 13520 }, { "epoch": 39.21739130434783, "grad_norm": 0.3079904317855835, "learning_rate": 2.599204815868928e-05, "loss": 0.0796, "step": 13530 }, { "epoch": 39.2463768115942, "grad_norm": 0.6565821170806885, "learning_rate": 2.5919561357387756e-05, "loss": 0.0723, "step": 13540 }, { "epoch": 39.27536231884058, "grad_norm": 0.30793699622154236, "learning_rate": 2.5847140391085972e-05, "loss": 0.0741, "step": 13550 }, { "epoch": 39.30434782608695, "grad_norm": 0.40903565287590027, "learning_rate": 2.5774785457780103e-05, "loss": 0.0895, "step": 13560 }, { "epoch": 39.333333333333336, "grad_norm": 0.31521743535995483, "learning_rate": 2.5702496755285753e-05, "loss": 0.0635, "step": 13570 }, { "epoch": 39.36231884057971, "grad_norm": 0.49470698833465576, "learning_rate": 2.5630274481237483e-05, "loss": 0.0812, "step": 13580 }, { "epoch": 39.391304347826086, "grad_norm": 0.49831944704055786, "learning_rate": 2.5558118833088197e-05, "loss": 0.0764, "step": 13590 }, { "epoch": 39.42028985507246, "grad_norm": 0.3357720971107483, "learning_rate": 2.548603000810872e-05, "loss": 0.0789, "step": 13600 }, { "epoch": 39.44927536231884, "grad_norm": 0.49669551849365234, "learning_rate": 2.5414008203387152e-05, "loss": 0.0775, "step": 13610 }, { "epoch": 39.47826086956522, "grad_norm": 0.45243167877197266, "learning_rate": 2.534205361582834e-05, "loss": 0.0931, "step": 13620 }, { "epoch": 39.507246376811594, "grad_norm": 0.4529440701007843, "learning_rate": 2.527016644215338e-05, "loss": 0.0857, "step": 13630 }, { "epoch": 39.53623188405797, "grad_norm": 0.3923579156398773, "learning_rate": 2.519834687889905e-05, "loss": 0.0907, "step": 13640 }, { "epoch": 39.56521739130435, "grad_norm": 0.46026331186294556, "learning_rate": 2.5126595122417295e-05, "loss": 0.0653, "step": 13650 }, { "epoch": 39.594202898550726, "grad_norm": 0.3783218264579773, "learning_rate": 2.5054911368874713e-05, "loss": 0.0894, "step": 13660 }, { "epoch": 39.6231884057971, "grad_norm": 0.2474319487810135, "learning_rate": 2.4983295814251916e-05, "loss": 0.0855, "step": 13670 }, { "epoch": 39.65217391304348, "grad_norm": 0.2657444179058075, "learning_rate": 2.4911748654343105e-05, "loss": 0.0811, "step": 13680 }, { "epoch": 39.68115942028985, "grad_norm": 0.3964589238166809, "learning_rate": 2.4840270084755463e-05, "loss": 0.0719, "step": 13690 }, { "epoch": 39.710144927536234, "grad_norm": 0.4461621046066284, "learning_rate": 2.4768860300908685e-05, "loss": 0.069, "step": 13700 }, { "epoch": 39.73913043478261, "grad_norm": 0.32302120327949524, "learning_rate": 2.469751949803443e-05, "loss": 0.0827, "step": 13710 }, { "epoch": 39.768115942028984, "grad_norm": 0.29357752203941345, "learning_rate": 2.4626247871175666e-05, "loss": 0.0605, "step": 13720 }, { "epoch": 39.79710144927536, "grad_norm": 0.5546101331710815, "learning_rate": 2.4555045615186346e-05, "loss": 0.0806, "step": 13730 }, { "epoch": 39.82608695652174, "grad_norm": 0.4854411482810974, "learning_rate": 2.4483912924730677e-05, "loss": 0.0825, "step": 13740 }, { "epoch": 39.85507246376812, "grad_norm": 0.3219527304172516, "learning_rate": 2.4412849994282742e-05, "loss": 0.072, "step": 13750 }, { "epoch": 39.88405797101449, "grad_norm": 0.3878593146800995, "learning_rate": 2.434185701812592e-05, "loss": 0.0763, "step": 13760 }, { "epoch": 39.91304347826087, "grad_norm": 0.43568170070648193, "learning_rate": 2.4270934190352218e-05, "loss": 0.0837, "step": 13770 }, { "epoch": 39.94202898550725, "grad_norm": 0.3280969560146332, "learning_rate": 2.4200081704861998e-05, "loss": 0.0852, "step": 13780 }, { "epoch": 39.971014492753625, "grad_norm": 0.4428047239780426, "learning_rate": 2.412929975536321e-05, "loss": 0.0778, "step": 13790 }, { "epoch": 40.0, "grad_norm": 1.2345435619354248, "learning_rate": 2.4058588535371017e-05, "loss": 0.0667, "step": 13800 }, { "epoch": 40.028985507246375, "grad_norm": 0.3271735906600952, "learning_rate": 2.3987948238207243e-05, "loss": 0.0644, "step": 13810 }, { "epoch": 40.05797101449275, "grad_norm": 0.37233906984329224, "learning_rate": 2.3917379056999678e-05, "loss": 0.0809, "step": 13820 }, { "epoch": 40.08695652173913, "grad_norm": 0.34821170568466187, "learning_rate": 2.3846881184681824e-05, "loss": 0.0712, "step": 13830 }, { "epoch": 40.11594202898551, "grad_norm": 0.47559064626693726, "learning_rate": 2.377645481399214e-05, "loss": 0.071, "step": 13840 }, { "epoch": 40.14492753623188, "grad_norm": 0.40675798058509827, "learning_rate": 2.3706100137473667e-05, "loss": 0.0876, "step": 13850 }, { "epoch": 40.17391304347826, "grad_norm": 0.33387240767478943, "learning_rate": 2.3635817347473394e-05, "loss": 0.0763, "step": 13860 }, { "epoch": 40.20289855072464, "grad_norm": 0.38296619057655334, "learning_rate": 2.3565606636141757e-05, "loss": 0.0861, "step": 13870 }, { "epoch": 40.231884057971016, "grad_norm": 0.35619163513183594, "learning_rate": 2.3495468195432203e-05, "loss": 0.0875, "step": 13880 }, { "epoch": 40.26086956521739, "grad_norm": 0.38837236166000366, "learning_rate": 2.3425402217100507e-05, "loss": 0.076, "step": 13890 }, { "epoch": 40.289855072463766, "grad_norm": 0.5374419093132019, "learning_rate": 2.3355408892704424e-05, "loss": 0.0872, "step": 13900 }, { "epoch": 40.31884057971015, "grad_norm": 0.399505078792572, "learning_rate": 2.3285488413603003e-05, "loss": 0.0688, "step": 13910 }, { "epoch": 40.34782608695652, "grad_norm": 0.41612479090690613, "learning_rate": 2.321564097095615e-05, "loss": 0.0845, "step": 13920 }, { "epoch": 40.3768115942029, "grad_norm": 0.5337821841239929, "learning_rate": 2.3145866755724142e-05, "loss": 0.0841, "step": 13930 }, { "epoch": 40.405797101449274, "grad_norm": 0.4810619652271271, "learning_rate": 2.307616595866699e-05, "loss": 0.0757, "step": 13940 }, { "epoch": 40.43478260869565, "grad_norm": 0.7072311043739319, "learning_rate": 2.3006538770344032e-05, "loss": 0.0872, "step": 13950 }, { "epoch": 40.46376811594203, "grad_norm": 0.4162898659706116, "learning_rate": 2.293698538111334e-05, "loss": 0.0834, "step": 13960 }, { "epoch": 40.492753623188406, "grad_norm": 0.40901780128479004, "learning_rate": 2.28675059811312e-05, "loss": 0.0647, "step": 13970 }, { "epoch": 40.52173913043478, "grad_norm": 0.32501867413520813, "learning_rate": 2.279810076035167e-05, "loss": 0.0776, "step": 13980 }, { "epoch": 40.55072463768116, "grad_norm": 0.32248783111572266, "learning_rate": 2.272876990852596e-05, "loss": 0.0749, "step": 13990 }, { "epoch": 40.57971014492754, "grad_norm": 0.36385807394981384, "learning_rate": 2.265951361520195e-05, "loss": 0.0853, "step": 14000 }, { "epoch": 40.608695652173914, "grad_norm": 0.3925250470638275, "learning_rate": 2.2590332069723748e-05, "loss": 0.0877, "step": 14010 }, { "epoch": 40.63768115942029, "grad_norm": 0.3202069401741028, "learning_rate": 2.2521225461231004e-05, "loss": 0.0622, "step": 14020 }, { "epoch": 40.666666666666664, "grad_norm": 0.335860013961792, "learning_rate": 2.2452193978658597e-05, "loss": 0.0798, "step": 14030 }, { "epoch": 40.69565217391305, "grad_norm": 0.2558061480522156, "learning_rate": 2.238323781073594e-05, "loss": 0.0728, "step": 14040 }, { "epoch": 40.72463768115942, "grad_norm": 0.551850438117981, "learning_rate": 2.2314357145986552e-05, "loss": 0.0729, "step": 14050 }, { "epoch": 40.7536231884058, "grad_norm": 0.36312124133110046, "learning_rate": 2.224555217272757e-05, "loss": 0.0964, "step": 14060 }, { "epoch": 40.78260869565217, "grad_norm": 0.2239280343055725, "learning_rate": 2.2176823079069127e-05, "loss": 0.079, "step": 14070 }, { "epoch": 40.81159420289855, "grad_norm": 0.35991764068603516, "learning_rate": 2.210817005291398e-05, "loss": 0.085, "step": 14080 }, { "epoch": 40.84057971014493, "grad_norm": 0.46211162209510803, "learning_rate": 2.203959328195686e-05, "loss": 0.0719, "step": 14090 }, { "epoch": 40.869565217391305, "grad_norm": 0.4440068006515503, "learning_rate": 2.1971092953684026e-05, "loss": 0.0933, "step": 14100 }, { "epoch": 40.89855072463768, "grad_norm": 0.5218793749809265, "learning_rate": 2.1902669255372788e-05, "loss": 0.0777, "step": 14110 }, { "epoch": 40.927536231884055, "grad_norm": 0.23008324205875397, "learning_rate": 2.1834322374090897e-05, "loss": 0.0849, "step": 14120 }, { "epoch": 40.95652173913044, "grad_norm": 0.2903602719306946, "learning_rate": 2.1766052496696153e-05, "loss": 0.0825, "step": 14130 }, { "epoch": 40.98550724637681, "grad_norm": 0.22217999398708344, "learning_rate": 2.169785980983577e-05, "loss": 0.0672, "step": 14140 }, { "epoch": 41.01449275362319, "grad_norm": 0.2826724350452423, "learning_rate": 2.162974449994593e-05, "loss": 0.0776, "step": 14150 }, { "epoch": 41.04347826086956, "grad_norm": 0.4253155589103699, "learning_rate": 2.1561706753251337e-05, "loss": 0.0682, "step": 14160 }, { "epoch": 41.072463768115945, "grad_norm": 0.5486535429954529, "learning_rate": 2.1493746755764544e-05, "loss": 0.0831, "step": 14170 }, { "epoch": 41.10144927536232, "grad_norm": 0.31310802698135376, "learning_rate": 2.1425864693285635e-05, "loss": 0.0705, "step": 14180 }, { "epoch": 41.130434782608695, "grad_norm": 0.4277971088886261, "learning_rate": 2.1358060751401547e-05, "loss": 0.0734, "step": 14190 }, { "epoch": 41.15942028985507, "grad_norm": 0.25638988614082336, "learning_rate": 2.129033511548566e-05, "loss": 0.0608, "step": 14200 }, { "epoch": 41.18840579710145, "grad_norm": 0.3612518608570099, "learning_rate": 2.1222687970697315e-05, "loss": 0.0726, "step": 14210 }, { "epoch": 41.21739130434783, "grad_norm": 0.438911110162735, "learning_rate": 2.1155119501981173e-05, "loss": 0.0758, "step": 14220 }, { "epoch": 41.2463768115942, "grad_norm": 0.45236918330192566, "learning_rate": 2.1087629894066895e-05, "loss": 0.0752, "step": 14230 }, { "epoch": 41.27536231884058, "grad_norm": 0.3085053861141205, "learning_rate": 2.1020219331468473e-05, "loss": 0.07, "step": 14240 }, { "epoch": 41.30434782608695, "grad_norm": 0.4090859889984131, "learning_rate": 2.095288799848379e-05, "loss": 0.073, "step": 14250 }, { "epoch": 41.333333333333336, "grad_norm": 0.25771570205688477, "learning_rate": 2.088563607919417e-05, "loss": 0.0696, "step": 14260 }, { "epoch": 41.36231884057971, "grad_norm": 0.3390887975692749, "learning_rate": 2.0818463757463786e-05, "loss": 0.0845, "step": 14270 }, { "epoch": 41.391304347826086, "grad_norm": 0.41618505120277405, "learning_rate": 2.0751371216939175e-05, "loss": 0.0863, "step": 14280 }, { "epoch": 41.42028985507246, "grad_norm": 0.3998986780643463, "learning_rate": 2.068435864104882e-05, "loss": 0.0709, "step": 14290 }, { "epoch": 41.44927536231884, "grad_norm": 0.6030775308609009, "learning_rate": 2.0617426213002506e-05, "loss": 0.0828, "step": 14300 }, { "epoch": 41.47826086956522, "grad_norm": 0.35719752311706543, "learning_rate": 2.055057411579097e-05, "loss": 0.08, "step": 14310 }, { "epoch": 41.507246376811594, "grad_norm": 0.3750839829444885, "learning_rate": 2.0483802532185286e-05, "loss": 0.062, "step": 14320 }, { "epoch": 41.53623188405797, "grad_norm": 0.303724467754364, "learning_rate": 2.041711164473638e-05, "loss": 0.0762, "step": 14330 }, { "epoch": 41.56521739130435, "grad_norm": 0.3491968810558319, "learning_rate": 2.0350501635774637e-05, "loss": 0.0681, "step": 14340 }, { "epoch": 41.594202898550726, "grad_norm": 0.31283631920814514, "learning_rate": 2.0283972687409247e-05, "loss": 0.099, "step": 14350 }, { "epoch": 41.6231884057971, "grad_norm": 0.3491584360599518, "learning_rate": 2.021752498152784e-05, "loss": 0.0799, "step": 14360 }, { "epoch": 41.65217391304348, "grad_norm": 0.40594348311424255, "learning_rate": 2.015115869979589e-05, "loss": 0.0673, "step": 14370 }, { "epoch": 41.68115942028985, "grad_norm": 0.2607230842113495, "learning_rate": 2.0084874023656265e-05, "loss": 0.0678, "step": 14380 }, { "epoch": 41.710144927536234, "grad_norm": 0.4588332176208496, "learning_rate": 2.001867113432877e-05, "loss": 0.0739, "step": 14390 }, { "epoch": 41.73913043478261, "grad_norm": 0.47973960638046265, "learning_rate": 1.995255021280954e-05, "loss": 0.0799, "step": 14400 }, { "epoch": 41.768115942028984, "grad_norm": 0.39496302604675293, "learning_rate": 1.9886511439870688e-05, "loss": 0.0755, "step": 14410 }, { "epoch": 41.79710144927536, "grad_norm": 0.47075384855270386, "learning_rate": 1.9820554996059675e-05, "loss": 0.0843, "step": 14420 }, { "epoch": 41.82608695652174, "grad_norm": 0.3595154583454132, "learning_rate": 1.9754681061698893e-05, "loss": 0.0847, "step": 14430 }, { "epoch": 41.85507246376812, "grad_norm": 0.4555226266384125, "learning_rate": 1.9688889816885185e-05, "loss": 0.0703, "step": 14440 }, { "epoch": 41.88405797101449, "grad_norm": 0.4868208169937134, "learning_rate": 1.962318144148928e-05, "loss": 0.0852, "step": 14450 }, { "epoch": 41.91304347826087, "grad_norm": 0.3341791331768036, "learning_rate": 1.955755611515539e-05, "loss": 0.0665, "step": 14460 }, { "epoch": 41.94202898550725, "grad_norm": 0.4766240119934082, "learning_rate": 1.9492014017300642e-05, "loss": 0.0717, "step": 14470 }, { "epoch": 41.971014492753625, "grad_norm": 0.4072614908218384, "learning_rate": 1.942655532711461e-05, "loss": 0.0689, "step": 14480 }, { "epoch": 42.0, "grad_norm": 0.5508348345756531, "learning_rate": 1.9361180223558882e-05, "loss": 0.0654, "step": 14490 }, { "epoch": 42.028985507246375, "grad_norm": 0.3589998483657837, "learning_rate": 1.929588888536647e-05, "loss": 0.0736, "step": 14500 }, { "epoch": 42.05797101449275, "grad_norm": 0.33736053109169006, "learning_rate": 1.9230681491041425e-05, "loss": 0.0682, "step": 14510 }, { "epoch": 42.08695652173913, "grad_norm": 0.4881956875324249, "learning_rate": 1.9165558218858264e-05, "loss": 0.0817, "step": 14520 }, { "epoch": 42.11594202898551, "grad_norm": 0.5997191071510315, "learning_rate": 1.9100519246861505e-05, "loss": 0.0722, "step": 14530 }, { "epoch": 42.14492753623188, "grad_norm": 0.4747546911239624, "learning_rate": 1.9035564752865248e-05, "loss": 0.0624, "step": 14540 }, { "epoch": 42.17391304347826, "grad_norm": 0.391609251499176, "learning_rate": 1.897069491445258e-05, "loss": 0.088, "step": 14550 }, { "epoch": 42.20289855072464, "grad_norm": 0.5286002159118652, "learning_rate": 1.890590990897515e-05, "loss": 0.0921, "step": 14560 }, { "epoch": 42.231884057971016, "grad_norm": 0.3322617709636688, "learning_rate": 1.884120991355272e-05, "loss": 0.0753, "step": 14570 }, { "epoch": 42.26086956521739, "grad_norm": 0.366778165102005, "learning_rate": 1.8776595105072576e-05, "loss": 0.071, "step": 14580 }, { "epoch": 42.289855072463766, "grad_norm": 0.5647521018981934, "learning_rate": 1.8712065660189166e-05, "loss": 0.0901, "step": 14590 }, { "epoch": 42.31884057971015, "grad_norm": 0.44216540455818176, "learning_rate": 1.8647621755323513e-05, "loss": 0.0754, "step": 14600 }, { "epoch": 42.34782608695652, "grad_norm": 0.41718125343322754, "learning_rate": 1.858326356666278e-05, "loss": 0.0798, "step": 14610 }, { "epoch": 42.3768115942029, "grad_norm": 0.3692278563976288, "learning_rate": 1.851899127015983e-05, "loss": 0.0687, "step": 14620 }, { "epoch": 42.405797101449274, "grad_norm": 0.5888849496841431, "learning_rate": 1.8454805041532626e-05, "loss": 0.0605, "step": 14630 }, { "epoch": 42.43478260869565, "grad_norm": 0.366144061088562, "learning_rate": 1.8390705056263906e-05, "loss": 0.0665, "step": 14640 }, { "epoch": 42.46376811594203, "grad_norm": 0.4007920026779175, "learning_rate": 1.832669148960057e-05, "loss": 0.0707, "step": 14650 }, { "epoch": 42.492753623188406, "grad_norm": 0.36319825053215027, "learning_rate": 1.8262764516553233e-05, "loss": 0.0604, "step": 14660 }, { "epoch": 42.52173913043478, "grad_norm": 0.5968917012214661, "learning_rate": 1.8198924311895843e-05, "loss": 0.0792, "step": 14670 }, { "epoch": 42.55072463768116, "grad_norm": 0.3557155132293701, "learning_rate": 1.813517105016505e-05, "loss": 0.0587, "step": 14680 }, { "epoch": 42.57971014492754, "grad_norm": 0.3647300899028778, "learning_rate": 1.8071504905659888e-05, "loss": 0.0678, "step": 14690 }, { "epoch": 42.608695652173914, "grad_norm": 0.5016182065010071, "learning_rate": 1.800792605244109e-05, "loss": 0.0726, "step": 14700 }, { "epoch": 42.63768115942029, "grad_norm": 0.39856255054473877, "learning_rate": 1.7944434664330844e-05, "loss": 0.0852, "step": 14710 }, { "epoch": 42.666666666666664, "grad_norm": 0.3633764684200287, "learning_rate": 1.7881030914912212e-05, "loss": 0.08, "step": 14720 }, { "epoch": 42.69565217391305, "grad_norm": 0.36024579405784607, "learning_rate": 1.7817714977528577e-05, "loss": 0.0686, "step": 14730 }, { "epoch": 42.72463768115942, "grad_norm": 0.40388357639312744, "learning_rate": 1.7754487025283332e-05, "loss": 0.0657, "step": 14740 }, { "epoch": 42.7536231884058, "grad_norm": 0.5098476409912109, "learning_rate": 1.7691347231039275e-05, "loss": 0.0651, "step": 14750 }, { "epoch": 42.78260869565217, "grad_norm": 0.4363411068916321, "learning_rate": 1.7628295767418164e-05, "loss": 0.0966, "step": 14760 }, { "epoch": 42.81159420289855, "grad_norm": 0.48385173082351685, "learning_rate": 1.7565332806800333e-05, "loss": 0.0751, "step": 14770 }, { "epoch": 42.84057971014493, "grad_norm": 0.4358624815940857, "learning_rate": 1.750245852132408e-05, "loss": 0.087, "step": 14780 }, { "epoch": 42.869565217391305, "grad_norm": 0.4145340621471405, "learning_rate": 1.7439673082885323e-05, "loss": 0.0738, "step": 14790 }, { "epoch": 42.89855072463768, "grad_norm": 0.4053754508495331, "learning_rate": 1.7376976663137047e-05, "loss": 0.0895, "step": 14800 }, { "epoch": 42.927536231884055, "grad_norm": 0.2905048131942749, "learning_rate": 1.7314369433488853e-05, "loss": 0.0622, "step": 14810 }, { "epoch": 42.95652173913044, "grad_norm": 0.5020401477813721, "learning_rate": 1.7251851565106548e-05, "loss": 0.0642, "step": 14820 }, { "epoch": 42.98550724637681, "grad_norm": 0.4154917597770691, "learning_rate": 1.7189423228911574e-05, "loss": 0.0807, "step": 14830 }, { "epoch": 43.01449275362319, "grad_norm": 0.5019571781158447, "learning_rate": 1.7127084595580606e-05, "loss": 0.0779, "step": 14840 }, { "epoch": 43.04347826086956, "grad_norm": 0.3335070312023163, "learning_rate": 1.706483583554513e-05, "loss": 0.0811, "step": 14850 }, { "epoch": 43.072463768115945, "grad_norm": 0.3166472911834717, "learning_rate": 1.700267711899083e-05, "loss": 0.0729, "step": 14860 }, { "epoch": 43.10144927536232, "grad_norm": 0.45485633611679077, "learning_rate": 1.69406086158573e-05, "loss": 0.0674, "step": 14870 }, { "epoch": 43.130434782608695, "grad_norm": 0.27782437205314636, "learning_rate": 1.6878630495837455e-05, "loss": 0.0833, "step": 14880 }, { "epoch": 43.15942028985507, "grad_norm": 0.24997830390930176, "learning_rate": 1.681674292837707e-05, "loss": 0.0649, "step": 14890 }, { "epoch": 43.18840579710145, "grad_norm": 0.291838675737381, "learning_rate": 1.6754946082674444e-05, "loss": 0.0664, "step": 14900 }, { "epoch": 43.21739130434783, "grad_norm": 0.3121786117553711, "learning_rate": 1.6693240127679748e-05, "loss": 0.0733, "step": 14910 }, { "epoch": 43.2463768115942, "grad_norm": 0.38471075892448425, "learning_rate": 1.663162523209475e-05, "loss": 0.0821, "step": 14920 }, { "epoch": 43.27536231884058, "grad_norm": 0.5700430274009705, "learning_rate": 1.6570101564372193e-05, "loss": 0.0669, "step": 14930 }, { "epoch": 43.30434782608695, "grad_norm": 0.5257859230041504, "learning_rate": 1.650866929271543e-05, "loss": 0.0602, "step": 14940 }, { "epoch": 43.333333333333336, "grad_norm": 0.4088708162307739, "learning_rate": 1.644732858507797e-05, "loss": 0.0871, "step": 14950 }, { "epoch": 43.36231884057971, "grad_norm": 0.5116233825683594, "learning_rate": 1.6386079609162943e-05, "loss": 0.0598, "step": 14960 }, { "epoch": 43.391304347826086, "grad_norm": 0.2616664469242096, "learning_rate": 1.6324922532422742e-05, "loss": 0.0606, "step": 14970 }, { "epoch": 43.42028985507246, "grad_norm": 0.5427923798561096, "learning_rate": 1.6263857522058434e-05, "loss": 0.0937, "step": 14980 }, { "epoch": 43.44927536231884, "grad_norm": 0.3789597153663635, "learning_rate": 1.6202884745019443e-05, "loss": 0.0851, "step": 14990 }, { "epoch": 43.47826086956522, "grad_norm": 0.46611571311950684, "learning_rate": 1.614200436800304e-05, "loss": 0.0783, "step": 15000 }, { "epoch": 43.507246376811594, "grad_norm": 0.37547364830970764, "learning_rate": 1.6081216557453814e-05, "loss": 0.0833, "step": 15010 }, { "epoch": 43.53623188405797, "grad_norm": 0.3774726688861847, "learning_rate": 1.6020521479563367e-05, "loss": 0.0767, "step": 15020 }, { "epoch": 43.56521739130435, "grad_norm": 0.44292446970939636, "learning_rate": 1.5959919300269654e-05, "loss": 0.0728, "step": 15030 }, { "epoch": 43.594202898550726, "grad_norm": 0.5792534351348877, "learning_rate": 1.5899410185256764e-05, "loss": 0.0593, "step": 15040 }, { "epoch": 43.6231884057971, "grad_norm": 0.2785523235797882, "learning_rate": 1.583899429995431e-05, "loss": 0.0612, "step": 15050 }, { "epoch": 43.65217391304348, "grad_norm": 0.29454028606414795, "learning_rate": 1.5778671809536993e-05, "loss": 0.0751, "step": 15060 }, { "epoch": 43.68115942028985, "grad_norm": 0.2879396378993988, "learning_rate": 1.5718442878924246e-05, "loss": 0.0883, "step": 15070 }, { "epoch": 43.710144927536234, "grad_norm": 1.1070629358291626, "learning_rate": 1.5658307672779593e-05, "loss": 0.093, "step": 15080 }, { "epoch": 43.73913043478261, "grad_norm": 0.29835617542266846, "learning_rate": 1.5598266355510427e-05, "loss": 0.0657, "step": 15090 }, { "epoch": 43.768115942028984, "grad_norm": 0.4190385937690735, "learning_rate": 1.553831909126744e-05, "loss": 0.0742, "step": 15100 }, { "epoch": 43.79710144927536, "grad_norm": 0.34586817026138306, "learning_rate": 1.5478466043944135e-05, "loss": 0.0715, "step": 15110 }, { "epoch": 43.82608695652174, "grad_norm": 0.37232398986816406, "learning_rate": 1.5418707377176468e-05, "loss": 0.0695, "step": 15120 }, { "epoch": 43.85507246376812, "grad_norm": 0.42787492275238037, "learning_rate": 1.535904325434233e-05, "loss": 0.0959, "step": 15130 }, { "epoch": 43.88405797101449, "grad_norm": 0.8969880938529968, "learning_rate": 1.529947383856118e-05, "loss": 0.0693, "step": 15140 }, { "epoch": 43.91304347826087, "grad_norm": 0.38823625445365906, "learning_rate": 1.5239999292693524e-05, "loss": 0.0825, "step": 15150 }, { "epoch": 43.94202898550725, "grad_norm": 0.2747124135494232, "learning_rate": 1.5180619779340505e-05, "loss": 0.0809, "step": 15160 }, { "epoch": 43.971014492753625, "grad_norm": 0.420537531375885, "learning_rate": 1.5121335460843428e-05, "loss": 0.0634, "step": 15170 }, { "epoch": 44.0, "grad_norm": 0.784938395023346, "learning_rate": 1.5062146499283347e-05, "loss": 0.073, "step": 15180 }, { "epoch": 44.028985507246375, "grad_norm": 0.47105634212493896, "learning_rate": 1.5003053056480643e-05, "loss": 0.0736, "step": 15190 }, { "epoch": 44.05797101449275, "grad_norm": 0.2737712860107422, "learning_rate": 1.4944055293994551e-05, "loss": 0.0674, "step": 15200 }, { "epoch": 44.08695652173913, "grad_norm": 0.6026032567024231, "learning_rate": 1.4885153373122656e-05, "loss": 0.0922, "step": 15210 }, { "epoch": 44.11594202898551, "grad_norm": 0.3727162182331085, "learning_rate": 1.482634745490059e-05, "loss": 0.0644, "step": 15220 }, { "epoch": 44.14492753623188, "grad_norm": 0.47362762689590454, "learning_rate": 1.4767637700101466e-05, "loss": 0.066, "step": 15230 }, { "epoch": 44.17391304347826, "grad_norm": 0.35355237126350403, "learning_rate": 1.4709024269235528e-05, "loss": 0.0617, "step": 15240 }, { "epoch": 44.20289855072464, "grad_norm": 0.3178042471408844, "learning_rate": 1.4650507322549684e-05, "loss": 0.1073, "step": 15250 }, { "epoch": 44.231884057971016, "grad_norm": 0.5713096857070923, "learning_rate": 1.4592087020026972e-05, "loss": 0.0697, "step": 15260 }, { "epoch": 44.26086956521739, "grad_norm": 0.39644819498062134, "learning_rate": 1.4533763521386318e-05, "loss": 0.0787, "step": 15270 }, { "epoch": 44.289855072463766, "grad_norm": 0.3511520326137543, "learning_rate": 1.44755369860819e-05, "loss": 0.0637, "step": 15280 }, { "epoch": 44.31884057971015, "grad_norm": 0.5535669326782227, "learning_rate": 1.441740757330287e-05, "loss": 0.0936, "step": 15290 }, { "epoch": 44.34782608695652, "grad_norm": 0.5639561414718628, "learning_rate": 1.4359375441972844e-05, "loss": 0.0809, "step": 15300 }, { "epoch": 44.3768115942029, "grad_norm": 0.3432080149650574, "learning_rate": 1.4301440750749395e-05, "loss": 0.0813, "step": 15310 }, { "epoch": 44.405797101449274, "grad_norm": 0.3394940495491028, "learning_rate": 1.4243603658023808e-05, "loss": 0.0816, "step": 15320 }, { "epoch": 44.43478260869565, "grad_norm": 0.3588254451751709, "learning_rate": 1.4185864321920444e-05, "loss": 0.0711, "step": 15330 }, { "epoch": 44.46376811594203, "grad_norm": 0.3964613676071167, "learning_rate": 1.4128222900296485e-05, "loss": 0.0795, "step": 15340 }, { "epoch": 44.492753623188406, "grad_norm": 0.38622230291366577, "learning_rate": 1.407067955074135e-05, "loss": 0.0716, "step": 15350 }, { "epoch": 44.52173913043478, "grad_norm": 0.28652891516685486, "learning_rate": 1.4013234430576356e-05, "loss": 0.067, "step": 15360 }, { "epoch": 44.55072463768116, "grad_norm": 0.3979763388633728, "learning_rate": 1.3955887696854286e-05, "loss": 0.0761, "step": 15370 }, { "epoch": 44.57971014492754, "grad_norm": 0.4278284013271332, "learning_rate": 1.38986395063589e-05, "loss": 0.073, "step": 15380 }, { "epoch": 44.608695652173914, "grad_norm": 0.40081092715263367, "learning_rate": 1.3841490015604597e-05, "loss": 0.0859, "step": 15390 }, { "epoch": 44.63768115942029, "grad_norm": 0.45146530866622925, "learning_rate": 1.3784439380835879e-05, "loss": 0.0809, "step": 15400 }, { "epoch": 44.666666666666664, "grad_norm": 0.3806000351905823, "learning_rate": 1.3727487758026986e-05, "loss": 0.0725, "step": 15410 }, { "epoch": 44.69565217391305, "grad_norm": 0.5500205755233765, "learning_rate": 1.3670635302881525e-05, "loss": 0.0737, "step": 15420 }, { "epoch": 44.72463768115942, "grad_norm": 0.2973146438598633, "learning_rate": 1.3613882170831888e-05, "loss": 0.0739, "step": 15430 }, { "epoch": 44.7536231884058, "grad_norm": 0.4235207736492157, "learning_rate": 1.355722851703901e-05, "loss": 0.0837, "step": 15440 }, { "epoch": 44.78260869565217, "grad_norm": 0.3844519853591919, "learning_rate": 1.3500674496391814e-05, "loss": 0.0669, "step": 15450 }, { "epoch": 44.81159420289855, "grad_norm": 0.3494715988636017, "learning_rate": 1.3444220263506795e-05, "loss": 0.0587, "step": 15460 }, { "epoch": 44.84057971014493, "grad_norm": 0.5101982355117798, "learning_rate": 1.3387865972727714e-05, "loss": 0.0871, "step": 15470 }, { "epoch": 44.869565217391305, "grad_norm": 0.3597027361392975, "learning_rate": 1.3331611778125036e-05, "loss": 0.0728, "step": 15480 }, { "epoch": 44.89855072463768, "grad_norm": 0.5626224279403687, "learning_rate": 1.3275457833495564e-05, "loss": 0.0804, "step": 15490 }, { "epoch": 44.927536231884055, "grad_norm": 0.3257477581501007, "learning_rate": 1.3219404292362065e-05, "loss": 0.0632, "step": 15500 }, { "epoch": 44.95652173913044, "grad_norm": 0.4441049098968506, "learning_rate": 1.3163451307972751e-05, "loss": 0.0695, "step": 15510 }, { "epoch": 44.98550724637681, "grad_norm": 0.3859218657016754, "learning_rate": 1.3107599033300977e-05, "loss": 0.0671, "step": 15520 }, { "epoch": 45.01449275362319, "grad_norm": 0.4354454278945923, "learning_rate": 1.305184762104471e-05, "loss": 0.0645, "step": 15530 }, { "epoch": 45.04347826086956, "grad_norm": 0.2836010158061981, "learning_rate": 1.2996197223626178e-05, "loss": 0.0645, "step": 15540 }, { "epoch": 45.072463768115945, "grad_norm": 0.3890087306499481, "learning_rate": 1.2940647993191457e-05, "loss": 0.0631, "step": 15550 }, { "epoch": 45.10144927536232, "grad_norm": 0.3880113959312439, "learning_rate": 1.2885200081610005e-05, "loss": 0.0642, "step": 15560 }, { "epoch": 45.130434782608695, "grad_norm": 0.47158360481262207, "learning_rate": 1.2829853640474316e-05, "loss": 0.076, "step": 15570 }, { "epoch": 45.15942028985507, "grad_norm": 0.4908730089664459, "learning_rate": 1.2774608821099438e-05, "loss": 0.082, "step": 15580 }, { "epoch": 45.18840579710145, "grad_norm": 0.44620388746261597, "learning_rate": 1.2719465774522577e-05, "loss": 0.0805, "step": 15590 }, { "epoch": 45.21739130434783, "grad_norm": 0.39248040318489075, "learning_rate": 1.2664424651502755e-05, "loss": 0.0798, "step": 15600 }, { "epoch": 45.2463768115942, "grad_norm": 0.4180006980895996, "learning_rate": 1.260948560252026e-05, "loss": 0.0856, "step": 15610 }, { "epoch": 45.27536231884058, "grad_norm": 0.44177964329719543, "learning_rate": 1.2554648777776396e-05, "loss": 0.0949, "step": 15620 }, { "epoch": 45.30434782608695, "grad_norm": 0.33813127875328064, "learning_rate": 1.2499914327192919e-05, "loss": 0.0781, "step": 15630 }, { "epoch": 45.333333333333336, "grad_norm": 0.3105308711528778, "learning_rate": 1.2445282400411722e-05, "loss": 0.07, "step": 15640 }, { "epoch": 45.36231884057971, "grad_norm": 0.2976597249507904, "learning_rate": 1.2390753146794437e-05, "loss": 0.0752, "step": 15650 }, { "epoch": 45.391304347826086, "grad_norm": 0.4583851993083954, "learning_rate": 1.2336326715421925e-05, "loss": 0.0767, "step": 15660 }, { "epoch": 45.42028985507246, "grad_norm": 0.41092222929000854, "learning_rate": 1.2282003255094005e-05, "loss": 0.0728, "step": 15670 }, { "epoch": 45.44927536231884, "grad_norm": 0.3140925467014313, "learning_rate": 1.2227782914328928e-05, "loss": 0.069, "step": 15680 }, { "epoch": 45.47826086956522, "grad_norm": 0.355333149433136, "learning_rate": 1.2173665841363018e-05, "loss": 0.0711, "step": 15690 }, { "epoch": 45.507246376811594, "grad_norm": 0.3979286253452301, "learning_rate": 1.211965218415032e-05, "loss": 0.0755, "step": 15700 }, { "epoch": 45.53623188405797, "grad_norm": 0.27833595871925354, "learning_rate": 1.2065742090362082e-05, "loss": 0.0804, "step": 15710 }, { "epoch": 45.56521739130435, "grad_norm": 0.3665226101875305, "learning_rate": 1.2011935707386457e-05, "loss": 0.0959, "step": 15720 }, { "epoch": 45.594202898550726, "grad_norm": 0.3983865976333618, "learning_rate": 1.1958233182328044e-05, "loss": 0.0809, "step": 15730 }, { "epoch": 45.6231884057971, "grad_norm": 0.3656999468803406, "learning_rate": 1.1904634662007474e-05, "loss": 0.0675, "step": 15740 }, { "epoch": 45.65217391304348, "grad_norm": 0.47683125734329224, "learning_rate": 1.1851140292961088e-05, "loss": 0.0754, "step": 15750 }, { "epoch": 45.68115942028985, "grad_norm": 0.5368967652320862, "learning_rate": 1.1797750221440424e-05, "loss": 0.0786, "step": 15760 }, { "epoch": 45.710144927536234, "grad_norm": 0.37085482478141785, "learning_rate": 1.1744464593411897e-05, "loss": 0.0735, "step": 15770 }, { "epoch": 45.73913043478261, "grad_norm": 0.3648932874202728, "learning_rate": 1.1691283554556399e-05, "loss": 0.0688, "step": 15780 }, { "epoch": 45.768115942028984, "grad_norm": 0.25463685393333435, "learning_rate": 1.1638207250268834e-05, "loss": 0.0658, "step": 15790 }, { "epoch": 45.79710144927536, "grad_norm": 0.2738022804260254, "learning_rate": 1.158523582565782e-05, "loss": 0.0851, "step": 15800 }, { "epoch": 45.82608695652174, "grad_norm": 0.43908044695854187, "learning_rate": 1.1532369425545192e-05, "loss": 0.079, "step": 15810 }, { "epoch": 45.85507246376812, "grad_norm": 0.424430251121521, "learning_rate": 1.1479608194465662e-05, "loss": 0.0783, "step": 15820 }, { "epoch": 45.88405797101449, "grad_norm": 0.4064854681491852, "learning_rate": 1.1426952276666442e-05, "loss": 0.0687, "step": 15830 }, { "epoch": 45.91304347826087, "grad_norm": 0.5900323987007141, "learning_rate": 1.1374401816106778e-05, "loss": 0.0811, "step": 15840 }, { "epoch": 45.94202898550725, "grad_norm": 0.3530072867870331, "learning_rate": 1.1321956956457646e-05, "loss": 0.0707, "step": 15850 }, { "epoch": 45.971014492753625, "grad_norm": 0.4914955794811249, "learning_rate": 1.1269617841101277e-05, "loss": 0.0663, "step": 15860 }, { "epoch": 46.0, "grad_norm": 0.6903124451637268, "learning_rate": 1.1217384613130804e-05, "loss": 0.0757, "step": 15870 }, { "epoch": 46.028985507246375, "grad_norm": 0.35140737891197205, "learning_rate": 1.11652574153499e-05, "loss": 0.0689, "step": 15880 }, { "epoch": 46.05797101449275, "grad_norm": 0.45175376534461975, "learning_rate": 1.1113236390272303e-05, "loss": 0.0698, "step": 15890 }, { "epoch": 46.08695652173913, "grad_norm": 0.5367652773857117, "learning_rate": 1.106132168012155e-05, "loss": 0.0757, "step": 15900 }, { "epoch": 46.11594202898551, "grad_norm": 0.47009265422821045, "learning_rate": 1.1009513426830448e-05, "loss": 0.0658, "step": 15910 }, { "epoch": 46.14492753623188, "grad_norm": 0.26874783635139465, "learning_rate": 1.0957811772040777e-05, "loss": 0.0735, "step": 15920 }, { "epoch": 46.17391304347826, "grad_norm": 0.5538775324821472, "learning_rate": 1.0906216857102913e-05, "loss": 0.073, "step": 15930 }, { "epoch": 46.20289855072464, "grad_norm": 0.33384883403778076, "learning_rate": 1.0854728823075355e-05, "loss": 0.0662, "step": 15940 }, { "epoch": 46.231884057971016, "grad_norm": 0.35423901677131653, "learning_rate": 1.0803347810724452e-05, "loss": 0.0773, "step": 15950 }, { "epoch": 46.26086956521739, "grad_norm": 0.3087175488471985, "learning_rate": 1.0752073960523911e-05, "loss": 0.0588, "step": 15960 }, { "epoch": 46.289855072463766, "grad_norm": 0.22049643099308014, "learning_rate": 1.070090741265447e-05, "loss": 0.0737, "step": 15970 }, { "epoch": 46.31884057971015, "grad_norm": 0.3322051763534546, "learning_rate": 1.0649848307003547e-05, "loss": 0.0654, "step": 15980 }, { "epoch": 46.34782608695652, "grad_norm": 0.42505577206611633, "learning_rate": 1.0598896783164757e-05, "loss": 0.0815, "step": 15990 }, { "epoch": 46.3768115942029, "grad_norm": 0.26743263006210327, "learning_rate": 1.0548052980437645e-05, "loss": 0.0557, "step": 16000 }, { "epoch": 46.405797101449274, "grad_norm": 0.3872216045856476, "learning_rate": 1.049731703782722e-05, "loss": 0.0784, "step": 16010 }, { "epoch": 46.43478260869565, "grad_norm": 0.4108617603778839, "learning_rate": 1.0446689094043587e-05, "loss": 0.0696, "step": 16020 }, { "epoch": 46.46376811594203, "grad_norm": 0.6282533407211304, "learning_rate": 1.039616928750165e-05, "loss": 0.0654, "step": 16030 }, { "epoch": 46.492753623188406, "grad_norm": 0.28340834379196167, "learning_rate": 1.0345757756320612e-05, "loss": 0.0656, "step": 16040 }, { "epoch": 46.52173913043478, "grad_norm": 0.521857738494873, "learning_rate": 1.0295454638323666e-05, "loss": 0.0702, "step": 16050 }, { "epoch": 46.55072463768116, "grad_norm": 0.23587919771671295, "learning_rate": 1.0245260071037632e-05, "loss": 0.068, "step": 16060 }, { "epoch": 46.57971014492754, "grad_norm": 0.36565327644348145, "learning_rate": 1.0195174191692518e-05, "loss": 0.0866, "step": 16070 }, { "epoch": 46.608695652173914, "grad_norm": 0.6374870538711548, "learning_rate": 1.014519713722124e-05, "loss": 0.0832, "step": 16080 }, { "epoch": 46.63768115942029, "grad_norm": 0.2241542786359787, "learning_rate": 1.0095329044259132e-05, "loss": 0.0719, "step": 16090 }, { "epoch": 46.666666666666664, "grad_norm": 0.5143556594848633, "learning_rate": 1.004557004914365e-05, "loss": 0.074, "step": 16100 }, { "epoch": 46.69565217391305, "grad_norm": 0.35472121834754944, "learning_rate": 9.995920287914007e-06, "loss": 0.0677, "step": 16110 }, { "epoch": 46.72463768115942, "grad_norm": 0.3925060033798218, "learning_rate": 9.946379896310737e-06, "loss": 0.0583, "step": 16120 }, { "epoch": 46.7536231884058, "grad_norm": 0.40613794326782227, "learning_rate": 9.896949009775396e-06, "loss": 0.0784, "step": 16130 }, { "epoch": 46.78260869565217, "grad_norm": 0.3456399738788605, "learning_rate": 9.847627763450134e-06, "loss": 0.0606, "step": 16140 }, { "epoch": 46.81159420289855, "grad_norm": 0.5274021029472351, "learning_rate": 9.798416292177337e-06, "loss": 0.0621, "step": 16150 }, { "epoch": 46.84057971014493, "grad_norm": 0.39418211579322815, "learning_rate": 9.74931473049932e-06, "loss": 0.0675, "step": 16160 }, { "epoch": 46.869565217391305, "grad_norm": 0.24988050758838654, "learning_rate": 9.700323212657847e-06, "loss": 0.0582, "step": 16170 }, { "epoch": 46.89855072463768, "grad_norm": 0.5177226066589355, "learning_rate": 9.65144187259388e-06, "loss": 0.0812, "step": 16180 }, { "epoch": 46.927536231884055, "grad_norm": 0.40939176082611084, "learning_rate": 9.602670843947132e-06, "loss": 0.0729, "step": 16190 }, { "epoch": 46.95652173913044, "grad_norm": 0.3772121071815491, "learning_rate": 9.554010260055713e-06, "loss": 0.0619, "step": 16200 }, { "epoch": 46.98550724637681, "grad_norm": 0.438474178314209, "learning_rate": 9.505460253955834e-06, "loss": 0.069, "step": 16210 }, { "epoch": 47.01449275362319, "grad_norm": 0.4537893831729889, "learning_rate": 9.457020958381324e-06, "loss": 0.0744, "step": 16220 }, { "epoch": 47.04347826086956, "grad_norm": 0.44046300649642944, "learning_rate": 9.408692505763395e-06, "loss": 0.0555, "step": 16230 }, { "epoch": 47.072463768115945, "grad_norm": 0.3748812973499298, "learning_rate": 9.360475028230181e-06, "loss": 0.0749, "step": 16240 }, { "epoch": 47.10144927536232, "grad_norm": 0.7424555420875549, "learning_rate": 9.312368657606412e-06, "loss": 0.0795, "step": 16250 }, { "epoch": 47.130434782608695, "grad_norm": 0.3517577648162842, "learning_rate": 9.264373525413096e-06, "loss": 0.0718, "step": 16260 }, { "epoch": 47.15942028985507, "grad_norm": 0.5834726691246033, "learning_rate": 9.216489762867058e-06, "loss": 0.0908, "step": 16270 }, { "epoch": 47.18840579710145, "grad_norm": 0.5662937164306641, "learning_rate": 9.168717500880708e-06, "loss": 0.0707, "step": 16280 }, { "epoch": 47.21739130434783, "grad_norm": 0.2651749849319458, "learning_rate": 9.121056870061574e-06, "loss": 0.0579, "step": 16290 }, { "epoch": 47.2463768115942, "grad_norm": 0.5331082940101624, "learning_rate": 9.073508000711983e-06, "loss": 0.0682, "step": 16300 }, { "epoch": 47.27536231884058, "grad_norm": 0.46843448281288147, "learning_rate": 9.026071022828758e-06, "loss": 0.0626, "step": 16310 }, { "epoch": 47.30434782608695, "grad_norm": 0.443735808134079, "learning_rate": 8.978746066102771e-06, "loss": 0.0669, "step": 16320 }, { "epoch": 47.333333333333336, "grad_norm": 0.416926771402359, "learning_rate": 8.931533259918634e-06, "loss": 0.0636, "step": 16330 }, { "epoch": 47.36231884057971, "grad_norm": 0.38428860902786255, "learning_rate": 8.884432733354382e-06, "loss": 0.0706, "step": 16340 }, { "epoch": 47.391304347826086, "grad_norm": 0.47722136974334717, "learning_rate": 8.837444615181029e-06, "loss": 0.0748, "step": 16350 }, { "epoch": 47.42028985507246, "grad_norm": 0.5005432367324829, "learning_rate": 8.790569033862323e-06, "loss": 0.0829, "step": 16360 }, { "epoch": 47.44927536231884, "grad_norm": 0.2604803442955017, "learning_rate": 8.7438061175543e-06, "loss": 0.0837, "step": 16370 }, { "epoch": 47.47826086956522, "grad_norm": 0.4663313925266266, "learning_rate": 8.697155994104978e-06, "loss": 0.088, "step": 16380 }, { "epoch": 47.507246376811594, "grad_norm": 0.28729355335235596, "learning_rate": 8.650618791054033e-06, "loss": 0.0562, "step": 16390 }, { "epoch": 47.53623188405797, "grad_norm": 0.40525946021080017, "learning_rate": 8.604194635632373e-06, "loss": 0.0748, "step": 16400 }, { "epoch": 47.56521739130435, "grad_norm": 0.4354046583175659, "learning_rate": 8.557883654761906e-06, "loss": 0.0625, "step": 16410 }, { "epoch": 47.594202898550726, "grad_norm": 0.35681065917015076, "learning_rate": 8.511685975055061e-06, "loss": 0.0572, "step": 16420 }, { "epoch": 47.6231884057971, "grad_norm": 0.22976984083652496, "learning_rate": 8.46560172281452e-06, "loss": 0.0609, "step": 16430 }, { "epoch": 47.65217391304348, "grad_norm": 0.3279632031917572, "learning_rate": 8.419631024032893e-06, "loss": 0.0663, "step": 16440 }, { "epoch": 47.68115942028985, "grad_norm": 0.41450434923171997, "learning_rate": 8.373774004392293e-06, "loss": 0.0787, "step": 16450 }, { "epoch": 47.710144927536234, "grad_norm": 0.4587912857532501, "learning_rate": 8.32803078926409e-06, "loss": 0.0691, "step": 16460 }, { "epoch": 47.73913043478261, "grad_norm": 0.33752039074897766, "learning_rate": 8.282401503708454e-06, "loss": 0.0719, "step": 16470 }, { "epoch": 47.768115942028984, "grad_norm": 0.5777345895767212, "learning_rate": 8.23688627247412e-06, "loss": 0.0667, "step": 16480 }, { "epoch": 47.79710144927536, "grad_norm": 0.5599574446678162, "learning_rate": 8.191485219998007e-06, "loss": 0.0726, "step": 16490 }, { "epoch": 47.82608695652174, "grad_norm": 0.42396897077560425, "learning_rate": 8.146198470404843e-06, "loss": 0.0619, "step": 16500 }, { "epoch": 47.85507246376812, "grad_norm": 0.5751467347145081, "learning_rate": 8.101026147506897e-06, "loss": 0.0925, "step": 16510 }, { "epoch": 47.88405797101449, "grad_norm": 0.4107610285282135, "learning_rate": 8.05596837480353e-06, "loss": 0.074, "step": 16520 }, { "epoch": 47.91304347826087, "grad_norm": 0.42101240158081055, "learning_rate": 8.011025275480998e-06, "loss": 0.0775, "step": 16530 }, { "epoch": 47.94202898550725, "grad_norm": 0.2733308970928192, "learning_rate": 7.966196972412027e-06, "loss": 0.0805, "step": 16540 }, { "epoch": 47.971014492753625, "grad_norm": 0.4154696762561798, "learning_rate": 7.92148358815547e-06, "loss": 0.0894, "step": 16550 }, { "epoch": 48.0, "grad_norm": 1.2088205814361572, "learning_rate": 7.87688524495604e-06, "loss": 0.0954, "step": 16560 }, { "epoch": 48.028985507246375, "grad_norm": 0.4644067883491516, "learning_rate": 7.83240206474386e-06, "loss": 0.0566, "step": 16570 }, { "epoch": 48.05797101449275, "grad_norm": 0.6014934778213501, "learning_rate": 7.788034169134272e-06, "loss": 0.0705, "step": 16580 }, { "epoch": 48.08695652173913, "grad_norm": 0.49670958518981934, "learning_rate": 7.743781679427414e-06, "loss": 0.0864, "step": 16590 }, { "epoch": 48.11594202898551, "grad_norm": 0.41624578833580017, "learning_rate": 7.699644716607895e-06, "loss": 0.0665, "step": 16600 }, { "epoch": 48.14492753623188, "grad_norm": 0.4324892461299896, "learning_rate": 7.655623401344486e-06, "loss": 0.0764, "step": 16610 }, { "epoch": 48.17391304347826, "grad_norm": 0.31850993633270264, "learning_rate": 7.611717853989775e-06, "loss": 0.0583, "step": 16620 }, { "epoch": 48.20289855072464, "grad_norm": 0.2776861786842346, "learning_rate": 7.567928194579854e-06, "loss": 0.0611, "step": 16630 }, { "epoch": 48.231884057971016, "grad_norm": 0.4844400882720947, "learning_rate": 7.524254542833997e-06, "loss": 0.0851, "step": 16640 }, { "epoch": 48.26086956521739, "grad_norm": 0.29021748900413513, "learning_rate": 7.480697018154286e-06, "loss": 0.0714, "step": 16650 }, { "epoch": 48.289855072463766, "grad_norm": 0.2596271336078644, "learning_rate": 7.437255739625332e-06, "loss": 0.0678, "step": 16660 }, { "epoch": 48.31884057971015, "grad_norm": 0.30910682678222656, "learning_rate": 7.393930826013923e-06, "loss": 0.077, "step": 16670 }, { "epoch": 48.34782608695652, "grad_norm": 0.5534478425979614, "learning_rate": 7.350722395768722e-06, "loss": 0.0778, "step": 16680 }, { "epoch": 48.3768115942029, "grad_norm": 0.38438495993614197, "learning_rate": 7.307630567019963e-06, "loss": 0.0653, "step": 16690 }, { "epoch": 48.405797101449274, "grad_norm": 0.366256445646286, "learning_rate": 7.264655457579e-06, "loss": 0.088, "step": 16700 }, { "epoch": 48.43478260869565, "grad_norm": 0.38456887006759644, "learning_rate": 7.221797184938184e-06, "loss": 0.0714, "step": 16710 }, { "epoch": 48.46376811594203, "grad_norm": 0.41040605306625366, "learning_rate": 7.179055866270373e-06, "loss": 0.0714, "step": 16720 }, { "epoch": 48.492753623188406, "grad_norm": 0.5971121788024902, "learning_rate": 7.136431618428707e-06, "loss": 0.0677, "step": 16730 }, { "epoch": 48.52173913043478, "grad_norm": 0.4219498634338379, "learning_rate": 7.09392455794628e-06, "loss": 0.0816, "step": 16740 }, { "epoch": 48.55072463768116, "grad_norm": 0.4855560064315796, "learning_rate": 7.051534801035725e-06, "loss": 0.0734, "step": 16750 }, { "epoch": 48.57971014492754, "grad_norm": 0.8344466686248779, "learning_rate": 7.00926246358905e-06, "loss": 0.0749, "step": 16760 }, { "epoch": 48.608695652173914, "grad_norm": 0.35824501514434814, "learning_rate": 6.967107661177191e-06, "loss": 0.0752, "step": 16770 }, { "epoch": 48.63768115942029, "grad_norm": 0.4434044361114502, "learning_rate": 6.925070509049786e-06, "loss": 0.0719, "step": 16780 }, { "epoch": 48.666666666666664, "grad_norm": 0.5850719809532166, "learning_rate": 6.883151122134812e-06, "loss": 0.0797, "step": 16790 }, { "epoch": 48.69565217391305, "grad_norm": 0.42451825737953186, "learning_rate": 6.8413496150382394e-06, "loss": 0.0795, "step": 16800 }, { "epoch": 48.72463768115942, "grad_norm": 0.40962865948677063, "learning_rate": 6.7996661020438165e-06, "loss": 0.0754, "step": 16810 }, { "epoch": 48.7536231884058, "grad_norm": 0.48921623826026917, "learning_rate": 6.758100697112662e-06, "loss": 0.0566, "step": 16820 }, { "epoch": 48.78260869565217, "grad_norm": 0.5247887969017029, "learning_rate": 6.716653513883026e-06, "loss": 0.0769, "step": 16830 }, { "epoch": 48.81159420289855, "grad_norm": 0.4218703508377075, "learning_rate": 6.675324665669913e-06, "loss": 0.0563, "step": 16840 }, { "epoch": 48.84057971014493, "grad_norm": 0.42967677116394043, "learning_rate": 6.634114265464803e-06, "loss": 0.0745, "step": 16850 }, { "epoch": 48.869565217391305, "grad_norm": 0.4079236686229706, "learning_rate": 6.59302242593538e-06, "loss": 0.072, "step": 16860 }, { "epoch": 48.89855072463768, "grad_norm": 0.3544526994228363, "learning_rate": 6.552049259425141e-06, "loss": 0.0701, "step": 16870 }, { "epoch": 48.927536231884055, "grad_norm": 0.625856876373291, "learning_rate": 6.511194877953181e-06, "loss": 0.0625, "step": 16880 }, { "epoch": 48.95652173913044, "grad_norm": 0.2915985882282257, "learning_rate": 6.470459393213813e-06, "loss": 0.073, "step": 16890 }, { "epoch": 48.98550724637681, "grad_norm": 0.4318452477455139, "learning_rate": 6.429842916576279e-06, "loss": 0.071, "step": 16900 }, { "epoch": 49.01449275362319, "grad_norm": 0.3133067488670349, "learning_rate": 6.389345559084503e-06, "loss": 0.0619, "step": 16910 }, { "epoch": 49.04347826086956, "grad_norm": 0.36420461535453796, "learning_rate": 6.348967431456682e-06, "loss": 0.0653, "step": 16920 }, { "epoch": 49.072463768115945, "grad_norm": 0.3078491687774658, "learning_rate": 6.30870864408511e-06, "loss": 0.0695, "step": 16930 }, { "epoch": 49.10144927536232, "grad_norm": 0.39188358187675476, "learning_rate": 6.268569307035754e-06, "loss": 0.0694, "step": 16940 }, { "epoch": 49.130434782608695, "grad_norm": 0.7680745720863342, "learning_rate": 6.228549530048022e-06, "loss": 0.0811, "step": 16950 }, { "epoch": 49.15942028985507, "grad_norm": 0.46656176447868347, "learning_rate": 6.1886494225344814e-06, "loss": 0.0599, "step": 16960 }, { "epoch": 49.18840579710145, "grad_norm": 0.3459886610507965, "learning_rate": 6.148869093580479e-06, "loss": 0.0623, "step": 16970 }, { "epoch": 49.21739130434783, "grad_norm": 0.5934762954711914, "learning_rate": 6.109208651943921e-06, "loss": 0.0709, "step": 16980 }, { "epoch": 49.2463768115942, "grad_norm": 0.6150892972946167, "learning_rate": 6.069668206054946e-06, "loss": 0.0758, "step": 16990 }, { "epoch": 49.27536231884058, "grad_norm": 0.2814181447029114, "learning_rate": 6.0302478640156145e-06, "loss": 0.0862, "step": 17000 }, { "epoch": 49.30434782608695, "grad_norm": 0.34793612360954285, "learning_rate": 5.990947733599644e-06, "loss": 0.0712, "step": 17010 }, { "epoch": 49.333333333333336, "grad_norm": 0.44212785363197327, "learning_rate": 5.951767922252105e-06, "loss": 0.0711, "step": 17020 }, { "epoch": 49.36231884057971, "grad_norm": 0.5267791748046875, "learning_rate": 5.912708537089068e-06, "loss": 0.0682, "step": 17030 }, { "epoch": 49.391304347826086, "grad_norm": 0.43204671144485474, "learning_rate": 5.873769684897434e-06, "loss": 0.0726, "step": 17040 }, { "epoch": 49.42028985507246, "grad_norm": 0.4965454041957855, "learning_rate": 5.834951472134514e-06, "loss": 0.0664, "step": 17050 }, { "epoch": 49.44927536231884, "grad_norm": 0.4370034635066986, "learning_rate": 5.796254004927832e-06, "loss": 0.0796, "step": 17060 }, { "epoch": 49.47826086956522, "grad_norm": 0.3659558594226837, "learning_rate": 5.757677389074806e-06, "loss": 0.0619, "step": 17070 }, { "epoch": 49.507246376811594, "grad_norm": 0.6674015522003174, "learning_rate": 5.719221730042385e-06, "loss": 0.073, "step": 17080 }, { "epoch": 49.53623188405797, "grad_norm": 0.37986722588539124, "learning_rate": 5.680887132966911e-06, "loss": 0.0882, "step": 17090 }, { "epoch": 49.56521739130435, "grad_norm": 0.2692256271839142, "learning_rate": 5.642673702653683e-06, "loss": 0.0748, "step": 17100 }, { "epoch": 49.594202898550726, "grad_norm": 0.5755336880683899, "learning_rate": 5.604581543576781e-06, "loss": 0.0698, "step": 17110 }, { "epoch": 49.6231884057971, "grad_norm": 0.3448822796344757, "learning_rate": 5.566610759878704e-06, "loss": 0.074, "step": 17120 }, { "epoch": 49.65217391304348, "grad_norm": 0.4350120723247528, "learning_rate": 5.528761455370119e-06, "loss": 0.0778, "step": 17130 }, { "epoch": 49.68115942028985, "grad_norm": 0.44996991753578186, "learning_rate": 5.491033733529594e-06, "loss": 0.07, "step": 17140 }, { "epoch": 49.710144927536234, "grad_norm": 0.42578864097595215, "learning_rate": 5.453427697503255e-06, "loss": 0.0609, "step": 17150 }, { "epoch": 49.73913043478261, "grad_norm": 0.43065088987350464, "learning_rate": 5.415943450104599e-06, "loss": 0.0767, "step": 17160 }, { "epoch": 49.768115942028984, "grad_norm": 0.49892979860305786, "learning_rate": 5.378581093814111e-06, "loss": 0.0626, "step": 17170 }, { "epoch": 49.79710144927536, "grad_norm": 0.44104957580566406, "learning_rate": 5.3413407307790375e-06, "loss": 0.063, "step": 17180 }, { "epoch": 49.82608695652174, "grad_norm": 0.366802453994751, "learning_rate": 5.30422246281313e-06, "loss": 0.0713, "step": 17190 }, { "epoch": 49.85507246376812, "grad_norm": 0.6189764738082886, "learning_rate": 5.267226391396296e-06, "loss": 0.064, "step": 17200 }, { "epoch": 49.88405797101449, "grad_norm": 0.39267656207084656, "learning_rate": 5.2303526176744e-06, "loss": 0.0665, "step": 17210 }, { "epoch": 49.91304347826087, "grad_norm": 0.3036266267299652, "learning_rate": 5.193601242458929e-06, "loss": 0.0562, "step": 17220 }, { "epoch": 49.94202898550725, "grad_norm": 0.457115113735199, "learning_rate": 5.156972366226714e-06, "loss": 0.0588, "step": 17230 }, { "epoch": 49.971014492753625, "grad_norm": 0.5933891534805298, "learning_rate": 5.120466089119735e-06, "loss": 0.0705, "step": 17240 }, { "epoch": 50.0, "grad_norm": 0.7844343781471252, "learning_rate": 5.084082510944749e-06, "loss": 0.0569, "step": 17250 }, { "epoch": 50.028985507246375, "grad_norm": 0.3615298867225647, "learning_rate": 5.047821731173058e-06, "loss": 0.0672, "step": 17260 }, { "epoch": 50.05797101449275, "grad_norm": 0.614403247833252, "learning_rate": 5.011683848940274e-06, "loss": 0.073, "step": 17270 }, { "epoch": 50.08695652173913, "grad_norm": 0.42543458938598633, "learning_rate": 4.975668963045954e-06, "loss": 0.0728, "step": 17280 }, { "epoch": 50.11594202898551, "grad_norm": 0.54520583152771, "learning_rate": 4.9397771719534525e-06, "loss": 0.0766, "step": 17290 }, { "epoch": 50.14492753623188, "grad_norm": 0.41185370087623596, "learning_rate": 4.904008573789548e-06, "loss": 0.0699, "step": 17300 }, { "epoch": 50.17391304347826, "grad_norm": 0.3548359274864197, "learning_rate": 4.8683632663442005e-06, "loss": 0.0632, "step": 17310 }, { "epoch": 50.20289855072464, "grad_norm": 0.32171154022216797, "learning_rate": 4.832841347070343e-06, "loss": 0.0659, "step": 17320 }, { "epoch": 50.231884057971016, "grad_norm": 0.6406564712524414, "learning_rate": 4.797442913083539e-06, "loss": 0.0858, "step": 17330 }, { "epoch": 50.26086956521739, "grad_norm": 0.6928719282150269, "learning_rate": 4.7621680611617596e-06, "loss": 0.0716, "step": 17340 }, { "epoch": 50.289855072463766, "grad_norm": 0.43120306730270386, "learning_rate": 4.727016887745095e-06, "loss": 0.0797, "step": 17350 }, { "epoch": 50.31884057971015, "grad_norm": 0.3705822229385376, "learning_rate": 4.691989488935511e-06, "loss": 0.0696, "step": 17360 }, { "epoch": 50.34782608695652, "grad_norm": 0.7047597169876099, "learning_rate": 4.657085960496588e-06, "loss": 0.0684, "step": 17370 }, { "epoch": 50.3768115942029, "grad_norm": 0.4047369062900543, "learning_rate": 4.6223063978532265e-06, "loss": 0.0625, "step": 17380 }, { "epoch": 50.405797101449274, "grad_norm": 0.27197515964508057, "learning_rate": 4.587650896091439e-06, "loss": 0.0793, "step": 17390 }, { "epoch": 50.43478260869565, "grad_norm": 0.47317975759506226, "learning_rate": 4.553119549958035e-06, "loss": 0.0751, "step": 17400 }, { "epoch": 50.46376811594203, "grad_norm": 0.4325650930404663, "learning_rate": 4.518712453860385e-06, "loss": 0.0701, "step": 17410 }, { "epoch": 50.492753623188406, "grad_norm": 0.446105033159256, "learning_rate": 4.484429701866205e-06, "loss": 0.07, "step": 17420 }, { "epoch": 50.52173913043478, "grad_norm": 0.5336192846298218, "learning_rate": 4.4502713877031975e-06, "loss": 0.0799, "step": 17430 }, { "epoch": 50.55072463768116, "grad_norm": 0.4086814522743225, "learning_rate": 4.416237604758911e-06, "loss": 0.0698, "step": 17440 }, { "epoch": 50.57971014492754, "grad_norm": 0.25821778178215027, "learning_rate": 4.3823284460804025e-06, "loss": 0.0602, "step": 17450 }, { "epoch": 50.608695652173914, "grad_norm": 0.31673121452331543, "learning_rate": 4.348544004374011e-06, "loss": 0.0723, "step": 17460 }, { "epoch": 50.63768115942029, "grad_norm": 0.5380536913871765, "learning_rate": 4.314884372005123e-06, "loss": 0.0802, "step": 17470 }, { "epoch": 50.666666666666664, "grad_norm": 0.2931354343891144, "learning_rate": 4.281349640997867e-06, "loss": 0.0503, "step": 17480 }, { "epoch": 50.69565217391305, "grad_norm": 0.42437413334846497, "learning_rate": 4.247939903034942e-06, "loss": 0.0509, "step": 17490 }, { "epoch": 50.72463768115942, "grad_norm": 0.309953510761261, "learning_rate": 4.214655249457284e-06, "loss": 0.0629, "step": 17500 }, { "epoch": 50.7536231884058, "grad_norm": 0.284572571516037, "learning_rate": 4.181495771263855e-06, "loss": 0.075, "step": 17510 }, { "epoch": 50.78260869565217, "grad_norm": 0.47553151845932007, "learning_rate": 4.148461559111427e-06, "loss": 0.0803, "step": 17520 }, { "epoch": 50.81159420289855, "grad_norm": 0.6055026650428772, "learning_rate": 4.115552703314252e-06, "loss": 0.0752, "step": 17530 }, { "epoch": 50.84057971014493, "grad_norm": 0.2628669738769531, "learning_rate": 4.082769293843886e-06, "loss": 0.0702, "step": 17540 }, { "epoch": 50.869565217391305, "grad_norm": 0.3795563280582428, "learning_rate": 4.050111420328939e-06, "loss": 0.0706, "step": 17550 }, { "epoch": 50.89855072463768, "grad_norm": 0.26477569341659546, "learning_rate": 4.017579172054764e-06, "loss": 0.0796, "step": 17560 }, { "epoch": 50.927536231884055, "grad_norm": 0.4309571087360382, "learning_rate": 3.985172637963308e-06, "loss": 0.0889, "step": 17570 }, { "epoch": 50.95652173913044, "grad_norm": 0.40319034457206726, "learning_rate": 3.952891906652784e-06, "loss": 0.072, "step": 17580 }, { "epoch": 50.98550724637681, "grad_norm": 0.3346292972564697, "learning_rate": 3.920737066377478e-06, "loss": 0.0716, "step": 17590 }, { "epoch": 51.01449275362319, "grad_norm": 0.29451116919517517, "learning_rate": 3.888708205047509e-06, "loss": 0.0625, "step": 17600 }, { "epoch": 51.04347826086956, "grad_norm": 0.3001940846443176, "learning_rate": 3.856805410228542e-06, "loss": 0.0602, "step": 17610 }, { "epoch": 51.072463768115945, "grad_norm": 0.34385746717453003, "learning_rate": 3.82502876914162e-06, "loss": 0.0597, "step": 17620 }, { "epoch": 51.10144927536232, "grad_norm": 0.5033257007598877, "learning_rate": 3.7933783686628586e-06, "loss": 0.0668, "step": 17630 }, { "epoch": 51.130434782608695, "grad_norm": 0.5406177043914795, "learning_rate": 3.7618542953232306e-06, "loss": 0.0621, "step": 17640 }, { "epoch": 51.15942028985507, "grad_norm": 0.3336744010448456, "learning_rate": 3.7304566353083658e-06, "loss": 0.0705, "step": 17650 }, { "epoch": 51.18840579710145, "grad_norm": 0.5173831582069397, "learning_rate": 3.6991854744582555e-06, "loss": 0.0848, "step": 17660 }, { "epoch": 51.21739130434783, "grad_norm": 0.32535767555236816, "learning_rate": 3.6680408982670777e-06, "loss": 0.0812, "step": 17670 }, { "epoch": 51.2463768115942, "grad_norm": 0.33591246604919434, "learning_rate": 3.637022991882899e-06, "loss": 0.0522, "step": 17680 }, { "epoch": 51.27536231884058, "grad_norm": 0.3357798159122467, "learning_rate": 3.606131840107485e-06, "loss": 0.0674, "step": 17690 }, { "epoch": 51.30434782608695, "grad_norm": 0.4017263948917389, "learning_rate": 3.575367527396084e-06, "loss": 0.0663, "step": 17700 }, { "epoch": 51.333333333333336, "grad_norm": 0.5477839112281799, "learning_rate": 3.5447301378571386e-06, "loss": 0.0748, "step": 17710 }, { "epoch": 51.36231884057971, "grad_norm": 0.4583459198474884, "learning_rate": 3.514219755252113e-06, "loss": 0.072, "step": 17720 }, { "epoch": 51.391304347826086, "grad_norm": 0.42150282859802246, "learning_rate": 3.4838364629952213e-06, "loss": 0.0756, "step": 17730 }, { "epoch": 51.42028985507246, "grad_norm": 0.29464593529701233, "learning_rate": 3.4535803441532123e-06, "loss": 0.0656, "step": 17740 }, { "epoch": 51.44927536231884, "grad_norm": 0.4603271484375, "learning_rate": 3.4234514814451836e-06, "loss": 0.0719, "step": 17750 }, { "epoch": 51.47826086956522, "grad_norm": 0.4149802625179291, "learning_rate": 3.393449957242273e-06, "loss": 0.0705, "step": 17760 }, { "epoch": 51.507246376811594, "grad_norm": 0.5083811283111572, "learning_rate": 3.363575853567524e-06, "loss": 0.0743, "step": 17770 }, { "epoch": 51.53623188405797, "grad_norm": 0.3474845588207245, "learning_rate": 3.3338292520955826e-06, "loss": 0.0723, "step": 17780 }, { "epoch": 51.56521739130435, "grad_norm": 0.39282166957855225, "learning_rate": 3.304210234152516e-06, "loss": 0.0807, "step": 17790 }, { "epoch": 51.594202898550726, "grad_norm": 0.347689688205719, "learning_rate": 3.2747188807155993e-06, "loss": 0.0676, "step": 17800 }, { "epoch": 51.6231884057971, "grad_norm": 0.234902486205101, "learning_rate": 3.2453552724130643e-06, "loss": 0.0594, "step": 17810 }, { "epoch": 51.65217391304348, "grad_norm": 0.6985359191894531, "learning_rate": 3.216119489523889e-06, "loss": 0.0739, "step": 17820 }, { "epoch": 51.68115942028985, "grad_norm": 0.4158734679222107, "learning_rate": 3.1870116119775917e-06, "loss": 0.0692, "step": 17830 }, { "epoch": 51.710144927536234, "grad_norm": 0.30131009221076965, "learning_rate": 3.158031719353999e-06, "loss": 0.0759, "step": 17840 }, { "epoch": 51.73913043478261, "grad_norm": 0.404659628868103, "learning_rate": 3.1291798908830273e-06, "loss": 0.0775, "step": 17850 }, { "epoch": 51.768115942028984, "grad_norm": 0.37496450543403625, "learning_rate": 3.1004562054444853e-06, "loss": 0.0661, "step": 17860 }, { "epoch": 51.79710144927536, "grad_norm": 0.39755377173423767, "learning_rate": 3.071860741567806e-06, "loss": 0.06, "step": 17870 }, { "epoch": 51.82608695652174, "grad_norm": 0.4132349193096161, "learning_rate": 3.04339357743193e-06, "loss": 0.0781, "step": 17880 }, { "epoch": 51.85507246376812, "grad_norm": 0.3833078444004059, "learning_rate": 3.0150547908649628e-06, "loss": 0.07, "step": 17890 }, { "epoch": 51.88405797101449, "grad_norm": 0.3717828392982483, "learning_rate": 2.9868444593440957e-06, "loss": 0.0656, "step": 17900 }, { "epoch": 51.91304347826087, "grad_norm": 0.37185606360435486, "learning_rate": 2.9587626599952846e-06, "loss": 0.0821, "step": 17910 }, { "epoch": 51.94202898550725, "grad_norm": 0.404291570186615, "learning_rate": 2.930809469593082e-06, "loss": 0.0584, "step": 17920 }, { "epoch": 51.971014492753625, "grad_norm": 0.5094296336174011, "learning_rate": 2.9029849645604733e-06, "loss": 0.0719, "step": 17930 }, { "epoch": 52.0, "grad_norm": 0.4158974587917328, "learning_rate": 2.8752892209685632e-06, "loss": 0.0741, "step": 17940 }, { "epoch": 52.028985507246375, "grad_norm": 0.49970102310180664, "learning_rate": 2.847722314536483e-06, "loss": 0.0666, "step": 17950 }, { "epoch": 52.05797101449275, "grad_norm": 0.35678717494010925, "learning_rate": 2.820284320631078e-06, "loss": 0.0647, "step": 17960 }, { "epoch": 52.08695652173913, "grad_norm": 0.2367994338274002, "learning_rate": 2.792975314266788e-06, "loss": 0.0814, "step": 17970 }, { "epoch": 52.11594202898551, "grad_norm": 0.3695875406265259, "learning_rate": 2.7657953701054007e-06, "loss": 0.0676, "step": 17980 }, { "epoch": 52.14492753623188, "grad_norm": 0.5966640114784241, "learning_rate": 2.7387445624558306e-06, "loss": 0.093, "step": 17990 }, { "epoch": 52.17391304347826, "grad_norm": 0.2616029679775238, "learning_rate": 2.7118229652739747e-06, "loss": 0.0826, "step": 18000 }, { "epoch": 52.20289855072464, "grad_norm": 0.39342010021209717, "learning_rate": 2.6850306521624236e-06, "loss": 0.085, "step": 18010 }, { "epoch": 52.231884057971016, "grad_norm": 0.374451607465744, "learning_rate": 2.6583676963703507e-06, "loss": 0.0573, "step": 18020 }, { "epoch": 52.26086956521739, "grad_norm": 0.2896682024002075, "learning_rate": 2.631834170793268e-06, "loss": 0.0614, "step": 18030 }, { "epoch": 52.289855072463766, "grad_norm": 0.44059228897094727, "learning_rate": 2.6054301479728036e-06, "loss": 0.0648, "step": 18040 }, { "epoch": 52.31884057971015, "grad_norm": 0.6553167700767517, "learning_rate": 2.579155700096575e-06, "loss": 0.0525, "step": 18050 }, { "epoch": 52.34782608695652, "grad_norm": 0.31062379479408264, "learning_rate": 2.5530108989978873e-06, "loss": 0.0634, "step": 18060 }, { "epoch": 52.3768115942029, "grad_norm": 0.36778193712234497, "learning_rate": 2.5269958161556416e-06, "loss": 0.0691, "step": 18070 }, { "epoch": 52.405797101449274, "grad_norm": 0.54738450050354, "learning_rate": 2.5011105226940888e-06, "loss": 0.0725, "step": 18080 }, { "epoch": 52.43478260869565, "grad_norm": 0.4340762197971344, "learning_rate": 2.4753550893826248e-06, "loss": 0.0569, "step": 18090 }, { "epoch": 52.46376811594203, "grad_norm": 0.4225577116012573, "learning_rate": 2.4497295866356296e-06, "loss": 0.0707, "step": 18100 }, { "epoch": 52.492753623188406, "grad_norm": 0.4461411237716675, "learning_rate": 2.424234084512228e-06, "loss": 0.0718, "step": 18110 }, { "epoch": 52.52173913043478, "grad_norm": 0.4382653832435608, "learning_rate": 2.3988686527161687e-06, "loss": 0.0731, "step": 18120 }, { "epoch": 52.55072463768116, "grad_norm": 0.3827999234199524, "learning_rate": 2.373633360595573e-06, "loss": 0.071, "step": 18130 }, { "epoch": 52.57971014492754, "grad_norm": 0.4236376881599426, "learning_rate": 2.3485282771427585e-06, "loss": 0.079, "step": 18140 }, { "epoch": 52.608695652173914, "grad_norm": 0.33686360716819763, "learning_rate": 2.3235534709940665e-06, "loss": 0.0646, "step": 18150 }, { "epoch": 52.63768115942029, "grad_norm": 0.43788444995880127, "learning_rate": 2.2987090104296617e-06, "loss": 0.0696, "step": 18160 }, { "epoch": 52.666666666666664, "grad_norm": 0.3673740029335022, "learning_rate": 2.273994963373355e-06, "loss": 0.0946, "step": 18170 }, { "epoch": 52.69565217391305, "grad_norm": 0.4890417456626892, "learning_rate": 2.249411397392409e-06, "loss": 0.0865, "step": 18180 }, { "epoch": 52.72463768115942, "grad_norm": 0.49079957604408264, "learning_rate": 2.2249583796973506e-06, "loss": 0.0627, "step": 18190 }, { "epoch": 52.7536231884058, "grad_norm": 0.4490756690502167, "learning_rate": 2.200635977141796e-06, "loss": 0.0642, "step": 18200 }, { "epoch": 52.78260869565217, "grad_norm": 0.45918673276901245, "learning_rate": 2.17644425622226e-06, "loss": 0.0739, "step": 18210 }, { "epoch": 52.81159420289855, "grad_norm": 0.3255002796649933, "learning_rate": 2.152383283077991e-06, "loss": 0.0494, "step": 18220 }, { "epoch": 52.84057971014493, "grad_norm": 0.3877251446247101, "learning_rate": 2.128453123490781e-06, "loss": 0.0658, "step": 18230 }, { "epoch": 52.869565217391305, "grad_norm": 0.6161091327667236, "learning_rate": 2.1046538428847462e-06, "loss": 0.057, "step": 18240 }, { "epoch": 52.89855072463768, "grad_norm": 0.5799849629402161, "learning_rate": 2.0809855063262273e-06, "loss": 0.0708, "step": 18250 }, { "epoch": 52.927536231884055, "grad_norm": 0.4724336862564087, "learning_rate": 2.057448178523558e-06, "loss": 0.0686, "step": 18260 }, { "epoch": 52.95652173913044, "grad_norm": 0.20595254004001617, "learning_rate": 2.034041923826885e-06, "loss": 0.0584, "step": 18270 }, { "epoch": 52.98550724637681, "grad_norm": 0.2347278743982315, "learning_rate": 2.0107668062280204e-06, "loss": 0.0773, "step": 18280 }, { "epoch": 53.01449275362319, "grad_norm": 0.3663756549358368, "learning_rate": 1.9876228893602357e-06, "loss": 0.0704, "step": 18290 }, { "epoch": 53.04347826086956, "grad_norm": 0.32941168546676636, "learning_rate": 1.9646102364981266e-06, "loss": 0.0714, "step": 18300 }, { "epoch": 53.072463768115945, "grad_norm": 0.3670329749584198, "learning_rate": 1.9417289105574053e-06, "loss": 0.0665, "step": 18310 }, { "epoch": 53.10144927536232, "grad_norm": 0.33528444170951843, "learning_rate": 1.9189789740947427e-06, "loss": 0.074, "step": 18320 }, { "epoch": 53.130434782608695, "grad_norm": 0.27513110637664795, "learning_rate": 1.896360489307597e-06, "loss": 0.0552, "step": 18330 }, { "epoch": 53.15942028985507, "grad_norm": 0.436108261346817, "learning_rate": 1.8738735180340362e-06, "loss": 0.076, "step": 18340 }, { "epoch": 53.18840579710145, "grad_norm": 0.3029859960079193, "learning_rate": 1.8515181217525824e-06, "loss": 0.066, "step": 18350 }, { "epoch": 53.21739130434783, "grad_norm": 0.5512310266494751, "learning_rate": 1.8292943615820457e-06, "loss": 0.0824, "step": 18360 }, { "epoch": 53.2463768115942, "grad_norm": 0.39598506689071655, "learning_rate": 1.8072022982813296e-06, "loss": 0.0773, "step": 18370 }, { "epoch": 53.27536231884058, "grad_norm": 0.6229205131530762, "learning_rate": 1.7852419922492925e-06, "loss": 0.0924, "step": 18380 }, { "epoch": 53.30434782608695, "grad_norm": 0.5619914531707764, "learning_rate": 1.763413503524569e-06, "loss": 0.0716, "step": 18390 }, { "epoch": 53.333333333333336, "grad_norm": 0.4665543735027313, "learning_rate": 1.7417168917854165e-06, "loss": 0.0844, "step": 18400 }, { "epoch": 53.36231884057971, "grad_norm": 0.29430925846099854, "learning_rate": 1.720152216349552e-06, "loss": 0.0753, "step": 18410 }, { "epoch": 53.391304347826086, "grad_norm": 0.6078800559043884, "learning_rate": 1.6987195361739595e-06, "loss": 0.0568, "step": 18420 }, { "epoch": 53.42028985507246, "grad_norm": 0.3402520418167114, "learning_rate": 1.6774189098547832e-06, "loss": 0.0692, "step": 18430 }, { "epoch": 53.44927536231884, "grad_norm": 0.46443086862564087, "learning_rate": 1.6562503956271069e-06, "loss": 0.0695, "step": 18440 }, { "epoch": 53.47826086956522, "grad_norm": 0.3997274339199066, "learning_rate": 1.6352140513648417e-06, "loss": 0.0691, "step": 18450 }, { "epoch": 53.507246376811594, "grad_norm": 0.3876442015171051, "learning_rate": 1.6143099345805712e-06, "loss": 0.0655, "step": 18460 }, { "epoch": 53.53623188405797, "grad_norm": 0.6733325123786926, "learning_rate": 1.5935381024253293e-06, "loss": 0.0677, "step": 18470 }, { "epoch": 53.56521739130435, "grad_norm": 0.35250145196914673, "learning_rate": 1.572898611688517e-06, "loss": 0.0588, "step": 18480 }, { "epoch": 53.594202898550726, "grad_norm": 0.3560972213745117, "learning_rate": 1.5523915187977133e-06, "loss": 0.0863, "step": 18490 }, { "epoch": 53.6231884057971, "grad_norm": 0.2549573481082916, "learning_rate": 1.532016879818532e-06, "loss": 0.066, "step": 18500 }, { "epoch": 53.65217391304348, "grad_norm": 0.5657981634140015, "learning_rate": 1.51177475045447e-06, "loss": 0.0732, "step": 18510 }, { "epoch": 53.68115942028985, "grad_norm": 0.45418161153793335, "learning_rate": 1.4916651860467035e-06, "loss": 0.0641, "step": 18520 }, { "epoch": 53.710144927536234, "grad_norm": 0.6455032825469971, "learning_rate": 1.471688241574043e-06, "loss": 0.06, "step": 18530 }, { "epoch": 53.73913043478261, "grad_norm": 0.41018450260162354, "learning_rate": 1.451843971652672e-06, "loss": 0.0691, "step": 18540 }, { "epoch": 53.768115942028984, "grad_norm": 0.2655918002128601, "learning_rate": 1.432132430536076e-06, "loss": 0.068, "step": 18550 }, { "epoch": 53.79710144927536, "grad_norm": 0.4377053380012512, "learning_rate": 1.412553672114869e-06, "loss": 0.0768, "step": 18560 }, { "epoch": 53.82608695652174, "grad_norm": 0.44129469990730286, "learning_rate": 1.3931077499166056e-06, "loss": 0.0715, "step": 18570 }, { "epoch": 53.85507246376812, "grad_norm": 0.5908977389335632, "learning_rate": 1.3737947171057085e-06, "loss": 0.0674, "step": 18580 }, { "epoch": 53.88405797101449, "grad_norm": 0.5041825771331787, "learning_rate": 1.3546146264832582e-06, "loss": 0.074, "step": 18590 }, { "epoch": 53.91304347826087, "grad_norm": 0.5081062912940979, "learning_rate": 1.3355675304869086e-06, "loss": 0.063, "step": 18600 }, { "epoch": 53.94202898550725, "grad_norm": 0.435222327709198, "learning_rate": 1.3166534811906827e-06, "loss": 0.0799, "step": 18610 }, { "epoch": 53.971014492753625, "grad_norm": 0.1768321990966797, "learning_rate": 1.2978725303048666e-06, "loss": 0.048, "step": 18620 }, { "epoch": 54.0, "grad_norm": 0.6796761751174927, "learning_rate": 1.2792247291758762e-06, "loss": 0.0614, "step": 18630 }, { "epoch": 54.028985507246375, "grad_norm": 0.42444196343421936, "learning_rate": 1.2607101287860635e-06, "loss": 0.0799, "step": 18640 }, { "epoch": 54.05797101449275, "grad_norm": 0.3279927670955658, "learning_rate": 1.2423287797536654e-06, "loss": 0.0588, "step": 18650 }, { "epoch": 54.08695652173913, "grad_norm": 0.46143797039985657, "learning_rate": 1.2240807323325776e-06, "loss": 0.0818, "step": 18660 }, { "epoch": 54.11594202898551, "grad_norm": 0.4536832571029663, "learning_rate": 1.205966036412254e-06, "loss": 0.0698, "step": 18670 }, { "epoch": 54.14492753623188, "grad_norm": 0.48404473066329956, "learning_rate": 1.1879847415175949e-06, "loss": 0.062, "step": 18680 }, { "epoch": 54.17391304347826, "grad_norm": 0.2790204584598541, "learning_rate": 1.1701368968087712e-06, "loss": 0.0724, "step": 18690 }, { "epoch": 54.20289855072464, "grad_norm": 0.355579137802124, "learning_rate": 1.1524225510811116e-06, "loss": 0.0625, "step": 18700 }, { "epoch": 54.231884057971016, "grad_norm": 0.3410792946815491, "learning_rate": 1.1348417527649535e-06, "loss": 0.0593, "step": 18710 }, { "epoch": 54.26086956521739, "grad_norm": 0.4553377032279968, "learning_rate": 1.1173945499255268e-06, "loss": 0.0645, "step": 18720 }, { "epoch": 54.289855072463766, "grad_norm": 0.5085041522979736, "learning_rate": 1.1000809902628307e-06, "loss": 0.0702, "step": 18730 }, { "epoch": 54.31884057971015, "grad_norm": 0.3210165500640869, "learning_rate": 1.082901121111468e-06, "loss": 0.0701, "step": 18740 }, { "epoch": 54.34782608695652, "grad_norm": 0.3031083047389984, "learning_rate": 1.0658549894405456e-06, "loss": 0.0625, "step": 18750 }, { "epoch": 54.3768115942029, "grad_norm": 0.3490734100341797, "learning_rate": 1.0489426418535342e-06, "loss": 0.0692, "step": 18760 }, { "epoch": 54.405797101449274, "grad_norm": 0.3119370639324188, "learning_rate": 1.0321641245881474e-06, "loss": 0.0505, "step": 18770 }, { "epoch": 54.43478260869565, "grad_norm": 0.4013970196247101, "learning_rate": 1.015519483516214e-06, "loss": 0.0755, "step": 18780 }, { "epoch": 54.46376811594203, "grad_norm": 0.47671663761138916, "learning_rate": 9.990087641435443e-07, "loss": 0.07, "step": 18790 }, { "epoch": 54.492753623188406, "grad_norm": 0.3680475950241089, "learning_rate": 9.826320116098132e-07, "loss": 0.0586, "step": 18800 }, { "epoch": 54.52173913043478, "grad_norm": 0.6233283281326294, "learning_rate": 9.663892706884447e-07, "loss": 0.0733, "step": 18810 }, { "epoch": 54.55072463768116, "grad_norm": 0.3389962613582611, "learning_rate": 9.502805857864616e-07, "loss": 0.0824, "step": 18820 }, { "epoch": 54.57971014492754, "grad_norm": 0.36238715052604675, "learning_rate": 9.34306000944396e-07, "loss": 0.0701, "step": 18830 }, { "epoch": 54.608695652173914, "grad_norm": 0.5171844363212585, "learning_rate": 9.184655598361624e-07, "loss": 0.0834, "step": 18840 }, { "epoch": 54.63768115942029, "grad_norm": 0.36652058362960815, "learning_rate": 9.027593057689076e-07, "loss": 0.0802, "step": 18850 }, { "epoch": 54.666666666666664, "grad_norm": 0.48718181252479553, "learning_rate": 8.871872816829441e-07, "loss": 0.0716, "step": 18860 }, { "epoch": 54.69565217391305, "grad_norm": 0.5928740501403809, "learning_rate": 8.717495301515777e-07, "loss": 0.0729, "step": 18870 }, { "epoch": 54.72463768115942, "grad_norm": 0.30666327476501465, "learning_rate": 8.564460933810415e-07, "loss": 0.0821, "step": 18880 }, { "epoch": 54.7536231884058, "grad_norm": 0.3472791016101837, "learning_rate": 8.412770132103453e-07, "loss": 0.0778, "step": 18890 }, { "epoch": 54.78260869565217, "grad_norm": 0.36346590518951416, "learning_rate": 8.262423311111711e-07, "loss": 0.0627, "step": 18900 }, { "epoch": 54.81159420289855, "grad_norm": 0.41762474179267883, "learning_rate": 8.113420881877665e-07, "loss": 0.0604, "step": 18910 }, { "epoch": 54.84057971014493, "grad_norm": 0.42856791615486145, "learning_rate": 7.965763251768288e-07, "loss": 0.0625, "step": 18920 }, { "epoch": 54.869565217391305, "grad_norm": 0.46118825674057007, "learning_rate": 7.819450824473995e-07, "loss": 0.0842, "step": 18930 }, { "epoch": 54.89855072463768, "grad_norm": 0.41677844524383545, "learning_rate": 7.674484000007198e-07, "loss": 0.0759, "step": 18940 }, { "epoch": 54.927536231884055, "grad_norm": 0.47534629702568054, "learning_rate": 7.530863174701752e-07, "loss": 0.0646, "step": 18950 }, { "epoch": 54.95652173913044, "grad_norm": 0.4878372550010681, "learning_rate": 7.38858874121151e-07, "loss": 0.0708, "step": 18960 }, { "epoch": 54.98550724637681, "grad_norm": 0.42019039392471313, "learning_rate": 7.247661088509328e-07, "loss": 0.0783, "step": 18970 }, { "epoch": 55.01449275362319, "grad_norm": 0.30006134510040283, "learning_rate": 7.108080601886002e-07, "loss": 0.0724, "step": 18980 }, { "epoch": 55.04347826086956, "grad_norm": 0.30366283655166626, "learning_rate": 6.969847662949336e-07, "loss": 0.0722, "step": 18990 }, { "epoch": 55.072463768115945, "grad_norm": 0.39432230591773987, "learning_rate": 6.832962649622798e-07, "loss": 0.0727, "step": 19000 }, { "epoch": 55.10144927536232, "grad_norm": 0.34140005707740784, "learning_rate": 6.697425936144863e-07, "loss": 0.0677, "step": 19010 }, { "epoch": 55.130434782608695, "grad_norm": 0.33747854828834534, "learning_rate": 6.563237893067731e-07, "loss": 0.0658, "step": 19020 }, { "epoch": 55.15942028985507, "grad_norm": 0.3701976537704468, "learning_rate": 6.430398887256328e-07, "loss": 0.0805, "step": 19030 }, { "epoch": 55.18840579710145, "grad_norm": 0.3153843581676483, "learning_rate": 6.298909281887478e-07, "loss": 0.0661, "step": 19040 }, { "epoch": 55.21739130434783, "grad_norm": 0.4111367464065552, "learning_rate": 6.168769436448673e-07, "loss": 0.0656, "step": 19050 }, { "epoch": 55.2463768115942, "grad_norm": 0.3601897060871124, "learning_rate": 6.03997970673742e-07, "loss": 0.0813, "step": 19060 }, { "epoch": 55.27536231884058, "grad_norm": 0.4176798462867737, "learning_rate": 5.912540444859782e-07, "loss": 0.0648, "step": 19070 }, { "epoch": 55.30434782608695, "grad_norm": 0.4172719717025757, "learning_rate": 5.786451999229837e-07, "loss": 0.0701, "step": 19080 }, { "epoch": 55.333333333333336, "grad_norm": 0.4788095951080322, "learning_rate": 5.661714714568722e-07, "loss": 0.0796, "step": 19090 }, { "epoch": 55.36231884057971, "grad_norm": 0.3786677122116089, "learning_rate": 5.538328931903259e-07, "loss": 0.0746, "step": 19100 }, { "epoch": 55.391304347826086, "grad_norm": 0.3475906252861023, "learning_rate": 5.416294988565551e-07, "loss": 0.0623, "step": 19110 }, { "epoch": 55.42028985507246, "grad_norm": 0.549476146697998, "learning_rate": 5.29561321819172e-07, "loss": 0.0828, "step": 19120 }, { "epoch": 55.44927536231884, "grad_norm": 0.3060716986656189, "learning_rate": 5.176283950721061e-07, "loss": 0.0727, "step": 19130 }, { "epoch": 55.47826086956522, "grad_norm": 0.41936513781547546, "learning_rate": 5.058307512395332e-07, "loss": 0.0706, "step": 19140 }, { "epoch": 55.507246376811594, "grad_norm": 0.5774510502815247, "learning_rate": 4.941684225757526e-07, "loss": 0.0804, "step": 19150 }, { "epoch": 55.53623188405797, "grad_norm": 0.30946260690689087, "learning_rate": 4.826414409651314e-07, "loss": 0.0651, "step": 19160 }, { "epoch": 55.56521739130435, "grad_norm": 0.45470932126045227, "learning_rate": 4.712498379219943e-07, "loss": 0.0778, "step": 19170 }, { "epoch": 55.594202898550726, "grad_norm": 0.5117192268371582, "learning_rate": 4.599936445905506e-07, "loss": 0.0633, "step": 19180 }, { "epoch": 55.6231884057971, "grad_norm": 0.37709343433380127, "learning_rate": 4.4887289174480594e-07, "loss": 0.0653, "step": 19190 }, { "epoch": 55.65217391304348, "grad_norm": 0.5725303888320923, "learning_rate": 4.378876097884621e-07, "loss": 0.0755, "step": 19200 }, { "epoch": 55.68115942028985, "grad_norm": 0.3886399269104004, "learning_rate": 4.2703782875487264e-07, "loss": 0.055, "step": 19210 }, { "epoch": 55.710144927536234, "grad_norm": 0.32867905497550964, "learning_rate": 4.163235783069208e-07, "loss": 0.0776, "step": 19220 }, { "epoch": 55.73913043478261, "grad_norm": 0.3088054656982422, "learning_rate": 4.057448877369585e-07, "loss": 0.0638, "step": 19230 }, { "epoch": 55.768115942028984, "grad_norm": 0.4054979085922241, "learning_rate": 3.9530178596672295e-07, "loss": 0.068, "step": 19240 }, { "epoch": 55.79710144927536, "grad_norm": 0.2850719094276428, "learning_rate": 3.849943015472479e-07, "loss": 0.0561, "step": 19250 }, { "epoch": 55.82608695652174, "grad_norm": 0.4662979543209076, "learning_rate": 3.748224626588137e-07, "loss": 0.061, "step": 19260 }, { "epoch": 55.85507246376812, "grad_norm": 0.30237099528312683, "learning_rate": 3.647862971108307e-07, "loss": 0.0579, "step": 19270 }, { "epoch": 55.88405797101449, "grad_norm": 0.3592722713947296, "learning_rate": 3.5488583234179473e-07, "loss": 0.0831, "step": 19280 }, { "epoch": 55.91304347826087, "grad_norm": 0.5748406052589417, "learning_rate": 3.4512109541920413e-07, "loss": 0.0811, "step": 19290 }, { "epoch": 55.94202898550725, "grad_norm": 0.3352035582065582, "learning_rate": 3.354921130394706e-07, "loss": 0.067, "step": 19300 }, { "epoch": 55.971014492753625, "grad_norm": 0.18802602589130402, "learning_rate": 3.259989115278639e-07, "loss": 0.0723, "step": 19310 }, { "epoch": 56.0, "grad_norm": 0.27592775225639343, "learning_rate": 3.1664151683843403e-07, "loss": 0.0516, "step": 19320 }, { "epoch": 56.028985507246375, "grad_norm": 0.41913115978240967, "learning_rate": 3.074199545539447e-07, "loss": 0.0654, "step": 19330 }, { "epoch": 56.05797101449275, "grad_norm": 0.39871251583099365, "learning_rate": 2.983342498857955e-07, "loss": 0.0686, "step": 19340 }, { "epoch": 56.08695652173913, "grad_norm": 0.19229915738105774, "learning_rate": 2.893844276739499e-07, "loss": 0.0825, "step": 19350 }, { "epoch": 56.11594202898551, "grad_norm": 0.36285632848739624, "learning_rate": 2.8057051238688514e-07, "loss": 0.0572, "step": 19360 }, { "epoch": 56.14492753623188, "grad_norm": 0.4675893187522888, "learning_rate": 2.71892528121509e-07, "loss": 0.0532, "step": 19370 }, { "epoch": 56.17391304347826, "grad_norm": 0.3729880452156067, "learning_rate": 2.633504986030988e-07, "loss": 0.0656, "step": 19380 }, { "epoch": 56.20289855072464, "grad_norm": 0.28056463599205017, "learning_rate": 2.549444471852347e-07, "loss": 0.0687, "step": 19390 }, { "epoch": 56.231884057971016, "grad_norm": 0.37210413813591003, "learning_rate": 2.4667439684974423e-07, "loss": 0.0588, "step": 19400 }, { "epoch": 56.26086956521739, "grad_norm": 0.5405174493789673, "learning_rate": 2.3854037020662467e-07, "loss": 0.0753, "step": 19410 }, { "epoch": 56.289855072463766, "grad_norm": 0.44210976362228394, "learning_rate": 2.3054238949399288e-07, "loss": 0.0792, "step": 19420 }, { "epoch": 56.31884057971015, "grad_norm": 0.2712061405181885, "learning_rate": 2.2268047657802993e-07, "loss": 0.0683, "step": 19430 }, { "epoch": 56.34782608695652, "grad_norm": 0.5898895263671875, "learning_rate": 2.149546529529034e-07, "loss": 0.0746, "step": 19440 }, { "epoch": 56.3768115942029, "grad_norm": 0.3706595301628113, "learning_rate": 2.0736493974071736e-07, "loss": 0.0665, "step": 19450 }, { "epoch": 56.405797101449274, "grad_norm": 0.29560816287994385, "learning_rate": 1.9991135769145686e-07, "loss": 0.0621, "step": 19460 }, { "epoch": 56.43478260869565, "grad_norm": 0.37435662746429443, "learning_rate": 1.9259392718293245e-07, "loss": 0.059, "step": 19470 }, { "epoch": 56.46376811594203, "grad_norm": 0.3982348144054413, "learning_rate": 1.8541266822072467e-07, "loss": 0.0747, "step": 19480 }, { "epoch": 56.492753623188406, "grad_norm": 0.4578276574611664, "learning_rate": 1.7836760043811184e-07, "loss": 0.0829, "step": 19490 }, { "epoch": 56.52173913043478, "grad_norm": 0.4532429575920105, "learning_rate": 1.7145874309604792e-07, "loss": 0.0877, "step": 19500 }, { "epoch": 56.55072463768116, "grad_norm": 0.2143554836511612, "learning_rate": 1.6468611508308474e-07, "loss": 0.0706, "step": 19510 }, { "epoch": 56.57971014492754, "grad_norm": 0.4162771999835968, "learning_rate": 1.5804973491532204e-07, "loss": 0.0669, "step": 19520 }, { "epoch": 56.608695652173914, "grad_norm": 0.3558191657066345, "learning_rate": 1.5154962073637424e-07, "loss": 0.0755, "step": 19530 }, { "epoch": 56.63768115942029, "grad_norm": 0.23699668049812317, "learning_rate": 1.4518579031730372e-07, "loss": 0.0614, "step": 19540 }, { "epoch": 56.666666666666664, "grad_norm": 0.24774578213691711, "learning_rate": 1.389582610565876e-07, "loss": 0.0624, "step": 19550 }, { "epoch": 56.69565217391305, "grad_norm": 0.20402128994464874, "learning_rate": 1.3286704998003995e-07, "loss": 0.0618, "step": 19560 }, { "epoch": 56.72463768115942, "grad_norm": 0.3410924971103668, "learning_rate": 1.2691217374080632e-07, "loss": 0.085, "step": 19570 }, { "epoch": 56.7536231884058, "grad_norm": 0.43285879492759705, "learning_rate": 1.2109364861929705e-07, "loss": 0.07, "step": 19580 }, { "epoch": 56.78260869565217, "grad_norm": 0.3403910994529724, "learning_rate": 1.1541149052312628e-07, "loss": 0.0689, "step": 19590 }, { "epoch": 56.81159420289855, "grad_norm": 0.3822014331817627, "learning_rate": 1.0986571498710074e-07, "loss": 0.0658, "step": 19600 }, { "epoch": 56.84057971014493, "grad_norm": 0.4609832465648651, "learning_rate": 1.0445633717316438e-07, "loss": 0.0578, "step": 19610 }, { "epoch": 56.869565217391305, "grad_norm": 0.5328177809715271, "learning_rate": 9.918337187034277e-08, "loss": 0.0715, "step": 19620 }, { "epoch": 56.89855072463768, "grad_norm": 0.25629356503486633, "learning_rate": 9.404683349472643e-08, "loss": 0.063, "step": 19630 }, { "epoch": 56.927536231884055, "grad_norm": 0.40432241559028625, "learning_rate": 8.904673608940983e-08, "loss": 0.061, "step": 19640 }, { "epoch": 56.95652173913044, "grad_norm": 0.8808780908584595, "learning_rate": 8.418309332447471e-08, "loss": 0.0696, "step": 19650 }, { "epoch": 56.98550724637681, "grad_norm": 0.33845722675323486, "learning_rate": 7.945591849692902e-08, "loss": 0.0661, "step": 19660 }, { "epoch": 57.01449275362319, "grad_norm": 0.4411575198173523, "learning_rate": 7.486522453069578e-08, "loss": 0.0808, "step": 19670 }, { "epoch": 57.04347826086956, "grad_norm": 0.3071605861186981, "learning_rate": 7.041102397655208e-08, "loss": 0.0691, "step": 19680 }, { "epoch": 57.072463768115945, "grad_norm": 0.3181021213531494, "learning_rate": 6.609332901210685e-08, "loss": 0.0534, "step": 19690 }, { "epoch": 57.10144927536232, "grad_norm": 0.3056909441947937, "learning_rate": 6.191215144178419e-08, "loss": 0.0846, "step": 19700 }, { "epoch": 57.130434782608695, "grad_norm": 0.2235526442527771, "learning_rate": 5.786750269675678e-08, "loss": 0.0676, "step": 19710 }, { "epoch": 57.15942028985507, "grad_norm": 0.4224703013896942, "learning_rate": 5.395939383494031e-08, "loss": 0.0706, "step": 19720 }, { "epoch": 57.18840579710145, "grad_norm": 0.34424740076065063, "learning_rate": 5.018783554095463e-08, "loss": 0.0735, "step": 19730 }, { "epoch": 57.21739130434783, "grad_norm": 0.3661525249481201, "learning_rate": 4.655283812610156e-08, "loss": 0.0866, "step": 19740 }, { "epoch": 57.2463768115942, "grad_norm": 0.3965482711791992, "learning_rate": 4.305441152831491e-08, "loss": 0.0684, "step": 19750 }, { "epoch": 57.27536231884058, "grad_norm": 0.27061599493026733, "learning_rate": 3.9692565312171584e-08, "loss": 0.057, "step": 19760 }, { "epoch": 57.30434782608695, "grad_norm": 0.35860663652420044, "learning_rate": 3.6467308668824975e-08, "loss": 0.0853, "step": 19770 }, { "epoch": 57.333333333333336, "grad_norm": 0.4602019786834717, "learning_rate": 3.3378650416004964e-08, "loss": 0.0638, "step": 19780 }, { "epoch": 57.36231884057971, "grad_norm": 0.39653271436691284, "learning_rate": 3.042659899797906e-08, "loss": 0.0645, "step": 19790 }, { "epoch": 57.391304347826086, "grad_norm": 0.3290475308895111, "learning_rate": 2.76111624855524e-08, "loss": 0.0728, "step": 19800 }, { "epoch": 57.42028985507246, "grad_norm": 0.4566417634487152, "learning_rate": 2.4932348576017784e-08, "loss": 0.0629, "step": 19810 }, { "epoch": 57.44927536231884, "grad_norm": 0.45807573199272156, "learning_rate": 2.239016459314458e-08, "loss": 0.0704, "step": 19820 }, { "epoch": 57.47826086956522, "grad_norm": 0.47588860988616943, "learning_rate": 1.9984617487173174e-08, "loss": 0.0676, "step": 19830 }, { "epoch": 57.507246376811594, "grad_norm": 0.514003336429596, "learning_rate": 1.7715713834776105e-08, "loss": 0.0798, "step": 19840 }, { "epoch": 57.53623188405797, "grad_norm": 0.35065725445747375, "learning_rate": 1.5583459839046964e-08, "loss": 0.0697, "step": 19850 }, { "epoch": 57.56521739130435, "grad_norm": 0.4014118015766144, "learning_rate": 1.3587861329489304e-08, "loss": 0.0714, "step": 19860 }, { "epoch": 57.594202898550726, "grad_norm": 0.39274537563323975, "learning_rate": 1.1728923761994415e-08, "loss": 0.0758, "step": 19870 }, { "epoch": 57.6231884057971, "grad_norm": 0.4963073134422302, "learning_rate": 1.0006652218819135e-08, "loss": 0.0554, "step": 19880 }, { "epoch": 57.65217391304348, "grad_norm": 0.512388288974762, "learning_rate": 8.421051408596947e-09, "loss": 0.0675, "step": 19890 }, { "epoch": 57.68115942028985, "grad_norm": 0.4509943127632141, "learning_rate": 6.972125666299123e-09, "loss": 0.0789, "step": 19900 }, { "epoch": 57.710144927536234, "grad_norm": 0.4671258330345154, "learning_rate": 5.659878953229169e-09, "loss": 0.0728, "step": 19910 }, { "epoch": 57.73913043478261, "grad_norm": 0.34714794158935547, "learning_rate": 4.48431485701728e-09, "loss": 0.0584, "step": 19920 }, { "epoch": 57.768115942028984, "grad_norm": 0.2956154942512512, "learning_rate": 3.4454365916203322e-09, "loss": 0.0642, "step": 19930 }, { "epoch": 57.79710144927536, "grad_norm": 0.43527641892433167, "learning_rate": 2.5432469972830332e-09, "loss": 0.0948, "step": 19940 }, { "epoch": 57.82608695652174, "grad_norm": 0.50961834192276, "learning_rate": 1.7777485405601203e-09, "loss": 0.0671, "step": 19950 }, { "epoch": 57.85507246376812, "grad_norm": 0.2853828966617584, "learning_rate": 1.1489433142941597e-09, "loss": 0.0573, "step": 19960 }, { "epoch": 57.88405797101449, "grad_norm": 0.4659963846206665, "learning_rate": 6.568330376210963e-10, "loss": 0.0693, "step": 19970 }, { "epoch": 57.91304347826087, "grad_norm": 0.45528364181518555, "learning_rate": 3.0141905594249787e-10, "loss": 0.0691, "step": 19980 }, { "epoch": 57.94202898550725, "grad_norm": 0.4776972532272339, "learning_rate": 8.270234094776008e-11, "loss": 0.0694, "step": 19990 }, { "epoch": 57.971014492753625, "grad_norm": 0.5331573486328125, "learning_rate": 6.834906085551041e-13, "loss": 0.0711, "step": 20000 }, { "epoch": 57.971014492753625, "step": 20000, "total_flos": 0.0, "train_loss": 0.112607872004807, "train_runtime": 9863.6213, "train_samples_per_second": 24.332, "train_steps_per_second": 2.028 } ], "logging_steps": 10, "max_steps": 20000, "num_input_tokens_seen": 0, "num_train_epochs": 58, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 12, "trial_name": null, "trial_params": null }