| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 46.3768115942029, |
| "eval_steps": 500, |
| "global_step": 16000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.028985507246376812, |
| "grad_norm": 3.0505027770996094, |
| "learning_rate": 9e-07, |
| "loss": 1.5657, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.057971014492753624, |
| "grad_norm": 2.080113410949707, |
| "learning_rate": 1.9e-06, |
| "loss": 1.5118, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.08695652173913043, |
| "grad_norm": 2.4925858974456787, |
| "learning_rate": 2.9e-06, |
| "loss": 1.2433, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.11594202898550725, |
| "grad_norm": 1.7392232418060303, |
| "learning_rate": 3.9e-06, |
| "loss": 1.3809, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.14492753623188406, |
| "grad_norm": 1.8013620376586914, |
| "learning_rate": 4.9000000000000005e-06, |
| "loss": 1.3118, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.17391304347826086, |
| "grad_norm": 1.5780786275863647, |
| "learning_rate": 5.9e-06, |
| "loss": 1.1046, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2028985507246377, |
| "grad_norm": 1.2937341928482056, |
| "learning_rate": 6.900000000000001e-06, |
| "loss": 1.2428, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.2318840579710145, |
| "grad_norm": 1.147234559059143, |
| "learning_rate": 7.9e-06, |
| "loss": 1.2453, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2608695652173913, |
| "grad_norm": 0.7600051760673523, |
| "learning_rate": 8.9e-06, |
| "loss": 0.6767, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2898550724637681, |
| "grad_norm": 0.8683933615684509, |
| "learning_rate": 9.900000000000002e-06, |
| "loss": 1.0279, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3188405797101449, |
| "grad_norm": 0.6988456845283508, |
| "learning_rate": 1.09e-05, |
| "loss": 0.857, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.34782608695652173, |
| "grad_norm": 0.8881454467773438, |
| "learning_rate": 1.19e-05, |
| "loss": 0.8046, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.37681159420289856, |
| "grad_norm": 0.40000322461128235, |
| "learning_rate": 1.29e-05, |
| "loss": 0.774, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.4057971014492754, |
| "grad_norm": 1.2142903804779053, |
| "learning_rate": 1.3900000000000002e-05, |
| "loss": 0.5516, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.43478260869565216, |
| "grad_norm": 0.9134606719017029, |
| "learning_rate": 1.49e-05, |
| "loss": 0.7689, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.463768115942029, |
| "grad_norm": 0.978635311126709, |
| "learning_rate": 1.59e-05, |
| "loss": 0.7034, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.4927536231884058, |
| "grad_norm": 0.9544720649719238, |
| "learning_rate": 1.69e-05, |
| "loss": 0.5249, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5217391304347826, |
| "grad_norm": 0.750441312789917, |
| "learning_rate": 1.79e-05, |
| "loss": 0.5178, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5507246376811594, |
| "grad_norm": 0.8242844939231873, |
| "learning_rate": 1.8900000000000002e-05, |
| "loss": 0.4664, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5797101449275363, |
| "grad_norm": 0.9814381003379822, |
| "learning_rate": 1.9900000000000003e-05, |
| "loss": 0.6898, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6086956521739131, |
| "grad_norm": 0.7581349611282349, |
| "learning_rate": 2.09e-05, |
| "loss": 0.5289, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6376811594202898, |
| "grad_norm": 0.9911883473396301, |
| "learning_rate": 2.19e-05, |
| "loss": 0.4249, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.9439306855201721, |
| "learning_rate": 2.29e-05, |
| "loss": 0.4135, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.6956521739130435, |
| "grad_norm": 1.3483587503433228, |
| "learning_rate": 2.39e-05, |
| "loss": 0.3891, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.7246376811594203, |
| "grad_norm": 0.819063663482666, |
| "learning_rate": 2.4900000000000002e-05, |
| "loss": 0.4549, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.7536231884057971, |
| "grad_norm": 0.8383818864822388, |
| "learning_rate": 2.5900000000000003e-05, |
| "loss": 0.4251, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.782608695652174, |
| "grad_norm": 0.9047835469245911, |
| "learning_rate": 2.6900000000000003e-05, |
| "loss": 0.4383, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.8115942028985508, |
| "grad_norm": 0.7909944653511047, |
| "learning_rate": 2.7900000000000004e-05, |
| "loss": 0.3907, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8405797101449275, |
| "grad_norm": 0.8012731075286865, |
| "learning_rate": 2.8899999999999998e-05, |
| "loss": 0.4489, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "grad_norm": 1.4028682708740234, |
| "learning_rate": 2.9900000000000002e-05, |
| "loss": 0.3037, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.8985507246376812, |
| "grad_norm": 1.488762617111206, |
| "learning_rate": 3.09e-05, |
| "loss": 0.3911, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.927536231884058, |
| "grad_norm": 0.7830433249473572, |
| "learning_rate": 3.19e-05, |
| "loss": 0.3919, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.9565217391304348, |
| "grad_norm": 1.5407651662826538, |
| "learning_rate": 3.29e-05, |
| "loss": 0.3686, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.9855072463768116, |
| "grad_norm": 0.9575673937797546, |
| "learning_rate": 3.3900000000000004e-05, |
| "loss": 0.3367, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.0144927536231885, |
| "grad_norm": 1.3226127624511719, |
| "learning_rate": 3.49e-05, |
| "loss": 0.3767, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.0434782608695652, |
| "grad_norm": 1.4169162511825562, |
| "learning_rate": 3.59e-05, |
| "loss": 0.3338, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.0724637681159421, |
| "grad_norm": 1.7206474542617798, |
| "learning_rate": 3.69e-05, |
| "loss": 0.3345, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.1014492753623188, |
| "grad_norm": 1.4332363605499268, |
| "learning_rate": 3.79e-05, |
| "loss": 0.3272, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.1304347826086956, |
| "grad_norm": 1.1724469661712646, |
| "learning_rate": 3.8900000000000004e-05, |
| "loss": 0.2866, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.1594202898550725, |
| "grad_norm": 0.83205646276474, |
| "learning_rate": 3.99e-05, |
| "loss": 0.356, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.1884057971014492, |
| "grad_norm": 1.1642824411392212, |
| "learning_rate": 4.09e-05, |
| "loss": 0.2858, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.2173913043478262, |
| "grad_norm": 1.0703731775283813, |
| "learning_rate": 4.19e-05, |
| "loss": 0.3091, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.2463768115942029, |
| "grad_norm": 0.886146605014801, |
| "learning_rate": 4.29e-05, |
| "loss": 0.3163, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.2753623188405796, |
| "grad_norm": 0.8630309104919434, |
| "learning_rate": 4.39e-05, |
| "loss": 0.2843, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.3043478260869565, |
| "grad_norm": 0.8399680256843567, |
| "learning_rate": 4.49e-05, |
| "loss": 0.2451, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 1.4553627967834473, |
| "learning_rate": 4.5900000000000004e-05, |
| "loss": 0.2888, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.3623188405797102, |
| "grad_norm": 1.8121979236602783, |
| "learning_rate": 4.69e-05, |
| "loss": 0.257, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.391304347826087, |
| "grad_norm": 1.165885329246521, |
| "learning_rate": 4.79e-05, |
| "loss": 0.2579, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.4202898550724639, |
| "grad_norm": 0.8950861096382141, |
| "learning_rate": 4.89e-05, |
| "loss": 0.279, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.4492753623188406, |
| "grad_norm": 0.8436807990074158, |
| "learning_rate": 4.99e-05, |
| "loss": 0.2503, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.4782608695652173, |
| "grad_norm": 0.5035578012466431, |
| "learning_rate": 5.0900000000000004e-05, |
| "loss": 0.2177, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.5072463768115942, |
| "grad_norm": 1.0943514108657837, |
| "learning_rate": 5.19e-05, |
| "loss": 0.3226, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.5362318840579712, |
| "grad_norm": 0.7721551060676575, |
| "learning_rate": 5.2900000000000005e-05, |
| "loss": 0.2258, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.5652173913043477, |
| "grad_norm": 1.0129557847976685, |
| "learning_rate": 5.390000000000001e-05, |
| "loss": 0.299, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.5942028985507246, |
| "grad_norm": 1.014032006263733, |
| "learning_rate": 5.4900000000000006e-05, |
| "loss": 0.2733, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.6231884057971016, |
| "grad_norm": 1.73903489112854, |
| "learning_rate": 5.590000000000001e-05, |
| "loss": 0.2611, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.6521739130434783, |
| "grad_norm": 2.070592164993286, |
| "learning_rate": 5.69e-05, |
| "loss": 0.2514, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.681159420289855, |
| "grad_norm": 1.2096529006958008, |
| "learning_rate": 5.79e-05, |
| "loss": 0.2635, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.710144927536232, |
| "grad_norm": 0.9375045895576477, |
| "learning_rate": 5.89e-05, |
| "loss": 0.2542, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.7391304347826086, |
| "grad_norm": 0.8468955755233765, |
| "learning_rate": 5.99e-05, |
| "loss": 0.2525, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.7681159420289854, |
| "grad_norm": 0.9298123717308044, |
| "learning_rate": 6.09e-05, |
| "loss": 0.2511, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.7971014492753623, |
| "grad_norm": 0.8824529647827148, |
| "learning_rate": 6.19e-05, |
| "loss": 0.2373, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.8260869565217392, |
| "grad_norm": 0.7265031337738037, |
| "learning_rate": 6.29e-05, |
| "loss": 0.2139, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.855072463768116, |
| "grad_norm": 1.0328197479248047, |
| "learning_rate": 6.390000000000001e-05, |
| "loss": 0.2141, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.8840579710144927, |
| "grad_norm": 0.5020371079444885, |
| "learning_rate": 6.49e-05, |
| "loss": 0.2348, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.9130434782608696, |
| "grad_norm": 0.7637607455253601, |
| "learning_rate": 6.59e-05, |
| "loss": 0.2097, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.9420289855072463, |
| "grad_norm": 0.8997554779052734, |
| "learning_rate": 6.690000000000001e-05, |
| "loss": 0.2228, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.971014492753623, |
| "grad_norm": 0.7784063816070557, |
| "learning_rate": 6.790000000000001e-05, |
| "loss": 0.2412, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.9886015057563782, |
| "learning_rate": 6.89e-05, |
| "loss": 0.2263, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.028985507246377, |
| "grad_norm": 0.7861230969429016, |
| "learning_rate": 6.99e-05, |
| "loss": 0.2281, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.0579710144927534, |
| "grad_norm": 0.6980922222137451, |
| "learning_rate": 7.09e-05, |
| "loss": 0.2295, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.0869565217391304, |
| "grad_norm": 0.9516819715499878, |
| "learning_rate": 7.19e-05, |
| "loss": 0.2485, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.1159420289855073, |
| "grad_norm": 0.7088673710823059, |
| "learning_rate": 7.29e-05, |
| "loss": 0.2256, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.1449275362318843, |
| "grad_norm": 0.8767524361610413, |
| "learning_rate": 7.390000000000001e-05, |
| "loss": 0.2105, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.1739130434782608, |
| "grad_norm": 0.5966852903366089, |
| "learning_rate": 7.49e-05, |
| "loss": 0.2192, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.2028985507246377, |
| "grad_norm": 0.7955141067504883, |
| "learning_rate": 7.59e-05, |
| "loss": 0.2166, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.2318840579710146, |
| "grad_norm": 0.8298391699790955, |
| "learning_rate": 7.69e-05, |
| "loss": 0.2583, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.260869565217391, |
| "grad_norm": 1.2403712272644043, |
| "learning_rate": 7.790000000000001e-05, |
| "loss": 0.2428, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.289855072463768, |
| "grad_norm": 1.2657474279403687, |
| "learning_rate": 7.890000000000001e-05, |
| "loss": 0.2025, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.318840579710145, |
| "grad_norm": 1.0565385818481445, |
| "learning_rate": 7.99e-05, |
| "loss": 0.1949, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.3478260869565215, |
| "grad_norm": 1.0542415380477905, |
| "learning_rate": 8.090000000000001e-05, |
| "loss": 0.2692, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.3768115942028984, |
| "grad_norm": 0.6383161544799805, |
| "learning_rate": 8.19e-05, |
| "loss": 0.221, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.4057971014492754, |
| "grad_norm": 0.8400139808654785, |
| "learning_rate": 8.29e-05, |
| "loss": 0.1894, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.4347826086956523, |
| "grad_norm": 0.9511343240737915, |
| "learning_rate": 8.39e-05, |
| "loss": 0.2402, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.463768115942029, |
| "grad_norm": 1.1040838956832886, |
| "learning_rate": 8.49e-05, |
| "loss": 0.1974, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.4927536231884058, |
| "grad_norm": 0.8064889311790466, |
| "learning_rate": 8.59e-05, |
| "loss": 0.2312, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.5217391304347827, |
| "grad_norm": 0.7647086381912231, |
| "learning_rate": 8.69e-05, |
| "loss": 0.1977, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.550724637681159, |
| "grad_norm": 0.8380846977233887, |
| "learning_rate": 8.790000000000001e-05, |
| "loss": 0.2233, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.579710144927536, |
| "grad_norm": 0.48276486992836, |
| "learning_rate": 8.89e-05, |
| "loss": 0.1741, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.608695652173913, |
| "grad_norm": 0.7797939777374268, |
| "learning_rate": 8.99e-05, |
| "loss": 0.1951, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.63768115942029, |
| "grad_norm": 0.8178322911262512, |
| "learning_rate": 9.090000000000001e-05, |
| "loss": 0.2087, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 0.8776262998580933, |
| "learning_rate": 9.190000000000001e-05, |
| "loss": 0.1914, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.6956521739130435, |
| "grad_norm": 0.46826550364494324, |
| "learning_rate": 9.290000000000001e-05, |
| "loss": 0.1902, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.7246376811594204, |
| "grad_norm": 0.9637788534164429, |
| "learning_rate": 9.39e-05, |
| "loss": 0.2052, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.753623188405797, |
| "grad_norm": 1.1427522897720337, |
| "learning_rate": 9.49e-05, |
| "loss": 0.2212, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.782608695652174, |
| "grad_norm": 0.6853973865509033, |
| "learning_rate": 9.59e-05, |
| "loss": 0.1921, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.8115942028985508, |
| "grad_norm": 0.8581897020339966, |
| "learning_rate": 9.69e-05, |
| "loss": 0.2215, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.8405797101449277, |
| "grad_norm": 0.9988269805908203, |
| "learning_rate": 9.790000000000001e-05, |
| "loss": 0.1838, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.869565217391304, |
| "grad_norm": 0.3667157292366028, |
| "learning_rate": 9.89e-05, |
| "loss": 0.156, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.898550724637681, |
| "grad_norm": 0.8472527265548706, |
| "learning_rate": 9.99e-05, |
| "loss": 0.1936, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.927536231884058, |
| "grad_norm": 0.6419370770454407, |
| "learning_rate": 9.999994463727085e-05, |
| "loss": 0.1723, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.9565217391304346, |
| "grad_norm": 1.061924934387207, |
| "learning_rate": 9.999975326009292e-05, |
| "loss": 0.24, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.9855072463768115, |
| "grad_norm": 0.44797030091285706, |
| "learning_rate": 9.999942518549879e-05, |
| "loss": 0.1526, |
| "step": 1030 |
| }, |
| { |
| "epoch": 3.0144927536231885, |
| "grad_norm": 0.5258593559265137, |
| "learning_rate": 9.999896041438544e-05, |
| "loss": 0.2082, |
| "step": 1040 |
| }, |
| { |
| "epoch": 3.0434782608695654, |
| "grad_norm": 0.7527342438697815, |
| "learning_rate": 9.999835894802353e-05, |
| "loss": 0.166, |
| "step": 1050 |
| }, |
| { |
| "epoch": 3.072463768115942, |
| "grad_norm": 0.8045316934585571, |
| "learning_rate": 9.999762078805743e-05, |
| "loss": 0.1526, |
| "step": 1060 |
| }, |
| { |
| "epoch": 3.101449275362319, |
| "grad_norm": 0.6621928215026855, |
| "learning_rate": 9.999674593650526e-05, |
| "loss": 0.1965, |
| "step": 1070 |
| }, |
| { |
| "epoch": 3.130434782608696, |
| "grad_norm": 0.6621638536453247, |
| "learning_rate": 9.99957343957588e-05, |
| "loss": 0.1575, |
| "step": 1080 |
| }, |
| { |
| "epoch": 3.1594202898550723, |
| "grad_norm": 0.6635481715202332, |
| "learning_rate": 9.99945861685836e-05, |
| "loss": 0.1943, |
| "step": 1090 |
| }, |
| { |
| "epoch": 3.1884057971014492, |
| "grad_norm": 1.0563372373580933, |
| "learning_rate": 9.999330125811884e-05, |
| "loss": 0.1864, |
| "step": 1100 |
| }, |
| { |
| "epoch": 3.217391304347826, |
| "grad_norm": 0.7428378462791443, |
| "learning_rate": 9.999187966787744e-05, |
| "loss": 0.2003, |
| "step": 1110 |
| }, |
| { |
| "epoch": 3.246376811594203, |
| "grad_norm": 0.6000686287879944, |
| "learning_rate": 9.999032140174595e-05, |
| "loss": 0.1587, |
| "step": 1120 |
| }, |
| { |
| "epoch": 3.2753623188405796, |
| "grad_norm": 0.8239452838897705, |
| "learning_rate": 9.998862646398464e-05, |
| "loss": 0.1838, |
| "step": 1130 |
| }, |
| { |
| "epoch": 3.3043478260869565, |
| "grad_norm": 0.6900084018707275, |
| "learning_rate": 9.998679485922739e-05, |
| "loss": 0.2002, |
| "step": 1140 |
| }, |
| { |
| "epoch": 3.3333333333333335, |
| "grad_norm": 1.6132053136825562, |
| "learning_rate": 9.998482659248174e-05, |
| "loss": 0.2293, |
| "step": 1150 |
| }, |
| { |
| "epoch": 3.36231884057971, |
| "grad_norm": 0.6084638237953186, |
| "learning_rate": 9.998272166912883e-05, |
| "loss": 0.1645, |
| "step": 1160 |
| }, |
| { |
| "epoch": 3.391304347826087, |
| "grad_norm": 0.5943679809570312, |
| "learning_rate": 9.998048009492347e-05, |
| "loss": 0.1763, |
| "step": 1170 |
| }, |
| { |
| "epoch": 3.420289855072464, |
| "grad_norm": 0.5672821998596191, |
| "learning_rate": 9.997810187599403e-05, |
| "loss": 0.1679, |
| "step": 1180 |
| }, |
| { |
| "epoch": 3.449275362318841, |
| "grad_norm": 1.185848593711853, |
| "learning_rate": 9.997558701884249e-05, |
| "loss": 0.2152, |
| "step": 1190 |
| }, |
| { |
| "epoch": 3.4782608695652173, |
| "grad_norm": 0.7329660058021545, |
| "learning_rate": 9.997293553034433e-05, |
| "loss": 0.1943, |
| "step": 1200 |
| }, |
| { |
| "epoch": 3.5072463768115942, |
| "grad_norm": 0.6363108158111572, |
| "learning_rate": 9.997014741774866e-05, |
| "loss": 0.1579, |
| "step": 1210 |
| }, |
| { |
| "epoch": 3.536231884057971, |
| "grad_norm": 1.2481898069381714, |
| "learning_rate": 9.996722268867803e-05, |
| "loss": 0.1869, |
| "step": 1220 |
| }, |
| { |
| "epoch": 3.5652173913043477, |
| "grad_norm": 0.8098170757293701, |
| "learning_rate": 9.996416135112858e-05, |
| "loss": 0.2126, |
| "step": 1230 |
| }, |
| { |
| "epoch": 3.5942028985507246, |
| "grad_norm": 0.6532134413719177, |
| "learning_rate": 9.996096341346988e-05, |
| "loss": 0.2359, |
| "step": 1240 |
| }, |
| { |
| "epoch": 3.6231884057971016, |
| "grad_norm": 0.774456262588501, |
| "learning_rate": 9.995762888444495e-05, |
| "loss": 0.2043, |
| "step": 1250 |
| }, |
| { |
| "epoch": 3.6521739130434785, |
| "grad_norm": 0.7362341284751892, |
| "learning_rate": 9.995415777317027e-05, |
| "loss": 0.1705, |
| "step": 1260 |
| }, |
| { |
| "epoch": 3.681159420289855, |
| "grad_norm": 0.6909469366073608, |
| "learning_rate": 9.995055008913574e-05, |
| "loss": 0.1981, |
| "step": 1270 |
| }, |
| { |
| "epoch": 3.710144927536232, |
| "grad_norm": 0.5451234579086304, |
| "learning_rate": 9.994680584220463e-05, |
| "loss": 0.1705, |
| "step": 1280 |
| }, |
| { |
| "epoch": 3.7391304347826084, |
| "grad_norm": 0.7192392945289612, |
| "learning_rate": 9.994292504261355e-05, |
| "loss": 0.1707, |
| "step": 1290 |
| }, |
| { |
| "epoch": 3.7681159420289854, |
| "grad_norm": 0.5111631751060486, |
| "learning_rate": 9.993890770097247e-05, |
| "loss": 0.2049, |
| "step": 1300 |
| }, |
| { |
| "epoch": 3.7971014492753623, |
| "grad_norm": 0.5530916452407837, |
| "learning_rate": 9.993475382826467e-05, |
| "loss": 0.1931, |
| "step": 1310 |
| }, |
| { |
| "epoch": 3.8260869565217392, |
| "grad_norm": 0.4613671898841858, |
| "learning_rate": 9.993046343584664e-05, |
| "loss": 0.1553, |
| "step": 1320 |
| }, |
| { |
| "epoch": 3.855072463768116, |
| "grad_norm": 0.5719594359397888, |
| "learning_rate": 9.992603653544816e-05, |
| "loss": 0.1865, |
| "step": 1330 |
| }, |
| { |
| "epoch": 3.8840579710144927, |
| "grad_norm": 0.6633929014205933, |
| "learning_rate": 9.992147313917222e-05, |
| "loss": 0.1901, |
| "step": 1340 |
| }, |
| { |
| "epoch": 3.9130434782608696, |
| "grad_norm": 0.3168647587299347, |
| "learning_rate": 9.991677325949497e-05, |
| "loss": 0.1871, |
| "step": 1350 |
| }, |
| { |
| "epoch": 3.942028985507246, |
| "grad_norm": 0.35858315229415894, |
| "learning_rate": 9.991193690926568e-05, |
| "loss": 0.1533, |
| "step": 1360 |
| }, |
| { |
| "epoch": 3.971014492753623, |
| "grad_norm": 0.35452893376350403, |
| "learning_rate": 9.990696410170678e-05, |
| "loss": 0.1843, |
| "step": 1370 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 1.4836504459381104, |
| "learning_rate": 9.990185485041371e-05, |
| "loss": 0.1691, |
| "step": 1380 |
| }, |
| { |
| "epoch": 4.028985507246377, |
| "grad_norm": 0.7394298315048218, |
| "learning_rate": 9.989660916935498e-05, |
| "loss": 0.1648, |
| "step": 1390 |
| }, |
| { |
| "epoch": 4.057971014492754, |
| "grad_norm": 0.8527777791023254, |
| "learning_rate": 9.989122707287208e-05, |
| "loss": 0.1741, |
| "step": 1400 |
| }, |
| { |
| "epoch": 4.086956521739131, |
| "grad_norm": 0.6024882197380066, |
| "learning_rate": 9.988570857567945e-05, |
| "loss": 0.1863, |
| "step": 1410 |
| }, |
| { |
| "epoch": 4.115942028985507, |
| "grad_norm": 0.6260817050933838, |
| "learning_rate": 9.988005369286446e-05, |
| "loss": 0.1815, |
| "step": 1420 |
| }, |
| { |
| "epoch": 4.144927536231884, |
| "grad_norm": 0.9622341394424438, |
| "learning_rate": 9.987426243988734e-05, |
| "loss": 0.1698, |
| "step": 1430 |
| }, |
| { |
| "epoch": 4.173913043478261, |
| "grad_norm": 0.5575575232505798, |
| "learning_rate": 9.986833483258114e-05, |
| "loss": 0.1753, |
| "step": 1440 |
| }, |
| { |
| "epoch": 4.202898550724638, |
| "grad_norm": 0.24518761038780212, |
| "learning_rate": 9.986227088715173e-05, |
| "loss": 0.16, |
| "step": 1450 |
| }, |
| { |
| "epoch": 4.231884057971015, |
| "grad_norm": 0.5677102208137512, |
| "learning_rate": 9.98560706201777e-05, |
| "loss": 0.1746, |
| "step": 1460 |
| }, |
| { |
| "epoch": 4.260869565217392, |
| "grad_norm": 0.35185858607292175, |
| "learning_rate": 9.984973404861036e-05, |
| "loss": 0.152, |
| "step": 1470 |
| }, |
| { |
| "epoch": 4.2898550724637685, |
| "grad_norm": 0.5845288038253784, |
| "learning_rate": 9.984326118977361e-05, |
| "loss": 0.1458, |
| "step": 1480 |
| }, |
| { |
| "epoch": 4.318840579710145, |
| "grad_norm": 0.5872308611869812, |
| "learning_rate": 9.983665206136406e-05, |
| "loss": 0.1783, |
| "step": 1490 |
| }, |
| { |
| "epoch": 4.3478260869565215, |
| "grad_norm": 0.6161956787109375, |
| "learning_rate": 9.982990668145075e-05, |
| "loss": 0.1617, |
| "step": 1500 |
| }, |
| { |
| "epoch": 4.3768115942028984, |
| "grad_norm": 0.48462975025177, |
| "learning_rate": 9.982302506847534e-05, |
| "loss": 0.1544, |
| "step": 1510 |
| }, |
| { |
| "epoch": 4.405797101449275, |
| "grad_norm": 0.43805649876594543, |
| "learning_rate": 9.981600724125189e-05, |
| "loss": 0.1632, |
| "step": 1520 |
| }, |
| { |
| "epoch": 4.434782608695652, |
| "grad_norm": 0.6712663173675537, |
| "learning_rate": 9.980885321896685e-05, |
| "loss": 0.1681, |
| "step": 1530 |
| }, |
| { |
| "epoch": 4.463768115942029, |
| "grad_norm": 0.46296727657318115, |
| "learning_rate": 9.980156302117905e-05, |
| "loss": 0.147, |
| "step": 1540 |
| }, |
| { |
| "epoch": 4.492753623188406, |
| "grad_norm": 0.47002753615379333, |
| "learning_rate": 9.979413666781963e-05, |
| "loss": 0.1285, |
| "step": 1550 |
| }, |
| { |
| "epoch": 4.521739130434782, |
| "grad_norm": 0.508978009223938, |
| "learning_rate": 9.978657417919193e-05, |
| "loss": 0.1611, |
| "step": 1560 |
| }, |
| { |
| "epoch": 4.550724637681159, |
| "grad_norm": 0.5047881007194519, |
| "learning_rate": 9.977887557597153e-05, |
| "loss": 0.169, |
| "step": 1570 |
| }, |
| { |
| "epoch": 4.579710144927536, |
| "grad_norm": 0.5661750435829163, |
| "learning_rate": 9.97710408792061e-05, |
| "loss": 0.1745, |
| "step": 1580 |
| }, |
| { |
| "epoch": 4.608695652173913, |
| "grad_norm": 0.33027854561805725, |
| "learning_rate": 9.976307011031542e-05, |
| "loss": 0.1515, |
| "step": 1590 |
| }, |
| { |
| "epoch": 4.63768115942029, |
| "grad_norm": 0.5191190838813782, |
| "learning_rate": 9.975496329109126e-05, |
| "loss": 0.1812, |
| "step": 1600 |
| }, |
| { |
| "epoch": 4.666666666666667, |
| "grad_norm": 0.6009054183959961, |
| "learning_rate": 9.974672044369732e-05, |
| "loss": 0.154, |
| "step": 1610 |
| }, |
| { |
| "epoch": 4.695652173913043, |
| "grad_norm": 0.83514004945755, |
| "learning_rate": 9.97383415906693e-05, |
| "loss": 0.1915, |
| "step": 1620 |
| }, |
| { |
| "epoch": 4.72463768115942, |
| "grad_norm": 0.7153990864753723, |
| "learning_rate": 9.97298267549146e-05, |
| "loss": 0.151, |
| "step": 1630 |
| }, |
| { |
| "epoch": 4.753623188405797, |
| "grad_norm": 0.5760650634765625, |
| "learning_rate": 9.972117595971249e-05, |
| "loss": 0.1613, |
| "step": 1640 |
| }, |
| { |
| "epoch": 4.782608695652174, |
| "grad_norm": 0.46681898832321167, |
| "learning_rate": 9.971238922871391e-05, |
| "loss": 0.1547, |
| "step": 1650 |
| }, |
| { |
| "epoch": 4.811594202898551, |
| "grad_norm": 0.6712074875831604, |
| "learning_rate": 9.970346658594142e-05, |
| "loss": 0.1693, |
| "step": 1660 |
| }, |
| { |
| "epoch": 4.840579710144928, |
| "grad_norm": 0.41927066445350647, |
| "learning_rate": 9.969440805578923e-05, |
| "loss": 0.1537, |
| "step": 1670 |
| }, |
| { |
| "epoch": 4.869565217391305, |
| "grad_norm": 0.718482255935669, |
| "learning_rate": 9.968521366302298e-05, |
| "loss": 0.1503, |
| "step": 1680 |
| }, |
| { |
| "epoch": 4.898550724637682, |
| "grad_norm": 0.41100355982780457, |
| "learning_rate": 9.967588343277981e-05, |
| "loss": 0.131, |
| "step": 1690 |
| }, |
| { |
| "epoch": 4.927536231884058, |
| "grad_norm": 0.6164652705192566, |
| "learning_rate": 9.966641739056818e-05, |
| "loss": 0.1633, |
| "step": 1700 |
| }, |
| { |
| "epoch": 4.956521739130435, |
| "grad_norm": 0.6644942760467529, |
| "learning_rate": 9.965681556226793e-05, |
| "loss": 0.1686, |
| "step": 1710 |
| }, |
| { |
| "epoch": 4.9855072463768115, |
| "grad_norm": 0.6024698615074158, |
| "learning_rate": 9.964707797413006e-05, |
| "loss": 0.1629, |
| "step": 1720 |
| }, |
| { |
| "epoch": 5.0144927536231885, |
| "grad_norm": 0.37680429220199585, |
| "learning_rate": 9.963720465277679e-05, |
| "loss": 0.1634, |
| "step": 1730 |
| }, |
| { |
| "epoch": 5.043478260869565, |
| "grad_norm": 0.6451659798622131, |
| "learning_rate": 9.96271956252014e-05, |
| "loss": 0.1613, |
| "step": 1740 |
| }, |
| { |
| "epoch": 5.072463768115942, |
| "grad_norm": 0.28793832659721375, |
| "learning_rate": 9.961705091876816e-05, |
| "loss": 0.1589, |
| "step": 1750 |
| }, |
| { |
| "epoch": 5.101449275362318, |
| "grad_norm": 0.59237140417099, |
| "learning_rate": 9.960677056121235e-05, |
| "loss": 0.1607, |
| "step": 1760 |
| }, |
| { |
| "epoch": 5.130434782608695, |
| "grad_norm": 0.47422319650650024, |
| "learning_rate": 9.959635458064005e-05, |
| "loss": 0.1916, |
| "step": 1770 |
| }, |
| { |
| "epoch": 5.159420289855072, |
| "grad_norm": 0.681136965751648, |
| "learning_rate": 9.958580300552815e-05, |
| "loss": 0.1624, |
| "step": 1780 |
| }, |
| { |
| "epoch": 5.188405797101449, |
| "grad_norm": 0.6878365874290466, |
| "learning_rate": 9.957511586472426e-05, |
| "loss": 0.1762, |
| "step": 1790 |
| }, |
| { |
| "epoch": 5.217391304347826, |
| "grad_norm": 0.5597853064537048, |
| "learning_rate": 9.956429318744662e-05, |
| "loss": 0.1648, |
| "step": 1800 |
| }, |
| { |
| "epoch": 5.246376811594203, |
| "grad_norm": 0.5032410621643066, |
| "learning_rate": 9.955333500328404e-05, |
| "loss": 0.1482, |
| "step": 1810 |
| }, |
| { |
| "epoch": 5.27536231884058, |
| "grad_norm": 0.6717603802680969, |
| "learning_rate": 9.95422413421957e-05, |
| "loss": 0.1815, |
| "step": 1820 |
| }, |
| { |
| "epoch": 5.304347826086957, |
| "grad_norm": 0.5992377400398254, |
| "learning_rate": 9.953101223451133e-05, |
| "loss": 0.1551, |
| "step": 1830 |
| }, |
| { |
| "epoch": 5.333333333333333, |
| "grad_norm": 0.35043808817863464, |
| "learning_rate": 9.951964771093085e-05, |
| "loss": 0.1493, |
| "step": 1840 |
| }, |
| { |
| "epoch": 5.36231884057971, |
| "grad_norm": 0.49411511421203613, |
| "learning_rate": 9.950814780252442e-05, |
| "loss": 0.1561, |
| "step": 1850 |
| }, |
| { |
| "epoch": 5.391304347826087, |
| "grad_norm": 0.5951570868492126, |
| "learning_rate": 9.949651254073236e-05, |
| "loss": 0.1675, |
| "step": 1860 |
| }, |
| { |
| "epoch": 5.420289855072464, |
| "grad_norm": 0.6489980220794678, |
| "learning_rate": 9.948474195736504e-05, |
| "loss": 0.1579, |
| "step": 1870 |
| }, |
| { |
| "epoch": 5.449275362318841, |
| "grad_norm": 0.5115748047828674, |
| "learning_rate": 9.947283608460277e-05, |
| "loss": 0.1999, |
| "step": 1880 |
| }, |
| { |
| "epoch": 5.478260869565218, |
| "grad_norm": 0.4821164906024933, |
| "learning_rate": 9.946079495499577e-05, |
| "loss": 0.1695, |
| "step": 1890 |
| }, |
| { |
| "epoch": 5.507246376811594, |
| "grad_norm": 0.40529024600982666, |
| "learning_rate": 9.944861860146401e-05, |
| "loss": 0.1764, |
| "step": 1900 |
| }, |
| { |
| "epoch": 5.536231884057971, |
| "grad_norm": 0.46906864643096924, |
| "learning_rate": 9.943630705729719e-05, |
| "loss": 0.1572, |
| "step": 1910 |
| }, |
| { |
| "epoch": 5.565217391304348, |
| "grad_norm": 0.34866201877593994, |
| "learning_rate": 9.942386035615459e-05, |
| "loss": 0.1155, |
| "step": 1920 |
| }, |
| { |
| "epoch": 5.594202898550725, |
| "grad_norm": 0.6494722962379456, |
| "learning_rate": 9.941127853206503e-05, |
| "loss": 0.1588, |
| "step": 1930 |
| }, |
| { |
| "epoch": 5.6231884057971016, |
| "grad_norm": 0.4848741292953491, |
| "learning_rate": 9.939856161942673e-05, |
| "loss": 0.1489, |
| "step": 1940 |
| }, |
| { |
| "epoch": 5.6521739130434785, |
| "grad_norm": 0.5746407508850098, |
| "learning_rate": 9.938570965300724e-05, |
| "loss": 0.1503, |
| "step": 1950 |
| }, |
| { |
| "epoch": 5.681159420289855, |
| "grad_norm": 0.6178921461105347, |
| "learning_rate": 9.937272266794335e-05, |
| "loss": 0.1297, |
| "step": 1960 |
| }, |
| { |
| "epoch": 5.710144927536232, |
| "grad_norm": 0.48752641677856445, |
| "learning_rate": 9.935960069974096e-05, |
| "loss": 0.1125, |
| "step": 1970 |
| }, |
| { |
| "epoch": 5.739130434782608, |
| "grad_norm": 0.4455469846725464, |
| "learning_rate": 9.934634378427506e-05, |
| "loss": 0.1523, |
| "step": 1980 |
| }, |
| { |
| "epoch": 5.768115942028985, |
| "grad_norm": 0.8876426219940186, |
| "learning_rate": 9.933295195778954e-05, |
| "loss": 0.1284, |
| "step": 1990 |
| }, |
| { |
| "epoch": 5.797101449275362, |
| "grad_norm": 0.5639053583145142, |
| "learning_rate": 9.931942525689715e-05, |
| "loss": 0.1557, |
| "step": 2000 |
| }, |
| { |
| "epoch": 5.826086956521739, |
| "grad_norm": 0.5348621606826782, |
| "learning_rate": 9.930576371857936e-05, |
| "loss": 0.1416, |
| "step": 2010 |
| }, |
| { |
| "epoch": 5.855072463768116, |
| "grad_norm": 0.4637743830680847, |
| "learning_rate": 9.929196738018629e-05, |
| "loss": 0.1387, |
| "step": 2020 |
| }, |
| { |
| "epoch": 5.884057971014493, |
| "grad_norm": 0.7224751114845276, |
| "learning_rate": 9.927803627943662e-05, |
| "loss": 0.1483, |
| "step": 2030 |
| }, |
| { |
| "epoch": 5.913043478260869, |
| "grad_norm": 0.4575344920158386, |
| "learning_rate": 9.926397045441744e-05, |
| "loss": 0.1525, |
| "step": 2040 |
| }, |
| { |
| "epoch": 5.942028985507246, |
| "grad_norm": 0.4177353084087372, |
| "learning_rate": 9.924976994358417e-05, |
| "loss": 0.137, |
| "step": 2050 |
| }, |
| { |
| "epoch": 5.971014492753623, |
| "grad_norm": 0.5887998938560486, |
| "learning_rate": 9.923543478576048e-05, |
| "loss": 0.1799, |
| "step": 2060 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.6577372550964355, |
| "learning_rate": 9.922096502013813e-05, |
| "loss": 0.1675, |
| "step": 2070 |
| }, |
| { |
| "epoch": 6.028985507246377, |
| "grad_norm": 0.6861566305160522, |
| "learning_rate": 9.92063606862769e-05, |
| "loss": 0.143, |
| "step": 2080 |
| }, |
| { |
| "epoch": 6.057971014492754, |
| "grad_norm": 0.5720553994178772, |
| "learning_rate": 9.919162182410453e-05, |
| "loss": 0.1264, |
| "step": 2090 |
| }, |
| { |
| "epoch": 6.086956521739131, |
| "grad_norm": 0.6558146476745605, |
| "learning_rate": 9.917674847391645e-05, |
| "loss": 0.1398, |
| "step": 2100 |
| }, |
| { |
| "epoch": 6.115942028985507, |
| "grad_norm": 0.4062115252017975, |
| "learning_rate": 9.916174067637584e-05, |
| "loss": 0.1402, |
| "step": 2110 |
| }, |
| { |
| "epoch": 6.144927536231884, |
| "grad_norm": 0.5962466597557068, |
| "learning_rate": 9.914659847251348e-05, |
| "loss": 0.1459, |
| "step": 2120 |
| }, |
| { |
| "epoch": 6.173913043478261, |
| "grad_norm": 0.5116047263145447, |
| "learning_rate": 9.913132190372753e-05, |
| "loss": 0.1502, |
| "step": 2130 |
| }, |
| { |
| "epoch": 6.202898550724638, |
| "grad_norm": 0.6019411683082581, |
| "learning_rate": 9.911591101178359e-05, |
| "loss": 0.1373, |
| "step": 2140 |
| }, |
| { |
| "epoch": 6.231884057971015, |
| "grad_norm": 0.7383087873458862, |
| "learning_rate": 9.910036583881443e-05, |
| "loss": 0.1614, |
| "step": 2150 |
| }, |
| { |
| "epoch": 6.260869565217392, |
| "grad_norm": 0.6318684816360474, |
| "learning_rate": 9.908468642731995e-05, |
| "loss": 0.1571, |
| "step": 2160 |
| }, |
| { |
| "epoch": 6.2898550724637685, |
| "grad_norm": 0.4686439633369446, |
| "learning_rate": 9.906887282016707e-05, |
| "loss": 0.1431, |
| "step": 2170 |
| }, |
| { |
| "epoch": 6.318840579710145, |
| "grad_norm": 0.5213261842727661, |
| "learning_rate": 9.90529250605896e-05, |
| "loss": 0.1661, |
| "step": 2180 |
| }, |
| { |
| "epoch": 6.3478260869565215, |
| "grad_norm": 0.5317389369010925, |
| "learning_rate": 9.903684319218809e-05, |
| "loss": 0.1251, |
| "step": 2190 |
| }, |
| { |
| "epoch": 6.3768115942028984, |
| "grad_norm": 0.4725372791290283, |
| "learning_rate": 9.902062725892976e-05, |
| "loss": 0.1367, |
| "step": 2200 |
| }, |
| { |
| "epoch": 6.405797101449275, |
| "grad_norm": 0.5488022565841675, |
| "learning_rate": 9.900427730514834e-05, |
| "loss": 0.1295, |
| "step": 2210 |
| }, |
| { |
| "epoch": 6.434782608695652, |
| "grad_norm": 0.402173787355423, |
| "learning_rate": 9.8987793375544e-05, |
| "loss": 0.1478, |
| "step": 2220 |
| }, |
| { |
| "epoch": 6.463768115942029, |
| "grad_norm": 0.6250830292701721, |
| "learning_rate": 9.897117551518318e-05, |
| "loss": 0.1516, |
| "step": 2230 |
| }, |
| { |
| "epoch": 6.492753623188406, |
| "grad_norm": 0.4163563549518585, |
| "learning_rate": 9.895442376949844e-05, |
| "loss": 0.1209, |
| "step": 2240 |
| }, |
| { |
| "epoch": 6.521739130434782, |
| "grad_norm": 0.709176778793335, |
| "learning_rate": 9.893753818428845e-05, |
| "loss": 0.1412, |
| "step": 2250 |
| }, |
| { |
| "epoch": 6.550724637681159, |
| "grad_norm": 0.526637077331543, |
| "learning_rate": 9.892051880571773e-05, |
| "loss": 0.1622, |
| "step": 2260 |
| }, |
| { |
| "epoch": 6.579710144927536, |
| "grad_norm": 0.5909827351570129, |
| "learning_rate": 9.890336568031663e-05, |
| "loss": 0.156, |
| "step": 2270 |
| }, |
| { |
| "epoch": 6.608695652173913, |
| "grad_norm": 0.6670017838478088, |
| "learning_rate": 9.888607885498113e-05, |
| "loss": 0.1487, |
| "step": 2280 |
| }, |
| { |
| "epoch": 6.63768115942029, |
| "grad_norm": 0.6181092858314514, |
| "learning_rate": 9.886865837697275e-05, |
| "loss": 0.151, |
| "step": 2290 |
| }, |
| { |
| "epoch": 6.666666666666667, |
| "grad_norm": 0.4304220378398895, |
| "learning_rate": 9.88511042939184e-05, |
| "loss": 0.1463, |
| "step": 2300 |
| }, |
| { |
| "epoch": 6.695652173913043, |
| "grad_norm": 0.40652596950531006, |
| "learning_rate": 9.883341665381028e-05, |
| "loss": 0.1495, |
| "step": 2310 |
| }, |
| { |
| "epoch": 6.72463768115942, |
| "grad_norm": 0.43385979533195496, |
| "learning_rate": 9.881559550500575e-05, |
| "loss": 0.1357, |
| "step": 2320 |
| }, |
| { |
| "epoch": 6.753623188405797, |
| "grad_norm": 0.4716493487358093, |
| "learning_rate": 9.879764089622712e-05, |
| "loss": 0.1589, |
| "step": 2330 |
| }, |
| { |
| "epoch": 6.782608695652174, |
| "grad_norm": 0.4198303520679474, |
| "learning_rate": 9.87795528765616e-05, |
| "loss": 0.1314, |
| "step": 2340 |
| }, |
| { |
| "epoch": 6.811594202898551, |
| "grad_norm": 0.5235840082168579, |
| "learning_rate": 9.876133149546118e-05, |
| "loss": 0.1525, |
| "step": 2350 |
| }, |
| { |
| "epoch": 6.840579710144928, |
| "grad_norm": 0.3913216292858124, |
| "learning_rate": 9.874297680274238e-05, |
| "loss": 0.1571, |
| "step": 2360 |
| }, |
| { |
| "epoch": 6.869565217391305, |
| "grad_norm": 0.38975727558135986, |
| "learning_rate": 9.872448884858624e-05, |
| "loss": 0.1561, |
| "step": 2370 |
| }, |
| { |
| "epoch": 6.898550724637682, |
| "grad_norm": 0.2768588662147522, |
| "learning_rate": 9.870586768353815e-05, |
| "loss": 0.1152, |
| "step": 2380 |
| }, |
| { |
| "epoch": 6.927536231884058, |
| "grad_norm": 0.48241758346557617, |
| "learning_rate": 9.868711335850764e-05, |
| "loss": 0.1588, |
| "step": 2390 |
| }, |
| { |
| "epoch": 6.956521739130435, |
| "grad_norm": 0.4768286347389221, |
| "learning_rate": 9.866822592476833e-05, |
| "loss": 0.1518, |
| "step": 2400 |
| }, |
| { |
| "epoch": 6.9855072463768115, |
| "grad_norm": 0.5642341375350952, |
| "learning_rate": 9.86492054339577e-05, |
| "loss": 0.1345, |
| "step": 2410 |
| }, |
| { |
| "epoch": 7.0144927536231885, |
| "grad_norm": 0.4740188717842102, |
| "learning_rate": 9.863005193807711e-05, |
| "loss": 0.1148, |
| "step": 2420 |
| }, |
| { |
| "epoch": 7.043478260869565, |
| "grad_norm": 0.3090324103832245, |
| "learning_rate": 9.861076548949143e-05, |
| "loss": 0.1197, |
| "step": 2430 |
| }, |
| { |
| "epoch": 7.072463768115942, |
| "grad_norm": 0.4523588716983795, |
| "learning_rate": 9.859134614092912e-05, |
| "loss": 0.1443, |
| "step": 2440 |
| }, |
| { |
| "epoch": 7.101449275362318, |
| "grad_norm": 0.539725124835968, |
| "learning_rate": 9.857179394548191e-05, |
| "loss": 0.1371, |
| "step": 2450 |
| }, |
| { |
| "epoch": 7.130434782608695, |
| "grad_norm": 0.5571834444999695, |
| "learning_rate": 9.855210895660477e-05, |
| "loss": 0.1456, |
| "step": 2460 |
| }, |
| { |
| "epoch": 7.159420289855072, |
| "grad_norm": 0.4227403402328491, |
| "learning_rate": 9.853229122811568e-05, |
| "loss": 0.1377, |
| "step": 2470 |
| }, |
| { |
| "epoch": 7.188405797101449, |
| "grad_norm": 0.4217086434364319, |
| "learning_rate": 9.851234081419559e-05, |
| "loss": 0.1331, |
| "step": 2480 |
| }, |
| { |
| "epoch": 7.217391304347826, |
| "grad_norm": 0.47015127539634705, |
| "learning_rate": 9.849225776938814e-05, |
| "loss": 0.1382, |
| "step": 2490 |
| }, |
| { |
| "epoch": 7.246376811594203, |
| "grad_norm": 0.6300743818283081, |
| "learning_rate": 9.847204214859964e-05, |
| "loss": 0.1437, |
| "step": 2500 |
| }, |
| { |
| "epoch": 7.27536231884058, |
| "grad_norm": 0.49502405524253845, |
| "learning_rate": 9.845169400709879e-05, |
| "loss": 0.1415, |
| "step": 2510 |
| }, |
| { |
| "epoch": 7.304347826086957, |
| "grad_norm": 0.5468514561653137, |
| "learning_rate": 9.843121340051664e-05, |
| "loss": 0.1363, |
| "step": 2520 |
| }, |
| { |
| "epoch": 7.333333333333333, |
| "grad_norm": 0.5560225248336792, |
| "learning_rate": 9.841060038484641e-05, |
| "loss": 0.14, |
| "step": 2530 |
| }, |
| { |
| "epoch": 7.36231884057971, |
| "grad_norm": 0.6520473957061768, |
| "learning_rate": 9.838985501644328e-05, |
| "loss": 0.1538, |
| "step": 2540 |
| }, |
| { |
| "epoch": 7.391304347826087, |
| "grad_norm": 0.71478271484375, |
| "learning_rate": 9.83689773520243e-05, |
| "loss": 0.1521, |
| "step": 2550 |
| }, |
| { |
| "epoch": 7.420289855072464, |
| "grad_norm": 0.41255566477775574, |
| "learning_rate": 9.834796744866819e-05, |
| "loss": 0.1469, |
| "step": 2560 |
| }, |
| { |
| "epoch": 7.449275362318841, |
| "grad_norm": 0.41565924882888794, |
| "learning_rate": 9.832682536381525e-05, |
| "loss": 0.1522, |
| "step": 2570 |
| }, |
| { |
| "epoch": 7.478260869565218, |
| "grad_norm": 0.6504526138305664, |
| "learning_rate": 9.830555115526711e-05, |
| "loss": 0.1318, |
| "step": 2580 |
| }, |
| { |
| "epoch": 7.507246376811594, |
| "grad_norm": 0.3729122281074524, |
| "learning_rate": 9.828414488118667e-05, |
| "loss": 0.108, |
| "step": 2590 |
| }, |
| { |
| "epoch": 7.536231884057971, |
| "grad_norm": 0.6625639796257019, |
| "learning_rate": 9.826260660009785e-05, |
| "loss": 0.1773, |
| "step": 2600 |
| }, |
| { |
| "epoch": 7.565217391304348, |
| "grad_norm": 1.0479519367218018, |
| "learning_rate": 9.824093637088547e-05, |
| "loss": 0.1384, |
| "step": 2610 |
| }, |
| { |
| "epoch": 7.594202898550725, |
| "grad_norm": 0.4728688597679138, |
| "learning_rate": 9.821913425279514e-05, |
| "loss": 0.144, |
| "step": 2620 |
| }, |
| { |
| "epoch": 7.6231884057971016, |
| "grad_norm": 0.5890956521034241, |
| "learning_rate": 9.8197200305433e-05, |
| "loss": 0.1556, |
| "step": 2630 |
| }, |
| { |
| "epoch": 7.6521739130434785, |
| "grad_norm": 0.5349107384681702, |
| "learning_rate": 9.817513458876564e-05, |
| "loss": 0.1333, |
| "step": 2640 |
| }, |
| { |
| "epoch": 7.681159420289855, |
| "grad_norm": 0.3802502155303955, |
| "learning_rate": 9.815293716311987e-05, |
| "loss": 0.1366, |
| "step": 2650 |
| }, |
| { |
| "epoch": 7.710144927536232, |
| "grad_norm": 0.539300262928009, |
| "learning_rate": 9.813060808918262e-05, |
| "loss": 0.1531, |
| "step": 2660 |
| }, |
| { |
| "epoch": 7.739130434782608, |
| "grad_norm": 0.45709091424942017, |
| "learning_rate": 9.810814742800069e-05, |
| "loss": 0.1543, |
| "step": 2670 |
| }, |
| { |
| "epoch": 7.768115942028985, |
| "grad_norm": 0.44815441966056824, |
| "learning_rate": 9.808555524098074e-05, |
| "loss": 0.1281, |
| "step": 2680 |
| }, |
| { |
| "epoch": 7.797101449275362, |
| "grad_norm": 0.45325276255607605, |
| "learning_rate": 9.806283158988887e-05, |
| "loss": 0.136, |
| "step": 2690 |
| }, |
| { |
| "epoch": 7.826086956521739, |
| "grad_norm": 0.41119185090065, |
| "learning_rate": 9.803997653685072e-05, |
| "loss": 0.1382, |
| "step": 2700 |
| }, |
| { |
| "epoch": 7.855072463768116, |
| "grad_norm": 0.5879584550857544, |
| "learning_rate": 9.801699014435112e-05, |
| "loss": 0.1433, |
| "step": 2710 |
| }, |
| { |
| "epoch": 7.884057971014493, |
| "grad_norm": 0.3625235855579376, |
| "learning_rate": 9.799387247523398e-05, |
| "loss": 0.127, |
| "step": 2720 |
| }, |
| { |
| "epoch": 7.913043478260869, |
| "grad_norm": 0.6583592891693115, |
| "learning_rate": 9.797062359270215e-05, |
| "loss": 0.16, |
| "step": 2730 |
| }, |
| { |
| "epoch": 7.942028985507246, |
| "grad_norm": 0.3526526689529419, |
| "learning_rate": 9.794724356031715e-05, |
| "loss": 0.1129, |
| "step": 2740 |
| }, |
| { |
| "epoch": 7.971014492753623, |
| "grad_norm": 0.4039490818977356, |
| "learning_rate": 9.792373244199913e-05, |
| "loss": 0.145, |
| "step": 2750 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.9839149117469788, |
| "learning_rate": 9.790009030202658e-05, |
| "loss": 0.1548, |
| "step": 2760 |
| }, |
| { |
| "epoch": 8.028985507246377, |
| "grad_norm": 0.5473302602767944, |
| "learning_rate": 9.78763172050362e-05, |
| "loss": 0.1357, |
| "step": 2770 |
| }, |
| { |
| "epoch": 8.057971014492754, |
| "grad_norm": 0.4842037260532379, |
| "learning_rate": 9.785241321602274e-05, |
| "loss": 0.1599, |
| "step": 2780 |
| }, |
| { |
| "epoch": 8.08695652173913, |
| "grad_norm": 0.6084038615226746, |
| "learning_rate": 9.782837840033879e-05, |
| "loss": 0.1236, |
| "step": 2790 |
| }, |
| { |
| "epoch": 8.115942028985508, |
| "grad_norm": 0.5223290324211121, |
| "learning_rate": 9.780421282369461e-05, |
| "loss": 0.1185, |
| "step": 2800 |
| }, |
| { |
| "epoch": 8.144927536231885, |
| "grad_norm": 0.49084579944610596, |
| "learning_rate": 9.777991655215797e-05, |
| "loss": 0.1335, |
| "step": 2810 |
| }, |
| { |
| "epoch": 8.173913043478262, |
| "grad_norm": 0.5133453607559204, |
| "learning_rate": 9.775548965215394e-05, |
| "loss": 0.143, |
| "step": 2820 |
| }, |
| { |
| "epoch": 8.202898550724637, |
| "grad_norm": 0.5703955292701721, |
| "learning_rate": 9.773093219046474e-05, |
| "loss": 0.1714, |
| "step": 2830 |
| }, |
| { |
| "epoch": 8.231884057971014, |
| "grad_norm": 0.3753199279308319, |
| "learning_rate": 9.770624423422954e-05, |
| "loss": 0.1514, |
| "step": 2840 |
| }, |
| { |
| "epoch": 8.26086956521739, |
| "grad_norm": 0.3518688678741455, |
| "learning_rate": 9.768142585094426e-05, |
| "loss": 0.1448, |
| "step": 2850 |
| }, |
| { |
| "epoch": 8.289855072463768, |
| "grad_norm": 0.5194658041000366, |
| "learning_rate": 9.765647710846142e-05, |
| "loss": 0.1319, |
| "step": 2860 |
| }, |
| { |
| "epoch": 8.318840579710145, |
| "grad_norm": 0.4543875455856323, |
| "learning_rate": 9.763139807498991e-05, |
| "loss": 0.1525, |
| "step": 2870 |
| }, |
| { |
| "epoch": 8.347826086956522, |
| "grad_norm": 0.5964239239692688, |
| "learning_rate": 9.760618881909487e-05, |
| "loss": 0.1428, |
| "step": 2880 |
| }, |
| { |
| "epoch": 8.376811594202898, |
| "grad_norm": 0.31862547993659973, |
| "learning_rate": 9.758084940969744e-05, |
| "loss": 0.1424, |
| "step": 2890 |
| }, |
| { |
| "epoch": 8.405797101449275, |
| "grad_norm": 0.5183411836624146, |
| "learning_rate": 9.755537991607459e-05, |
| "loss": 0.1235, |
| "step": 2900 |
| }, |
| { |
| "epoch": 8.434782608695652, |
| "grad_norm": 0.5497164130210876, |
| "learning_rate": 9.752978040785895e-05, |
| "loss": 0.1226, |
| "step": 2910 |
| }, |
| { |
| "epoch": 8.46376811594203, |
| "grad_norm": 0.5015374422073364, |
| "learning_rate": 9.750405095503859e-05, |
| "loss": 0.126, |
| "step": 2920 |
| }, |
| { |
| "epoch": 8.492753623188406, |
| "grad_norm": 0.3834163546562195, |
| "learning_rate": 9.747819162795686e-05, |
| "loss": 0.1299, |
| "step": 2930 |
| }, |
| { |
| "epoch": 8.521739130434783, |
| "grad_norm": 0.4107052981853485, |
| "learning_rate": 9.745220249731217e-05, |
| "loss": 0.1399, |
| "step": 2940 |
| }, |
| { |
| "epoch": 8.55072463768116, |
| "grad_norm": 0.6754370331764221, |
| "learning_rate": 9.742608363415781e-05, |
| "loss": 0.1369, |
| "step": 2950 |
| }, |
| { |
| "epoch": 8.579710144927537, |
| "grad_norm": 0.38062620162963867, |
| "learning_rate": 9.739983510990176e-05, |
| "loss": 0.1303, |
| "step": 2960 |
| }, |
| { |
| "epoch": 8.608695652173914, |
| "grad_norm": 0.5319868326187134, |
| "learning_rate": 9.737345699630647e-05, |
| "loss": 0.1393, |
| "step": 2970 |
| }, |
| { |
| "epoch": 8.63768115942029, |
| "grad_norm": 0.28532159328460693, |
| "learning_rate": 9.734694936548869e-05, |
| "loss": 0.1368, |
| "step": 2980 |
| }, |
| { |
| "epoch": 8.666666666666666, |
| "grad_norm": 0.6283175945281982, |
| "learning_rate": 9.732031228991932e-05, |
| "loss": 0.137, |
| "step": 2990 |
| }, |
| { |
| "epoch": 8.695652173913043, |
| "grad_norm": 0.4746125042438507, |
| "learning_rate": 9.729354584242302e-05, |
| "loss": 0.1409, |
| "step": 3000 |
| }, |
| { |
| "epoch": 8.72463768115942, |
| "grad_norm": 0.6005597114562988, |
| "learning_rate": 9.726665009617832e-05, |
| "loss": 0.1407, |
| "step": 3010 |
| }, |
| { |
| "epoch": 8.753623188405797, |
| "grad_norm": 0.4808926284313202, |
| "learning_rate": 9.723962512471714e-05, |
| "loss": 0.1552, |
| "step": 3020 |
| }, |
| { |
| "epoch": 8.782608695652174, |
| "grad_norm": 0.5887641310691833, |
| "learning_rate": 9.72124710019247e-05, |
| "loss": 0.1336, |
| "step": 3030 |
| }, |
| { |
| "epoch": 8.81159420289855, |
| "grad_norm": 0.34358280897140503, |
| "learning_rate": 9.718518780203934e-05, |
| "loss": 0.1367, |
| "step": 3040 |
| }, |
| { |
| "epoch": 8.840579710144928, |
| "grad_norm": 0.4416921138763428, |
| "learning_rate": 9.715777559965228e-05, |
| "loss": 0.1232, |
| "step": 3050 |
| }, |
| { |
| "epoch": 8.869565217391305, |
| "grad_norm": 0.6384701132774353, |
| "learning_rate": 9.713023446970746e-05, |
| "loss": 0.1429, |
| "step": 3060 |
| }, |
| { |
| "epoch": 8.898550724637682, |
| "grad_norm": 0.5382649302482605, |
| "learning_rate": 9.710256448750126e-05, |
| "loss": 0.1606, |
| "step": 3070 |
| }, |
| { |
| "epoch": 8.927536231884059, |
| "grad_norm": 0.3950713276863098, |
| "learning_rate": 9.707476572868235e-05, |
| "loss": 0.131, |
| "step": 3080 |
| }, |
| { |
| "epoch": 8.956521739130435, |
| "grad_norm": 0.38749822974205017, |
| "learning_rate": 9.704683826925149e-05, |
| "loss": 0.1158, |
| "step": 3090 |
| }, |
| { |
| "epoch": 8.985507246376812, |
| "grad_norm": 0.4517150819301605, |
| "learning_rate": 9.701878218556129e-05, |
| "loss": 0.166, |
| "step": 3100 |
| }, |
| { |
| "epoch": 9.014492753623188, |
| "grad_norm": 0.47911375761032104, |
| "learning_rate": 9.699059755431598e-05, |
| "loss": 0.1177, |
| "step": 3110 |
| }, |
| { |
| "epoch": 9.043478260869565, |
| "grad_norm": 0.2541674077510834, |
| "learning_rate": 9.696228445257132e-05, |
| "loss": 0.1254, |
| "step": 3120 |
| }, |
| { |
| "epoch": 9.072463768115941, |
| "grad_norm": 0.498009592294693, |
| "learning_rate": 9.693384295773419e-05, |
| "loss": 0.1603, |
| "step": 3130 |
| }, |
| { |
| "epoch": 9.101449275362318, |
| "grad_norm": 0.443220317363739, |
| "learning_rate": 9.690527314756259e-05, |
| "loss": 0.1382, |
| "step": 3140 |
| }, |
| { |
| "epoch": 9.130434782608695, |
| "grad_norm": 0.32711514830589294, |
| "learning_rate": 9.687657510016527e-05, |
| "loss": 0.1351, |
| "step": 3150 |
| }, |
| { |
| "epoch": 9.159420289855072, |
| "grad_norm": 0.4041106402873993, |
| "learning_rate": 9.684774889400161e-05, |
| "loss": 0.132, |
| "step": 3160 |
| }, |
| { |
| "epoch": 9.18840579710145, |
| "grad_norm": 0.3735228180885315, |
| "learning_rate": 9.681879460788135e-05, |
| "loss": 0.1204, |
| "step": 3170 |
| }, |
| { |
| "epoch": 9.217391304347826, |
| "grad_norm": 0.4736388921737671, |
| "learning_rate": 9.67897123209644e-05, |
| "loss": 0.1156, |
| "step": 3180 |
| }, |
| { |
| "epoch": 9.246376811594203, |
| "grad_norm": 0.39969536662101746, |
| "learning_rate": 9.676050211276062e-05, |
| "loss": 0.1488, |
| "step": 3190 |
| }, |
| { |
| "epoch": 9.27536231884058, |
| "grad_norm": 0.5019108057022095, |
| "learning_rate": 9.673116406312962e-05, |
| "loss": 0.1351, |
| "step": 3200 |
| }, |
| { |
| "epoch": 9.304347826086957, |
| "grad_norm": 0.45118093490600586, |
| "learning_rate": 9.67016982522805e-05, |
| "loss": 0.1263, |
| "step": 3210 |
| }, |
| { |
| "epoch": 9.333333333333334, |
| "grad_norm": 0.5472857356071472, |
| "learning_rate": 9.667210476077164e-05, |
| "loss": 0.1648, |
| "step": 3220 |
| }, |
| { |
| "epoch": 9.36231884057971, |
| "grad_norm": 0.32493582367897034, |
| "learning_rate": 9.664238366951055e-05, |
| "loss": 0.1309, |
| "step": 3230 |
| }, |
| { |
| "epoch": 9.391304347826088, |
| "grad_norm": 0.7096918821334839, |
| "learning_rate": 9.661253505975355e-05, |
| "loss": 0.1383, |
| "step": 3240 |
| }, |
| { |
| "epoch": 9.420289855072463, |
| "grad_norm": 0.5345839858055115, |
| "learning_rate": 9.658255901310557e-05, |
| "loss": 0.1198, |
| "step": 3250 |
| }, |
| { |
| "epoch": 9.44927536231884, |
| "grad_norm": 0.5087151527404785, |
| "learning_rate": 9.655245561152e-05, |
| "loss": 0.1199, |
| "step": 3260 |
| }, |
| { |
| "epoch": 9.478260869565217, |
| "grad_norm": 0.2939687967300415, |
| "learning_rate": 9.65222249372984e-05, |
| "loss": 0.1342, |
| "step": 3270 |
| }, |
| { |
| "epoch": 9.507246376811594, |
| "grad_norm": 0.3696477711200714, |
| "learning_rate": 9.649186707309026e-05, |
| "loss": 0.1361, |
| "step": 3280 |
| }, |
| { |
| "epoch": 9.53623188405797, |
| "grad_norm": 0.4263698160648346, |
| "learning_rate": 9.646138210189283e-05, |
| "loss": 0.1453, |
| "step": 3290 |
| }, |
| { |
| "epoch": 9.565217391304348, |
| "grad_norm": 0.40898415446281433, |
| "learning_rate": 9.643077010705087e-05, |
| "loss": 0.112, |
| "step": 3300 |
| }, |
| { |
| "epoch": 9.594202898550725, |
| "grad_norm": 0.37168997526168823, |
| "learning_rate": 9.640003117225637e-05, |
| "loss": 0.1338, |
| "step": 3310 |
| }, |
| { |
| "epoch": 9.623188405797102, |
| "grad_norm": 0.4604577124118805, |
| "learning_rate": 9.636916538154846e-05, |
| "loss": 0.1511, |
| "step": 3320 |
| }, |
| { |
| "epoch": 9.652173913043478, |
| "grad_norm": 0.5092346668243408, |
| "learning_rate": 9.633817281931296e-05, |
| "loss": 0.1197, |
| "step": 3330 |
| }, |
| { |
| "epoch": 9.681159420289855, |
| "grad_norm": 0.43370747566223145, |
| "learning_rate": 9.630705357028242e-05, |
| "loss": 0.144, |
| "step": 3340 |
| }, |
| { |
| "epoch": 9.710144927536232, |
| "grad_norm": 0.4658154249191284, |
| "learning_rate": 9.627580771953563e-05, |
| "loss": 0.1453, |
| "step": 3350 |
| }, |
| { |
| "epoch": 9.73913043478261, |
| "grad_norm": 0.4420405924320221, |
| "learning_rate": 9.624443535249759e-05, |
| "loss": 0.1331, |
| "step": 3360 |
| }, |
| { |
| "epoch": 9.768115942028986, |
| "grad_norm": 0.4711594879627228, |
| "learning_rate": 9.621293655493913e-05, |
| "loss": 0.1204, |
| "step": 3370 |
| }, |
| { |
| "epoch": 9.797101449275363, |
| "grad_norm": 0.2817968428134918, |
| "learning_rate": 9.618131141297675e-05, |
| "loss": 0.1309, |
| "step": 3380 |
| }, |
| { |
| "epoch": 9.826086956521738, |
| "grad_norm": 0.3537946343421936, |
| "learning_rate": 9.614956001307242e-05, |
| "loss": 0.1464, |
| "step": 3390 |
| }, |
| { |
| "epoch": 9.855072463768115, |
| "grad_norm": 0.30007612705230713, |
| "learning_rate": 9.611768244203321e-05, |
| "loss": 0.1186, |
| "step": 3400 |
| }, |
| { |
| "epoch": 9.884057971014492, |
| "grad_norm": 0.41064971685409546, |
| "learning_rate": 9.60856787870112e-05, |
| "loss": 0.1263, |
| "step": 3410 |
| }, |
| { |
| "epoch": 9.91304347826087, |
| "grad_norm": 0.4655996263027191, |
| "learning_rate": 9.605354913550318e-05, |
| "loss": 0.1514, |
| "step": 3420 |
| }, |
| { |
| "epoch": 9.942028985507246, |
| "grad_norm": 0.5630468726158142, |
| "learning_rate": 9.602129357535037e-05, |
| "loss": 0.1315, |
| "step": 3430 |
| }, |
| { |
| "epoch": 9.971014492753623, |
| "grad_norm": 0.7113257646560669, |
| "learning_rate": 9.598891219473825e-05, |
| "loss": 0.1179, |
| "step": 3440 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.7581853866577148, |
| "learning_rate": 9.595640508219625e-05, |
| "loss": 0.1434, |
| "step": 3450 |
| }, |
| { |
| "epoch": 10.028985507246377, |
| "grad_norm": 0.6476505994796753, |
| "learning_rate": 9.592377232659761e-05, |
| "loss": 0.1276, |
| "step": 3460 |
| }, |
| { |
| "epoch": 10.057971014492754, |
| "grad_norm": 0.4075034260749817, |
| "learning_rate": 9.589101401715904e-05, |
| "loss": 0.142, |
| "step": 3470 |
| }, |
| { |
| "epoch": 10.08695652173913, |
| "grad_norm": 0.7294759154319763, |
| "learning_rate": 9.585813024344045e-05, |
| "loss": 0.1464, |
| "step": 3480 |
| }, |
| { |
| "epoch": 10.115942028985508, |
| "grad_norm": 0.3397752642631531, |
| "learning_rate": 9.58251210953449e-05, |
| "loss": 0.1374, |
| "step": 3490 |
| }, |
| { |
| "epoch": 10.144927536231885, |
| "grad_norm": 0.4181293547153473, |
| "learning_rate": 9.579198666311809e-05, |
| "loss": 0.1442, |
| "step": 3500 |
| }, |
| { |
| "epoch": 10.173913043478262, |
| "grad_norm": 0.45683369040489197, |
| "learning_rate": 9.575872703734832e-05, |
| "loss": 0.142, |
| "step": 3510 |
| }, |
| { |
| "epoch": 10.202898550724637, |
| "grad_norm": 0.37618064880371094, |
| "learning_rate": 9.572534230896611e-05, |
| "loss": 0.1256, |
| "step": 3520 |
| }, |
| { |
| "epoch": 10.231884057971014, |
| "grad_norm": 0.581132709980011, |
| "learning_rate": 9.569183256924403e-05, |
| "loss": 0.1547, |
| "step": 3530 |
| }, |
| { |
| "epoch": 10.26086956521739, |
| "grad_norm": 0.35314807295799255, |
| "learning_rate": 9.565819790979646e-05, |
| "loss": 0.119, |
| "step": 3540 |
| }, |
| { |
| "epoch": 10.289855072463768, |
| "grad_norm": 0.43084269762039185, |
| "learning_rate": 9.562443842257925e-05, |
| "loss": 0.1155, |
| "step": 3550 |
| }, |
| { |
| "epoch": 10.318840579710145, |
| "grad_norm": 0.37022560834884644, |
| "learning_rate": 9.559055419988956e-05, |
| "loss": 0.1609, |
| "step": 3560 |
| }, |
| { |
| "epoch": 10.347826086956522, |
| "grad_norm": 0.2883586883544922, |
| "learning_rate": 9.555654533436557e-05, |
| "loss": 0.1052, |
| "step": 3570 |
| }, |
| { |
| "epoch": 10.376811594202898, |
| "grad_norm": 0.5148602724075317, |
| "learning_rate": 9.552241191898621e-05, |
| "loss": 0.1423, |
| "step": 3580 |
| }, |
| { |
| "epoch": 10.405797101449275, |
| "grad_norm": 0.4749770164489746, |
| "learning_rate": 9.548815404707092e-05, |
| "loss": 0.1194, |
| "step": 3590 |
| }, |
| { |
| "epoch": 10.434782608695652, |
| "grad_norm": 0.4021095335483551, |
| "learning_rate": 9.545377181227942e-05, |
| "loss": 0.124, |
| "step": 3600 |
| }, |
| { |
| "epoch": 10.46376811594203, |
| "grad_norm": 0.30841973423957825, |
| "learning_rate": 9.541926530861145e-05, |
| "loss": 0.1195, |
| "step": 3610 |
| }, |
| { |
| "epoch": 10.492753623188406, |
| "grad_norm": 0.3576466739177704, |
| "learning_rate": 9.538463463040645e-05, |
| "loss": 0.1169, |
| "step": 3620 |
| }, |
| { |
| "epoch": 10.521739130434783, |
| "grad_norm": 0.5112766027450562, |
| "learning_rate": 9.534987987234337e-05, |
| "loss": 0.1283, |
| "step": 3630 |
| }, |
| { |
| "epoch": 10.55072463768116, |
| "grad_norm": 0.27624791860580444, |
| "learning_rate": 9.53150011294404e-05, |
| "loss": 0.1042, |
| "step": 3640 |
| }, |
| { |
| "epoch": 10.579710144927537, |
| "grad_norm": 0.4616936445236206, |
| "learning_rate": 9.527999849705471e-05, |
| "loss": 0.1214, |
| "step": 3650 |
| }, |
| { |
| "epoch": 10.608695652173914, |
| "grad_norm": 0.2872353792190552, |
| "learning_rate": 9.524487207088213e-05, |
| "loss": 0.1272, |
| "step": 3660 |
| }, |
| { |
| "epoch": 10.63768115942029, |
| "grad_norm": 0.3924836218357086, |
| "learning_rate": 9.520962194695698e-05, |
| "loss": 0.1152, |
| "step": 3670 |
| }, |
| { |
| "epoch": 10.666666666666666, |
| "grad_norm": 0.5018351078033447, |
| "learning_rate": 9.517424822165175e-05, |
| "loss": 0.1425, |
| "step": 3680 |
| }, |
| { |
| "epoch": 10.695652173913043, |
| "grad_norm": 0.4114161431789398, |
| "learning_rate": 9.513875099167685e-05, |
| "loss": 0.1287, |
| "step": 3690 |
| }, |
| { |
| "epoch": 10.72463768115942, |
| "grad_norm": 0.8867626190185547, |
| "learning_rate": 9.510313035408035e-05, |
| "loss": 0.1243, |
| "step": 3700 |
| }, |
| { |
| "epoch": 10.753623188405797, |
| "grad_norm": 0.48639723658561707, |
| "learning_rate": 9.506738640624775e-05, |
| "loss": 0.1244, |
| "step": 3710 |
| }, |
| { |
| "epoch": 10.782608695652174, |
| "grad_norm": 0.5300337672233582, |
| "learning_rate": 9.50315192459016e-05, |
| "loss": 0.1339, |
| "step": 3720 |
| }, |
| { |
| "epoch": 10.81159420289855, |
| "grad_norm": 0.4277614951133728, |
| "learning_rate": 9.499552897110136e-05, |
| "loss": 0.148, |
| "step": 3730 |
| }, |
| { |
| "epoch": 10.840579710144928, |
| "grad_norm": 0.41532713174819946, |
| "learning_rate": 9.495941568024304e-05, |
| "loss": 0.1276, |
| "step": 3740 |
| }, |
| { |
| "epoch": 10.869565217391305, |
| "grad_norm": 0.37189435958862305, |
| "learning_rate": 9.492317947205904e-05, |
| "loss": 0.1215, |
| "step": 3750 |
| }, |
| { |
| "epoch": 10.898550724637682, |
| "grad_norm": 0.4247940182685852, |
| "learning_rate": 9.488682044561775e-05, |
| "loss": 0.1248, |
| "step": 3760 |
| }, |
| { |
| "epoch": 10.927536231884059, |
| "grad_norm": 0.4739855229854584, |
| "learning_rate": 9.485033870032335e-05, |
| "loss": 0.1156, |
| "step": 3770 |
| }, |
| { |
| "epoch": 10.956521739130435, |
| "grad_norm": 0.275510311126709, |
| "learning_rate": 9.481373433591556e-05, |
| "loss": 0.129, |
| "step": 3780 |
| }, |
| { |
| "epoch": 10.985507246376812, |
| "grad_norm": 0.4555635154247284, |
| "learning_rate": 9.47770074524693e-05, |
| "loss": 0.1377, |
| "step": 3790 |
| }, |
| { |
| "epoch": 11.014492753623188, |
| "grad_norm": 0.4588840901851654, |
| "learning_rate": 9.474015815039446e-05, |
| "loss": 0.1352, |
| "step": 3800 |
| }, |
| { |
| "epoch": 11.043478260869565, |
| "grad_norm": 0.27891016006469727, |
| "learning_rate": 9.470318653043565e-05, |
| "loss": 0.1242, |
| "step": 3810 |
| }, |
| { |
| "epoch": 11.072463768115941, |
| "grad_norm": 0.34980854392051697, |
| "learning_rate": 9.466609269367185e-05, |
| "loss": 0.1303, |
| "step": 3820 |
| }, |
| { |
| "epoch": 11.101449275362318, |
| "grad_norm": 0.4605090022087097, |
| "learning_rate": 9.46288767415162e-05, |
| "loss": 0.1186, |
| "step": 3830 |
| }, |
| { |
| "epoch": 11.130434782608695, |
| "grad_norm": 0.2761806845664978, |
| "learning_rate": 9.459153877571567e-05, |
| "loss": 0.1285, |
| "step": 3840 |
| }, |
| { |
| "epoch": 11.159420289855072, |
| "grad_norm": 0.4459534287452698, |
| "learning_rate": 9.455407889835087e-05, |
| "loss": 0.1129, |
| "step": 3850 |
| }, |
| { |
| "epoch": 11.18840579710145, |
| "grad_norm": 0.40482795238494873, |
| "learning_rate": 9.451649721183564e-05, |
| "loss": 0.1553, |
| "step": 3860 |
| }, |
| { |
| "epoch": 11.217391304347826, |
| "grad_norm": 0.596967875957489, |
| "learning_rate": 9.447879381891692e-05, |
| "loss": 0.1389, |
| "step": 3870 |
| }, |
| { |
| "epoch": 11.246376811594203, |
| "grad_norm": 0.4592018127441406, |
| "learning_rate": 9.444096882267428e-05, |
| "loss": 0.1375, |
| "step": 3880 |
| }, |
| { |
| "epoch": 11.27536231884058, |
| "grad_norm": 0.4663671851158142, |
| "learning_rate": 9.440302232651988e-05, |
| "loss": 0.1164, |
| "step": 3890 |
| }, |
| { |
| "epoch": 11.304347826086957, |
| "grad_norm": 0.42845603823661804, |
| "learning_rate": 9.436495443419795e-05, |
| "loss": 0.1206, |
| "step": 3900 |
| }, |
| { |
| "epoch": 11.333333333333334, |
| "grad_norm": 0.39661505818367004, |
| "learning_rate": 9.432676524978466e-05, |
| "loss": 0.1007, |
| "step": 3910 |
| }, |
| { |
| "epoch": 11.36231884057971, |
| "grad_norm": 0.3809431195259094, |
| "learning_rate": 9.42884548776878e-05, |
| "loss": 0.147, |
| "step": 3920 |
| }, |
| { |
| "epoch": 11.391304347826088, |
| "grad_norm": 0.3601577877998352, |
| "learning_rate": 9.425002342264646e-05, |
| "loss": 0.1223, |
| "step": 3930 |
| }, |
| { |
| "epoch": 11.420289855072463, |
| "grad_norm": 0.4095447063446045, |
| "learning_rate": 9.421147098973077e-05, |
| "loss": 0.1101, |
| "step": 3940 |
| }, |
| { |
| "epoch": 11.44927536231884, |
| "grad_norm": 0.43890243768692017, |
| "learning_rate": 9.41727976843416e-05, |
| "loss": 0.1257, |
| "step": 3950 |
| }, |
| { |
| "epoch": 11.478260869565217, |
| "grad_norm": 0.31772735714912415, |
| "learning_rate": 9.413400361221029e-05, |
| "loss": 0.1126, |
| "step": 3960 |
| }, |
| { |
| "epoch": 11.507246376811594, |
| "grad_norm": 0.3342031240463257, |
| "learning_rate": 9.409508887939835e-05, |
| "loss": 0.1275, |
| "step": 3970 |
| }, |
| { |
| "epoch": 11.53623188405797, |
| "grad_norm": 0.3726749122142792, |
| "learning_rate": 9.40560535922972e-05, |
| "loss": 0.1108, |
| "step": 3980 |
| }, |
| { |
| "epoch": 11.565217391304348, |
| "grad_norm": 0.4039180278778076, |
| "learning_rate": 9.40168978576278e-05, |
| "loss": 0.1288, |
| "step": 3990 |
| }, |
| { |
| "epoch": 11.594202898550725, |
| "grad_norm": 0.4435559809207916, |
| "learning_rate": 9.397762178244043e-05, |
| "loss": 0.1298, |
| "step": 4000 |
| }, |
| { |
| "epoch": 11.623188405797102, |
| "grad_norm": 0.48986756801605225, |
| "learning_rate": 9.393822547411439e-05, |
| "loss": 0.1584, |
| "step": 4010 |
| }, |
| { |
| "epoch": 11.652173913043478, |
| "grad_norm": 0.33243680000305176, |
| "learning_rate": 9.389870904035769e-05, |
| "loss": 0.1322, |
| "step": 4020 |
| }, |
| { |
| "epoch": 11.681159420289855, |
| "grad_norm": 0.27870336174964905, |
| "learning_rate": 9.385907258920672e-05, |
| "loss": 0.1187, |
| "step": 4030 |
| }, |
| { |
| "epoch": 11.710144927536232, |
| "grad_norm": 0.4363289773464203, |
| "learning_rate": 9.381931622902607e-05, |
| "loss": 0.1322, |
| "step": 4040 |
| }, |
| { |
| "epoch": 11.73913043478261, |
| "grad_norm": 0.39369621872901917, |
| "learning_rate": 9.377944006850807e-05, |
| "loss": 0.1221, |
| "step": 4050 |
| }, |
| { |
| "epoch": 11.768115942028986, |
| "grad_norm": 0.4057519733905792, |
| "learning_rate": 9.373944421667265e-05, |
| "loss": 0.1439, |
| "step": 4060 |
| }, |
| { |
| "epoch": 11.797101449275363, |
| "grad_norm": 0.4745919406414032, |
| "learning_rate": 9.369932878286691e-05, |
| "loss": 0.1367, |
| "step": 4070 |
| }, |
| { |
| "epoch": 11.826086956521738, |
| "grad_norm": 0.5527012944221497, |
| "learning_rate": 9.365909387676494e-05, |
| "loss": 0.1388, |
| "step": 4080 |
| }, |
| { |
| "epoch": 11.855072463768115, |
| "grad_norm": 0.4839910566806793, |
| "learning_rate": 9.361873960836744e-05, |
| "loss": 0.1204, |
| "step": 4090 |
| }, |
| { |
| "epoch": 11.884057971014492, |
| "grad_norm": 0.4102983772754669, |
| "learning_rate": 9.357826608800142e-05, |
| "loss": 0.1202, |
| "step": 4100 |
| }, |
| { |
| "epoch": 11.91304347826087, |
| "grad_norm": 0.382380872964859, |
| "learning_rate": 9.353767342631994e-05, |
| "loss": 0.1247, |
| "step": 4110 |
| }, |
| { |
| "epoch": 11.942028985507246, |
| "grad_norm": 0.384352445602417, |
| "learning_rate": 9.34969617343018e-05, |
| "loss": 0.1364, |
| "step": 4120 |
| }, |
| { |
| "epoch": 11.971014492753623, |
| "grad_norm": 0.46882691979408264, |
| "learning_rate": 9.345613112325122e-05, |
| "loss": 0.1298, |
| "step": 4130 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 0.986838698387146, |
| "learning_rate": 9.34151817047975e-05, |
| "loss": 0.1259, |
| "step": 4140 |
| }, |
| { |
| "epoch": 12.028985507246377, |
| "grad_norm": 0.41458386182785034, |
| "learning_rate": 9.33741135908948e-05, |
| "loss": 0.1174, |
| "step": 4150 |
| }, |
| { |
| "epoch": 12.057971014492754, |
| "grad_norm": 0.5669786930084229, |
| "learning_rate": 9.33329268938218e-05, |
| "loss": 0.1255, |
| "step": 4160 |
| }, |
| { |
| "epoch": 12.08695652173913, |
| "grad_norm": 0.418151319026947, |
| "learning_rate": 9.329162172618132e-05, |
| "loss": 0.1223, |
| "step": 4170 |
| }, |
| { |
| "epoch": 12.115942028985508, |
| "grad_norm": 0.4376254677772522, |
| "learning_rate": 9.325019820090013e-05, |
| "loss": 0.1284, |
| "step": 4180 |
| }, |
| { |
| "epoch": 12.144927536231885, |
| "grad_norm": 0.5084844827651978, |
| "learning_rate": 9.320865643122855e-05, |
| "loss": 0.1225, |
| "step": 4190 |
| }, |
| { |
| "epoch": 12.173913043478262, |
| "grad_norm": 0.3965758979320526, |
| "learning_rate": 9.316699653074023e-05, |
| "loss": 0.1332, |
| "step": 4200 |
| }, |
| { |
| "epoch": 12.202898550724637, |
| "grad_norm": 0.39621663093566895, |
| "learning_rate": 9.312521861333172e-05, |
| "loss": 0.1141, |
| "step": 4210 |
| }, |
| { |
| "epoch": 12.231884057971014, |
| "grad_norm": 0.4206016957759857, |
| "learning_rate": 9.308332279322224e-05, |
| "loss": 0.1282, |
| "step": 4220 |
| }, |
| { |
| "epoch": 12.26086956521739, |
| "grad_norm": 0.34612345695495605, |
| "learning_rate": 9.304130918495338e-05, |
| "loss": 0.1067, |
| "step": 4230 |
| }, |
| { |
| "epoch": 12.289855072463768, |
| "grad_norm": 0.5288470387458801, |
| "learning_rate": 9.299917790338874e-05, |
| "loss": 0.1364, |
| "step": 4240 |
| }, |
| { |
| "epoch": 12.318840579710145, |
| "grad_norm": 0.49290069937705994, |
| "learning_rate": 9.295692906371363e-05, |
| "loss": 0.1348, |
| "step": 4250 |
| }, |
| { |
| "epoch": 12.347826086956522, |
| "grad_norm": 0.2043665647506714, |
| "learning_rate": 9.291456278143476e-05, |
| "loss": 0.1127, |
| "step": 4260 |
| }, |
| { |
| "epoch": 12.376811594202898, |
| "grad_norm": 0.6402058005332947, |
| "learning_rate": 9.287207917237994e-05, |
| "loss": 0.1295, |
| "step": 4270 |
| }, |
| { |
| "epoch": 12.405797101449275, |
| "grad_norm": 0.29695194959640503, |
| "learning_rate": 9.282947835269773e-05, |
| "loss": 0.102, |
| "step": 4280 |
| }, |
| { |
| "epoch": 12.434782608695652, |
| "grad_norm": 0.3424241244792938, |
| "learning_rate": 9.278676043885715e-05, |
| "loss": 0.1275, |
| "step": 4290 |
| }, |
| { |
| "epoch": 12.46376811594203, |
| "grad_norm": 0.4913289546966553, |
| "learning_rate": 9.274392554764733e-05, |
| "loss": 0.1413, |
| "step": 4300 |
| }, |
| { |
| "epoch": 12.492753623188406, |
| "grad_norm": 0.3618018627166748, |
| "learning_rate": 9.270097379617723e-05, |
| "loss": 0.1103, |
| "step": 4310 |
| }, |
| { |
| "epoch": 12.521739130434783, |
| "grad_norm": 0.42373889684677124, |
| "learning_rate": 9.26579053018753e-05, |
| "loss": 0.1198, |
| "step": 4320 |
| }, |
| { |
| "epoch": 12.55072463768116, |
| "grad_norm": 0.3397703170776367, |
| "learning_rate": 9.261472018248918e-05, |
| "loss": 0.1132, |
| "step": 4330 |
| }, |
| { |
| "epoch": 12.579710144927537, |
| "grad_norm": 0.4344271719455719, |
| "learning_rate": 9.25714185560853e-05, |
| "loss": 0.1173, |
| "step": 4340 |
| }, |
| { |
| "epoch": 12.608695652173914, |
| "grad_norm": 0.4063388705253601, |
| "learning_rate": 9.252800054104868e-05, |
| "loss": 0.108, |
| "step": 4350 |
| }, |
| { |
| "epoch": 12.63768115942029, |
| "grad_norm": 0.3664158880710602, |
| "learning_rate": 9.248446625608252e-05, |
| "loss": 0.1152, |
| "step": 4360 |
| }, |
| { |
| "epoch": 12.666666666666666, |
| "grad_norm": 0.35261791944503784, |
| "learning_rate": 9.244081582020789e-05, |
| "loss": 0.1277, |
| "step": 4370 |
| }, |
| { |
| "epoch": 12.695652173913043, |
| "grad_norm": 0.4147641062736511, |
| "learning_rate": 9.239704935276339e-05, |
| "loss": 0.1108, |
| "step": 4380 |
| }, |
| { |
| "epoch": 12.72463768115942, |
| "grad_norm": 0.5231832265853882, |
| "learning_rate": 9.235316697340489e-05, |
| "loss": 0.1287, |
| "step": 4390 |
| }, |
| { |
| "epoch": 12.753623188405797, |
| "grad_norm": 0.40551823377609253, |
| "learning_rate": 9.230916880210512e-05, |
| "loss": 0.1171, |
| "step": 4400 |
| }, |
| { |
| "epoch": 12.782608695652174, |
| "grad_norm": 0.27819085121154785, |
| "learning_rate": 9.226505495915342e-05, |
| "loss": 0.1384, |
| "step": 4410 |
| }, |
| { |
| "epoch": 12.81159420289855, |
| "grad_norm": 0.40564286708831787, |
| "learning_rate": 9.222082556515536e-05, |
| "loss": 0.1157, |
| "step": 4420 |
| }, |
| { |
| "epoch": 12.840579710144928, |
| "grad_norm": 0.4431588351726532, |
| "learning_rate": 9.217648074103242e-05, |
| "loss": 0.1224, |
| "step": 4430 |
| }, |
| { |
| "epoch": 12.869565217391305, |
| "grad_norm": 0.34970754384994507, |
| "learning_rate": 9.213202060802161e-05, |
| "loss": 0.1189, |
| "step": 4440 |
| }, |
| { |
| "epoch": 12.898550724637682, |
| "grad_norm": 0.29916661977767944, |
| "learning_rate": 9.208744528767528e-05, |
| "loss": 0.1139, |
| "step": 4450 |
| }, |
| { |
| "epoch": 12.927536231884059, |
| "grad_norm": 0.3757326304912567, |
| "learning_rate": 9.204275490186064e-05, |
| "loss": 0.1073, |
| "step": 4460 |
| }, |
| { |
| "epoch": 12.956521739130435, |
| "grad_norm": 0.43750470876693726, |
| "learning_rate": 9.199794957275949e-05, |
| "loss": 0.1354, |
| "step": 4470 |
| }, |
| { |
| "epoch": 12.985507246376812, |
| "grad_norm": 0.3462923467159271, |
| "learning_rate": 9.19530294228679e-05, |
| "loss": 0.109, |
| "step": 4480 |
| }, |
| { |
| "epoch": 13.014492753623188, |
| "grad_norm": 0.23552751541137695, |
| "learning_rate": 9.190799457499583e-05, |
| "loss": 0.1315, |
| "step": 4490 |
| }, |
| { |
| "epoch": 13.043478260869565, |
| "grad_norm": 0.44175973534584045, |
| "learning_rate": 9.186284515226686e-05, |
| "loss": 0.1313, |
| "step": 4500 |
| }, |
| { |
| "epoch": 13.072463768115941, |
| "grad_norm": 0.43847179412841797, |
| "learning_rate": 9.181758127811777e-05, |
| "loss": 0.1329, |
| "step": 4510 |
| }, |
| { |
| "epoch": 13.101449275362318, |
| "grad_norm": 0.31816014647483826, |
| "learning_rate": 9.177220307629825e-05, |
| "loss": 0.1265, |
| "step": 4520 |
| }, |
| { |
| "epoch": 13.130434782608695, |
| "grad_norm": 0.4455469846725464, |
| "learning_rate": 9.172671067087059e-05, |
| "loss": 0.1069, |
| "step": 4530 |
| }, |
| { |
| "epoch": 13.159420289855072, |
| "grad_norm": 0.2768830358982086, |
| "learning_rate": 9.16811041862093e-05, |
| "loss": 0.1166, |
| "step": 4540 |
| }, |
| { |
| "epoch": 13.18840579710145, |
| "grad_norm": 0.39586612582206726, |
| "learning_rate": 9.163538374700076e-05, |
| "loss": 0.1239, |
| "step": 4550 |
| }, |
| { |
| "epoch": 13.217391304347826, |
| "grad_norm": 0.6842658519744873, |
| "learning_rate": 9.158954947824287e-05, |
| "loss": 0.1196, |
| "step": 4560 |
| }, |
| { |
| "epoch": 13.246376811594203, |
| "grad_norm": 0.3051077127456665, |
| "learning_rate": 9.154360150524482e-05, |
| "loss": 0.1277, |
| "step": 4570 |
| }, |
| { |
| "epoch": 13.27536231884058, |
| "grad_norm": 0.32419049739837646, |
| "learning_rate": 9.14975399536266e-05, |
| "loss": 0.1328, |
| "step": 4580 |
| }, |
| { |
| "epoch": 13.304347826086957, |
| "grad_norm": 0.49009594321250916, |
| "learning_rate": 9.14513649493187e-05, |
| "loss": 0.11, |
| "step": 4590 |
| }, |
| { |
| "epoch": 13.333333333333334, |
| "grad_norm": 0.41023188829421997, |
| "learning_rate": 9.140507661856187e-05, |
| "loss": 0.1204, |
| "step": 4600 |
| }, |
| { |
| "epoch": 13.36231884057971, |
| "grad_norm": 0.27681684494018555, |
| "learning_rate": 9.135867508790661e-05, |
| "loss": 0.127, |
| "step": 4610 |
| }, |
| { |
| "epoch": 13.391304347826088, |
| "grad_norm": 0.33429259061813354, |
| "learning_rate": 9.131216048421291e-05, |
| "loss": 0.1056, |
| "step": 4620 |
| }, |
| { |
| "epoch": 13.420289855072463, |
| "grad_norm": 0.3825032114982605, |
| "learning_rate": 9.126553293464998e-05, |
| "loss": 0.1296, |
| "step": 4630 |
| }, |
| { |
| "epoch": 13.44927536231884, |
| "grad_norm": 0.28926411271095276, |
| "learning_rate": 9.121879256669572e-05, |
| "loss": 0.1088, |
| "step": 4640 |
| }, |
| { |
| "epoch": 13.478260869565217, |
| "grad_norm": 0.24572978913784027, |
| "learning_rate": 9.117193950813652e-05, |
| "loss": 0.1068, |
| "step": 4650 |
| }, |
| { |
| "epoch": 13.507246376811594, |
| "grad_norm": 0.462626576423645, |
| "learning_rate": 9.112497388706685e-05, |
| "loss": 0.1119, |
| "step": 4660 |
| }, |
| { |
| "epoch": 13.53623188405797, |
| "grad_norm": 0.4677536189556122, |
| "learning_rate": 9.10778958318889e-05, |
| "loss": 0.1113, |
| "step": 4670 |
| }, |
| { |
| "epoch": 13.565217391304348, |
| "grad_norm": 0.3768196105957031, |
| "learning_rate": 9.103070547131232e-05, |
| "loss": 0.111, |
| "step": 4680 |
| }, |
| { |
| "epoch": 13.594202898550725, |
| "grad_norm": 0.28670257329940796, |
| "learning_rate": 9.098340293435375e-05, |
| "loss": 0.1007, |
| "step": 4690 |
| }, |
| { |
| "epoch": 13.623188405797102, |
| "grad_norm": 0.3326264023780823, |
| "learning_rate": 9.093598835033649e-05, |
| "loss": 0.1417, |
| "step": 4700 |
| }, |
| { |
| "epoch": 13.652173913043478, |
| "grad_norm": 0.4190509021282196, |
| "learning_rate": 9.088846184889021e-05, |
| "loss": 0.1094, |
| "step": 4710 |
| }, |
| { |
| "epoch": 13.681159420289855, |
| "grad_norm": 0.48827919363975525, |
| "learning_rate": 9.084082355995057e-05, |
| "loss": 0.1145, |
| "step": 4720 |
| }, |
| { |
| "epoch": 13.710144927536232, |
| "grad_norm": 0.42035019397735596, |
| "learning_rate": 9.079307361375882e-05, |
| "loss": 0.1408, |
| "step": 4730 |
| }, |
| { |
| "epoch": 13.73913043478261, |
| "grad_norm": 0.35590943694114685, |
| "learning_rate": 9.074521214086149e-05, |
| "loss": 0.1125, |
| "step": 4740 |
| }, |
| { |
| "epoch": 13.768115942028986, |
| "grad_norm": 0.3481467068195343, |
| "learning_rate": 9.069723927211001e-05, |
| "loss": 0.1306, |
| "step": 4750 |
| }, |
| { |
| "epoch": 13.797101449275363, |
| "grad_norm": 0.5402430891990662, |
| "learning_rate": 9.064915513866037e-05, |
| "loss": 0.131, |
| "step": 4760 |
| }, |
| { |
| "epoch": 13.826086956521738, |
| "grad_norm": 0.4278501272201538, |
| "learning_rate": 9.060095987197279e-05, |
| "loss": 0.1275, |
| "step": 4770 |
| }, |
| { |
| "epoch": 13.855072463768115, |
| "grad_norm": 0.27769970893859863, |
| "learning_rate": 9.055265360381126e-05, |
| "loss": 0.1186, |
| "step": 4780 |
| }, |
| { |
| "epoch": 13.884057971014492, |
| "grad_norm": 0.258645236492157, |
| "learning_rate": 9.050423646624326e-05, |
| "loss": 0.1288, |
| "step": 4790 |
| }, |
| { |
| "epoch": 13.91304347826087, |
| "grad_norm": 0.39688029885292053, |
| "learning_rate": 9.045570859163943e-05, |
| "loss": 0.1174, |
| "step": 4800 |
| }, |
| { |
| "epoch": 13.942028985507246, |
| "grad_norm": 0.4738856554031372, |
| "learning_rate": 9.04070701126731e-05, |
| "loss": 0.1179, |
| "step": 4810 |
| }, |
| { |
| "epoch": 13.971014492753623, |
| "grad_norm": 0.4535987675189972, |
| "learning_rate": 9.035832116232001e-05, |
| "loss": 0.123, |
| "step": 4820 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 0.941590428352356, |
| "learning_rate": 9.030946187385796e-05, |
| "loss": 0.1209, |
| "step": 4830 |
| }, |
| { |
| "epoch": 14.028985507246377, |
| "grad_norm": 0.5170802474021912, |
| "learning_rate": 9.026049238086635e-05, |
| "loss": 0.1152, |
| "step": 4840 |
| }, |
| { |
| "epoch": 14.057971014492754, |
| "grad_norm": 0.2910565435886383, |
| "learning_rate": 9.021141281722591e-05, |
| "loss": 0.1163, |
| "step": 4850 |
| }, |
| { |
| "epoch": 14.08695652173913, |
| "grad_norm": 0.4192429482936859, |
| "learning_rate": 9.01622233171183e-05, |
| "loss": 0.0962, |
| "step": 4860 |
| }, |
| { |
| "epoch": 14.115942028985508, |
| "grad_norm": 0.3841983377933502, |
| "learning_rate": 9.011292401502574e-05, |
| "loss": 0.122, |
| "step": 4870 |
| }, |
| { |
| "epoch": 14.144927536231885, |
| "grad_norm": 0.42944851517677307, |
| "learning_rate": 9.006351504573063e-05, |
| "loss": 0.1251, |
| "step": 4880 |
| }, |
| { |
| "epoch": 14.173913043478262, |
| "grad_norm": 0.3717688322067261, |
| "learning_rate": 9.001399654431519e-05, |
| "loss": 0.1096, |
| "step": 4890 |
| }, |
| { |
| "epoch": 14.202898550724637, |
| "grad_norm": 0.37804022431373596, |
| "learning_rate": 8.996436864616116e-05, |
| "loss": 0.1209, |
| "step": 4900 |
| }, |
| { |
| "epoch": 14.231884057971014, |
| "grad_norm": 0.3129970133304596, |
| "learning_rate": 8.991463148694925e-05, |
| "loss": 0.1097, |
| "step": 4910 |
| }, |
| { |
| "epoch": 14.26086956521739, |
| "grad_norm": 0.40859848260879517, |
| "learning_rate": 8.986478520265902e-05, |
| "loss": 0.1214, |
| "step": 4920 |
| }, |
| { |
| "epoch": 14.289855072463768, |
| "grad_norm": 0.3709128797054291, |
| "learning_rate": 8.981482992956827e-05, |
| "loss": 0.1264, |
| "step": 4930 |
| }, |
| { |
| "epoch": 14.318840579710145, |
| "grad_norm": 0.3855811059474945, |
| "learning_rate": 8.976476580425282e-05, |
| "loss": 0.1113, |
| "step": 4940 |
| }, |
| { |
| "epoch": 14.347826086956522, |
| "grad_norm": 0.28712448477745056, |
| "learning_rate": 8.971459296358606e-05, |
| "loss": 0.0821, |
| "step": 4950 |
| }, |
| { |
| "epoch": 14.376811594202898, |
| "grad_norm": 0.48466065526008606, |
| "learning_rate": 8.966431154473864e-05, |
| "loss": 0.1493, |
| "step": 4960 |
| }, |
| { |
| "epoch": 14.405797101449275, |
| "grad_norm": 0.30625486373901367, |
| "learning_rate": 8.961392168517803e-05, |
| "loss": 0.1163, |
| "step": 4970 |
| }, |
| { |
| "epoch": 14.434782608695652, |
| "grad_norm": 0.32612621784210205, |
| "learning_rate": 8.956342352266821e-05, |
| "loss": 0.1294, |
| "step": 4980 |
| }, |
| { |
| "epoch": 14.46376811594203, |
| "grad_norm": 0.39533373713493347, |
| "learning_rate": 8.95128171952692e-05, |
| "loss": 0.1141, |
| "step": 4990 |
| }, |
| { |
| "epoch": 14.492753623188406, |
| "grad_norm": 0.5708385109901428, |
| "learning_rate": 8.946210284133676e-05, |
| "loss": 0.1435, |
| "step": 5000 |
| }, |
| { |
| "epoch": 14.521739130434783, |
| "grad_norm": 0.41702768206596375, |
| "learning_rate": 8.941128059952201e-05, |
| "loss": 0.1244, |
| "step": 5010 |
| }, |
| { |
| "epoch": 14.55072463768116, |
| "grad_norm": 0.581706702709198, |
| "learning_rate": 8.936035060877102e-05, |
| "loss": 0.1013, |
| "step": 5020 |
| }, |
| { |
| "epoch": 14.579710144927537, |
| "grad_norm": 0.508090078830719, |
| "learning_rate": 8.930931300832443e-05, |
| "loss": 0.0987, |
| "step": 5030 |
| }, |
| { |
| "epoch": 14.608695652173914, |
| "grad_norm": 0.32675766944885254, |
| "learning_rate": 8.925816793771711e-05, |
| "loss": 0.1173, |
| "step": 5040 |
| }, |
| { |
| "epoch": 14.63768115942029, |
| "grad_norm": 0.4030362069606781, |
| "learning_rate": 8.92069155367777e-05, |
| "loss": 0.1054, |
| "step": 5050 |
| }, |
| { |
| "epoch": 14.666666666666666, |
| "grad_norm": 0.4901740550994873, |
| "learning_rate": 8.915555594562834e-05, |
| "loss": 0.1197, |
| "step": 5060 |
| }, |
| { |
| "epoch": 14.695652173913043, |
| "grad_norm": 0.43186917901039124, |
| "learning_rate": 8.910408930468416e-05, |
| "loss": 0.1146, |
| "step": 5070 |
| }, |
| { |
| "epoch": 14.72463768115942, |
| "grad_norm": 0.3401460647583008, |
| "learning_rate": 8.905251575465303e-05, |
| "loss": 0.1237, |
| "step": 5080 |
| }, |
| { |
| "epoch": 14.753623188405797, |
| "grad_norm": 0.2620072662830353, |
| "learning_rate": 8.900083543653502e-05, |
| "loss": 0.123, |
| "step": 5090 |
| }, |
| { |
| "epoch": 14.782608695652174, |
| "grad_norm": 0.3774551451206207, |
| "learning_rate": 8.894904849162218e-05, |
| "loss": 0.1237, |
| "step": 5100 |
| }, |
| { |
| "epoch": 14.81159420289855, |
| "grad_norm": 0.4038746654987335, |
| "learning_rate": 8.889715506149802e-05, |
| "loss": 0.115, |
| "step": 5110 |
| }, |
| { |
| "epoch": 14.840579710144928, |
| "grad_norm": 0.4395363926887512, |
| "learning_rate": 8.884515528803722e-05, |
| "loss": 0.1139, |
| "step": 5120 |
| }, |
| { |
| "epoch": 14.869565217391305, |
| "grad_norm": 0.34769847989082336, |
| "learning_rate": 8.879304931340517e-05, |
| "loss": 0.1211, |
| "step": 5130 |
| }, |
| { |
| "epoch": 14.898550724637682, |
| "grad_norm": 0.3238866925239563, |
| "learning_rate": 8.874083728005759e-05, |
| "loss": 0.1181, |
| "step": 5140 |
| }, |
| { |
| "epoch": 14.927536231884059, |
| "grad_norm": 0.43937593698501587, |
| "learning_rate": 8.868851933074021e-05, |
| "loss": 0.1232, |
| "step": 5150 |
| }, |
| { |
| "epoch": 14.956521739130435, |
| "grad_norm": 0.4402833580970764, |
| "learning_rate": 8.863609560848829e-05, |
| "loss": 0.1365, |
| "step": 5160 |
| }, |
| { |
| "epoch": 14.985507246376812, |
| "grad_norm": 0.6102784276008606, |
| "learning_rate": 8.85835662566263e-05, |
| "loss": 0.1248, |
| "step": 5170 |
| }, |
| { |
| "epoch": 15.014492753623188, |
| "grad_norm": 0.28894439339637756, |
| "learning_rate": 8.853093141876747e-05, |
| "loss": 0.1016, |
| "step": 5180 |
| }, |
| { |
| "epoch": 15.043478260869565, |
| "grad_norm": 0.2645789086818695, |
| "learning_rate": 8.847819123881343e-05, |
| "loss": 0.1256, |
| "step": 5190 |
| }, |
| { |
| "epoch": 15.072463768115941, |
| "grad_norm": 0.38724544644355774, |
| "learning_rate": 8.842534586095383e-05, |
| "loss": 0.1432, |
| "step": 5200 |
| }, |
| { |
| "epoch": 15.101449275362318, |
| "grad_norm": 0.2536871135234833, |
| "learning_rate": 8.837239542966593e-05, |
| "loss": 0.1033, |
| "step": 5210 |
| }, |
| { |
| "epoch": 15.130434782608695, |
| "grad_norm": 0.337372750043869, |
| "learning_rate": 8.831934008971417e-05, |
| "loss": 0.1231, |
| "step": 5220 |
| }, |
| { |
| "epoch": 15.159420289855072, |
| "grad_norm": 0.3590666353702545, |
| "learning_rate": 8.826617998614982e-05, |
| "loss": 0.109, |
| "step": 5230 |
| }, |
| { |
| "epoch": 15.18840579710145, |
| "grad_norm": 0.37052637338638306, |
| "learning_rate": 8.821291526431056e-05, |
| "loss": 0.1001, |
| "step": 5240 |
| }, |
| { |
| "epoch": 15.217391304347826, |
| "grad_norm": 0.5083751082420349, |
| "learning_rate": 8.815954606982015e-05, |
| "loss": 0.1224, |
| "step": 5250 |
| }, |
| { |
| "epoch": 15.246376811594203, |
| "grad_norm": 0.3430265486240387, |
| "learning_rate": 8.810607254858789e-05, |
| "loss": 0.1201, |
| "step": 5260 |
| }, |
| { |
| "epoch": 15.27536231884058, |
| "grad_norm": 0.6075800061225891, |
| "learning_rate": 8.805249484680838e-05, |
| "loss": 0.1281, |
| "step": 5270 |
| }, |
| { |
| "epoch": 15.304347826086957, |
| "grad_norm": 0.6015037894248962, |
| "learning_rate": 8.799881311096096e-05, |
| "loss": 0.1337, |
| "step": 5280 |
| }, |
| { |
| "epoch": 15.333333333333334, |
| "grad_norm": 0.3478599786758423, |
| "learning_rate": 8.794502748780949e-05, |
| "loss": 0.1363, |
| "step": 5290 |
| }, |
| { |
| "epoch": 15.36231884057971, |
| "grad_norm": 0.39971593022346497, |
| "learning_rate": 8.78911381244018e-05, |
| "loss": 0.1015, |
| "step": 5300 |
| }, |
| { |
| "epoch": 15.391304347826088, |
| "grad_norm": 0.38049763441085815, |
| "learning_rate": 8.783714516806933e-05, |
| "loss": 0.1209, |
| "step": 5310 |
| }, |
| { |
| "epoch": 15.420289855072463, |
| "grad_norm": 0.33554980158805847, |
| "learning_rate": 8.77830487664268e-05, |
| "loss": 0.1077, |
| "step": 5320 |
| }, |
| { |
| "epoch": 15.44927536231884, |
| "grad_norm": 0.2598898410797119, |
| "learning_rate": 8.772884906737167e-05, |
| "loss": 0.1056, |
| "step": 5330 |
| }, |
| { |
| "epoch": 15.478260869565217, |
| "grad_norm": 0.30635103583335876, |
| "learning_rate": 8.767454621908387e-05, |
| "loss": 0.1182, |
| "step": 5340 |
| }, |
| { |
| "epoch": 15.507246376811594, |
| "grad_norm": 0.31595268845558167, |
| "learning_rate": 8.76201403700253e-05, |
| "loss": 0.0987, |
| "step": 5350 |
| }, |
| { |
| "epoch": 15.53623188405797, |
| "grad_norm": 0.4669897258281708, |
| "learning_rate": 8.756563166893949e-05, |
| "loss": 0.1093, |
| "step": 5360 |
| }, |
| { |
| "epoch": 15.565217391304348, |
| "grad_norm": 0.41924533247947693, |
| "learning_rate": 8.751102026485113e-05, |
| "loss": 0.0981, |
| "step": 5370 |
| }, |
| { |
| "epoch": 15.594202898550725, |
| "grad_norm": 0.3114607334136963, |
| "learning_rate": 8.745630630706571e-05, |
| "loss": 0.1265, |
| "step": 5380 |
| }, |
| { |
| "epoch": 15.623188405797102, |
| "grad_norm": 0.33994221687316895, |
| "learning_rate": 8.740148994516912e-05, |
| "loss": 0.1061, |
| "step": 5390 |
| }, |
| { |
| "epoch": 15.652173913043478, |
| "grad_norm": 0.5424929857254028, |
| "learning_rate": 8.73465713290272e-05, |
| "loss": 0.1112, |
| "step": 5400 |
| }, |
| { |
| "epoch": 15.681159420289855, |
| "grad_norm": 0.4351734519004822, |
| "learning_rate": 8.729155060878533e-05, |
| "loss": 0.1043, |
| "step": 5410 |
| }, |
| { |
| "epoch": 15.710144927536232, |
| "grad_norm": 0.33228495717048645, |
| "learning_rate": 8.723642793486809e-05, |
| "loss": 0.1257, |
| "step": 5420 |
| }, |
| { |
| "epoch": 15.73913043478261, |
| "grad_norm": 0.4116186201572418, |
| "learning_rate": 8.718120345797873e-05, |
| "loss": 0.1102, |
| "step": 5430 |
| }, |
| { |
| "epoch": 15.768115942028986, |
| "grad_norm": 0.38537874817848206, |
| "learning_rate": 8.712587732909889e-05, |
| "loss": 0.1315, |
| "step": 5440 |
| }, |
| { |
| "epoch": 15.797101449275363, |
| "grad_norm": 0.2920888066291809, |
| "learning_rate": 8.707044969948806e-05, |
| "loss": 0.1393, |
| "step": 5450 |
| }, |
| { |
| "epoch": 15.826086956521738, |
| "grad_norm": 0.3017374277114868, |
| "learning_rate": 8.701492072068329e-05, |
| "loss": 0.1181, |
| "step": 5460 |
| }, |
| { |
| "epoch": 15.855072463768115, |
| "grad_norm": 0.3454197645187378, |
| "learning_rate": 8.695929054449869e-05, |
| "loss": 0.1144, |
| "step": 5470 |
| }, |
| { |
| "epoch": 15.884057971014492, |
| "grad_norm": 0.3054383099079132, |
| "learning_rate": 8.690355932302501e-05, |
| "loss": 0.1149, |
| "step": 5480 |
| }, |
| { |
| "epoch": 15.91304347826087, |
| "grad_norm": 0.6223363280296326, |
| "learning_rate": 8.684772720862931e-05, |
| "loss": 0.1138, |
| "step": 5490 |
| }, |
| { |
| "epoch": 15.942028985507246, |
| "grad_norm": 0.33070531487464905, |
| "learning_rate": 8.679179435395446e-05, |
| "loss": 0.1074, |
| "step": 5500 |
| }, |
| { |
| "epoch": 15.971014492753623, |
| "grad_norm": 0.3179458677768707, |
| "learning_rate": 8.673576091191874e-05, |
| "loss": 0.109, |
| "step": 5510 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 0.2908962070941925, |
| "learning_rate": 8.667962703571541e-05, |
| "loss": 0.0997, |
| "step": 5520 |
| }, |
| { |
| "epoch": 16.028985507246375, |
| "grad_norm": 0.45387428998947144, |
| "learning_rate": 8.662339287881238e-05, |
| "loss": 0.1132, |
| "step": 5530 |
| }, |
| { |
| "epoch": 16.057971014492754, |
| "grad_norm": 0.34306666254997253, |
| "learning_rate": 8.656705859495169e-05, |
| "loss": 0.099, |
| "step": 5540 |
| }, |
| { |
| "epoch": 16.08695652173913, |
| "grad_norm": 0.317571222782135, |
| "learning_rate": 8.651062433814912e-05, |
| "loss": 0.1142, |
| "step": 5550 |
| }, |
| { |
| "epoch": 16.115942028985508, |
| "grad_norm": 0.2807283103466034, |
| "learning_rate": 8.645409026269375e-05, |
| "loss": 0.1085, |
| "step": 5560 |
| }, |
| { |
| "epoch": 16.144927536231883, |
| "grad_norm": 0.48772743344306946, |
| "learning_rate": 8.639745652314759e-05, |
| "loss": 0.1222, |
| "step": 5570 |
| }, |
| { |
| "epoch": 16.17391304347826, |
| "grad_norm": 0.3181246519088745, |
| "learning_rate": 8.634072327434515e-05, |
| "loss": 0.1113, |
| "step": 5580 |
| }, |
| { |
| "epoch": 16.202898550724637, |
| "grad_norm": 0.28259527683258057, |
| "learning_rate": 8.628389067139294e-05, |
| "loss": 0.0973, |
| "step": 5590 |
| }, |
| { |
| "epoch": 16.231884057971016, |
| "grad_norm": 0.21859432756900787, |
| "learning_rate": 8.622695886966911e-05, |
| "loss": 0.105, |
| "step": 5600 |
| }, |
| { |
| "epoch": 16.26086956521739, |
| "grad_norm": 0.38870155811309814, |
| "learning_rate": 8.616992802482308e-05, |
| "loss": 0.1054, |
| "step": 5610 |
| }, |
| { |
| "epoch": 16.28985507246377, |
| "grad_norm": 0.30381137132644653, |
| "learning_rate": 8.611279829277496e-05, |
| "loss": 0.1095, |
| "step": 5620 |
| }, |
| { |
| "epoch": 16.318840579710145, |
| "grad_norm": 0.33329617977142334, |
| "learning_rate": 8.605556982971528e-05, |
| "loss": 0.0896, |
| "step": 5630 |
| }, |
| { |
| "epoch": 16.347826086956523, |
| "grad_norm": 0.3171881437301636, |
| "learning_rate": 8.599824279210447e-05, |
| "loss": 0.1097, |
| "step": 5640 |
| }, |
| { |
| "epoch": 16.3768115942029, |
| "grad_norm": 0.36195775866508484, |
| "learning_rate": 8.594081733667243e-05, |
| "loss": 0.1088, |
| "step": 5650 |
| }, |
| { |
| "epoch": 16.405797101449274, |
| "grad_norm": 0.3968923091888428, |
| "learning_rate": 8.58832936204182e-05, |
| "loss": 0.0995, |
| "step": 5660 |
| }, |
| { |
| "epoch": 16.434782608695652, |
| "grad_norm": 0.515150249004364, |
| "learning_rate": 8.582567180060942e-05, |
| "loss": 0.107, |
| "step": 5670 |
| }, |
| { |
| "epoch": 16.463768115942027, |
| "grad_norm": 0.4465225040912628, |
| "learning_rate": 8.576795203478194e-05, |
| "loss": 0.123, |
| "step": 5680 |
| }, |
| { |
| "epoch": 16.492753623188406, |
| "grad_norm": 0.27907755970954895, |
| "learning_rate": 8.571013448073939e-05, |
| "loss": 0.1023, |
| "step": 5690 |
| }, |
| { |
| "epoch": 16.52173913043478, |
| "grad_norm": 0.4790158271789551, |
| "learning_rate": 8.565221929655275e-05, |
| "loss": 0.1154, |
| "step": 5700 |
| }, |
| { |
| "epoch": 16.55072463768116, |
| "grad_norm": 0.5309686660766602, |
| "learning_rate": 8.559420664055992e-05, |
| "loss": 0.1308, |
| "step": 5710 |
| }, |
| { |
| "epoch": 16.579710144927535, |
| "grad_norm": 0.36980125308036804, |
| "learning_rate": 8.553609667136532e-05, |
| "loss": 0.1177, |
| "step": 5720 |
| }, |
| { |
| "epoch": 16.608695652173914, |
| "grad_norm": 0.33945196866989136, |
| "learning_rate": 8.547788954783936e-05, |
| "loss": 0.1511, |
| "step": 5730 |
| }, |
| { |
| "epoch": 16.63768115942029, |
| "grad_norm": 0.26327815651893616, |
| "learning_rate": 8.541958542911808e-05, |
| "loss": 0.1238, |
| "step": 5740 |
| }, |
| { |
| "epoch": 16.666666666666668, |
| "grad_norm": 0.351123571395874, |
| "learning_rate": 8.536118447460275e-05, |
| "loss": 0.0927, |
| "step": 5750 |
| }, |
| { |
| "epoch": 16.695652173913043, |
| "grad_norm": 0.3815719187259674, |
| "learning_rate": 8.530268684395932e-05, |
| "loss": 0.1071, |
| "step": 5760 |
| }, |
| { |
| "epoch": 16.72463768115942, |
| "grad_norm": 0.4307467043399811, |
| "learning_rate": 8.524409269711807e-05, |
| "loss": 0.1098, |
| "step": 5770 |
| }, |
| { |
| "epoch": 16.753623188405797, |
| "grad_norm": 0.33247116208076477, |
| "learning_rate": 8.51854021942732e-05, |
| "loss": 0.121, |
| "step": 5780 |
| }, |
| { |
| "epoch": 16.782608695652176, |
| "grad_norm": 0.29645007848739624, |
| "learning_rate": 8.512661549588227e-05, |
| "loss": 0.0983, |
| "step": 5790 |
| }, |
| { |
| "epoch": 16.81159420289855, |
| "grad_norm": 0.3584914207458496, |
| "learning_rate": 8.506773276266588e-05, |
| "loss": 0.1093, |
| "step": 5800 |
| }, |
| { |
| "epoch": 16.840579710144926, |
| "grad_norm": 0.4944992959499359, |
| "learning_rate": 8.500875415560721e-05, |
| "loss": 0.12, |
| "step": 5810 |
| }, |
| { |
| "epoch": 16.869565217391305, |
| "grad_norm": 0.3007963001728058, |
| "learning_rate": 8.494967983595144e-05, |
| "loss": 0.1165, |
| "step": 5820 |
| }, |
| { |
| "epoch": 16.89855072463768, |
| "grad_norm": 0.2885436415672302, |
| "learning_rate": 8.489050996520558e-05, |
| "loss": 0.1295, |
| "step": 5830 |
| }, |
| { |
| "epoch": 16.92753623188406, |
| "grad_norm": 0.45604297518730164, |
| "learning_rate": 8.483124470513775e-05, |
| "loss": 0.1038, |
| "step": 5840 |
| }, |
| { |
| "epoch": 16.956521739130434, |
| "grad_norm": 0.34743809700012207, |
| "learning_rate": 8.477188421777692e-05, |
| "loss": 0.1114, |
| "step": 5850 |
| }, |
| { |
| "epoch": 16.985507246376812, |
| "grad_norm": 0.3643774390220642, |
| "learning_rate": 8.47124286654124e-05, |
| "loss": 0.1207, |
| "step": 5860 |
| }, |
| { |
| "epoch": 17.014492753623188, |
| "grad_norm": 0.40412595868110657, |
| "learning_rate": 8.465287821059341e-05, |
| "loss": 0.1355, |
| "step": 5870 |
| }, |
| { |
| "epoch": 17.043478260869566, |
| "grad_norm": 0.27604588866233826, |
| "learning_rate": 8.45932330161286e-05, |
| "loss": 0.1053, |
| "step": 5880 |
| }, |
| { |
| "epoch": 17.07246376811594, |
| "grad_norm": 0.472373366355896, |
| "learning_rate": 8.453349324508567e-05, |
| "loss": 0.1032, |
| "step": 5890 |
| }, |
| { |
| "epoch": 17.10144927536232, |
| "grad_norm": 0.4434383511543274, |
| "learning_rate": 8.447365906079088e-05, |
| "loss": 0.1033, |
| "step": 5900 |
| }, |
| { |
| "epoch": 17.130434782608695, |
| "grad_norm": 0.38238826394081116, |
| "learning_rate": 8.441373062682856e-05, |
| "loss": 0.1056, |
| "step": 5910 |
| }, |
| { |
| "epoch": 17.159420289855074, |
| "grad_norm": 0.34880152344703674, |
| "learning_rate": 8.43537081070408e-05, |
| "loss": 0.0964, |
| "step": 5920 |
| }, |
| { |
| "epoch": 17.18840579710145, |
| "grad_norm": 0.46405625343322754, |
| "learning_rate": 8.429359166552689e-05, |
| "loss": 0.1363, |
| "step": 5930 |
| }, |
| { |
| "epoch": 17.217391304347824, |
| "grad_norm": 0.35732561349868774, |
| "learning_rate": 8.423338146664284e-05, |
| "loss": 0.1046, |
| "step": 5940 |
| }, |
| { |
| "epoch": 17.246376811594203, |
| "grad_norm": 0.3758239150047302, |
| "learning_rate": 8.417307767500107e-05, |
| "loss": 0.0963, |
| "step": 5950 |
| }, |
| { |
| "epoch": 17.27536231884058, |
| "grad_norm": 0.40921303629875183, |
| "learning_rate": 8.411268045546983e-05, |
| "loss": 0.12, |
| "step": 5960 |
| }, |
| { |
| "epoch": 17.304347826086957, |
| "grad_norm": 0.30313900113105774, |
| "learning_rate": 8.405218997317281e-05, |
| "loss": 0.1213, |
| "step": 5970 |
| }, |
| { |
| "epoch": 17.333333333333332, |
| "grad_norm": 0.27081504464149475, |
| "learning_rate": 8.399160639348869e-05, |
| "loss": 0.106, |
| "step": 5980 |
| }, |
| { |
| "epoch": 17.36231884057971, |
| "grad_norm": 0.32741713523864746, |
| "learning_rate": 8.393092988205065e-05, |
| "loss": 0.089, |
| "step": 5990 |
| }, |
| { |
| "epoch": 17.391304347826086, |
| "grad_norm": 0.350293904542923, |
| "learning_rate": 8.387016060474597e-05, |
| "loss": 0.1074, |
| "step": 6000 |
| }, |
| { |
| "epoch": 17.420289855072465, |
| "grad_norm": 0.46384942531585693, |
| "learning_rate": 8.380929872771551e-05, |
| "loss": 0.1151, |
| "step": 6010 |
| }, |
| { |
| "epoch": 17.44927536231884, |
| "grad_norm": 0.38906311988830566, |
| "learning_rate": 8.374834441735335e-05, |
| "loss": 0.0944, |
| "step": 6020 |
| }, |
| { |
| "epoch": 17.47826086956522, |
| "grad_norm": 0.4116496443748474, |
| "learning_rate": 8.368729784030622e-05, |
| "loss": 0.1009, |
| "step": 6030 |
| }, |
| { |
| "epoch": 17.507246376811594, |
| "grad_norm": 0.36326107382774353, |
| "learning_rate": 8.362615916347315e-05, |
| "loss": 0.117, |
| "step": 6040 |
| }, |
| { |
| "epoch": 17.536231884057973, |
| "grad_norm": 0.4073273539543152, |
| "learning_rate": 8.356492855400493e-05, |
| "loss": 0.1196, |
| "step": 6050 |
| }, |
| { |
| "epoch": 17.565217391304348, |
| "grad_norm": 0.3261200189590454, |
| "learning_rate": 8.350360617930371e-05, |
| "loss": 0.0887, |
| "step": 6060 |
| }, |
| { |
| "epoch": 17.594202898550726, |
| "grad_norm": 0.2686854302883148, |
| "learning_rate": 8.344219220702255e-05, |
| "loss": 0.103, |
| "step": 6070 |
| }, |
| { |
| "epoch": 17.6231884057971, |
| "grad_norm": 0.30324316024780273, |
| "learning_rate": 8.338068680506485e-05, |
| "loss": 0.1244, |
| "step": 6080 |
| }, |
| { |
| "epoch": 17.652173913043477, |
| "grad_norm": 0.3971955180168152, |
| "learning_rate": 8.33190901415841e-05, |
| "loss": 0.1114, |
| "step": 6090 |
| }, |
| { |
| "epoch": 17.681159420289855, |
| "grad_norm": 0.23621766269207, |
| "learning_rate": 8.325740238498317e-05, |
| "loss": 0.1151, |
| "step": 6100 |
| }, |
| { |
| "epoch": 17.71014492753623, |
| "grad_norm": 0.3847745954990387, |
| "learning_rate": 8.319562370391406e-05, |
| "loss": 0.1067, |
| "step": 6110 |
| }, |
| { |
| "epoch": 17.73913043478261, |
| "grad_norm": 0.29595401883125305, |
| "learning_rate": 8.31337542672773e-05, |
| "loss": 0.0946, |
| "step": 6120 |
| }, |
| { |
| "epoch": 17.768115942028984, |
| "grad_norm": 0.36179453134536743, |
| "learning_rate": 8.307179424422158e-05, |
| "loss": 0.1058, |
| "step": 6130 |
| }, |
| { |
| "epoch": 17.797101449275363, |
| "grad_norm": 0.28337523341178894, |
| "learning_rate": 8.300974380414327e-05, |
| "loss": 0.0932, |
| "step": 6140 |
| }, |
| { |
| "epoch": 17.82608695652174, |
| "grad_norm": 0.3820880651473999, |
| "learning_rate": 8.294760311668586e-05, |
| "loss": 0.1179, |
| "step": 6150 |
| }, |
| { |
| "epoch": 17.855072463768117, |
| "grad_norm": 0.19762246310710907, |
| "learning_rate": 8.288537235173961e-05, |
| "loss": 0.1321, |
| "step": 6160 |
| }, |
| { |
| "epoch": 17.884057971014492, |
| "grad_norm": 0.4157634973526001, |
| "learning_rate": 8.282305167944108e-05, |
| "loss": 0.1054, |
| "step": 6170 |
| }, |
| { |
| "epoch": 17.91304347826087, |
| "grad_norm": 0.4183441698551178, |
| "learning_rate": 8.276064127017262e-05, |
| "loss": 0.11, |
| "step": 6180 |
| }, |
| { |
| "epoch": 17.942028985507246, |
| "grad_norm": 0.41134294867515564, |
| "learning_rate": 8.269814129456189e-05, |
| "loss": 0.0956, |
| "step": 6190 |
| }, |
| { |
| "epoch": 17.971014492753625, |
| "grad_norm": 0.4862001836299896, |
| "learning_rate": 8.263555192348143e-05, |
| "loss": 0.1139, |
| "step": 6200 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 0.4908180236816406, |
| "learning_rate": 8.257287332804819e-05, |
| "loss": 0.094, |
| "step": 6210 |
| }, |
| { |
| "epoch": 18.028985507246375, |
| "grad_norm": 0.37922942638397217, |
| "learning_rate": 8.251010567962307e-05, |
| "loss": 0.1156, |
| "step": 6220 |
| }, |
| { |
| "epoch": 18.057971014492754, |
| "grad_norm": 0.4405559003353119, |
| "learning_rate": 8.244724914981041e-05, |
| "loss": 0.1219, |
| "step": 6230 |
| }, |
| { |
| "epoch": 18.08695652173913, |
| "grad_norm": 0.3812404274940491, |
| "learning_rate": 8.238430391045757e-05, |
| "loss": 0.1019, |
| "step": 6240 |
| }, |
| { |
| "epoch": 18.115942028985508, |
| "grad_norm": 0.4499792754650116, |
| "learning_rate": 8.232127013365445e-05, |
| "loss": 0.0975, |
| "step": 6250 |
| }, |
| { |
| "epoch": 18.144927536231883, |
| "grad_norm": 0.5252455472946167, |
| "learning_rate": 8.225814799173295e-05, |
| "loss": 0.1159, |
| "step": 6260 |
| }, |
| { |
| "epoch": 18.17391304347826, |
| "grad_norm": 0.44270288944244385, |
| "learning_rate": 8.219493765726663e-05, |
| "loss": 0.1103, |
| "step": 6270 |
| }, |
| { |
| "epoch": 18.202898550724637, |
| "grad_norm": 0.2811424136161804, |
| "learning_rate": 8.21316393030701e-05, |
| "loss": 0.1243, |
| "step": 6280 |
| }, |
| { |
| "epoch": 18.231884057971016, |
| "grad_norm": 0.4186232388019562, |
| "learning_rate": 8.206825310219865e-05, |
| "loss": 0.1174, |
| "step": 6290 |
| }, |
| { |
| "epoch": 18.26086956521739, |
| "grad_norm": 0.2894100844860077, |
| "learning_rate": 8.200477922794776e-05, |
| "loss": 0.1061, |
| "step": 6300 |
| }, |
| { |
| "epoch": 18.28985507246377, |
| "grad_norm": 0.3997898995876312, |
| "learning_rate": 8.194121785385256e-05, |
| "loss": 0.1153, |
| "step": 6310 |
| }, |
| { |
| "epoch": 18.318840579710145, |
| "grad_norm": 0.2798968255519867, |
| "learning_rate": 8.187756915368741e-05, |
| "loss": 0.0976, |
| "step": 6320 |
| }, |
| { |
| "epoch": 18.347826086956523, |
| "grad_norm": 0.3138371407985687, |
| "learning_rate": 8.181383330146544e-05, |
| "loss": 0.1135, |
| "step": 6330 |
| }, |
| { |
| "epoch": 18.3768115942029, |
| "grad_norm": 0.3996056914329529, |
| "learning_rate": 8.175001047143804e-05, |
| "loss": 0.1135, |
| "step": 6340 |
| }, |
| { |
| "epoch": 18.405797101449274, |
| "grad_norm": 0.39477649331092834, |
| "learning_rate": 8.168610083809438e-05, |
| "loss": 0.1052, |
| "step": 6350 |
| }, |
| { |
| "epoch": 18.434782608695652, |
| "grad_norm": 0.35394132137298584, |
| "learning_rate": 8.162210457616095e-05, |
| "loss": 0.0962, |
| "step": 6360 |
| }, |
| { |
| "epoch": 18.463768115942027, |
| "grad_norm": 0.28887924551963806, |
| "learning_rate": 8.155802186060109e-05, |
| "loss": 0.1019, |
| "step": 6370 |
| }, |
| { |
| "epoch": 18.492753623188406, |
| "grad_norm": 0.272850900888443, |
| "learning_rate": 8.149385286661453e-05, |
| "loss": 0.127, |
| "step": 6380 |
| }, |
| { |
| "epoch": 18.52173913043478, |
| "grad_norm": 0.5005936026573181, |
| "learning_rate": 8.14295977696368e-05, |
| "loss": 0.1174, |
| "step": 6390 |
| }, |
| { |
| "epoch": 18.55072463768116, |
| "grad_norm": 0.38646388053894043, |
| "learning_rate": 8.13652567453389e-05, |
| "loss": 0.1069, |
| "step": 6400 |
| }, |
| { |
| "epoch": 18.579710144927535, |
| "grad_norm": 0.3873347043991089, |
| "learning_rate": 8.130082996962676e-05, |
| "loss": 0.1235, |
| "step": 6410 |
| }, |
| { |
| "epoch": 18.608695652173914, |
| "grad_norm": 0.33470475673675537, |
| "learning_rate": 8.123631761864068e-05, |
| "loss": 0.1031, |
| "step": 6420 |
| }, |
| { |
| "epoch": 18.63768115942029, |
| "grad_norm": 0.4067200720310211, |
| "learning_rate": 8.1171719868755e-05, |
| "loss": 0.1193, |
| "step": 6430 |
| }, |
| { |
| "epoch": 18.666666666666668, |
| "grad_norm": 0.34601399302482605, |
| "learning_rate": 8.110703689657748e-05, |
| "loss": 0.0968, |
| "step": 6440 |
| }, |
| { |
| "epoch": 18.695652173913043, |
| "grad_norm": 0.4860565662384033, |
| "learning_rate": 8.104226887894892e-05, |
| "loss": 0.1008, |
| "step": 6450 |
| }, |
| { |
| "epoch": 18.72463768115942, |
| "grad_norm": 0.51555997133255, |
| "learning_rate": 8.097741599294257e-05, |
| "loss": 0.1193, |
| "step": 6460 |
| }, |
| { |
| "epoch": 18.753623188405797, |
| "grad_norm": 0.36039650440216064, |
| "learning_rate": 8.091247841586378e-05, |
| "loss": 0.1029, |
| "step": 6470 |
| }, |
| { |
| "epoch": 18.782608695652176, |
| "grad_norm": 0.315164178609848, |
| "learning_rate": 8.084745632524939e-05, |
| "loss": 0.0904, |
| "step": 6480 |
| }, |
| { |
| "epoch": 18.81159420289855, |
| "grad_norm": 0.3153921365737915, |
| "learning_rate": 8.07823498988673e-05, |
| "loss": 0.1038, |
| "step": 6490 |
| }, |
| { |
| "epoch": 18.840579710144926, |
| "grad_norm": 0.3011777102947235, |
| "learning_rate": 8.071715931471602e-05, |
| "loss": 0.1245, |
| "step": 6500 |
| }, |
| { |
| "epoch": 18.869565217391305, |
| "grad_norm": 0.3314365744590759, |
| "learning_rate": 8.06518847510241e-05, |
| "loss": 0.1001, |
| "step": 6510 |
| }, |
| { |
| "epoch": 18.89855072463768, |
| "grad_norm": 0.3859410881996155, |
| "learning_rate": 8.058652638624971e-05, |
| "loss": 0.1122, |
| "step": 6520 |
| }, |
| { |
| "epoch": 18.92753623188406, |
| "grad_norm": 0.3356384336948395, |
| "learning_rate": 8.052108439908013e-05, |
| "loss": 0.1144, |
| "step": 6530 |
| }, |
| { |
| "epoch": 18.956521739130434, |
| "grad_norm": 0.35082948207855225, |
| "learning_rate": 8.045555896843125e-05, |
| "loss": 0.1079, |
| "step": 6540 |
| }, |
| { |
| "epoch": 18.985507246376812, |
| "grad_norm": 0.5260385274887085, |
| "learning_rate": 8.03899502734471e-05, |
| "loss": 0.1046, |
| "step": 6550 |
| }, |
| { |
| "epoch": 19.014492753623188, |
| "grad_norm": 0.3151768445968628, |
| "learning_rate": 8.032425849349931e-05, |
| "loss": 0.105, |
| "step": 6560 |
| }, |
| { |
| "epoch": 19.043478260869566, |
| "grad_norm": 0.362244188785553, |
| "learning_rate": 8.025848380818674e-05, |
| "loss": 0.1005, |
| "step": 6570 |
| }, |
| { |
| "epoch": 19.07246376811594, |
| "grad_norm": 0.41462433338165283, |
| "learning_rate": 8.019262639733487e-05, |
| "loss": 0.1198, |
| "step": 6580 |
| }, |
| { |
| "epoch": 19.10144927536232, |
| "grad_norm": 0.40146404504776, |
| "learning_rate": 8.012668644099531e-05, |
| "loss": 0.0886, |
| "step": 6590 |
| }, |
| { |
| "epoch": 19.130434782608695, |
| "grad_norm": 0.39127445220947266, |
| "learning_rate": 8.006066411944542e-05, |
| "loss": 0.0906, |
| "step": 6600 |
| }, |
| { |
| "epoch": 19.159420289855074, |
| "grad_norm": 0.42401593923568726, |
| "learning_rate": 7.999455961318769e-05, |
| "loss": 0.1092, |
| "step": 6610 |
| }, |
| { |
| "epoch": 19.18840579710145, |
| "grad_norm": 0.49508869647979736, |
| "learning_rate": 7.992837310294932e-05, |
| "loss": 0.1114, |
| "step": 6620 |
| }, |
| { |
| "epoch": 19.217391304347824, |
| "grad_norm": 0.4461759328842163, |
| "learning_rate": 7.986210476968167e-05, |
| "loss": 0.1251, |
| "step": 6630 |
| }, |
| { |
| "epoch": 19.246376811594203, |
| "grad_norm": 0.41962409019470215, |
| "learning_rate": 7.97957547945599e-05, |
| "loss": 0.0899, |
| "step": 6640 |
| }, |
| { |
| "epoch": 19.27536231884058, |
| "grad_norm": 0.3262649178504944, |
| "learning_rate": 7.972932335898226e-05, |
| "loss": 0.0868, |
| "step": 6650 |
| }, |
| { |
| "epoch": 19.304347826086957, |
| "grad_norm": 0.3167392611503601, |
| "learning_rate": 7.966281064456975e-05, |
| "loss": 0.1008, |
| "step": 6660 |
| }, |
| { |
| "epoch": 19.333333333333332, |
| "grad_norm": 0.35801073908805847, |
| "learning_rate": 7.959621683316563e-05, |
| "loss": 0.0931, |
| "step": 6670 |
| }, |
| { |
| "epoch": 19.36231884057971, |
| "grad_norm": 0.31944307684898376, |
| "learning_rate": 7.952954210683481e-05, |
| "loss": 0.1218, |
| "step": 6680 |
| }, |
| { |
| "epoch": 19.391304347826086, |
| "grad_norm": 0.3943234384059906, |
| "learning_rate": 7.946278664786345e-05, |
| "loss": 0.1194, |
| "step": 6690 |
| }, |
| { |
| "epoch": 19.420289855072465, |
| "grad_norm": 0.46412956714630127, |
| "learning_rate": 7.939595063875842e-05, |
| "loss": 0.0976, |
| "step": 6700 |
| }, |
| { |
| "epoch": 19.44927536231884, |
| "grad_norm": 0.2884758412837982, |
| "learning_rate": 7.932903426224683e-05, |
| "loss": 0.1143, |
| "step": 6710 |
| }, |
| { |
| "epoch": 19.47826086956522, |
| "grad_norm": 0.24540093541145325, |
| "learning_rate": 7.926203770127552e-05, |
| "loss": 0.096, |
| "step": 6720 |
| }, |
| { |
| "epoch": 19.507246376811594, |
| "grad_norm": 0.40125906467437744, |
| "learning_rate": 7.919496113901046e-05, |
| "loss": 0.0998, |
| "step": 6730 |
| }, |
| { |
| "epoch": 19.536231884057973, |
| "grad_norm": 0.366150438785553, |
| "learning_rate": 7.912780475883649e-05, |
| "loss": 0.1104, |
| "step": 6740 |
| }, |
| { |
| "epoch": 19.565217391304348, |
| "grad_norm": 0.28204023838043213, |
| "learning_rate": 7.906056874435652e-05, |
| "loss": 0.0938, |
| "step": 6750 |
| }, |
| { |
| "epoch": 19.594202898550726, |
| "grad_norm": 0.39345285296440125, |
| "learning_rate": 7.899325327939131e-05, |
| "loss": 0.0835, |
| "step": 6760 |
| }, |
| { |
| "epoch": 19.6231884057971, |
| "grad_norm": 0.43696972727775574, |
| "learning_rate": 7.892585854797872e-05, |
| "loss": 0.1175, |
| "step": 6770 |
| }, |
| { |
| "epoch": 19.652173913043477, |
| "grad_norm": 0.5411075949668884, |
| "learning_rate": 7.88583847343734e-05, |
| "loss": 0.1016, |
| "step": 6780 |
| }, |
| { |
| "epoch": 19.681159420289855, |
| "grad_norm": 0.41619637608528137, |
| "learning_rate": 7.879083202304616e-05, |
| "loss": 0.0956, |
| "step": 6790 |
| }, |
| { |
| "epoch": 19.71014492753623, |
| "grad_norm": 0.24727730453014374, |
| "learning_rate": 7.872320059868355e-05, |
| "loss": 0.102, |
| "step": 6800 |
| }, |
| { |
| "epoch": 19.73913043478261, |
| "grad_norm": 0.2794191539287567, |
| "learning_rate": 7.865549064618729e-05, |
| "loss": 0.1155, |
| "step": 6810 |
| }, |
| { |
| "epoch": 19.768115942028984, |
| "grad_norm": 0.4851526618003845, |
| "learning_rate": 7.858770235067381e-05, |
| "loss": 0.1132, |
| "step": 6820 |
| }, |
| { |
| "epoch": 19.797101449275363, |
| "grad_norm": 0.38266780972480774, |
| "learning_rate": 7.851983589747374e-05, |
| "loss": 0.1163, |
| "step": 6830 |
| }, |
| { |
| "epoch": 19.82608695652174, |
| "grad_norm": 0.31224480271339417, |
| "learning_rate": 7.845189147213133e-05, |
| "loss": 0.1113, |
| "step": 6840 |
| }, |
| { |
| "epoch": 19.855072463768117, |
| "grad_norm": 0.26829686760902405, |
| "learning_rate": 7.838386926040407e-05, |
| "loss": 0.0976, |
| "step": 6850 |
| }, |
| { |
| "epoch": 19.884057971014492, |
| "grad_norm": 0.46314260363578796, |
| "learning_rate": 7.83157694482621e-05, |
| "loss": 0.1108, |
| "step": 6860 |
| }, |
| { |
| "epoch": 19.91304347826087, |
| "grad_norm": 0.3253716230392456, |
| "learning_rate": 7.824759222188768e-05, |
| "loss": 0.1163, |
| "step": 6870 |
| }, |
| { |
| "epoch": 19.942028985507246, |
| "grad_norm": 0.22694610059261322, |
| "learning_rate": 7.817933776767478e-05, |
| "loss": 0.0966, |
| "step": 6880 |
| }, |
| { |
| "epoch": 19.971014492753625, |
| "grad_norm": 0.424565851688385, |
| "learning_rate": 7.811100627222842e-05, |
| "loss": 0.1106, |
| "step": 6890 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.7507173418998718, |
| "learning_rate": 7.804259792236435e-05, |
| "loss": 0.1034, |
| "step": 6900 |
| }, |
| { |
| "epoch": 20.028985507246375, |
| "grad_norm": 0.3750985264778137, |
| "learning_rate": 7.797411290510835e-05, |
| "loss": 0.1016, |
| "step": 6910 |
| }, |
| { |
| "epoch": 20.057971014492754, |
| "grad_norm": 0.2645069658756256, |
| "learning_rate": 7.790555140769586e-05, |
| "loss": 0.1106, |
| "step": 6920 |
| }, |
| { |
| "epoch": 20.08695652173913, |
| "grad_norm": 0.473185658454895, |
| "learning_rate": 7.78369136175714e-05, |
| "loss": 0.0891, |
| "step": 6930 |
| }, |
| { |
| "epoch": 20.115942028985508, |
| "grad_norm": 0.3787136673927307, |
| "learning_rate": 7.776819972238806e-05, |
| "loss": 0.1062, |
| "step": 6940 |
| }, |
| { |
| "epoch": 20.144927536231883, |
| "grad_norm": 0.20881570875644684, |
| "learning_rate": 7.7699409910007e-05, |
| "loss": 0.0942, |
| "step": 6950 |
| }, |
| { |
| "epoch": 20.17391304347826, |
| "grad_norm": 0.36029985547065735, |
| "learning_rate": 7.763054436849694e-05, |
| "loss": 0.1139, |
| "step": 6960 |
| }, |
| { |
| "epoch": 20.202898550724637, |
| "grad_norm": 0.3305976986885071, |
| "learning_rate": 7.756160328613364e-05, |
| "loss": 0.107, |
| "step": 6970 |
| }, |
| { |
| "epoch": 20.231884057971016, |
| "grad_norm": 0.35445329546928406, |
| "learning_rate": 7.749258685139942e-05, |
| "loss": 0.0989, |
| "step": 6980 |
| }, |
| { |
| "epoch": 20.26086956521739, |
| "grad_norm": 0.39083990454673767, |
| "learning_rate": 7.742349525298253e-05, |
| "loss": 0.1105, |
| "step": 6990 |
| }, |
| { |
| "epoch": 20.28985507246377, |
| "grad_norm": 0.2868945896625519, |
| "learning_rate": 7.735432867977679e-05, |
| "loss": 0.0906, |
| "step": 7000 |
| }, |
| { |
| "epoch": 20.318840579710145, |
| "grad_norm": 0.3364221155643463, |
| "learning_rate": 7.728508732088096e-05, |
| "loss": 0.1045, |
| "step": 7010 |
| }, |
| { |
| "epoch": 20.347826086956523, |
| "grad_norm": 0.27390968799591064, |
| "learning_rate": 7.721577136559825e-05, |
| "loss": 0.0983, |
| "step": 7020 |
| }, |
| { |
| "epoch": 20.3768115942029, |
| "grad_norm": 0.30403977632522583, |
| "learning_rate": 7.714638100343588e-05, |
| "loss": 0.0966, |
| "step": 7030 |
| }, |
| { |
| "epoch": 20.405797101449274, |
| "grad_norm": 0.476375937461853, |
| "learning_rate": 7.707691642410444e-05, |
| "loss": 0.1009, |
| "step": 7040 |
| }, |
| { |
| "epoch": 20.434782608695652, |
| "grad_norm": 0.39029183983802795, |
| "learning_rate": 7.70073778175174e-05, |
| "loss": 0.0917, |
| "step": 7050 |
| }, |
| { |
| "epoch": 20.463768115942027, |
| "grad_norm": 0.29170235991477966, |
| "learning_rate": 7.69377653737907e-05, |
| "loss": 0.093, |
| "step": 7060 |
| }, |
| { |
| "epoch": 20.492753623188406, |
| "grad_norm": 0.3420107364654541, |
| "learning_rate": 7.686807928324209e-05, |
| "loss": 0.0887, |
| "step": 7070 |
| }, |
| { |
| "epoch": 20.52173913043478, |
| "grad_norm": 0.46952491998672485, |
| "learning_rate": 7.679831973639065e-05, |
| "loss": 0.1035, |
| "step": 7080 |
| }, |
| { |
| "epoch": 20.55072463768116, |
| "grad_norm": 0.3527598977088928, |
| "learning_rate": 7.672848692395637e-05, |
| "loss": 0.1112, |
| "step": 7090 |
| }, |
| { |
| "epoch": 20.579710144927535, |
| "grad_norm": 0.35709843039512634, |
| "learning_rate": 7.665858103685944e-05, |
| "loss": 0.1215, |
| "step": 7100 |
| }, |
| { |
| "epoch": 20.608695652173914, |
| "grad_norm": 0.24026872217655182, |
| "learning_rate": 7.658860226621991e-05, |
| "loss": 0.1099, |
| "step": 7110 |
| }, |
| { |
| "epoch": 20.63768115942029, |
| "grad_norm": 0.3530397117137909, |
| "learning_rate": 7.651855080335708e-05, |
| "loss": 0.1098, |
| "step": 7120 |
| }, |
| { |
| "epoch": 20.666666666666668, |
| "grad_norm": 0.36064979434013367, |
| "learning_rate": 7.644842683978896e-05, |
| "loss": 0.0866, |
| "step": 7130 |
| }, |
| { |
| "epoch": 20.695652173913043, |
| "grad_norm": 0.32545793056488037, |
| "learning_rate": 7.63782305672318e-05, |
| "loss": 0.0964, |
| "step": 7140 |
| }, |
| { |
| "epoch": 20.72463768115942, |
| "grad_norm": 0.41704756021499634, |
| "learning_rate": 7.63079621775995e-05, |
| "loss": 0.1204, |
| "step": 7150 |
| }, |
| { |
| "epoch": 20.753623188405797, |
| "grad_norm": 0.2773784399032593, |
| "learning_rate": 7.623762186300319e-05, |
| "loss": 0.1231, |
| "step": 7160 |
| }, |
| { |
| "epoch": 20.782608695652176, |
| "grad_norm": 0.314942866563797, |
| "learning_rate": 7.616720981575057e-05, |
| "loss": 0.1082, |
| "step": 7170 |
| }, |
| { |
| "epoch": 20.81159420289855, |
| "grad_norm": 0.28883859515190125, |
| "learning_rate": 7.609672622834552e-05, |
| "loss": 0.1026, |
| "step": 7180 |
| }, |
| { |
| "epoch": 20.840579710144926, |
| "grad_norm": 0.3052704334259033, |
| "learning_rate": 7.602617129348747e-05, |
| "loss": 0.1078, |
| "step": 7190 |
| }, |
| { |
| "epoch": 20.869565217391305, |
| "grad_norm": 0.40418189764022827, |
| "learning_rate": 7.595554520407088e-05, |
| "loss": 0.1008, |
| "step": 7200 |
| }, |
| { |
| "epoch": 20.89855072463768, |
| "grad_norm": 0.547788143157959, |
| "learning_rate": 7.588484815318484e-05, |
| "loss": 0.1019, |
| "step": 7210 |
| }, |
| { |
| "epoch": 20.92753623188406, |
| "grad_norm": 0.29421624541282654, |
| "learning_rate": 7.581408033411234e-05, |
| "loss": 0.0984, |
| "step": 7220 |
| }, |
| { |
| "epoch": 20.956521739130434, |
| "grad_norm": 0.2703758478164673, |
| "learning_rate": 7.574324194032995e-05, |
| "loss": 0.105, |
| "step": 7230 |
| }, |
| { |
| "epoch": 20.985507246376812, |
| "grad_norm": 0.3435475528240204, |
| "learning_rate": 7.567233316550705e-05, |
| "loss": 0.1285, |
| "step": 7240 |
| }, |
| { |
| "epoch": 21.014492753623188, |
| "grad_norm": 0.33567100763320923, |
| "learning_rate": 7.560135420350562e-05, |
| "loss": 0.0879, |
| "step": 7250 |
| }, |
| { |
| "epoch": 21.043478260869566, |
| "grad_norm": 0.28695228695869446, |
| "learning_rate": 7.553030524837935e-05, |
| "loss": 0.0969, |
| "step": 7260 |
| }, |
| { |
| "epoch": 21.07246376811594, |
| "grad_norm": 0.41675615310668945, |
| "learning_rate": 7.545918649437341e-05, |
| "loss": 0.1269, |
| "step": 7270 |
| }, |
| { |
| "epoch": 21.10144927536232, |
| "grad_norm": 0.501548171043396, |
| "learning_rate": 7.538799813592377e-05, |
| "loss": 0.1074, |
| "step": 7280 |
| }, |
| { |
| "epoch": 21.130434782608695, |
| "grad_norm": 0.3565016984939575, |
| "learning_rate": 7.531674036765662e-05, |
| "loss": 0.1011, |
| "step": 7290 |
| }, |
| { |
| "epoch": 21.159420289855074, |
| "grad_norm": 0.4155751168727875, |
| "learning_rate": 7.524541338438807e-05, |
| "loss": 0.1139, |
| "step": 7300 |
| }, |
| { |
| "epoch": 21.18840579710145, |
| "grad_norm": 0.21688665449619293, |
| "learning_rate": 7.517401738112328e-05, |
| "loss": 0.0914, |
| "step": 7310 |
| }, |
| { |
| "epoch": 21.217391304347824, |
| "grad_norm": 0.28088539838790894, |
| "learning_rate": 7.510255255305628e-05, |
| "loss": 0.1125, |
| "step": 7320 |
| }, |
| { |
| "epoch": 21.246376811594203, |
| "grad_norm": 0.3268051743507385, |
| "learning_rate": 7.503101909556911e-05, |
| "loss": 0.0877, |
| "step": 7330 |
| }, |
| { |
| "epoch": 21.27536231884058, |
| "grad_norm": 0.34027546644210815, |
| "learning_rate": 7.495941720423154e-05, |
| "loss": 0.1026, |
| "step": 7340 |
| }, |
| { |
| "epoch": 21.304347826086957, |
| "grad_norm": 0.28073224425315857, |
| "learning_rate": 7.488774707480042e-05, |
| "loss": 0.0913, |
| "step": 7350 |
| }, |
| { |
| "epoch": 21.333333333333332, |
| "grad_norm": 0.37974223494529724, |
| "learning_rate": 7.481600890321911e-05, |
| "loss": 0.1041, |
| "step": 7360 |
| }, |
| { |
| "epoch": 21.36231884057971, |
| "grad_norm": 0.32456913590431213, |
| "learning_rate": 7.474420288561708e-05, |
| "loss": 0.1021, |
| "step": 7370 |
| }, |
| { |
| "epoch": 21.391304347826086, |
| "grad_norm": 0.3720680773258209, |
| "learning_rate": 7.467232921830921e-05, |
| "loss": 0.0958, |
| "step": 7380 |
| }, |
| { |
| "epoch": 21.420289855072465, |
| "grad_norm": 0.3506243824958801, |
| "learning_rate": 7.460038809779537e-05, |
| "loss": 0.1062, |
| "step": 7390 |
| }, |
| { |
| "epoch": 21.44927536231884, |
| "grad_norm": 0.33805230259895325, |
| "learning_rate": 7.452837972075983e-05, |
| "loss": 0.0969, |
| "step": 7400 |
| }, |
| { |
| "epoch": 21.47826086956522, |
| "grad_norm": 0.260945200920105, |
| "learning_rate": 7.445630428407074e-05, |
| "loss": 0.0879, |
| "step": 7410 |
| }, |
| { |
| "epoch": 21.507246376811594, |
| "grad_norm": 0.3978862762451172, |
| "learning_rate": 7.43841619847796e-05, |
| "loss": 0.0979, |
| "step": 7420 |
| }, |
| { |
| "epoch": 21.536231884057973, |
| "grad_norm": 0.3000033497810364, |
| "learning_rate": 7.431195302012072e-05, |
| "loss": 0.1194, |
| "step": 7430 |
| }, |
| { |
| "epoch": 21.565217391304348, |
| "grad_norm": 0.30280905961990356, |
| "learning_rate": 7.423967758751061e-05, |
| "loss": 0.0883, |
| "step": 7440 |
| }, |
| { |
| "epoch": 21.594202898550726, |
| "grad_norm": 0.26231029629707336, |
| "learning_rate": 7.416733588454758e-05, |
| "loss": 0.0773, |
| "step": 7450 |
| }, |
| { |
| "epoch": 21.6231884057971, |
| "grad_norm": 0.3744719922542572, |
| "learning_rate": 7.409492810901106e-05, |
| "loss": 0.1047, |
| "step": 7460 |
| }, |
| { |
| "epoch": 21.652173913043477, |
| "grad_norm": 0.27250558137893677, |
| "learning_rate": 7.402245445886116e-05, |
| "loss": 0.0996, |
| "step": 7470 |
| }, |
| { |
| "epoch": 21.681159420289855, |
| "grad_norm": 0.44744259119033813, |
| "learning_rate": 7.394991513223806e-05, |
| "loss": 0.1119, |
| "step": 7480 |
| }, |
| { |
| "epoch": 21.71014492753623, |
| "grad_norm": 0.41720837354660034, |
| "learning_rate": 7.38773103274615e-05, |
| "loss": 0.1009, |
| "step": 7490 |
| }, |
| { |
| "epoch": 21.73913043478261, |
| "grad_norm": 0.4026874303817749, |
| "learning_rate": 7.380464024303028e-05, |
| "loss": 0.0899, |
| "step": 7500 |
| }, |
| { |
| "epoch": 21.768115942028984, |
| "grad_norm": 0.39172300696372986, |
| "learning_rate": 7.373190507762162e-05, |
| "loss": 0.101, |
| "step": 7510 |
| }, |
| { |
| "epoch": 21.797101449275363, |
| "grad_norm": 0.3168098032474518, |
| "learning_rate": 7.365910503009066e-05, |
| "loss": 0.074, |
| "step": 7520 |
| }, |
| { |
| "epoch": 21.82608695652174, |
| "grad_norm": 0.28811538219451904, |
| "learning_rate": 7.358624029946996e-05, |
| "loss": 0.0993, |
| "step": 7530 |
| }, |
| { |
| "epoch": 21.855072463768117, |
| "grad_norm": 0.4034368693828583, |
| "learning_rate": 7.351331108496893e-05, |
| "loss": 0.115, |
| "step": 7540 |
| }, |
| { |
| "epoch": 21.884057971014492, |
| "grad_norm": 0.3656509220600128, |
| "learning_rate": 7.344031758597325e-05, |
| "loss": 0.112, |
| "step": 7550 |
| }, |
| { |
| "epoch": 21.91304347826087, |
| "grad_norm": 0.4922838807106018, |
| "learning_rate": 7.336726000204435e-05, |
| "loss": 0.0972, |
| "step": 7560 |
| }, |
| { |
| "epoch": 21.942028985507246, |
| "grad_norm": 0.3745553195476532, |
| "learning_rate": 7.32941385329189e-05, |
| "loss": 0.1027, |
| "step": 7570 |
| }, |
| { |
| "epoch": 21.971014492753625, |
| "grad_norm": 0.39149320125579834, |
| "learning_rate": 7.322095337850816e-05, |
| "loss": 0.1151, |
| "step": 7580 |
| }, |
| { |
| "epoch": 22.0, |
| "grad_norm": 0.8151898384094238, |
| "learning_rate": 7.314770473889758e-05, |
| "loss": 0.1026, |
| "step": 7590 |
| }, |
| { |
| "epoch": 22.028985507246375, |
| "grad_norm": 0.4377081096172333, |
| "learning_rate": 7.307439281434615e-05, |
| "loss": 0.0823, |
| "step": 7600 |
| }, |
| { |
| "epoch": 22.057971014492754, |
| "grad_norm": 0.35784757137298584, |
| "learning_rate": 7.300101780528585e-05, |
| "loss": 0.11, |
| "step": 7610 |
| }, |
| { |
| "epoch": 22.08695652173913, |
| "grad_norm": 0.25670677423477173, |
| "learning_rate": 7.292757991232117e-05, |
| "loss": 0.1015, |
| "step": 7620 |
| }, |
| { |
| "epoch": 22.115942028985508, |
| "grad_norm": 0.35505029559135437, |
| "learning_rate": 7.285407933622848e-05, |
| "loss": 0.1097, |
| "step": 7630 |
| }, |
| { |
| "epoch": 22.144927536231883, |
| "grad_norm": 0.27871453762054443, |
| "learning_rate": 7.278051627795557e-05, |
| "loss": 0.0951, |
| "step": 7640 |
| }, |
| { |
| "epoch": 22.17391304347826, |
| "grad_norm": 0.4752453565597534, |
| "learning_rate": 7.270689093862105e-05, |
| "loss": 0.1036, |
| "step": 7650 |
| }, |
| { |
| "epoch": 22.202898550724637, |
| "grad_norm": 0.5493319630622864, |
| "learning_rate": 7.263320351951374e-05, |
| "loss": 0.1031, |
| "step": 7660 |
| }, |
| { |
| "epoch": 22.231884057971016, |
| "grad_norm": 0.43251800537109375, |
| "learning_rate": 7.255945422209227e-05, |
| "loss": 0.0984, |
| "step": 7670 |
| }, |
| { |
| "epoch": 22.26086956521739, |
| "grad_norm": 0.4127131998538971, |
| "learning_rate": 7.248564324798437e-05, |
| "loss": 0.0907, |
| "step": 7680 |
| }, |
| { |
| "epoch": 22.28985507246377, |
| "grad_norm": 0.28903988003730774, |
| "learning_rate": 7.241177079898644e-05, |
| "loss": 0.086, |
| "step": 7690 |
| }, |
| { |
| "epoch": 22.318840579710145, |
| "grad_norm": 0.35488802194595337, |
| "learning_rate": 7.233783707706295e-05, |
| "loss": 0.1017, |
| "step": 7700 |
| }, |
| { |
| "epoch": 22.347826086956523, |
| "grad_norm": 0.2337232232093811, |
| "learning_rate": 7.226384228434586e-05, |
| "loss": 0.0888, |
| "step": 7710 |
| }, |
| { |
| "epoch": 22.3768115942029, |
| "grad_norm": 0.2909092307090759, |
| "learning_rate": 7.21897866231341e-05, |
| "loss": 0.1124, |
| "step": 7720 |
| }, |
| { |
| "epoch": 22.405797101449274, |
| "grad_norm": 0.3277481496334076, |
| "learning_rate": 7.211567029589303e-05, |
| "loss": 0.1086, |
| "step": 7730 |
| }, |
| { |
| "epoch": 22.434782608695652, |
| "grad_norm": 0.2835393249988556, |
| "learning_rate": 7.204149350525387e-05, |
| "loss": 0.1085, |
| "step": 7740 |
| }, |
| { |
| "epoch": 22.463768115942027, |
| "grad_norm": 0.3101160526275635, |
| "learning_rate": 7.196725645401309e-05, |
| "loss": 0.0959, |
| "step": 7750 |
| }, |
| { |
| "epoch": 22.492753623188406, |
| "grad_norm": 0.42514339089393616, |
| "learning_rate": 7.1892959345132e-05, |
| "loss": 0.1108, |
| "step": 7760 |
| }, |
| { |
| "epoch": 22.52173913043478, |
| "grad_norm": 0.39696502685546875, |
| "learning_rate": 7.181860238173605e-05, |
| "loss": 0.1352, |
| "step": 7770 |
| }, |
| { |
| "epoch": 22.55072463768116, |
| "grad_norm": 0.43896979093551636, |
| "learning_rate": 7.174418576711432e-05, |
| "loss": 0.0971, |
| "step": 7780 |
| }, |
| { |
| "epoch": 22.579710144927535, |
| "grad_norm": 0.47712811827659607, |
| "learning_rate": 7.1669709704719e-05, |
| "loss": 0.0877, |
| "step": 7790 |
| }, |
| { |
| "epoch": 22.608695652173914, |
| "grad_norm": 0.3447103202342987, |
| "learning_rate": 7.159517439816481e-05, |
| "loss": 0.0971, |
| "step": 7800 |
| }, |
| { |
| "epoch": 22.63768115942029, |
| "grad_norm": 0.39140835404396057, |
| "learning_rate": 7.152058005122842e-05, |
| "loss": 0.0885, |
| "step": 7810 |
| }, |
| { |
| "epoch": 22.666666666666668, |
| "grad_norm": 0.28053638339042664, |
| "learning_rate": 7.144592686784793e-05, |
| "loss": 0.0945, |
| "step": 7820 |
| }, |
| { |
| "epoch": 22.695652173913043, |
| "grad_norm": 0.3110656142234802, |
| "learning_rate": 7.137121505212229e-05, |
| "loss": 0.1094, |
| "step": 7830 |
| }, |
| { |
| "epoch": 22.72463768115942, |
| "grad_norm": 0.38985612988471985, |
| "learning_rate": 7.129644480831077e-05, |
| "loss": 0.0794, |
| "step": 7840 |
| }, |
| { |
| "epoch": 22.753623188405797, |
| "grad_norm": 0.49533525109291077, |
| "learning_rate": 7.122161634083234e-05, |
| "loss": 0.1002, |
| "step": 7850 |
| }, |
| { |
| "epoch": 22.782608695652176, |
| "grad_norm": 0.43202659487724304, |
| "learning_rate": 7.114672985426516e-05, |
| "loss": 0.0962, |
| "step": 7860 |
| }, |
| { |
| "epoch": 22.81159420289855, |
| "grad_norm": 0.4098835587501526, |
| "learning_rate": 7.107178555334606e-05, |
| "loss": 0.1022, |
| "step": 7870 |
| }, |
| { |
| "epoch": 22.840579710144926, |
| "grad_norm": 0.39185699820518494, |
| "learning_rate": 7.099678364296989e-05, |
| "loss": 0.0911, |
| "step": 7880 |
| }, |
| { |
| "epoch": 22.869565217391305, |
| "grad_norm": 0.38475117087364197, |
| "learning_rate": 7.0921724328189e-05, |
| "loss": 0.1046, |
| "step": 7890 |
| }, |
| { |
| "epoch": 22.89855072463768, |
| "grad_norm": 0.3477749824523926, |
| "learning_rate": 7.084660781421268e-05, |
| "loss": 0.1047, |
| "step": 7900 |
| }, |
| { |
| "epoch": 22.92753623188406, |
| "grad_norm": 0.32388657331466675, |
| "learning_rate": 7.077143430640662e-05, |
| "loss": 0.1111, |
| "step": 7910 |
| }, |
| { |
| "epoch": 22.956521739130434, |
| "grad_norm": 0.28074944019317627, |
| "learning_rate": 7.069620401029232e-05, |
| "loss": 0.0952, |
| "step": 7920 |
| }, |
| { |
| "epoch": 22.985507246376812, |
| "grad_norm": 0.4655712842941284, |
| "learning_rate": 7.062091713154655e-05, |
| "loss": 0.119, |
| "step": 7930 |
| }, |
| { |
| "epoch": 23.014492753623188, |
| "grad_norm": 0.3347054719924927, |
| "learning_rate": 7.054557387600075e-05, |
| "loss": 0.1116, |
| "step": 7940 |
| }, |
| { |
| "epoch": 23.043478260869566, |
| "grad_norm": 0.3056691288948059, |
| "learning_rate": 7.04701744496405e-05, |
| "loss": 0.0995, |
| "step": 7950 |
| }, |
| { |
| "epoch": 23.07246376811594, |
| "grad_norm": 0.29524263739585876, |
| "learning_rate": 7.039471905860495e-05, |
| "loss": 0.0923, |
| "step": 7960 |
| }, |
| { |
| "epoch": 23.10144927536232, |
| "grad_norm": 0.3292746841907501, |
| "learning_rate": 7.031920790918628e-05, |
| "loss": 0.102, |
| "step": 7970 |
| }, |
| { |
| "epoch": 23.130434782608695, |
| "grad_norm": 0.3358573019504547, |
| "learning_rate": 7.024364120782906e-05, |
| "loss": 0.0919, |
| "step": 7980 |
| }, |
| { |
| "epoch": 23.159420289855074, |
| "grad_norm": 0.4067601263523102, |
| "learning_rate": 7.016801916112978e-05, |
| "loss": 0.093, |
| "step": 7990 |
| }, |
| { |
| "epoch": 23.18840579710145, |
| "grad_norm": 0.3560484051704407, |
| "learning_rate": 7.009234197583623e-05, |
| "loss": 0.1045, |
| "step": 8000 |
| }, |
| { |
| "epoch": 23.217391304347824, |
| "grad_norm": 0.4393708407878876, |
| "learning_rate": 7.001660985884692e-05, |
| "loss": 0.0963, |
| "step": 8010 |
| }, |
| { |
| "epoch": 23.246376811594203, |
| "grad_norm": 0.5236015915870667, |
| "learning_rate": 6.994082301721063e-05, |
| "loss": 0.1136, |
| "step": 8020 |
| }, |
| { |
| "epoch": 23.27536231884058, |
| "grad_norm": 0.4401554465293884, |
| "learning_rate": 6.986498165812563e-05, |
| "loss": 0.0955, |
| "step": 8030 |
| }, |
| { |
| "epoch": 23.304347826086957, |
| "grad_norm": 0.30517131090164185, |
| "learning_rate": 6.978908598893932e-05, |
| "loss": 0.0939, |
| "step": 8040 |
| }, |
| { |
| "epoch": 23.333333333333332, |
| "grad_norm": 0.3316713869571686, |
| "learning_rate": 6.971313621714756e-05, |
| "loss": 0.0912, |
| "step": 8050 |
| }, |
| { |
| "epoch": 23.36231884057971, |
| "grad_norm": 0.388837069272995, |
| "learning_rate": 6.96371325503941e-05, |
| "loss": 0.1064, |
| "step": 8060 |
| }, |
| { |
| "epoch": 23.391304347826086, |
| "grad_norm": 0.2927514612674713, |
| "learning_rate": 6.956107519647014e-05, |
| "loss": 0.1115, |
| "step": 8070 |
| }, |
| { |
| "epoch": 23.420289855072465, |
| "grad_norm": 0.4804588556289673, |
| "learning_rate": 6.94849643633135e-05, |
| "loss": 0.1035, |
| "step": 8080 |
| }, |
| { |
| "epoch": 23.44927536231884, |
| "grad_norm": 0.32680946588516235, |
| "learning_rate": 6.940880025900834e-05, |
| "loss": 0.0984, |
| "step": 8090 |
| }, |
| { |
| "epoch": 23.47826086956522, |
| "grad_norm": 0.392529159784317, |
| "learning_rate": 6.933258309178438e-05, |
| "loss": 0.1002, |
| "step": 8100 |
| }, |
| { |
| "epoch": 23.507246376811594, |
| "grad_norm": 0.451831579208374, |
| "learning_rate": 6.925631307001646e-05, |
| "loss": 0.094, |
| "step": 8110 |
| }, |
| { |
| "epoch": 23.536231884057973, |
| "grad_norm": 0.314008504152298, |
| "learning_rate": 6.91799904022239e-05, |
| "loss": 0.084, |
| "step": 8120 |
| }, |
| { |
| "epoch": 23.565217391304348, |
| "grad_norm": 0.33511435985565186, |
| "learning_rate": 6.910361529706997e-05, |
| "loss": 0.0823, |
| "step": 8130 |
| }, |
| { |
| "epoch": 23.594202898550726, |
| "grad_norm": 0.36016684770584106, |
| "learning_rate": 6.902718796336131e-05, |
| "loss": 0.1009, |
| "step": 8140 |
| }, |
| { |
| "epoch": 23.6231884057971, |
| "grad_norm": 0.29572927951812744, |
| "learning_rate": 6.895070861004729e-05, |
| "loss": 0.1142, |
| "step": 8150 |
| }, |
| { |
| "epoch": 23.652173913043477, |
| "grad_norm": 0.3417483866214752, |
| "learning_rate": 6.887417744621956e-05, |
| "loss": 0.1075, |
| "step": 8160 |
| }, |
| { |
| "epoch": 23.681159420289855, |
| "grad_norm": 0.3952733278274536, |
| "learning_rate": 6.87975946811114e-05, |
| "loss": 0.0899, |
| "step": 8170 |
| }, |
| { |
| "epoch": 23.71014492753623, |
| "grad_norm": 0.4189750552177429, |
| "learning_rate": 6.872096052409718e-05, |
| "loss": 0.0903, |
| "step": 8180 |
| }, |
| { |
| "epoch": 23.73913043478261, |
| "grad_norm": 0.3113269805908203, |
| "learning_rate": 6.864427518469174e-05, |
| "loss": 0.098, |
| "step": 8190 |
| }, |
| { |
| "epoch": 23.768115942028984, |
| "grad_norm": 0.29072266817092896, |
| "learning_rate": 6.856753887254986e-05, |
| "loss": 0.0961, |
| "step": 8200 |
| }, |
| { |
| "epoch": 23.797101449275363, |
| "grad_norm": 0.31375062465667725, |
| "learning_rate": 6.849075179746572e-05, |
| "loss": 0.1149, |
| "step": 8210 |
| }, |
| { |
| "epoch": 23.82608695652174, |
| "grad_norm": 0.3995482325553894, |
| "learning_rate": 6.841391416937221e-05, |
| "loss": 0.0941, |
| "step": 8220 |
| }, |
| { |
| "epoch": 23.855072463768117, |
| "grad_norm": 0.4191873371601105, |
| "learning_rate": 6.833702619834053e-05, |
| "loss": 0.1021, |
| "step": 8230 |
| }, |
| { |
| "epoch": 23.884057971014492, |
| "grad_norm": 0.3322891891002655, |
| "learning_rate": 6.82600880945794e-05, |
| "loss": 0.1022, |
| "step": 8240 |
| }, |
| { |
| "epoch": 23.91304347826087, |
| "grad_norm": 0.37546929717063904, |
| "learning_rate": 6.818310006843468e-05, |
| "loss": 0.1051, |
| "step": 8250 |
| }, |
| { |
| "epoch": 23.942028985507246, |
| "grad_norm": 0.4086068868637085, |
| "learning_rate": 6.810606233038868e-05, |
| "loss": 0.115, |
| "step": 8260 |
| }, |
| { |
| "epoch": 23.971014492753625, |
| "grad_norm": 0.3562030494213104, |
| "learning_rate": 6.802897509105966e-05, |
| "loss": 0.094, |
| "step": 8270 |
| }, |
| { |
| "epoch": 24.0, |
| "grad_norm": 0.8567777276039124, |
| "learning_rate": 6.79518385612012e-05, |
| "loss": 0.1065, |
| "step": 8280 |
| }, |
| { |
| "epoch": 24.028985507246375, |
| "grad_norm": 0.6033879518508911, |
| "learning_rate": 6.787465295170157e-05, |
| "loss": 0.1118, |
| "step": 8290 |
| }, |
| { |
| "epoch": 24.057971014492754, |
| "grad_norm": 0.4588029384613037, |
| "learning_rate": 6.779741847358332e-05, |
| "loss": 0.1, |
| "step": 8300 |
| }, |
| { |
| "epoch": 24.08695652173913, |
| "grad_norm": 0.5255804657936096, |
| "learning_rate": 6.772013533800256e-05, |
| "loss": 0.1236, |
| "step": 8310 |
| }, |
| { |
| "epoch": 24.115942028985508, |
| "grad_norm": 0.4105243980884552, |
| "learning_rate": 6.764280375624843e-05, |
| "loss": 0.1017, |
| "step": 8320 |
| }, |
| { |
| "epoch": 24.144927536231883, |
| "grad_norm": 0.27915531396865845, |
| "learning_rate": 6.756542393974252e-05, |
| "loss": 0.1041, |
| "step": 8330 |
| }, |
| { |
| "epoch": 24.17391304347826, |
| "grad_norm": 0.24191172420978546, |
| "learning_rate": 6.748799610003828e-05, |
| "loss": 0.0786, |
| "step": 8340 |
| }, |
| { |
| "epoch": 24.202898550724637, |
| "grad_norm": 0.3112497329711914, |
| "learning_rate": 6.741052044882048e-05, |
| "loss": 0.108, |
| "step": 8350 |
| }, |
| { |
| "epoch": 24.231884057971016, |
| "grad_norm": 0.42907461524009705, |
| "learning_rate": 6.73329971979046e-05, |
| "loss": 0.1122, |
| "step": 8360 |
| }, |
| { |
| "epoch": 24.26086956521739, |
| "grad_norm": 0.39249175786972046, |
| "learning_rate": 6.725542655923625e-05, |
| "loss": 0.1023, |
| "step": 8370 |
| }, |
| { |
| "epoch": 24.28985507246377, |
| "grad_norm": 0.3118097186088562, |
| "learning_rate": 6.717780874489057e-05, |
| "loss": 0.0926, |
| "step": 8380 |
| }, |
| { |
| "epoch": 24.318840579710145, |
| "grad_norm": 0.4916854500770569, |
| "learning_rate": 6.710014396707172e-05, |
| "loss": 0.1157, |
| "step": 8390 |
| }, |
| { |
| "epoch": 24.347826086956523, |
| "grad_norm": 0.30792731046676636, |
| "learning_rate": 6.702243243811221e-05, |
| "loss": 0.103, |
| "step": 8400 |
| }, |
| { |
| "epoch": 24.3768115942029, |
| "grad_norm": 0.438876748085022, |
| "learning_rate": 6.694467437047244e-05, |
| "loss": 0.1035, |
| "step": 8410 |
| }, |
| { |
| "epoch": 24.405797101449274, |
| "grad_norm": 0.4188535511493683, |
| "learning_rate": 6.686686997673997e-05, |
| "loss": 0.094, |
| "step": 8420 |
| }, |
| { |
| "epoch": 24.434782608695652, |
| "grad_norm": 0.35675615072250366, |
| "learning_rate": 6.678901946962903e-05, |
| "loss": 0.0814, |
| "step": 8430 |
| }, |
| { |
| "epoch": 24.463768115942027, |
| "grad_norm": 0.3431568145751953, |
| "learning_rate": 6.671112306197996e-05, |
| "loss": 0.0866, |
| "step": 8440 |
| }, |
| { |
| "epoch": 24.492753623188406, |
| "grad_norm": 0.35794076323509216, |
| "learning_rate": 6.663318096675854e-05, |
| "loss": 0.1105, |
| "step": 8450 |
| }, |
| { |
| "epoch": 24.52173913043478, |
| "grad_norm": 0.41272181272506714, |
| "learning_rate": 6.655519339705552e-05, |
| "loss": 0.0897, |
| "step": 8460 |
| }, |
| { |
| "epoch": 24.55072463768116, |
| "grad_norm": 0.32626742124557495, |
| "learning_rate": 6.647716056608588e-05, |
| "loss": 0.0956, |
| "step": 8470 |
| }, |
| { |
| "epoch": 24.579710144927535, |
| "grad_norm": 0.3472849130630493, |
| "learning_rate": 6.639908268718843e-05, |
| "loss": 0.0892, |
| "step": 8480 |
| }, |
| { |
| "epoch": 24.608695652173914, |
| "grad_norm": 0.40613627433776855, |
| "learning_rate": 6.632095997382514e-05, |
| "loss": 0.0968, |
| "step": 8490 |
| }, |
| { |
| "epoch": 24.63768115942029, |
| "grad_norm": 0.367157518863678, |
| "learning_rate": 6.624279263958047e-05, |
| "loss": 0.0907, |
| "step": 8500 |
| }, |
| { |
| "epoch": 24.666666666666668, |
| "grad_norm": 0.3079644441604614, |
| "learning_rate": 6.616458089816097e-05, |
| "loss": 0.1052, |
| "step": 8510 |
| }, |
| { |
| "epoch": 24.695652173913043, |
| "grad_norm": 0.29992493987083435, |
| "learning_rate": 6.608632496339454e-05, |
| "loss": 0.0841, |
| "step": 8520 |
| }, |
| { |
| "epoch": 24.72463768115942, |
| "grad_norm": 0.4213980734348297, |
| "learning_rate": 6.600802504922988e-05, |
| "loss": 0.1172, |
| "step": 8530 |
| }, |
| { |
| "epoch": 24.753623188405797, |
| "grad_norm": 0.42781922221183777, |
| "learning_rate": 6.592968136973604e-05, |
| "loss": 0.1114, |
| "step": 8540 |
| }, |
| { |
| "epoch": 24.782608695652176, |
| "grad_norm": 0.3830643892288208, |
| "learning_rate": 6.585129413910159e-05, |
| "loss": 0.0979, |
| "step": 8550 |
| }, |
| { |
| "epoch": 24.81159420289855, |
| "grad_norm": 0.33465775847435, |
| "learning_rate": 6.577286357163424e-05, |
| "loss": 0.1, |
| "step": 8560 |
| }, |
| { |
| "epoch": 24.840579710144926, |
| "grad_norm": 0.4267924427986145, |
| "learning_rate": 6.569438988176018e-05, |
| "loss": 0.0926, |
| "step": 8570 |
| }, |
| { |
| "epoch": 24.869565217391305, |
| "grad_norm": 0.3853652775287628, |
| "learning_rate": 6.561587328402347e-05, |
| "loss": 0.0893, |
| "step": 8580 |
| }, |
| { |
| "epoch": 24.89855072463768, |
| "grad_norm": 0.3373638689517975, |
| "learning_rate": 6.553731399308549e-05, |
| "loss": 0.091, |
| "step": 8590 |
| }, |
| { |
| "epoch": 24.92753623188406, |
| "grad_norm": 0.38048073649406433, |
| "learning_rate": 6.545871222372436e-05, |
| "loss": 0.0821, |
| "step": 8600 |
| }, |
| { |
| "epoch": 24.956521739130434, |
| "grad_norm": 0.30811434984207153, |
| "learning_rate": 6.538006819083426e-05, |
| "loss": 0.0819, |
| "step": 8610 |
| }, |
| { |
| "epoch": 24.985507246376812, |
| "grad_norm": 0.34115296602249146, |
| "learning_rate": 6.530138210942505e-05, |
| "loss": 0.1111, |
| "step": 8620 |
| }, |
| { |
| "epoch": 25.014492753623188, |
| "grad_norm": 0.23322944343090057, |
| "learning_rate": 6.522265419462141e-05, |
| "loss": 0.0829, |
| "step": 8630 |
| }, |
| { |
| "epoch": 25.043478260869566, |
| "grad_norm": 0.4348927438259125, |
| "learning_rate": 6.514388466166248e-05, |
| "loss": 0.0903, |
| "step": 8640 |
| }, |
| { |
| "epoch": 25.07246376811594, |
| "grad_norm": 0.42378875613212585, |
| "learning_rate": 6.506507372590119e-05, |
| "loss": 0.0964, |
| "step": 8650 |
| }, |
| { |
| "epoch": 25.10144927536232, |
| "grad_norm": 0.4011875092983246, |
| "learning_rate": 6.498622160280355e-05, |
| "loss": 0.0741, |
| "step": 8660 |
| }, |
| { |
| "epoch": 25.130434782608695, |
| "grad_norm": 0.2823712229728699, |
| "learning_rate": 6.490732850794832e-05, |
| "loss": 0.0806, |
| "step": 8670 |
| }, |
| { |
| "epoch": 25.159420289855074, |
| "grad_norm": 0.40900862216949463, |
| "learning_rate": 6.482839465702616e-05, |
| "loss": 0.0823, |
| "step": 8680 |
| }, |
| { |
| "epoch": 25.18840579710145, |
| "grad_norm": 0.31911250948905945, |
| "learning_rate": 6.474942026583923e-05, |
| "loss": 0.0901, |
| "step": 8690 |
| }, |
| { |
| "epoch": 25.217391304347824, |
| "grad_norm": 0.42689287662506104, |
| "learning_rate": 6.467040555030052e-05, |
| "loss": 0.1052, |
| "step": 8700 |
| }, |
| { |
| "epoch": 25.246376811594203, |
| "grad_norm": 0.4792473018169403, |
| "learning_rate": 6.459135072643321e-05, |
| "loss": 0.1032, |
| "step": 8710 |
| }, |
| { |
| "epoch": 25.27536231884058, |
| "grad_norm": 0.39220404624938965, |
| "learning_rate": 6.451225601037019e-05, |
| "loss": 0.0866, |
| "step": 8720 |
| }, |
| { |
| "epoch": 25.304347826086957, |
| "grad_norm": 0.3560490012168884, |
| "learning_rate": 6.443312161835338e-05, |
| "loss": 0.0755, |
| "step": 8730 |
| }, |
| { |
| "epoch": 25.333333333333332, |
| "grad_norm": 0.36267733573913574, |
| "learning_rate": 6.43539477667332e-05, |
| "loss": 0.1098, |
| "step": 8740 |
| }, |
| { |
| "epoch": 25.36231884057971, |
| "grad_norm": 0.31615450978279114, |
| "learning_rate": 6.427473467196793e-05, |
| "loss": 0.1033, |
| "step": 8750 |
| }, |
| { |
| "epoch": 25.391304347826086, |
| "grad_norm": 0.4009799659252167, |
| "learning_rate": 6.419548255062315e-05, |
| "loss": 0.0904, |
| "step": 8760 |
| }, |
| { |
| "epoch": 25.420289855072465, |
| "grad_norm": 0.6001753211021423, |
| "learning_rate": 6.411619161937112e-05, |
| "loss": 0.0924, |
| "step": 8770 |
| }, |
| { |
| "epoch": 25.44927536231884, |
| "grad_norm": 0.2777409851551056, |
| "learning_rate": 6.403686209499022e-05, |
| "loss": 0.0911, |
| "step": 8780 |
| }, |
| { |
| "epoch": 25.47826086956522, |
| "grad_norm": 0.25605079531669617, |
| "learning_rate": 6.395749419436437e-05, |
| "loss": 0.1018, |
| "step": 8790 |
| }, |
| { |
| "epoch": 25.507246376811594, |
| "grad_norm": 0.3277330696582794, |
| "learning_rate": 6.387808813448234e-05, |
| "loss": 0.09, |
| "step": 8800 |
| }, |
| { |
| "epoch": 25.536231884057973, |
| "grad_norm": 0.22537122666835785, |
| "learning_rate": 6.37986441324373e-05, |
| "loss": 0.0835, |
| "step": 8810 |
| }, |
| { |
| "epoch": 25.565217391304348, |
| "grad_norm": 0.427733838558197, |
| "learning_rate": 6.37191624054261e-05, |
| "loss": 0.0812, |
| "step": 8820 |
| }, |
| { |
| "epoch": 25.594202898550726, |
| "grad_norm": 0.20617811381816864, |
| "learning_rate": 6.363964317074872e-05, |
| "loss": 0.0638, |
| "step": 8830 |
| }, |
| { |
| "epoch": 25.6231884057971, |
| "grad_norm": 0.32776620984077454, |
| "learning_rate": 6.356008664580776e-05, |
| "loss": 0.0969, |
| "step": 8840 |
| }, |
| { |
| "epoch": 25.652173913043477, |
| "grad_norm": 0.49045297503471375, |
| "learning_rate": 6.348049304810771e-05, |
| "loss": 0.0872, |
| "step": 8850 |
| }, |
| { |
| "epoch": 25.681159420289855, |
| "grad_norm": 0.25885435938835144, |
| "learning_rate": 6.340086259525442e-05, |
| "loss": 0.0876, |
| "step": 8860 |
| }, |
| { |
| "epoch": 25.71014492753623, |
| "grad_norm": 0.23793990910053253, |
| "learning_rate": 6.332119550495448e-05, |
| "loss": 0.0994, |
| "step": 8870 |
| }, |
| { |
| "epoch": 25.73913043478261, |
| "grad_norm": 0.5432042479515076, |
| "learning_rate": 6.324149199501473e-05, |
| "loss": 0.0867, |
| "step": 8880 |
| }, |
| { |
| "epoch": 25.768115942028984, |
| "grad_norm": 0.21161885559558868, |
| "learning_rate": 6.316175228334146e-05, |
| "loss": 0.0952, |
| "step": 8890 |
| }, |
| { |
| "epoch": 25.797101449275363, |
| "grad_norm": 0.5830066204071045, |
| "learning_rate": 6.308197658794003e-05, |
| "loss": 0.1331, |
| "step": 8900 |
| }, |
| { |
| "epoch": 25.82608695652174, |
| "grad_norm": 0.4310133159160614, |
| "learning_rate": 6.300216512691417e-05, |
| "loss": 0.1281, |
| "step": 8910 |
| }, |
| { |
| "epoch": 25.855072463768117, |
| "grad_norm": 0.48247355222702026, |
| "learning_rate": 6.292231811846532e-05, |
| "loss": 0.0932, |
| "step": 8920 |
| }, |
| { |
| "epoch": 25.884057971014492, |
| "grad_norm": 0.31100866198539734, |
| "learning_rate": 6.284243578089217e-05, |
| "loss": 0.0934, |
| "step": 8930 |
| }, |
| { |
| "epoch": 25.91304347826087, |
| "grad_norm": 0.6842138171195984, |
| "learning_rate": 6.276251833258999e-05, |
| "loss": 0.0881, |
| "step": 8940 |
| }, |
| { |
| "epoch": 25.942028985507246, |
| "grad_norm": 0.31012195348739624, |
| "learning_rate": 6.268256599205003e-05, |
| "loss": 0.1034, |
| "step": 8950 |
| }, |
| { |
| "epoch": 25.971014492753625, |
| "grad_norm": 0.4207480847835541, |
| "learning_rate": 6.260257897785892e-05, |
| "loss": 0.1123, |
| "step": 8960 |
| }, |
| { |
| "epoch": 26.0, |
| "grad_norm": 0.4856835901737213, |
| "learning_rate": 6.252255750869811e-05, |
| "loss": 0.0968, |
| "step": 8970 |
| }, |
| { |
| "epoch": 26.028985507246375, |
| "grad_norm": 0.34793731570243835, |
| "learning_rate": 6.244250180334325e-05, |
| "loss": 0.0958, |
| "step": 8980 |
| }, |
| { |
| "epoch": 26.057971014492754, |
| "grad_norm": 0.36127743124961853, |
| "learning_rate": 6.236241208066356e-05, |
| "loss": 0.0995, |
| "step": 8990 |
| }, |
| { |
| "epoch": 26.08695652173913, |
| "grad_norm": 0.3173960745334625, |
| "learning_rate": 6.228228855962133e-05, |
| "loss": 0.089, |
| "step": 9000 |
| }, |
| { |
| "epoch": 26.115942028985508, |
| "grad_norm": 0.45852774381637573, |
| "learning_rate": 6.220213145927115e-05, |
| "loss": 0.1077, |
| "step": 9010 |
| }, |
| { |
| "epoch": 26.144927536231883, |
| "grad_norm": 0.3714202344417572, |
| "learning_rate": 6.212194099875951e-05, |
| "loss": 0.0766, |
| "step": 9020 |
| }, |
| { |
| "epoch": 26.17391304347826, |
| "grad_norm": 0.5989710688591003, |
| "learning_rate": 6.204171739732405e-05, |
| "loss": 0.0974, |
| "step": 9030 |
| }, |
| { |
| "epoch": 26.202898550724637, |
| "grad_norm": 0.3582770824432373, |
| "learning_rate": 6.196146087429303e-05, |
| "loss": 0.1153, |
| "step": 9040 |
| }, |
| { |
| "epoch": 26.231884057971016, |
| "grad_norm": 0.3930160105228424, |
| "learning_rate": 6.188117164908474e-05, |
| "loss": 0.1032, |
| "step": 9050 |
| }, |
| { |
| "epoch": 26.26086956521739, |
| "grad_norm": 0.4734560549259186, |
| "learning_rate": 6.180084994120684e-05, |
| "loss": 0.0911, |
| "step": 9060 |
| }, |
| { |
| "epoch": 26.28985507246377, |
| "grad_norm": 0.36610832810401917, |
| "learning_rate": 6.17204959702558e-05, |
| "loss": 0.0814, |
| "step": 9070 |
| }, |
| { |
| "epoch": 26.318840579710145, |
| "grad_norm": 0.37070533633232117, |
| "learning_rate": 6.164010995591635e-05, |
| "loss": 0.0913, |
| "step": 9080 |
| }, |
| { |
| "epoch": 26.347826086956523, |
| "grad_norm": 0.36717358231544495, |
| "learning_rate": 6.155969211796076e-05, |
| "loss": 0.1182, |
| "step": 9090 |
| }, |
| { |
| "epoch": 26.3768115942029, |
| "grad_norm": 0.39474284648895264, |
| "learning_rate": 6.147924267624829e-05, |
| "loss": 0.0764, |
| "step": 9100 |
| }, |
| { |
| "epoch": 26.405797101449274, |
| "grad_norm": 0.3292117118835449, |
| "learning_rate": 6.13987618507247e-05, |
| "loss": 0.0933, |
| "step": 9110 |
| }, |
| { |
| "epoch": 26.434782608695652, |
| "grad_norm": 0.4586057662963867, |
| "learning_rate": 6.131824986142147e-05, |
| "loss": 0.104, |
| "step": 9120 |
| }, |
| { |
| "epoch": 26.463768115942027, |
| "grad_norm": 0.4136529266834259, |
| "learning_rate": 6.123770692845529e-05, |
| "loss": 0.1009, |
| "step": 9130 |
| }, |
| { |
| "epoch": 26.492753623188406, |
| "grad_norm": 0.21014559268951416, |
| "learning_rate": 6.11571332720275e-05, |
| "loss": 0.097, |
| "step": 9140 |
| }, |
| { |
| "epoch": 26.52173913043478, |
| "grad_norm": 0.34362557530403137, |
| "learning_rate": 6.107652911242336e-05, |
| "loss": 0.0935, |
| "step": 9150 |
| }, |
| { |
| "epoch": 26.55072463768116, |
| "grad_norm": 0.40612903237342834, |
| "learning_rate": 6.0995894670011586e-05, |
| "loss": 0.1103, |
| "step": 9160 |
| }, |
| { |
| "epoch": 26.579710144927535, |
| "grad_norm": 0.5520173907279968, |
| "learning_rate": 6.091523016524368e-05, |
| "loss": 0.08, |
| "step": 9170 |
| }, |
| { |
| "epoch": 26.608695652173914, |
| "grad_norm": 0.34539029002189636, |
| "learning_rate": 6.083453581865328e-05, |
| "loss": 0.081, |
| "step": 9180 |
| }, |
| { |
| "epoch": 26.63768115942029, |
| "grad_norm": 0.2292974442243576, |
| "learning_rate": 6.075381185085568e-05, |
| "loss": 0.0913, |
| "step": 9190 |
| }, |
| { |
| "epoch": 26.666666666666668, |
| "grad_norm": 0.530166506767273, |
| "learning_rate": 6.067305848254709e-05, |
| "loss": 0.1242, |
| "step": 9200 |
| }, |
| { |
| "epoch": 26.695652173913043, |
| "grad_norm": 0.313507616519928, |
| "learning_rate": 6.059227593450418e-05, |
| "loss": 0.091, |
| "step": 9210 |
| }, |
| { |
| "epoch": 26.72463768115942, |
| "grad_norm": 0.22776463627815247, |
| "learning_rate": 6.051146442758333e-05, |
| "loss": 0.0891, |
| "step": 9220 |
| }, |
| { |
| "epoch": 26.753623188405797, |
| "grad_norm": 0.35936057567596436, |
| "learning_rate": 6.043062418272012e-05, |
| "loss": 0.0893, |
| "step": 9230 |
| }, |
| { |
| "epoch": 26.782608695652176, |
| "grad_norm": 0.4251636564731598, |
| "learning_rate": 6.0349755420928666e-05, |
| "loss": 0.0899, |
| "step": 9240 |
| }, |
| { |
| "epoch": 26.81159420289855, |
| "grad_norm": 0.420236736536026, |
| "learning_rate": 6.0268858363301105e-05, |
| "loss": 0.0914, |
| "step": 9250 |
| }, |
| { |
| "epoch": 26.840579710144926, |
| "grad_norm": 0.4716984033584595, |
| "learning_rate": 6.018793323100689e-05, |
| "loss": 0.1019, |
| "step": 9260 |
| }, |
| { |
| "epoch": 26.869565217391305, |
| "grad_norm": 0.2790106534957886, |
| "learning_rate": 6.0106980245292255e-05, |
| "loss": 0.0795, |
| "step": 9270 |
| }, |
| { |
| "epoch": 26.89855072463768, |
| "grad_norm": 0.6252140402793884, |
| "learning_rate": 6.002599962747957e-05, |
| "loss": 0.0852, |
| "step": 9280 |
| }, |
| { |
| "epoch": 26.92753623188406, |
| "grad_norm": 0.43576961755752563, |
| "learning_rate": 5.994499159896673e-05, |
| "loss": 0.0998, |
| "step": 9290 |
| }, |
| { |
| "epoch": 26.956521739130434, |
| "grad_norm": 0.6333770751953125, |
| "learning_rate": 5.9863956381226607e-05, |
| "loss": 0.0915, |
| "step": 9300 |
| }, |
| { |
| "epoch": 26.985507246376812, |
| "grad_norm": 0.5500407814979553, |
| "learning_rate": 5.9782894195806394e-05, |
| "loss": 0.104, |
| "step": 9310 |
| }, |
| { |
| "epoch": 27.014492753623188, |
| "grad_norm": 0.44380778074264526, |
| "learning_rate": 5.9701805264327004e-05, |
| "loss": 0.0836, |
| "step": 9320 |
| }, |
| { |
| "epoch": 27.043478260869566, |
| "grad_norm": 0.41339370608329773, |
| "learning_rate": 5.96206898084825e-05, |
| "loss": 0.0898, |
| "step": 9330 |
| }, |
| { |
| "epoch": 27.07246376811594, |
| "grad_norm": 0.42062732577323914, |
| "learning_rate": 5.953954805003942e-05, |
| "loss": 0.1016, |
| "step": 9340 |
| }, |
| { |
| "epoch": 27.10144927536232, |
| "grad_norm": 0.26764097809791565, |
| "learning_rate": 5.945838021083623e-05, |
| "loss": 0.0953, |
| "step": 9350 |
| }, |
| { |
| "epoch": 27.130434782608695, |
| "grad_norm": 0.3174140453338623, |
| "learning_rate": 5.9377186512782714e-05, |
| "loss": 0.1038, |
| "step": 9360 |
| }, |
| { |
| "epoch": 27.159420289855074, |
| "grad_norm": 0.5403830409049988, |
| "learning_rate": 5.929596717785935e-05, |
| "loss": 0.0998, |
| "step": 9370 |
| }, |
| { |
| "epoch": 27.18840579710145, |
| "grad_norm": 0.29460418224334717, |
| "learning_rate": 5.921472242811668e-05, |
| "loss": 0.0998, |
| "step": 9380 |
| }, |
| { |
| "epoch": 27.217391304347824, |
| "grad_norm": 0.3835254907608032, |
| "learning_rate": 5.913345248567475e-05, |
| "loss": 0.0895, |
| "step": 9390 |
| }, |
| { |
| "epoch": 27.246376811594203, |
| "grad_norm": 0.26222512125968933, |
| "learning_rate": 5.905215757272248e-05, |
| "loss": 0.0829, |
| "step": 9400 |
| }, |
| { |
| "epoch": 27.27536231884058, |
| "grad_norm": 0.3459964990615845, |
| "learning_rate": 5.897083791151706e-05, |
| "loss": 0.0762, |
| "step": 9410 |
| }, |
| { |
| "epoch": 27.304347826086957, |
| "grad_norm": 0.4421097934246063, |
| "learning_rate": 5.888949372438336e-05, |
| "loss": 0.0809, |
| "step": 9420 |
| }, |
| { |
| "epoch": 27.333333333333332, |
| "grad_norm": 0.3622925877571106, |
| "learning_rate": 5.8808125233713255e-05, |
| "loss": 0.0906, |
| "step": 9430 |
| }, |
| { |
| "epoch": 27.36231884057971, |
| "grad_norm": 0.25134244561195374, |
| "learning_rate": 5.872673266196509e-05, |
| "loss": 0.0879, |
| "step": 9440 |
| }, |
| { |
| "epoch": 27.391304347826086, |
| "grad_norm": 0.5268398523330688, |
| "learning_rate": 5.864531623166305e-05, |
| "loss": 0.0896, |
| "step": 9450 |
| }, |
| { |
| "epoch": 27.420289855072465, |
| "grad_norm": 0.2773943841457367, |
| "learning_rate": 5.856387616539656e-05, |
| "loss": 0.0992, |
| "step": 9460 |
| }, |
| { |
| "epoch": 27.44927536231884, |
| "grad_norm": 0.40491020679473877, |
| "learning_rate": 5.848241268581967e-05, |
| "loss": 0.1059, |
| "step": 9470 |
| }, |
| { |
| "epoch": 27.47826086956522, |
| "grad_norm": 0.5842623710632324, |
| "learning_rate": 5.840092601565037e-05, |
| "loss": 0.0952, |
| "step": 9480 |
| }, |
| { |
| "epoch": 27.507246376811594, |
| "grad_norm": 0.4927104115486145, |
| "learning_rate": 5.8319416377670144e-05, |
| "loss": 0.1123, |
| "step": 9490 |
| }, |
| { |
| "epoch": 27.536231884057973, |
| "grad_norm": 0.29302486777305603, |
| "learning_rate": 5.82378839947232e-05, |
| "loss": 0.106, |
| "step": 9500 |
| }, |
| { |
| "epoch": 27.565217391304348, |
| "grad_norm": 0.4573745131492615, |
| "learning_rate": 5.815632908971599e-05, |
| "loss": 0.0952, |
| "step": 9510 |
| }, |
| { |
| "epoch": 27.594202898550726, |
| "grad_norm": 0.4357374906539917, |
| "learning_rate": 5.80747518856165e-05, |
| "loss": 0.0924, |
| "step": 9520 |
| }, |
| { |
| "epoch": 27.6231884057971, |
| "grad_norm": 0.3612167537212372, |
| "learning_rate": 5.799315260545367e-05, |
| "loss": 0.1071, |
| "step": 9530 |
| }, |
| { |
| "epoch": 27.652173913043477, |
| "grad_norm": 0.3312841057777405, |
| "learning_rate": 5.791153147231686e-05, |
| "loss": 0.1093, |
| "step": 9540 |
| }, |
| { |
| "epoch": 27.681159420289855, |
| "grad_norm": 0.43029627203941345, |
| "learning_rate": 5.782988870935509e-05, |
| "loss": 0.0969, |
| "step": 9550 |
| }, |
| { |
| "epoch": 27.71014492753623, |
| "grad_norm": 0.371330201625824, |
| "learning_rate": 5.774822453977657e-05, |
| "loss": 0.0935, |
| "step": 9560 |
| }, |
| { |
| "epoch": 27.73913043478261, |
| "grad_norm": 0.35629457235336304, |
| "learning_rate": 5.7666539186848036e-05, |
| "loss": 0.0972, |
| "step": 9570 |
| }, |
| { |
| "epoch": 27.768115942028984, |
| "grad_norm": 0.37646907567977905, |
| "learning_rate": 5.758483287389411e-05, |
| "loss": 0.0836, |
| "step": 9580 |
| }, |
| { |
| "epoch": 27.797101449275363, |
| "grad_norm": 0.26607057452201843, |
| "learning_rate": 5.7503105824296735e-05, |
| "loss": 0.1109, |
| "step": 9590 |
| }, |
| { |
| "epoch": 27.82608695652174, |
| "grad_norm": 0.3088560998439789, |
| "learning_rate": 5.742135826149453e-05, |
| "loss": 0.0888, |
| "step": 9600 |
| }, |
| { |
| "epoch": 27.855072463768117, |
| "grad_norm": 0.2338147908449173, |
| "learning_rate": 5.7339590408982223e-05, |
| "loss": 0.0929, |
| "step": 9610 |
| }, |
| { |
| "epoch": 27.884057971014492, |
| "grad_norm": 0.33873507380485535, |
| "learning_rate": 5.725780249031e-05, |
| "loss": 0.0971, |
| "step": 9620 |
| }, |
| { |
| "epoch": 27.91304347826087, |
| "grad_norm": 0.2373759001493454, |
| "learning_rate": 5.717599472908292e-05, |
| "loss": 0.0844, |
| "step": 9630 |
| }, |
| { |
| "epoch": 27.942028985507246, |
| "grad_norm": 0.36954036355018616, |
| "learning_rate": 5.7094167348960237e-05, |
| "loss": 0.0979, |
| "step": 9640 |
| }, |
| { |
| "epoch": 27.971014492753625, |
| "grad_norm": 0.44296813011169434, |
| "learning_rate": 5.7012320573654945e-05, |
| "loss": 0.0953, |
| "step": 9650 |
| }, |
| { |
| "epoch": 28.0, |
| "grad_norm": 0.5584344267845154, |
| "learning_rate": 5.693045462693295e-05, |
| "loss": 0.0984, |
| "step": 9660 |
| }, |
| { |
| "epoch": 28.028985507246375, |
| "grad_norm": 0.2427714765071869, |
| "learning_rate": 5.684856973261266e-05, |
| "loss": 0.0809, |
| "step": 9670 |
| }, |
| { |
| "epoch": 28.057971014492754, |
| "grad_norm": 0.33059096336364746, |
| "learning_rate": 5.6766666114564215e-05, |
| "loss": 0.0917, |
| "step": 9680 |
| }, |
| { |
| "epoch": 28.08695652173913, |
| "grad_norm": 0.32649749517440796, |
| "learning_rate": 5.668474399670899e-05, |
| "loss": 0.0882, |
| "step": 9690 |
| }, |
| { |
| "epoch": 28.115942028985508, |
| "grad_norm": 0.2927171289920807, |
| "learning_rate": 5.660280360301896e-05, |
| "loss": 0.0931, |
| "step": 9700 |
| }, |
| { |
| "epoch": 28.144927536231883, |
| "grad_norm": 0.3866276443004608, |
| "learning_rate": 5.652084515751599e-05, |
| "loss": 0.1069, |
| "step": 9710 |
| }, |
| { |
| "epoch": 28.17391304347826, |
| "grad_norm": 0.30153888463974, |
| "learning_rate": 5.643886888427137e-05, |
| "loss": 0.0833, |
| "step": 9720 |
| }, |
| { |
| "epoch": 28.202898550724637, |
| "grad_norm": 0.4071616232395172, |
| "learning_rate": 5.6356875007405074e-05, |
| "loss": 0.0932, |
| "step": 9730 |
| }, |
| { |
| "epoch": 28.231884057971016, |
| "grad_norm": 0.3093550503253937, |
| "learning_rate": 5.627486375108525e-05, |
| "loss": 0.0786, |
| "step": 9740 |
| }, |
| { |
| "epoch": 28.26086956521739, |
| "grad_norm": 0.4130619168281555, |
| "learning_rate": 5.619283533952754e-05, |
| "loss": 0.1169, |
| "step": 9750 |
| }, |
| { |
| "epoch": 28.28985507246377, |
| "grad_norm": 0.19634698331356049, |
| "learning_rate": 5.6110789996994474e-05, |
| "loss": 0.0808, |
| "step": 9760 |
| }, |
| { |
| "epoch": 28.318840579710145, |
| "grad_norm": 0.27509117126464844, |
| "learning_rate": 5.602872794779491e-05, |
| "loss": 0.0818, |
| "step": 9770 |
| }, |
| { |
| "epoch": 28.347826086956523, |
| "grad_norm": 0.36869877576828003, |
| "learning_rate": 5.594664941628334e-05, |
| "loss": 0.0808, |
| "step": 9780 |
| }, |
| { |
| "epoch": 28.3768115942029, |
| "grad_norm": 0.28021812438964844, |
| "learning_rate": 5.5864554626859324e-05, |
| "loss": 0.086, |
| "step": 9790 |
| }, |
| { |
| "epoch": 28.405797101449274, |
| "grad_norm": 0.26831555366516113, |
| "learning_rate": 5.578244380396691e-05, |
| "loss": 0.0951, |
| "step": 9800 |
| }, |
| { |
| "epoch": 28.434782608695652, |
| "grad_norm": 0.3775530457496643, |
| "learning_rate": 5.570031717209394e-05, |
| "loss": 0.0837, |
| "step": 9810 |
| }, |
| { |
| "epoch": 28.463768115942027, |
| "grad_norm": 0.24371632933616638, |
| "learning_rate": 5.561817495577147e-05, |
| "loss": 0.082, |
| "step": 9820 |
| }, |
| { |
| "epoch": 28.492753623188406, |
| "grad_norm": 0.3440195620059967, |
| "learning_rate": 5.5536017379573215e-05, |
| "loss": 0.086, |
| "step": 9830 |
| }, |
| { |
| "epoch": 28.52173913043478, |
| "grad_norm": 0.26935017108917236, |
| "learning_rate": 5.545384466811483e-05, |
| "loss": 0.0972, |
| "step": 9840 |
| }, |
| { |
| "epoch": 28.55072463768116, |
| "grad_norm": 0.38084691762924194, |
| "learning_rate": 5.5371657046053384e-05, |
| "loss": 0.1017, |
| "step": 9850 |
| }, |
| { |
| "epoch": 28.579710144927535, |
| "grad_norm": 0.290239155292511, |
| "learning_rate": 5.528945473808669e-05, |
| "loss": 0.0915, |
| "step": 9860 |
| }, |
| { |
| "epoch": 28.608695652173914, |
| "grad_norm": 0.33482253551483154, |
| "learning_rate": 5.520723796895272e-05, |
| "loss": 0.0908, |
| "step": 9870 |
| }, |
| { |
| "epoch": 28.63768115942029, |
| "grad_norm": 0.3747408986091614, |
| "learning_rate": 5.512500696342897e-05, |
| "loss": 0.0844, |
| "step": 9880 |
| }, |
| { |
| "epoch": 28.666666666666668, |
| "grad_norm": 0.4802875816822052, |
| "learning_rate": 5.504276194633188e-05, |
| "loss": 0.078, |
| "step": 9890 |
| }, |
| { |
| "epoch": 28.695652173913043, |
| "grad_norm": 0.27169641852378845, |
| "learning_rate": 5.49605031425162e-05, |
| "loss": 0.0952, |
| "step": 9900 |
| }, |
| { |
| "epoch": 28.72463768115942, |
| "grad_norm": 0.4197971522808075, |
| "learning_rate": 5.487823077687434e-05, |
| "loss": 0.0876, |
| "step": 9910 |
| }, |
| { |
| "epoch": 28.753623188405797, |
| "grad_norm": 0.37185847759246826, |
| "learning_rate": 5.4795945074335806e-05, |
| "loss": 0.1035, |
| "step": 9920 |
| }, |
| { |
| "epoch": 28.782608695652176, |
| "grad_norm": 0.4602510929107666, |
| "learning_rate": 5.471364625986657e-05, |
| "loss": 0.1092, |
| "step": 9930 |
| }, |
| { |
| "epoch": 28.81159420289855, |
| "grad_norm": 0.26933249831199646, |
| "learning_rate": 5.463133455846845e-05, |
| "loss": 0.0695, |
| "step": 9940 |
| }, |
| { |
| "epoch": 28.840579710144926, |
| "grad_norm": 0.4972953796386719, |
| "learning_rate": 5.4549010195178505e-05, |
| "loss": 0.0927, |
| "step": 9950 |
| }, |
| { |
| "epoch": 28.869565217391305, |
| "grad_norm": 0.33794844150543213, |
| "learning_rate": 5.446667339506838e-05, |
| "loss": 0.0836, |
| "step": 9960 |
| }, |
| { |
| "epoch": 28.89855072463768, |
| "grad_norm": 0.4375225007534027, |
| "learning_rate": 5.4384324383243756e-05, |
| "loss": 0.0749, |
| "step": 9970 |
| }, |
| { |
| "epoch": 28.92753623188406, |
| "grad_norm": 0.3220159411430359, |
| "learning_rate": 5.430196338484368e-05, |
| "loss": 0.1062, |
| "step": 9980 |
| }, |
| { |
| "epoch": 28.956521739130434, |
| "grad_norm": 0.2979547381401062, |
| "learning_rate": 5.4219590625039975e-05, |
| "loss": 0.0926, |
| "step": 9990 |
| }, |
| { |
| "epoch": 28.985507246376812, |
| "grad_norm": 0.3251277208328247, |
| "learning_rate": 5.413720632903664e-05, |
| "loss": 0.0753, |
| "step": 10000 |
| }, |
| { |
| "epoch": 29.014492753623188, |
| "grad_norm": 0.5778645873069763, |
| "learning_rate": 5.405481072206917e-05, |
| "loss": 0.1194, |
| "step": 10010 |
| }, |
| { |
| "epoch": 29.043478260869566, |
| "grad_norm": 0.5020672082901001, |
| "learning_rate": 5.397240402940402e-05, |
| "loss": 0.0799, |
| "step": 10020 |
| }, |
| { |
| "epoch": 29.07246376811594, |
| "grad_norm": 0.27410048246383667, |
| "learning_rate": 5.388998647633794e-05, |
| "loss": 0.0948, |
| "step": 10030 |
| }, |
| { |
| "epoch": 29.10144927536232, |
| "grad_norm": 0.42518341541290283, |
| "learning_rate": 5.380755828819737e-05, |
| "loss": 0.0803, |
| "step": 10040 |
| }, |
| { |
| "epoch": 29.130434782608695, |
| "grad_norm": 0.4766830503940582, |
| "learning_rate": 5.3725119690337846e-05, |
| "loss": 0.1079, |
| "step": 10050 |
| }, |
| { |
| "epoch": 29.159420289855074, |
| "grad_norm": 0.3795958161354065, |
| "learning_rate": 5.3642670908143324e-05, |
| "loss": 0.1114, |
| "step": 10060 |
| }, |
| { |
| "epoch": 29.18840579710145, |
| "grad_norm": 0.2962549924850464, |
| "learning_rate": 5.356021216702562e-05, |
| "loss": 0.1028, |
| "step": 10070 |
| }, |
| { |
| "epoch": 29.217391304347824, |
| "grad_norm": 0.5517275929450989, |
| "learning_rate": 5.347774369242381e-05, |
| "loss": 0.1054, |
| "step": 10080 |
| }, |
| { |
| "epoch": 29.246376811594203, |
| "grad_norm": 0.26086458563804626, |
| "learning_rate": 5.3395265709803545e-05, |
| "loss": 0.1065, |
| "step": 10090 |
| }, |
| { |
| "epoch": 29.27536231884058, |
| "grad_norm": 0.287026971578598, |
| "learning_rate": 5.331277844465647e-05, |
| "loss": 0.0849, |
| "step": 10100 |
| }, |
| { |
| "epoch": 29.304347826086957, |
| "grad_norm": 0.23269617557525635, |
| "learning_rate": 5.323028212249963e-05, |
| "loss": 0.0786, |
| "step": 10110 |
| }, |
| { |
| "epoch": 29.333333333333332, |
| "grad_norm": 0.384395569562912, |
| "learning_rate": 5.314777696887481e-05, |
| "loss": 0.0739, |
| "step": 10120 |
| }, |
| { |
| "epoch": 29.36231884057971, |
| "grad_norm": 0.3276943266391754, |
| "learning_rate": 5.306526320934796e-05, |
| "loss": 0.0752, |
| "step": 10130 |
| }, |
| { |
| "epoch": 29.391304347826086, |
| "grad_norm": 0.4074258506298065, |
| "learning_rate": 5.298274106950854e-05, |
| "loss": 0.0975, |
| "step": 10140 |
| }, |
| { |
| "epoch": 29.420289855072465, |
| "grad_norm": 0.48793792724609375, |
| "learning_rate": 5.290021077496893e-05, |
| "loss": 0.088, |
| "step": 10150 |
| }, |
| { |
| "epoch": 29.44927536231884, |
| "grad_norm": 0.3513041138648987, |
| "learning_rate": 5.2817672551363816e-05, |
| "loss": 0.1068, |
| "step": 10160 |
| }, |
| { |
| "epoch": 29.47826086956522, |
| "grad_norm": 0.4190158247947693, |
| "learning_rate": 5.273512662434952e-05, |
| "loss": 0.0749, |
| "step": 10170 |
| }, |
| { |
| "epoch": 29.507246376811594, |
| "grad_norm": 0.41182804107666016, |
| "learning_rate": 5.265257321960349e-05, |
| "loss": 0.0832, |
| "step": 10180 |
| }, |
| { |
| "epoch": 29.536231884057973, |
| "grad_norm": 0.406429648399353, |
| "learning_rate": 5.257001256282357e-05, |
| "loss": 0.0894, |
| "step": 10190 |
| }, |
| { |
| "epoch": 29.565217391304348, |
| "grad_norm": 0.3909933269023895, |
| "learning_rate": 5.248744487972742e-05, |
| "loss": 0.0981, |
| "step": 10200 |
| }, |
| { |
| "epoch": 29.594202898550726, |
| "grad_norm": 0.45473939180374146, |
| "learning_rate": 5.240487039605196e-05, |
| "loss": 0.0875, |
| "step": 10210 |
| }, |
| { |
| "epoch": 29.6231884057971, |
| "grad_norm": 0.3364003300666809, |
| "learning_rate": 5.232228933755267e-05, |
| "loss": 0.0938, |
| "step": 10220 |
| }, |
| { |
| "epoch": 29.652173913043477, |
| "grad_norm": 0.40386608242988586, |
| "learning_rate": 5.2239701930003006e-05, |
| "loss": 0.0972, |
| "step": 10230 |
| }, |
| { |
| "epoch": 29.681159420289855, |
| "grad_norm": 0.4128904342651367, |
| "learning_rate": 5.215710839919379e-05, |
| "loss": 0.085, |
| "step": 10240 |
| }, |
| { |
| "epoch": 29.71014492753623, |
| "grad_norm": 0.4223697781562805, |
| "learning_rate": 5.207450897093257e-05, |
| "loss": 0.0874, |
| "step": 10250 |
| }, |
| { |
| "epoch": 29.73913043478261, |
| "grad_norm": 0.4211285710334778, |
| "learning_rate": 5.1991903871043046e-05, |
| "loss": 0.103, |
| "step": 10260 |
| }, |
| { |
| "epoch": 29.768115942028984, |
| "grad_norm": 0.5267713665962219, |
| "learning_rate": 5.190929332536439e-05, |
| "loss": 0.0863, |
| "step": 10270 |
| }, |
| { |
| "epoch": 29.797101449275363, |
| "grad_norm": 0.275651752948761, |
| "learning_rate": 5.182667755975071e-05, |
| "loss": 0.0865, |
| "step": 10280 |
| }, |
| { |
| "epoch": 29.82608695652174, |
| "grad_norm": 0.3134634792804718, |
| "learning_rate": 5.1744056800070315e-05, |
| "loss": 0.0766, |
| "step": 10290 |
| }, |
| { |
| "epoch": 29.855072463768117, |
| "grad_norm": 0.39577049016952515, |
| "learning_rate": 5.166143127220524e-05, |
| "loss": 0.0986, |
| "step": 10300 |
| }, |
| { |
| "epoch": 29.884057971014492, |
| "grad_norm": 0.3079846501350403, |
| "learning_rate": 5.1578801202050485e-05, |
| "loss": 0.0919, |
| "step": 10310 |
| }, |
| { |
| "epoch": 29.91304347826087, |
| "grad_norm": 0.3528546690940857, |
| "learning_rate": 5.149616681551355e-05, |
| "loss": 0.1022, |
| "step": 10320 |
| }, |
| { |
| "epoch": 29.942028985507246, |
| "grad_norm": 0.45117315649986267, |
| "learning_rate": 5.141352833851367e-05, |
| "loss": 0.0895, |
| "step": 10330 |
| }, |
| { |
| "epoch": 29.971014492753625, |
| "grad_norm": 0.3826615810394287, |
| "learning_rate": 5.1330885996981285e-05, |
| "loss": 0.0746, |
| "step": 10340 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 0.5462546348571777, |
| "learning_rate": 5.124824001685741e-05, |
| "loss": 0.0873, |
| "step": 10350 |
| }, |
| { |
| "epoch": 30.028985507246375, |
| "grad_norm": 0.38107597827911377, |
| "learning_rate": 5.116559062409298e-05, |
| "loss": 0.0957, |
| "step": 10360 |
| }, |
| { |
| "epoch": 30.057971014492754, |
| "grad_norm": 0.30049923062324524, |
| "learning_rate": 5.10829380446483e-05, |
| "loss": 0.0765, |
| "step": 10370 |
| }, |
| { |
| "epoch": 30.08695652173913, |
| "grad_norm": 0.3036685585975647, |
| "learning_rate": 5.100028250449235e-05, |
| "loss": 0.0828, |
| "step": 10380 |
| }, |
| { |
| "epoch": 30.115942028985508, |
| "grad_norm": 0.3812878727912903, |
| "learning_rate": 5.0917624229602234e-05, |
| "loss": 0.0929, |
| "step": 10390 |
| }, |
| { |
| "epoch": 30.144927536231883, |
| "grad_norm": 0.3203854262828827, |
| "learning_rate": 5.0834963445962524e-05, |
| "loss": 0.0942, |
| "step": 10400 |
| }, |
| { |
| "epoch": 30.17391304347826, |
| "grad_norm": 0.2617367208003998, |
| "learning_rate": 5.075230037956461e-05, |
| "loss": 0.0991, |
| "step": 10410 |
| }, |
| { |
| "epoch": 30.202898550724637, |
| "grad_norm": 0.39057719707489014, |
| "learning_rate": 5.0669635256406213e-05, |
| "loss": 0.0895, |
| "step": 10420 |
| }, |
| { |
| "epoch": 30.231884057971016, |
| "grad_norm": 0.4870263636112213, |
| "learning_rate": 5.058696830249058e-05, |
| "loss": 0.0991, |
| "step": 10430 |
| }, |
| { |
| "epoch": 30.26086956521739, |
| "grad_norm": 0.40686681866645813, |
| "learning_rate": 5.050429974382602e-05, |
| "loss": 0.0821, |
| "step": 10440 |
| }, |
| { |
| "epoch": 30.28985507246377, |
| "grad_norm": 0.32615211606025696, |
| "learning_rate": 5.042162980642523e-05, |
| "loss": 0.1024, |
| "step": 10450 |
| }, |
| { |
| "epoch": 30.318840579710145, |
| "grad_norm": 0.47428640723228455, |
| "learning_rate": 5.033895871630462e-05, |
| "loss": 0.0855, |
| "step": 10460 |
| }, |
| { |
| "epoch": 30.347826086956523, |
| "grad_norm": 0.45208922028541565, |
| "learning_rate": 5.025628669948386e-05, |
| "loss": 0.0922, |
| "step": 10470 |
| }, |
| { |
| "epoch": 30.3768115942029, |
| "grad_norm": 0.41651803255081177, |
| "learning_rate": 5.017361398198502e-05, |
| "loss": 0.0776, |
| "step": 10480 |
| }, |
| { |
| "epoch": 30.405797101449274, |
| "grad_norm": 0.40021809935569763, |
| "learning_rate": 5.009094078983221e-05, |
| "loss": 0.0828, |
| "step": 10490 |
| }, |
| { |
| "epoch": 30.434782608695652, |
| "grad_norm": 0.40372738242149353, |
| "learning_rate": 5.000826734905073e-05, |
| "loss": 0.0947, |
| "step": 10500 |
| }, |
| { |
| "epoch": 30.463768115942027, |
| "grad_norm": 0.21877968311309814, |
| "learning_rate": 4.9925593885666645e-05, |
| "loss": 0.0826, |
| "step": 10510 |
| }, |
| { |
| "epoch": 30.492753623188406, |
| "grad_norm": 0.32412436604499817, |
| "learning_rate": 4.984292062570602e-05, |
| "loss": 0.1022, |
| "step": 10520 |
| }, |
| { |
| "epoch": 30.52173913043478, |
| "grad_norm": 0.3431316018104553, |
| "learning_rate": 4.976024779519442e-05, |
| "loss": 0.079, |
| "step": 10530 |
| }, |
| { |
| "epoch": 30.55072463768116, |
| "grad_norm": 0.3585143983364105, |
| "learning_rate": 4.9677575620156194e-05, |
| "loss": 0.0885, |
| "step": 10540 |
| }, |
| { |
| "epoch": 30.579710144927535, |
| "grad_norm": 0.3738825023174286, |
| "learning_rate": 4.959490432661391e-05, |
| "loss": 0.1003, |
| "step": 10550 |
| }, |
| { |
| "epoch": 30.608695652173914, |
| "grad_norm": 0.45717304944992065, |
| "learning_rate": 4.9512234140587726e-05, |
| "loss": 0.0908, |
| "step": 10560 |
| }, |
| { |
| "epoch": 30.63768115942029, |
| "grad_norm": 0.43018513917922974, |
| "learning_rate": 4.942956528809477e-05, |
| "loss": 0.0899, |
| "step": 10570 |
| }, |
| { |
| "epoch": 30.666666666666668, |
| "grad_norm": 0.4122094511985779, |
| "learning_rate": 4.934689799514854e-05, |
| "loss": 0.0976, |
| "step": 10580 |
| }, |
| { |
| "epoch": 30.695652173913043, |
| "grad_norm": 0.4348907172679901, |
| "learning_rate": 4.926423248775827e-05, |
| "loss": 0.0883, |
| "step": 10590 |
| }, |
| { |
| "epoch": 30.72463768115942, |
| "grad_norm": 0.45371150970458984, |
| "learning_rate": 4.918156899192826e-05, |
| "loss": 0.1057, |
| "step": 10600 |
| }, |
| { |
| "epoch": 30.753623188405797, |
| "grad_norm": 0.3190701901912689, |
| "learning_rate": 4.909890773365738e-05, |
| "loss": 0.0998, |
| "step": 10610 |
| }, |
| { |
| "epoch": 30.782608695652176, |
| "grad_norm": 0.26156431436538696, |
| "learning_rate": 4.9016248938938344e-05, |
| "loss": 0.086, |
| "step": 10620 |
| }, |
| { |
| "epoch": 30.81159420289855, |
| "grad_norm": 0.24229975044727325, |
| "learning_rate": 4.8933592833757156e-05, |
| "loss": 0.075, |
| "step": 10630 |
| }, |
| { |
| "epoch": 30.840579710144926, |
| "grad_norm": 0.47945863008499146, |
| "learning_rate": 4.8850939644092435e-05, |
| "loss": 0.0942, |
| "step": 10640 |
| }, |
| { |
| "epoch": 30.869565217391305, |
| "grad_norm": 0.42868760228157043, |
| "learning_rate": 4.876828959591485e-05, |
| "loss": 0.1054, |
| "step": 10650 |
| }, |
| { |
| "epoch": 30.89855072463768, |
| "grad_norm": 0.5427827835083008, |
| "learning_rate": 4.8685642915186474e-05, |
| "loss": 0.0908, |
| "step": 10660 |
| }, |
| { |
| "epoch": 30.92753623188406, |
| "grad_norm": 0.546563982963562, |
| "learning_rate": 4.860299982786018e-05, |
| "loss": 0.0944, |
| "step": 10670 |
| }, |
| { |
| "epoch": 30.956521739130434, |
| "grad_norm": 0.3557523190975189, |
| "learning_rate": 4.852036055987901e-05, |
| "loss": 0.0871, |
| "step": 10680 |
| }, |
| { |
| "epoch": 30.985507246376812, |
| "grad_norm": 0.50401771068573, |
| "learning_rate": 4.843772533717558e-05, |
| "loss": 0.0869, |
| "step": 10690 |
| }, |
| { |
| "epoch": 31.014492753623188, |
| "grad_norm": 0.3340211808681488, |
| "learning_rate": 4.835509438567142e-05, |
| "loss": 0.0857, |
| "step": 10700 |
| }, |
| { |
| "epoch": 31.043478260869566, |
| "grad_norm": 0.2681577205657959, |
| "learning_rate": 4.827246793127639e-05, |
| "loss": 0.0765, |
| "step": 10710 |
| }, |
| { |
| "epoch": 31.07246376811594, |
| "grad_norm": 0.5375443696975708, |
| "learning_rate": 4.818984619988807e-05, |
| "loss": 0.0977, |
| "step": 10720 |
| }, |
| { |
| "epoch": 31.10144927536232, |
| "grad_norm": 0.33494704961776733, |
| "learning_rate": 4.810722941739115e-05, |
| "loss": 0.0857, |
| "step": 10730 |
| }, |
| { |
| "epoch": 31.130434782608695, |
| "grad_norm": 0.44509807229042053, |
| "learning_rate": 4.8024617809656684e-05, |
| "loss": 0.0814, |
| "step": 10740 |
| }, |
| { |
| "epoch": 31.159420289855074, |
| "grad_norm": 0.42321598529815674, |
| "learning_rate": 4.794201160254171e-05, |
| "loss": 0.0832, |
| "step": 10750 |
| }, |
| { |
| "epoch": 31.18840579710145, |
| "grad_norm": 0.41145583987236023, |
| "learning_rate": 4.785941102188844e-05, |
| "loss": 0.101, |
| "step": 10760 |
| }, |
| { |
| "epoch": 31.217391304347824, |
| "grad_norm": 0.23340976238250732, |
| "learning_rate": 4.7776816293523686e-05, |
| "loss": 0.0987, |
| "step": 10770 |
| }, |
| { |
| "epoch": 31.246376811594203, |
| "grad_norm": 0.355365514755249, |
| "learning_rate": 4.769422764325832e-05, |
| "loss": 0.1148, |
| "step": 10780 |
| }, |
| { |
| "epoch": 31.27536231884058, |
| "grad_norm": 0.3993210792541504, |
| "learning_rate": 4.76116452968865e-05, |
| "loss": 0.0802, |
| "step": 10790 |
| }, |
| { |
| "epoch": 31.304347826086957, |
| "grad_norm": 0.23266702890396118, |
| "learning_rate": 4.752906948018525e-05, |
| "loss": 0.0755, |
| "step": 10800 |
| }, |
| { |
| "epoch": 31.333333333333332, |
| "grad_norm": 0.2677353024482727, |
| "learning_rate": 4.7446500418913684e-05, |
| "loss": 0.0754, |
| "step": 10810 |
| }, |
| { |
| "epoch": 31.36231884057971, |
| "grad_norm": 0.39404717087745667, |
| "learning_rate": 4.736393833881247e-05, |
| "loss": 0.0813, |
| "step": 10820 |
| }, |
| { |
| "epoch": 31.391304347826086, |
| "grad_norm": 0.39271312952041626, |
| "learning_rate": 4.7281383465603194e-05, |
| "loss": 0.0935, |
| "step": 10830 |
| }, |
| { |
| "epoch": 31.420289855072465, |
| "grad_norm": 0.40351206064224243, |
| "learning_rate": 4.71988360249877e-05, |
| "loss": 0.0677, |
| "step": 10840 |
| }, |
| { |
| "epoch": 31.44927536231884, |
| "grad_norm": 0.40005189180374146, |
| "learning_rate": 4.7116296242647554e-05, |
| "loss": 0.1069, |
| "step": 10850 |
| }, |
| { |
| "epoch": 31.47826086956522, |
| "grad_norm": 0.3219447433948517, |
| "learning_rate": 4.703376434424336e-05, |
| "loss": 0.0806, |
| "step": 10860 |
| }, |
| { |
| "epoch": 31.507246376811594, |
| "grad_norm": 0.4746580123901367, |
| "learning_rate": 4.695124055541421e-05, |
| "loss": 0.0851, |
| "step": 10870 |
| }, |
| { |
| "epoch": 31.536231884057973, |
| "grad_norm": 0.3610043525695801, |
| "learning_rate": 4.6868725101776934e-05, |
| "loss": 0.1042, |
| "step": 10880 |
| }, |
| { |
| "epoch": 31.565217391304348, |
| "grad_norm": 0.3520298898220062, |
| "learning_rate": 4.678621820892567e-05, |
| "loss": 0.0718, |
| "step": 10890 |
| }, |
| { |
| "epoch": 31.594202898550726, |
| "grad_norm": 0.4144718050956726, |
| "learning_rate": 4.670372010243111e-05, |
| "loss": 0.0957, |
| "step": 10900 |
| }, |
| { |
| "epoch": 31.6231884057971, |
| "grad_norm": 0.35748976469039917, |
| "learning_rate": 4.662123100783992e-05, |
| "loss": 0.0911, |
| "step": 10910 |
| }, |
| { |
| "epoch": 31.652173913043477, |
| "grad_norm": 0.21652653813362122, |
| "learning_rate": 4.653875115067415e-05, |
| "loss": 0.0731, |
| "step": 10920 |
| }, |
| { |
| "epoch": 31.681159420289855, |
| "grad_norm": 0.3723653256893158, |
| "learning_rate": 4.6456280756430545e-05, |
| "loss": 0.0888, |
| "step": 10930 |
| }, |
| { |
| "epoch": 31.71014492753623, |
| "grad_norm": 0.4222668409347534, |
| "learning_rate": 4.637382005058004e-05, |
| "loss": 0.1013, |
| "step": 10940 |
| }, |
| { |
| "epoch": 31.73913043478261, |
| "grad_norm": 0.4263753294944763, |
| "learning_rate": 4.629136925856705e-05, |
| "loss": 0.0847, |
| "step": 10950 |
| }, |
| { |
| "epoch": 31.768115942028984, |
| "grad_norm": 0.3686303198337555, |
| "learning_rate": 4.6208928605808895e-05, |
| "loss": 0.0952, |
| "step": 10960 |
| }, |
| { |
| "epoch": 31.797101449275363, |
| "grad_norm": 0.4002050459384918, |
| "learning_rate": 4.612649831769519e-05, |
| "loss": 0.0825, |
| "step": 10970 |
| }, |
| { |
| "epoch": 31.82608695652174, |
| "grad_norm": 0.2441813200712204, |
| "learning_rate": 4.604407861958715e-05, |
| "loss": 0.0955, |
| "step": 10980 |
| }, |
| { |
| "epoch": 31.855072463768117, |
| "grad_norm": 0.30742359161376953, |
| "learning_rate": 4.5961669736817114e-05, |
| "loss": 0.0881, |
| "step": 10990 |
| }, |
| { |
| "epoch": 31.884057971014492, |
| "grad_norm": 0.23788172006607056, |
| "learning_rate": 4.5879271894687814e-05, |
| "loss": 0.0935, |
| "step": 11000 |
| }, |
| { |
| "epoch": 31.91304347826087, |
| "grad_norm": 0.3632306158542633, |
| "learning_rate": 4.5796885318471826e-05, |
| "loss": 0.0848, |
| "step": 11010 |
| }, |
| { |
| "epoch": 31.942028985507246, |
| "grad_norm": 0.39893579483032227, |
| "learning_rate": 4.571451023341086e-05, |
| "loss": 0.1019, |
| "step": 11020 |
| }, |
| { |
| "epoch": 31.971014492753625, |
| "grad_norm": 0.4167952835559845, |
| "learning_rate": 4.563214686471527e-05, |
| "loss": 0.0897, |
| "step": 11030 |
| }, |
| { |
| "epoch": 32.0, |
| "grad_norm": 0.3372804820537567, |
| "learning_rate": 4.5549795437563365e-05, |
| "loss": 0.0789, |
| "step": 11040 |
| }, |
| { |
| "epoch": 32.028985507246375, |
| "grad_norm": 0.3572154939174652, |
| "learning_rate": 4.546745617710081e-05, |
| "loss": 0.0829, |
| "step": 11050 |
| }, |
| { |
| "epoch": 32.05797101449275, |
| "grad_norm": 0.32375410199165344, |
| "learning_rate": 4.5385129308440014e-05, |
| "loss": 0.0787, |
| "step": 11060 |
| }, |
| { |
| "epoch": 32.08695652173913, |
| "grad_norm": 0.3995456099510193, |
| "learning_rate": 4.530281505665944e-05, |
| "loss": 0.1026, |
| "step": 11070 |
| }, |
| { |
| "epoch": 32.11594202898551, |
| "grad_norm": 0.4150542616844177, |
| "learning_rate": 4.5220513646803134e-05, |
| "loss": 0.0941, |
| "step": 11080 |
| }, |
| { |
| "epoch": 32.14492753623188, |
| "grad_norm": 0.32070857286453247, |
| "learning_rate": 4.513822530388003e-05, |
| "loss": 0.0693, |
| "step": 11090 |
| }, |
| { |
| "epoch": 32.17391304347826, |
| "grad_norm": 0.36070406436920166, |
| "learning_rate": 4.5055950252863296e-05, |
| "loss": 0.0719, |
| "step": 11100 |
| }, |
| { |
| "epoch": 32.20289855072464, |
| "grad_norm": 0.4651089906692505, |
| "learning_rate": 4.4973688718689803e-05, |
| "loss": 0.0776, |
| "step": 11110 |
| }, |
| { |
| "epoch": 32.231884057971016, |
| "grad_norm": 0.39821431040763855, |
| "learning_rate": 4.4891440926259406e-05, |
| "loss": 0.0979, |
| "step": 11120 |
| }, |
| { |
| "epoch": 32.26086956521739, |
| "grad_norm": 0.3794202506542206, |
| "learning_rate": 4.480920710043443e-05, |
| "loss": 0.1005, |
| "step": 11130 |
| }, |
| { |
| "epoch": 32.289855072463766, |
| "grad_norm": 0.5193749070167542, |
| "learning_rate": 4.4726987466039044e-05, |
| "loss": 0.0971, |
| "step": 11140 |
| }, |
| { |
| "epoch": 32.31884057971015, |
| "grad_norm": 0.2910986840724945, |
| "learning_rate": 4.46447822478586e-05, |
| "loss": 0.079, |
| "step": 11150 |
| }, |
| { |
| "epoch": 32.34782608695652, |
| "grad_norm": 0.3999570310115814, |
| "learning_rate": 4.4562591670638974e-05, |
| "loss": 0.0967, |
| "step": 11160 |
| }, |
| { |
| "epoch": 32.3768115942029, |
| "grad_norm": 0.33184731006622314, |
| "learning_rate": 4.4480415959086105e-05, |
| "loss": 0.0931, |
| "step": 11170 |
| }, |
| { |
| "epoch": 32.405797101449274, |
| "grad_norm": 0.3531089723110199, |
| "learning_rate": 4.439825533786522e-05, |
| "loss": 0.0847, |
| "step": 11180 |
| }, |
| { |
| "epoch": 32.43478260869565, |
| "grad_norm": 0.45204806327819824, |
| "learning_rate": 4.431611003160035e-05, |
| "loss": 0.0856, |
| "step": 11190 |
| }, |
| { |
| "epoch": 32.46376811594203, |
| "grad_norm": 0.328259140253067, |
| "learning_rate": 4.4233980264873636e-05, |
| "loss": 0.0916, |
| "step": 11200 |
| }, |
| { |
| "epoch": 32.492753623188406, |
| "grad_norm": 0.30385860800743103, |
| "learning_rate": 4.4151866262224684e-05, |
| "loss": 0.0831, |
| "step": 11210 |
| }, |
| { |
| "epoch": 32.52173913043478, |
| "grad_norm": 0.34350085258483887, |
| "learning_rate": 4.406976824815006e-05, |
| "loss": 0.0829, |
| "step": 11220 |
| }, |
| { |
| "epoch": 32.55072463768116, |
| "grad_norm": 0.381274551153183, |
| "learning_rate": 4.3987686447102595e-05, |
| "loss": 0.0889, |
| "step": 11230 |
| }, |
| { |
| "epoch": 32.57971014492754, |
| "grad_norm": 0.4919489920139313, |
| "learning_rate": 4.3905621083490804e-05, |
| "loss": 0.0786, |
| "step": 11240 |
| }, |
| { |
| "epoch": 32.608695652173914, |
| "grad_norm": 0.4313332438468933, |
| "learning_rate": 4.3823572381678286e-05, |
| "loss": 0.0832, |
| "step": 11250 |
| }, |
| { |
| "epoch": 32.63768115942029, |
| "grad_norm": 0.3867364823818207, |
| "learning_rate": 4.374154056598301e-05, |
| "loss": 0.0911, |
| "step": 11260 |
| }, |
| { |
| "epoch": 32.666666666666664, |
| "grad_norm": 0.4290856719017029, |
| "learning_rate": 4.3659525860676845e-05, |
| "loss": 0.0818, |
| "step": 11270 |
| }, |
| { |
| "epoch": 32.69565217391305, |
| "grad_norm": 0.2989586591720581, |
| "learning_rate": 4.3577528489984854e-05, |
| "loss": 0.0816, |
| "step": 11280 |
| }, |
| { |
| "epoch": 32.72463768115942, |
| "grad_norm": 0.3265022039413452, |
| "learning_rate": 4.349554867808476e-05, |
| "loss": 0.077, |
| "step": 11290 |
| }, |
| { |
| "epoch": 32.7536231884058, |
| "grad_norm": 0.5287574529647827, |
| "learning_rate": 4.34135866491062e-05, |
| "loss": 0.0736, |
| "step": 11300 |
| }, |
| { |
| "epoch": 32.78260869565217, |
| "grad_norm": 0.4195975661277771, |
| "learning_rate": 4.333164262713022e-05, |
| "loss": 0.0734, |
| "step": 11310 |
| }, |
| { |
| "epoch": 32.81159420289855, |
| "grad_norm": 0.27101531624794006, |
| "learning_rate": 4.324971683618868e-05, |
| "loss": 0.0776, |
| "step": 11320 |
| }, |
| { |
| "epoch": 32.84057971014493, |
| "grad_norm": 0.28514423966407776, |
| "learning_rate": 4.316780950026354e-05, |
| "loss": 0.0958, |
| "step": 11330 |
| }, |
| { |
| "epoch": 32.869565217391305, |
| "grad_norm": 0.45822855830192566, |
| "learning_rate": 4.308592084328637e-05, |
| "loss": 0.0972, |
| "step": 11340 |
| }, |
| { |
| "epoch": 32.89855072463768, |
| "grad_norm": 0.4056869149208069, |
| "learning_rate": 4.3004051089137576e-05, |
| "loss": 0.0871, |
| "step": 11350 |
| }, |
| { |
| "epoch": 32.927536231884055, |
| "grad_norm": 0.4822801649570465, |
| "learning_rate": 4.292220046164597e-05, |
| "loss": 0.0781, |
| "step": 11360 |
| }, |
| { |
| "epoch": 32.95652173913044, |
| "grad_norm": 0.4903472661972046, |
| "learning_rate": 4.2840369184588035e-05, |
| "loss": 0.1022, |
| "step": 11370 |
| }, |
| { |
| "epoch": 32.98550724637681, |
| "grad_norm": 0.2708165645599365, |
| "learning_rate": 4.2758557481687345e-05, |
| "loss": 0.0724, |
| "step": 11380 |
| }, |
| { |
| "epoch": 33.01449275362319, |
| "grad_norm": 0.46164244413375854, |
| "learning_rate": 4.267676557661403e-05, |
| "loss": 0.063, |
| "step": 11390 |
| }, |
| { |
| "epoch": 33.04347826086956, |
| "grad_norm": 0.3026619553565979, |
| "learning_rate": 4.2594993692983955e-05, |
| "loss": 0.0824, |
| "step": 11400 |
| }, |
| { |
| "epoch": 33.072463768115945, |
| "grad_norm": 0.28057217597961426, |
| "learning_rate": 4.251324205435837e-05, |
| "loss": 0.089, |
| "step": 11410 |
| }, |
| { |
| "epoch": 33.10144927536232, |
| "grad_norm": 0.18814432621002197, |
| "learning_rate": 4.243151088424312e-05, |
| "loss": 0.0838, |
| "step": 11420 |
| }, |
| { |
| "epoch": 33.130434782608695, |
| "grad_norm": 0.3627355098724365, |
| "learning_rate": 4.234980040608813e-05, |
| "loss": 0.0754, |
| "step": 11430 |
| }, |
| { |
| "epoch": 33.15942028985507, |
| "grad_norm": 0.3194730877876282, |
| "learning_rate": 4.22681108432867e-05, |
| "loss": 0.0857, |
| "step": 11440 |
| }, |
| { |
| "epoch": 33.18840579710145, |
| "grad_norm": 0.387783020734787, |
| "learning_rate": 4.2186442419174984e-05, |
| "loss": 0.0851, |
| "step": 11450 |
| }, |
| { |
| "epoch": 33.21739130434783, |
| "grad_norm": 0.34020793437957764, |
| "learning_rate": 4.210479535703133e-05, |
| "loss": 0.0821, |
| "step": 11460 |
| }, |
| { |
| "epoch": 33.2463768115942, |
| "grad_norm": 0.48423564434051514, |
| "learning_rate": 4.202316988007567e-05, |
| "loss": 0.0985, |
| "step": 11470 |
| }, |
| { |
| "epoch": 33.27536231884058, |
| "grad_norm": 0.4145282506942749, |
| "learning_rate": 4.194156621146901e-05, |
| "loss": 0.0704, |
| "step": 11480 |
| }, |
| { |
| "epoch": 33.30434782608695, |
| "grad_norm": 0.602695643901825, |
| "learning_rate": 4.1859984574312596e-05, |
| "loss": 0.0846, |
| "step": 11490 |
| }, |
| { |
| "epoch": 33.333333333333336, |
| "grad_norm": 0.23501792550086975, |
| "learning_rate": 4.177842519164752e-05, |
| "loss": 0.0817, |
| "step": 11500 |
| }, |
| { |
| "epoch": 33.36231884057971, |
| "grad_norm": 0.43396809697151184, |
| "learning_rate": 4.169688828645404e-05, |
| "loss": 0.103, |
| "step": 11510 |
| }, |
| { |
| "epoch": 33.391304347826086, |
| "grad_norm": 0.4772212505340576, |
| "learning_rate": 4.161537408165092e-05, |
| "loss": 0.0721, |
| "step": 11520 |
| }, |
| { |
| "epoch": 33.42028985507246, |
| "grad_norm": 0.3769497573375702, |
| "learning_rate": 4.1533882800094924e-05, |
| "loss": 0.1031, |
| "step": 11530 |
| }, |
| { |
| "epoch": 33.44927536231884, |
| "grad_norm": 0.4361927807331085, |
| "learning_rate": 4.145241466458005e-05, |
| "loss": 0.0785, |
| "step": 11540 |
| }, |
| { |
| "epoch": 33.47826086956522, |
| "grad_norm": 0.47106435894966125, |
| "learning_rate": 4.13709698978371e-05, |
| "loss": 0.0805, |
| "step": 11550 |
| }, |
| { |
| "epoch": 33.507246376811594, |
| "grad_norm": 0.19365593791007996, |
| "learning_rate": 4.1289548722532944e-05, |
| "loss": 0.0749, |
| "step": 11560 |
| }, |
| { |
| "epoch": 33.53623188405797, |
| "grad_norm": 0.47546547651290894, |
| "learning_rate": 4.120815136126999e-05, |
| "loss": 0.0852, |
| "step": 11570 |
| }, |
| { |
| "epoch": 33.56521739130435, |
| "grad_norm": 0.41180577874183655, |
| "learning_rate": 4.112677803658548e-05, |
| "loss": 0.0806, |
| "step": 11580 |
| }, |
| { |
| "epoch": 33.594202898550726, |
| "grad_norm": 0.2787127196788788, |
| "learning_rate": 4.1045428970951e-05, |
| "loss": 0.0899, |
| "step": 11590 |
| }, |
| { |
| "epoch": 33.6231884057971, |
| "grad_norm": 0.3546220660209656, |
| "learning_rate": 4.0964104386771785e-05, |
| "loss": 0.0813, |
| "step": 11600 |
| }, |
| { |
| "epoch": 33.65217391304348, |
| "grad_norm": 0.4572994112968445, |
| "learning_rate": 4.0882804506386144e-05, |
| "loss": 0.09, |
| "step": 11610 |
| }, |
| { |
| "epoch": 33.68115942028985, |
| "grad_norm": 0.33741870522499084, |
| "learning_rate": 4.080152955206485e-05, |
| "loss": 0.074, |
| "step": 11620 |
| }, |
| { |
| "epoch": 33.710144927536234, |
| "grad_norm": 0.24237462878227234, |
| "learning_rate": 4.0720279746010505e-05, |
| "loss": 0.0767, |
| "step": 11630 |
| }, |
| { |
| "epoch": 33.73913043478261, |
| "grad_norm": 0.37967872619628906, |
| "learning_rate": 4.063905531035699e-05, |
| "loss": 0.0715, |
| "step": 11640 |
| }, |
| { |
| "epoch": 33.768115942028984, |
| "grad_norm": 0.25618433952331543, |
| "learning_rate": 4.055785646716882e-05, |
| "loss": 0.0743, |
| "step": 11650 |
| }, |
| { |
| "epoch": 33.79710144927536, |
| "grad_norm": 0.3028956949710846, |
| "learning_rate": 4.047668343844051e-05, |
| "loss": 0.0948, |
| "step": 11660 |
| }, |
| { |
| "epoch": 33.82608695652174, |
| "grad_norm": 0.28945979475975037, |
| "learning_rate": 4.039553644609604e-05, |
| "loss": 0.0783, |
| "step": 11670 |
| }, |
| { |
| "epoch": 33.85507246376812, |
| "grad_norm": 0.4274953603744507, |
| "learning_rate": 4.0314415711988176e-05, |
| "loss": 0.0846, |
| "step": 11680 |
| }, |
| { |
| "epoch": 33.88405797101449, |
| "grad_norm": 0.4359511137008667, |
| "learning_rate": 4.023332145789792e-05, |
| "loss": 0.0772, |
| "step": 11690 |
| }, |
| { |
| "epoch": 33.91304347826087, |
| "grad_norm": 0.2297302633523941, |
| "learning_rate": 4.015225390553385e-05, |
| "loss": 0.0663, |
| "step": 11700 |
| }, |
| { |
| "epoch": 33.94202898550725, |
| "grad_norm": 0.4641404449939728, |
| "learning_rate": 4.007121327653158e-05, |
| "loss": 0.0822, |
| "step": 11710 |
| }, |
| { |
| "epoch": 33.971014492753625, |
| "grad_norm": 0.3523867726325989, |
| "learning_rate": 3.9990199792453064e-05, |
| "loss": 0.0897, |
| "step": 11720 |
| }, |
| { |
| "epoch": 34.0, |
| "grad_norm": 0.6376750469207764, |
| "learning_rate": 3.9909213674786103e-05, |
| "loss": 0.082, |
| "step": 11730 |
| }, |
| { |
| "epoch": 34.028985507246375, |
| "grad_norm": 0.4435945749282837, |
| "learning_rate": 3.982825514494363e-05, |
| "loss": 0.0849, |
| "step": 11740 |
| }, |
| { |
| "epoch": 34.05797101449275, |
| "grad_norm": 0.3722585439682007, |
| "learning_rate": 3.974732442426319e-05, |
| "loss": 0.0991, |
| "step": 11750 |
| }, |
| { |
| "epoch": 34.08695652173913, |
| "grad_norm": 0.4315265715122223, |
| "learning_rate": 3.966642173400629e-05, |
| "loss": 0.0878, |
| "step": 11760 |
| }, |
| { |
| "epoch": 34.11594202898551, |
| "grad_norm": 0.45117440819740295, |
| "learning_rate": 3.9585547295357764e-05, |
| "loss": 0.0948, |
| "step": 11770 |
| }, |
| { |
| "epoch": 34.14492753623188, |
| "grad_norm": 0.4286547005176544, |
| "learning_rate": 3.950470132942526e-05, |
| "loss": 0.082, |
| "step": 11780 |
| }, |
| { |
| "epoch": 34.17391304347826, |
| "grad_norm": 0.34111738204956055, |
| "learning_rate": 3.942388405723856e-05, |
| "loss": 0.073, |
| "step": 11790 |
| }, |
| { |
| "epoch": 34.20289855072464, |
| "grad_norm": 0.24257983267307281, |
| "learning_rate": 3.9343095699749e-05, |
| "loss": 0.0707, |
| "step": 11800 |
| }, |
| { |
| "epoch": 34.231884057971016, |
| "grad_norm": 0.40664252638816833, |
| "learning_rate": 3.9262336477828874e-05, |
| "loss": 0.1052, |
| "step": 11810 |
| }, |
| { |
| "epoch": 34.26086956521739, |
| "grad_norm": 0.2790059745311737, |
| "learning_rate": 3.9181606612270794e-05, |
| "loss": 0.0863, |
| "step": 11820 |
| }, |
| { |
| "epoch": 34.289855072463766, |
| "grad_norm": 0.3338426351547241, |
| "learning_rate": 3.910090632378713e-05, |
| "loss": 0.1013, |
| "step": 11830 |
| }, |
| { |
| "epoch": 34.31884057971015, |
| "grad_norm": 0.3759063184261322, |
| "learning_rate": 3.90202358330094e-05, |
| "loss": 0.089, |
| "step": 11840 |
| }, |
| { |
| "epoch": 34.34782608695652, |
| "grad_norm": 0.3479987680912018, |
| "learning_rate": 3.8939595360487656e-05, |
| "loss": 0.0699, |
| "step": 11850 |
| }, |
| { |
| "epoch": 34.3768115942029, |
| "grad_norm": 0.42943084239959717, |
| "learning_rate": 3.885898512668984e-05, |
| "loss": 0.1114, |
| "step": 11860 |
| }, |
| { |
| "epoch": 34.405797101449274, |
| "grad_norm": 0.2323223501443863, |
| "learning_rate": 3.877840535200127e-05, |
| "loss": 0.0878, |
| "step": 11870 |
| }, |
| { |
| "epoch": 34.43478260869565, |
| "grad_norm": 0.5184713006019592, |
| "learning_rate": 3.869785625672397e-05, |
| "loss": 0.0974, |
| "step": 11880 |
| }, |
| { |
| "epoch": 34.46376811594203, |
| "grad_norm": 0.267502099275589, |
| "learning_rate": 3.8617338061076094e-05, |
| "loss": 0.0563, |
| "step": 11890 |
| }, |
| { |
| "epoch": 34.492753623188406, |
| "grad_norm": 0.42632079124450684, |
| "learning_rate": 3.853685098519132e-05, |
| "loss": 0.0739, |
| "step": 11900 |
| }, |
| { |
| "epoch": 34.52173913043478, |
| "grad_norm": 0.30418580770492554, |
| "learning_rate": 3.845639524911823e-05, |
| "loss": 0.0976, |
| "step": 11910 |
| }, |
| { |
| "epoch": 34.55072463768116, |
| "grad_norm": 0.38783854246139526, |
| "learning_rate": 3.837597107281974e-05, |
| "loss": 0.0738, |
| "step": 11920 |
| }, |
| { |
| "epoch": 34.57971014492754, |
| "grad_norm": 0.19843190908432007, |
| "learning_rate": 3.829557867617247e-05, |
| "loss": 0.0796, |
| "step": 11930 |
| }, |
| { |
| "epoch": 34.608695652173914, |
| "grad_norm": 0.3146209120750427, |
| "learning_rate": 3.821521827896618e-05, |
| "loss": 0.0826, |
| "step": 11940 |
| }, |
| { |
| "epoch": 34.63768115942029, |
| "grad_norm": 0.42972853779792786, |
| "learning_rate": 3.81348901009031e-05, |
| "loss": 0.0984, |
| "step": 11950 |
| }, |
| { |
| "epoch": 34.666666666666664, |
| "grad_norm": 0.28957119584083557, |
| "learning_rate": 3.805459436159741e-05, |
| "loss": 0.0714, |
| "step": 11960 |
| }, |
| { |
| "epoch": 34.69565217391305, |
| "grad_norm": 0.3170105814933777, |
| "learning_rate": 3.797433128057461e-05, |
| "loss": 0.0817, |
| "step": 11970 |
| }, |
| { |
| "epoch": 34.72463768115942, |
| "grad_norm": 0.443141907453537, |
| "learning_rate": 3.789410107727089e-05, |
| "loss": 0.0931, |
| "step": 11980 |
| }, |
| { |
| "epoch": 34.7536231884058, |
| "grad_norm": 0.4638511538505554, |
| "learning_rate": 3.781390397103257e-05, |
| "loss": 0.0917, |
| "step": 11990 |
| }, |
| { |
| "epoch": 34.78260869565217, |
| "grad_norm": 0.5074764490127563, |
| "learning_rate": 3.7733740181115455e-05, |
| "loss": 0.0919, |
| "step": 12000 |
| }, |
| { |
| "epoch": 34.81159420289855, |
| "grad_norm": 0.32013916969299316, |
| "learning_rate": 3.7653609926684306e-05, |
| "loss": 0.0784, |
| "step": 12010 |
| }, |
| { |
| "epoch": 34.84057971014493, |
| "grad_norm": 0.29025906324386597, |
| "learning_rate": 3.757351342681217e-05, |
| "loss": 0.0751, |
| "step": 12020 |
| }, |
| { |
| "epoch": 34.869565217391305, |
| "grad_norm": 0.33754485845565796, |
| "learning_rate": 3.749345090047982e-05, |
| "loss": 0.082, |
| "step": 12030 |
| }, |
| { |
| "epoch": 34.89855072463768, |
| "grad_norm": 0.2577219307422638, |
| "learning_rate": 3.741342256657515e-05, |
| "loss": 0.083, |
| "step": 12040 |
| }, |
| { |
| "epoch": 34.927536231884055, |
| "grad_norm": 0.4835989773273468, |
| "learning_rate": 3.7333428643892567e-05, |
| "loss": 0.096, |
| "step": 12050 |
| }, |
| { |
| "epoch": 34.95652173913044, |
| "grad_norm": 0.5097367763519287, |
| "learning_rate": 3.725346935113239e-05, |
| "loss": 0.0939, |
| "step": 12060 |
| }, |
| { |
| "epoch": 34.98550724637681, |
| "grad_norm": 0.47239720821380615, |
| "learning_rate": 3.717354490690029e-05, |
| "loss": 0.0732, |
| "step": 12070 |
| }, |
| { |
| "epoch": 35.01449275362319, |
| "grad_norm": 0.36919161677360535, |
| "learning_rate": 3.709365552970664e-05, |
| "loss": 0.0824, |
| "step": 12080 |
| }, |
| { |
| "epoch": 35.04347826086956, |
| "grad_norm": 0.3409859836101532, |
| "learning_rate": 3.7013801437965945e-05, |
| "loss": 0.0803, |
| "step": 12090 |
| }, |
| { |
| "epoch": 35.072463768115945, |
| "grad_norm": 0.3615312874317169, |
| "learning_rate": 3.693398284999623e-05, |
| "loss": 0.0951, |
| "step": 12100 |
| }, |
| { |
| "epoch": 35.10144927536232, |
| "grad_norm": 0.5234674215316772, |
| "learning_rate": 3.6854199984018484e-05, |
| "loss": 0.0834, |
| "step": 12110 |
| }, |
| { |
| "epoch": 35.130434782608695, |
| "grad_norm": 0.2838694155216217, |
| "learning_rate": 3.677445305815601e-05, |
| "loss": 0.091, |
| "step": 12120 |
| }, |
| { |
| "epoch": 35.15942028985507, |
| "grad_norm": 0.5254635810852051, |
| "learning_rate": 3.669474229043387e-05, |
| "loss": 0.0929, |
| "step": 12130 |
| }, |
| { |
| "epoch": 35.18840579710145, |
| "grad_norm": 0.32632967829704285, |
| "learning_rate": 3.6615067898778235e-05, |
| "loss": 0.0873, |
| "step": 12140 |
| }, |
| { |
| "epoch": 35.21739130434783, |
| "grad_norm": 0.3260731101036072, |
| "learning_rate": 3.6535430101015866e-05, |
| "loss": 0.054, |
| "step": 12150 |
| }, |
| { |
| "epoch": 35.2463768115942, |
| "grad_norm": 0.3727055788040161, |
| "learning_rate": 3.645582911487345e-05, |
| "loss": 0.0738, |
| "step": 12160 |
| }, |
| { |
| "epoch": 35.27536231884058, |
| "grad_norm": 0.27279332280158997, |
| "learning_rate": 3.637626515797706e-05, |
| "loss": 0.0718, |
| "step": 12170 |
| }, |
| { |
| "epoch": 35.30434782608695, |
| "grad_norm": 0.4319758415222168, |
| "learning_rate": 3.629673844785152e-05, |
| "loss": 0.0754, |
| "step": 12180 |
| }, |
| { |
| "epoch": 35.333333333333336, |
| "grad_norm": 0.49372681975364685, |
| "learning_rate": 3.621724920191979e-05, |
| "loss": 0.0778, |
| "step": 12190 |
| }, |
| { |
| "epoch": 35.36231884057971, |
| "grad_norm": 0.27620404958724976, |
| "learning_rate": 3.6137797637502444e-05, |
| "loss": 0.0776, |
| "step": 12200 |
| }, |
| { |
| "epoch": 35.391304347826086, |
| "grad_norm": 0.4745093286037445, |
| "learning_rate": 3.6058383971817035e-05, |
| "loss": 0.091, |
| "step": 12210 |
| }, |
| { |
| "epoch": 35.42028985507246, |
| "grad_norm": 0.49664023518562317, |
| "learning_rate": 3.59790084219775e-05, |
| "loss": 0.0892, |
| "step": 12220 |
| }, |
| { |
| "epoch": 35.44927536231884, |
| "grad_norm": 0.30979496240615845, |
| "learning_rate": 3.589967120499353e-05, |
| "loss": 0.074, |
| "step": 12230 |
| }, |
| { |
| "epoch": 35.47826086956522, |
| "grad_norm": 0.460953950881958, |
| "learning_rate": 3.5820372537770075e-05, |
| "loss": 0.08, |
| "step": 12240 |
| }, |
| { |
| "epoch": 35.507246376811594, |
| "grad_norm": 0.31548449397087097, |
| "learning_rate": 3.5741112637106655e-05, |
| "loss": 0.0892, |
| "step": 12250 |
| }, |
| { |
| "epoch": 35.53623188405797, |
| "grad_norm": 0.38868752121925354, |
| "learning_rate": 3.5661891719696804e-05, |
| "loss": 0.0803, |
| "step": 12260 |
| }, |
| { |
| "epoch": 35.56521739130435, |
| "grad_norm": 0.39552441239356995, |
| "learning_rate": 3.5582710002127504e-05, |
| "loss": 0.0709, |
| "step": 12270 |
| }, |
| { |
| "epoch": 35.594202898550726, |
| "grad_norm": 0.3134962022304535, |
| "learning_rate": 3.550356770087853e-05, |
| "loss": 0.0835, |
| "step": 12280 |
| }, |
| { |
| "epoch": 35.6231884057971, |
| "grad_norm": 0.42194268107414246, |
| "learning_rate": 3.5424465032321914e-05, |
| "loss": 0.076, |
| "step": 12290 |
| }, |
| { |
| "epoch": 35.65217391304348, |
| "grad_norm": 0.44927000999450684, |
| "learning_rate": 3.5345402212721335e-05, |
| "loss": 0.1047, |
| "step": 12300 |
| }, |
| { |
| "epoch": 35.68115942028985, |
| "grad_norm": 0.4046900272369385, |
| "learning_rate": 3.526637945823152e-05, |
| "loss": 0.0871, |
| "step": 12310 |
| }, |
| { |
| "epoch": 35.710144927536234, |
| "grad_norm": 0.34118810296058655, |
| "learning_rate": 3.518739698489767e-05, |
| "loss": 0.076, |
| "step": 12320 |
| }, |
| { |
| "epoch": 35.73913043478261, |
| "grad_norm": 0.1889665573835373, |
| "learning_rate": 3.510845500865485e-05, |
| "loss": 0.078, |
| "step": 12330 |
| }, |
| { |
| "epoch": 35.768115942028984, |
| "grad_norm": 0.25734132528305054, |
| "learning_rate": 3.502955374532739e-05, |
| "loss": 0.0808, |
| "step": 12340 |
| }, |
| { |
| "epoch": 35.79710144927536, |
| "grad_norm": 0.4329688549041748, |
| "learning_rate": 3.495069341062836e-05, |
| "loss": 0.0949, |
| "step": 12350 |
| }, |
| { |
| "epoch": 35.82608695652174, |
| "grad_norm": 0.4507119655609131, |
| "learning_rate": 3.4871874220158896e-05, |
| "loss": 0.0868, |
| "step": 12360 |
| }, |
| { |
| "epoch": 35.85507246376812, |
| "grad_norm": 0.42284590005874634, |
| "learning_rate": 3.479309638940762e-05, |
| "loss": 0.0928, |
| "step": 12370 |
| }, |
| { |
| "epoch": 35.88405797101449, |
| "grad_norm": 0.31752341985702515, |
| "learning_rate": 3.4714360133750146e-05, |
| "loss": 0.0824, |
| "step": 12380 |
| }, |
| { |
| "epoch": 35.91304347826087, |
| "grad_norm": 0.31320276856422424, |
| "learning_rate": 3.463566566844839e-05, |
| "loss": 0.0768, |
| "step": 12390 |
| }, |
| { |
| "epoch": 35.94202898550725, |
| "grad_norm": 0.46019718050956726, |
| "learning_rate": 3.4557013208650016e-05, |
| "loss": 0.0783, |
| "step": 12400 |
| }, |
| { |
| "epoch": 35.971014492753625, |
| "grad_norm": 0.3470844626426697, |
| "learning_rate": 3.4478402969387857e-05, |
| "loss": 0.0874, |
| "step": 12410 |
| }, |
| { |
| "epoch": 36.0, |
| "grad_norm": 1.3857176303863525, |
| "learning_rate": 3.4399835165579266e-05, |
| "loss": 0.0754, |
| "step": 12420 |
| }, |
| { |
| "epoch": 36.028985507246375, |
| "grad_norm": 0.3289060592651367, |
| "learning_rate": 3.4321310012025645e-05, |
| "loss": 0.0942, |
| "step": 12430 |
| }, |
| { |
| "epoch": 36.05797101449275, |
| "grad_norm": 0.2967238426208496, |
| "learning_rate": 3.424282772341176e-05, |
| "loss": 0.076, |
| "step": 12440 |
| }, |
| { |
| "epoch": 36.08695652173913, |
| "grad_norm": 0.3292827308177948, |
| "learning_rate": 3.416438851430519e-05, |
| "loss": 0.0995, |
| "step": 12450 |
| }, |
| { |
| "epoch": 36.11594202898551, |
| "grad_norm": 0.3444810211658478, |
| "learning_rate": 3.408599259915577e-05, |
| "loss": 0.0739, |
| "step": 12460 |
| }, |
| { |
| "epoch": 36.14492753623188, |
| "grad_norm": 0.40988513827323914, |
| "learning_rate": 3.400764019229487e-05, |
| "loss": 0.0793, |
| "step": 12470 |
| }, |
| { |
| "epoch": 36.17391304347826, |
| "grad_norm": 0.37536290287971497, |
| "learning_rate": 3.3929331507935035e-05, |
| "loss": 0.0983, |
| "step": 12480 |
| }, |
| { |
| "epoch": 36.20289855072464, |
| "grad_norm": 0.45117539167404175, |
| "learning_rate": 3.3851066760169196e-05, |
| "loss": 0.0981, |
| "step": 12490 |
| }, |
| { |
| "epoch": 36.231884057971016, |
| "grad_norm": 0.4401688575744629, |
| "learning_rate": 3.377284616297021e-05, |
| "loss": 0.0702, |
| "step": 12500 |
| }, |
| { |
| "epoch": 36.26086956521739, |
| "grad_norm": 0.24332067370414734, |
| "learning_rate": 3.3694669930190166e-05, |
| "loss": 0.0741, |
| "step": 12510 |
| }, |
| { |
| "epoch": 36.289855072463766, |
| "grad_norm": 0.38454926013946533, |
| "learning_rate": 3.36165382755599e-05, |
| "loss": 0.0926, |
| "step": 12520 |
| }, |
| { |
| "epoch": 36.31884057971015, |
| "grad_norm": 0.35665246844291687, |
| "learning_rate": 3.35384514126884e-05, |
| "loss": 0.0686, |
| "step": 12530 |
| }, |
| { |
| "epoch": 36.34782608695652, |
| "grad_norm": 0.4824955463409424, |
| "learning_rate": 3.3460409555062154e-05, |
| "loss": 0.084, |
| "step": 12540 |
| }, |
| { |
| "epoch": 36.3768115942029, |
| "grad_norm": 0.4470244348049164, |
| "learning_rate": 3.3382412916044645e-05, |
| "loss": 0.1034, |
| "step": 12550 |
| }, |
| { |
| "epoch": 36.405797101449274, |
| "grad_norm": 0.3308650553226471, |
| "learning_rate": 3.330446170887566e-05, |
| "loss": 0.0708, |
| "step": 12560 |
| }, |
| { |
| "epoch": 36.43478260869565, |
| "grad_norm": 0.2681847810745239, |
| "learning_rate": 3.3226556146670834e-05, |
| "loss": 0.0748, |
| "step": 12570 |
| }, |
| { |
| "epoch": 36.46376811594203, |
| "grad_norm": 0.4676291048526764, |
| "learning_rate": 3.314869644242102e-05, |
| "loss": 0.0849, |
| "step": 12580 |
| }, |
| { |
| "epoch": 36.492753623188406, |
| "grad_norm": 0.468152791261673, |
| "learning_rate": 3.3070882808991674e-05, |
| "loss": 0.0726, |
| "step": 12590 |
| }, |
| { |
| "epoch": 36.52173913043478, |
| "grad_norm": 0.423662930727005, |
| "learning_rate": 3.2993115459122305e-05, |
| "loss": 0.0832, |
| "step": 12600 |
| }, |
| { |
| "epoch": 36.55072463768116, |
| "grad_norm": 0.4952705502510071, |
| "learning_rate": 3.2915394605425835e-05, |
| "loss": 0.086, |
| "step": 12610 |
| }, |
| { |
| "epoch": 36.57971014492754, |
| "grad_norm": 0.3361116945743561, |
| "learning_rate": 3.283772046038816e-05, |
| "loss": 0.0686, |
| "step": 12620 |
| }, |
| { |
| "epoch": 36.608695652173914, |
| "grad_norm": 0.35378262400627136, |
| "learning_rate": 3.276009323636739e-05, |
| "loss": 0.0956, |
| "step": 12630 |
| }, |
| { |
| "epoch": 36.63768115942029, |
| "grad_norm": 0.26826876401901245, |
| "learning_rate": 3.268251314559344e-05, |
| "loss": 0.0725, |
| "step": 12640 |
| }, |
| { |
| "epoch": 36.666666666666664, |
| "grad_norm": 0.4471190571784973, |
| "learning_rate": 3.2604980400167254e-05, |
| "loss": 0.0886, |
| "step": 12650 |
| }, |
| { |
| "epoch": 36.69565217391305, |
| "grad_norm": 0.26007452607154846, |
| "learning_rate": 3.252749521206042e-05, |
| "loss": 0.0736, |
| "step": 12660 |
| }, |
| { |
| "epoch": 36.72463768115942, |
| "grad_norm": 0.3644675016403198, |
| "learning_rate": 3.2450057793114494e-05, |
| "loss": 0.0859, |
| "step": 12670 |
| }, |
| { |
| "epoch": 36.7536231884058, |
| "grad_norm": 0.3555355966091156, |
| "learning_rate": 3.2372668355040435e-05, |
| "loss": 0.0952, |
| "step": 12680 |
| }, |
| { |
| "epoch": 36.78260869565217, |
| "grad_norm": 0.3508759140968323, |
| "learning_rate": 3.2295327109418005e-05, |
| "loss": 0.0761, |
| "step": 12690 |
| }, |
| { |
| "epoch": 36.81159420289855, |
| "grad_norm": 0.3372611999511719, |
| "learning_rate": 3.221803426769518e-05, |
| "loss": 0.1055, |
| "step": 12700 |
| }, |
| { |
| "epoch": 36.84057971014493, |
| "grad_norm": 0.45002785325050354, |
| "learning_rate": 3.214079004118768e-05, |
| "loss": 0.0677, |
| "step": 12710 |
| }, |
| { |
| "epoch": 36.869565217391305, |
| "grad_norm": 0.5220909118652344, |
| "learning_rate": 3.2063594641078234e-05, |
| "loss": 0.0679, |
| "step": 12720 |
| }, |
| { |
| "epoch": 36.89855072463768, |
| "grad_norm": 0.33023321628570557, |
| "learning_rate": 3.198644827841616e-05, |
| "loss": 0.0854, |
| "step": 12730 |
| }, |
| { |
| "epoch": 36.927536231884055, |
| "grad_norm": 0.37969428300857544, |
| "learning_rate": 3.1909351164116654e-05, |
| "loss": 0.0975, |
| "step": 12740 |
| }, |
| { |
| "epoch": 36.95652173913044, |
| "grad_norm": 0.39646878838539124, |
| "learning_rate": 3.183230350896026e-05, |
| "loss": 0.0651, |
| "step": 12750 |
| }, |
| { |
| "epoch": 36.98550724637681, |
| "grad_norm": 0.42903590202331543, |
| "learning_rate": 3.1755305523592337e-05, |
| "loss": 0.0964, |
| "step": 12760 |
| }, |
| { |
| "epoch": 37.01449275362319, |
| "grad_norm": 0.3350338339805603, |
| "learning_rate": 3.167835741852245e-05, |
| "loss": 0.0747, |
| "step": 12770 |
| }, |
| { |
| "epoch": 37.04347826086956, |
| "grad_norm": 0.5324596762657166, |
| "learning_rate": 3.160145940412378e-05, |
| "loss": 0.0865, |
| "step": 12780 |
| }, |
| { |
| "epoch": 37.072463768115945, |
| "grad_norm": 0.5436109900474548, |
| "learning_rate": 3.1524611690632545e-05, |
| "loss": 0.0853, |
| "step": 12790 |
| }, |
| { |
| "epoch": 37.10144927536232, |
| "grad_norm": 0.4058521091938019, |
| "learning_rate": 3.144781448814746e-05, |
| "loss": 0.0611, |
| "step": 12800 |
| }, |
| { |
| "epoch": 37.130434782608695, |
| "grad_norm": 0.222909078001976, |
| "learning_rate": 3.1371068006629145e-05, |
| "loss": 0.0849, |
| "step": 12810 |
| }, |
| { |
| "epoch": 37.15942028985507, |
| "grad_norm": 0.3150401711463928, |
| "learning_rate": 3.129437245589956e-05, |
| "loss": 0.0661, |
| "step": 12820 |
| }, |
| { |
| "epoch": 37.18840579710145, |
| "grad_norm": 0.5720604062080383, |
| "learning_rate": 3.121772804564143e-05, |
| "loss": 0.1058, |
| "step": 12830 |
| }, |
| { |
| "epoch": 37.21739130434783, |
| "grad_norm": 0.36148929595947266, |
| "learning_rate": 3.11411349853976e-05, |
| "loss": 0.0647, |
| "step": 12840 |
| }, |
| { |
| "epoch": 37.2463768115942, |
| "grad_norm": 0.4873165190219879, |
| "learning_rate": 3.10645934845706e-05, |
| "loss": 0.0919, |
| "step": 12850 |
| }, |
| { |
| "epoch": 37.27536231884058, |
| "grad_norm": 0.6560083627700806, |
| "learning_rate": 3.098810375242196e-05, |
| "loss": 0.0857, |
| "step": 12860 |
| }, |
| { |
| "epoch": 37.30434782608695, |
| "grad_norm": 0.37037011981010437, |
| "learning_rate": 3.0911665998071704e-05, |
| "loss": 0.084, |
| "step": 12870 |
| }, |
| { |
| "epoch": 37.333333333333336, |
| "grad_norm": 0.2736794650554657, |
| "learning_rate": 3.083528043049774e-05, |
| "loss": 0.0629, |
| "step": 12880 |
| }, |
| { |
| "epoch": 37.36231884057971, |
| "grad_norm": 0.39787065982818604, |
| "learning_rate": 3.0758947258535255e-05, |
| "loss": 0.0937, |
| "step": 12890 |
| }, |
| { |
| "epoch": 37.391304347826086, |
| "grad_norm": 0.2980014979839325, |
| "learning_rate": 3.068266669087625e-05, |
| "loss": 0.0747, |
| "step": 12900 |
| }, |
| { |
| "epoch": 37.42028985507246, |
| "grad_norm": 0.38902172446250916, |
| "learning_rate": 3.060643893606887e-05, |
| "loss": 0.0922, |
| "step": 12910 |
| }, |
| { |
| "epoch": 37.44927536231884, |
| "grad_norm": 0.412036269903183, |
| "learning_rate": 3.053026420251693e-05, |
| "loss": 0.0877, |
| "step": 12920 |
| }, |
| { |
| "epoch": 37.47826086956522, |
| "grad_norm": 0.36954089999198914, |
| "learning_rate": 3.0454142698479183e-05, |
| "loss": 0.1029, |
| "step": 12930 |
| }, |
| { |
| "epoch": 37.507246376811594, |
| "grad_norm": 0.521973192691803, |
| "learning_rate": 3.0378074632068954e-05, |
| "loss": 0.0682, |
| "step": 12940 |
| }, |
| { |
| "epoch": 37.53623188405797, |
| "grad_norm": 0.2521456182003021, |
| "learning_rate": 3.0302060211253408e-05, |
| "loss": 0.07, |
| "step": 12950 |
| }, |
| { |
| "epoch": 37.56521739130435, |
| "grad_norm": 0.4917527139186859, |
| "learning_rate": 3.0226099643853073e-05, |
| "loss": 0.0878, |
| "step": 12960 |
| }, |
| { |
| "epoch": 37.594202898550726, |
| "grad_norm": 0.339530348777771, |
| "learning_rate": 3.0150193137541283e-05, |
| "loss": 0.069, |
| "step": 12970 |
| }, |
| { |
| "epoch": 37.6231884057971, |
| "grad_norm": 0.3518831133842468, |
| "learning_rate": 3.0074340899843467e-05, |
| "loss": 0.0816, |
| "step": 12980 |
| }, |
| { |
| "epoch": 37.65217391304348, |
| "grad_norm": 0.4143315553665161, |
| "learning_rate": 2.999854313813677e-05, |
| "loss": 0.0988, |
| "step": 12990 |
| }, |
| { |
| "epoch": 37.68115942028985, |
| "grad_norm": 0.31359317898750305, |
| "learning_rate": 2.9922800059649382e-05, |
| "loss": 0.0671, |
| "step": 13000 |
| }, |
| { |
| "epoch": 37.710144927536234, |
| "grad_norm": 0.49539920687675476, |
| "learning_rate": 2.9847111871459976e-05, |
| "loss": 0.0752, |
| "step": 13010 |
| }, |
| { |
| "epoch": 37.73913043478261, |
| "grad_norm": 0.42408648133277893, |
| "learning_rate": 2.977147878049721e-05, |
| "loss": 0.0762, |
| "step": 13020 |
| }, |
| { |
| "epoch": 37.768115942028984, |
| "grad_norm": 0.5186890959739685, |
| "learning_rate": 2.9695900993539006e-05, |
| "loss": 0.0895, |
| "step": 13030 |
| }, |
| { |
| "epoch": 37.79710144927536, |
| "grad_norm": 0.46351712942123413, |
| "learning_rate": 2.9620378717212183e-05, |
| "loss": 0.1007, |
| "step": 13040 |
| }, |
| { |
| "epoch": 37.82608695652174, |
| "grad_norm": 0.6148757934570312, |
| "learning_rate": 2.9544912157991745e-05, |
| "loss": 0.0661, |
| "step": 13050 |
| }, |
| { |
| "epoch": 37.85507246376812, |
| "grad_norm": 0.43662676215171814, |
| "learning_rate": 2.9469501522200405e-05, |
| "loss": 0.0761, |
| "step": 13060 |
| }, |
| { |
| "epoch": 37.88405797101449, |
| "grad_norm": 0.4326452910900116, |
| "learning_rate": 2.9394147016007946e-05, |
| "loss": 0.0965, |
| "step": 13070 |
| }, |
| { |
| "epoch": 37.91304347826087, |
| "grad_norm": 0.5132485032081604, |
| "learning_rate": 2.9318848845430702e-05, |
| "loss": 0.0817, |
| "step": 13080 |
| }, |
| { |
| "epoch": 37.94202898550725, |
| "grad_norm": 0.4048340618610382, |
| "learning_rate": 2.9243607216331013e-05, |
| "loss": 0.0867, |
| "step": 13090 |
| }, |
| { |
| "epoch": 37.971014492753625, |
| "grad_norm": 0.5179027915000916, |
| "learning_rate": 2.916842233441661e-05, |
| "loss": 0.0914, |
| "step": 13100 |
| }, |
| { |
| "epoch": 38.0, |
| "grad_norm": 0.6405589580535889, |
| "learning_rate": 2.90932944052401e-05, |
| "loss": 0.0758, |
| "step": 13110 |
| }, |
| { |
| "epoch": 38.028985507246375, |
| "grad_norm": 0.3282417356967926, |
| "learning_rate": 2.9018223634198354e-05, |
| "loss": 0.0814, |
| "step": 13120 |
| }, |
| { |
| "epoch": 38.05797101449275, |
| "grad_norm": 0.25214284658432007, |
| "learning_rate": 2.8943210226532025e-05, |
| "loss": 0.0662, |
| "step": 13130 |
| }, |
| { |
| "epoch": 38.08695652173913, |
| "grad_norm": 0.6154152750968933, |
| "learning_rate": 2.8868254387324857e-05, |
| "loss": 0.0793, |
| "step": 13140 |
| }, |
| { |
| "epoch": 38.11594202898551, |
| "grad_norm": 0.4001002907752991, |
| "learning_rate": 2.8793356321503306e-05, |
| "loss": 0.0851, |
| "step": 13150 |
| }, |
| { |
| "epoch": 38.14492753623188, |
| "grad_norm": 0.2872644066810608, |
| "learning_rate": 2.87185162338358e-05, |
| "loss": 0.0664, |
| "step": 13160 |
| }, |
| { |
| "epoch": 38.17391304347826, |
| "grad_norm": 0.385065495967865, |
| "learning_rate": 2.8643734328932253e-05, |
| "loss": 0.077, |
| "step": 13170 |
| }, |
| { |
| "epoch": 38.20289855072464, |
| "grad_norm": 0.32745644450187683, |
| "learning_rate": 2.856901081124359e-05, |
| "loss": 0.0762, |
| "step": 13180 |
| }, |
| { |
| "epoch": 38.231884057971016, |
| "grad_norm": 0.3578251004219055, |
| "learning_rate": 2.8494345885061002e-05, |
| "loss": 0.0873, |
| "step": 13190 |
| }, |
| { |
| "epoch": 38.26086956521739, |
| "grad_norm": 0.4024776816368103, |
| "learning_rate": 2.8419739754515616e-05, |
| "loss": 0.0674, |
| "step": 13200 |
| }, |
| { |
| "epoch": 38.289855072463766, |
| "grad_norm": 0.23126451671123505, |
| "learning_rate": 2.8345192623577666e-05, |
| "loss": 0.096, |
| "step": 13210 |
| }, |
| { |
| "epoch": 38.31884057971015, |
| "grad_norm": 0.44609886407852173, |
| "learning_rate": 2.8270704696056193e-05, |
| "loss": 0.0924, |
| "step": 13220 |
| }, |
| { |
| "epoch": 38.34782608695652, |
| "grad_norm": 0.28004297614097595, |
| "learning_rate": 2.8196276175598367e-05, |
| "loss": 0.0824, |
| "step": 13230 |
| }, |
| { |
| "epoch": 38.3768115942029, |
| "grad_norm": 0.4256015419960022, |
| "learning_rate": 2.8121907265688884e-05, |
| "loss": 0.0793, |
| "step": 13240 |
| }, |
| { |
| "epoch": 38.405797101449274, |
| "grad_norm": 0.28294479846954346, |
| "learning_rate": 2.804759816964957e-05, |
| "loss": 0.0757, |
| "step": 13250 |
| }, |
| { |
| "epoch": 38.43478260869565, |
| "grad_norm": 0.36253151297569275, |
| "learning_rate": 2.797334909063857e-05, |
| "loss": 0.0638, |
| "step": 13260 |
| }, |
| { |
| "epoch": 38.46376811594203, |
| "grad_norm": 0.3807222247123718, |
| "learning_rate": 2.7899160231650056e-05, |
| "loss": 0.0824, |
| "step": 13270 |
| }, |
| { |
| "epoch": 38.492753623188406, |
| "grad_norm": 0.2997818887233734, |
| "learning_rate": 2.7825031795513585e-05, |
| "loss": 0.084, |
| "step": 13280 |
| }, |
| { |
| "epoch": 38.52173913043478, |
| "grad_norm": 0.24102069437503815, |
| "learning_rate": 2.775096398489341e-05, |
| "loss": 0.0893, |
| "step": 13290 |
| }, |
| { |
| "epoch": 38.55072463768116, |
| "grad_norm": 0.258094847202301, |
| "learning_rate": 2.7676957002288163e-05, |
| "loss": 0.0814, |
| "step": 13300 |
| }, |
| { |
| "epoch": 38.57971014492754, |
| "grad_norm": 0.4139418303966522, |
| "learning_rate": 2.760301105003003e-05, |
| "loss": 0.0803, |
| "step": 13310 |
| }, |
| { |
| "epoch": 38.608695652173914, |
| "grad_norm": 0.31138837337493896, |
| "learning_rate": 2.752912633028446e-05, |
| "loss": 0.0783, |
| "step": 13320 |
| }, |
| { |
| "epoch": 38.63768115942029, |
| "grad_norm": 0.4925903379917145, |
| "learning_rate": 2.7455303045049474e-05, |
| "loss": 0.0839, |
| "step": 13330 |
| }, |
| { |
| "epoch": 38.666666666666664, |
| "grad_norm": 0.3583664894104004, |
| "learning_rate": 2.7381541396155098e-05, |
| "loss": 0.071, |
| "step": 13340 |
| }, |
| { |
| "epoch": 38.69565217391305, |
| "grad_norm": 0.28774356842041016, |
| "learning_rate": 2.730784158526286e-05, |
| "loss": 0.0875, |
| "step": 13350 |
| }, |
| { |
| "epoch": 38.72463768115942, |
| "grad_norm": 0.43696558475494385, |
| "learning_rate": 2.723420381386521e-05, |
| "loss": 0.0782, |
| "step": 13360 |
| }, |
| { |
| "epoch": 38.7536231884058, |
| "grad_norm": 0.3710800111293793, |
| "learning_rate": 2.7160628283285018e-05, |
| "loss": 0.0719, |
| "step": 13370 |
| }, |
| { |
| "epoch": 38.78260869565217, |
| "grad_norm": 0.3696930408477783, |
| "learning_rate": 2.7087115194675007e-05, |
| "loss": 0.0656, |
| "step": 13380 |
| }, |
| { |
| "epoch": 38.81159420289855, |
| "grad_norm": 0.3197194039821625, |
| "learning_rate": 2.701366474901712e-05, |
| "loss": 0.0755, |
| "step": 13390 |
| }, |
| { |
| "epoch": 38.84057971014493, |
| "grad_norm": 0.3476333022117615, |
| "learning_rate": 2.6940277147122085e-05, |
| "loss": 0.0834, |
| "step": 13400 |
| }, |
| { |
| "epoch": 38.869565217391305, |
| "grad_norm": 0.3637937307357788, |
| "learning_rate": 2.686695258962878e-05, |
| "loss": 0.0745, |
| "step": 13410 |
| }, |
| { |
| "epoch": 38.89855072463768, |
| "grad_norm": 0.5231657028198242, |
| "learning_rate": 2.679369127700375e-05, |
| "loss": 0.0807, |
| "step": 13420 |
| }, |
| { |
| "epoch": 38.927536231884055, |
| "grad_norm": 0.35336682200431824, |
| "learning_rate": 2.672049340954067e-05, |
| "loss": 0.072, |
| "step": 13430 |
| }, |
| { |
| "epoch": 38.95652173913044, |
| "grad_norm": 0.5302248597145081, |
| "learning_rate": 2.6647359187359676e-05, |
| "loss": 0.0931, |
| "step": 13440 |
| }, |
| { |
| "epoch": 38.98550724637681, |
| "grad_norm": 0.4057472348213196, |
| "learning_rate": 2.6574288810406946e-05, |
| "loss": 0.0808, |
| "step": 13450 |
| }, |
| { |
| "epoch": 39.01449275362319, |
| "grad_norm": 0.40481290221214294, |
| "learning_rate": 2.6501282478454083e-05, |
| "loss": 0.0742, |
| "step": 13460 |
| }, |
| { |
| "epoch": 39.04347826086956, |
| "grad_norm": 0.5995214581489563, |
| "learning_rate": 2.6428340391097618e-05, |
| "loss": 0.0842, |
| "step": 13470 |
| }, |
| { |
| "epoch": 39.072463768115945, |
| "grad_norm": 0.46385887265205383, |
| "learning_rate": 2.6355462747758485e-05, |
| "loss": 0.0764, |
| "step": 13480 |
| }, |
| { |
| "epoch": 39.10144927536232, |
| "grad_norm": 0.21818841993808746, |
| "learning_rate": 2.6282649747681304e-05, |
| "loss": 0.0689, |
| "step": 13490 |
| }, |
| { |
| "epoch": 39.130434782608695, |
| "grad_norm": 0.24269723892211914, |
| "learning_rate": 2.620990158993406e-05, |
| "loss": 0.0674, |
| "step": 13500 |
| }, |
| { |
| "epoch": 39.15942028985507, |
| "grad_norm": 0.18235942721366882, |
| "learning_rate": 2.6137218473407477e-05, |
| "loss": 0.0781, |
| "step": 13510 |
| }, |
| { |
| "epoch": 39.18840579710145, |
| "grad_norm": 0.30598685145378113, |
| "learning_rate": 2.606460059681436e-05, |
| "loss": 0.0881, |
| "step": 13520 |
| }, |
| { |
| "epoch": 39.21739130434783, |
| "grad_norm": 0.3079904317855835, |
| "learning_rate": 2.599204815868928e-05, |
| "loss": 0.0796, |
| "step": 13530 |
| }, |
| { |
| "epoch": 39.2463768115942, |
| "grad_norm": 0.6565821170806885, |
| "learning_rate": 2.5919561357387756e-05, |
| "loss": 0.0723, |
| "step": 13540 |
| }, |
| { |
| "epoch": 39.27536231884058, |
| "grad_norm": 0.30793699622154236, |
| "learning_rate": 2.5847140391085972e-05, |
| "loss": 0.0741, |
| "step": 13550 |
| }, |
| { |
| "epoch": 39.30434782608695, |
| "grad_norm": 0.40903565287590027, |
| "learning_rate": 2.5774785457780103e-05, |
| "loss": 0.0895, |
| "step": 13560 |
| }, |
| { |
| "epoch": 39.333333333333336, |
| "grad_norm": 0.31521743535995483, |
| "learning_rate": 2.5702496755285753e-05, |
| "loss": 0.0635, |
| "step": 13570 |
| }, |
| { |
| "epoch": 39.36231884057971, |
| "grad_norm": 0.49470698833465576, |
| "learning_rate": 2.5630274481237483e-05, |
| "loss": 0.0812, |
| "step": 13580 |
| }, |
| { |
| "epoch": 39.391304347826086, |
| "grad_norm": 0.49831944704055786, |
| "learning_rate": 2.5558118833088197e-05, |
| "loss": 0.0764, |
| "step": 13590 |
| }, |
| { |
| "epoch": 39.42028985507246, |
| "grad_norm": 0.3357720971107483, |
| "learning_rate": 2.548603000810872e-05, |
| "loss": 0.0789, |
| "step": 13600 |
| }, |
| { |
| "epoch": 39.44927536231884, |
| "grad_norm": 0.49669551849365234, |
| "learning_rate": 2.5414008203387152e-05, |
| "loss": 0.0775, |
| "step": 13610 |
| }, |
| { |
| "epoch": 39.47826086956522, |
| "grad_norm": 0.45243167877197266, |
| "learning_rate": 2.534205361582834e-05, |
| "loss": 0.0931, |
| "step": 13620 |
| }, |
| { |
| "epoch": 39.507246376811594, |
| "grad_norm": 0.4529440701007843, |
| "learning_rate": 2.527016644215338e-05, |
| "loss": 0.0857, |
| "step": 13630 |
| }, |
| { |
| "epoch": 39.53623188405797, |
| "grad_norm": 0.3923579156398773, |
| "learning_rate": 2.519834687889905e-05, |
| "loss": 0.0907, |
| "step": 13640 |
| }, |
| { |
| "epoch": 39.56521739130435, |
| "grad_norm": 0.46026331186294556, |
| "learning_rate": 2.5126595122417295e-05, |
| "loss": 0.0653, |
| "step": 13650 |
| }, |
| { |
| "epoch": 39.594202898550726, |
| "grad_norm": 0.3783218264579773, |
| "learning_rate": 2.5054911368874713e-05, |
| "loss": 0.0894, |
| "step": 13660 |
| }, |
| { |
| "epoch": 39.6231884057971, |
| "grad_norm": 0.2474319487810135, |
| "learning_rate": 2.4983295814251916e-05, |
| "loss": 0.0855, |
| "step": 13670 |
| }, |
| { |
| "epoch": 39.65217391304348, |
| "grad_norm": 0.2657444179058075, |
| "learning_rate": 2.4911748654343105e-05, |
| "loss": 0.0811, |
| "step": 13680 |
| }, |
| { |
| "epoch": 39.68115942028985, |
| "grad_norm": 0.3964589238166809, |
| "learning_rate": 2.4840270084755463e-05, |
| "loss": 0.0719, |
| "step": 13690 |
| }, |
| { |
| "epoch": 39.710144927536234, |
| "grad_norm": 0.4461621046066284, |
| "learning_rate": 2.4768860300908685e-05, |
| "loss": 0.069, |
| "step": 13700 |
| }, |
| { |
| "epoch": 39.73913043478261, |
| "grad_norm": 0.32302120327949524, |
| "learning_rate": 2.469751949803443e-05, |
| "loss": 0.0827, |
| "step": 13710 |
| }, |
| { |
| "epoch": 39.768115942028984, |
| "grad_norm": 0.29357752203941345, |
| "learning_rate": 2.4626247871175666e-05, |
| "loss": 0.0605, |
| "step": 13720 |
| }, |
| { |
| "epoch": 39.79710144927536, |
| "grad_norm": 0.5546101331710815, |
| "learning_rate": 2.4555045615186346e-05, |
| "loss": 0.0806, |
| "step": 13730 |
| }, |
| { |
| "epoch": 39.82608695652174, |
| "grad_norm": 0.4854411482810974, |
| "learning_rate": 2.4483912924730677e-05, |
| "loss": 0.0825, |
| "step": 13740 |
| }, |
| { |
| "epoch": 39.85507246376812, |
| "grad_norm": 0.3219527304172516, |
| "learning_rate": 2.4412849994282742e-05, |
| "loss": 0.072, |
| "step": 13750 |
| }, |
| { |
| "epoch": 39.88405797101449, |
| "grad_norm": 0.3878593146800995, |
| "learning_rate": 2.434185701812592e-05, |
| "loss": 0.0763, |
| "step": 13760 |
| }, |
| { |
| "epoch": 39.91304347826087, |
| "grad_norm": 0.43568170070648193, |
| "learning_rate": 2.4270934190352218e-05, |
| "loss": 0.0837, |
| "step": 13770 |
| }, |
| { |
| "epoch": 39.94202898550725, |
| "grad_norm": 0.3280969560146332, |
| "learning_rate": 2.4200081704861998e-05, |
| "loss": 0.0852, |
| "step": 13780 |
| }, |
| { |
| "epoch": 39.971014492753625, |
| "grad_norm": 0.4428047239780426, |
| "learning_rate": 2.412929975536321e-05, |
| "loss": 0.0778, |
| "step": 13790 |
| }, |
| { |
| "epoch": 40.0, |
| "grad_norm": 1.2345435619354248, |
| "learning_rate": 2.4058588535371017e-05, |
| "loss": 0.0667, |
| "step": 13800 |
| }, |
| { |
| "epoch": 40.028985507246375, |
| "grad_norm": 0.3271735906600952, |
| "learning_rate": 2.3987948238207243e-05, |
| "loss": 0.0644, |
| "step": 13810 |
| }, |
| { |
| "epoch": 40.05797101449275, |
| "grad_norm": 0.37233906984329224, |
| "learning_rate": 2.3917379056999678e-05, |
| "loss": 0.0809, |
| "step": 13820 |
| }, |
| { |
| "epoch": 40.08695652173913, |
| "grad_norm": 0.34821170568466187, |
| "learning_rate": 2.3846881184681824e-05, |
| "loss": 0.0712, |
| "step": 13830 |
| }, |
| { |
| "epoch": 40.11594202898551, |
| "grad_norm": 0.47559064626693726, |
| "learning_rate": 2.377645481399214e-05, |
| "loss": 0.071, |
| "step": 13840 |
| }, |
| { |
| "epoch": 40.14492753623188, |
| "grad_norm": 0.40675798058509827, |
| "learning_rate": 2.3706100137473667e-05, |
| "loss": 0.0876, |
| "step": 13850 |
| }, |
| { |
| "epoch": 40.17391304347826, |
| "grad_norm": 0.33387240767478943, |
| "learning_rate": 2.3635817347473394e-05, |
| "loss": 0.0763, |
| "step": 13860 |
| }, |
| { |
| "epoch": 40.20289855072464, |
| "grad_norm": 0.38296619057655334, |
| "learning_rate": 2.3565606636141757e-05, |
| "loss": 0.0861, |
| "step": 13870 |
| }, |
| { |
| "epoch": 40.231884057971016, |
| "grad_norm": 0.35619163513183594, |
| "learning_rate": 2.3495468195432203e-05, |
| "loss": 0.0875, |
| "step": 13880 |
| }, |
| { |
| "epoch": 40.26086956521739, |
| "grad_norm": 0.38837236166000366, |
| "learning_rate": 2.3425402217100507e-05, |
| "loss": 0.076, |
| "step": 13890 |
| }, |
| { |
| "epoch": 40.289855072463766, |
| "grad_norm": 0.5374419093132019, |
| "learning_rate": 2.3355408892704424e-05, |
| "loss": 0.0872, |
| "step": 13900 |
| }, |
| { |
| "epoch": 40.31884057971015, |
| "grad_norm": 0.399505078792572, |
| "learning_rate": 2.3285488413603003e-05, |
| "loss": 0.0688, |
| "step": 13910 |
| }, |
| { |
| "epoch": 40.34782608695652, |
| "grad_norm": 0.41612479090690613, |
| "learning_rate": 2.321564097095615e-05, |
| "loss": 0.0845, |
| "step": 13920 |
| }, |
| { |
| "epoch": 40.3768115942029, |
| "grad_norm": 0.5337821841239929, |
| "learning_rate": 2.3145866755724142e-05, |
| "loss": 0.0841, |
| "step": 13930 |
| }, |
| { |
| "epoch": 40.405797101449274, |
| "grad_norm": 0.4810619652271271, |
| "learning_rate": 2.307616595866699e-05, |
| "loss": 0.0757, |
| "step": 13940 |
| }, |
| { |
| "epoch": 40.43478260869565, |
| "grad_norm": 0.7072311043739319, |
| "learning_rate": 2.3006538770344032e-05, |
| "loss": 0.0872, |
| "step": 13950 |
| }, |
| { |
| "epoch": 40.46376811594203, |
| "grad_norm": 0.4162898659706116, |
| "learning_rate": 2.293698538111334e-05, |
| "loss": 0.0834, |
| "step": 13960 |
| }, |
| { |
| "epoch": 40.492753623188406, |
| "grad_norm": 0.40901780128479004, |
| "learning_rate": 2.28675059811312e-05, |
| "loss": 0.0647, |
| "step": 13970 |
| }, |
| { |
| "epoch": 40.52173913043478, |
| "grad_norm": 0.32501867413520813, |
| "learning_rate": 2.279810076035167e-05, |
| "loss": 0.0776, |
| "step": 13980 |
| }, |
| { |
| "epoch": 40.55072463768116, |
| "grad_norm": 0.32248783111572266, |
| "learning_rate": 2.272876990852596e-05, |
| "loss": 0.0749, |
| "step": 13990 |
| }, |
| { |
| "epoch": 40.57971014492754, |
| "grad_norm": 0.36385807394981384, |
| "learning_rate": 2.265951361520195e-05, |
| "loss": 0.0853, |
| "step": 14000 |
| }, |
| { |
| "epoch": 40.608695652173914, |
| "grad_norm": 0.3925250470638275, |
| "learning_rate": 2.2590332069723748e-05, |
| "loss": 0.0877, |
| "step": 14010 |
| }, |
| { |
| "epoch": 40.63768115942029, |
| "grad_norm": 0.3202069401741028, |
| "learning_rate": 2.2521225461231004e-05, |
| "loss": 0.0622, |
| "step": 14020 |
| }, |
| { |
| "epoch": 40.666666666666664, |
| "grad_norm": 0.335860013961792, |
| "learning_rate": 2.2452193978658597e-05, |
| "loss": 0.0798, |
| "step": 14030 |
| }, |
| { |
| "epoch": 40.69565217391305, |
| "grad_norm": 0.2558061480522156, |
| "learning_rate": 2.238323781073594e-05, |
| "loss": 0.0728, |
| "step": 14040 |
| }, |
| { |
| "epoch": 40.72463768115942, |
| "grad_norm": 0.551850438117981, |
| "learning_rate": 2.2314357145986552e-05, |
| "loss": 0.0729, |
| "step": 14050 |
| }, |
| { |
| "epoch": 40.7536231884058, |
| "grad_norm": 0.36312124133110046, |
| "learning_rate": 2.224555217272757e-05, |
| "loss": 0.0964, |
| "step": 14060 |
| }, |
| { |
| "epoch": 40.78260869565217, |
| "grad_norm": 0.2239280343055725, |
| "learning_rate": 2.2176823079069127e-05, |
| "loss": 0.079, |
| "step": 14070 |
| }, |
| { |
| "epoch": 40.81159420289855, |
| "grad_norm": 0.35991764068603516, |
| "learning_rate": 2.210817005291398e-05, |
| "loss": 0.085, |
| "step": 14080 |
| }, |
| { |
| "epoch": 40.84057971014493, |
| "grad_norm": 0.46211162209510803, |
| "learning_rate": 2.203959328195686e-05, |
| "loss": 0.0719, |
| "step": 14090 |
| }, |
| { |
| "epoch": 40.869565217391305, |
| "grad_norm": 0.4440068006515503, |
| "learning_rate": 2.1971092953684026e-05, |
| "loss": 0.0933, |
| "step": 14100 |
| }, |
| { |
| "epoch": 40.89855072463768, |
| "grad_norm": 0.5218793749809265, |
| "learning_rate": 2.1902669255372788e-05, |
| "loss": 0.0777, |
| "step": 14110 |
| }, |
| { |
| "epoch": 40.927536231884055, |
| "grad_norm": 0.23008324205875397, |
| "learning_rate": 2.1834322374090897e-05, |
| "loss": 0.0849, |
| "step": 14120 |
| }, |
| { |
| "epoch": 40.95652173913044, |
| "grad_norm": 0.2903602719306946, |
| "learning_rate": 2.1766052496696153e-05, |
| "loss": 0.0825, |
| "step": 14130 |
| }, |
| { |
| "epoch": 40.98550724637681, |
| "grad_norm": 0.22217999398708344, |
| "learning_rate": 2.169785980983577e-05, |
| "loss": 0.0672, |
| "step": 14140 |
| }, |
| { |
| "epoch": 41.01449275362319, |
| "grad_norm": 0.2826724350452423, |
| "learning_rate": 2.162974449994593e-05, |
| "loss": 0.0776, |
| "step": 14150 |
| }, |
| { |
| "epoch": 41.04347826086956, |
| "grad_norm": 0.4253155589103699, |
| "learning_rate": 2.1561706753251337e-05, |
| "loss": 0.0682, |
| "step": 14160 |
| }, |
| { |
| "epoch": 41.072463768115945, |
| "grad_norm": 0.5486535429954529, |
| "learning_rate": 2.1493746755764544e-05, |
| "loss": 0.0831, |
| "step": 14170 |
| }, |
| { |
| "epoch": 41.10144927536232, |
| "grad_norm": 0.31310802698135376, |
| "learning_rate": 2.1425864693285635e-05, |
| "loss": 0.0705, |
| "step": 14180 |
| }, |
| { |
| "epoch": 41.130434782608695, |
| "grad_norm": 0.4277971088886261, |
| "learning_rate": 2.1358060751401547e-05, |
| "loss": 0.0734, |
| "step": 14190 |
| }, |
| { |
| "epoch": 41.15942028985507, |
| "grad_norm": 0.25638988614082336, |
| "learning_rate": 2.129033511548566e-05, |
| "loss": 0.0608, |
| "step": 14200 |
| }, |
| { |
| "epoch": 41.18840579710145, |
| "grad_norm": 0.3612518608570099, |
| "learning_rate": 2.1222687970697315e-05, |
| "loss": 0.0726, |
| "step": 14210 |
| }, |
| { |
| "epoch": 41.21739130434783, |
| "grad_norm": 0.438911110162735, |
| "learning_rate": 2.1155119501981173e-05, |
| "loss": 0.0758, |
| "step": 14220 |
| }, |
| { |
| "epoch": 41.2463768115942, |
| "grad_norm": 0.45236918330192566, |
| "learning_rate": 2.1087629894066895e-05, |
| "loss": 0.0752, |
| "step": 14230 |
| }, |
| { |
| "epoch": 41.27536231884058, |
| "grad_norm": 0.3085053861141205, |
| "learning_rate": 2.1020219331468473e-05, |
| "loss": 0.07, |
| "step": 14240 |
| }, |
| { |
| "epoch": 41.30434782608695, |
| "grad_norm": 0.4090859889984131, |
| "learning_rate": 2.095288799848379e-05, |
| "loss": 0.073, |
| "step": 14250 |
| }, |
| { |
| "epoch": 41.333333333333336, |
| "grad_norm": 0.25771570205688477, |
| "learning_rate": 2.088563607919417e-05, |
| "loss": 0.0696, |
| "step": 14260 |
| }, |
| { |
| "epoch": 41.36231884057971, |
| "grad_norm": 0.3390887975692749, |
| "learning_rate": 2.0818463757463786e-05, |
| "loss": 0.0845, |
| "step": 14270 |
| }, |
| { |
| "epoch": 41.391304347826086, |
| "grad_norm": 0.41618505120277405, |
| "learning_rate": 2.0751371216939175e-05, |
| "loss": 0.0863, |
| "step": 14280 |
| }, |
| { |
| "epoch": 41.42028985507246, |
| "grad_norm": 0.3998986780643463, |
| "learning_rate": 2.068435864104882e-05, |
| "loss": 0.0709, |
| "step": 14290 |
| }, |
| { |
| "epoch": 41.44927536231884, |
| "grad_norm": 0.6030775308609009, |
| "learning_rate": 2.0617426213002506e-05, |
| "loss": 0.0828, |
| "step": 14300 |
| }, |
| { |
| "epoch": 41.47826086956522, |
| "grad_norm": 0.35719752311706543, |
| "learning_rate": 2.055057411579097e-05, |
| "loss": 0.08, |
| "step": 14310 |
| }, |
| { |
| "epoch": 41.507246376811594, |
| "grad_norm": 0.3750839829444885, |
| "learning_rate": 2.0483802532185286e-05, |
| "loss": 0.062, |
| "step": 14320 |
| }, |
| { |
| "epoch": 41.53623188405797, |
| "grad_norm": 0.303724467754364, |
| "learning_rate": 2.041711164473638e-05, |
| "loss": 0.0762, |
| "step": 14330 |
| }, |
| { |
| "epoch": 41.56521739130435, |
| "grad_norm": 0.3491968810558319, |
| "learning_rate": 2.0350501635774637e-05, |
| "loss": 0.0681, |
| "step": 14340 |
| }, |
| { |
| "epoch": 41.594202898550726, |
| "grad_norm": 0.31283631920814514, |
| "learning_rate": 2.0283972687409247e-05, |
| "loss": 0.099, |
| "step": 14350 |
| }, |
| { |
| "epoch": 41.6231884057971, |
| "grad_norm": 0.3491584360599518, |
| "learning_rate": 2.021752498152784e-05, |
| "loss": 0.0799, |
| "step": 14360 |
| }, |
| { |
| "epoch": 41.65217391304348, |
| "grad_norm": 0.40594348311424255, |
| "learning_rate": 2.015115869979589e-05, |
| "loss": 0.0673, |
| "step": 14370 |
| }, |
| { |
| "epoch": 41.68115942028985, |
| "grad_norm": 0.2607230842113495, |
| "learning_rate": 2.0084874023656265e-05, |
| "loss": 0.0678, |
| "step": 14380 |
| }, |
| { |
| "epoch": 41.710144927536234, |
| "grad_norm": 0.4588332176208496, |
| "learning_rate": 2.001867113432877e-05, |
| "loss": 0.0739, |
| "step": 14390 |
| }, |
| { |
| "epoch": 41.73913043478261, |
| "grad_norm": 0.47973960638046265, |
| "learning_rate": 1.995255021280954e-05, |
| "loss": 0.0799, |
| "step": 14400 |
| }, |
| { |
| "epoch": 41.768115942028984, |
| "grad_norm": 0.39496302604675293, |
| "learning_rate": 1.9886511439870688e-05, |
| "loss": 0.0755, |
| "step": 14410 |
| }, |
| { |
| "epoch": 41.79710144927536, |
| "grad_norm": 0.47075384855270386, |
| "learning_rate": 1.9820554996059675e-05, |
| "loss": 0.0843, |
| "step": 14420 |
| }, |
| { |
| "epoch": 41.82608695652174, |
| "grad_norm": 0.3595154583454132, |
| "learning_rate": 1.9754681061698893e-05, |
| "loss": 0.0847, |
| "step": 14430 |
| }, |
| { |
| "epoch": 41.85507246376812, |
| "grad_norm": 0.4555226266384125, |
| "learning_rate": 1.9688889816885185e-05, |
| "loss": 0.0703, |
| "step": 14440 |
| }, |
| { |
| "epoch": 41.88405797101449, |
| "grad_norm": 0.4868208169937134, |
| "learning_rate": 1.962318144148928e-05, |
| "loss": 0.0852, |
| "step": 14450 |
| }, |
| { |
| "epoch": 41.91304347826087, |
| "grad_norm": 0.3341791331768036, |
| "learning_rate": 1.955755611515539e-05, |
| "loss": 0.0665, |
| "step": 14460 |
| }, |
| { |
| "epoch": 41.94202898550725, |
| "grad_norm": 0.4766240119934082, |
| "learning_rate": 1.9492014017300642e-05, |
| "loss": 0.0717, |
| "step": 14470 |
| }, |
| { |
| "epoch": 41.971014492753625, |
| "grad_norm": 0.4072614908218384, |
| "learning_rate": 1.942655532711461e-05, |
| "loss": 0.0689, |
| "step": 14480 |
| }, |
| { |
| "epoch": 42.0, |
| "grad_norm": 0.5508348345756531, |
| "learning_rate": 1.9361180223558882e-05, |
| "loss": 0.0654, |
| "step": 14490 |
| }, |
| { |
| "epoch": 42.028985507246375, |
| "grad_norm": 0.3589998483657837, |
| "learning_rate": 1.929588888536647e-05, |
| "loss": 0.0736, |
| "step": 14500 |
| }, |
| { |
| "epoch": 42.05797101449275, |
| "grad_norm": 0.33736053109169006, |
| "learning_rate": 1.9230681491041425e-05, |
| "loss": 0.0682, |
| "step": 14510 |
| }, |
| { |
| "epoch": 42.08695652173913, |
| "grad_norm": 0.4881956875324249, |
| "learning_rate": 1.9165558218858264e-05, |
| "loss": 0.0817, |
| "step": 14520 |
| }, |
| { |
| "epoch": 42.11594202898551, |
| "grad_norm": 0.5997191071510315, |
| "learning_rate": 1.9100519246861505e-05, |
| "loss": 0.0722, |
| "step": 14530 |
| }, |
| { |
| "epoch": 42.14492753623188, |
| "grad_norm": 0.4747546911239624, |
| "learning_rate": 1.9035564752865248e-05, |
| "loss": 0.0624, |
| "step": 14540 |
| }, |
| { |
| "epoch": 42.17391304347826, |
| "grad_norm": 0.391609251499176, |
| "learning_rate": 1.897069491445258e-05, |
| "loss": 0.088, |
| "step": 14550 |
| }, |
| { |
| "epoch": 42.20289855072464, |
| "grad_norm": 0.5286002159118652, |
| "learning_rate": 1.890590990897515e-05, |
| "loss": 0.0921, |
| "step": 14560 |
| }, |
| { |
| "epoch": 42.231884057971016, |
| "grad_norm": 0.3322617709636688, |
| "learning_rate": 1.884120991355272e-05, |
| "loss": 0.0753, |
| "step": 14570 |
| }, |
| { |
| "epoch": 42.26086956521739, |
| "grad_norm": 0.366778165102005, |
| "learning_rate": 1.8776595105072576e-05, |
| "loss": 0.071, |
| "step": 14580 |
| }, |
| { |
| "epoch": 42.289855072463766, |
| "grad_norm": 0.5647521018981934, |
| "learning_rate": 1.8712065660189166e-05, |
| "loss": 0.0901, |
| "step": 14590 |
| }, |
| { |
| "epoch": 42.31884057971015, |
| "grad_norm": 0.44216540455818176, |
| "learning_rate": 1.8647621755323513e-05, |
| "loss": 0.0754, |
| "step": 14600 |
| }, |
| { |
| "epoch": 42.34782608695652, |
| "grad_norm": 0.41718125343322754, |
| "learning_rate": 1.858326356666278e-05, |
| "loss": 0.0798, |
| "step": 14610 |
| }, |
| { |
| "epoch": 42.3768115942029, |
| "grad_norm": 0.3692278563976288, |
| "learning_rate": 1.851899127015983e-05, |
| "loss": 0.0687, |
| "step": 14620 |
| }, |
| { |
| "epoch": 42.405797101449274, |
| "grad_norm": 0.5888849496841431, |
| "learning_rate": 1.8454805041532626e-05, |
| "loss": 0.0605, |
| "step": 14630 |
| }, |
| { |
| "epoch": 42.43478260869565, |
| "grad_norm": 0.366144061088562, |
| "learning_rate": 1.8390705056263906e-05, |
| "loss": 0.0665, |
| "step": 14640 |
| }, |
| { |
| "epoch": 42.46376811594203, |
| "grad_norm": 0.4007920026779175, |
| "learning_rate": 1.832669148960057e-05, |
| "loss": 0.0707, |
| "step": 14650 |
| }, |
| { |
| "epoch": 42.492753623188406, |
| "grad_norm": 0.36319825053215027, |
| "learning_rate": 1.8262764516553233e-05, |
| "loss": 0.0604, |
| "step": 14660 |
| }, |
| { |
| "epoch": 42.52173913043478, |
| "grad_norm": 0.5968917012214661, |
| "learning_rate": 1.8198924311895843e-05, |
| "loss": 0.0792, |
| "step": 14670 |
| }, |
| { |
| "epoch": 42.55072463768116, |
| "grad_norm": 0.3557155132293701, |
| "learning_rate": 1.813517105016505e-05, |
| "loss": 0.0587, |
| "step": 14680 |
| }, |
| { |
| "epoch": 42.57971014492754, |
| "grad_norm": 0.3647300899028778, |
| "learning_rate": 1.8071504905659888e-05, |
| "loss": 0.0678, |
| "step": 14690 |
| }, |
| { |
| "epoch": 42.608695652173914, |
| "grad_norm": 0.5016182065010071, |
| "learning_rate": 1.800792605244109e-05, |
| "loss": 0.0726, |
| "step": 14700 |
| }, |
| { |
| "epoch": 42.63768115942029, |
| "grad_norm": 0.39856255054473877, |
| "learning_rate": 1.7944434664330844e-05, |
| "loss": 0.0852, |
| "step": 14710 |
| }, |
| { |
| "epoch": 42.666666666666664, |
| "grad_norm": 0.3633764684200287, |
| "learning_rate": 1.7881030914912212e-05, |
| "loss": 0.08, |
| "step": 14720 |
| }, |
| { |
| "epoch": 42.69565217391305, |
| "grad_norm": 0.36024579405784607, |
| "learning_rate": 1.7817714977528577e-05, |
| "loss": 0.0686, |
| "step": 14730 |
| }, |
| { |
| "epoch": 42.72463768115942, |
| "grad_norm": 0.40388357639312744, |
| "learning_rate": 1.7754487025283332e-05, |
| "loss": 0.0657, |
| "step": 14740 |
| }, |
| { |
| "epoch": 42.7536231884058, |
| "grad_norm": 0.5098476409912109, |
| "learning_rate": 1.7691347231039275e-05, |
| "loss": 0.0651, |
| "step": 14750 |
| }, |
| { |
| "epoch": 42.78260869565217, |
| "grad_norm": 0.4363411068916321, |
| "learning_rate": 1.7628295767418164e-05, |
| "loss": 0.0966, |
| "step": 14760 |
| }, |
| { |
| "epoch": 42.81159420289855, |
| "grad_norm": 0.48385173082351685, |
| "learning_rate": 1.7565332806800333e-05, |
| "loss": 0.0751, |
| "step": 14770 |
| }, |
| { |
| "epoch": 42.84057971014493, |
| "grad_norm": 0.4358624815940857, |
| "learning_rate": 1.750245852132408e-05, |
| "loss": 0.087, |
| "step": 14780 |
| }, |
| { |
| "epoch": 42.869565217391305, |
| "grad_norm": 0.4145340621471405, |
| "learning_rate": 1.7439673082885323e-05, |
| "loss": 0.0738, |
| "step": 14790 |
| }, |
| { |
| "epoch": 42.89855072463768, |
| "grad_norm": 0.4053754508495331, |
| "learning_rate": 1.7376976663137047e-05, |
| "loss": 0.0895, |
| "step": 14800 |
| }, |
| { |
| "epoch": 42.927536231884055, |
| "grad_norm": 0.2905048131942749, |
| "learning_rate": 1.7314369433488853e-05, |
| "loss": 0.0622, |
| "step": 14810 |
| }, |
| { |
| "epoch": 42.95652173913044, |
| "grad_norm": 0.5020401477813721, |
| "learning_rate": 1.7251851565106548e-05, |
| "loss": 0.0642, |
| "step": 14820 |
| }, |
| { |
| "epoch": 42.98550724637681, |
| "grad_norm": 0.4154917597770691, |
| "learning_rate": 1.7189423228911574e-05, |
| "loss": 0.0807, |
| "step": 14830 |
| }, |
| { |
| "epoch": 43.01449275362319, |
| "grad_norm": 0.5019571781158447, |
| "learning_rate": 1.7127084595580606e-05, |
| "loss": 0.0779, |
| "step": 14840 |
| }, |
| { |
| "epoch": 43.04347826086956, |
| "grad_norm": 0.3335070312023163, |
| "learning_rate": 1.706483583554513e-05, |
| "loss": 0.0811, |
| "step": 14850 |
| }, |
| { |
| "epoch": 43.072463768115945, |
| "grad_norm": 0.3166472911834717, |
| "learning_rate": 1.700267711899083e-05, |
| "loss": 0.0729, |
| "step": 14860 |
| }, |
| { |
| "epoch": 43.10144927536232, |
| "grad_norm": 0.45485633611679077, |
| "learning_rate": 1.69406086158573e-05, |
| "loss": 0.0674, |
| "step": 14870 |
| }, |
| { |
| "epoch": 43.130434782608695, |
| "grad_norm": 0.27782437205314636, |
| "learning_rate": 1.6878630495837455e-05, |
| "loss": 0.0833, |
| "step": 14880 |
| }, |
| { |
| "epoch": 43.15942028985507, |
| "grad_norm": 0.24997830390930176, |
| "learning_rate": 1.681674292837707e-05, |
| "loss": 0.0649, |
| "step": 14890 |
| }, |
| { |
| "epoch": 43.18840579710145, |
| "grad_norm": 0.291838675737381, |
| "learning_rate": 1.6754946082674444e-05, |
| "loss": 0.0664, |
| "step": 14900 |
| }, |
| { |
| "epoch": 43.21739130434783, |
| "grad_norm": 0.3121786117553711, |
| "learning_rate": 1.6693240127679748e-05, |
| "loss": 0.0733, |
| "step": 14910 |
| }, |
| { |
| "epoch": 43.2463768115942, |
| "grad_norm": 0.38471075892448425, |
| "learning_rate": 1.663162523209475e-05, |
| "loss": 0.0821, |
| "step": 14920 |
| }, |
| { |
| "epoch": 43.27536231884058, |
| "grad_norm": 0.5700430274009705, |
| "learning_rate": 1.6570101564372193e-05, |
| "loss": 0.0669, |
| "step": 14930 |
| }, |
| { |
| "epoch": 43.30434782608695, |
| "grad_norm": 0.5257859230041504, |
| "learning_rate": 1.650866929271543e-05, |
| "loss": 0.0602, |
| "step": 14940 |
| }, |
| { |
| "epoch": 43.333333333333336, |
| "grad_norm": 0.4088708162307739, |
| "learning_rate": 1.644732858507797e-05, |
| "loss": 0.0871, |
| "step": 14950 |
| }, |
| { |
| "epoch": 43.36231884057971, |
| "grad_norm": 0.5116233825683594, |
| "learning_rate": 1.6386079609162943e-05, |
| "loss": 0.0598, |
| "step": 14960 |
| }, |
| { |
| "epoch": 43.391304347826086, |
| "grad_norm": 0.2616664469242096, |
| "learning_rate": 1.6324922532422742e-05, |
| "loss": 0.0606, |
| "step": 14970 |
| }, |
| { |
| "epoch": 43.42028985507246, |
| "grad_norm": 0.5427923798561096, |
| "learning_rate": 1.6263857522058434e-05, |
| "loss": 0.0937, |
| "step": 14980 |
| }, |
| { |
| "epoch": 43.44927536231884, |
| "grad_norm": 0.3789597153663635, |
| "learning_rate": 1.6202884745019443e-05, |
| "loss": 0.0851, |
| "step": 14990 |
| }, |
| { |
| "epoch": 43.47826086956522, |
| "grad_norm": 0.46611571311950684, |
| "learning_rate": 1.614200436800304e-05, |
| "loss": 0.0783, |
| "step": 15000 |
| }, |
| { |
| "epoch": 43.507246376811594, |
| "grad_norm": 0.37547364830970764, |
| "learning_rate": 1.6081216557453814e-05, |
| "loss": 0.0833, |
| "step": 15010 |
| }, |
| { |
| "epoch": 43.53623188405797, |
| "grad_norm": 0.3774726688861847, |
| "learning_rate": 1.6020521479563367e-05, |
| "loss": 0.0767, |
| "step": 15020 |
| }, |
| { |
| "epoch": 43.56521739130435, |
| "grad_norm": 0.44292446970939636, |
| "learning_rate": 1.5959919300269654e-05, |
| "loss": 0.0728, |
| "step": 15030 |
| }, |
| { |
| "epoch": 43.594202898550726, |
| "grad_norm": 0.5792534351348877, |
| "learning_rate": 1.5899410185256764e-05, |
| "loss": 0.0593, |
| "step": 15040 |
| }, |
| { |
| "epoch": 43.6231884057971, |
| "grad_norm": 0.2785523235797882, |
| "learning_rate": 1.583899429995431e-05, |
| "loss": 0.0612, |
| "step": 15050 |
| }, |
| { |
| "epoch": 43.65217391304348, |
| "grad_norm": 0.29454028606414795, |
| "learning_rate": 1.5778671809536993e-05, |
| "loss": 0.0751, |
| "step": 15060 |
| }, |
| { |
| "epoch": 43.68115942028985, |
| "grad_norm": 0.2879396378993988, |
| "learning_rate": 1.5718442878924246e-05, |
| "loss": 0.0883, |
| "step": 15070 |
| }, |
| { |
| "epoch": 43.710144927536234, |
| "grad_norm": 1.1070629358291626, |
| "learning_rate": 1.5658307672779593e-05, |
| "loss": 0.093, |
| "step": 15080 |
| }, |
| { |
| "epoch": 43.73913043478261, |
| "grad_norm": 0.29835617542266846, |
| "learning_rate": 1.5598266355510427e-05, |
| "loss": 0.0657, |
| "step": 15090 |
| }, |
| { |
| "epoch": 43.768115942028984, |
| "grad_norm": 0.4190385937690735, |
| "learning_rate": 1.553831909126744e-05, |
| "loss": 0.0742, |
| "step": 15100 |
| }, |
| { |
| "epoch": 43.79710144927536, |
| "grad_norm": 0.34586817026138306, |
| "learning_rate": 1.5478466043944135e-05, |
| "loss": 0.0715, |
| "step": 15110 |
| }, |
| { |
| "epoch": 43.82608695652174, |
| "grad_norm": 0.37232398986816406, |
| "learning_rate": 1.5418707377176468e-05, |
| "loss": 0.0695, |
| "step": 15120 |
| }, |
| { |
| "epoch": 43.85507246376812, |
| "grad_norm": 0.42787492275238037, |
| "learning_rate": 1.535904325434233e-05, |
| "loss": 0.0959, |
| "step": 15130 |
| }, |
| { |
| "epoch": 43.88405797101449, |
| "grad_norm": 0.8969880938529968, |
| "learning_rate": 1.529947383856118e-05, |
| "loss": 0.0693, |
| "step": 15140 |
| }, |
| { |
| "epoch": 43.91304347826087, |
| "grad_norm": 0.38823625445365906, |
| "learning_rate": 1.5239999292693524e-05, |
| "loss": 0.0825, |
| "step": 15150 |
| }, |
| { |
| "epoch": 43.94202898550725, |
| "grad_norm": 0.2747124135494232, |
| "learning_rate": 1.5180619779340505e-05, |
| "loss": 0.0809, |
| "step": 15160 |
| }, |
| { |
| "epoch": 43.971014492753625, |
| "grad_norm": 0.420537531375885, |
| "learning_rate": 1.5121335460843428e-05, |
| "loss": 0.0634, |
| "step": 15170 |
| }, |
| { |
| "epoch": 44.0, |
| "grad_norm": 0.784938395023346, |
| "learning_rate": 1.5062146499283347e-05, |
| "loss": 0.073, |
| "step": 15180 |
| }, |
| { |
| "epoch": 44.028985507246375, |
| "grad_norm": 0.47105634212493896, |
| "learning_rate": 1.5003053056480643e-05, |
| "loss": 0.0736, |
| "step": 15190 |
| }, |
| { |
| "epoch": 44.05797101449275, |
| "grad_norm": 0.2737712860107422, |
| "learning_rate": 1.4944055293994551e-05, |
| "loss": 0.0674, |
| "step": 15200 |
| }, |
| { |
| "epoch": 44.08695652173913, |
| "grad_norm": 0.6026032567024231, |
| "learning_rate": 1.4885153373122656e-05, |
| "loss": 0.0922, |
| "step": 15210 |
| }, |
| { |
| "epoch": 44.11594202898551, |
| "grad_norm": 0.3727162182331085, |
| "learning_rate": 1.482634745490059e-05, |
| "loss": 0.0644, |
| "step": 15220 |
| }, |
| { |
| "epoch": 44.14492753623188, |
| "grad_norm": 0.47362762689590454, |
| "learning_rate": 1.4767637700101466e-05, |
| "loss": 0.066, |
| "step": 15230 |
| }, |
| { |
| "epoch": 44.17391304347826, |
| "grad_norm": 0.35355237126350403, |
| "learning_rate": 1.4709024269235528e-05, |
| "loss": 0.0617, |
| "step": 15240 |
| }, |
| { |
| "epoch": 44.20289855072464, |
| "grad_norm": 0.3178042471408844, |
| "learning_rate": 1.4650507322549684e-05, |
| "loss": 0.1073, |
| "step": 15250 |
| }, |
| { |
| "epoch": 44.231884057971016, |
| "grad_norm": 0.5713096857070923, |
| "learning_rate": 1.4592087020026972e-05, |
| "loss": 0.0697, |
| "step": 15260 |
| }, |
| { |
| "epoch": 44.26086956521739, |
| "grad_norm": 0.39644819498062134, |
| "learning_rate": 1.4533763521386318e-05, |
| "loss": 0.0787, |
| "step": 15270 |
| }, |
| { |
| "epoch": 44.289855072463766, |
| "grad_norm": 0.3511520326137543, |
| "learning_rate": 1.44755369860819e-05, |
| "loss": 0.0637, |
| "step": 15280 |
| }, |
| { |
| "epoch": 44.31884057971015, |
| "grad_norm": 0.5535669326782227, |
| "learning_rate": 1.441740757330287e-05, |
| "loss": 0.0936, |
| "step": 15290 |
| }, |
| { |
| "epoch": 44.34782608695652, |
| "grad_norm": 0.5639561414718628, |
| "learning_rate": 1.4359375441972844e-05, |
| "loss": 0.0809, |
| "step": 15300 |
| }, |
| { |
| "epoch": 44.3768115942029, |
| "grad_norm": 0.3432080149650574, |
| "learning_rate": 1.4301440750749395e-05, |
| "loss": 0.0813, |
| "step": 15310 |
| }, |
| { |
| "epoch": 44.405797101449274, |
| "grad_norm": 0.3394940495491028, |
| "learning_rate": 1.4243603658023808e-05, |
| "loss": 0.0816, |
| "step": 15320 |
| }, |
| { |
| "epoch": 44.43478260869565, |
| "grad_norm": 0.3588254451751709, |
| "learning_rate": 1.4185864321920444e-05, |
| "loss": 0.0711, |
| "step": 15330 |
| }, |
| { |
| "epoch": 44.46376811594203, |
| "grad_norm": 0.3964613676071167, |
| "learning_rate": 1.4128222900296485e-05, |
| "loss": 0.0795, |
| "step": 15340 |
| }, |
| { |
| "epoch": 44.492753623188406, |
| "grad_norm": 0.38622230291366577, |
| "learning_rate": 1.407067955074135e-05, |
| "loss": 0.0716, |
| "step": 15350 |
| }, |
| { |
| "epoch": 44.52173913043478, |
| "grad_norm": 0.28652891516685486, |
| "learning_rate": 1.4013234430576356e-05, |
| "loss": 0.067, |
| "step": 15360 |
| }, |
| { |
| "epoch": 44.55072463768116, |
| "grad_norm": 0.3979763388633728, |
| "learning_rate": 1.3955887696854286e-05, |
| "loss": 0.0761, |
| "step": 15370 |
| }, |
| { |
| "epoch": 44.57971014492754, |
| "grad_norm": 0.4278284013271332, |
| "learning_rate": 1.38986395063589e-05, |
| "loss": 0.073, |
| "step": 15380 |
| }, |
| { |
| "epoch": 44.608695652173914, |
| "grad_norm": 0.40081092715263367, |
| "learning_rate": 1.3841490015604597e-05, |
| "loss": 0.0859, |
| "step": 15390 |
| }, |
| { |
| "epoch": 44.63768115942029, |
| "grad_norm": 0.45146530866622925, |
| "learning_rate": 1.3784439380835879e-05, |
| "loss": 0.0809, |
| "step": 15400 |
| }, |
| { |
| "epoch": 44.666666666666664, |
| "grad_norm": 0.3806000351905823, |
| "learning_rate": 1.3727487758026986e-05, |
| "loss": 0.0725, |
| "step": 15410 |
| }, |
| { |
| "epoch": 44.69565217391305, |
| "grad_norm": 0.5500205755233765, |
| "learning_rate": 1.3670635302881525e-05, |
| "loss": 0.0737, |
| "step": 15420 |
| }, |
| { |
| "epoch": 44.72463768115942, |
| "grad_norm": 0.2973146438598633, |
| "learning_rate": 1.3613882170831888e-05, |
| "loss": 0.0739, |
| "step": 15430 |
| }, |
| { |
| "epoch": 44.7536231884058, |
| "grad_norm": 0.4235207736492157, |
| "learning_rate": 1.355722851703901e-05, |
| "loss": 0.0837, |
| "step": 15440 |
| }, |
| { |
| "epoch": 44.78260869565217, |
| "grad_norm": 0.3844519853591919, |
| "learning_rate": 1.3500674496391814e-05, |
| "loss": 0.0669, |
| "step": 15450 |
| }, |
| { |
| "epoch": 44.81159420289855, |
| "grad_norm": 0.3494715988636017, |
| "learning_rate": 1.3444220263506795e-05, |
| "loss": 0.0587, |
| "step": 15460 |
| }, |
| { |
| "epoch": 44.84057971014493, |
| "grad_norm": 0.5101982355117798, |
| "learning_rate": 1.3387865972727714e-05, |
| "loss": 0.0871, |
| "step": 15470 |
| }, |
| { |
| "epoch": 44.869565217391305, |
| "grad_norm": 0.3597027361392975, |
| "learning_rate": 1.3331611778125036e-05, |
| "loss": 0.0728, |
| "step": 15480 |
| }, |
| { |
| "epoch": 44.89855072463768, |
| "grad_norm": 0.5626224279403687, |
| "learning_rate": 1.3275457833495564e-05, |
| "loss": 0.0804, |
| "step": 15490 |
| }, |
| { |
| "epoch": 44.927536231884055, |
| "grad_norm": 0.3257477581501007, |
| "learning_rate": 1.3219404292362065e-05, |
| "loss": 0.0632, |
| "step": 15500 |
| }, |
| { |
| "epoch": 44.95652173913044, |
| "grad_norm": 0.4441049098968506, |
| "learning_rate": 1.3163451307972751e-05, |
| "loss": 0.0695, |
| "step": 15510 |
| }, |
| { |
| "epoch": 44.98550724637681, |
| "grad_norm": 0.3859218657016754, |
| "learning_rate": 1.3107599033300977e-05, |
| "loss": 0.0671, |
| "step": 15520 |
| }, |
| { |
| "epoch": 45.01449275362319, |
| "grad_norm": 0.4354454278945923, |
| "learning_rate": 1.305184762104471e-05, |
| "loss": 0.0645, |
| "step": 15530 |
| }, |
| { |
| "epoch": 45.04347826086956, |
| "grad_norm": 0.2836010158061981, |
| "learning_rate": 1.2996197223626178e-05, |
| "loss": 0.0645, |
| "step": 15540 |
| }, |
| { |
| "epoch": 45.072463768115945, |
| "grad_norm": 0.3890087306499481, |
| "learning_rate": 1.2940647993191457e-05, |
| "loss": 0.0631, |
| "step": 15550 |
| }, |
| { |
| "epoch": 45.10144927536232, |
| "grad_norm": 0.3880113959312439, |
| "learning_rate": 1.2885200081610005e-05, |
| "loss": 0.0642, |
| "step": 15560 |
| }, |
| { |
| "epoch": 45.130434782608695, |
| "grad_norm": 0.47158360481262207, |
| "learning_rate": 1.2829853640474316e-05, |
| "loss": 0.076, |
| "step": 15570 |
| }, |
| { |
| "epoch": 45.15942028985507, |
| "grad_norm": 0.4908730089664459, |
| "learning_rate": 1.2774608821099438e-05, |
| "loss": 0.082, |
| "step": 15580 |
| }, |
| { |
| "epoch": 45.18840579710145, |
| "grad_norm": 0.44620388746261597, |
| "learning_rate": 1.2719465774522577e-05, |
| "loss": 0.0805, |
| "step": 15590 |
| }, |
| { |
| "epoch": 45.21739130434783, |
| "grad_norm": 0.39248040318489075, |
| "learning_rate": 1.2664424651502755e-05, |
| "loss": 0.0798, |
| "step": 15600 |
| }, |
| { |
| "epoch": 45.2463768115942, |
| "grad_norm": 0.4180006980895996, |
| "learning_rate": 1.260948560252026e-05, |
| "loss": 0.0856, |
| "step": 15610 |
| }, |
| { |
| "epoch": 45.27536231884058, |
| "grad_norm": 0.44177964329719543, |
| "learning_rate": 1.2554648777776396e-05, |
| "loss": 0.0949, |
| "step": 15620 |
| }, |
| { |
| "epoch": 45.30434782608695, |
| "grad_norm": 0.33813127875328064, |
| "learning_rate": 1.2499914327192919e-05, |
| "loss": 0.0781, |
| "step": 15630 |
| }, |
| { |
| "epoch": 45.333333333333336, |
| "grad_norm": 0.3105308711528778, |
| "learning_rate": 1.2445282400411722e-05, |
| "loss": 0.07, |
| "step": 15640 |
| }, |
| { |
| "epoch": 45.36231884057971, |
| "grad_norm": 0.2976597249507904, |
| "learning_rate": 1.2390753146794437e-05, |
| "loss": 0.0752, |
| "step": 15650 |
| }, |
| { |
| "epoch": 45.391304347826086, |
| "grad_norm": 0.4583851993083954, |
| "learning_rate": 1.2336326715421925e-05, |
| "loss": 0.0767, |
| "step": 15660 |
| }, |
| { |
| "epoch": 45.42028985507246, |
| "grad_norm": 0.41092222929000854, |
| "learning_rate": 1.2282003255094005e-05, |
| "loss": 0.0728, |
| "step": 15670 |
| }, |
| { |
| "epoch": 45.44927536231884, |
| "grad_norm": 0.3140925467014313, |
| "learning_rate": 1.2227782914328928e-05, |
| "loss": 0.069, |
| "step": 15680 |
| }, |
| { |
| "epoch": 45.47826086956522, |
| "grad_norm": 0.355333149433136, |
| "learning_rate": 1.2173665841363018e-05, |
| "loss": 0.0711, |
| "step": 15690 |
| }, |
| { |
| "epoch": 45.507246376811594, |
| "grad_norm": 0.3979286253452301, |
| "learning_rate": 1.211965218415032e-05, |
| "loss": 0.0755, |
| "step": 15700 |
| }, |
| { |
| "epoch": 45.53623188405797, |
| "grad_norm": 0.27833595871925354, |
| "learning_rate": 1.2065742090362082e-05, |
| "loss": 0.0804, |
| "step": 15710 |
| }, |
| { |
| "epoch": 45.56521739130435, |
| "grad_norm": 0.3665226101875305, |
| "learning_rate": 1.2011935707386457e-05, |
| "loss": 0.0959, |
| "step": 15720 |
| }, |
| { |
| "epoch": 45.594202898550726, |
| "grad_norm": 0.3983865976333618, |
| "learning_rate": 1.1958233182328044e-05, |
| "loss": 0.0809, |
| "step": 15730 |
| }, |
| { |
| "epoch": 45.6231884057971, |
| "grad_norm": 0.3656999468803406, |
| "learning_rate": 1.1904634662007474e-05, |
| "loss": 0.0675, |
| "step": 15740 |
| }, |
| { |
| "epoch": 45.65217391304348, |
| "grad_norm": 0.47683125734329224, |
| "learning_rate": 1.1851140292961088e-05, |
| "loss": 0.0754, |
| "step": 15750 |
| }, |
| { |
| "epoch": 45.68115942028985, |
| "grad_norm": 0.5368967652320862, |
| "learning_rate": 1.1797750221440424e-05, |
| "loss": 0.0786, |
| "step": 15760 |
| }, |
| { |
| "epoch": 45.710144927536234, |
| "grad_norm": 0.37085482478141785, |
| "learning_rate": 1.1744464593411897e-05, |
| "loss": 0.0735, |
| "step": 15770 |
| }, |
| { |
| "epoch": 45.73913043478261, |
| "grad_norm": 0.3648932874202728, |
| "learning_rate": 1.1691283554556399e-05, |
| "loss": 0.0688, |
| "step": 15780 |
| }, |
| { |
| "epoch": 45.768115942028984, |
| "grad_norm": 0.25463685393333435, |
| "learning_rate": 1.1638207250268834e-05, |
| "loss": 0.0658, |
| "step": 15790 |
| }, |
| { |
| "epoch": 45.79710144927536, |
| "grad_norm": 0.2738022804260254, |
| "learning_rate": 1.158523582565782e-05, |
| "loss": 0.0851, |
| "step": 15800 |
| }, |
| { |
| "epoch": 45.82608695652174, |
| "grad_norm": 0.43908044695854187, |
| "learning_rate": 1.1532369425545192e-05, |
| "loss": 0.079, |
| "step": 15810 |
| }, |
| { |
| "epoch": 45.85507246376812, |
| "grad_norm": 0.424430251121521, |
| "learning_rate": 1.1479608194465662e-05, |
| "loss": 0.0783, |
| "step": 15820 |
| }, |
| { |
| "epoch": 45.88405797101449, |
| "grad_norm": 0.4064854681491852, |
| "learning_rate": 1.1426952276666442e-05, |
| "loss": 0.0687, |
| "step": 15830 |
| }, |
| { |
| "epoch": 45.91304347826087, |
| "grad_norm": 0.5900323987007141, |
| "learning_rate": 1.1374401816106778e-05, |
| "loss": 0.0811, |
| "step": 15840 |
| }, |
| { |
| "epoch": 45.94202898550725, |
| "grad_norm": 0.3530072867870331, |
| "learning_rate": 1.1321956956457646e-05, |
| "loss": 0.0707, |
| "step": 15850 |
| }, |
| { |
| "epoch": 45.971014492753625, |
| "grad_norm": 0.4914955794811249, |
| "learning_rate": 1.1269617841101277e-05, |
| "loss": 0.0663, |
| "step": 15860 |
| }, |
| { |
| "epoch": 46.0, |
| "grad_norm": 0.6903124451637268, |
| "learning_rate": 1.1217384613130804e-05, |
| "loss": 0.0757, |
| "step": 15870 |
| }, |
| { |
| "epoch": 46.028985507246375, |
| "grad_norm": 0.35140737891197205, |
| "learning_rate": 1.11652574153499e-05, |
| "loss": 0.0689, |
| "step": 15880 |
| }, |
| { |
| "epoch": 46.05797101449275, |
| "grad_norm": 0.45175376534461975, |
| "learning_rate": 1.1113236390272303e-05, |
| "loss": 0.0698, |
| "step": 15890 |
| }, |
| { |
| "epoch": 46.08695652173913, |
| "grad_norm": 0.5367652773857117, |
| "learning_rate": 1.106132168012155e-05, |
| "loss": 0.0757, |
| "step": 15900 |
| }, |
| { |
| "epoch": 46.11594202898551, |
| "grad_norm": 0.47009265422821045, |
| "learning_rate": 1.1009513426830448e-05, |
| "loss": 0.0658, |
| "step": 15910 |
| }, |
| { |
| "epoch": 46.14492753623188, |
| "grad_norm": 0.26874783635139465, |
| "learning_rate": 1.0957811772040777e-05, |
| "loss": 0.0735, |
| "step": 15920 |
| }, |
| { |
| "epoch": 46.17391304347826, |
| "grad_norm": 0.5538775324821472, |
| "learning_rate": 1.0906216857102913e-05, |
| "loss": 0.073, |
| "step": 15930 |
| }, |
| { |
| "epoch": 46.20289855072464, |
| "grad_norm": 0.33384883403778076, |
| "learning_rate": 1.0854728823075355e-05, |
| "loss": 0.0662, |
| "step": 15940 |
| }, |
| { |
| "epoch": 46.231884057971016, |
| "grad_norm": 0.35423901677131653, |
| "learning_rate": 1.0803347810724452e-05, |
| "loss": 0.0773, |
| "step": 15950 |
| }, |
| { |
| "epoch": 46.26086956521739, |
| "grad_norm": 0.3087175488471985, |
| "learning_rate": 1.0752073960523911e-05, |
| "loss": 0.0588, |
| "step": 15960 |
| }, |
| { |
| "epoch": 46.289855072463766, |
| "grad_norm": 0.22049643099308014, |
| "learning_rate": 1.070090741265447e-05, |
| "loss": 0.0737, |
| "step": 15970 |
| }, |
| { |
| "epoch": 46.31884057971015, |
| "grad_norm": 0.3322051763534546, |
| "learning_rate": 1.0649848307003547e-05, |
| "loss": 0.0654, |
| "step": 15980 |
| }, |
| { |
| "epoch": 46.34782608695652, |
| "grad_norm": 0.42505577206611633, |
| "learning_rate": 1.0598896783164757e-05, |
| "loss": 0.0815, |
| "step": 15990 |
| }, |
| { |
| "epoch": 46.3768115942029, |
| "grad_norm": 0.26743263006210327, |
| "learning_rate": 1.0548052980437645e-05, |
| "loss": 0.0557, |
| "step": 16000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 20000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 58, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 12, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|