{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999547121960056, "eval_steps": 500, "global_step": 5520, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": 1.2849, "step": 1 }, { "epoch": 0.0, "learning_rate": 8.333333333333334e-05, "loss": 1.3474, "step": 5 }, { "epoch": 0.0, "learning_rate": 0.0001666666666666667, "loss": 1.3589, "step": 10 }, { "epoch": 0.0, "learning_rate": 0.00019954719225730847, "loss": 1.3078, "step": 15 }, { "epoch": 0.0, "learning_rate": 0.00019679487013963564, "loss": 1.2688, "step": 20 }, { "epoch": 0.0, "learning_rate": 0.00019161084574320696, "loss": 1.3346, "step": 25 }, { "epoch": 0.01, "learning_rate": 0.00018412535328311814, "loss": 1.2897, "step": 30 }, { "epoch": 0.01, "learning_rate": 0.0001745264449675755, "loss": 1.2717, "step": 35 }, { "epoch": 0.01, "learning_rate": 0.00016305526670845226, "loss": 1.2734, "step": 40 }, { "epoch": 0.01, "learning_rate": 0.00015000000000000001, "loss": 1.2862, "step": 45 }, { "epoch": 0.01, "learning_rate": 0.00013568862215918717, "loss": 1.2708, "step": 50 }, { "epoch": 0.01, "learning_rate": 0.00012048066680651908, "loss": 1.2435, "step": 55 }, { "epoch": 0.01, "learning_rate": 0.00010475819158237425, "loss": 1.2824, "step": 60 }, { "epoch": 0.01, "learning_rate": 8.891618000989891e-05, "loss": 1.2778, "step": 65 }, { "epoch": 0.01, "learning_rate": 7.335261863099651e-05, "loss": 1.2511, "step": 70 }, { "epoch": 0.01, "learning_rate": 5.845849869981137e-05, "loss": 1.2728, "step": 75 }, { "epoch": 0.01, "learning_rate": 4.4607993613388976e-05, "loss": 1.2866, "step": 80 }, { "epoch": 0.02, "learning_rate": 3.21490588442868e-05, "loss": 1.2377, "step": 85 }, { "epoch": 0.02, "learning_rate": 2.139469052572127e-05, "loss": 1.2673, "step": 90 }, { "epoch": 0.02, "learning_rate": 1.2615062293021507e-05, "loss": 1.2778, "step": 95 }, { "epoch": 0.02, "learning_rate": 6.030737921409169e-06, "loss": 1.2666, "step": 100 }, { "epoch": 0.02, "learning_rate": 3.804347826086957e-05, "loss": 1.2379, "step": 105 }, { "epoch": 0.02, "learning_rate": 3.985507246376812e-05, "loss": 1.2613, "step": 110 }, { "epoch": 0.02, "learning_rate": 4.166666666666667e-05, "loss": 1.2182, "step": 115 }, { "epoch": 0.02, "learning_rate": 4.347826086956522e-05, "loss": 1.238, "step": 120 }, { "epoch": 0.02, "learning_rate": 4.528985507246377e-05, "loss": 1.2098, "step": 125 }, { "epoch": 0.02, "learning_rate": 4.710144927536232e-05, "loss": 1.2186, "step": 130 }, { "epoch": 0.02, "learning_rate": 4.891304347826087e-05, "loss": 1.2178, "step": 135 }, { "epoch": 0.03, "learning_rate": 5.072463768115943e-05, "loss": 1.2473, "step": 140 }, { "epoch": 0.03, "learning_rate": 5.2536231884057975e-05, "loss": 1.2573, "step": 145 }, { "epoch": 0.03, "learning_rate": 5.4347826086956524e-05, "loss": 1.2302, "step": 150 }, { "epoch": 0.03, "learning_rate": 5.615942028985508e-05, "loss": 1.2523, "step": 155 }, { "epoch": 0.03, "learning_rate": 5.797101449275363e-05, "loss": 1.3071, "step": 160 }, { "epoch": 0.03, "learning_rate": 5.9782608695652175e-05, "loss": 1.2152, "step": 165 }, { "epoch": 0.03, "learning_rate": 6.159420289855072e-05, "loss": 1.2359, "step": 170 }, { "epoch": 0.03, "learning_rate": 6.340579710144928e-05, "loss": 1.2098, "step": 175 }, { "epoch": 0.03, "learning_rate": 6.521739130434783e-05, "loss": 1.2444, "step": 180 }, { "epoch": 0.03, "learning_rate": 6.702898550724638e-05, "loss": 1.2492, "step": 185 }, { "epoch": 0.03, "learning_rate": 6.884057971014493e-05, "loss": 1.1941, "step": 190 }, { "epoch": 0.04, "learning_rate": 7.065217391304349e-05, "loss": 1.2096, "step": 195 }, { "epoch": 0.04, "learning_rate": 7.246376811594203e-05, "loss": 1.2363, "step": 200 }, { "epoch": 0.04, "learning_rate": 7.427536231884058e-05, "loss": 1.2613, "step": 205 }, { "epoch": 0.04, "learning_rate": 7.608695652173914e-05, "loss": 1.2459, "step": 210 }, { "epoch": 0.04, "learning_rate": 7.789855072463769e-05, "loss": 1.2951, "step": 215 }, { "epoch": 0.04, "learning_rate": 7.971014492753623e-05, "loss": 1.2114, "step": 220 }, { "epoch": 0.04, "learning_rate": 8.152173913043478e-05, "loss": 1.2315, "step": 225 }, { "epoch": 0.04, "learning_rate": 8.333333333333334e-05, "loss": 1.2311, "step": 230 }, { "epoch": 0.04, "learning_rate": 8.514492753623189e-05, "loss": 1.2155, "step": 235 }, { "epoch": 0.04, "learning_rate": 8.695652173913044e-05, "loss": 1.2456, "step": 240 }, { "epoch": 0.04, "learning_rate": 8.876811594202898e-05, "loss": 1.2576, "step": 245 }, { "epoch": 0.05, "learning_rate": 9.057971014492754e-05, "loss": 1.1968, "step": 250 }, { "epoch": 0.05, "learning_rate": 9.239130434782609e-05, "loss": 1.361, "step": 255 }, { "epoch": 0.05, "learning_rate": 9.420289855072463e-05, "loss": 1.2729, "step": 260 }, { "epoch": 0.05, "learning_rate": 9.60144927536232e-05, "loss": 1.2613, "step": 265 }, { "epoch": 0.05, "learning_rate": 9.782608695652174e-05, "loss": 1.2513, "step": 270 }, { "epoch": 0.05, "learning_rate": 9.96376811594203e-05, "loss": 1.2838, "step": 275 }, { "epoch": 0.05, "learning_rate": 0.00010144927536231885, "loss": 1.216, "step": 280 }, { "epoch": 0.05, "learning_rate": 0.00010326086956521738, "loss": 1.2206, "step": 285 }, { "epoch": 0.05, "learning_rate": 0.00010507246376811595, "loss": 1.2322, "step": 290 }, { "epoch": 0.05, "learning_rate": 0.0001068840579710145, "loss": 1.2467, "step": 295 }, { "epoch": 0.05, "learning_rate": 0.00010869565217391305, "loss": 1.2177, "step": 300 }, { "epoch": 0.06, "learning_rate": 0.0001105072463768116, "loss": 1.2339, "step": 305 }, { "epoch": 0.06, "learning_rate": 0.00011231884057971016, "loss": 1.2207, "step": 310 }, { "epoch": 0.06, "learning_rate": 0.0001141304347826087, "loss": 1.2125, "step": 315 }, { "epoch": 0.06, "learning_rate": 0.00011594202898550725, "loss": 1.2331, "step": 320 }, { "epoch": 0.06, "learning_rate": 0.0001177536231884058, "loss": 1.2279, "step": 325 }, { "epoch": 0.06, "learning_rate": 0.00011956521739130435, "loss": 1.2772, "step": 330 }, { "epoch": 0.06, "learning_rate": 0.0001213768115942029, "loss": 1.2357, "step": 335 }, { "epoch": 0.06, "learning_rate": 0.00012318840579710145, "loss": 1.2611, "step": 340 }, { "epoch": 0.06, "learning_rate": 0.000125, "loss": 1.2308, "step": 345 }, { "epoch": 0.06, "learning_rate": 0.00012681159420289856, "loss": 1.2168, "step": 350 }, { "epoch": 0.06, "learning_rate": 0.0001286231884057971, "loss": 1.1828, "step": 355 }, { "epoch": 0.07, "learning_rate": 0.00013043478260869567, "loss": 1.2666, "step": 360 }, { "epoch": 0.07, "learning_rate": 0.00013224637681159422, "loss": 1.2298, "step": 365 }, { "epoch": 0.07, "learning_rate": 0.00013405797101449275, "loss": 1.2249, "step": 370 }, { "epoch": 0.07, "learning_rate": 0.0001358695652173913, "loss": 1.2445, "step": 375 }, { "epoch": 0.07, "learning_rate": 0.00013768115942028986, "loss": 1.2371, "step": 380 }, { "epoch": 0.07, "learning_rate": 0.00013949275362318842, "loss": 1.2005, "step": 385 }, { "epoch": 0.07, "learning_rate": 0.00014130434782608697, "loss": 1.2625, "step": 390 }, { "epoch": 0.07, "learning_rate": 0.0001431159420289855, "loss": 1.226, "step": 395 }, { "epoch": 0.07, "learning_rate": 0.00014492753623188405, "loss": 1.2484, "step": 400 }, { "epoch": 0.07, "learning_rate": 0.00014673913043478264, "loss": 1.2482, "step": 405 }, { "epoch": 0.07, "learning_rate": 0.00014855072463768116, "loss": 1.209, "step": 410 }, { "epoch": 0.08, "learning_rate": 0.00015036231884057972, "loss": 1.2102, "step": 415 }, { "epoch": 0.08, "learning_rate": 0.00015217391304347827, "loss": 1.2347, "step": 420 }, { "epoch": 0.08, "learning_rate": 0.0001539855072463768, "loss": 1.2134, "step": 425 }, { "epoch": 0.08, "learning_rate": 0.00015579710144927538, "loss": 1.2436, "step": 430 }, { "epoch": 0.08, "learning_rate": 0.0001576086956521739, "loss": 1.2616, "step": 435 }, { "epoch": 0.08, "learning_rate": 0.00015942028985507247, "loss": 1.2392, "step": 440 }, { "epoch": 0.08, "learning_rate": 0.00016123188405797102, "loss": 1.2507, "step": 445 }, { "epoch": 0.08, "learning_rate": 0.00016304347826086955, "loss": 1.2315, "step": 450 }, { "epoch": 0.08, "learning_rate": 0.00016485507246376813, "loss": 1.2151, "step": 455 }, { "epoch": 0.08, "learning_rate": 0.0001666666666666667, "loss": 1.168, "step": 460 }, { "epoch": 0.08, "learning_rate": 0.00016847826086956522, "loss": 1.2123, "step": 465 }, { "epoch": 0.09, "learning_rate": 0.00017028985507246377, "loss": 1.2129, "step": 470 }, { "epoch": 0.09, "learning_rate": 0.00017210144927536233, "loss": 1.201, "step": 475 }, { "epoch": 0.09, "learning_rate": 0.00017391304347826088, "loss": 1.1735, "step": 480 }, { "epoch": 0.09, "learning_rate": 0.00017572463768115944, "loss": 1.2197, "step": 485 }, { "epoch": 0.09, "learning_rate": 0.00017753623188405796, "loss": 1.2151, "step": 490 }, { "epoch": 0.09, "learning_rate": 0.00017934782608695652, "loss": 1.2171, "step": 495 }, { "epoch": 0.09, "learning_rate": 0.00018115942028985507, "loss": 1.2377, "step": 500 }, { "epoch": 0.09, "learning_rate": 0.00018297101449275363, "loss": 1.2137, "step": 505 }, { "epoch": 0.09, "learning_rate": 0.00018478260869565218, "loss": 1.2363, "step": 510 }, { "epoch": 0.09, "learning_rate": 0.00018659420289855074, "loss": 1.2076, "step": 515 }, { "epoch": 0.09, "learning_rate": 0.00018840579710144927, "loss": 1.2246, "step": 520 }, { "epoch": 0.1, "learning_rate": 0.00019021739130434782, "loss": 1.2535, "step": 525 }, { "epoch": 0.1, "learning_rate": 0.0001920289855072464, "loss": 1.2652, "step": 530 }, { "epoch": 0.1, "learning_rate": 0.00019384057971014493, "loss": 1.2043, "step": 535 }, { "epoch": 0.1, "learning_rate": 0.0001956521739130435, "loss": 1.2072, "step": 540 }, { "epoch": 0.1, "learning_rate": 0.00019746376811594204, "loss": 1.2167, "step": 545 }, { "epoch": 0.1, "learning_rate": 0.0001992753623188406, "loss": 1.2739, "step": 550 }, { "epoch": 0.1, "learning_rate": 0.00019999982005120014, "loss": 1.2288, "step": 555 }, { "epoch": 0.1, "learning_rate": 0.00019999872036643513, "loss": 1.2974, "step": 560 }, { "epoch": 0.1, "learning_rate": 0.00019999662097944096, "loss": 1.2527, "step": 565 }, { "epoch": 0.1, "learning_rate": 0.00019999352191120556, "loss": 1.2544, "step": 570 }, { "epoch": 0.1, "learning_rate": 0.00019998942319271077, "loss": 1.2211, "step": 575 }, { "epoch": 0.11, "learning_rate": 0.00019998432486493205, "loss": 1.2673, "step": 580 }, { "epoch": 0.11, "learning_rate": 0.00019997822697883822, "loss": 1.2358, "step": 585 }, { "epoch": 0.11, "learning_rate": 0.0001999711295953907, "loss": 1.1941, "step": 590 }, { "epoch": 0.11, "learning_rate": 0.0001999630327855431, "loss": 1.2176, "step": 595 }, { "epoch": 0.11, "learning_rate": 0.00019995393663024054, "loss": 1.2103, "step": 600 }, { "epoch": 0.11, "learning_rate": 0.00019994384122041853, "loss": 1.186, "step": 605 }, { "epoch": 0.11, "learning_rate": 0.00019993274665700244, "loss": 1.2305, "step": 610 }, { "epoch": 0.11, "learning_rate": 0.0001999206530509063, "loss": 1.2734, "step": 615 }, { "epoch": 0.11, "learning_rate": 0.00019990756052303173, "loss": 1.2792, "step": 620 }, { "epoch": 0.11, "learning_rate": 0.00019989346920426667, "loss": 1.2577, "step": 625 }, { "epoch": 0.11, "learning_rate": 0.0001998783792354841, "loss": 1.2192, "step": 630 }, { "epoch": 0.12, "learning_rate": 0.0001998622907675408, "loss": 1.2149, "step": 635 }, { "epoch": 0.12, "learning_rate": 0.00019984520396127553, "loss": 1.2541, "step": 640 }, { "epoch": 0.12, "learning_rate": 0.0001998271189875077, "loss": 1.1841, "step": 645 }, { "epoch": 0.12, "learning_rate": 0.0001998080360270355, "loss": 1.2418, "step": 650 }, { "epoch": 0.12, "learning_rate": 0.0001997879552706341, "loss": 1.1846, "step": 655 }, { "epoch": 0.12, "learning_rate": 0.00019976687691905393, "loss": 1.2417, "step": 660 }, { "epoch": 0.12, "learning_rate": 0.00019974480118301838, "loss": 1.2281, "step": 665 }, { "epoch": 0.12, "learning_rate": 0.0001997217282832219, "loss": 1.2, "step": 670 }, { "epoch": 0.12, "learning_rate": 0.00019969765845032775, "loss": 1.2256, "step": 675 }, { "epoch": 0.12, "learning_rate": 0.0001996725919249657, "loss": 1.2207, "step": 680 }, { "epoch": 0.12, "learning_rate": 0.00019964652895772947, "loss": 1.2042, "step": 685 }, { "epoch": 0.12, "learning_rate": 0.00019961946980917456, "loss": 1.2317, "step": 690 }, { "epoch": 0.13, "learning_rate": 0.0001995914147498153, "loss": 1.2431, "step": 695 }, { "epoch": 0.13, "learning_rate": 0.00019956236406012232, "loss": 1.2345, "step": 700 }, { "epoch": 0.13, "learning_rate": 0.00019953231803051974, "loss": 1.2213, "step": 705 }, { "epoch": 0.13, "learning_rate": 0.00019950127696138225, "loss": 1.2243, "step": 710 }, { "epoch": 0.13, "learning_rate": 0.00019946924116303206, "loss": 1.2061, "step": 715 }, { "epoch": 0.13, "learning_rate": 0.00019943621095573586, "loss": 1.2563, "step": 720 }, { "epoch": 0.13, "learning_rate": 0.00019940218666970161, "loss": 1.2556, "step": 725 }, { "epoch": 0.13, "learning_rate": 0.0001993671686450752, "loss": 1.353, "step": 730 }, { "epoch": 0.13, "learning_rate": 0.00019933115723193707, "loss": 1.4143, "step": 735 }, { "epoch": 0.13, "learning_rate": 0.00019929415279029873, "loss": 1.2836, "step": 740 }, { "epoch": 0.13, "learning_rate": 0.00019925615569009916, "loss": 1.3538, "step": 745 }, { "epoch": 0.14, "learning_rate": 0.00019921716631120107, "loss": 1.3211, "step": 750 }, { "epoch": 0.14, "learning_rate": 0.00019917718504338714, "loss": 1.2648, "step": 755 }, { "epoch": 0.14, "learning_rate": 0.0001991362122863561, "loss": 1.2779, "step": 760 }, { "epoch": 0.14, "learning_rate": 0.00019909424844971873, "loss": 1.2609, "step": 765 }, { "epoch": 0.14, "learning_rate": 0.0001990512939529939, "loss": 1.2179, "step": 770 }, { "epoch": 0.14, "learning_rate": 0.00019900734922560407, "loss": 1.2384, "step": 775 }, { "epoch": 0.14, "learning_rate": 0.0001989624147068713, "loss": 1.2196, "step": 780 }, { "epoch": 0.14, "learning_rate": 0.00019891649084601278, "loss": 1.2582, "step": 785 }, { "epoch": 0.14, "learning_rate": 0.00019886957810213619, "loss": 1.2313, "step": 790 }, { "epoch": 0.14, "learning_rate": 0.0001988216769442353, "loss": 1.2451, "step": 795 }, { "epoch": 0.14, "learning_rate": 0.00019877278785118517, "loss": 1.2274, "step": 800 }, { "epoch": 0.15, "learning_rate": 0.00019872291131173742, "loss": 1.2455, "step": 805 }, { "epoch": 0.15, "learning_rate": 0.0001986720478245153, "loss": 1.215, "step": 810 }, { "epoch": 0.15, "learning_rate": 0.0001986201978980087, "loss": 1.2123, "step": 815 }, { "epoch": 0.15, "learning_rate": 0.0001985673620505692, "loss": 1.2027, "step": 820 }, { "epoch": 0.15, "learning_rate": 0.00019851354081040467, "loss": 1.2324, "step": 825 }, { "epoch": 0.15, "learning_rate": 0.0001984587347155741, "loss": 1.245, "step": 830 }, { "epoch": 0.15, "learning_rate": 0.00019840294431398226, "loss": 1.2135, "step": 835 }, { "epoch": 0.15, "learning_rate": 0.0001983461701633742, "loss": 1.1822, "step": 840 }, { "epoch": 0.15, "learning_rate": 0.00019828841283132964, "loss": 1.2285, "step": 845 }, { "epoch": 0.15, "learning_rate": 0.0001982296728952573, "loss": 1.237, "step": 850 }, { "epoch": 0.15, "learning_rate": 0.00019816995094238912, "loss": 1.2098, "step": 855 }, { "epoch": 0.16, "learning_rate": 0.00019810924756977444, "loss": 1.1901, "step": 860 }, { "epoch": 0.16, "learning_rate": 0.000198047563384274, "loss": 1.2362, "step": 865 }, { "epoch": 0.16, "learning_rate": 0.00019798489900255389, "loss": 1.2439, "step": 870 }, { "epoch": 0.16, "learning_rate": 0.00019792125505107931, "loss": 1.2188, "step": 875 }, { "epoch": 0.16, "learning_rate": 0.00019785663216610844, "loss": 1.2794, "step": 880 }, { "epoch": 0.16, "learning_rate": 0.00019779103099368595, "loss": 1.2214, "step": 885 }, { "epoch": 0.16, "learning_rate": 0.0001977244521896366, "loss": 1.2287, "step": 890 }, { "epoch": 0.16, "learning_rate": 0.0001976568964195587, "loss": 1.2682, "step": 895 }, { "epoch": 0.16, "learning_rate": 0.00019758836435881746, "loss": 1.2253, "step": 900 }, { "epoch": 0.16, "learning_rate": 0.00019751885669253816, "loss": 1.2026, "step": 905 }, { "epoch": 0.16, "learning_rate": 0.00019744837411559942, "loss": 1.2285, "step": 910 }, { "epoch": 0.17, "learning_rate": 0.0001973769173326261, "loss": 1.2431, "step": 915 }, { "epoch": 0.17, "learning_rate": 0.00019730448705798239, "loss": 1.2365, "step": 920 }, { "epoch": 0.17, "learning_rate": 0.00019723108401576466, "loss": 1.2019, "step": 925 }, { "epoch": 0.17, "learning_rate": 0.00019715670893979414, "loss": 1.2133, "step": 930 }, { "epoch": 0.17, "learning_rate": 0.00019708136257360966, "loss": 1.2687, "step": 935 }, { "epoch": 0.17, "learning_rate": 0.00019700504567046013, "loss": 1.2272, "step": 940 }, { "epoch": 0.17, "learning_rate": 0.00019692775899329707, "loss": 1.2396, "step": 945 }, { "epoch": 0.17, "learning_rate": 0.00019684950331476706, "loss": 1.2365, "step": 950 }, { "epoch": 0.17, "learning_rate": 0.00019677027941720384, "loss": 1.1987, "step": 955 }, { "epoch": 0.17, "learning_rate": 0.00019669008809262062, "loss": 1.2087, "step": 960 }, { "epoch": 0.17, "learning_rate": 0.00019660893014270212, "loss": 1.2339, "step": 965 }, { "epoch": 0.18, "learning_rate": 0.00019652680637879654, "loss": 1.2094, "step": 970 }, { "epoch": 0.18, "learning_rate": 0.0001964437176219075, "loss": 1.2445, "step": 975 }, { "epoch": 0.18, "learning_rate": 0.00019635966470268583, "loss": 1.2229, "step": 980 }, { "epoch": 0.18, "learning_rate": 0.00019627464846142111, "loss": 1.194, "step": 985 }, { "epoch": 0.18, "learning_rate": 0.0001961886697480335, "loss": 1.2036, "step": 990 }, { "epoch": 0.18, "learning_rate": 0.00019610172942206516, "loss": 1.2269, "step": 995 }, { "epoch": 0.18, "learning_rate": 0.0001960138283526715, "loss": 1.2169, "step": 1000 }, { "epoch": 0.18, "learning_rate": 0.00019592496741861282, "loss": 1.2136, "step": 1005 }, { "epoch": 0.18, "learning_rate": 0.00019583514750824512, "loss": 1.2027, "step": 1010 }, { "epoch": 0.18, "learning_rate": 0.00019574436951951162, "loss": 1.2836, "step": 1015 }, { "epoch": 0.18, "learning_rate": 0.0001956526343599335, "loss": 1.1955, "step": 1020 }, { "epoch": 0.19, "learning_rate": 0.00019555994294660086, "loss": 1.2464, "step": 1025 }, { "epoch": 0.19, "learning_rate": 0.00019546629620616375, "loss": 1.2026, "step": 1030 }, { "epoch": 0.19, "learning_rate": 0.0001953716950748227, "loss": 1.2482, "step": 1035 }, { "epoch": 0.19, "learning_rate": 0.0001952761404983194, "loss": 1.2133, "step": 1040 }, { "epoch": 0.19, "learning_rate": 0.00019517963343192732, "loss": 1.2042, "step": 1045 }, { "epoch": 0.19, "learning_rate": 0.0001950821748404421, "loss": 1.2268, "step": 1050 }, { "epoch": 0.19, "learning_rate": 0.00019498376569817194, "loss": 1.2147, "step": 1055 }, { "epoch": 0.19, "learning_rate": 0.0001948844069889278, "loss": 1.2038, "step": 1060 }, { "epoch": 0.19, "learning_rate": 0.00019478409970601358, "loss": 1.2155, "step": 1065 }, { "epoch": 0.19, "learning_rate": 0.0001946828448522163, "loss": 1.2446, "step": 1070 }, { "epoch": 0.19, "learning_rate": 0.00019458064343979596, "loss": 1.3413, "step": 1075 }, { "epoch": 0.2, "learning_rate": 0.00019447749649047542, "loss": 3.8626, "step": 1080 }, { "epoch": 0.2, "learning_rate": 0.0001943734050354302, "loss": 6.3609, "step": 1085 }, { "epoch": 0.2, "learning_rate": 0.00019426837011527823, "loss": 5.2091, "step": 1090 }, { "epoch": 0.2, "learning_rate": 0.0001941623927800694, "loss": 1.9048, "step": 1095 }, { "epoch": 0.2, "learning_rate": 0.00019405547408927502, "loss": 1.3353, "step": 1100 }, { "epoch": 0.2, "learning_rate": 0.00019394761511177733, "loss": 1.586, "step": 1105 }, { "epoch": 0.2, "learning_rate": 0.0001938388169258587, "loss": 1.6837, "step": 1110 }, { "epoch": 0.2, "learning_rate": 0.00019372908061919097, "loss": 1.4677, "step": 1115 }, { "epoch": 0.2, "learning_rate": 0.00019361840728882447, "loss": 1.4636, "step": 1120 }, { "epoch": 0.2, "learning_rate": 0.00019350679804117711, "loss": 1.8272, "step": 1125 }, { "epoch": 0.2, "learning_rate": 0.00019339425399202327, "loss": 2.1575, "step": 1130 }, { "epoch": 0.21, "learning_rate": 0.00019328077626648278, "loss": 1.8073, "step": 1135 }, { "epoch": 0.21, "learning_rate": 0.00019316636599900946, "loss": 1.5317, "step": 1140 }, { "epoch": 0.21, "learning_rate": 0.00019305102433337998, "loss": 1.4068, "step": 1145 }, { "epoch": 0.21, "learning_rate": 0.00019293475242268223, "loss": 1.9292, "step": 1150 }, { "epoch": 0.21, "learning_rate": 0.00019281755142930407, "loss": 2.3396, "step": 1155 }, { "epoch": 0.21, "learning_rate": 0.00019269942252492133, "loss": 1.7311, "step": 1160 }, { "epoch": 0.21, "learning_rate": 0.0001925803668904865, "loss": 1.5029, "step": 1165 }, { "epoch": 0.21, "learning_rate": 0.00019246038571621657, "loss": 1.5402, "step": 1170 }, { "epoch": 0.21, "learning_rate": 0.0001923394802015814, "loss": 1.518, "step": 1175 }, { "epoch": 0.21, "learning_rate": 0.00019221765155529158, "loss": 1.4632, "step": 1180 }, { "epoch": 0.21, "learning_rate": 0.00019209490099528643, "loss": 1.3406, "step": 1185 }, { "epoch": 0.22, "learning_rate": 0.00019197122974872163, "loss": 1.3142, "step": 1190 }, { "epoch": 0.22, "learning_rate": 0.0001918466390519573, "loss": 1.3173, "step": 1195 }, { "epoch": 0.22, "learning_rate": 0.00019172113015054532, "loss": 1.2899, "step": 1200 }, { "epoch": 0.22, "learning_rate": 0.00019159470429921702, "loss": 1.2821, "step": 1205 }, { "epoch": 0.22, "learning_rate": 0.00019146736276187066, "loss": 1.3015, "step": 1210 }, { "epoch": 0.22, "learning_rate": 0.00019133910681155868, "loss": 1.2785, "step": 1215 }, { "epoch": 0.22, "learning_rate": 0.00019120993773047513, "loss": 1.2912, "step": 1220 }, { "epoch": 0.22, "learning_rate": 0.00019107985680994266, "loss": 1.2846, "step": 1225 }, { "epoch": 0.22, "learning_rate": 0.00019094886535039982, "loss": 1.2638, "step": 1230 }, { "epoch": 0.22, "learning_rate": 0.0001908169646613879, "loss": 1.2445, "step": 1235 }, { "epoch": 0.22, "learning_rate": 0.00019068415606153787, "loss": 1.262, "step": 1240 }, { "epoch": 0.23, "learning_rate": 0.00019055044087855726, "loss": 1.2625, "step": 1245 }, { "epoch": 0.23, "learning_rate": 0.00019041582044921688, "loss": 1.2291, "step": 1250 }, { "epoch": 0.23, "learning_rate": 0.00019028029611933739, "loss": 1.2873, "step": 1255 }, { "epoch": 0.23, "learning_rate": 0.00019014386924377582, "loss": 1.2883, "step": 1260 }, { "epoch": 0.23, "learning_rate": 0.00019000654118641211, "loss": 1.3117, "step": 1265 }, { "epoch": 0.23, "learning_rate": 0.0001898683133201356, "loss": 1.2681, "step": 1270 }, { "epoch": 0.23, "learning_rate": 0.00018972918702683092, "loss": 1.238, "step": 1275 }, { "epoch": 0.23, "learning_rate": 0.0001895891636973646, "loss": 1.2715, "step": 1280 }, { "epoch": 0.23, "learning_rate": 0.00018944824473157086, "loss": 1.2795, "step": 1285 }, { "epoch": 0.23, "learning_rate": 0.00018930643153823777, "loss": 1.255, "step": 1290 }, { "epoch": 0.23, "learning_rate": 0.00018916372553509314, "loss": 1.2555, "step": 1295 }, { "epoch": 0.24, "learning_rate": 0.0001890201281487903, "loss": 1.2326, "step": 1300 }, { "epoch": 0.24, "learning_rate": 0.00018887564081489393, "loss": 1.2387, "step": 1305 }, { "epoch": 0.24, "learning_rate": 0.0001887302649778656, "loss": 1.2303, "step": 1310 }, { "epoch": 0.24, "learning_rate": 0.00018858400209104933, "loss": 1.2334, "step": 1315 }, { "epoch": 0.24, "learning_rate": 0.00018843685361665723, "loss": 1.241, "step": 1320 }, { "epoch": 0.24, "learning_rate": 0.00018828882102575473, "loss": 1.2329, "step": 1325 }, { "epoch": 0.24, "learning_rate": 0.0001881399057982458, "loss": 1.244, "step": 1330 }, { "epoch": 0.24, "learning_rate": 0.0001879901094228584, "loss": 1.1944, "step": 1335 }, { "epoch": 0.24, "learning_rate": 0.00018783943339712938, "loss": 1.2184, "step": 1340 }, { "epoch": 0.24, "learning_rate": 0.0001876878792273896, "loss": 1.2478, "step": 1345 }, { "epoch": 0.24, "learning_rate": 0.00018753544842874887, "loss": 1.2352, "step": 1350 }, { "epoch": 0.25, "learning_rate": 0.00018738214252508073, "loss": 1.2322, "step": 1355 }, { "epoch": 0.25, "learning_rate": 0.0001872279630490074, "loss": 1.2599, "step": 1360 }, { "epoch": 0.25, "learning_rate": 0.00018707291154188425, "loss": 1.2495, "step": 1365 }, { "epoch": 0.25, "learning_rate": 0.00018691698955378445, "loss": 1.2369, "step": 1370 }, { "epoch": 0.25, "learning_rate": 0.0001867601986434836, "loss": 1.2732, "step": 1375 }, { "epoch": 0.25, "learning_rate": 0.00018660254037844388, "loss": 1.3559, "step": 1380 }, { "epoch": 0.25, "learning_rate": 0.00018644401633479874, "loss": 1.9588, "step": 1385 }, { "epoch": 0.25, "learning_rate": 0.00018628462809733683, "loss": 2.5337, "step": 1390 }, { "epoch": 0.25, "learning_rate": 0.00018612437725948631, "loss": 3.5829, "step": 1395 }, { "epoch": 0.25, "learning_rate": 0.00018596326542329888, "loss": 2.7139, "step": 1400 }, { "epoch": 0.25, "learning_rate": 0.00018580129419943373, "loss": 1.935, "step": 1405 }, { "epoch": 0.26, "learning_rate": 0.00018563846520714154, "loss": 1.5826, "step": 1410 }, { "epoch": 0.26, "learning_rate": 0.00018547478007424823, "loss": 1.4232, "step": 1415 }, { "epoch": 0.26, "learning_rate": 0.00018531024043713868, "loss": 1.4101, "step": 1420 }, { "epoch": 0.26, "learning_rate": 0.00018514484794074026, "loss": 1.4018, "step": 1425 }, { "epoch": 0.26, "learning_rate": 0.0001849786042385067, "loss": 1.3299, "step": 1430 }, { "epoch": 0.26, "learning_rate": 0.00018481151099240123, "loss": 1.306, "step": 1435 }, { "epoch": 0.26, "learning_rate": 0.00018464356987288013, "loss": 1.3258, "step": 1440 }, { "epoch": 0.26, "learning_rate": 0.00018447478255887595, "loss": 1.3129, "step": 1445 }, { "epoch": 0.26, "learning_rate": 0.00018430515073778093, "loss": 1.3296, "step": 1450 }, { "epoch": 0.26, "learning_rate": 0.0001841346761054298, "loss": 1.2667, "step": 1455 }, { "epoch": 0.26, "learning_rate": 0.00018396336036608307, "loss": 1.2641, "step": 1460 }, { "epoch": 0.27, "learning_rate": 0.0001837912052324099, "loss": 1.2361, "step": 1465 }, { "epoch": 0.27, "learning_rate": 0.0001836182124254711, "loss": 1.3251, "step": 1470 }, { "epoch": 0.27, "learning_rate": 0.00018344438367470168, "loss": 1.3017, "step": 1475 }, { "epoch": 0.27, "learning_rate": 0.0001832697207178938, "loss": 1.2847, "step": 1480 }, { "epoch": 0.27, "learning_rate": 0.00018309422530117924, "loss": 1.3278, "step": 1485 }, { "epoch": 0.27, "learning_rate": 0.0001829178991790121, "loss": 1.2803, "step": 1490 }, { "epoch": 0.27, "learning_rate": 0.00018274074411415105, "loss": 1.3346, "step": 1495 }, { "epoch": 0.27, "learning_rate": 0.00018256276187764197, "loss": 1.2782, "step": 1500 }, { "epoch": 0.27, "learning_rate": 0.00018238395424879992, "loss": 1.3485, "step": 1505 }, { "epoch": 0.27, "learning_rate": 0.00018220432301519168, "loss": 1.3187, "step": 1510 }, { "epoch": 0.27, "learning_rate": 0.0001820238699726177, "loss": 1.2878, "step": 1515 }, { "epoch": 0.28, "learning_rate": 0.00018184259692509406, "loss": 1.3199, "step": 1520 }, { "epoch": 0.28, "learning_rate": 0.00018166050568483474, "loss": 1.2897, "step": 1525 }, { "epoch": 0.28, "learning_rate": 0.0001814775980722332, "loss": 1.3277, "step": 1530 }, { "epoch": 0.28, "learning_rate": 0.0001812938759158443, "loss": 1.3135, "step": 1535 }, { "epoch": 0.28, "learning_rate": 0.000181109341052366, "loss": 1.3059, "step": 1540 }, { "epoch": 0.28, "learning_rate": 0.00018092399532662113, "loss": 1.342, "step": 1545 }, { "epoch": 0.28, "learning_rate": 0.00018073784059153872, "loss": 1.3264, "step": 1550 }, { "epoch": 0.28, "learning_rate": 0.00018055087870813558, "loss": 1.3069, "step": 1555 }, { "epoch": 0.28, "learning_rate": 0.00018036311154549784, "loss": 1.3145, "step": 1560 }, { "epoch": 0.28, "learning_rate": 0.00018017454098076194, "loss": 1.313, "step": 1565 }, { "epoch": 0.28, "learning_rate": 0.00017998516889909614, "loss": 1.2827, "step": 1570 }, { "epoch": 0.29, "learning_rate": 0.00017979499719368168, "loss": 1.2697, "step": 1575 }, { "epoch": 0.29, "learning_rate": 0.0001796040277656936, "loss": 1.3068, "step": 1580 }, { "epoch": 0.29, "learning_rate": 0.0001794122625242819, "loss": 1.2713, "step": 1585 }, { "epoch": 0.29, "learning_rate": 0.00017921970338655266, "loss": 1.2812, "step": 1590 }, { "epoch": 0.29, "learning_rate": 0.00017902635227754838, "loss": 1.2795, "step": 1595 }, { "epoch": 0.29, "learning_rate": 0.00017883221113022916, "loss": 1.2631, "step": 1600 }, { "epoch": 0.29, "learning_rate": 0.00017863728188545326, "loss": 1.2359, "step": 1605 }, { "epoch": 0.29, "learning_rate": 0.00017844156649195759, "loss": 1.272, "step": 1610 }, { "epoch": 0.29, "learning_rate": 0.00017824506690633832, "loss": 1.2845, "step": 1615 }, { "epoch": 0.29, "learning_rate": 0.00017804778509303138, "loss": 1.2616, "step": 1620 }, { "epoch": 0.29, "learning_rate": 0.00017784972302429264, "loss": 1.2332, "step": 1625 }, { "epoch": 0.3, "learning_rate": 0.0001776508826801784, "loss": 1.2521, "step": 1630 }, { "epoch": 0.3, "learning_rate": 0.00017745126604852538, "loss": 1.2938, "step": 1635 }, { "epoch": 0.3, "learning_rate": 0.0001772508751249311, "loss": 1.2933, "step": 1640 }, { "epoch": 0.3, "learning_rate": 0.00017704971191273368, "loss": 1.2702, "step": 1645 }, { "epoch": 0.3, "learning_rate": 0.00017684777842299205, "loss": 1.2851, "step": 1650 }, { "epoch": 0.3, "learning_rate": 0.00017664507667446554, "loss": 1.2907, "step": 1655 }, { "epoch": 0.3, "learning_rate": 0.00017644160869359404, "loss": 1.2637, "step": 1660 }, { "epoch": 0.3, "learning_rate": 0.0001762373765144775, "loss": 1.2571, "step": 1665 }, { "epoch": 0.3, "learning_rate": 0.00017603238217885569, "loss": 1.2194, "step": 1670 }, { "epoch": 0.3, "learning_rate": 0.00017582662773608777, "loss": 1.2755, "step": 1675 }, { "epoch": 0.3, "learning_rate": 0.00017562011524313185, "loss": 1.2936, "step": 1680 }, { "epoch": 0.31, "learning_rate": 0.0001754128467645243, "loss": 1.2564, "step": 1685 }, { "epoch": 0.31, "learning_rate": 0.0001752048243723593, "loss": 1.2177, "step": 1690 }, { "epoch": 0.31, "learning_rate": 0.00017499605014626788, "loss": 1.3018, "step": 1695 }, { "epoch": 0.31, "learning_rate": 0.00017478652617339738, "loss": 1.217, "step": 1700 }, { "epoch": 0.31, "learning_rate": 0.00017457625454839039, "loss": 1.2899, "step": 1705 }, { "epoch": 0.31, "learning_rate": 0.00017436523737336402, "loss": 1.2608, "step": 1710 }, { "epoch": 0.31, "learning_rate": 0.00017415347675788856, "loss": 1.2849, "step": 1715 }, { "epoch": 0.31, "learning_rate": 0.00017394097481896676, "loss": 1.3029, "step": 1720 }, { "epoch": 0.31, "learning_rate": 0.0001737277336810124, "loss": 1.2907, "step": 1725 }, { "epoch": 0.31, "learning_rate": 0.0001735137554758292, "loss": 1.308, "step": 1730 }, { "epoch": 0.31, "learning_rate": 0.0001732990423425894, "loss": 1.3432, "step": 1735 }, { "epoch": 0.32, "learning_rate": 0.00017308359642781242, "loss": 1.3239, "step": 1740 }, { "epoch": 0.32, "learning_rate": 0.00017286741988534348, "loss": 1.2793, "step": 1745 }, { "epoch": 0.32, "learning_rate": 0.0001726505148763319, "loss": 1.342, "step": 1750 }, { "epoch": 0.32, "learning_rate": 0.0001724328835692097, "loss": 1.2451, "step": 1755 }, { "epoch": 0.32, "learning_rate": 0.0001722145281396697, "loss": 1.2804, "step": 1760 }, { "epoch": 0.32, "learning_rate": 0.00017199545077064394, "loss": 1.3129, "step": 1765 }, { "epoch": 0.32, "learning_rate": 0.00017177565365228178, "loss": 1.2582, "step": 1770 }, { "epoch": 0.32, "learning_rate": 0.00017155513898192806, "loss": 1.2644, "step": 1775 }, { "epoch": 0.32, "learning_rate": 0.00017133390896410106, "loss": 1.2875, "step": 1780 }, { "epoch": 0.32, "learning_rate": 0.0001711119658104705, "loss": 1.3237, "step": 1785 }, { "epoch": 0.32, "learning_rate": 0.00017088931173983539, "loss": 1.3011, "step": 1790 }, { "epoch": 0.33, "learning_rate": 0.00017066594897810196, "loss": 1.3125, "step": 1795 }, { "epoch": 0.33, "learning_rate": 0.00017044187975826124, "loss": 1.35, "step": 1800 }, { "epoch": 0.33, "learning_rate": 0.00017021710632036694, "loss": 1.4059, "step": 1805 }, { "epoch": 0.33, "learning_rate": 0.00016999163091151287, "loss": 1.4471, "step": 1810 }, { "epoch": 0.33, "learning_rate": 0.00016976545578581057, "loss": 1.3584, "step": 1815 }, { "epoch": 0.33, "learning_rate": 0.00016953858320436672, "loss": 1.3308, "step": 1820 }, { "epoch": 0.33, "learning_rate": 0.0001693110154352606, "loss": 1.3613, "step": 1825 }, { "epoch": 0.33, "learning_rate": 0.0001690827547535214, "loss": 1.3879, "step": 1830 }, { "epoch": 0.33, "learning_rate": 0.00016885380344110545, "loss": 1.3412, "step": 1835 }, { "epoch": 0.33, "learning_rate": 0.0001686241637868734, "loss": 1.3521, "step": 1840 }, { "epoch": 0.33, "learning_rate": 0.00016839383808656732, "loss": 1.3658, "step": 1845 }, { "epoch": 0.34, "learning_rate": 0.00016816282864278793, "loss": 1.3324, "step": 1850 }, { "epoch": 0.34, "learning_rate": 0.00016793113776497127, "loss": 1.3101, "step": 1855 }, { "epoch": 0.34, "learning_rate": 0.0001676987677693659, "loss": 1.3923, "step": 1860 }, { "epoch": 0.34, "learning_rate": 0.0001674657209790095, "loss": 1.3177, "step": 1865 }, { "epoch": 0.34, "learning_rate": 0.00016723199972370594, "loss": 1.2952, "step": 1870 }, { "epoch": 0.34, "learning_rate": 0.00016699760634000165, "loss": 1.3095, "step": 1875 }, { "epoch": 0.34, "learning_rate": 0.00016676254317116252, "loss": 1.3412, "step": 1880 }, { "epoch": 0.34, "learning_rate": 0.00016652681256715032, "loss": 1.2945, "step": 1885 }, { "epoch": 0.34, "learning_rate": 0.00016629041688459941, "loss": 1.3498, "step": 1890 }, { "epoch": 0.34, "learning_rate": 0.0001660533584867928, "loss": 1.38, "step": 1895 }, { "epoch": 0.34, "learning_rate": 0.00016581563974363902, "loss": 1.3754, "step": 1900 }, { "epoch": 0.35, "learning_rate": 0.00016557726303164803, "loss": 1.357, "step": 1905 }, { "epoch": 0.35, "learning_rate": 0.00016533823073390757, "loss": 1.3054, "step": 1910 }, { "epoch": 0.35, "learning_rate": 0.00016509854524005948, "loss": 1.3338, "step": 1915 }, { "epoch": 0.35, "learning_rate": 0.0001648582089462756, "loss": 1.2822, "step": 1920 }, { "epoch": 0.35, "learning_rate": 0.00016461722425523402, "loss": 1.3557, "step": 1925 }, { "epoch": 0.35, "learning_rate": 0.00016437559357609488, "loss": 1.3282, "step": 1930 }, { "epoch": 0.35, "learning_rate": 0.00016413331932447638, "loss": 1.3206, "step": 1935 }, { "epoch": 0.35, "learning_rate": 0.00016389040392243056, "loss": 1.2873, "step": 1940 }, { "epoch": 0.35, "learning_rate": 0.00016364684979841925, "loss": 1.3082, "step": 1945 }, { "epoch": 0.35, "learning_rate": 0.00016340265938728958, "loss": 1.3155, "step": 1950 }, { "epoch": 0.35, "learning_rate": 0.00016315783513024977, "loss": 1.3231, "step": 1955 }, { "epoch": 0.36, "learning_rate": 0.0001629123794748447, "loss": 1.3236, "step": 1960 }, { "epoch": 0.36, "learning_rate": 0.00016266629487493144, "loss": 1.3079, "step": 1965 }, { "epoch": 0.36, "learning_rate": 0.0001624195837906547, "loss": 1.3281, "step": 1970 }, { "epoch": 0.36, "learning_rate": 0.0001621722486884222, "loss": 1.3313, "step": 1975 }, { "epoch": 0.36, "learning_rate": 0.0001619242920408802, "loss": 1.3004, "step": 1980 }, { "epoch": 0.36, "learning_rate": 0.0001616757163268885, "loss": 1.3087, "step": 1985 }, { "epoch": 0.36, "learning_rate": 0.00016142652403149582, "loss": 1.3004, "step": 1990 }, { "epoch": 0.36, "learning_rate": 0.00016117671764591504, "loss": 1.3005, "step": 1995 }, { "epoch": 0.36, "learning_rate": 0.0001609262996674981, "loss": 1.3026, "step": 2000 }, { "epoch": 0.36, "learning_rate": 0.00016067527259971113, "loss": 1.3331, "step": 2005 }, { "epoch": 0.36, "learning_rate": 0.00016042363895210946, "loss": 1.2832, "step": 2010 }, { "epoch": 0.37, "learning_rate": 0.00016017140124031245, "loss": 1.3191, "step": 2015 }, { "epoch": 0.37, "learning_rate": 0.0001599185619859784, "loss": 1.3237, "step": 2020 }, { "epoch": 0.37, "learning_rate": 0.00015966512371677928, "loss": 1.2876, "step": 2025 }, { "epoch": 0.37, "learning_rate": 0.0001594110889663756, "loss": 1.307, "step": 2030 }, { "epoch": 0.37, "learning_rate": 0.00015915646027439087, "loss": 1.3195, "step": 2035 }, { "epoch": 0.37, "learning_rate": 0.00015890124018638638, "loss": 1.2279, "step": 2040 }, { "epoch": 0.37, "learning_rate": 0.00015864543125383574, "loss": 1.2658, "step": 2045 }, { "epoch": 0.37, "learning_rate": 0.00015838903603409925, "loss": 1.3215, "step": 2050 }, { "epoch": 0.37, "learning_rate": 0.0001581320570903984, "loss": 1.2436, "step": 2055 }, { "epoch": 0.37, "learning_rate": 0.00015787449699179035, "loss": 1.3196, "step": 2060 }, { "epoch": 0.37, "learning_rate": 0.000157616358313142, "loss": 1.3022, "step": 2065 }, { "epoch": 0.37, "learning_rate": 0.0001573576436351046, "loss": 1.3023, "step": 2070 }, { "epoch": 0.38, "learning_rate": 0.00015709835554408765, "loss": 1.2972, "step": 2075 }, { "epoch": 0.38, "learning_rate": 0.00015683849663223308, "loss": 1.2841, "step": 2080 }, { "epoch": 0.38, "learning_rate": 0.00015657806949738947, "loss": 1.2875, "step": 2085 }, { "epoch": 0.38, "learning_rate": 0.00015631707674308606, "loss": 1.2808, "step": 2090 }, { "epoch": 0.38, "learning_rate": 0.0001560555209785066, "loss": 1.2274, "step": 2095 }, { "epoch": 0.38, "learning_rate": 0.00015579340481846336, "loss": 1.2724, "step": 2100 }, { "epoch": 0.38, "learning_rate": 0.00015553073088337094, "loss": 1.2879, "step": 2105 }, { "epoch": 0.38, "learning_rate": 0.00015526750179922013, "loss": 1.3452, "step": 2110 }, { "epoch": 0.38, "learning_rate": 0.00015500372019755168, "loss": 1.3314, "step": 2115 }, { "epoch": 0.38, "learning_rate": 0.00015473938871542986, "loss": 1.3538, "step": 2120 }, { "epoch": 0.38, "learning_rate": 0.00015447450999541616, "loss": 1.2747, "step": 2125 }, { "epoch": 0.39, "learning_rate": 0.00015420908668554298, "loss": 1.289, "step": 2130 }, { "epoch": 0.39, "learning_rate": 0.000153943121439287, "loss": 1.2945, "step": 2135 }, { "epoch": 0.39, "learning_rate": 0.0001536766169155428, "loss": 1.2736, "step": 2140 }, { "epoch": 0.39, "learning_rate": 0.00015340957577859605, "loss": 1.3336, "step": 2145 }, { "epoch": 0.39, "learning_rate": 0.00015314200069809712, "loss": 1.2652, "step": 2150 }, { "epoch": 0.39, "learning_rate": 0.00015287389434903435, "loss": 1.2656, "step": 2155 }, { "epoch": 0.39, "learning_rate": 0.00015260525941170712, "loss": 1.3415, "step": 2160 }, { "epoch": 0.39, "learning_rate": 0.0001523360985716993, "loss": 1.3093, "step": 2165 }, { "epoch": 0.39, "learning_rate": 0.00015206641451985222, "loss": 1.3094, "step": 2170 }, { "epoch": 0.39, "learning_rate": 0.00015179620995223783, "loss": 1.3243, "step": 2175 }, { "epoch": 0.39, "learning_rate": 0.00015152548757013182, "loss": 1.3247, "step": 2180 }, { "epoch": 0.4, "learning_rate": 0.00015125425007998653, "loss": 1.3668, "step": 2185 }, { "epoch": 0.4, "learning_rate": 0.00015098250019340387, "loss": 1.3491, "step": 2190 }, { "epoch": 0.4, "learning_rate": 0.00015071024062710824, "loss": 1.3402, "step": 2195 }, { "epoch": 0.4, "learning_rate": 0.00015043747410291945, "loss": 1.358, "step": 2200 }, { "epoch": 0.4, "learning_rate": 0.00015016420334772543, "loss": 1.32, "step": 2205 }, { "epoch": 0.4, "learning_rate": 0.00014989043109345498, "loss": 1.3746, "step": 2210 }, { "epoch": 0.4, "learning_rate": 0.00014961616007705042, "loss": 1.3256, "step": 2215 }, { "epoch": 0.4, "learning_rate": 0.00014934139304044033, "loss": 1.355, "step": 2220 }, { "epoch": 0.4, "learning_rate": 0.00014906613273051202, "loss": 1.2912, "step": 2225 }, { "epoch": 0.4, "learning_rate": 0.00014879038189908415, "loss": 1.3153, "step": 2230 }, { "epoch": 0.4, "learning_rate": 0.00014851414330287928, "loss": 1.3717, "step": 2235 }, { "epoch": 0.41, "learning_rate": 0.00014823741970349606, "loss": 1.4614, "step": 2240 }, { "epoch": 0.41, "learning_rate": 0.00014796021386738193, "loss": 1.3926, "step": 2245 }, { "epoch": 0.41, "learning_rate": 0.0001476825285658053, "loss": 1.3294, "step": 2250 }, { "epoch": 0.41, "learning_rate": 0.00014740436657482777, "loss": 1.3613, "step": 2255 }, { "epoch": 0.41, "learning_rate": 0.00014712573067527664, "loss": 1.2937, "step": 2260 }, { "epoch": 0.41, "learning_rate": 0.00014684662365271675, "loss": 1.2975, "step": 2265 }, { "epoch": 0.41, "learning_rate": 0.000146567048297423, "loss": 1.3285, "step": 2270 }, { "epoch": 0.41, "learning_rate": 0.00014628700740435221, "loss": 1.3075, "step": 2275 }, { "epoch": 0.41, "learning_rate": 0.00014600650377311522, "loss": 1.3492, "step": 2280 }, { "epoch": 0.41, "learning_rate": 0.000145725540207949, "loss": 1.2762, "step": 2285 }, { "epoch": 0.41, "learning_rate": 0.00014544411951768852, "loss": 1.2766, "step": 2290 }, { "epoch": 0.42, "learning_rate": 0.0001451622445157387, "loss": 1.2881, "step": 2295 }, { "epoch": 0.42, "learning_rate": 0.00014487991802004623, "loss": 1.3359, "step": 2300 }, { "epoch": 0.42, "learning_rate": 0.00014459714285307152, "loss": 1.3266, "step": 2305 }, { "epoch": 0.42, "learning_rate": 0.00014431392184176042, "loss": 1.237, "step": 2310 }, { "epoch": 0.42, "learning_rate": 0.00014403025781751594, "loss": 1.3363, "step": 2315 }, { "epoch": 0.42, "learning_rate": 0.00014374615361616985, "loss": 1.3084, "step": 2320 }, { "epoch": 0.42, "learning_rate": 0.00014346161207795462, "loss": 1.3463, "step": 2325 }, { "epoch": 0.42, "learning_rate": 0.00014317663604747477, "loss": 1.3328, "step": 2330 }, { "epoch": 0.42, "learning_rate": 0.00014289122837367834, "loss": 1.3092, "step": 2335 }, { "epoch": 0.42, "learning_rate": 0.00014260539190982886, "loss": 1.3619, "step": 2340 }, { "epoch": 0.42, "learning_rate": 0.00014231912951347632, "loss": 1.7222, "step": 2345 }, { "epoch": 0.43, "learning_rate": 0.00014203244404642894, "loss": 2.15, "step": 2350 }, { "epoch": 0.43, "learning_rate": 0.0001417453383747244, "loss": 1.6064, "step": 2355 }, { "epoch": 0.43, "learning_rate": 0.00014145781536860122, "loss": 1.5031, "step": 2360 }, { "epoch": 0.43, "learning_rate": 0.00014116987790247003, "loss": 1.409, "step": 2365 }, { "epoch": 0.43, "learning_rate": 0.00014088152885488502, "loss": 1.4162, "step": 2370 }, { "epoch": 0.43, "learning_rate": 0.0001405927711085149, "loss": 1.3969, "step": 2375 }, { "epoch": 0.43, "learning_rate": 0.00014030360755011424, "loss": 1.4638, "step": 2380 }, { "epoch": 0.43, "learning_rate": 0.00014001404107049454, "loss": 1.4287, "step": 2385 }, { "epoch": 0.43, "learning_rate": 0.0001397240745644954, "loss": 1.3563, "step": 2390 }, { "epoch": 0.43, "learning_rate": 0.00013943371093095558, "loss": 1.5156, "step": 2395 }, { "epoch": 0.43, "learning_rate": 0.00013914295307268396, "loss": 1.4148, "step": 2400 }, { "epoch": 0.44, "learning_rate": 0.0001388518038964304, "loss": 1.4221, "step": 2405 }, { "epoch": 0.44, "learning_rate": 0.0001385602663128571, "loss": 1.3772, "step": 2410 }, { "epoch": 0.44, "learning_rate": 0.000138268343236509, "loss": 1.3444, "step": 2415 }, { "epoch": 0.44, "learning_rate": 0.00013797603758578496, "loss": 1.3119, "step": 2420 }, { "epoch": 0.44, "learning_rate": 0.00013768335228290845, "loss": 1.3686, "step": 2425 }, { "epoch": 0.44, "learning_rate": 0.00013739029025389846, "loss": 1.3505, "step": 2430 }, { "epoch": 0.44, "learning_rate": 0.00013709685442854012, "loss": 1.3769, "step": 2435 }, { "epoch": 0.44, "learning_rate": 0.00013680304774035538, "loss": 1.3505, "step": 2440 }, { "epoch": 0.44, "learning_rate": 0.00013650887312657392, "loss": 1.362, "step": 2445 }, { "epoch": 0.44, "learning_rate": 0.00013621433352810353, "loss": 1.3773, "step": 2450 }, { "epoch": 0.44, "learning_rate": 0.0001359194318895008, "loss": 1.325, "step": 2455 }, { "epoch": 0.45, "learning_rate": 0.00013562417115894172, "loss": 1.3583, "step": 2460 }, { "epoch": 0.45, "learning_rate": 0.00013532855428819213, "loss": 1.3345, "step": 2465 }, { "epoch": 0.45, "learning_rate": 0.00013503258423257835, "loss": 1.3346, "step": 2470 }, { "epoch": 0.45, "learning_rate": 0.0001347362639509574, "loss": 1.2946, "step": 2475 }, { "epoch": 0.45, "learning_rate": 0.0001344395964056878, "loss": 1.3796, "step": 2480 }, { "epoch": 0.45, "learning_rate": 0.00013414258456259943, "loss": 1.3362, "step": 2485 }, { "epoch": 0.45, "learning_rate": 0.0001338452313909644, "loss": 1.2915, "step": 2490 }, { "epoch": 0.45, "learning_rate": 0.00013354753986346692, "loss": 1.3044, "step": 2495 }, { "epoch": 0.45, "learning_rate": 0.00013324951295617398, "loss": 1.3056, "step": 2500 }, { "epoch": 0.45, "learning_rate": 0.00013295115364850534, "loss": 1.3265, "step": 2505 }, { "epoch": 0.45, "learning_rate": 0.00013265246492320383, "loss": 1.3577, "step": 2510 }, { "epoch": 0.46, "learning_rate": 0.00013235344976630546, "loss": 1.3357, "step": 2515 }, { "epoch": 0.46, "learning_rate": 0.00013205411116710972, "loss": 1.293, "step": 2520 }, { "epoch": 0.46, "learning_rate": 0.00013175445211814952, "loss": 1.3595, "step": 2525 }, { "epoch": 0.46, "learning_rate": 0.00013145447561516138, "loss": 1.3376, "step": 2530 }, { "epoch": 0.46, "learning_rate": 0.0001311541846570555, "loss": 1.2918, "step": 2535 }, { "epoch": 0.46, "learning_rate": 0.00013085358224588565, "loss": 1.3296, "step": 2540 }, { "epoch": 0.46, "learning_rate": 0.00013055267138681936, "loss": 1.2756, "step": 2545 }, { "epoch": 0.46, "learning_rate": 0.0001302514550881076, "loss": 1.3264, "step": 2550 }, { "epoch": 0.46, "learning_rate": 0.000129949936361055, "loss": 1.2966, "step": 2555 }, { "epoch": 0.46, "learning_rate": 0.0001296481182199896, "loss": 1.2693, "step": 2560 }, { "epoch": 0.46, "learning_rate": 0.00012934600368223265, "loss": 1.3288, "step": 2565 }, { "epoch": 0.47, "learning_rate": 0.00012904359576806858, "loss": 1.258, "step": 2570 }, { "epoch": 0.47, "learning_rate": 0.00012874089750071477, "loss": 1.2904, "step": 2575 }, { "epoch": 0.47, "learning_rate": 0.0001284379119062912, "loss": 1.2678, "step": 2580 }, { "epoch": 0.47, "learning_rate": 0.00012813464201379043, "loss": 1.3021, "step": 2585 }, { "epoch": 0.47, "learning_rate": 0.0001278310908550471, "loss": 1.2584, "step": 2590 }, { "epoch": 0.47, "learning_rate": 0.00012752726146470774, "loss": 1.2773, "step": 2595 }, { "epoch": 0.47, "learning_rate": 0.00012722315688020047, "loss": 1.2542, "step": 2600 }, { "epoch": 0.47, "learning_rate": 0.0001269187801417044, "loss": 1.2914, "step": 2605 }, { "epoch": 0.47, "learning_rate": 0.00012661413429211957, "loss": 1.2748, "step": 2610 }, { "epoch": 0.47, "learning_rate": 0.0001263092223770363, "loss": 1.2961, "step": 2615 }, { "epoch": 0.47, "learning_rate": 0.0001260040474447048, "loss": 1.2932, "step": 2620 }, { "epoch": 0.48, "learning_rate": 0.0001256986125460047, "loss": 1.2617, "step": 2625 }, { "epoch": 0.48, "learning_rate": 0.0001253929207344146, "loss": 1.3035, "step": 2630 }, { "epoch": 0.48, "learning_rate": 0.00012508697506598144, "loss": 1.3183, "step": 2635 }, { "epoch": 0.48, "learning_rate": 0.00012478077859929, "loss": 1.2684, "step": 2640 }, { "epoch": 0.48, "learning_rate": 0.0001244743343954324, "loss": 1.3013, "step": 2645 }, { "epoch": 0.48, "learning_rate": 0.00012416764551797732, "loss": 1.2886, "step": 2650 }, { "epoch": 0.48, "learning_rate": 0.00012386071503293962, "loss": 1.297, "step": 2655 }, { "epoch": 0.48, "learning_rate": 0.0001235535460087494, "loss": 1.2574, "step": 2660 }, { "epoch": 0.48, "learning_rate": 0.00012324614151622154, "loss": 1.259, "step": 2665 }, { "epoch": 0.48, "learning_rate": 0.00012293850462852496, "loss": 1.2833, "step": 2670 }, { "epoch": 0.48, "learning_rate": 0.00012263063842115184, "loss": 1.3061, "step": 2675 }, { "epoch": 0.49, "learning_rate": 0.00012232254597188688, "loss": 1.2917, "step": 2680 }, { "epoch": 0.49, "learning_rate": 0.00012201423036077657, "loss": 1.2163, "step": 2685 }, { "epoch": 0.49, "learning_rate": 0.0001217056946700984, "loss": 1.2623, "step": 2690 }, { "epoch": 0.49, "learning_rate": 0.00012139694198433004, "loss": 1.3024, "step": 2695 }, { "epoch": 0.49, "learning_rate": 0.00012108797539011847, "loss": 1.3067, "step": 2700 }, { "epoch": 0.49, "learning_rate": 0.00012077879797624909, "loss": 1.2481, "step": 2705 }, { "epoch": 0.49, "learning_rate": 0.00012046941283361502, "loss": 1.2721, "step": 2710 }, { "epoch": 0.49, "learning_rate": 0.00012015982305518594, "loss": 1.237, "step": 2715 }, { "epoch": 0.49, "learning_rate": 0.0001198500317359774, "loss": 1.2805, "step": 2720 }, { "epoch": 0.49, "learning_rate": 0.00011954004197301978, "loss": 1.2614, "step": 2725 }, { "epoch": 0.49, "learning_rate": 0.00011922985686532726, "loss": 1.3051, "step": 2730 }, { "epoch": 0.5, "learning_rate": 0.00011891947951386701, "loss": 1.2543, "step": 2735 }, { "epoch": 0.5, "learning_rate": 0.00011860891302152798, "loss": 1.3179, "step": 2740 }, { "epoch": 0.5, "learning_rate": 0.00011829816049309009, "loss": 1.2996, "step": 2745 }, { "epoch": 0.5, "learning_rate": 0.00011798722503519304, "loss": 1.2597, "step": 2750 }, { "epoch": 0.5, "learning_rate": 0.0001176761097563053, "loss": 1.2888, "step": 2755 }, { "epoch": 0.5, "learning_rate": 0.00011736481776669306, "loss": 1.3059, "step": 2760 }, { "epoch": 0.5, "learning_rate": 0.00011705335217838909, "loss": 1.2971, "step": 2765 }, { "epoch": 0.5, "learning_rate": 0.00011674171610516165, "loss": 1.3073, "step": 2770 }, { "epoch": 0.5, "learning_rate": 0.00011642991266248338, "loss": 1.2934, "step": 2775 }, { "epoch": 0.5, "learning_rate": 0.00011611794496750019, "loss": 1.2918, "step": 2780 }, { "epoch": 0.5, "learning_rate": 0.00011580581613899992, "loss": 1.2676, "step": 2785 }, { "epoch": 0.51, "learning_rate": 0.00011549352929738142, "loss": 1.2998, "step": 2790 }, { "epoch": 0.51, "learning_rate": 0.0001151810875646231, "loss": 1.2429, "step": 2795 }, { "epoch": 0.51, "learning_rate": 0.00011486849406425188, "loss": 1.2739, "step": 2800 }, { "epoch": 0.51, "learning_rate": 0.00011455575192131204, "loss": 1.2622, "step": 2805 }, { "epoch": 0.51, "learning_rate": 0.00011424286426233368, "loss": 1.2449, "step": 2810 }, { "epoch": 0.51, "learning_rate": 0.00011392983421530175, "loss": 1.2877, "step": 2815 }, { "epoch": 0.51, "learning_rate": 0.00011361666490962468, "loss": 1.3152, "step": 2820 }, { "epoch": 0.51, "learning_rate": 0.000113303359476103, "loss": 1.2471, "step": 2825 }, { "epoch": 0.51, "learning_rate": 0.00011298992104689825, "loss": 1.2751, "step": 2830 }, { "epoch": 0.51, "learning_rate": 0.00011267635275550148, "loss": 1.3161, "step": 2835 }, { "epoch": 0.51, "learning_rate": 0.00011236265773670196, "loss": 1.2622, "step": 2840 }, { "epoch": 0.52, "learning_rate": 0.00011204883912655597, "loss": 1.2995, "step": 2845 }, { "epoch": 0.52, "learning_rate": 0.00011173490006235528, "loss": 1.2573, "step": 2850 }, { "epoch": 0.52, "learning_rate": 0.00011142084368259585, "loss": 1.2543, "step": 2855 }, { "epoch": 0.52, "learning_rate": 0.00011110667312694653, "loss": 1.2807, "step": 2860 }, { "epoch": 0.52, "learning_rate": 0.00011079239153621752, "loss": 1.2458, "step": 2865 }, { "epoch": 0.52, "learning_rate": 0.0001104780020523291, "loss": 1.2905, "step": 2870 }, { "epoch": 0.52, "learning_rate": 0.00011016350781828019, "loss": 1.2601, "step": 2875 }, { "epoch": 0.52, "learning_rate": 0.00010984891197811687, "loss": 1.2885, "step": 2880 }, { "epoch": 0.52, "learning_rate": 0.00010953421767690104, "loss": 1.2934, "step": 2885 }, { "epoch": 0.52, "learning_rate": 0.00010921942806067886, "loss": 1.2722, "step": 2890 }, { "epoch": 0.52, "learning_rate": 0.00010890454627644944, "loss": 1.2924, "step": 2895 }, { "epoch": 0.53, "learning_rate": 0.00010858957547213327, "loss": 1.3087, "step": 2900 }, { "epoch": 0.53, "learning_rate": 0.00010827451879654076, "loss": 1.2663, "step": 2905 }, { "epoch": 0.53, "learning_rate": 0.00010795937939934088, "loss": 1.2771, "step": 2910 }, { "epoch": 0.53, "learning_rate": 0.00010764416043102952, "loss": 1.2424, "step": 2915 }, { "epoch": 0.53, "learning_rate": 0.000107328865042898, "loss": 1.2307, "step": 2920 }, { "epoch": 0.53, "learning_rate": 0.00010701349638700173, "loss": 1.2394, "step": 2925 }, { "epoch": 0.53, "learning_rate": 0.00010669805761612854, "loss": 1.2664, "step": 2930 }, { "epoch": 0.53, "learning_rate": 0.00010638255188376717, "loss": 1.2833, "step": 2935 }, { "epoch": 0.53, "learning_rate": 0.00010606698234407586, "loss": 1.2873, "step": 2940 }, { "epoch": 0.53, "learning_rate": 0.0001057513521518507, "loss": 1.2672, "step": 2945 }, { "epoch": 0.53, "learning_rate": 0.00010543566446249408, "loss": 1.252, "step": 2950 }, { "epoch": 0.54, "learning_rate": 0.00010511992243198334, "loss": 1.2806, "step": 2955 }, { "epoch": 0.54, "learning_rate": 0.00010480412921683888, "loss": 1.3012, "step": 2960 }, { "epoch": 0.54, "learning_rate": 0.000104488287974093, "loss": 1.2897, "step": 2965 }, { "epoch": 0.54, "learning_rate": 0.00010417240186125805, "loss": 1.2662, "step": 2970 }, { "epoch": 0.54, "learning_rate": 0.00010385647403629488, "loss": 1.2983, "step": 2975 }, { "epoch": 0.54, "learning_rate": 0.00010354050765758147, "loss": 1.3074, "step": 2980 }, { "epoch": 0.54, "learning_rate": 0.00010322450588388117, "loss": 1.2433, "step": 2985 }, { "epoch": 0.54, "learning_rate": 0.00010290847187431113, "loss": 1.2837, "step": 2990 }, { "epoch": 0.54, "learning_rate": 0.00010259240878831091, "loss": 1.2503, "step": 2995 }, { "epoch": 0.54, "learning_rate": 0.00010227631978561056, "loss": 1.2471, "step": 3000 }, { "epoch": 0.54, "learning_rate": 0.00010196020802619941, "loss": 1.2549, "step": 3005 }, { "epoch": 0.55, "learning_rate": 0.00010164407667029417, "loss": 1.3157, "step": 3010 }, { "epoch": 0.55, "learning_rate": 0.00010132792887830744, "loss": 1.2674, "step": 3015 }, { "epoch": 0.55, "learning_rate": 0.00010101176781081625, "loss": 1.2781, "step": 3020 }, { "epoch": 0.55, "learning_rate": 0.00010069559662853027, "loss": 1.2948, "step": 3025 }, { "epoch": 0.55, "learning_rate": 0.00010037941849226032, "loss": 1.3088, "step": 3030 }, { "epoch": 0.55, "learning_rate": 0.00010006323656288669, "loss": 1.277, "step": 3035 }, { "epoch": 0.55, "learning_rate": 9.974705400132764e-05, "loss": 1.2629, "step": 3040 }, { "epoch": 0.55, "learning_rate": 9.943087396850773e-05, "loss": 1.2973, "step": 3045 }, { "epoch": 0.55, "learning_rate": 9.911469962532627e-05, "loss": 1.3478, "step": 3050 }, { "epoch": 0.55, "learning_rate": 9.879853413262563e-05, "loss": 1.2582, "step": 3055 }, { "epoch": 0.55, "learning_rate": 9.848238065115975e-05, "loss": 1.247, "step": 3060 }, { "epoch": 0.56, "learning_rate": 9.816624234156249e-05, "loss": 1.2527, "step": 3065 }, { "epoch": 0.56, "learning_rate": 9.785012236431598e-05, "loss": 1.211, "step": 3070 }, { "epoch": 0.56, "learning_rate": 9.753402387971917e-05, "loss": 1.2773, "step": 3075 }, { "epoch": 0.56, "learning_rate": 9.721795004785605e-05, "loss": 1.2478, "step": 3080 }, { "epoch": 0.56, "learning_rate": 9.69019040285642e-05, "loss": 1.2596, "step": 3085 }, { "epoch": 0.56, "learning_rate": 9.658588898140322e-05, "loss": 1.2535, "step": 3090 }, { "epoch": 0.56, "learning_rate": 9.626990806562291e-05, "loss": 1.2332, "step": 3095 }, { "epoch": 0.56, "learning_rate": 9.595396444013205e-05, "loss": 1.2232, "step": 3100 }, { "epoch": 0.56, "learning_rate": 9.563806126346642e-05, "loss": 1.2822, "step": 3105 }, { "epoch": 0.56, "learning_rate": 9.532220169375761e-05, "loss": 1.2272, "step": 3110 }, { "epoch": 0.56, "learning_rate": 9.500638888870113e-05, "loss": 1.2345, "step": 3115 }, { "epoch": 0.57, "learning_rate": 9.469062600552509e-05, "loss": 1.3178, "step": 3120 }, { "epoch": 0.57, "learning_rate": 9.43749162009584e-05, "loss": 1.248, "step": 3125 }, { "epoch": 0.57, "learning_rate": 9.405926263119945e-05, "loss": 1.2272, "step": 3130 }, { "epoch": 0.57, "learning_rate": 9.374366845188442e-05, "loss": 1.265, "step": 3135 }, { "epoch": 0.57, "learning_rate": 9.342813681805565e-05, "loss": 1.2711, "step": 3140 }, { "epoch": 0.57, "learning_rate": 9.311267088413035e-05, "loss": 1.2777, "step": 3145 }, { "epoch": 0.57, "learning_rate": 9.27972738038688e-05, "loss": 1.3019, "step": 3150 }, { "epoch": 0.57, "learning_rate": 9.248194873034301e-05, "loss": 1.2909, "step": 3155 }, { "epoch": 0.57, "learning_rate": 9.216669881590515e-05, "loss": 1.2529, "step": 3160 }, { "epoch": 0.57, "learning_rate": 9.18515272121559e-05, "loss": 1.3024, "step": 3165 }, { "epoch": 0.57, "learning_rate": 9.153643706991318e-05, "loss": 1.2295, "step": 3170 }, { "epoch": 0.58, "learning_rate": 9.122143153918045e-05, "loss": 1.2647, "step": 3175 }, { "epoch": 0.58, "learning_rate": 9.09065137691153e-05, "loss": 1.2777, "step": 3180 }, { "epoch": 0.58, "learning_rate": 9.059168690799804e-05, "loss": 1.2287, "step": 3185 }, { "epoch": 0.58, "learning_rate": 9.027695410320004e-05, "loss": 1.2618, "step": 3190 }, { "epoch": 0.58, "learning_rate": 8.996231850115246e-05, "loss": 1.2821, "step": 3195 }, { "epoch": 0.58, "learning_rate": 8.964778324731467e-05, "loss": 1.2888, "step": 3200 }, { "epoch": 0.58, "learning_rate": 8.933335148614284e-05, "loss": 1.2946, "step": 3205 }, { "epoch": 0.58, "learning_rate": 8.901902636105854e-05, "loss": 1.3121, "step": 3210 }, { "epoch": 0.58, "learning_rate": 8.870481101441723e-05, "loss": 1.3313, "step": 3215 }, { "epoch": 0.58, "learning_rate": 8.839070858747697e-05, "loss": 1.2501, "step": 3220 }, { "epoch": 0.58, "learning_rate": 8.807672222036691e-05, "loss": 1.3022, "step": 3225 }, { "epoch": 0.59, "learning_rate": 8.77628550520559e-05, "loss": 1.3413, "step": 3230 }, { "epoch": 0.59, "learning_rate": 8.744911022032115e-05, "loss": 1.2728, "step": 3235 }, { "epoch": 0.59, "learning_rate": 8.713549086171691e-05, "loss": 1.2878, "step": 3240 }, { "epoch": 0.59, "learning_rate": 8.682200011154302e-05, "loss": 1.3301, "step": 3245 }, { "epoch": 0.59, "learning_rate": 8.650864110381357e-05, "loss": 1.2421, "step": 3250 }, { "epoch": 0.59, "learning_rate": 8.619541697122568e-05, "loss": 1.3447, "step": 3255 }, { "epoch": 0.59, "learning_rate": 8.5882330845128e-05, "loss": 1.2773, "step": 3260 }, { "epoch": 0.59, "learning_rate": 8.55693858554896e-05, "loss": 1.2936, "step": 3265 }, { "epoch": 0.59, "learning_rate": 8.525658513086857e-05, "loss": 1.3005, "step": 3270 }, { "epoch": 0.59, "learning_rate": 8.49439317983807e-05, "loss": 1.2903, "step": 3275 }, { "epoch": 0.59, "learning_rate": 8.463142898366834e-05, "loss": 1.2755, "step": 3280 }, { "epoch": 0.6, "learning_rate": 8.431907981086906e-05, "loss": 1.2755, "step": 3285 }, { "epoch": 0.6, "learning_rate": 8.400688740258447e-05, "loss": 1.2669, "step": 3290 }, { "epoch": 0.6, "learning_rate": 8.3694854879849e-05, "loss": 1.2689, "step": 3295 }, { "epoch": 0.6, "learning_rate": 8.33829853620986e-05, "loss": 1.2619, "step": 3300 }, { "epoch": 0.6, "learning_rate": 8.307128196713972e-05, "loss": 1.287, "step": 3305 }, { "epoch": 0.6, "learning_rate": 8.275974781111804e-05, "loss": 1.2392, "step": 3310 }, { "epoch": 0.6, "learning_rate": 8.244838600848727e-05, "loss": 1.3307, "step": 3315 }, { "epoch": 0.6, "learning_rate": 8.213719967197817e-05, "loss": 1.281, "step": 3320 }, { "epoch": 0.6, "learning_rate": 8.182619191256724e-05, "loss": 1.3068, "step": 3325 }, { "epoch": 0.6, "learning_rate": 8.15153658394458e-05, "loss": 1.2864, "step": 3330 }, { "epoch": 0.6, "learning_rate": 8.120472455998882e-05, "loss": 1.2889, "step": 3335 }, { "epoch": 0.61, "learning_rate": 8.089427117972378e-05, "loss": 1.2591, "step": 3340 }, { "epoch": 0.61, "learning_rate": 8.058400880229978e-05, "loss": 1.3001, "step": 3345 }, { "epoch": 0.61, "learning_rate": 8.027394052945648e-05, "loss": 1.2743, "step": 3350 }, { "epoch": 0.61, "learning_rate": 7.996406946099289e-05, "loss": 1.271, "step": 3355 }, { "epoch": 0.61, "learning_rate": 7.965439869473664e-05, "loss": 1.2902, "step": 3360 }, { "epoch": 0.61, "learning_rate": 7.934493132651294e-05, "loss": 1.3117, "step": 3365 }, { "epoch": 0.61, "learning_rate": 7.903567045011352e-05, "loss": 1.2478, "step": 3370 }, { "epoch": 0.61, "learning_rate": 7.872661915726584e-05, "loss": 1.2871, "step": 3375 }, { "epoch": 0.61, "learning_rate": 7.841778053760211e-05, "loss": 1.2891, "step": 3380 }, { "epoch": 0.61, "learning_rate": 7.810915767862837e-05, "loss": 1.2877, "step": 3385 }, { "epoch": 0.61, "learning_rate": 7.780075366569374e-05, "loss": 1.2385, "step": 3390 }, { "epoch": 0.62, "learning_rate": 7.749257158195943e-05, "loss": 1.2439, "step": 3395 }, { "epoch": 0.62, "learning_rate": 7.718461450836804e-05, "loss": 1.331, "step": 3400 }, { "epoch": 0.62, "learning_rate": 7.687688552361272e-05, "loss": 1.2249, "step": 3405 }, { "epoch": 0.62, "learning_rate": 7.65693877041063e-05, "loss": 1.2736, "step": 3410 }, { "epoch": 0.62, "learning_rate": 7.626212412395072e-05, "loss": 1.2991, "step": 3415 }, { "epoch": 0.62, "learning_rate": 7.595509785490617e-05, "loss": 1.2505, "step": 3420 }, { "epoch": 0.62, "learning_rate": 7.564831196636032e-05, "loss": 1.3018, "step": 3425 }, { "epoch": 0.62, "learning_rate": 7.534176952529782e-05, "loss": 1.2937, "step": 3430 }, { "epoch": 0.62, "learning_rate": 7.503547359626948e-05, "loss": 1.2756, "step": 3435 }, { "epoch": 0.62, "learning_rate": 7.472942724136174e-05, "loss": 1.306, "step": 3440 }, { "epoch": 0.62, "learning_rate": 7.442363352016598e-05, "loss": 1.2915, "step": 3445 }, { "epoch": 0.62, "learning_rate": 7.411809548974792e-05, "loss": 1.3106, "step": 3450 }, { "epoch": 0.63, "learning_rate": 7.381281620461722e-05, "loss": 1.2993, "step": 3455 }, { "epoch": 0.63, "learning_rate": 7.350779871669669e-05, "loss": 1.2557, "step": 3460 }, { "epoch": 0.63, "learning_rate": 7.3203046075292e-05, "loss": 1.2596, "step": 3465 }, { "epoch": 0.63, "learning_rate": 7.289856132706112e-05, "loss": 1.2693, "step": 3470 }, { "epoch": 0.63, "learning_rate": 7.25943475159838e-05, "loss": 1.245, "step": 3475 }, { "epoch": 0.63, "learning_rate": 7.229040768333115e-05, "loss": 1.2455, "step": 3480 }, { "epoch": 0.63, "learning_rate": 7.198674486763537e-05, "loss": 1.2628, "step": 3485 }, { "epoch": 0.63, "learning_rate": 7.168336210465928e-05, "loss": 1.2572, "step": 3490 }, { "epoch": 0.63, "learning_rate": 7.138026242736589e-05, "loss": 1.232, "step": 3495 }, { "epoch": 0.63, "learning_rate": 7.107744886588824e-05, "loss": 1.3451, "step": 3500 }, { "epoch": 0.63, "learning_rate": 7.077492444749895e-05, "loss": 1.3035, "step": 3505 }, { "epoch": 0.64, "learning_rate": 7.04726921965801e-05, "loss": 1.2449, "step": 3510 }, { "epoch": 0.64, "learning_rate": 7.017075513459292e-05, "loss": 1.239, "step": 3515 }, { "epoch": 0.64, "learning_rate": 6.986911628004753e-05, "loss": 1.2471, "step": 3520 }, { "epoch": 0.64, "learning_rate": 6.956777864847291e-05, "loss": 1.3258, "step": 3525 }, { "epoch": 0.64, "learning_rate": 6.926674525238663e-05, "loss": 1.2594, "step": 3530 }, { "epoch": 0.64, "learning_rate": 6.896601910126475e-05, "loss": 1.2699, "step": 3535 }, { "epoch": 0.64, "learning_rate": 6.866560320151179e-05, "loss": 1.2516, "step": 3540 }, { "epoch": 0.64, "learning_rate": 6.83655005564306e-05, "loss": 1.2536, "step": 3545 }, { "epoch": 0.64, "learning_rate": 6.806571416619246e-05, "loss": 1.2782, "step": 3550 }, { "epoch": 0.64, "learning_rate": 6.77662470278069e-05, "loss": 1.2205, "step": 3555 }, { "epoch": 0.64, "learning_rate": 6.74671021350919e-05, "loss": 1.2888, "step": 3560 }, { "epoch": 0.65, "learning_rate": 6.71682824786439e-05, "loss": 1.2725, "step": 3565 }, { "epoch": 0.65, "learning_rate": 6.686979104580788e-05, "loss": 1.2165, "step": 3570 }, { "epoch": 0.65, "learning_rate": 6.657163082064752e-05, "loss": 1.2787, "step": 3575 }, { "epoch": 0.65, "learning_rate": 6.627380478391543e-05, "loss": 1.2569, "step": 3580 }, { "epoch": 0.65, "learning_rate": 6.597631591302319e-05, "loss": 1.2977, "step": 3585 }, { "epoch": 0.65, "learning_rate": 6.567916718201174e-05, "loss": 1.2661, "step": 3590 }, { "epoch": 0.65, "learning_rate": 6.538236156152163e-05, "loss": 1.3089, "step": 3595 }, { "epoch": 0.65, "learning_rate": 6.508590201876317e-05, "loss": 1.2441, "step": 3600 }, { "epoch": 0.65, "learning_rate": 6.478979151748694e-05, "loss": 1.2697, "step": 3605 }, { "epoch": 0.65, "learning_rate": 6.449403301795416e-05, "loss": 1.2843, "step": 3610 }, { "epoch": 0.65, "learning_rate": 6.419862947690692e-05, "loss": 1.2846, "step": 3615 }, { "epoch": 0.66, "learning_rate": 6.390358384753881e-05, "loss": 1.2265, "step": 3620 }, { "epoch": 0.66, "learning_rate": 6.360889907946534e-05, "loss": 1.2765, "step": 3625 }, { "epoch": 0.66, "learning_rate": 6.331457811869437e-05, "loss": 1.2698, "step": 3630 }, { "epoch": 0.66, "learning_rate": 6.302062390759677e-05, "loss": 1.2683, "step": 3635 }, { "epoch": 0.66, "learning_rate": 6.272703938487694e-05, "loss": 1.3128, "step": 3640 }, { "epoch": 0.66, "learning_rate": 6.243382748554346e-05, "loss": 1.2875, "step": 3645 }, { "epoch": 0.66, "learning_rate": 6.214099114087975e-05, "loss": 1.2476, "step": 3650 }, { "epoch": 0.66, "learning_rate": 6.18485332784147e-05, "loss": 1.2495, "step": 3655 }, { "epoch": 0.66, "learning_rate": 6.155645682189351e-05, "loss": 1.2578, "step": 3660 }, { "epoch": 0.66, "learning_rate": 6.126476469124842e-05, "loss": 1.2243, "step": 3665 }, { "epoch": 0.66, "learning_rate": 6.097345980256942e-05, "loss": 1.2923, "step": 3670 }, { "epoch": 0.67, "learning_rate": 6.0682545068075317e-05, "loss": 1.2724, "step": 3675 }, { "epoch": 0.67, "learning_rate": 6.039202339608432e-05, "loss": 1.2479, "step": 3680 }, { "epoch": 0.67, "learning_rate": 6.010189769098529e-05, "loss": 1.2146, "step": 3685 }, { "epoch": 0.67, "learning_rate": 5.9812170853208496e-05, "loss": 1.2868, "step": 3690 }, { "epoch": 0.67, "learning_rate": 5.952284577919659e-05, "loss": 1.2859, "step": 3695 }, { "epoch": 0.67, "learning_rate": 5.9233925361375864e-05, "loss": 1.2669, "step": 3700 }, { "epoch": 0.67, "learning_rate": 5.8945412488127096e-05, "loss": 1.2299, "step": 3705 }, { "epoch": 0.67, "learning_rate": 5.865731004375683e-05, "loss": 1.2655, "step": 3710 }, { "epoch": 0.67, "learning_rate": 5.8369620908468503e-05, "loss": 1.2446, "step": 3715 }, { "epoch": 0.67, "learning_rate": 5.8082347958333625e-05, "loss": 1.2017, "step": 3720 }, { "epoch": 0.67, "learning_rate": 5.7795494065262956e-05, "loss": 1.2805, "step": 3725 }, { "epoch": 0.68, "learning_rate": 5.750906209697802e-05, "loss": 1.2901, "step": 3730 }, { "epoch": 0.68, "learning_rate": 5.722305491698219e-05, "loss": 1.266, "step": 3735 }, { "epoch": 0.68, "learning_rate": 5.693747538453229e-05, "loss": 1.2995, "step": 3740 }, { "epoch": 0.68, "learning_rate": 5.665232635460971e-05, "loss": 1.2705, "step": 3745 }, { "epoch": 0.68, "learning_rate": 5.6367610677892177e-05, "loss": 1.2111, "step": 3750 }, { "epoch": 0.68, "learning_rate": 5.6083331200725074e-05, "loss": 1.2395, "step": 3755 }, { "epoch": 0.68, "learning_rate": 5.579949076509305e-05, "loss": 1.2338, "step": 3760 }, { "epoch": 0.68, "learning_rate": 5.5516092208591594e-05, "loss": 1.2674, "step": 3765 }, { "epoch": 0.68, "learning_rate": 5.5233138364398604e-05, "loss": 1.2674, "step": 3770 }, { "epoch": 0.68, "learning_rate": 5.495063206124619e-05, "loss": 1.2451, "step": 3775 }, { "epoch": 0.68, "learning_rate": 5.466857612339229e-05, "loss": 1.2155, "step": 3780 }, { "epoch": 0.69, "learning_rate": 5.4386973370592485e-05, "loss": 1.2747, "step": 3785 }, { "epoch": 0.69, "learning_rate": 5.410582661807182e-05, "loss": 1.2164, "step": 3790 }, { "epoch": 0.69, "learning_rate": 5.382513867649663e-05, "loss": 1.2605, "step": 3795 }, { "epoch": 0.69, "learning_rate": 5.354491235194635e-05, "loss": 1.2679, "step": 3800 }, { "epoch": 0.69, "learning_rate": 5.32651504458857e-05, "loss": 1.2327, "step": 3805 }, { "epoch": 0.69, "learning_rate": 5.298585575513648e-05, "loss": 1.287, "step": 3810 }, { "epoch": 0.69, "learning_rate": 5.2707031071849644e-05, "loss": 1.2592, "step": 3815 }, { "epoch": 0.69, "learning_rate": 5.2428679183477505e-05, "loss": 1.2454, "step": 3820 }, { "epoch": 0.69, "learning_rate": 5.215080287274561e-05, "loss": 1.2447, "step": 3825 }, { "epoch": 0.69, "learning_rate": 5.18734049176252e-05, "loss": 1.2778, "step": 3830 }, { "epoch": 0.69, "learning_rate": 5.159648809130534e-05, "loss": 1.2305, "step": 3835 }, { "epoch": 0.7, "learning_rate": 5.1320055162165115e-05, "loss": 1.2759, "step": 3840 }, { "epoch": 0.7, "learning_rate": 5.104410889374611e-05, "loss": 1.2292, "step": 3845 }, { "epoch": 0.7, "learning_rate": 5.076865204472454e-05, "loss": 1.282, "step": 3850 }, { "epoch": 0.7, "learning_rate": 5.0493687368883904e-05, "loss": 1.2767, "step": 3855 }, { "epoch": 0.7, "learning_rate": 5.021921761508739e-05, "loss": 1.2544, "step": 3860 }, { "epoch": 0.7, "learning_rate": 4.994524552725036e-05, "loss": 1.2261, "step": 3865 }, { "epoch": 0.7, "learning_rate": 4.967177384431293e-05, "loss": 1.2189, "step": 3870 }, { "epoch": 0.7, "learning_rate": 4.939880530021263e-05, "loss": 1.2411, "step": 3875 }, { "epoch": 0.7, "learning_rate": 4.912634262385695e-05, "loss": 1.235, "step": 3880 }, { "epoch": 0.7, "learning_rate": 4.8854388539096205e-05, "loss": 1.2501, "step": 3885 }, { "epoch": 0.7, "learning_rate": 4.8582945764696244e-05, "loss": 1.2903, "step": 3890 }, { "epoch": 0.71, "learning_rate": 4.831201701431124e-05, "loss": 1.2874, "step": 3895 }, { "epoch": 0.71, "learning_rate": 4.804160499645667e-05, "loss": 1.2588, "step": 3900 }, { "epoch": 0.71, "learning_rate": 4.7771712414482015e-05, "loss": 1.34, "step": 3905 }, { "epoch": 0.71, "learning_rate": 4.7502341966544e-05, "loss": 1.2294, "step": 3910 }, { "epoch": 0.71, "learning_rate": 4.7233496345579444e-05, "loss": 1.2135, "step": 3915 }, { "epoch": 0.71, "learning_rate": 4.696517823927842e-05, "loss": 1.2082, "step": 3920 }, { "epoch": 0.71, "learning_rate": 4.6697390330057335e-05, "loss": 1.2583, "step": 3925 }, { "epoch": 0.71, "learning_rate": 4.6430135295032184e-05, "loss": 1.2368, "step": 3930 }, { "epoch": 0.71, "learning_rate": 4.6163415805991626e-05, "loss": 1.2219, "step": 3935 }, { "epoch": 0.71, "learning_rate": 4.589723452937049e-05, "loss": 1.2668, "step": 3940 }, { "epoch": 0.71, "learning_rate": 4.5631594126222995e-05, "loss": 1.2654, "step": 3945 }, { "epoch": 0.72, "learning_rate": 4.536649725219615e-05, "loss": 1.2436, "step": 3950 }, { "epoch": 0.72, "learning_rate": 4.510194655750326e-05, "loss": 1.2278, "step": 3955 }, { "epoch": 0.72, "learning_rate": 4.483794468689728e-05, "loss": 1.2904, "step": 3960 }, { "epoch": 0.72, "learning_rate": 4.457449427964463e-05, "loss": 1.277, "step": 3965 }, { "epoch": 0.72, "learning_rate": 4.431159796949862e-05, "loss": 1.2656, "step": 3970 }, { "epoch": 0.72, "learning_rate": 4.4049258384673085e-05, "loss": 1.2845, "step": 3975 }, { "epoch": 0.72, "learning_rate": 4.3787478147816296e-05, "loss": 1.2586, "step": 3980 }, { "epoch": 0.72, "learning_rate": 4.352625987598467e-05, "loss": 1.2354, "step": 3985 }, { "epoch": 0.72, "learning_rate": 4.326560618061639e-05, "loss": 1.2829, "step": 3990 }, { "epoch": 0.72, "learning_rate": 4.3005519667505675e-05, "loss": 1.2248, "step": 3995 }, { "epoch": 0.72, "learning_rate": 4.274600293677647e-05, "loss": 1.2534, "step": 4000 }, { "epoch": 0.73, "learning_rate": 4.248705858285649e-05, "loss": 1.2016, "step": 4005 }, { "epoch": 0.73, "learning_rate": 4.222868919445139e-05, "loss": 1.2276, "step": 4010 }, { "epoch": 0.73, "learning_rate": 4.197089735451868e-05, "loss": 1.2504, "step": 4015 }, { "epoch": 0.73, "learning_rate": 4.1713685640242165e-05, "loss": 1.2469, "step": 4020 }, { "epoch": 0.73, "learning_rate": 4.145705662300595e-05, "loss": 1.2582, "step": 4025 }, { "epoch": 0.73, "learning_rate": 4.1201012868368915e-05, "loss": 1.2125, "step": 4030 }, { "epoch": 0.73, "learning_rate": 4.094555693603891e-05, "loss": 1.2324, "step": 4035 }, { "epoch": 0.73, "learning_rate": 4.069069137984731e-05, "loss": 1.3027, "step": 4040 }, { "epoch": 0.73, "learning_rate": 4.0436418747723295e-05, "loss": 1.2772, "step": 4045 }, { "epoch": 0.73, "learning_rate": 4.0182741581668593e-05, "loss": 1.2269, "step": 4050 }, { "epoch": 0.73, "learning_rate": 3.992966241773194e-05, "loss": 1.2746, "step": 4055 }, { "epoch": 0.74, "learning_rate": 3.967718378598376e-05, "loss": 1.2536, "step": 4060 }, { "epoch": 0.74, "learning_rate": 3.9425308210490905e-05, "loss": 1.2511, "step": 4065 }, { "epoch": 0.74, "learning_rate": 3.917403820929126e-05, "loss": 1.2734, "step": 4070 }, { "epoch": 0.74, "learning_rate": 3.8923376294368806e-05, "loss": 1.2622, "step": 4075 }, { "epoch": 0.74, "learning_rate": 3.8673324971628357e-05, "loss": 1.3099, "step": 4080 }, { "epoch": 0.74, "learning_rate": 3.8423886740870566e-05, "loss": 1.2349, "step": 4085 }, { "epoch": 0.74, "learning_rate": 3.817506409576691e-05, "loss": 1.2152, "step": 4090 }, { "epoch": 0.74, "learning_rate": 3.7926859523834725e-05, "loss": 1.2543, "step": 4095 }, { "epoch": 0.74, "learning_rate": 3.767927550641237e-05, "loss": 1.2365, "step": 4100 }, { "epoch": 0.74, "learning_rate": 3.743231451863448e-05, "loss": 1.2476, "step": 4105 }, { "epoch": 0.74, "learning_rate": 3.718597902940717e-05, "loss": 1.2462, "step": 4110 }, { "epoch": 0.75, "learning_rate": 3.694027150138331e-05, "loss": 1.2745, "step": 4115 }, { "epoch": 0.75, "learning_rate": 3.669519439093801e-05, "loss": 1.2563, "step": 4120 }, { "epoch": 0.75, "learning_rate": 3.6450750148143884e-05, "loss": 1.2764, "step": 4125 }, { "epoch": 0.75, "learning_rate": 3.620694121674679e-05, "loss": 1.2162, "step": 4130 }, { "epoch": 0.75, "learning_rate": 3.596377003414124e-05, "loss": 1.2474, "step": 4135 }, { "epoch": 0.75, "learning_rate": 3.5721239031346066e-05, "loss": 1.2109, "step": 4140 }, { "epoch": 0.75, "learning_rate": 3.547935063298018e-05, "loss": 1.2179, "step": 4145 }, { "epoch": 0.75, "learning_rate": 3.523810725723816e-05, "loss": 1.244, "step": 4150 }, { "epoch": 0.75, "learning_rate": 3.4997511315866306e-05, "loss": 1.2279, "step": 4155 }, { "epoch": 0.75, "learning_rate": 3.475756521413839e-05, "loss": 1.2368, "step": 4160 }, { "epoch": 0.75, "learning_rate": 3.4518271350831647e-05, "loss": 1.3006, "step": 4165 }, { "epoch": 0.76, "learning_rate": 3.427963211820274e-05, "loss": 1.2726, "step": 4170 }, { "epoch": 0.76, "learning_rate": 3.4041649901964e-05, "loss": 1.3108, "step": 4175 }, { "epoch": 0.76, "learning_rate": 3.3804327081259304e-05, "loss": 1.2491, "step": 4180 }, { "epoch": 0.76, "learning_rate": 3.356766602864056e-05, "loss": 1.2916, "step": 4185 }, { "epoch": 0.76, "learning_rate": 3.33316691100439e-05, "loss": 1.2198, "step": 4190 }, { "epoch": 0.76, "learning_rate": 3.309633868476594e-05, "loss": 1.2532, "step": 4195 }, { "epoch": 0.76, "learning_rate": 3.2861677105440336e-05, "loss": 1.2575, "step": 4200 }, { "epoch": 0.76, "learning_rate": 3.262768671801407e-05, "loss": 1.2602, "step": 4205 }, { "epoch": 0.76, "learning_rate": 3.239436986172425e-05, "loss": 1.2675, "step": 4210 }, { "epoch": 0.76, "learning_rate": 3.216172886907452e-05, "loss": 1.2613, "step": 4215 }, { "epoch": 0.76, "learning_rate": 3.192976606581186e-05, "loss": 1.2542, "step": 4220 }, { "epoch": 0.77, "learning_rate": 3.1698483770903207e-05, "loss": 1.2262, "step": 4225 }, { "epoch": 0.77, "learning_rate": 3.146788429651246e-05, "loss": 1.2854, "step": 4230 }, { "epoch": 0.77, "learning_rate": 3.1237969947977153e-05, "loss": 1.2374, "step": 4235 }, { "epoch": 0.77, "learning_rate": 3.100874302378559e-05, "loss": 1.2252, "step": 4240 }, { "epoch": 0.77, "learning_rate": 3.078020581555376e-05, "loss": 1.2981, "step": 4245 }, { "epoch": 0.77, "learning_rate": 3.055236060800247e-05, "loss": 1.268, "step": 4250 }, { "epoch": 0.77, "learning_rate": 3.032520967893453e-05, "loss": 1.2497, "step": 4255 }, { "epoch": 0.77, "learning_rate": 3.009875529921181e-05, "loss": 1.3, "step": 4260 }, { "epoch": 0.77, "learning_rate": 2.987299973273282e-05, "loss": 1.2536, "step": 4265 }, { "epoch": 0.77, "learning_rate": 2.9647945236409848e-05, "loss": 1.2717, "step": 4270 }, { "epoch": 0.77, "learning_rate": 2.942359406014652e-05, "loss": 1.2778, "step": 4275 }, { "epoch": 0.78, "learning_rate": 2.919994844681524e-05, "loss": 1.247, "step": 4280 }, { "epoch": 0.78, "learning_rate": 2.8977010632234826e-05, "loss": 1.2549, "step": 4285 }, { "epoch": 0.78, "learning_rate": 2.8754782845148043e-05, "loss": 1.257, "step": 4290 }, { "epoch": 0.78, "learning_rate": 2.8533267307199497e-05, "loss": 1.2337, "step": 4295 }, { "epoch": 0.78, "learning_rate": 2.8312466232913282e-05, "loss": 1.2896, "step": 4300 }, { "epoch": 0.78, "learning_rate": 2.809238182967092e-05, "loss": 1.264, "step": 4305 }, { "epoch": 0.78, "learning_rate": 2.7873016297689268e-05, "loss": 1.2336, "step": 4310 }, { "epoch": 0.78, "learning_rate": 2.765437182999846e-05, "loss": 1.205, "step": 4315 }, { "epoch": 0.78, "learning_rate": 2.7436450612420095e-05, "loss": 1.2147, "step": 4320 }, { "epoch": 0.78, "learning_rate": 2.7219254823545336e-05, "loss": 1.2887, "step": 4325 }, { "epoch": 0.78, "learning_rate": 2.7002786634713094e-05, "loss": 1.2237, "step": 4330 }, { "epoch": 0.79, "learning_rate": 2.678704820998842e-05, "loss": 1.2635, "step": 4335 }, { "epoch": 0.79, "learning_rate": 2.6572041706140683e-05, "loss": 1.2738, "step": 4340 }, { "epoch": 0.79, "learning_rate": 2.635776927262227e-05, "loss": 1.24, "step": 4345 }, { "epoch": 0.79, "learning_rate": 2.6144233051546796e-05, "loss": 1.2806, "step": 4350 }, { "epoch": 0.79, "learning_rate": 2.5931435177668006e-05, "loss": 1.2598, "step": 4355 }, { "epoch": 0.79, "learning_rate": 2.57193777783582e-05, "loss": 1.1954, "step": 4360 }, { "epoch": 0.79, "learning_rate": 2.5508062973587076e-05, "loss": 1.2031, "step": 4365 }, { "epoch": 0.79, "learning_rate": 2.529749287590042e-05, "loss": 1.2506, "step": 4370 }, { "epoch": 0.79, "learning_rate": 2.5087669590399178e-05, "loss": 1.225, "step": 4375 }, { "epoch": 0.79, "learning_rate": 2.4878595214718236e-05, "loss": 1.2695, "step": 4380 }, { "epoch": 0.79, "learning_rate": 2.4670271839005542e-05, "loss": 1.2714, "step": 4385 }, { "epoch": 0.8, "learning_rate": 2.446270154590117e-05, "loss": 1.2735, "step": 4390 }, { "epoch": 0.8, "learning_rate": 2.425588641051656e-05, "loss": 1.2415, "step": 4395 }, { "epoch": 0.8, "learning_rate": 2.404982850041363e-05, "loss": 1.271, "step": 4400 }, { "epoch": 0.8, "learning_rate": 2.3844529875584278e-05, "loss": 1.2445, "step": 4405 }, { "epoch": 0.8, "learning_rate": 2.3639992588429705e-05, "loss": 1.2269, "step": 4410 }, { "epoch": 0.8, "learning_rate": 2.3436218683739896e-05, "loss": 1.2577, "step": 4415 }, { "epoch": 0.8, "learning_rate": 2.3233210198673218e-05, "loss": 1.2917, "step": 4420 }, { "epoch": 0.8, "learning_rate": 2.3030969162735926e-05, "loss": 1.2436, "step": 4425 }, { "epoch": 0.8, "learning_rate": 2.282949759776206e-05, "loss": 1.2601, "step": 4430 }, { "epoch": 0.8, "learning_rate": 2.262879751789314e-05, "loss": 1.308, "step": 4435 }, { "epoch": 0.8, "learning_rate": 2.242887092955801e-05, "loss": 1.2468, "step": 4440 }, { "epoch": 0.81, "learning_rate": 2.2229719831452823e-05, "loss": 1.2763, "step": 4445 }, { "epoch": 0.81, "learning_rate": 2.2031346214520966e-05, "loss": 1.2405, "step": 4450 }, { "epoch": 0.81, "learning_rate": 2.183375206193331e-05, "loss": 1.2632, "step": 4455 }, { "epoch": 0.81, "learning_rate": 2.1636939349068308e-05, "loss": 1.2975, "step": 4460 }, { "epoch": 0.81, "learning_rate": 2.1440910043492212e-05, "loss": 1.2662, "step": 4465 }, { "epoch": 0.81, "learning_rate": 2.12456661049394e-05, "loss": 1.2363, "step": 4470 }, { "epoch": 0.81, "learning_rate": 2.105120948529291e-05, "loss": 1.2217, "step": 4475 }, { "epoch": 0.81, "learning_rate": 2.085754212856471e-05, "loss": 1.2094, "step": 4480 }, { "epoch": 0.81, "learning_rate": 2.0664665970876496e-05, "loss": 1.2848, "step": 4485 }, { "epoch": 0.81, "learning_rate": 2.04725829404402e-05, "loss": 1.2706, "step": 4490 }, { "epoch": 0.81, "learning_rate": 2.028129495753871e-05, "loss": 1.2804, "step": 4495 }, { "epoch": 0.82, "learning_rate": 2.0090803934506764e-05, "loss": 1.2642, "step": 4500 }, { "epoch": 0.82, "learning_rate": 1.9901111775711677e-05, "loss": 1.2477, "step": 4505 }, { "epoch": 0.82, "learning_rate": 1.9712220377534496e-05, "loss": 1.2972, "step": 4510 }, { "epoch": 0.82, "learning_rate": 1.9524131628350883e-05, "loss": 1.2812, "step": 4515 }, { "epoch": 0.82, "learning_rate": 1.9336847408512328e-05, "loss": 1.2165, "step": 4520 }, { "epoch": 0.82, "learning_rate": 1.915036959032732e-05, "loss": 1.2503, "step": 4525 }, { "epoch": 0.82, "learning_rate": 1.8964700038042626e-05, "loss": 1.2767, "step": 4530 }, { "epoch": 0.82, "learning_rate": 1.8779840607824618e-05, "loss": 1.2897, "step": 4535 }, { "epoch": 0.82, "learning_rate": 1.859579314774079e-05, "loss": 1.1977, "step": 4540 }, { "epoch": 0.82, "learning_rate": 1.8412559497741278e-05, "loss": 1.2283, "step": 4545 }, { "epoch": 0.82, "learning_rate": 1.8230141489640394e-05, "loss": 1.2615, "step": 4550 }, { "epoch": 0.83, "learning_rate": 1.804854094709838e-05, "loss": 1.2232, "step": 4555 }, { "epoch": 0.83, "learning_rate": 1.7867759685603114e-05, "loss": 1.2341, "step": 4560 }, { "epoch": 0.83, "learning_rate": 1.768779951245202e-05, "loss": 1.2484, "step": 4565 }, { "epoch": 0.83, "learning_rate": 1.7508662226734006e-05, "loss": 1.255, "step": 4570 }, { "epoch": 0.83, "learning_rate": 1.7330349619311415e-05, "loss": 1.2868, "step": 4575 }, { "epoch": 0.83, "learning_rate": 1.7152863472802195e-05, "loss": 1.2346, "step": 4580 }, { "epoch": 0.83, "learning_rate": 1.6976205561561975e-05, "loss": 1.2512, "step": 4585 }, { "epoch": 0.83, "learning_rate": 1.6800377651666465e-05, "loss": 1.2173, "step": 4590 }, { "epoch": 0.83, "learning_rate": 1.6625381500893655e-05, "loss": 1.2773, "step": 4595 }, { "epoch": 0.83, "learning_rate": 1.6451218858706374e-05, "loss": 1.2677, "step": 4600 }, { "epoch": 0.83, "learning_rate": 1.6277891466234708e-05, "loss": 1.2503, "step": 4605 }, { "epoch": 0.84, "learning_rate": 1.6105401056258674e-05, "loss": 1.2477, "step": 4610 }, { "epoch": 0.84, "learning_rate": 1.5933749353190764e-05, "loss": 1.2885, "step": 4615 }, { "epoch": 0.84, "learning_rate": 1.5762938073058853e-05, "loss": 1.2545, "step": 4620 }, { "epoch": 0.84, "learning_rate": 1.559296892348897e-05, "loss": 1.3032, "step": 4625 }, { "epoch": 0.84, "learning_rate": 1.5423843603688236e-05, "loss": 1.2375, "step": 4630 }, { "epoch": 0.84, "learning_rate": 1.5255563804427885e-05, "loss": 1.2736, "step": 4635 }, { "epoch": 0.84, "learning_rate": 1.5088131208026367e-05, "loss": 1.2613, "step": 4640 }, { "epoch": 0.84, "learning_rate": 1.4921547488332454e-05, "loss": 1.2215, "step": 4645 }, { "epoch": 0.84, "learning_rate": 1.475581431070865e-05, "loss": 1.2022, "step": 4650 }, { "epoch": 0.84, "learning_rate": 1.4590933332014401e-05, "loss": 1.242, "step": 4655 }, { "epoch": 0.84, "learning_rate": 1.442690620058964e-05, "loss": 1.2596, "step": 4660 }, { "epoch": 0.85, "learning_rate": 1.4263734556238263e-05, "loss": 1.2308, "step": 4665 }, { "epoch": 0.85, "learning_rate": 1.4101420030211654e-05, "loss": 1.2559, "step": 4670 }, { "epoch": 0.85, "learning_rate": 1.3939964245192538e-05, "loss": 1.2474, "step": 4675 }, { "epoch": 0.85, "learning_rate": 1.3779368815278647e-05, "loss": 1.2696, "step": 4680 }, { "epoch": 0.85, "learning_rate": 1.3619635345966641e-05, "loss": 1.2504, "step": 4685 }, { "epoch": 0.85, "learning_rate": 1.3460765434136003e-05, "loss": 1.2421, "step": 4690 }, { "epoch": 0.85, "learning_rate": 1.3302760668033076e-05, "loss": 1.2001, "step": 4695 }, { "epoch": 0.85, "learning_rate": 1.314562262725526e-05, "loss": 1.2147, "step": 4700 }, { "epoch": 0.85, "learning_rate": 1.298935288273515e-05, "loss": 1.2805, "step": 4705 }, { "epoch": 0.85, "learning_rate": 1.2833952996724863e-05, "loss": 1.257, "step": 4710 }, { "epoch": 0.85, "learning_rate": 1.2679424522780426e-05, "loss": 1.2333, "step": 4715 }, { "epoch": 0.86, "learning_rate": 1.252576900574618e-05, "loss": 1.2537, "step": 4720 }, { "epoch": 0.86, "learning_rate": 1.2372987981739393e-05, "loss": 1.2611, "step": 4725 }, { "epoch": 0.86, "learning_rate": 1.2221082978134935e-05, "loss": 1.2067, "step": 4730 }, { "epoch": 0.86, "learning_rate": 1.2070055513549938e-05, "loss": 1.2493, "step": 4735 }, { "epoch": 0.86, "learning_rate": 1.1919907097828653e-05, "loss": 1.2574, "step": 4740 }, { "epoch": 0.86, "learning_rate": 1.1770639232027358e-05, "loss": 1.2496, "step": 4745 }, { "epoch": 0.86, "learning_rate": 1.1622253408399341e-05, "loss": 1.2225, "step": 4750 }, { "epoch": 0.86, "learning_rate": 1.1474751110379933e-05, "loss": 1.2766, "step": 4755 }, { "epoch": 0.86, "learning_rate": 1.1328133812571784e-05, "loss": 1.256, "step": 4760 }, { "epoch": 0.86, "learning_rate": 1.1182402980730044e-05, "loss": 1.2526, "step": 4765 }, { "epoch": 0.86, "learning_rate": 1.1037560071747732e-05, "loss": 1.218, "step": 4770 }, { "epoch": 0.86, "learning_rate": 1.0893606533641187e-05, "loss": 1.2296, "step": 4775 }, { "epoch": 0.87, "learning_rate": 1.0750543805535518e-05, "loss": 1.2528, "step": 4780 }, { "epoch": 0.87, "learning_rate": 1.0608373317650323e-05, "loss": 1.2522, "step": 4785 }, { "epoch": 0.87, "learning_rate": 1.0467096491285333e-05, "loss": 1.2501, "step": 4790 }, { "epoch": 0.87, "learning_rate": 1.0326714738806198e-05, "loss": 1.2425, "step": 4795 }, { "epoch": 0.87, "learning_rate": 1.01872294636304e-05, "loss": 1.2714, "step": 4800 }, { "epoch": 0.87, "learning_rate": 1.0048642060213154e-05, "loss": 1.2768, "step": 4805 }, { "epoch": 0.87, "learning_rate": 9.910953914033572e-06, "loss": 1.2464, "step": 4810 }, { "epoch": 0.87, "learning_rate": 9.774166401580732e-06, "loss": 1.2267, "step": 4815 }, { "epoch": 0.87, "learning_rate": 9.638280890339945e-06, "loss": 1.2463, "step": 4820 }, { "epoch": 0.87, "learning_rate": 9.503298738779098e-06, "loss": 1.2651, "step": 4825 }, { "epoch": 0.87, "learning_rate": 9.369221296335006e-06, "loss": 1.2172, "step": 4830 }, { "epoch": 0.88, "learning_rate": 9.236049903400012e-06, "loss": 1.2365, "step": 4835 }, { "epoch": 0.88, "learning_rate": 9.103785891308547e-06, "loss": 1.2667, "step": 4840 }, { "epoch": 0.88, "learning_rate": 8.972430582323787e-06, "loss": 1.2578, "step": 4845 }, { "epoch": 0.88, "learning_rate": 8.84198528962451e-06, "loss": 1.1834, "step": 4850 }, { "epoch": 0.88, "learning_rate": 8.712451317291902e-06, "loss": 1.2584, "step": 4855 }, { "epoch": 0.88, "learning_rate": 8.58382996029652e-06, "loss": 1.284, "step": 4860 }, { "epoch": 0.88, "learning_rate": 8.456122504485397e-06, "loss": 1.2217, "step": 4865 }, { "epoch": 0.88, "learning_rate": 8.329330226569166e-06, "loss": 1.227, "step": 4870 }, { "epoch": 0.88, "learning_rate": 8.203454394109266e-06, "loss": 1.2694, "step": 4875 }, { "epoch": 0.88, "learning_rate": 8.07849626550531e-06, "loss": 1.2169, "step": 4880 }, { "epoch": 0.88, "learning_rate": 7.954457089982492e-06, "loss": 1.2436, "step": 4885 }, { "epoch": 0.89, "learning_rate": 7.831338107579056e-06, "loss": 1.2592, "step": 4890 }, { "epoch": 0.89, "learning_rate": 7.709140549133975e-06, "loss": 1.2316, "step": 4895 }, { "epoch": 0.89, "learning_rate": 7.587865636274594e-06, "loss": 1.2511, "step": 4900 }, { "epoch": 0.89, "learning_rate": 7.467514581404444e-06, "loss": 1.2475, "step": 4905 }, { "epoch": 0.89, "learning_rate": 7.3480885876911156e-06, "loss": 1.2627, "step": 4910 }, { "epoch": 0.89, "learning_rate": 7.229588849054158e-06, "loss": 1.2263, "step": 4915 }, { "epoch": 0.89, "learning_rate": 7.1120165501533e-06, "loss": 1.259, "step": 4920 }, { "epoch": 0.89, "learning_rate": 6.995372866376459e-06, "loss": 1.222, "step": 4925 }, { "epoch": 0.89, "learning_rate": 6.879658963828062e-06, "loss": 1.2719, "step": 4930 }, { "epoch": 0.89, "learning_rate": 6.7648759993174104e-06, "loss": 1.2065, "step": 4935 }, { "epoch": 0.89, "learning_rate": 6.651025120346988e-06, "loss": 1.2355, "step": 4940 }, { "epoch": 0.9, "learning_rate": 6.538107465101162e-06, "loss": 1.2374, "step": 4945 }, { "epoch": 0.9, "learning_rate": 6.426124162434688e-06, "loss": 1.2555, "step": 4950 }, { "epoch": 0.9, "learning_rate": 6.3150763318614695e-06, "loss": 1.2187, "step": 4955 }, { "epoch": 0.9, "learning_rate": 6.204965083543368e-06, "loss": 1.2505, "step": 4960 }, { "epoch": 0.9, "learning_rate": 6.095791518279059e-06, "loss": 1.2313, "step": 4965 }, { "epoch": 0.9, "learning_rate": 5.987556727493049e-06, "loss": 1.216, "step": 4970 }, { "epoch": 0.9, "learning_rate": 5.880261793224828e-06, "loss": 1.2232, "step": 4975 }, { "epoch": 0.9, "learning_rate": 5.77390778811796e-06, "loss": 1.2839, "step": 4980 }, { "epoch": 0.9, "learning_rate": 5.6684957754094105e-06, "loss": 1.2096, "step": 4985 }, { "epoch": 0.9, "learning_rate": 5.564026808918921e-06, "loss": 1.2564, "step": 4990 }, { "epoch": 0.9, "learning_rate": 5.460501933038442e-06, "loss": 1.2488, "step": 4995 }, { "epoch": 0.91, "learning_rate": 5.357922182721687e-06, "loss": 1.2381, "step": 5000 }, { "epoch": 0.91, "learning_rate": 5.256288583473834e-06, "loss": 1.2258, "step": 5005 }, { "epoch": 0.91, "learning_rate": 5.1556021513412544e-06, "loss": 1.1936, "step": 5010 }, { "epoch": 0.91, "learning_rate": 5.0558638929013354e-06, "loss": 1.2509, "step": 5015 }, { "epoch": 0.91, "learning_rate": 4.957074805252437e-06, "loss": 1.2375, "step": 5020 }, { "epoch": 0.91, "learning_rate": 4.859235876003876e-06, "loss": 1.2556, "step": 5025 }, { "epoch": 0.91, "learning_rate": 4.762348083266144e-06, "loss": 1.2136, "step": 5030 }, { "epoch": 0.91, "learning_rate": 4.666412395641062e-06, "loss": 1.2863, "step": 5035 }, { "epoch": 0.91, "learning_rate": 4.5714297722121106e-06, "loss": 1.243, "step": 5040 }, { "epoch": 0.91, "learning_rate": 4.477401162534856e-06, "loss": 1.2579, "step": 5045 }, { "epoch": 0.91, "learning_rate": 4.384327506627395e-06, "loss": 1.2646, "step": 5050 }, { "epoch": 0.92, "learning_rate": 4.2922097349610835e-06, "loss": 1.2615, "step": 5055 }, { "epoch": 0.92, "learning_rate": 4.20104876845111e-06, "loss": 1.2481, "step": 5060 }, { "epoch": 0.92, "learning_rate": 4.110845518447348e-06, "loss": 1.2763, "step": 5065 }, { "epoch": 0.92, "learning_rate": 4.021600886725263e-06, "loss": 1.2249, "step": 5070 }, { "epoch": 0.92, "learning_rate": 3.933315765476808e-06, "loss": 1.2437, "step": 5075 }, { "epoch": 0.92, "learning_rate": 3.84599103730161e-06, "loss": 1.2199, "step": 5080 }, { "epoch": 0.92, "learning_rate": 3.75962757519811e-06, "loss": 1.2349, "step": 5085 }, { "epoch": 0.92, "learning_rate": 3.6742262425548125e-06, "loss": 1.2319, "step": 5090 }, { "epoch": 0.92, "learning_rate": 3.5897878931416497e-06, "loss": 1.2168, "step": 5095 }, { "epoch": 0.92, "learning_rate": 3.5063133711014882e-06, "loss": 1.2151, "step": 5100 }, { "epoch": 0.92, "learning_rate": 3.4238035109416922e-06, "loss": 1.2785, "step": 5105 }, { "epoch": 0.93, "learning_rate": 3.342259137525694e-06, "loss": 1.2178, "step": 5110 }, { "epoch": 0.93, "learning_rate": 3.2616810660648588e-06, "loss": 1.2742, "step": 5115 }, { "epoch": 0.93, "learning_rate": 3.182070102110257e-06, "loss": 1.2287, "step": 5120 }, { "epoch": 0.93, "learning_rate": 3.103427041544682e-06, "loss": 1.2327, "step": 5125 }, { "epoch": 0.93, "learning_rate": 3.025752670574622e-06, "loss": 1.2371, "step": 5130 }, { "epoch": 0.93, "learning_rate": 2.9490477657224014e-06, "loss": 1.2718, "step": 5135 }, { "epoch": 0.93, "learning_rate": 2.873313093818486e-06, "loss": 1.2082, "step": 5140 }, { "epoch": 0.93, "learning_rate": 2.798549411993789e-06, "loss": 1.1962, "step": 5145 }, { "epoch": 0.93, "learning_rate": 2.7247574676720454e-06, "loss": 1.2537, "step": 5150 }, { "epoch": 0.93, "learning_rate": 2.651937998562437e-06, "loss": 1.2331, "step": 5155 }, { "epoch": 0.93, "learning_rate": 2.580091732652101e-06, "loss": 1.2437, "step": 5160 }, { "epoch": 0.94, "learning_rate": 2.509219388198958e-06, "loss": 1.2562, "step": 5165 }, { "epoch": 0.94, "learning_rate": 2.439321673724504e-06, "loss": 1.2432, "step": 5170 }, { "epoch": 0.94, "learning_rate": 2.3703992880066638e-06, "loss": 1.2832, "step": 5175 }, { "epoch": 0.94, "learning_rate": 2.302452920072895e-06, "loss": 1.2662, "step": 5180 }, { "epoch": 0.94, "learning_rate": 2.2354832491932486e-06, "loss": 1.2605, "step": 5185 }, { "epoch": 0.94, "learning_rate": 2.1694909448735645e-06, "loss": 1.2612, "step": 5190 }, { "epoch": 0.94, "learning_rate": 2.1044766668488424e-06, "loss": 1.2727, "step": 5195 }, { "epoch": 0.94, "learning_rate": 2.0404410650765817e-06, "loss": 1.2572, "step": 5200 }, { "epoch": 0.94, "learning_rate": 1.977384779730307e-06, "loss": 1.2633, "step": 5205 }, { "epoch": 0.94, "learning_rate": 1.9153084411931863e-06, "loss": 1.229, "step": 5210 }, { "epoch": 0.94, "learning_rate": 1.8542126700516804e-06, "loss": 1.2002, "step": 5215 }, { "epoch": 0.95, "learning_rate": 1.7940980770894122e-06, "loss": 1.2471, "step": 5220 }, { "epoch": 0.95, "learning_rate": 1.7349652632809744e-06, "loss": 1.257, "step": 5225 }, { "epoch": 0.95, "learning_rate": 1.6768148197860212e-06, "loss": 1.2562, "step": 5230 }, { "epoch": 0.95, "learning_rate": 1.6196473279432412e-06, "loss": 1.2322, "step": 5235 }, { "epoch": 0.95, "learning_rate": 1.5634633592646609e-06, "loss": 1.225, "step": 5240 }, { "epoch": 0.95, "learning_rate": 1.5082634754298385e-06, "loss": 1.2315, "step": 5245 }, { "epoch": 0.95, "learning_rate": 1.4540482282803137e-06, "loss": 1.2741, "step": 5250 }, { "epoch": 0.95, "learning_rate": 1.4008181598140547e-06, "loss": 1.2291, "step": 5255 }, { "epoch": 0.95, "learning_rate": 1.348573802180053e-06, "loss": 1.2508, "step": 5260 }, { "epoch": 0.95, "learning_rate": 1.2973156776729944e-06, "loss": 1.2438, "step": 5265 }, { "epoch": 0.95, "learning_rate": 1.2470442987280617e-06, "loss": 1.2337, "step": 5270 }, { "epoch": 0.96, "learning_rate": 1.1977601679157625e-06, "loss": 1.2373, "step": 5275 }, { "epoch": 0.96, "learning_rate": 1.1494637779369766e-06, "loss": 1.2654, "step": 5280 }, { "epoch": 0.96, "learning_rate": 1.10215561161795e-06, "loss": 1.2273, "step": 5285 }, { "epoch": 0.96, "learning_rate": 1.055836141905553e-06, "loss": 1.2503, "step": 5290 }, { "epoch": 0.96, "learning_rate": 1.0105058318624517e-06, "loss": 1.2419, "step": 5295 }, { "epoch": 0.96, "learning_rate": 9.661651346625889e-07, "loss": 1.2124, "step": 5300 }, { "epoch": 0.96, "learning_rate": 9.228144935865657e-07, "loss": 1.276, "step": 5305 }, { "epoch": 0.96, "learning_rate": 8.804543420172562e-07, "loss": 1.1957, "step": 5310 }, { "epoch": 0.96, "learning_rate": 8.390851034354552e-07, "loss": 1.2669, "step": 5315 }, { "epoch": 0.96, "learning_rate": 7.987071914156596e-07, "loss": 1.2554, "step": 5320 }, { "epoch": 0.96, "learning_rate": 7.593210096219161e-07, "loss": 1.2942, "step": 5325 }, { "epoch": 0.97, "learning_rate": 7.20926951803802e-07, "loss": 1.2459, "step": 5330 }, { "epoch": 0.97, "learning_rate": 6.835254017924953e-07, "loss": 1.213, "step": 5335 }, { "epoch": 0.97, "learning_rate": 6.471167334968886e-07, "loss": 1.2581, "step": 5340 }, { "epoch": 0.97, "learning_rate": 6.117013108999037e-07, "loss": 1.2228, "step": 5345 }, { "epoch": 0.97, "learning_rate": 5.772794880548715e-07, "loss": 1.2393, "step": 5350 }, { "epoch": 0.97, "learning_rate": 5.438516090819024e-07, "loss": 1.2756, "step": 5355 }, { "epoch": 0.97, "learning_rate": 5.114180081645214e-07, "loss": 1.2465, "step": 5360 }, { "epoch": 0.97, "learning_rate": 4.799790095463164e-07, "loss": 1.2646, "step": 5365 }, { "epoch": 0.97, "learning_rate": 4.495349275276839e-07, "loss": 1.254, "step": 5370 }, { "epoch": 0.97, "learning_rate": 4.200860664626882e-07, "loss": 1.2278, "step": 5375 }, { "epoch": 0.97, "learning_rate": 3.9163272075599664e-07, "loss": 1.2163, "step": 5380 }, { "epoch": 0.98, "learning_rate": 3.641751748600042e-07, "loss": 1.2457, "step": 5385 }, { "epoch": 0.98, "learning_rate": 3.3771370327190246e-07, "loss": 1.2151, "step": 5390 }, { "epoch": 0.98, "learning_rate": 3.122485705310041e-07, "loss": 1.1919, "step": 5395 }, { "epoch": 0.98, "learning_rate": 2.877800312160783e-07, "loss": 1.2623, "step": 5400 }, { "epoch": 0.98, "learning_rate": 2.643083299427751e-07, "loss": 1.2373, "step": 5405 }, { "epoch": 0.98, "learning_rate": 2.4183370136121595e-07, "loss": 1.2287, "step": 5410 }, { "epoch": 0.98, "learning_rate": 2.2035637015365152e-07, "loss": 1.2602, "step": 5415 }, { "epoch": 0.98, "learning_rate": 1.9987655103217428e-07, "loss": 1.231, "step": 5420 }, { "epoch": 0.98, "learning_rate": 1.8039444873663158e-07, "loss": 1.1957, "step": 5425 }, { "epoch": 0.98, "learning_rate": 1.6191025803250492e-07, "loss": 1.2533, "step": 5430 }, { "epoch": 0.98, "learning_rate": 1.444241637090338e-07, "loss": 1.2552, "step": 5435 }, { "epoch": 0.99, "learning_rate": 1.2793634057732818e-07, "loss": 1.2871, "step": 5440 }, { "epoch": 0.99, "learning_rate": 1.1244695346864786e-07, "loss": 1.2085, "step": 5445 }, { "epoch": 0.99, "learning_rate": 9.795615723270369e-08, "loss": 1.2469, "step": 5450 }, { "epoch": 0.99, "learning_rate": 8.446409673615874e-08, "loss": 1.2277, "step": 5455 }, { "epoch": 0.99, "learning_rate": 7.197090686119623e-08, "loss": 1.2988, "step": 5460 }, { "epoch": 0.99, "learning_rate": 6.047671250408726e-08, "loss": 1.2706, "step": 5465 }, { "epoch": 0.99, "learning_rate": 4.998162857402511e-08, "loss": 1.2576, "step": 5470 }, { "epoch": 0.99, "learning_rate": 4.0485759991937264e-08, "loss": 1.2001, "step": 5475 }, { "epoch": 0.99, "learning_rate": 3.1989201689452967e-08, "loss": 1.2299, "step": 5480 }, { "epoch": 0.99, "learning_rate": 2.4492038607948353e-08, "loss": 1.2341, "step": 5485 }, { "epoch": 0.99, "learning_rate": 1.7994345697680547e-08, "loss": 1.2108, "step": 5490 }, { "epoch": 1.0, "learning_rate": 1.2496187917065972e-08, "loss": 1.2237, "step": 5495 }, { "epoch": 1.0, "learning_rate": 7.997620232014225e-09, "loss": 1.3129, "step": 5500 }, { "epoch": 1.0, "learning_rate": 4.498687615372976e-09, "loss": 1.1938, "step": 5505 }, { "epoch": 1.0, "learning_rate": 1.999425046506076e-09, "loss": 1.224, "step": 5510 }, { "epoch": 1.0, "learning_rate": 4.998575109160797e-10, "loss": 1.254, "step": 5515 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 1.2513, "step": 5520 }, { "epoch": 1.0, "eval_loss": 1.2233707904815674, "eval_runtime": 1795.568, "eval_samples_per_second": 16.333, "eval_steps_per_second": 1.361, "step": 5520 }, { "epoch": 1.0, "step": 5520, "total_flos": 3958946251407360.0, "train_loss": 1.269419441966043, "train_runtime": 32984.8618, "train_samples_per_second": 8.033, "train_steps_per_second": 0.167 } ], "logging_steps": 5, "max_steps": 5520, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 3958946251407360.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }