{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.14381040036815462, "eval_steps": 500, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.00019920000000000002, "loss": 8.4557, "step": 10 }, { "epoch": 0.0, "learning_rate": 0.0001984, "loss": 7.0381, "step": 20 }, { "epoch": 0.0, "learning_rate": 0.0001976, "loss": 6.6591, "step": 30 }, { "epoch": 0.0, "learning_rate": 0.0001968, "loss": 6.532, "step": 40 }, { "epoch": 0.0, "learning_rate": 0.000196, "loss": 6.482, "step": 50 }, { "epoch": 0.0, "learning_rate": 0.0001952, "loss": 6.2959, "step": 60 }, { "epoch": 0.0, "learning_rate": 0.0001944, "loss": 6.2219, "step": 70 }, { "epoch": 0.0, "learning_rate": 0.00019360000000000002, "loss": 6.0269, "step": 80 }, { "epoch": 0.01, "learning_rate": 0.0001928, "loss": 5.8261, "step": 90 }, { "epoch": 0.01, "learning_rate": 0.000192, "loss": 5.7313, "step": 100 }, { "epoch": 0.01, "learning_rate": 0.0001912, "loss": 5.5915, "step": 110 }, { "epoch": 0.01, "learning_rate": 0.0001904, "loss": 5.5185, "step": 120 }, { "epoch": 0.01, "learning_rate": 0.0001896, "loss": 5.4169, "step": 130 }, { "epoch": 0.01, "learning_rate": 0.0001888, "loss": 5.4288, "step": 140 }, { "epoch": 0.01, "learning_rate": 0.000188, "loss": 5.3205, "step": 150 }, { "epoch": 0.01, "learning_rate": 0.00018720000000000002, "loss": 5.3107, "step": 160 }, { "epoch": 0.01, "learning_rate": 0.00018640000000000003, "loss": 5.2239, "step": 170 }, { "epoch": 0.01, "learning_rate": 0.0001856, "loss": 5.1775, "step": 180 }, { "epoch": 0.01, "learning_rate": 0.00018480000000000002, "loss": 5.1681, "step": 190 }, { "epoch": 0.01, "learning_rate": 0.00018400000000000003, "loss": 5.1125, "step": 200 }, { "epoch": 0.01, "learning_rate": 0.0001832, "loss": 5.0995, "step": 210 }, { "epoch": 0.01, "learning_rate": 0.00018240000000000002, "loss": 4.992, "step": 220 }, { "epoch": 0.01, "learning_rate": 0.00018160000000000002, "loss": 4.9694, "step": 230 }, { "epoch": 0.01, "learning_rate": 0.0001808, "loss": 4.9517, "step": 240 }, { "epoch": 0.01, "learning_rate": 0.00018, "loss": 4.9058, "step": 250 }, { "epoch": 0.01, "learning_rate": 0.00017920000000000002, "loss": 4.9224, "step": 260 }, { "epoch": 0.02, "learning_rate": 0.0001784, "loss": 4.8916, "step": 270 }, { "epoch": 0.02, "learning_rate": 0.0001776, "loss": 4.8406, "step": 280 }, { "epoch": 0.02, "learning_rate": 0.00017680000000000001, "loss": 4.774, "step": 290 }, { "epoch": 0.02, "learning_rate": 0.00017600000000000002, "loss": 4.7396, "step": 300 }, { "epoch": 0.02, "learning_rate": 0.0001752, "loss": 4.7, "step": 310 }, { "epoch": 0.02, "learning_rate": 0.0001744, "loss": 4.7168, "step": 320 }, { "epoch": 0.02, "learning_rate": 0.00017360000000000002, "loss": 4.7133, "step": 330 }, { "epoch": 0.02, "learning_rate": 0.0001728, "loss": 4.7343, "step": 340 }, { "epoch": 0.02, "learning_rate": 0.000172, "loss": 4.6509, "step": 350 }, { "epoch": 0.02, "learning_rate": 0.00017120000000000001, "loss": 4.6127, "step": 360 }, { "epoch": 0.02, "learning_rate": 0.0001704, "loss": 4.6737, "step": 370 }, { "epoch": 0.02, "learning_rate": 0.0001696, "loss": 4.6422, "step": 380 }, { "epoch": 0.02, "learning_rate": 0.0001688, "loss": 4.5502, "step": 390 }, { "epoch": 0.02, "learning_rate": 0.000168, "loss": 4.536, "step": 400 }, { "epoch": 0.02, "learning_rate": 0.0001672, "loss": 4.6153, "step": 410 }, { "epoch": 0.02, "learning_rate": 0.0001664, "loss": 4.6164, "step": 420 }, { "epoch": 0.02, "learning_rate": 0.0001656, "loss": 4.5383, "step": 430 }, { "epoch": 0.03, "learning_rate": 0.0001648, "loss": 4.5488, "step": 440 }, { "epoch": 0.03, "learning_rate": 0.000164, "loss": 4.4269, "step": 450 }, { "epoch": 0.03, "learning_rate": 0.0001632, "loss": 4.495, "step": 460 }, { "epoch": 0.03, "learning_rate": 0.00016240000000000002, "loss": 4.4965, "step": 470 }, { "epoch": 0.03, "learning_rate": 0.00016160000000000002, "loss": 4.375, "step": 480 }, { "epoch": 0.03, "learning_rate": 0.0001608, "loss": 4.4913, "step": 490 }, { "epoch": 0.03, "learning_rate": 0.00016, "loss": 4.404, "step": 500 }, { "epoch": 0.03, "learning_rate": 0.00015920000000000002, "loss": 4.3919, "step": 510 }, { "epoch": 0.03, "learning_rate": 0.00015840000000000003, "loss": 4.3843, "step": 520 }, { "epoch": 0.03, "learning_rate": 0.0001576, "loss": 4.3839, "step": 530 }, { "epoch": 0.03, "learning_rate": 0.00015680000000000002, "loss": 4.35, "step": 540 }, { "epoch": 0.03, "learning_rate": 0.00015600000000000002, "loss": 4.3799, "step": 550 }, { "epoch": 0.03, "learning_rate": 0.0001552, "loss": 4.3709, "step": 560 }, { "epoch": 0.03, "learning_rate": 0.0001544, "loss": 4.3112, "step": 570 }, { "epoch": 0.03, "learning_rate": 0.00015360000000000002, "loss": 4.2727, "step": 580 }, { "epoch": 0.03, "learning_rate": 0.0001528, "loss": 4.3415, "step": 590 }, { "epoch": 0.03, "learning_rate": 0.000152, "loss": 4.2105, "step": 600 }, { "epoch": 0.04, "learning_rate": 0.00015120000000000002, "loss": 4.3271, "step": 610 }, { "epoch": 0.04, "learning_rate": 0.0001504, "loss": 4.2573, "step": 620 }, { "epoch": 0.04, "learning_rate": 0.0001496, "loss": 4.267, "step": 630 }, { "epoch": 0.04, "learning_rate": 0.0001488, "loss": 4.2071, "step": 640 }, { "epoch": 0.04, "learning_rate": 0.000148, "loss": 4.2608, "step": 650 }, { "epoch": 0.04, "learning_rate": 0.0001472, "loss": 4.2364, "step": 660 }, { "epoch": 0.04, "learning_rate": 0.0001464, "loss": 4.2047, "step": 670 }, { "epoch": 0.04, "learning_rate": 0.00014560000000000002, "loss": 4.1439, "step": 680 }, { "epoch": 0.04, "learning_rate": 0.0001448, "loss": 4.168, "step": 690 }, { "epoch": 0.04, "learning_rate": 0.000144, "loss": 4.1714, "step": 700 }, { "epoch": 0.04, "learning_rate": 0.0001432, "loss": 4.1681, "step": 710 }, { "epoch": 0.04, "learning_rate": 0.0001424, "loss": 4.1024, "step": 720 }, { "epoch": 0.04, "learning_rate": 0.0001416, "loss": 4.1481, "step": 730 }, { "epoch": 0.04, "learning_rate": 0.0001408, "loss": 4.179, "step": 740 }, { "epoch": 0.04, "learning_rate": 0.00014, "loss": 4.0661, "step": 750 }, { "epoch": 0.04, "learning_rate": 0.0001392, "loss": 4.1157, "step": 760 }, { "epoch": 0.04, "learning_rate": 0.0001384, "loss": 4.1339, "step": 770 }, { "epoch": 0.04, "learning_rate": 0.00013759999999999998, "loss": 4.1385, "step": 780 }, { "epoch": 0.05, "learning_rate": 0.00013680000000000002, "loss": 4.0792, "step": 790 }, { "epoch": 0.05, "learning_rate": 0.00013600000000000003, "loss": 4.0452, "step": 800 }, { "epoch": 0.05, "learning_rate": 0.0001352, "loss": 4.0236, "step": 810 }, { "epoch": 0.05, "learning_rate": 0.00013440000000000001, "loss": 4.073, "step": 820 }, { "epoch": 0.05, "learning_rate": 0.00013360000000000002, "loss": 4.0847, "step": 830 }, { "epoch": 0.05, "learning_rate": 0.0001328, "loss": 4.0611, "step": 840 }, { "epoch": 0.05, "learning_rate": 0.000132, "loss": 4.0137, "step": 850 }, { "epoch": 0.05, "learning_rate": 0.00013120000000000002, "loss": 4.0238, "step": 860 }, { "epoch": 0.05, "learning_rate": 0.0001304, "loss": 4.0407, "step": 870 }, { "epoch": 0.05, "learning_rate": 0.0001296, "loss": 3.9783, "step": 880 }, { "epoch": 0.05, "learning_rate": 0.00012880000000000001, "loss": 4.0045, "step": 890 }, { "epoch": 0.05, "learning_rate": 0.00012800000000000002, "loss": 3.9835, "step": 900 }, { "epoch": 0.05, "learning_rate": 0.0001272, "loss": 4.0443, "step": 910 }, { "epoch": 0.05, "learning_rate": 0.0001264, "loss": 4.0503, "step": 920 }, { "epoch": 0.05, "learning_rate": 0.00012560000000000002, "loss": 3.992, "step": 930 }, { "epoch": 0.05, "learning_rate": 0.0001248, "loss": 3.9498, "step": 940 }, { "epoch": 0.05, "learning_rate": 0.000124, "loss": 3.8644, "step": 950 }, { "epoch": 0.06, "learning_rate": 0.0001232, "loss": 3.9939, "step": 960 }, { "epoch": 0.06, "learning_rate": 0.0001224, "loss": 4.0153, "step": 970 }, { "epoch": 0.06, "learning_rate": 0.0001216, "loss": 3.9969, "step": 980 }, { "epoch": 0.06, "learning_rate": 0.0001208, "loss": 3.9262, "step": 990 }, { "epoch": 0.06, "learning_rate": 0.00012, "loss": 3.8844, "step": 1000 }, { "epoch": 0.06, "learning_rate": 0.0001192, "loss": 3.9665, "step": 1010 }, { "epoch": 0.06, "learning_rate": 0.0001184, "loss": 3.8693, "step": 1020 }, { "epoch": 0.06, "learning_rate": 0.0001176, "loss": 3.9703, "step": 1030 }, { "epoch": 0.06, "learning_rate": 0.00011679999999999999, "loss": 3.8615, "step": 1040 }, { "epoch": 0.06, "learning_rate": 0.000116, "loss": 3.8649, "step": 1050 }, { "epoch": 0.06, "learning_rate": 0.0001152, "loss": 3.9136, "step": 1060 }, { "epoch": 0.06, "learning_rate": 0.0001144, "loss": 3.934, "step": 1070 }, { "epoch": 0.06, "learning_rate": 0.0001136, "loss": 3.9192, "step": 1080 }, { "epoch": 0.06, "learning_rate": 0.00011279999999999999, "loss": 3.955, "step": 1090 }, { "epoch": 0.06, "learning_rate": 0.00011200000000000001, "loss": 3.919, "step": 1100 }, { "epoch": 0.06, "learning_rate": 0.00011120000000000002, "loss": 3.9481, "step": 1110 }, { "epoch": 0.06, "learning_rate": 0.00011040000000000001, "loss": 3.8208, "step": 1120 }, { "epoch": 0.07, "learning_rate": 0.00010960000000000001, "loss": 3.8789, "step": 1130 }, { "epoch": 0.07, "learning_rate": 0.00010880000000000002, "loss": 3.9033, "step": 1140 }, { "epoch": 0.07, "learning_rate": 0.00010800000000000001, "loss": 3.9091, "step": 1150 }, { "epoch": 0.07, "learning_rate": 0.00010720000000000002, "loss": 3.81, "step": 1160 }, { "epoch": 0.07, "learning_rate": 0.00010640000000000001, "loss": 3.8324, "step": 1170 }, { "epoch": 0.07, "learning_rate": 0.0001056, "loss": 3.8826, "step": 1180 }, { "epoch": 0.07, "learning_rate": 0.00010480000000000001, "loss": 3.7962, "step": 1190 }, { "epoch": 0.07, "learning_rate": 0.00010400000000000001, "loss": 3.8355, "step": 1200 }, { "epoch": 0.07, "learning_rate": 0.0001032, "loss": 3.8116, "step": 1210 }, { "epoch": 0.07, "learning_rate": 0.00010240000000000001, "loss": 3.9012, "step": 1220 }, { "epoch": 0.07, "learning_rate": 0.0001016, "loss": 3.7013, "step": 1230 }, { "epoch": 0.07, "learning_rate": 0.00010080000000000001, "loss": 3.8228, "step": 1240 }, { "epoch": 0.07, "learning_rate": 0.0001, "loss": 3.8041, "step": 1250 }, { "epoch": 0.07, "learning_rate": 9.92e-05, "loss": 3.7857, "step": 1260 }, { "epoch": 0.07, "learning_rate": 9.84e-05, "loss": 3.796, "step": 1270 }, { "epoch": 0.07, "learning_rate": 9.76e-05, "loss": 3.7516, "step": 1280 }, { "epoch": 0.07, "learning_rate": 9.680000000000001e-05, "loss": 3.8125, "step": 1290 }, { "epoch": 0.07, "learning_rate": 9.6e-05, "loss": 3.7846, "step": 1300 }, { "epoch": 0.08, "learning_rate": 9.52e-05, "loss": 3.7275, "step": 1310 }, { "epoch": 0.08, "learning_rate": 9.44e-05, "loss": 3.8137, "step": 1320 }, { "epoch": 0.08, "learning_rate": 9.360000000000001e-05, "loss": 3.6987, "step": 1330 }, { "epoch": 0.08, "learning_rate": 9.28e-05, "loss": 3.6313, "step": 1340 }, { "epoch": 0.08, "learning_rate": 9.200000000000001e-05, "loss": 3.7136, "step": 1350 }, { "epoch": 0.08, "learning_rate": 9.120000000000001e-05, "loss": 3.7877, "step": 1360 }, { "epoch": 0.08, "learning_rate": 9.04e-05, "loss": 3.7199, "step": 1370 }, { "epoch": 0.08, "learning_rate": 8.960000000000001e-05, "loss": 3.7796, "step": 1380 }, { "epoch": 0.08, "learning_rate": 8.88e-05, "loss": 3.7043, "step": 1390 }, { "epoch": 0.08, "learning_rate": 8.800000000000001e-05, "loss": 3.7328, "step": 1400 }, { "epoch": 0.08, "learning_rate": 8.72e-05, "loss": 3.7058, "step": 1410 }, { "epoch": 0.08, "learning_rate": 8.64e-05, "loss": 3.7437, "step": 1420 }, { "epoch": 0.08, "learning_rate": 8.560000000000001e-05, "loss": 3.7679, "step": 1430 }, { "epoch": 0.08, "learning_rate": 8.48e-05, "loss": 3.7289, "step": 1440 }, { "epoch": 0.08, "learning_rate": 8.4e-05, "loss": 3.6444, "step": 1450 }, { "epoch": 0.08, "learning_rate": 8.32e-05, "loss": 3.773, "step": 1460 }, { "epoch": 0.08, "learning_rate": 8.24e-05, "loss": 3.6826, "step": 1470 }, { "epoch": 0.09, "learning_rate": 8.16e-05, "loss": 3.6621, "step": 1480 }, { "epoch": 0.09, "learning_rate": 8.080000000000001e-05, "loss": 3.672, "step": 1490 }, { "epoch": 0.09, "learning_rate": 8e-05, "loss": 3.7035, "step": 1500 }, { "epoch": 0.09, "learning_rate": 7.920000000000001e-05, "loss": 3.6906, "step": 1510 }, { "epoch": 0.09, "learning_rate": 7.840000000000001e-05, "loss": 3.7303, "step": 1520 }, { "epoch": 0.09, "learning_rate": 7.76e-05, "loss": 3.7044, "step": 1530 }, { "epoch": 0.09, "learning_rate": 7.680000000000001e-05, "loss": 3.7036, "step": 1540 }, { "epoch": 0.09, "learning_rate": 7.6e-05, "loss": 3.6357, "step": 1550 }, { "epoch": 0.09, "learning_rate": 7.52e-05, "loss": 3.7038, "step": 1560 }, { "epoch": 0.09, "learning_rate": 7.44e-05, "loss": 3.6945, "step": 1570 }, { "epoch": 0.09, "learning_rate": 7.36e-05, "loss": 3.6436, "step": 1580 }, { "epoch": 0.09, "learning_rate": 7.280000000000001e-05, "loss": 3.6751, "step": 1590 }, { "epoch": 0.09, "learning_rate": 7.2e-05, "loss": 3.6696, "step": 1600 }, { "epoch": 0.09, "learning_rate": 7.12e-05, "loss": 3.5993, "step": 1610 }, { "epoch": 0.09, "learning_rate": 7.04e-05, "loss": 3.729, "step": 1620 }, { "epoch": 0.09, "learning_rate": 6.96e-05, "loss": 3.6762, "step": 1630 }, { "epoch": 0.09, "learning_rate": 6.879999999999999e-05, "loss": 3.6434, "step": 1640 }, { "epoch": 0.09, "learning_rate": 6.800000000000001e-05, "loss": 3.674, "step": 1650 }, { "epoch": 0.1, "learning_rate": 6.720000000000001e-05, "loss": 3.6305, "step": 1660 }, { "epoch": 0.1, "learning_rate": 6.64e-05, "loss": 3.6497, "step": 1670 }, { "epoch": 0.1, "learning_rate": 6.560000000000001e-05, "loss": 3.6324, "step": 1680 }, { "epoch": 0.1, "learning_rate": 6.48e-05, "loss": 3.6072, "step": 1690 }, { "epoch": 0.1, "learning_rate": 6.400000000000001e-05, "loss": 3.6292, "step": 1700 }, { "epoch": 0.1, "learning_rate": 6.32e-05, "loss": 3.6791, "step": 1710 }, { "epoch": 0.1, "learning_rate": 6.24e-05, "loss": 3.6148, "step": 1720 }, { "epoch": 0.1, "learning_rate": 6.16e-05, "loss": 3.6849, "step": 1730 }, { "epoch": 0.1, "learning_rate": 6.08e-05, "loss": 3.682, "step": 1740 }, { "epoch": 0.1, "learning_rate": 6e-05, "loss": 3.6862, "step": 1750 }, { "epoch": 0.1, "learning_rate": 5.92e-05, "loss": 3.7216, "step": 1760 }, { "epoch": 0.1, "learning_rate": 5.8399999999999997e-05, "loss": 3.5938, "step": 1770 }, { "epoch": 0.1, "learning_rate": 5.76e-05, "loss": 3.5927, "step": 1780 }, { "epoch": 0.1, "learning_rate": 5.68e-05, "loss": 3.5946, "step": 1790 }, { "epoch": 0.1, "learning_rate": 5.6000000000000006e-05, "loss": 3.5888, "step": 1800 }, { "epoch": 0.1, "learning_rate": 5.520000000000001e-05, "loss": 3.6444, "step": 1810 }, { "epoch": 0.1, "learning_rate": 5.440000000000001e-05, "loss": 3.6313, "step": 1820 }, { "epoch": 0.11, "learning_rate": 5.360000000000001e-05, "loss": 3.6639, "step": 1830 }, { "epoch": 0.11, "learning_rate": 5.28e-05, "loss": 3.5927, "step": 1840 }, { "epoch": 0.11, "learning_rate": 5.2000000000000004e-05, "loss": 3.616, "step": 1850 }, { "epoch": 0.11, "learning_rate": 5.1200000000000004e-05, "loss": 3.5431, "step": 1860 }, { "epoch": 0.11, "learning_rate": 5.0400000000000005e-05, "loss": 3.5989, "step": 1870 }, { "epoch": 0.11, "learning_rate": 4.96e-05, "loss": 3.5686, "step": 1880 }, { "epoch": 0.11, "learning_rate": 4.88e-05, "loss": 3.5774, "step": 1890 }, { "epoch": 0.11, "learning_rate": 4.8e-05, "loss": 3.6271, "step": 1900 }, { "epoch": 0.11, "learning_rate": 4.72e-05, "loss": 3.7588, "step": 1910 }, { "epoch": 0.11, "learning_rate": 4.64e-05, "loss": 3.6529, "step": 1920 }, { "epoch": 0.11, "learning_rate": 4.5600000000000004e-05, "loss": 3.5913, "step": 1930 }, { "epoch": 0.11, "learning_rate": 4.4800000000000005e-05, "loss": 3.6257, "step": 1940 }, { "epoch": 0.11, "learning_rate": 4.4000000000000006e-05, "loss": 3.5864, "step": 1950 }, { "epoch": 0.11, "learning_rate": 4.32e-05, "loss": 3.6019, "step": 1960 }, { "epoch": 0.11, "learning_rate": 4.24e-05, "loss": 3.6562, "step": 1970 }, { "epoch": 0.11, "learning_rate": 4.16e-05, "loss": 3.4892, "step": 1980 }, { "epoch": 0.11, "learning_rate": 4.08e-05, "loss": 3.5904, "step": 1990 }, { "epoch": 0.12, "learning_rate": 4e-05, "loss": 3.5266, "step": 2000 }, { "epoch": 0.12, "learning_rate": 3.9200000000000004e-05, "loss": 3.5661, "step": 2010 }, { "epoch": 0.12, "learning_rate": 3.8400000000000005e-05, "loss": 3.5291, "step": 2020 }, { "epoch": 0.12, "learning_rate": 3.76e-05, "loss": 3.5508, "step": 2030 }, { "epoch": 0.12, "learning_rate": 3.68e-05, "loss": 3.4954, "step": 2040 }, { "epoch": 0.12, "learning_rate": 3.6e-05, "loss": 3.5762, "step": 2050 }, { "epoch": 0.12, "learning_rate": 3.52e-05, "loss": 3.6292, "step": 2060 }, { "epoch": 0.12, "learning_rate": 3.4399999999999996e-05, "loss": 3.5931, "step": 2070 }, { "epoch": 0.12, "learning_rate": 3.3600000000000004e-05, "loss": 3.597, "step": 2080 }, { "epoch": 0.12, "learning_rate": 3.2800000000000004e-05, "loss": 3.5034, "step": 2090 }, { "epoch": 0.12, "learning_rate": 3.2000000000000005e-05, "loss": 3.6404, "step": 2100 }, { "epoch": 0.12, "learning_rate": 3.12e-05, "loss": 3.5314, "step": 2110 }, { "epoch": 0.12, "learning_rate": 3.04e-05, "loss": 3.5406, "step": 2120 }, { "epoch": 0.12, "learning_rate": 2.96e-05, "loss": 3.5308, "step": 2130 }, { "epoch": 0.12, "learning_rate": 2.88e-05, "loss": 3.6702, "step": 2140 }, { "epoch": 0.12, "learning_rate": 2.8000000000000003e-05, "loss": 3.5541, "step": 2150 }, { "epoch": 0.12, "learning_rate": 2.7200000000000004e-05, "loss": 3.5771, "step": 2160 }, { "epoch": 0.12, "learning_rate": 2.64e-05, "loss": 3.6234, "step": 2170 }, { "epoch": 0.13, "learning_rate": 2.5600000000000002e-05, "loss": 3.5795, "step": 2180 }, { "epoch": 0.13, "learning_rate": 2.48e-05, "loss": 3.5233, "step": 2190 }, { "epoch": 0.13, "learning_rate": 2.4e-05, "loss": 3.596, "step": 2200 }, { "epoch": 0.13, "learning_rate": 2.32e-05, "loss": 3.5541, "step": 2210 }, { "epoch": 0.13, "learning_rate": 2.2400000000000002e-05, "loss": 3.5712, "step": 2220 }, { "epoch": 0.13, "learning_rate": 2.16e-05, "loss": 3.6036, "step": 2230 }, { "epoch": 0.13, "learning_rate": 2.08e-05, "loss": 3.5356, "step": 2240 }, { "epoch": 0.13, "learning_rate": 2e-05, "loss": 3.5857, "step": 2250 }, { "epoch": 0.13, "learning_rate": 1.9200000000000003e-05, "loss": 3.5787, "step": 2260 }, { "epoch": 0.13, "learning_rate": 1.84e-05, "loss": 3.5258, "step": 2270 }, { "epoch": 0.13, "learning_rate": 1.76e-05, "loss": 3.5827, "step": 2280 }, { "epoch": 0.13, "learning_rate": 1.6800000000000002e-05, "loss": 3.5119, "step": 2290 }, { "epoch": 0.13, "learning_rate": 1.6000000000000003e-05, "loss": 3.5787, "step": 2300 }, { "epoch": 0.13, "learning_rate": 1.52e-05, "loss": 3.533, "step": 2310 }, { "epoch": 0.13, "learning_rate": 1.44e-05, "loss": 3.5161, "step": 2320 }, { "epoch": 0.13, "learning_rate": 1.3600000000000002e-05, "loss": 3.5642, "step": 2330 }, { "epoch": 0.13, "learning_rate": 1.2800000000000001e-05, "loss": 3.5549, "step": 2340 }, { "epoch": 0.14, "learning_rate": 1.2e-05, "loss": 3.5438, "step": 2350 }, { "epoch": 0.14, "learning_rate": 1.1200000000000001e-05, "loss": 3.5547, "step": 2360 }, { "epoch": 0.14, "learning_rate": 1.04e-05, "loss": 3.5629, "step": 2370 }, { "epoch": 0.14, "learning_rate": 9.600000000000001e-06, "loss": 3.5534, "step": 2380 }, { "epoch": 0.14, "learning_rate": 8.8e-06, "loss": 3.5361, "step": 2390 }, { "epoch": 0.14, "learning_rate": 8.000000000000001e-06, "loss": 3.4188, "step": 2400 }, { "epoch": 0.14, "learning_rate": 7.2e-06, "loss": 3.5082, "step": 2410 }, { "epoch": 0.14, "learning_rate": 6.4000000000000006e-06, "loss": 3.5241, "step": 2420 }, { "epoch": 0.14, "learning_rate": 5.600000000000001e-06, "loss": 3.6059, "step": 2430 }, { "epoch": 0.14, "learning_rate": 4.800000000000001e-06, "loss": 3.5657, "step": 2440 }, { "epoch": 0.14, "learning_rate": 4.000000000000001e-06, "loss": 3.5578, "step": 2450 }, { "epoch": 0.14, "learning_rate": 3.2000000000000003e-06, "loss": 3.5827, "step": 2460 }, { "epoch": 0.14, "learning_rate": 2.4000000000000003e-06, "loss": 3.5179, "step": 2470 }, { "epoch": 0.14, "learning_rate": 1.6000000000000001e-06, "loss": 3.4545, "step": 2480 }, { "epoch": 0.14, "learning_rate": 8.000000000000001e-07, "loss": 3.5484, "step": 2490 }, { "epoch": 0.14, "learning_rate": 0.0, "loss": 3.5167, "step": 2500 } ], "logging_steps": 10, "max_steps": 2500, "num_train_epochs": 1, "save_steps": 500, "total_flos": 2.2400955580416e+17, "trial_name": null, "trial_params": null }