{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9926262053318208, "eval_steps": 500, "global_step": 10500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0002997163925127623, "loss": 2.2978, "step": 10 }, { "epoch": 0.0, "learning_rate": 0.00029943278502552465, "loss": 2.2765, "step": 20 }, { "epoch": 0.0, "learning_rate": 0.000299149177538287, "loss": 2.2289, "step": 30 }, { "epoch": 0.0, "learning_rate": 0.0002988655700510493, "loss": 2.3721, "step": 40 }, { "epoch": 0.0, "learning_rate": 0.00029858196256381166, "loss": 2.135, "step": 50 }, { "epoch": 0.01, "learning_rate": 0.000298298355076574, "loss": 2.2899, "step": 60 }, { "epoch": 0.01, "learning_rate": 0.00029801474758933634, "loss": 2.1047, "step": 70 }, { "epoch": 0.01, "learning_rate": 0.0002977311401020987, "loss": 2.1712, "step": 80 }, { "epoch": 0.01, "learning_rate": 0.000297447532614861, "loss": 2.1048, "step": 90 }, { "epoch": 0.01, "learning_rate": 0.00029716392512762335, "loss": 2.1801, "step": 100 }, { "epoch": 0.01, "learning_rate": 0.0002968803176403857, "loss": 2.1241, "step": 110 }, { "epoch": 0.01, "learning_rate": 0.00029659671015314803, "loss": 2.1783, "step": 120 }, { "epoch": 0.01, "learning_rate": 0.00029631310266591037, "loss": 2.2588, "step": 130 }, { "epoch": 0.01, "learning_rate": 0.0002960294951786727, "loss": 2.1962, "step": 140 }, { "epoch": 0.01, "learning_rate": 0.00029574588769143504, "loss": 1.9953, "step": 150 }, { "epoch": 0.02, "learning_rate": 0.0002954622802041974, "loss": 2.15, "step": 160 }, { "epoch": 0.02, "learning_rate": 0.0002951786727169597, "loss": 2.1306, "step": 170 }, { "epoch": 0.02, "learning_rate": 0.000294895065229722, "loss": 2.2464, "step": 180 }, { "epoch": 0.02, "learning_rate": 0.0002946114577424844, "loss": 2.2083, "step": 190 }, { "epoch": 0.02, "learning_rate": 0.00029432785025524673, "loss": 2.0959, "step": 200 }, { "epoch": 0.02, "learning_rate": 0.00029404424276800907, "loss": 2.2083, "step": 210 }, { "epoch": 0.02, "learning_rate": 0.0002937606352807714, "loss": 2.2126, "step": 220 }, { "epoch": 0.02, "learning_rate": 0.0002934770277935337, "loss": 2.1264, "step": 230 }, { "epoch": 0.02, "learning_rate": 0.00029319342030629603, "loss": 2.1913, "step": 240 }, { "epoch": 0.02, "learning_rate": 0.0002929098128190584, "loss": 2.1374, "step": 250 }, { "epoch": 0.02, "learning_rate": 0.00029262620533182076, "loss": 2.1884, "step": 260 }, { "epoch": 0.03, "learning_rate": 0.00029234259784458304, "loss": 2.0839, "step": 270 }, { "epoch": 0.03, "learning_rate": 0.0002920873511060692, "loss": 1.9849, "step": 280 }, { "epoch": 0.03, "learning_rate": 0.00029180374361883153, "loss": 2.0729, "step": 290 }, { "epoch": 0.03, "learning_rate": 0.0002915201361315938, "loss": 1.925, "step": 300 }, { "epoch": 0.03, "learning_rate": 0.00029123652864435616, "loss": 2.0478, "step": 310 }, { "epoch": 0.03, "learning_rate": 0.00029095292115711855, "loss": 2.1546, "step": 320 }, { "epoch": 0.03, "learning_rate": 0.0002906693136698809, "loss": 2.1341, "step": 330 }, { "epoch": 0.03, "learning_rate": 0.00029038570618264317, "loss": 2.1653, "step": 340 }, { "epoch": 0.03, "learning_rate": 0.0002901020986954055, "loss": 1.9772, "step": 350 }, { "epoch": 0.03, "learning_rate": 0.00028981849120816785, "loss": 2.0511, "step": 360 }, { "epoch": 0.03, "learning_rate": 0.00028953488372093024, "loss": 2.0662, "step": 370 }, { "epoch": 0.04, "learning_rate": 0.0002892512762336926, "loss": 2.1331, "step": 380 }, { "epoch": 0.04, "learning_rate": 0.00028896766874645486, "loss": 2.1118, "step": 390 }, { "epoch": 0.04, "learning_rate": 0.0002886840612592172, "loss": 1.9541, "step": 400 }, { "epoch": 0.04, "learning_rate": 0.00028840045377197954, "loss": 2.1632, "step": 410 }, { "epoch": 0.04, "learning_rate": 0.0002881168462847419, "loss": 2.1462, "step": 420 }, { "epoch": 0.04, "learning_rate": 0.00028783323879750427, "loss": 2.1207, "step": 430 }, { "epoch": 0.04, "learning_rate": 0.00028754963131026655, "loss": 2.058, "step": 440 }, { "epoch": 0.04, "learning_rate": 0.0002872660238230289, "loss": 2.0473, "step": 450 }, { "epoch": 0.04, "learning_rate": 0.0002869824163357912, "loss": 2.1528, "step": 460 }, { "epoch": 0.04, "learning_rate": 0.00028669880884855356, "loss": 2.0529, "step": 470 }, { "epoch": 0.05, "learning_rate": 0.0002864152013613159, "loss": 2.1924, "step": 480 }, { "epoch": 0.05, "learning_rate": 0.00028613159387407824, "loss": 2.0982, "step": 490 }, { "epoch": 0.05, "learning_rate": 0.0002858479863868406, "loss": 2.0854, "step": 500 }, { "epoch": 0.05, "learning_rate": 0.0002855643788996029, "loss": 2.179, "step": 510 }, { "epoch": 0.05, "learning_rate": 0.00028528077141236525, "loss": 1.9784, "step": 520 }, { "epoch": 0.05, "learning_rate": 0.0002849971639251276, "loss": 2.1489, "step": 530 }, { "epoch": 0.05, "learning_rate": 0.00028471355643788993, "loss": 2.1446, "step": 540 }, { "epoch": 0.05, "learning_rate": 0.00028442994895065227, "loss": 2.0817, "step": 550 }, { "epoch": 0.05, "learning_rate": 0.0002841463414634146, "loss": 2.1402, "step": 560 }, { "epoch": 0.05, "learning_rate": 0.00028386273397617694, "loss": 2.027, "step": 570 }, { "epoch": 0.05, "learning_rate": 0.0002835791264889393, "loss": 2.098, "step": 580 }, { "epoch": 0.06, "learning_rate": 0.0002832955190017016, "loss": 2.0974, "step": 590 }, { "epoch": 0.06, "learning_rate": 0.00028301191151446396, "loss": 1.8892, "step": 600 }, { "epoch": 0.06, "learning_rate": 0.0002827283040272263, "loss": 1.9368, "step": 610 }, { "epoch": 0.06, "learning_rate": 0.00028244469653998863, "loss": 2.1184, "step": 620 }, { "epoch": 0.06, "learning_rate": 0.00028216108905275097, "loss": 2.1468, "step": 630 }, { "epoch": 0.06, "learning_rate": 0.0002818774815655133, "loss": 2.0261, "step": 640 }, { "epoch": 0.06, "learning_rate": 0.00028159387407827565, "loss": 2.2235, "step": 650 }, { "epoch": 0.06, "learning_rate": 0.000281310266591038, "loss": 2.1657, "step": 660 }, { "epoch": 0.06, "learning_rate": 0.0002810266591038003, "loss": 2.0709, "step": 670 }, { "epoch": 0.06, "learning_rate": 0.00028074305161656266, "loss": 2.1451, "step": 680 }, { "epoch": 0.07, "learning_rate": 0.000280459444129325, "loss": 2.0991, "step": 690 }, { "epoch": 0.07, "learning_rate": 0.00028017583664208734, "loss": 1.983, "step": 700 }, { "epoch": 0.07, "learning_rate": 0.0002798922291548497, "loss": 2.0995, "step": 710 }, { "epoch": 0.07, "learning_rate": 0.000279608621667612, "loss": 1.9696, "step": 720 }, { "epoch": 0.07, "learning_rate": 0.00027932501418037435, "loss": 2.1549, "step": 730 }, { "epoch": 0.07, "learning_rate": 0.00027904140669313663, "loss": 1.9938, "step": 740 }, { "epoch": 0.07, "learning_rate": 0.000278757799205899, "loss": 2.1518, "step": 750 }, { "epoch": 0.07, "learning_rate": 0.00027847419171866136, "loss": 2.0179, "step": 760 }, { "epoch": 0.07, "learning_rate": 0.0002781905842314237, "loss": 2.0337, "step": 770 }, { "epoch": 0.07, "learning_rate": 0.00027790697674418604, "loss": 2.1366, "step": 780 }, { "epoch": 0.07, "learning_rate": 0.0002776233692569483, "loss": 2.0452, "step": 790 }, { "epoch": 0.08, "learning_rate": 0.0002773397617697107, "loss": 2.177, "step": 800 }, { "epoch": 0.08, "learning_rate": 0.00027705615428247305, "loss": 2.0401, "step": 810 }, { "epoch": 0.08, "learning_rate": 0.0002767725467952354, "loss": 2.06, "step": 820 }, { "epoch": 0.08, "learning_rate": 0.00027648893930799773, "loss": 2.1167, "step": 830 }, { "epoch": 0.08, "learning_rate": 0.00027620533182076, "loss": 2.0292, "step": 840 }, { "epoch": 0.08, "learning_rate": 0.00027592172433352235, "loss": 2.0566, "step": 850 }, { "epoch": 0.08, "learning_rate": 0.00027563811684628474, "loss": 2.054, "step": 860 }, { "epoch": 0.08, "learning_rate": 0.0002753545093590471, "loss": 2.0844, "step": 870 }, { "epoch": 0.08, "learning_rate": 0.00027507090187180936, "loss": 2.1432, "step": 880 }, { "epoch": 0.08, "learning_rate": 0.0002747872943845717, "loss": 2.0712, "step": 890 }, { "epoch": 0.09, "learning_rate": 0.00027450368689733404, "loss": 2.1214, "step": 900 }, { "epoch": 0.09, "learning_rate": 0.00027422007941009643, "loss": 1.9663, "step": 910 }, { "epoch": 0.09, "learning_rate": 0.00027393647192285877, "loss": 2.0059, "step": 920 }, { "epoch": 0.09, "learning_rate": 0.00027368122518434487, "loss": 2.0752, "step": 930 }, { "epoch": 0.09, "learning_rate": 0.0002733976176971072, "loss": 2.0892, "step": 940 }, { "epoch": 0.09, "learning_rate": 0.0002731140102098695, "loss": 2.0141, "step": 950 }, { "epoch": 0.09, "learning_rate": 0.00027283040272263183, "loss": 2.0029, "step": 960 }, { "epoch": 0.09, "learning_rate": 0.00027254679523539417, "loss": 2.0242, "step": 970 }, { "epoch": 0.09, "learning_rate": 0.00027226318774815656, "loss": 2.1536, "step": 980 }, { "epoch": 0.09, "learning_rate": 0.0002719795802609189, "loss": 1.9372, "step": 990 }, { "epoch": 0.09, "learning_rate": 0.0002716959727736812, "loss": 2.0371, "step": 1000 }, { "epoch": 0.1, "learning_rate": 0.0002714123652864435, "loss": 2.1432, "step": 1010 }, { "epoch": 0.1, "learning_rate": 0.00027112875779920586, "loss": 2.1021, "step": 1020 }, { "epoch": 0.1, "learning_rate": 0.00027084515031196825, "loss": 2.0531, "step": 1030 }, { "epoch": 0.1, "learning_rate": 0.0002705615428247306, "loss": 2.0348, "step": 1040 }, { "epoch": 0.1, "learning_rate": 0.00027027793533749287, "loss": 1.9867, "step": 1050 }, { "epoch": 0.1, "learning_rate": 0.0002699943278502552, "loss": 2.1114, "step": 1060 }, { "epoch": 0.1, "learning_rate": 0.00026971072036301755, "loss": 2.0896, "step": 1070 }, { "epoch": 0.1, "learning_rate": 0.0002694271128757799, "loss": 2.1385, "step": 1080 }, { "epoch": 0.1, "learning_rate": 0.0002691435053885422, "loss": 2.0062, "step": 1090 }, { "epoch": 0.1, "learning_rate": 0.00026885989790130456, "loss": 2.0613, "step": 1100 }, { "epoch": 0.1, "learning_rate": 0.0002685762904140669, "loss": 2.1112, "step": 1110 }, { "epoch": 0.11, "learning_rate": 0.00026829268292682924, "loss": 2.0592, "step": 1120 }, { "epoch": 0.11, "learning_rate": 0.0002680090754395916, "loss": 2.0427, "step": 1130 }, { "epoch": 0.11, "learning_rate": 0.0002677254679523539, "loss": 2.1737, "step": 1140 }, { "epoch": 0.11, "learning_rate": 0.00026744186046511625, "loss": 2.1267, "step": 1150 }, { "epoch": 0.11, "learning_rate": 0.0002671582529778786, "loss": 2.1244, "step": 1160 }, { "epoch": 0.11, "learning_rate": 0.0002668746454906409, "loss": 2.1278, "step": 1170 }, { "epoch": 0.11, "learning_rate": 0.00026659103800340326, "loss": 2.0743, "step": 1180 }, { "epoch": 0.11, "learning_rate": 0.0002663074305161656, "loss": 2.1632, "step": 1190 }, { "epoch": 0.11, "learning_rate": 0.00026602382302892794, "loss": 2.0233, "step": 1200 }, { "epoch": 0.11, "learning_rate": 0.0002657402155416903, "loss": 2.0631, "step": 1210 }, { "epoch": 0.12, "learning_rate": 0.0002654566080544526, "loss": 2.2041, "step": 1220 }, { "epoch": 0.12, "learning_rate": 0.00026517300056721495, "loss": 2.095, "step": 1230 }, { "epoch": 0.12, "learning_rate": 0.0002648893930799773, "loss": 1.9956, "step": 1240 }, { "epoch": 0.12, "learning_rate": 0.00026460578559273963, "loss": 1.9052, "step": 1250 }, { "epoch": 0.12, "learning_rate": 0.00026432217810550197, "loss": 1.8921, "step": 1260 }, { "epoch": 0.12, "learning_rate": 0.0002640385706182643, "loss": 2.171, "step": 1270 }, { "epoch": 0.12, "learning_rate": 0.00026375496313102664, "loss": 1.9069, "step": 1280 }, { "epoch": 0.12, "learning_rate": 0.000263471355643789, "loss": 1.9907, "step": 1290 }, { "epoch": 0.12, "learning_rate": 0.0002631877481565513, "loss": 2.1011, "step": 1300 }, { "epoch": 0.12, "learning_rate": 0.00026290414066931366, "loss": 2.1417, "step": 1310 }, { "epoch": 0.12, "learning_rate": 0.000262620533182076, "loss": 2.039, "step": 1320 }, { "epoch": 0.13, "learning_rate": 0.00026233692569483833, "loss": 2.0006, "step": 1330 }, { "epoch": 0.13, "learning_rate": 0.00026205331820760067, "loss": 1.9599, "step": 1340 }, { "epoch": 0.13, "learning_rate": 0.00026176971072036295, "loss": 1.893, "step": 1350 }, { "epoch": 0.13, "learning_rate": 0.0002615144639818491, "loss": 2.0587, "step": 1360 }, { "epoch": 0.13, "learning_rate": 0.00026123085649461144, "loss": 1.9907, "step": 1370 }, { "epoch": 0.13, "learning_rate": 0.0002609472490073738, "loss": 2.0395, "step": 1380 }, { "epoch": 0.13, "learning_rate": 0.0002606636415201361, "loss": 2.1024, "step": 1390 }, { "epoch": 0.13, "learning_rate": 0.00026038003403289846, "loss": 2.0419, "step": 1400 }, { "epoch": 0.13, "learning_rate": 0.0002600964265456608, "loss": 2.0421, "step": 1410 }, { "epoch": 0.13, "learning_rate": 0.0002598128190584231, "loss": 1.9999, "step": 1420 }, { "epoch": 0.14, "learning_rate": 0.00025952921157118547, "loss": 2.0371, "step": 1430 }, { "epoch": 0.14, "learning_rate": 0.0002592456040839478, "loss": 2.0273, "step": 1440 }, { "epoch": 0.14, "learning_rate": 0.00025896199659671015, "loss": 2.0051, "step": 1450 }, { "epoch": 0.14, "learning_rate": 0.0002586783891094725, "loss": 2.0452, "step": 1460 }, { "epoch": 0.14, "learning_rate": 0.00025839478162223477, "loss": 2.0932, "step": 1470 }, { "epoch": 0.14, "learning_rate": 0.00025811117413499716, "loss": 2.0414, "step": 1480 }, { "epoch": 0.14, "learning_rate": 0.0002578275666477595, "loss": 2.1954, "step": 1490 }, { "epoch": 0.14, "learning_rate": 0.00025754395916052184, "loss": 2.0392, "step": 1500 }, { "epoch": 0.14, "learning_rate": 0.0002572603516732841, "loss": 2.0825, "step": 1510 }, { "epoch": 0.14, "learning_rate": 0.00025697674418604646, "loss": 2.0541, "step": 1520 }, { "epoch": 0.14, "learning_rate": 0.0002566931366988088, "loss": 2.0768, "step": 1530 }, { "epoch": 0.15, "learning_rate": 0.0002564095292115712, "loss": 2.0877, "step": 1540 }, { "epoch": 0.15, "learning_rate": 0.00025612592172433353, "loss": 2.0012, "step": 1550 }, { "epoch": 0.15, "learning_rate": 0.0002558423142370958, "loss": 2.1406, "step": 1560 }, { "epoch": 0.15, "learning_rate": 0.00025555870674985815, "loss": 1.9447, "step": 1570 }, { "epoch": 0.15, "learning_rate": 0.0002552750992626205, "loss": 2.0579, "step": 1580 }, { "epoch": 0.15, "learning_rate": 0.0002549914917753829, "loss": 2.0272, "step": 1590 }, { "epoch": 0.15, "learning_rate": 0.0002547078842881452, "loss": 1.9909, "step": 1600 }, { "epoch": 0.15, "learning_rate": 0.0002544242768009075, "loss": 1.891, "step": 1610 }, { "epoch": 0.15, "learning_rate": 0.00025414066931366984, "loss": 2.0163, "step": 1620 }, { "epoch": 0.15, "learning_rate": 0.0002538570618264322, "loss": 2.1711, "step": 1630 }, { "epoch": 0.16, "learning_rate": 0.00025357345433919457, "loss": 2.0521, "step": 1640 }, { "epoch": 0.16, "learning_rate": 0.00025328984685195685, "loss": 2.0709, "step": 1650 }, { "epoch": 0.16, "learning_rate": 0.0002530062393647192, "loss": 2.0268, "step": 1660 }, { "epoch": 0.16, "learning_rate": 0.00025272263187748153, "loss": 1.9734, "step": 1670 }, { "epoch": 0.16, "learning_rate": 0.00025243902439024387, "loss": 2.028, "step": 1680 }, { "epoch": 0.16, "learning_rate": 0.0002521554169030062, "loss": 1.9305, "step": 1690 }, { "epoch": 0.16, "learning_rate": 0.0002519001701644923, "loss": 2.1173, "step": 1700 }, { "epoch": 0.16, "learning_rate": 0.0002516165626772547, "loss": 2.0794, "step": 1710 }, { "epoch": 0.16, "learning_rate": 0.000251332955190017, "loss": 2.0245, "step": 1720 }, { "epoch": 0.16, "learning_rate": 0.0002510493477027793, "loss": 1.9065, "step": 1730 }, { "epoch": 0.16, "learning_rate": 0.00025076574021554165, "loss": 1.9802, "step": 1740 }, { "epoch": 0.17, "learning_rate": 0.000250482132728304, "loss": 2.0261, "step": 1750 }, { "epoch": 0.17, "learning_rate": 0.00025019852524106633, "loss": 2.0128, "step": 1760 }, { "epoch": 0.17, "learning_rate": 0.00024991491775382867, "loss": 2.0552, "step": 1770 }, { "epoch": 0.17, "learning_rate": 0.000249631310266591, "loss": 2.0746, "step": 1780 }, { "epoch": 0.17, "learning_rate": 0.00024934770277935334, "loss": 2.1647, "step": 1790 }, { "epoch": 0.17, "learning_rate": 0.0002490640952921157, "loss": 2.0698, "step": 1800 }, { "epoch": 0.17, "learning_rate": 0.000248780487804878, "loss": 2.0405, "step": 1810 }, { "epoch": 0.17, "learning_rate": 0.00024849688031764036, "loss": 1.9449, "step": 1820 }, { "epoch": 0.17, "learning_rate": 0.0002482132728304027, "loss": 1.9748, "step": 1830 }, { "epoch": 0.17, "learning_rate": 0.00024792966534316503, "loss": 2.0121, "step": 1840 }, { "epoch": 0.17, "learning_rate": 0.00024764605785592737, "loss": 2.0935, "step": 1850 }, { "epoch": 0.18, "learning_rate": 0.0002473624503686897, "loss": 1.9214, "step": 1860 }, { "epoch": 0.18, "learning_rate": 0.00024707884288145205, "loss": 1.9404, "step": 1870 }, { "epoch": 0.18, "learning_rate": 0.0002467952353942144, "loss": 2.0133, "step": 1880 }, { "epoch": 0.18, "learning_rate": 0.0002465116279069767, "loss": 2.098, "step": 1890 }, { "epoch": 0.18, "learning_rate": 0.00024622802041973906, "loss": 2.0551, "step": 1900 }, { "epoch": 0.18, "learning_rate": 0.0002459444129325014, "loss": 1.9603, "step": 1910 }, { "epoch": 0.18, "learning_rate": 0.00024566080544526374, "loss": 2.0956, "step": 1920 }, { "epoch": 0.18, "learning_rate": 0.0002453771979580261, "loss": 1.9561, "step": 1930 }, { "epoch": 0.18, "learning_rate": 0.0002450935904707884, "loss": 1.9836, "step": 1940 }, { "epoch": 0.18, "learning_rate": 0.00024480998298355075, "loss": 1.8087, "step": 1950 }, { "epoch": 0.19, "learning_rate": 0.0002445263754963131, "loss": 1.919, "step": 1960 }, { "epoch": 0.19, "learning_rate": 0.00024424276800907543, "loss": 2.0155, "step": 1970 }, { "epoch": 0.19, "learning_rate": 0.00024395916052183777, "loss": 2.0345, "step": 1980 }, { "epoch": 0.19, "learning_rate": 0.0002436755530346001, "loss": 2.0431, "step": 1990 }, { "epoch": 0.19, "learning_rate": 0.00024339194554736244, "loss": 2.0517, "step": 2000 }, { "epoch": 0.19, "learning_rate": 0.00024310833806012475, "loss": 2.0122, "step": 2010 }, { "epoch": 0.19, "learning_rate": 0.0002428247305728871, "loss": 1.8942, "step": 2020 }, { "epoch": 0.19, "learning_rate": 0.00024254112308564943, "loss": 1.8664, "step": 2030 }, { "epoch": 0.19, "learning_rate": 0.0002422575155984118, "loss": 1.9414, "step": 2040 }, { "epoch": 0.19, "learning_rate": 0.00024197390811117413, "loss": 2.0272, "step": 2050 }, { "epoch": 0.19, "learning_rate": 0.00024169030062393644, "loss": 1.9806, "step": 2060 }, { "epoch": 0.2, "learning_rate": 0.00024140669313669878, "loss": 2.0221, "step": 2070 }, { "epoch": 0.2, "learning_rate": 0.00024112308564946112, "loss": 2.0129, "step": 2080 }, { "epoch": 0.2, "learning_rate": 0.00024083947816222348, "loss": 2.0592, "step": 2090 }, { "epoch": 0.2, "learning_rate": 0.0002405558706749858, "loss": 1.9784, "step": 2100 }, { "epoch": 0.2, "learning_rate": 0.00024027226318774813, "loss": 1.9588, "step": 2110 }, { "epoch": 0.2, "learning_rate": 0.00023998865570051047, "loss": 2.0713, "step": 2120 }, { "epoch": 0.2, "learning_rate": 0.0002397050482132728, "loss": 1.8854, "step": 2130 }, { "epoch": 0.2, "learning_rate": 0.00023942144072603512, "loss": 2.0537, "step": 2140 }, { "epoch": 0.2, "learning_rate": 0.00023913783323879748, "loss": 2.0167, "step": 2150 }, { "epoch": 0.2, "learning_rate": 0.00023885422575155982, "loss": 1.9523, "step": 2160 }, { "epoch": 0.21, "learning_rate": 0.00023857061826432216, "loss": 1.9412, "step": 2170 }, { "epoch": 0.21, "learning_rate": 0.0002382870107770845, "loss": 1.9623, "step": 2180 }, { "epoch": 0.21, "learning_rate": 0.0002380034032898468, "loss": 2.0483, "step": 2190 }, { "epoch": 0.21, "learning_rate": 0.00023771979580260917, "loss": 1.9854, "step": 2200 }, { "epoch": 0.21, "learning_rate": 0.0002374361883153715, "loss": 1.9711, "step": 2210 }, { "epoch": 0.21, "learning_rate": 0.00023715258082813385, "loss": 1.9786, "step": 2220 }, { "epoch": 0.21, "learning_rate": 0.0002368689733408962, "loss": 2.0829, "step": 2230 }, { "epoch": 0.21, "learning_rate": 0.0002365853658536585, "loss": 1.8776, "step": 2240 }, { "epoch": 0.21, "learning_rate": 0.00023630175836642086, "loss": 1.9447, "step": 2250 }, { "epoch": 0.21, "learning_rate": 0.0002360181508791832, "loss": 1.997, "step": 2260 }, { "epoch": 0.21, "learning_rate": 0.00023573454339194554, "loss": 2.0287, "step": 2270 }, { "epoch": 0.22, "learning_rate": 0.00023545093590470785, "loss": 2.0302, "step": 2280 }, { "epoch": 0.22, "learning_rate": 0.0002351673284174702, "loss": 1.9696, "step": 2290 }, { "epoch": 0.22, "learning_rate": 0.00023488372093023252, "loss": 2.0416, "step": 2300 }, { "epoch": 0.22, "learning_rate": 0.0002346001134429949, "loss": 1.8271, "step": 2310 }, { "epoch": 0.22, "learning_rate": 0.00023431650595575723, "loss": 1.9771, "step": 2320 }, { "epoch": 0.22, "learning_rate": 0.00023403289846851954, "loss": 2.0091, "step": 2330 }, { "epoch": 0.22, "learning_rate": 0.00023374929098128188, "loss": 2.1508, "step": 2340 }, { "epoch": 0.22, "learning_rate": 0.00023346568349404421, "loss": 1.9832, "step": 2350 }, { "epoch": 0.22, "learning_rate": 0.00023318207600680658, "loss": 2.0144, "step": 2360 }, { "epoch": 0.22, "learning_rate": 0.0002328984685195689, "loss": 1.9328, "step": 2370 }, { "epoch": 0.22, "learning_rate": 0.00023261486103233123, "loss": 1.891, "step": 2380 }, { "epoch": 0.23, "learning_rate": 0.00023233125354509357, "loss": 2.0427, "step": 2390 }, { "epoch": 0.23, "learning_rate": 0.0002320476460578559, "loss": 1.874, "step": 2400 }, { "epoch": 0.23, "learning_rate": 0.00023176403857061827, "loss": 1.9653, "step": 2410 }, { "epoch": 0.23, "learning_rate": 0.00023148043108338058, "loss": 2.0203, "step": 2420 }, { "epoch": 0.23, "learning_rate": 0.00023119682359614292, "loss": 1.8663, "step": 2430 }, { "epoch": 0.23, "learning_rate": 0.00023091321610890526, "loss": 1.8917, "step": 2440 }, { "epoch": 0.23, "learning_rate": 0.0002306296086216676, "loss": 1.9734, "step": 2450 }, { "epoch": 0.23, "learning_rate": 0.0002303460011344299, "loss": 1.9634, "step": 2460 }, { "epoch": 0.23, "learning_rate": 0.00023006239364719227, "loss": 1.9635, "step": 2470 }, { "epoch": 0.23, "learning_rate": 0.0002297787861599546, "loss": 1.9262, "step": 2480 }, { "epoch": 0.24, "learning_rate": 0.00022949517867271695, "loss": 1.9753, "step": 2490 }, { "epoch": 0.24, "learning_rate": 0.00022921157118547928, "loss": 2.0465, "step": 2500 }, { "epoch": 0.24, "learning_rate": 0.0002289279636982416, "loss": 1.879, "step": 2510 }, { "epoch": 0.24, "learning_rate": 0.00022864435621100396, "loss": 2.0032, "step": 2520 }, { "epoch": 0.24, "learning_rate": 0.0002283607487237663, "loss": 1.9896, "step": 2530 }, { "epoch": 0.24, "learning_rate": 0.00022807714123652864, "loss": 1.9966, "step": 2540 }, { "epoch": 0.24, "learning_rate": 0.00022779353374929095, "loss": 1.9653, "step": 2550 }, { "epoch": 0.24, "learning_rate": 0.00022750992626205328, "loss": 1.9386, "step": 2560 }, { "epoch": 0.24, "learning_rate": 0.00022722631877481562, "loss": 2.0176, "step": 2570 }, { "epoch": 0.24, "learning_rate": 0.000226942711287578, "loss": 1.9585, "step": 2580 }, { "epoch": 0.24, "learning_rate": 0.00022665910380034033, "loss": 1.9366, "step": 2590 }, { "epoch": 0.25, "learning_rate": 0.00022637549631310264, "loss": 2.0283, "step": 2600 }, { "epoch": 0.25, "learning_rate": 0.00022609188882586497, "loss": 1.9384, "step": 2610 }, { "epoch": 0.25, "learning_rate": 0.0002258082813386273, "loss": 2.1084, "step": 2620 }, { "epoch": 0.25, "learning_rate": 0.00022552467385138968, "loss": 2.0181, "step": 2630 }, { "epoch": 0.25, "learning_rate": 0.000225241066364152, "loss": 1.8862, "step": 2640 }, { "epoch": 0.25, "learning_rate": 0.00022495745887691433, "loss": 2.0016, "step": 2650 }, { "epoch": 0.25, "learning_rate": 0.00022467385138967666, "loss": 1.9543, "step": 2660 }, { "epoch": 0.25, "learning_rate": 0.000224390243902439, "loss": 1.9392, "step": 2670 }, { "epoch": 0.25, "learning_rate": 0.00022410663641520137, "loss": 1.9454, "step": 2680 }, { "epoch": 0.25, "learning_rate": 0.00022382302892796368, "loss": 2.0417, "step": 2690 }, { "epoch": 0.26, "learning_rate": 0.00022353942144072602, "loss": 2.0467, "step": 2700 }, { "epoch": 0.26, "learning_rate": 0.00022325581395348835, "loss": 1.8961, "step": 2710 }, { "epoch": 0.26, "learning_rate": 0.0002229722064662507, "loss": 2.0104, "step": 2720 }, { "epoch": 0.26, "learning_rate": 0.000222688598979013, "loss": 1.9292, "step": 2730 }, { "epoch": 0.26, "learning_rate": 0.00022240499149177537, "loss": 2.0145, "step": 2740 }, { "epoch": 0.26, "learning_rate": 0.0002221213840045377, "loss": 2.0363, "step": 2750 }, { "epoch": 0.26, "learning_rate": 0.00022183777651730004, "loss": 2.0434, "step": 2760 }, { "epoch": 0.26, "learning_rate": 0.00022155416903006238, "loss": 1.9628, "step": 2770 }, { "epoch": 0.26, "learning_rate": 0.0002212705615428247, "loss": 2.0745, "step": 2780 }, { "epoch": 0.26, "learning_rate": 0.00022098695405558706, "loss": 2.0417, "step": 2790 }, { "epoch": 0.26, "learning_rate": 0.0002207033465683494, "loss": 1.9554, "step": 2800 }, { "epoch": 0.27, "learning_rate": 0.00022041973908111173, "loss": 2.0323, "step": 2810 }, { "epoch": 0.27, "learning_rate": 0.00022013613159387404, "loss": 2.0586, "step": 2820 }, { "epoch": 0.27, "learning_rate": 0.00021985252410663638, "loss": 1.8894, "step": 2830 }, { "epoch": 0.27, "learning_rate": 0.00021956891661939875, "loss": 1.8927, "step": 2840 }, { "epoch": 0.27, "learning_rate": 0.00021928530913216108, "loss": 1.8973, "step": 2850 }, { "epoch": 0.27, "learning_rate": 0.00021900170164492342, "loss": 1.9644, "step": 2860 }, { "epoch": 0.27, "learning_rate": 0.00021871809415768573, "loss": 1.9039, "step": 2870 }, { "epoch": 0.27, "learning_rate": 0.00021843448667044807, "loss": 1.9316, "step": 2880 }, { "epoch": 0.27, "learning_rate": 0.0002181508791832104, "loss": 1.8483, "step": 2890 }, { "epoch": 0.27, "learning_rate": 0.00021786727169597277, "loss": 2.0534, "step": 2900 }, { "epoch": 0.28, "learning_rate": 0.00021758366420873508, "loss": 1.9126, "step": 2910 }, { "epoch": 0.28, "learning_rate": 0.00021730005672149742, "loss": 2.0031, "step": 2920 }, { "epoch": 0.28, "learning_rate": 0.00021701644923425976, "loss": 2.1399, "step": 2930 }, { "epoch": 0.28, "learning_rate": 0.0002167328417470221, "loss": 2.1048, "step": 2940 }, { "epoch": 0.28, "learning_rate": 0.00021644923425978446, "loss": 1.8944, "step": 2950 }, { "epoch": 0.28, "learning_rate": 0.00021616562677254677, "loss": 2.0135, "step": 2960 }, { "epoch": 0.28, "learning_rate": 0.0002158820192853091, "loss": 1.9103, "step": 2970 }, { "epoch": 0.28, "learning_rate": 0.00021559841179807145, "loss": 1.8927, "step": 2980 }, { "epoch": 0.28, "learning_rate": 0.0002153148043108338, "loss": 1.9573, "step": 2990 }, { "epoch": 0.28, "learning_rate": 0.0002150311968235961, "loss": 2.0191, "step": 3000 }, { "epoch": 0.28, "learning_rate": 0.00021474758933635846, "loss": 1.9678, "step": 3010 }, { "epoch": 0.29, "learning_rate": 0.0002144639818491208, "loss": 1.8932, "step": 3020 }, { "epoch": 0.29, "learning_rate": 0.00021418037436188314, "loss": 1.9244, "step": 3030 }, { "epoch": 0.29, "learning_rate": 0.00021389676687464548, "loss": 2.0792, "step": 3040 }, { "epoch": 0.29, "learning_rate": 0.0002136131593874078, "loss": 1.8409, "step": 3050 }, { "epoch": 0.29, "learning_rate": 0.00021332955190017015, "loss": 1.9291, "step": 3060 }, { "epoch": 0.29, "learning_rate": 0.0002130459444129325, "loss": 1.9294, "step": 3070 }, { "epoch": 0.29, "learning_rate": 0.00021276233692569483, "loss": 2.0805, "step": 3080 }, { "epoch": 0.29, "learning_rate": 0.00021247872943845714, "loss": 1.8575, "step": 3090 }, { "epoch": 0.29, "learning_rate": 0.00021219512195121948, "loss": 2.0922, "step": 3100 }, { "epoch": 0.29, "learning_rate": 0.00021191151446398184, "loss": 2.0071, "step": 3110 }, { "epoch": 0.29, "learning_rate": 0.00021162790697674418, "loss": 1.9723, "step": 3120 }, { "epoch": 0.3, "learning_rate": 0.00021134429948950652, "loss": 1.936, "step": 3130 }, { "epoch": 0.3, "learning_rate": 0.00021106069200226883, "loss": 2.0164, "step": 3140 }, { "epoch": 0.3, "learning_rate": 0.00021077708451503117, "loss": 2.029, "step": 3150 }, { "epoch": 0.3, "learning_rate": 0.0002104934770277935, "loss": 1.9273, "step": 3160 }, { "epoch": 0.3, "learning_rate": 0.00021020986954055587, "loss": 2.0218, "step": 3170 }, { "epoch": 0.3, "learning_rate": 0.00020992626205331818, "loss": 1.9076, "step": 3180 }, { "epoch": 0.3, "learning_rate": 0.00020964265456608052, "loss": 1.9888, "step": 3190 }, { "epoch": 0.3, "learning_rate": 0.00020935904707884286, "loss": 1.9866, "step": 3200 }, { "epoch": 0.3, "learning_rate": 0.0002090754395916052, "loss": 1.9614, "step": 3210 }, { "epoch": 0.3, "learning_rate": 0.00020879183210436756, "loss": 1.9859, "step": 3220 }, { "epoch": 0.31, "learning_rate": 0.00020850822461712987, "loss": 1.9241, "step": 3230 }, { "epoch": 0.31, "learning_rate": 0.0002082246171298922, "loss": 2.0371, "step": 3240 }, { "epoch": 0.31, "learning_rate": 0.00020794100964265455, "loss": 1.982, "step": 3250 }, { "epoch": 0.31, "learning_rate": 0.00020765740215541689, "loss": 2.0358, "step": 3260 }, { "epoch": 0.31, "learning_rate": 0.00020737379466817925, "loss": 1.9841, "step": 3270 }, { "epoch": 0.31, "learning_rate": 0.00020709018718094156, "loss": 1.9346, "step": 3280 }, { "epoch": 0.31, "learning_rate": 0.0002068065796937039, "loss": 1.8959, "step": 3290 }, { "epoch": 0.31, "learning_rate": 0.00020652297220646624, "loss": 1.9524, "step": 3300 }, { "epoch": 0.31, "learning_rate": 0.00020623936471922855, "loss": 1.8774, "step": 3310 }, { "epoch": 0.31, "learning_rate": 0.00020595575723199089, "loss": 2.0079, "step": 3320 }, { "epoch": 0.31, "learning_rate": 0.00020567214974475325, "loss": 1.9101, "step": 3330 }, { "epoch": 0.32, "learning_rate": 0.0002053885422575156, "loss": 1.992, "step": 3340 }, { "epoch": 0.32, "learning_rate": 0.00020510493477027793, "loss": 1.9441, "step": 3350 }, { "epoch": 0.32, "learning_rate": 0.00020482132728304024, "loss": 2.0872, "step": 3360 }, { "epoch": 0.32, "learning_rate": 0.00020453771979580258, "loss": 2.1295, "step": 3370 }, { "epoch": 0.32, "learning_rate": 0.00020425411230856494, "loss": 1.9662, "step": 3380 }, { "epoch": 0.32, "learning_rate": 0.00020397050482132728, "loss": 2.0128, "step": 3390 }, { "epoch": 0.32, "learning_rate": 0.00020368689733408962, "loss": 1.9651, "step": 3400 }, { "epoch": 0.32, "learning_rate": 0.00020340328984685193, "loss": 1.9138, "step": 3410 }, { "epoch": 0.32, "learning_rate": 0.00020311968235961427, "loss": 2.0154, "step": 3420 }, { "epoch": 0.32, "learning_rate": 0.0002028360748723766, "loss": 2.0664, "step": 3430 }, { "epoch": 0.33, "learning_rate": 0.00020255246738513897, "loss": 2.0125, "step": 3440 }, { "epoch": 0.33, "learning_rate": 0.00020226885989790128, "loss": 1.8252, "step": 3450 }, { "epoch": 0.33, "learning_rate": 0.00020198525241066362, "loss": 1.9527, "step": 3460 }, { "epoch": 0.33, "learning_rate": 0.00020170164492342595, "loss": 1.9952, "step": 3470 }, { "epoch": 0.33, "learning_rate": 0.0002014180374361883, "loss": 1.8446, "step": 3480 }, { "epoch": 0.33, "learning_rate": 0.00020113442994895066, "loss": 1.9602, "step": 3490 }, { "epoch": 0.33, "learning_rate": 0.00020085082246171297, "loss": 2.0458, "step": 3500 }, { "epoch": 0.33, "learning_rate": 0.0002005672149744753, "loss": 1.9447, "step": 3510 }, { "epoch": 0.33, "learning_rate": 0.00020028360748723764, "loss": 2.0325, "step": 3520 }, { "epoch": 0.33, "learning_rate": 0.00019999999999999998, "loss": 1.9888, "step": 3530 }, { "epoch": 0.33, "learning_rate": 0.00019971639251276235, "loss": 1.9477, "step": 3540 }, { "epoch": 0.34, "learning_rate": 0.00019943278502552466, "loss": 1.9787, "step": 3550 }, { "epoch": 0.34, "learning_rate": 0.000199149177538287, "loss": 1.8773, "step": 3560 }, { "epoch": 0.34, "learning_rate": 0.00019886557005104933, "loss": 1.9589, "step": 3570 }, { "epoch": 0.34, "learning_rate": 0.00019858196256381165, "loss": 2.0078, "step": 3580 }, { "epoch": 0.34, "learning_rate": 0.00019829835507657398, "loss": 1.9154, "step": 3590 }, { "epoch": 0.34, "learning_rate": 0.00019801474758933635, "loss": 1.9619, "step": 3600 }, { "epoch": 0.34, "learning_rate": 0.00019773114010209869, "loss": 1.9137, "step": 3610 }, { "epoch": 0.34, "learning_rate": 0.00019744753261486102, "loss": 1.9595, "step": 3620 }, { "epoch": 0.34, "learning_rate": 0.00019716392512762333, "loss": 1.953, "step": 3630 }, { "epoch": 0.34, "learning_rate": 0.00019688031764038567, "loss": 1.9825, "step": 3640 }, { "epoch": 0.35, "learning_rate": 0.00019659671015314804, "loss": 1.8326, "step": 3650 }, { "epoch": 0.35, "learning_rate": 0.00019631310266591038, "loss": 1.9083, "step": 3660 }, { "epoch": 0.35, "learning_rate": 0.00019602949517867271, "loss": 2.0078, "step": 3670 }, { "epoch": 0.35, "learning_rate": 0.00019574588769143502, "loss": 1.9444, "step": 3680 }, { "epoch": 0.35, "learning_rate": 0.00019546228020419736, "loss": 1.8508, "step": 3690 }, { "epoch": 0.35, "learning_rate": 0.0001951786727169597, "loss": 1.8987, "step": 3700 }, { "epoch": 0.35, "learning_rate": 0.00019489506522972207, "loss": 2.0121, "step": 3710 }, { "epoch": 0.35, "learning_rate": 0.00019461145774248438, "loss": 1.9966, "step": 3720 }, { "epoch": 0.35, "learning_rate": 0.00019432785025524671, "loss": 1.8469, "step": 3730 }, { "epoch": 0.35, "learning_rate": 0.00019404424276800905, "loss": 2.0692, "step": 3740 }, { "epoch": 0.35, "learning_rate": 0.0001937606352807714, "loss": 2.0527, "step": 3750 }, { "epoch": 0.36, "learning_rate": 0.00019347702779353375, "loss": 1.9751, "step": 3760 }, { "epoch": 0.36, "learning_rate": 0.00019319342030629607, "loss": 1.8138, "step": 3770 }, { "epoch": 0.36, "learning_rate": 0.0001929098128190584, "loss": 1.981, "step": 3780 }, { "epoch": 0.36, "learning_rate": 0.00019262620533182074, "loss": 2.0095, "step": 3790 }, { "epoch": 0.36, "learning_rate": 0.00019234259784458308, "loss": 2.1045, "step": 3800 }, { "epoch": 0.36, "learning_rate": 0.00019205899035734544, "loss": 1.9747, "step": 3810 }, { "epoch": 0.36, "learning_rate": 0.00019177538287010776, "loss": 1.8581, "step": 3820 }, { "epoch": 0.36, "learning_rate": 0.0001914917753828701, "loss": 1.871, "step": 3830 }, { "epoch": 0.36, "learning_rate": 0.00019120816789563243, "loss": 1.9531, "step": 3840 }, { "epoch": 0.36, "learning_rate": 0.00019092456040839474, "loss": 1.8435, "step": 3850 }, { "epoch": 0.36, "learning_rate": 0.00019064095292115708, "loss": 1.9877, "step": 3860 }, { "epoch": 0.37, "learning_rate": 0.00019035734543391945, "loss": 1.9131, "step": 3870 }, { "epoch": 0.37, "learning_rate": 0.00019007373794668178, "loss": 1.8909, "step": 3880 }, { "epoch": 0.37, "learning_rate": 0.00018979013045944412, "loss": 1.9501, "step": 3890 }, { "epoch": 0.37, "learning_rate": 0.00018950652297220643, "loss": 1.9938, "step": 3900 }, { "epoch": 0.37, "learning_rate": 0.00018922291548496877, "loss": 1.9519, "step": 3910 }, { "epoch": 0.37, "learning_rate": 0.00018893930799773113, "loss": 1.9293, "step": 3920 }, { "epoch": 0.37, "learning_rate": 0.00018865570051049347, "loss": 1.9221, "step": 3930 }, { "epoch": 0.37, "learning_rate": 0.0001883720930232558, "loss": 1.8675, "step": 3940 }, { "epoch": 0.37, "learning_rate": 0.00018808848553601812, "loss": 1.9782, "step": 3950 }, { "epoch": 0.37, "learning_rate": 0.00018780487804878046, "loss": 1.9747, "step": 3960 }, { "epoch": 0.38, "learning_rate": 0.00018752127056154282, "loss": 1.9417, "step": 3970 }, { "epoch": 0.38, "learning_rate": 0.00018723766307430516, "loss": 2.0062, "step": 3980 }, { "epoch": 0.38, "learning_rate": 0.00018698241633579126, "loss": 2.0212, "step": 3990 }, { "epoch": 0.38, "learning_rate": 0.0001866988088485536, "loss": 2.0481, "step": 4000 }, { "epoch": 0.38, "learning_rate": 0.00018641520136131594, "loss": 2.0239, "step": 4010 }, { "epoch": 0.38, "learning_rate": 0.00018613159387407825, "loss": 1.8528, "step": 4020 }, { "epoch": 0.38, "learning_rate": 0.00018584798638684059, "loss": 1.9132, "step": 4030 }, { "epoch": 0.38, "learning_rate": 0.00018556437889960292, "loss": 1.9824, "step": 4040 }, { "epoch": 0.38, "learning_rate": 0.0001852807714123653, "loss": 1.9436, "step": 4050 }, { "epoch": 0.38, "learning_rate": 0.0001849971639251276, "loss": 1.8854, "step": 4060 }, { "epoch": 0.38, "learning_rate": 0.00018471355643788994, "loss": 2.0388, "step": 4070 }, { "epoch": 0.39, "learning_rate": 0.00018442994895065228, "loss": 1.978, "step": 4080 }, { "epoch": 0.39, "learning_rate": 0.0001841463414634146, "loss": 1.9922, "step": 4090 }, { "epoch": 0.39, "learning_rate": 0.00018386273397617698, "loss": 1.8897, "step": 4100 }, { "epoch": 0.39, "learning_rate": 0.0001835791264889393, "loss": 1.7802, "step": 4110 }, { "epoch": 0.39, "learning_rate": 0.00018329551900170163, "loss": 1.9593, "step": 4120 }, { "epoch": 0.39, "learning_rate": 0.00018301191151446397, "loss": 1.9404, "step": 4130 }, { "epoch": 0.39, "learning_rate": 0.0001827283040272263, "loss": 1.9313, "step": 4140 }, { "epoch": 0.39, "learning_rate": 0.00018244469653998867, "loss": 1.8596, "step": 4150 }, { "epoch": 0.39, "learning_rate": 0.00018216108905275098, "loss": 1.9912, "step": 4160 }, { "epoch": 0.39, "learning_rate": 0.00018187748156551332, "loss": 1.8539, "step": 4170 }, { "epoch": 0.4, "learning_rate": 0.00018159387407827565, "loss": 2.0064, "step": 4180 }, { "epoch": 0.4, "learning_rate": 0.00018131026659103797, "loss": 1.9917, "step": 4190 }, { "epoch": 0.4, "learning_rate": 0.0001810266591038003, "loss": 2.0214, "step": 4200 }, { "epoch": 0.4, "learning_rate": 0.00018074305161656267, "loss": 1.7989, "step": 4210 }, { "epoch": 0.4, "learning_rate": 0.000180459444129325, "loss": 1.9071, "step": 4220 }, { "epoch": 0.4, "learning_rate": 0.00018017583664208734, "loss": 2.0126, "step": 4230 }, { "epoch": 0.4, "learning_rate": 0.00017989222915484966, "loss": 1.9182, "step": 4240 }, { "epoch": 0.4, "learning_rate": 0.000179608621667612, "loss": 1.9052, "step": 4250 }, { "epoch": 0.4, "learning_rate": 0.00017932501418037436, "loss": 2.038, "step": 4260 }, { "epoch": 0.4, "learning_rate": 0.0001790414066931367, "loss": 1.8673, "step": 4270 }, { "epoch": 0.4, "learning_rate": 0.00017875779920589903, "loss": 2.0043, "step": 4280 }, { "epoch": 0.41, "learning_rate": 0.00017847419171866134, "loss": 1.9785, "step": 4290 }, { "epoch": 0.41, "learning_rate": 0.00017819058423142368, "loss": 1.871, "step": 4300 }, { "epoch": 0.41, "learning_rate": 0.00017790697674418605, "loss": 2.1149, "step": 4310 }, { "epoch": 0.41, "learning_rate": 0.00017762336925694839, "loss": 1.8676, "step": 4320 }, { "epoch": 0.41, "learning_rate": 0.0001773397617697107, "loss": 2.0189, "step": 4330 }, { "epoch": 0.41, "learning_rate": 0.00017705615428247303, "loss": 1.8926, "step": 4340 }, { "epoch": 0.41, "learning_rate": 0.00017677254679523537, "loss": 1.9703, "step": 4350 }, { "epoch": 0.41, "learning_rate": 0.0001764889393079977, "loss": 1.9466, "step": 4360 }, { "epoch": 0.41, "learning_rate": 0.00017620533182076008, "loss": 1.8895, "step": 4370 }, { "epoch": 0.41, "learning_rate": 0.00017592172433352239, "loss": 1.9457, "step": 4380 }, { "epoch": 0.42, "learning_rate": 0.00017563811684628472, "loss": 1.8517, "step": 4390 }, { "epoch": 0.42, "learning_rate": 0.00017535450935904706, "loss": 1.8771, "step": 4400 }, { "epoch": 0.42, "learning_rate": 0.0001750709018718094, "loss": 2.0174, "step": 4410 }, { "epoch": 0.42, "learning_rate": 0.00017478729438457177, "loss": 1.976, "step": 4420 }, { "epoch": 0.42, "learning_rate": 0.00017450368689733408, "loss": 2.0323, "step": 4430 }, { "epoch": 0.42, "learning_rate": 0.00017422007941009641, "loss": 1.9091, "step": 4440 }, { "epoch": 0.42, "learning_rate": 0.00017393647192285875, "loss": 1.9384, "step": 4450 }, { "epoch": 0.42, "learning_rate": 0.00017365286443562106, "loss": 1.8759, "step": 4460 }, { "epoch": 0.42, "learning_rate": 0.0001733692569483834, "loss": 1.9307, "step": 4470 }, { "epoch": 0.42, "learning_rate": 0.00017308564946114577, "loss": 2.025, "step": 4480 }, { "epoch": 0.42, "learning_rate": 0.0001728020419739081, "loss": 1.9593, "step": 4490 }, { "epoch": 0.43, "learning_rate": 0.00017251843448667044, "loss": 2.0366, "step": 4500 }, { "epoch": 0.43, "learning_rate": 0.00017223482699943275, "loss": 2.0388, "step": 4510 }, { "epoch": 0.43, "learning_rate": 0.0001719512195121951, "loss": 1.9827, "step": 4520 }, { "epoch": 0.43, "learning_rate": 0.00017166761202495746, "loss": 1.9018, "step": 4530 }, { "epoch": 0.43, "learning_rate": 0.0001713840045377198, "loss": 1.8422, "step": 4540 }, { "epoch": 0.43, "learning_rate": 0.00017110039705048213, "loss": 1.8816, "step": 4550 }, { "epoch": 0.43, "learning_rate": 0.00017081678956324444, "loss": 1.9177, "step": 4560 }, { "epoch": 0.43, "learning_rate": 0.00017053318207600678, "loss": 1.9647, "step": 4570 }, { "epoch": 0.43, "learning_rate": 0.00017024957458876915, "loss": 1.8367, "step": 4580 }, { "epoch": 0.43, "learning_rate": 0.00016996596710153148, "loss": 1.958, "step": 4590 }, { "epoch": 0.43, "learning_rate": 0.0001696823596142938, "loss": 1.8869, "step": 4600 }, { "epoch": 0.44, "learning_rate": 0.00016939875212705613, "loss": 1.976, "step": 4610 }, { "epoch": 0.44, "learning_rate": 0.00016911514463981847, "loss": 1.9574, "step": 4620 }, { "epoch": 0.44, "learning_rate": 0.0001688315371525808, "loss": 1.9362, "step": 4630 }, { "epoch": 0.44, "learning_rate": 0.00016854792966534317, "loss": 1.9701, "step": 4640 }, { "epoch": 0.44, "learning_rate": 0.00016826432217810548, "loss": 1.8939, "step": 4650 }, { "epoch": 0.44, "learning_rate": 0.00016798071469086782, "loss": 1.8127, "step": 4660 }, { "epoch": 0.44, "learning_rate": 0.00016769710720363016, "loss": 1.965, "step": 4670 }, { "epoch": 0.44, "learning_rate": 0.0001674134997163925, "loss": 1.8539, "step": 4680 }, { "epoch": 0.44, "learning_rate": 0.00016712989222915486, "loss": 1.9504, "step": 4690 }, { "epoch": 0.44, "learning_rate": 0.00016684628474191717, "loss": 1.9817, "step": 4700 }, { "epoch": 0.45, "learning_rate": 0.0001665626772546795, "loss": 1.9137, "step": 4710 }, { "epoch": 0.45, "learning_rate": 0.0001663074305161656, "loss": 1.9258, "step": 4720 }, { "epoch": 0.45, "learning_rate": 0.00016602382302892795, "loss": 1.7631, "step": 4730 }, { "epoch": 0.45, "learning_rate": 0.00016574021554169029, "loss": 1.8479, "step": 4740 }, { "epoch": 0.45, "learning_rate": 0.00016545660805445262, "loss": 1.8883, "step": 4750 }, { "epoch": 0.45, "learning_rate": 0.00016517300056721496, "loss": 1.9619, "step": 4760 }, { "epoch": 0.45, "learning_rate": 0.0001648893930799773, "loss": 1.9964, "step": 4770 }, { "epoch": 0.45, "learning_rate": 0.00016460578559273964, "loss": 1.9687, "step": 4780 }, { "epoch": 0.45, "learning_rate": 0.00016432217810550198, "loss": 2.0085, "step": 4790 }, { "epoch": 0.45, "learning_rate": 0.00016403857061826429, "loss": 1.9943, "step": 4800 }, { "epoch": 0.45, "learning_rate": 0.00016375496313102662, "loss": 1.9711, "step": 4810 }, { "epoch": 0.46, "learning_rate": 0.000163471355643789, "loss": 1.7781, "step": 4820 }, { "epoch": 0.46, "learning_rate": 0.00016318774815655133, "loss": 1.849, "step": 4830 }, { "epoch": 0.46, "learning_rate": 0.00016290414066931367, "loss": 1.9066, "step": 4840 }, { "epoch": 0.46, "learning_rate": 0.00016262053318207598, "loss": 1.9813, "step": 4850 }, { "epoch": 0.46, "learning_rate": 0.00016233692569483831, "loss": 1.9247, "step": 4860 }, { "epoch": 0.46, "learning_rate": 0.00016205331820760068, "loss": 2.0478, "step": 4870 }, { "epoch": 0.46, "learning_rate": 0.00016176971072036302, "loss": 1.8815, "step": 4880 }, { "epoch": 0.46, "learning_rate": 0.00016148610323312535, "loss": 1.8873, "step": 4890 }, { "epoch": 0.46, "learning_rate": 0.00016120249574588767, "loss": 1.8828, "step": 4900 }, { "epoch": 0.46, "learning_rate": 0.00016091888825865, "loss": 1.9985, "step": 4910 }, { "epoch": 0.47, "learning_rate": 0.00016063528077141237, "loss": 1.9237, "step": 4920 }, { "epoch": 0.47, "learning_rate": 0.0001603516732841747, "loss": 2.03, "step": 4930 }, { "epoch": 0.47, "learning_rate": 0.00016006806579693702, "loss": 1.881, "step": 4940 }, { "epoch": 0.47, "learning_rate": 0.00015978445830969936, "loss": 1.9496, "step": 4950 }, { "epoch": 0.47, "learning_rate": 0.0001595008508224617, "loss": 1.9751, "step": 4960 }, { "epoch": 0.47, "learning_rate": 0.00015921724333522403, "loss": 1.9493, "step": 4970 }, { "epoch": 0.47, "learning_rate": 0.0001589336358479864, "loss": 1.9607, "step": 4980 }, { "epoch": 0.47, "learning_rate": 0.0001586500283607487, "loss": 1.9989, "step": 4990 }, { "epoch": 0.47, "learning_rate": 0.00015836642087351104, "loss": 2.0145, "step": 5000 }, { "epoch": 0.47, "learning_rate": 0.00015808281338627338, "loss": 1.8441, "step": 5010 }, { "epoch": 0.47, "learning_rate": 0.00015779920589903572, "loss": 1.9853, "step": 5020 }, { "epoch": 0.48, "learning_rate": 0.00015751559841179806, "loss": 1.8068, "step": 5030 }, { "epoch": 0.48, "learning_rate": 0.0001572319909245604, "loss": 1.9993, "step": 5040 }, { "epoch": 0.48, "learning_rate": 0.00015694838343732273, "loss": 1.8017, "step": 5050 }, { "epoch": 0.48, "learning_rate": 0.00015666477595008507, "loss": 1.9598, "step": 5060 }, { "epoch": 0.48, "learning_rate": 0.00015638116846284738, "loss": 1.8583, "step": 5070 }, { "epoch": 0.48, "learning_rate": 0.00015609756097560975, "loss": 1.9907, "step": 5080 }, { "epoch": 0.48, "learning_rate": 0.00015581395348837209, "loss": 1.9518, "step": 5090 }, { "epoch": 0.48, "learning_rate": 0.00015553034600113442, "loss": 2.013, "step": 5100 }, { "epoch": 0.48, "learning_rate": 0.00015524673851389676, "loss": 1.8681, "step": 5110 }, { "epoch": 0.48, "learning_rate": 0.00015496313102665907, "loss": 1.8049, "step": 5120 }, { "epoch": 0.48, "learning_rate": 0.0001546795235394214, "loss": 1.9509, "step": 5130 }, { "epoch": 0.49, "learning_rate": 0.00015439591605218378, "loss": 2.0249, "step": 5140 }, { "epoch": 0.49, "learning_rate": 0.00015411230856494611, "loss": 1.9814, "step": 5150 }, { "epoch": 0.49, "learning_rate": 0.00015382870107770845, "loss": 1.9319, "step": 5160 }, { "epoch": 0.49, "learning_rate": 0.00015354509359047076, "loss": 1.8496, "step": 5170 }, { "epoch": 0.49, "learning_rate": 0.0001532614861032331, "loss": 1.9162, "step": 5180 }, { "epoch": 0.49, "learning_rate": 0.00015297787861599547, "loss": 1.9752, "step": 5190 }, { "epoch": 0.49, "learning_rate": 0.0001526942711287578, "loss": 1.9887, "step": 5200 }, { "epoch": 0.49, "learning_rate": 0.00015241066364152011, "loss": 1.9524, "step": 5210 }, { "epoch": 0.49, "learning_rate": 0.00015212705615428245, "loss": 2.0026, "step": 5220 }, { "epoch": 0.49, "learning_rate": 0.0001518434486670448, "loss": 1.9157, "step": 5230 }, { "epoch": 0.5, "learning_rate": 0.00015155984117980713, "loss": 2.0611, "step": 5240 }, { "epoch": 0.5, "learning_rate": 0.0001512762336925695, "loss": 2.0075, "step": 5250 }, { "epoch": 0.5, "learning_rate": 0.0001509926262053318, "loss": 1.9902, "step": 5260 }, { "epoch": 0.5, "learning_rate": 0.00015070901871809414, "loss": 1.9127, "step": 5270 }, { "epoch": 0.5, "learning_rate": 0.00015042541123085648, "loss": 1.998, "step": 5280 }, { "epoch": 0.5, "learning_rate": 0.00015014180374361882, "loss": 1.8734, "step": 5290 }, { "epoch": 0.5, "learning_rate": 0.00014985819625638116, "loss": 1.9243, "step": 5300 }, { "epoch": 0.5, "learning_rate": 0.0001495745887691435, "loss": 1.9015, "step": 5310 }, { "epoch": 0.5, "learning_rate": 0.00014929098128190583, "loss": 1.8077, "step": 5320 }, { "epoch": 0.5, "learning_rate": 0.00014900737379466817, "loss": 1.8186, "step": 5330 }, { "epoch": 0.5, "learning_rate": 0.0001487237663074305, "loss": 1.9202, "step": 5340 }, { "epoch": 0.51, "learning_rate": 0.00014844015882019285, "loss": 1.8258, "step": 5350 }, { "epoch": 0.51, "learning_rate": 0.00014815655133295518, "loss": 1.8155, "step": 5360 }, { "epoch": 0.51, "learning_rate": 0.00014787294384571752, "loss": 1.9628, "step": 5370 }, { "epoch": 0.51, "learning_rate": 0.00014758933635847986, "loss": 1.9668, "step": 5380 }, { "epoch": 0.51, "learning_rate": 0.0001473057288712422, "loss": 1.956, "step": 5390 }, { "epoch": 0.51, "learning_rate": 0.00014702212138400454, "loss": 1.9612, "step": 5400 }, { "epoch": 0.51, "learning_rate": 0.00014673851389676685, "loss": 1.9822, "step": 5410 }, { "epoch": 0.51, "learning_rate": 0.0001464549064095292, "loss": 1.9493, "step": 5420 }, { "epoch": 0.51, "learning_rate": 0.00014617129892229152, "loss": 1.9886, "step": 5430 }, { "epoch": 0.51, "learning_rate": 0.00014588769143505386, "loss": 1.9212, "step": 5440 }, { "epoch": 0.52, "learning_rate": 0.00014560408394781622, "loss": 1.8092, "step": 5450 }, { "epoch": 0.52, "learning_rate": 0.00014532047646057854, "loss": 1.9926, "step": 5460 }, { "epoch": 0.52, "learning_rate": 0.0001450368689733409, "loss": 1.8548, "step": 5470 }, { "epoch": 0.52, "learning_rate": 0.0001447532614861032, "loss": 1.8581, "step": 5480 }, { "epoch": 0.52, "learning_rate": 0.00014446965399886555, "loss": 1.9153, "step": 5490 }, { "epoch": 0.52, "learning_rate": 0.0001441860465116279, "loss": 2.0769, "step": 5500 }, { "epoch": 0.52, "learning_rate": 0.00014390243902439023, "loss": 1.9996, "step": 5510 }, { "epoch": 0.52, "learning_rate": 0.00014361883153715256, "loss": 1.9398, "step": 5520 }, { "epoch": 0.52, "learning_rate": 0.0001433352240499149, "loss": 1.9979, "step": 5530 }, { "epoch": 0.52, "learning_rate": 0.00014305161656267724, "loss": 1.804, "step": 5540 }, { "epoch": 0.52, "learning_rate": 0.00014276800907543958, "loss": 1.936, "step": 5550 }, { "epoch": 0.53, "learning_rate": 0.00014248440158820192, "loss": 1.9309, "step": 5560 }, { "epoch": 0.53, "learning_rate": 0.00014220079410096425, "loss": 1.9491, "step": 5570 }, { "epoch": 0.53, "learning_rate": 0.0001419171866137266, "loss": 1.9203, "step": 5580 }, { "epoch": 0.53, "learning_rate": 0.00014163357912648893, "loss": 1.8223, "step": 5590 }, { "epoch": 0.53, "learning_rate": 0.00014134997163925127, "loss": 1.922, "step": 5600 }, { "epoch": 0.53, "learning_rate": 0.0001410663641520136, "loss": 1.9175, "step": 5610 }, { "epoch": 0.53, "learning_rate": 0.00014078275666477594, "loss": 1.9536, "step": 5620 }, { "epoch": 0.53, "learning_rate": 0.00014049914917753828, "loss": 1.8396, "step": 5630 }, { "epoch": 0.53, "learning_rate": 0.00014021554169030062, "loss": 1.9098, "step": 5640 }, { "epoch": 0.53, "learning_rate": 0.00013993193420306296, "loss": 1.9491, "step": 5650 }, { "epoch": 0.54, "learning_rate": 0.0001396483267158253, "loss": 1.8775, "step": 5660 }, { "epoch": 0.54, "learning_rate": 0.00013936471922858763, "loss": 1.9059, "step": 5670 }, { "epoch": 0.54, "learning_rate": 0.00013908111174134994, "loss": 2.0328, "step": 5680 }, { "epoch": 0.54, "learning_rate": 0.0001387975042541123, "loss": 1.9487, "step": 5690 }, { "epoch": 0.54, "learning_rate": 0.00013851389676687462, "loss": 1.9911, "step": 5700 }, { "epoch": 0.54, "learning_rate": 0.00013823028927963696, "loss": 1.9483, "step": 5710 }, { "epoch": 0.54, "learning_rate": 0.00013794668179239932, "loss": 1.8927, "step": 5720 }, { "epoch": 0.54, "learning_rate": 0.00013766307430516163, "loss": 1.8362, "step": 5730 }, { "epoch": 0.54, "learning_rate": 0.000137379466817924, "loss": 1.9886, "step": 5740 }, { "epoch": 0.54, "learning_rate": 0.0001370958593306863, "loss": 2.0213, "step": 5750 }, { "epoch": 0.54, "learning_rate": 0.00013681225184344865, "loss": 2.0016, "step": 5760 }, { "epoch": 0.55, "learning_rate": 0.00013652864435621098, "loss": 1.9383, "step": 5770 }, { "epoch": 0.55, "learning_rate": 0.00013624503686897332, "loss": 2.0477, "step": 5780 }, { "epoch": 0.55, "learning_rate": 0.00013596142938173566, "loss": 1.8091, "step": 5790 }, { "epoch": 0.55, "learning_rate": 0.000135677821894498, "loss": 1.9684, "step": 5800 }, { "epoch": 0.55, "learning_rate": 0.00013539421440726034, "loss": 1.9759, "step": 5810 }, { "epoch": 0.55, "learning_rate": 0.00013511060692002267, "loss": 1.8849, "step": 5820 }, { "epoch": 0.55, "learning_rate": 0.000134826999432785, "loss": 1.926, "step": 5830 }, { "epoch": 0.55, "learning_rate": 0.00013454339194554735, "loss": 1.972, "step": 5840 }, { "epoch": 0.55, "learning_rate": 0.0001342597844583097, "loss": 1.812, "step": 5850 }, { "epoch": 0.55, "learning_rate": 0.00013397617697107203, "loss": 1.9176, "step": 5860 }, { "epoch": 0.55, "learning_rate": 0.00013369256948383436, "loss": 1.9561, "step": 5870 }, { "epoch": 0.56, "learning_rate": 0.0001334089619965967, "loss": 1.982, "step": 5880 }, { "epoch": 0.56, "learning_rate": 0.00013312535450935904, "loss": 1.9534, "step": 5890 }, { "epoch": 0.56, "learning_rate": 0.00013284174702212138, "loss": 1.9424, "step": 5900 }, { "epoch": 0.56, "learning_rate": 0.00013255813953488372, "loss": 1.9401, "step": 5910 }, { "epoch": 0.56, "learning_rate": 0.00013227453204764605, "loss": 1.9386, "step": 5920 }, { "epoch": 0.56, "learning_rate": 0.0001319909245604084, "loss": 1.9403, "step": 5930 }, { "epoch": 0.56, "learning_rate": 0.00013170731707317073, "loss": 2.0248, "step": 5940 }, { "epoch": 0.56, "learning_rate": 0.00013142370958593304, "loss": 1.8946, "step": 5950 }, { "epoch": 0.56, "learning_rate": 0.0001311401020986954, "loss": 1.9736, "step": 5960 }, { "epoch": 0.56, "learning_rate": 0.00013085649461145772, "loss": 2.041, "step": 5970 }, { "epoch": 0.57, "learning_rate": 0.00013057288712422005, "loss": 1.8931, "step": 5980 }, { "epoch": 0.57, "learning_rate": 0.00013028927963698242, "loss": 1.9143, "step": 5990 }, { "epoch": 0.57, "learning_rate": 0.00013000567214974473, "loss": 1.8607, "step": 6000 }, { "epoch": 0.57, "learning_rate": 0.0001297220646625071, "loss": 2.0425, "step": 6010 }, { "epoch": 0.57, "learning_rate": 0.0001294384571752694, "loss": 1.9615, "step": 6020 }, { "epoch": 0.57, "learning_rate": 0.00012915484968803174, "loss": 1.9234, "step": 6030 }, { "epoch": 0.57, "learning_rate": 0.00012887124220079408, "loss": 1.996, "step": 6040 }, { "epoch": 0.57, "learning_rate": 0.00012858763471355642, "loss": 1.9451, "step": 6050 }, { "epoch": 0.57, "learning_rate": 0.00012830402722631876, "loss": 1.9887, "step": 6060 }, { "epoch": 0.57, "learning_rate": 0.0001280204197390811, "loss": 1.8884, "step": 6070 }, { "epoch": 0.57, "learning_rate": 0.00012773681225184343, "loss": 1.8801, "step": 6080 }, { "epoch": 0.58, "learning_rate": 0.00012745320476460577, "loss": 1.948, "step": 6090 }, { "epoch": 0.58, "learning_rate": 0.0001271695972773681, "loss": 1.9517, "step": 6100 }, { "epoch": 0.58, "learning_rate": 0.00012688598979013045, "loss": 1.9363, "step": 6110 }, { "epoch": 0.58, "learning_rate": 0.00012660238230289279, "loss": 1.985, "step": 6120 }, { "epoch": 0.58, "learning_rate": 0.00012631877481565512, "loss": 2.0393, "step": 6130 }, { "epoch": 0.58, "learning_rate": 0.00012603516732841746, "loss": 1.8576, "step": 6140 }, { "epoch": 0.58, "learning_rate": 0.0001257515598411798, "loss": 1.8394, "step": 6150 }, { "epoch": 0.58, "learning_rate": 0.00012546795235394214, "loss": 1.9693, "step": 6160 }, { "epoch": 0.58, "learning_rate": 0.00012518434486670447, "loss": 2.0355, "step": 6170 }, { "epoch": 0.58, "learning_rate": 0.0001249007373794668, "loss": 1.8122, "step": 6180 }, { "epoch": 0.59, "learning_rate": 0.00012461712989222915, "loss": 1.8931, "step": 6190 }, { "epoch": 0.59, "learning_rate": 0.0001243335224049915, "loss": 1.9087, "step": 6200 }, { "epoch": 0.59, "learning_rate": 0.00012404991491775383, "loss": 1.9078, "step": 6210 }, { "epoch": 0.59, "learning_rate": 0.00012376630743051614, "loss": 1.917, "step": 6220 }, { "epoch": 0.59, "learning_rate": 0.0001234826999432785, "loss": 2.0256, "step": 6230 }, { "epoch": 0.59, "learning_rate": 0.0001231990924560408, "loss": 2.0448, "step": 6240 }, { "epoch": 0.59, "learning_rate": 0.00012291548496880315, "loss": 1.9633, "step": 6250 }, { "epoch": 0.59, "learning_rate": 0.00012263187748156552, "loss": 2.0159, "step": 6260 }, { "epoch": 0.59, "learning_rate": 0.00012234826999432783, "loss": 1.8067, "step": 6270 }, { "epoch": 0.59, "learning_rate": 0.00012206466250709018, "loss": 1.9475, "step": 6280 }, { "epoch": 0.59, "learning_rate": 0.00012178105501985252, "loss": 1.9107, "step": 6290 }, { "epoch": 0.6, "learning_rate": 0.00012149744753261484, "loss": 1.8108, "step": 6300 }, { "epoch": 0.6, "learning_rate": 0.00012121384004537719, "loss": 2.0893, "step": 6310 }, { "epoch": 0.6, "learning_rate": 0.00012093023255813952, "loss": 1.9486, "step": 6320 }, { "epoch": 0.6, "learning_rate": 0.00012064662507090185, "loss": 1.8204, "step": 6330 }, { "epoch": 0.6, "learning_rate": 0.0001203630175836642, "loss": 1.9909, "step": 6340 }, { "epoch": 0.6, "learning_rate": 0.00012007941009642653, "loss": 1.9413, "step": 6350 }, { "epoch": 0.6, "learning_rate": 0.00011979580260918888, "loss": 1.88, "step": 6360 }, { "epoch": 0.6, "learning_rate": 0.0001195121951219512, "loss": 1.8708, "step": 6370 }, { "epoch": 0.6, "learning_rate": 0.00011922858763471354, "loss": 2.019, "step": 6380 }, { "epoch": 0.6, "learning_rate": 0.00011894498014747588, "loss": 1.82, "step": 6390 }, { "epoch": 0.61, "learning_rate": 0.00011866137266023822, "loss": 1.9024, "step": 6400 }, { "epoch": 0.61, "learning_rate": 0.00011837776517300054, "loss": 2.0031, "step": 6410 }, { "epoch": 0.61, "learning_rate": 0.0001180941576857629, "loss": 1.9088, "step": 6420 }, { "epoch": 0.61, "learning_rate": 0.00011781055019852523, "loss": 1.8936, "step": 6430 }, { "epoch": 0.61, "learning_rate": 0.00011752694271128757, "loss": 1.7903, "step": 6440 }, { "epoch": 0.61, "learning_rate": 0.00011724333522404991, "loss": 1.9177, "step": 6450 }, { "epoch": 0.61, "learning_rate": 0.00011695972773681223, "loss": 1.9584, "step": 6460 }, { "epoch": 0.61, "learning_rate": 0.00011667612024957459, "loss": 1.9577, "step": 6470 }, { "epoch": 0.61, "learning_rate": 0.00011639251276233691, "loss": 1.9879, "step": 6480 }, { "epoch": 0.61, "learning_rate": 0.00011610890527509925, "loss": 1.8903, "step": 6490 }, { "epoch": 0.61, "learning_rate": 0.0001158252977878616, "loss": 1.8622, "step": 6500 }, { "epoch": 0.62, "learning_rate": 0.00011554169030062392, "loss": 1.8421, "step": 6510 }, { "epoch": 0.62, "learning_rate": 0.00011525808281338628, "loss": 1.8843, "step": 6520 }, { "epoch": 0.62, "learning_rate": 0.0001149744753261486, "loss": 1.7929, "step": 6530 }, { "epoch": 0.62, "learning_rate": 0.00011469086783891094, "loss": 1.987, "step": 6540 }, { "epoch": 0.62, "learning_rate": 0.00011440726035167328, "loss": 1.9068, "step": 6550 }, { "epoch": 0.62, "learning_rate": 0.00011412365286443561, "loss": 1.9718, "step": 6560 }, { "epoch": 0.62, "learning_rate": 0.00011384004537719794, "loss": 2.045, "step": 6570 }, { "epoch": 0.62, "learning_rate": 0.00011355643788996029, "loss": 1.9014, "step": 6580 }, { "epoch": 0.62, "learning_rate": 0.00011327283040272261, "loss": 1.7638, "step": 6590 }, { "epoch": 0.62, "learning_rate": 0.00011298922291548497, "loss": 2.0565, "step": 6600 }, { "epoch": 0.62, "learning_rate": 0.0001127056154282473, "loss": 1.8918, "step": 6610 }, { "epoch": 0.63, "learning_rate": 0.00011242200794100963, "loss": 2.0033, "step": 6620 }, { "epoch": 0.63, "learning_rate": 0.00011213840045377198, "loss": 1.93, "step": 6630 }, { "epoch": 0.63, "learning_rate": 0.0001118547929665343, "loss": 1.8651, "step": 6640 }, { "epoch": 0.63, "learning_rate": 0.00011159954622802042, "loss": 1.8975, "step": 6650 }, { "epoch": 0.63, "learning_rate": 0.00011131593874078274, "loss": 1.8809, "step": 6660 }, { "epoch": 0.63, "learning_rate": 0.00011103233125354508, "loss": 1.9653, "step": 6670 }, { "epoch": 0.63, "learning_rate": 0.00011074872376630743, "loss": 1.9804, "step": 6680 }, { "epoch": 0.63, "learning_rate": 0.00011046511627906975, "loss": 2.0938, "step": 6690 }, { "epoch": 0.63, "learning_rate": 0.0001101815087918321, "loss": 1.9611, "step": 6700 }, { "epoch": 0.63, "learning_rate": 0.00010989790130459443, "loss": 2.0472, "step": 6710 }, { "epoch": 0.64, "learning_rate": 0.00010961429381735677, "loss": 1.8546, "step": 6720 }, { "epoch": 0.64, "learning_rate": 0.0001093306863301191, "loss": 1.9998, "step": 6730 }, { "epoch": 0.64, "learning_rate": 0.00010904707884288144, "loss": 1.8905, "step": 6740 }, { "epoch": 0.64, "learning_rate": 0.00010876347135564377, "loss": 1.8993, "step": 6750 }, { "epoch": 0.64, "learning_rate": 0.00010847986386840612, "loss": 1.9437, "step": 6760 }, { "epoch": 0.64, "learning_rate": 0.00010819625638116844, "loss": 1.9391, "step": 6770 }, { "epoch": 0.64, "learning_rate": 0.0001079126488939308, "loss": 1.8582, "step": 6780 }, { "epoch": 0.64, "learning_rate": 0.00010762904140669313, "loss": 1.9534, "step": 6790 }, { "epoch": 0.64, "learning_rate": 0.00010734543391945546, "loss": 1.8838, "step": 6800 }, { "epoch": 0.64, "learning_rate": 0.00010706182643221781, "loss": 1.8608, "step": 6810 }, { "epoch": 0.64, "learning_rate": 0.00010677821894498013, "loss": 2.0729, "step": 6820 }, { "epoch": 0.65, "learning_rate": 0.00010649461145774247, "loss": 1.8698, "step": 6830 }, { "epoch": 0.65, "learning_rate": 0.00010621100397050481, "loss": 2.0031, "step": 6840 }, { "epoch": 0.65, "learning_rate": 0.00010592739648326715, "loss": 1.8842, "step": 6850 }, { "epoch": 0.65, "learning_rate": 0.0001056437889960295, "loss": 1.9431, "step": 6860 }, { "epoch": 0.65, "learning_rate": 0.00010536018150879182, "loss": 1.9264, "step": 6870 }, { "epoch": 0.65, "learning_rate": 0.00010507657402155416, "loss": 1.9616, "step": 6880 }, { "epoch": 0.65, "learning_rate": 0.0001047929665343165, "loss": 2.0009, "step": 6890 }, { "epoch": 0.65, "learning_rate": 0.00010450935904707884, "loss": 1.9505, "step": 6900 }, { "epoch": 0.65, "learning_rate": 0.00010422575155984116, "loss": 2.0046, "step": 6910 }, { "epoch": 0.65, "learning_rate": 0.00010394214407260351, "loss": 1.9535, "step": 6920 }, { "epoch": 0.66, "learning_rate": 0.00010365853658536584, "loss": 1.9317, "step": 6930 }, { "epoch": 0.66, "learning_rate": 0.00010337492909812819, "loss": 2.0639, "step": 6940 }, { "epoch": 0.66, "learning_rate": 0.00010309132161089053, "loss": 1.9932, "step": 6950 }, { "epoch": 0.66, "learning_rate": 0.00010280771412365285, "loss": 1.9404, "step": 6960 }, { "epoch": 0.66, "learning_rate": 0.0001025241066364152, "loss": 1.8413, "step": 6970 }, { "epoch": 0.66, "learning_rate": 0.00010224049914917753, "loss": 1.9619, "step": 6980 }, { "epoch": 0.66, "learning_rate": 0.00010195689166193986, "loss": 1.9393, "step": 6990 }, { "epoch": 0.66, "learning_rate": 0.0001016732841747022, "loss": 1.9883, "step": 7000 }, { "epoch": 0.66, "learning_rate": 0.00010138967668746454, "loss": 1.9447, "step": 7010 }, { "epoch": 0.66, "learning_rate": 0.00010110606920022687, "loss": 2.0046, "step": 7020 }, { "epoch": 0.66, "learning_rate": 0.00010082246171298922, "loss": 1.9276, "step": 7030 }, { "epoch": 0.67, "learning_rate": 0.00010053885422575154, "loss": 1.8701, "step": 7040 }, { "epoch": 0.67, "learning_rate": 0.00010025524673851389, "loss": 1.9827, "step": 7050 }, { "epoch": 0.67, "learning_rate": 9.997163925127623e-05, "loss": 2.0274, "step": 7060 }, { "epoch": 0.67, "learning_rate": 9.968803176403855e-05, "loss": 1.9328, "step": 7070 }, { "epoch": 0.67, "learning_rate": 9.94044242768009e-05, "loss": 1.9588, "step": 7080 }, { "epoch": 0.67, "learning_rate": 9.912081678956323e-05, "loss": 1.8266, "step": 7090 }, { "epoch": 0.67, "learning_rate": 9.883720930232557e-05, "loss": 2.0324, "step": 7100 }, { "epoch": 0.67, "learning_rate": 9.85536018150879e-05, "loss": 1.9151, "step": 7110 }, { "epoch": 0.67, "learning_rate": 9.826999432785024e-05, "loss": 1.9188, "step": 7120 }, { "epoch": 0.67, "learning_rate": 9.79863868406126e-05, "loss": 1.8823, "step": 7130 }, { "epoch": 0.67, "learning_rate": 9.770277935337492e-05, "loss": 2.0327, "step": 7140 }, { "epoch": 0.68, "learning_rate": 9.741917186613726e-05, "loss": 1.8703, "step": 7150 }, { "epoch": 0.68, "learning_rate": 9.71355643788996e-05, "loss": 1.9775, "step": 7160 }, { "epoch": 0.68, "learning_rate": 9.685195689166193e-05, "loss": 1.9586, "step": 7170 }, { "epoch": 0.68, "learning_rate": 9.656834940442426e-05, "loss": 1.9808, "step": 7180 }, { "epoch": 0.68, "learning_rate": 9.628474191718661e-05, "loss": 1.9306, "step": 7190 }, { "epoch": 0.68, "learning_rate": 9.600113442994893e-05, "loss": 1.9038, "step": 7200 }, { "epoch": 0.68, "learning_rate": 9.571752694271129e-05, "loss": 1.9689, "step": 7210 }, { "epoch": 0.68, "learning_rate": 9.543391945547362e-05, "loss": 1.9347, "step": 7220 }, { "epoch": 0.68, "learning_rate": 9.515031196823595e-05, "loss": 1.9965, "step": 7230 }, { "epoch": 0.68, "learning_rate": 9.48667044809983e-05, "loss": 1.9164, "step": 7240 }, { "epoch": 0.69, "learning_rate": 9.458309699376062e-05, "loss": 1.9053, "step": 7250 }, { "epoch": 0.69, "learning_rate": 9.429948950652296e-05, "loss": 1.8375, "step": 7260 }, { "epoch": 0.69, "learning_rate": 9.40158820192853e-05, "loss": 1.9083, "step": 7270 }, { "epoch": 0.69, "learning_rate": 9.373227453204764e-05, "loss": 1.953, "step": 7280 }, { "epoch": 0.69, "learning_rate": 9.344866704480999e-05, "loss": 2.0066, "step": 7290 }, { "epoch": 0.69, "learning_rate": 9.316505955757231e-05, "loss": 1.9737, "step": 7300 }, { "epoch": 0.69, "learning_rate": 9.288145207033464e-05, "loss": 1.93, "step": 7310 }, { "epoch": 0.69, "learning_rate": 9.259784458309699e-05, "loss": 1.9436, "step": 7320 }, { "epoch": 0.69, "learning_rate": 9.231423709585933e-05, "loss": 1.9847, "step": 7330 }, { "epoch": 0.69, "learning_rate": 9.203062960862165e-05, "loss": 1.9744, "step": 7340 }, { "epoch": 0.69, "learning_rate": 9.1747022121384e-05, "loss": 1.9623, "step": 7350 }, { "epoch": 0.7, "learning_rate": 9.146341463414633e-05, "loss": 1.83, "step": 7360 }, { "epoch": 0.7, "learning_rate": 9.117980714690867e-05, "loss": 1.741, "step": 7370 }, { "epoch": 0.7, "learning_rate": 9.0896199659671e-05, "loss": 1.8454, "step": 7380 }, { "epoch": 0.7, "learning_rate": 9.061259217243334e-05, "loss": 2.004, "step": 7390 }, { "epoch": 0.7, "learning_rate": 9.032898468519569e-05, "loss": 1.9973, "step": 7400 }, { "epoch": 0.7, "learning_rate": 9.004537719795802e-05, "loss": 1.9251, "step": 7410 }, { "epoch": 0.7, "learning_rate": 8.976176971072036e-05, "loss": 2.0345, "step": 7420 }, { "epoch": 0.7, "learning_rate": 8.94781622234827e-05, "loss": 2.0359, "step": 7430 }, { "epoch": 0.7, "learning_rate": 8.919455473624503e-05, "loss": 1.9897, "step": 7440 }, { "epoch": 0.7, "learning_rate": 8.891094724900736e-05, "loss": 1.9629, "step": 7450 }, { "epoch": 0.71, "learning_rate": 8.862733976176971e-05, "loss": 1.9894, "step": 7460 }, { "epoch": 0.71, "learning_rate": 8.834373227453203e-05, "loss": 2.059, "step": 7470 }, { "epoch": 0.71, "learning_rate": 8.806012478729438e-05, "loss": 2.0083, "step": 7480 }, { "epoch": 0.71, "learning_rate": 8.777651730005672e-05, "loss": 1.9406, "step": 7490 }, { "epoch": 0.71, "learning_rate": 8.749290981281905e-05, "loss": 2.0806, "step": 7500 }, { "epoch": 0.71, "learning_rate": 8.72093023255814e-05, "loss": 1.9512, "step": 7510 }, { "epoch": 0.71, "learning_rate": 8.692569483834372e-05, "loss": 1.8797, "step": 7520 }, { "epoch": 0.71, "learning_rate": 8.664208735110606e-05, "loss": 1.9013, "step": 7530 }, { "epoch": 0.71, "learning_rate": 8.63584798638684e-05, "loss": 2.1102, "step": 7540 }, { "epoch": 0.71, "learning_rate": 8.607487237663074e-05, "loss": 1.963, "step": 7550 }, { "epoch": 0.71, "learning_rate": 8.579126488939309e-05, "loss": 1.9602, "step": 7560 }, { "epoch": 0.72, "learning_rate": 8.550765740215541e-05, "loss": 1.8007, "step": 7570 }, { "epoch": 0.72, "learning_rate": 8.522404991491774e-05, "loss": 1.8766, "step": 7580 }, { "epoch": 0.72, "learning_rate": 8.494044242768009e-05, "loss": 1.9586, "step": 7590 }, { "epoch": 0.72, "learning_rate": 8.465683494044242e-05, "loss": 1.9787, "step": 7600 }, { "epoch": 0.72, "learning_rate": 8.437322745320475e-05, "loss": 1.8037, "step": 7610 }, { "epoch": 0.72, "learning_rate": 8.40896199659671e-05, "loss": 1.9972, "step": 7620 }, { "epoch": 0.72, "learning_rate": 8.380601247872942e-05, "loss": 1.8995, "step": 7630 }, { "epoch": 0.72, "learning_rate": 8.352240499149178e-05, "loss": 1.8151, "step": 7640 }, { "epoch": 0.72, "learning_rate": 8.32387975042541e-05, "loss": 1.9566, "step": 7650 }, { "epoch": 0.72, "learning_rate": 8.295519001701644e-05, "loss": 1.887, "step": 7660 }, { "epoch": 0.73, "learning_rate": 8.267158252977879e-05, "loss": 1.8363, "step": 7670 }, { "epoch": 0.73, "learning_rate": 8.238797504254111e-05, "loss": 1.9418, "step": 7680 }, { "epoch": 0.73, "learning_rate": 8.210436755530345e-05, "loss": 1.9908, "step": 7690 }, { "epoch": 0.73, "learning_rate": 8.182076006806579e-05, "loss": 1.9422, "step": 7700 }, { "epoch": 0.73, "learning_rate": 8.153715258082813e-05, "loss": 1.8687, "step": 7710 }, { "epoch": 0.73, "learning_rate": 8.125354509359047e-05, "loss": 2.0009, "step": 7720 }, { "epoch": 0.73, "learning_rate": 8.09699376063528e-05, "loss": 1.9095, "step": 7730 }, { "epoch": 0.73, "learning_rate": 8.068633011911513e-05, "loss": 1.997, "step": 7740 }, { "epoch": 0.73, "learning_rate": 8.040272263187748e-05, "loss": 1.8182, "step": 7750 }, { "epoch": 0.73, "learning_rate": 8.011911514463982e-05, "loss": 1.9101, "step": 7760 }, { "epoch": 0.73, "learning_rate": 7.983550765740214e-05, "loss": 2.0283, "step": 7770 }, { "epoch": 0.74, "learning_rate": 7.95519001701645e-05, "loss": 1.9184, "step": 7780 }, { "epoch": 0.74, "learning_rate": 7.926829268292682e-05, "loss": 1.9559, "step": 7790 }, { "epoch": 0.74, "learning_rate": 7.898468519568916e-05, "loss": 1.9898, "step": 7800 }, { "epoch": 0.74, "learning_rate": 7.87010777084515e-05, "loss": 1.9356, "step": 7810 }, { "epoch": 0.74, "learning_rate": 7.841747022121383e-05, "loss": 1.9283, "step": 7820 }, { "epoch": 0.74, "learning_rate": 7.813386273397618e-05, "loss": 1.9031, "step": 7830 }, { "epoch": 0.74, "learning_rate": 7.785025524673851e-05, "loss": 1.9627, "step": 7840 }, { "epoch": 0.74, "learning_rate": 7.756664775950083e-05, "loss": 1.8959, "step": 7850 }, { "epoch": 0.74, "learning_rate": 7.728304027226318e-05, "loss": 2.0207, "step": 7860 }, { "epoch": 0.74, "learning_rate": 7.699943278502552e-05, "loss": 1.9682, "step": 7870 }, { "epoch": 0.74, "learning_rate": 7.671582529778785e-05, "loss": 2.0098, "step": 7880 }, { "epoch": 0.75, "learning_rate": 7.64322178105502e-05, "loss": 2.0244, "step": 7890 }, { "epoch": 0.75, "learning_rate": 7.614861032331252e-05, "loss": 1.959, "step": 7900 }, { "epoch": 0.75, "learning_rate": 7.586500283607487e-05, "loss": 1.9689, "step": 7910 }, { "epoch": 0.75, "learning_rate": 7.55813953488372e-05, "loss": 1.8624, "step": 7920 }, { "epoch": 0.75, "learning_rate": 7.529778786159954e-05, "loss": 1.8887, "step": 7930 }, { "epoch": 0.75, "learning_rate": 7.501418037436189e-05, "loss": 1.9566, "step": 7940 }, { "epoch": 0.75, "learning_rate": 7.473057288712421e-05, "loss": 1.9733, "step": 7950 }, { "epoch": 0.75, "learning_rate": 7.444696539988655e-05, "loss": 2.0292, "step": 7960 }, { "epoch": 0.75, "learning_rate": 7.416335791264889e-05, "loss": 1.8787, "step": 7970 }, { "epoch": 0.75, "learning_rate": 7.387975042541123e-05, "loss": 1.9056, "step": 7980 }, { "epoch": 0.76, "learning_rate": 7.359614293817356e-05, "loss": 2.0239, "step": 7990 }, { "epoch": 0.76, "learning_rate": 7.33125354509359e-05, "loss": 1.8771, "step": 8000 }, { "epoch": 0.76, "learning_rate": 7.302892796369824e-05, "loss": 1.9212, "step": 8010 }, { "epoch": 0.76, "learning_rate": 7.274532047646056e-05, "loss": 1.9028, "step": 8020 }, { "epoch": 0.76, "learning_rate": 7.246171298922292e-05, "loss": 1.9409, "step": 8030 }, { "epoch": 0.76, "learning_rate": 7.217810550198525e-05, "loss": 1.9232, "step": 8040 }, { "epoch": 0.76, "learning_rate": 7.189449801474759e-05, "loss": 1.9214, "step": 8050 }, { "epoch": 0.76, "learning_rate": 7.161089052750992e-05, "loss": 1.9733, "step": 8060 }, { "epoch": 0.76, "learning_rate": 7.132728304027225e-05, "loss": 1.8845, "step": 8070 }, { "epoch": 0.76, "learning_rate": 7.104367555303459e-05, "loss": 1.9782, "step": 8080 }, { "epoch": 0.76, "learning_rate": 7.076006806579693e-05, "loss": 1.8242, "step": 8090 }, { "epoch": 0.77, "learning_rate": 7.047646057855927e-05, "loss": 1.9213, "step": 8100 }, { "epoch": 0.77, "learning_rate": 7.01928530913216e-05, "loss": 1.9098, "step": 8110 }, { "epoch": 0.77, "learning_rate": 6.990924560408394e-05, "loss": 1.9736, "step": 8120 }, { "epoch": 0.77, "learning_rate": 6.962563811684628e-05, "loss": 1.883, "step": 8130 }, { "epoch": 0.77, "learning_rate": 6.934203062960862e-05, "loss": 1.827, "step": 8140 }, { "epoch": 0.77, "learning_rate": 6.905842314237096e-05, "loss": 1.8746, "step": 8150 }, { "epoch": 0.77, "learning_rate": 6.87748156551333e-05, "loss": 1.8928, "step": 8160 }, { "epoch": 0.77, "learning_rate": 6.849120816789562e-05, "loss": 1.9172, "step": 8170 }, { "epoch": 0.77, "learning_rate": 6.820760068065796e-05, "loss": 2.0127, "step": 8180 }, { "epoch": 0.77, "learning_rate": 6.79239931934203e-05, "loss": 1.9741, "step": 8190 }, { "epoch": 0.78, "learning_rate": 6.764038570618265e-05, "loss": 2.0312, "step": 8200 }, { "epoch": 0.78, "learning_rate": 6.735677821894497e-05, "loss": 1.953, "step": 8210 }, { "epoch": 0.78, "learning_rate": 6.707317073170731e-05, "loss": 2.0159, "step": 8220 }, { "epoch": 0.78, "learning_rate": 6.678956324446965e-05, "loss": 1.9546, "step": 8230 }, { "epoch": 0.78, "learning_rate": 6.650595575723198e-05, "loss": 1.8581, "step": 8240 }, { "epoch": 0.78, "learning_rate": 6.622234826999432e-05, "loss": 1.878, "step": 8250 }, { "epoch": 0.78, "learning_rate": 6.593874078275666e-05, "loss": 1.9166, "step": 8260 }, { "epoch": 0.78, "learning_rate": 6.5655133295519e-05, "loss": 1.9166, "step": 8270 }, { "epoch": 0.78, "learning_rate": 6.537152580828134e-05, "loss": 1.9435, "step": 8280 }, { "epoch": 0.78, "learning_rate": 6.508791832104366e-05, "loss": 1.8599, "step": 8290 }, { "epoch": 0.78, "learning_rate": 6.480431083380601e-05, "loss": 1.8612, "step": 8300 }, { "epoch": 0.79, "learning_rate": 6.452070334656835e-05, "loss": 2.0328, "step": 8310 }, { "epoch": 0.79, "learning_rate": 6.423709585933069e-05, "loss": 2.0004, "step": 8320 }, { "epoch": 0.79, "learning_rate": 6.395348837209301e-05, "loss": 2.007, "step": 8330 }, { "epoch": 0.79, "learning_rate": 6.366988088485535e-05, "loss": 1.9076, "step": 8340 }, { "epoch": 0.79, "learning_rate": 6.338627339761769e-05, "loss": 1.893, "step": 8350 }, { "epoch": 0.79, "learning_rate": 6.310266591038003e-05, "loss": 2.0122, "step": 8360 }, { "epoch": 0.79, "learning_rate": 6.281905842314236e-05, "loss": 1.9318, "step": 8370 }, { "epoch": 0.79, "learning_rate": 6.25354509359047e-05, "loss": 1.9334, "step": 8380 }, { "epoch": 0.79, "learning_rate": 6.225184344866704e-05, "loss": 1.8622, "step": 8390 }, { "epoch": 0.79, "learning_rate": 6.196823596142938e-05, "loss": 1.9893, "step": 8400 }, { "epoch": 0.8, "learning_rate": 6.168462847419172e-05, "loss": 1.8939, "step": 8410 }, { "epoch": 0.8, "learning_rate": 6.140102098695405e-05, "loss": 1.9368, "step": 8420 }, { "epoch": 0.8, "learning_rate": 6.111741349971639e-05, "loss": 1.8463, "step": 8430 }, { "epoch": 0.8, "learning_rate": 6.083380601247873e-05, "loss": 1.9361, "step": 8440 }, { "epoch": 0.8, "learning_rate": 6.055019852524106e-05, "loss": 1.9151, "step": 8450 }, { "epoch": 0.8, "learning_rate": 6.02665910380034e-05, "loss": 1.9659, "step": 8460 }, { "epoch": 0.8, "learning_rate": 5.998298355076574e-05, "loss": 1.9531, "step": 8470 }, { "epoch": 0.8, "learning_rate": 5.9699376063528075e-05, "loss": 1.9611, "step": 8480 }, { "epoch": 0.8, "learning_rate": 5.9415768576290406e-05, "loss": 1.9284, "step": 8490 }, { "epoch": 0.8, "learning_rate": 5.9132161089052744e-05, "loss": 2.0525, "step": 8500 }, { "epoch": 0.8, "learning_rate": 5.884855360181508e-05, "loss": 1.9746, "step": 8510 }, { "epoch": 0.81, "learning_rate": 5.856494611457741e-05, "loss": 1.7876, "step": 8520 }, { "epoch": 0.81, "learning_rate": 5.828133862733976e-05, "loss": 1.941, "step": 8530 }, { "epoch": 0.81, "learning_rate": 5.7997731140102096e-05, "loss": 1.925, "step": 8540 }, { "epoch": 0.81, "learning_rate": 5.7714123652864434e-05, "loss": 1.7862, "step": 8550 }, { "epoch": 0.81, "learning_rate": 5.7430516165626765e-05, "loss": 1.8708, "step": 8560 }, { "epoch": 0.81, "learning_rate": 5.71469086783891e-05, "loss": 1.9129, "step": 8570 }, { "epoch": 0.81, "learning_rate": 5.686330119115144e-05, "loss": 2.0183, "step": 8580 }, { "epoch": 0.81, "learning_rate": 5.657969370391378e-05, "loss": 1.965, "step": 8590 }, { "epoch": 0.81, "learning_rate": 5.629608621667611e-05, "loss": 2.0176, "step": 8600 }, { "epoch": 0.81, "learning_rate": 5.601247872943845e-05, "loss": 1.8755, "step": 8610 }, { "epoch": 0.81, "learning_rate": 5.572887124220079e-05, "loss": 1.975, "step": 8620 }, { "epoch": 0.82, "learning_rate": 5.544526375496313e-05, "loss": 1.9415, "step": 8630 }, { "epoch": 0.82, "learning_rate": 5.516165626772546e-05, "loss": 1.992, "step": 8640 }, { "epoch": 0.82, "learning_rate": 5.48780487804878e-05, "loss": 2.0002, "step": 8650 }, { "epoch": 0.82, "learning_rate": 5.459444129325014e-05, "loss": 1.8894, "step": 8660 }, { "epoch": 0.82, "learning_rate": 5.4310833806012475e-05, "loss": 1.8315, "step": 8670 }, { "epoch": 0.82, "learning_rate": 5.4027226318774806e-05, "loss": 1.9616, "step": 8680 }, { "epoch": 0.82, "learning_rate": 5.3743618831537144e-05, "loss": 1.9351, "step": 8690 }, { "epoch": 0.82, "learning_rate": 5.346001134429949e-05, "loss": 1.8639, "step": 8700 }, { "epoch": 0.82, "learning_rate": 5.317640385706183e-05, "loss": 1.9152, "step": 8710 }, { "epoch": 0.82, "learning_rate": 5.289279636982416e-05, "loss": 1.8928, "step": 8720 }, { "epoch": 0.83, "learning_rate": 5.2609188882586496e-05, "loss": 1.9043, "step": 8730 }, { "epoch": 0.83, "learning_rate": 5.2325581395348834e-05, "loss": 1.8675, "step": 8740 }, { "epoch": 0.83, "learning_rate": 5.204197390811117e-05, "loss": 1.9687, "step": 8750 }, { "epoch": 0.83, "learning_rate": 5.17583664208735e-05, "loss": 1.8306, "step": 8760 }, { "epoch": 0.83, "learning_rate": 5.147475893363584e-05, "loss": 1.9105, "step": 8770 }, { "epoch": 0.83, "learning_rate": 5.119115144639818e-05, "loss": 2.0177, "step": 8780 }, { "epoch": 0.83, "learning_rate": 5.0907543959160524e-05, "loss": 1.871, "step": 8790 }, { "epoch": 0.83, "learning_rate": 5.0623936471922855e-05, "loss": 1.8813, "step": 8800 }, { "epoch": 0.83, "learning_rate": 5.034032898468519e-05, "loss": 1.9172, "step": 8810 }, { "epoch": 0.83, "learning_rate": 5.005672149744753e-05, "loss": 1.8479, "step": 8820 }, { "epoch": 0.83, "learning_rate": 4.977311401020987e-05, "loss": 2.0173, "step": 8830 }, { "epoch": 0.84, "learning_rate": 4.94895065229722e-05, "loss": 1.8729, "step": 8840 }, { "epoch": 0.84, "learning_rate": 4.920589903573454e-05, "loss": 1.9665, "step": 8850 }, { "epoch": 0.84, "learning_rate": 4.8922291548496876e-05, "loss": 1.9225, "step": 8860 }, { "epoch": 0.84, "learning_rate": 4.863868406125922e-05, "loss": 1.956, "step": 8870 }, { "epoch": 0.84, "learning_rate": 4.8355076574021545e-05, "loss": 1.9591, "step": 8880 }, { "epoch": 0.84, "learning_rate": 4.807146908678389e-05, "loss": 1.9469, "step": 8890 }, { "epoch": 0.84, "learning_rate": 4.778786159954623e-05, "loss": 1.8405, "step": 8900 }, { "epoch": 0.84, "learning_rate": 4.750425411230856e-05, "loss": 1.8911, "step": 8910 }, { "epoch": 0.84, "learning_rate": 4.7220646625070897e-05, "loss": 1.9739, "step": 8920 }, { "epoch": 0.84, "learning_rate": 4.6937039137833234e-05, "loss": 1.934, "step": 8930 }, { "epoch": 0.85, "learning_rate": 4.665343165059557e-05, "loss": 1.9334, "step": 8940 }, { "epoch": 0.85, "learning_rate": 4.6369824163357904e-05, "loss": 1.9642, "step": 8950 }, { "epoch": 0.85, "learning_rate": 4.608621667612024e-05, "loss": 2.009, "step": 8960 }, { "epoch": 0.85, "learning_rate": 4.5802609188882586e-05, "loss": 1.914, "step": 8970 }, { "epoch": 0.85, "learning_rate": 4.5519001701644924e-05, "loss": 2.0272, "step": 8980 }, { "epoch": 0.85, "learning_rate": 4.5235394214407255e-05, "loss": 1.9943, "step": 8990 }, { "epoch": 0.85, "learning_rate": 4.495178672716959e-05, "loss": 1.8665, "step": 9000 }, { "epoch": 0.85, "learning_rate": 4.466817923993193e-05, "loss": 1.989, "step": 9010 }, { "epoch": 0.85, "learning_rate": 4.438457175269427e-05, "loss": 1.776, "step": 9020 }, { "epoch": 0.85, "learning_rate": 4.41009642654566e-05, "loss": 1.7324, "step": 9030 }, { "epoch": 0.85, "learning_rate": 4.381735677821894e-05, "loss": 1.8907, "step": 9040 }, { "epoch": 0.86, "learning_rate": 4.3533749290981276e-05, "loss": 1.8821, "step": 9050 }, { "epoch": 0.86, "learning_rate": 4.325014180374362e-05, "loss": 1.9867, "step": 9060 }, { "epoch": 0.86, "learning_rate": 4.296653431650595e-05, "loss": 1.869, "step": 9070 }, { "epoch": 0.86, "learning_rate": 4.268292682926829e-05, "loss": 1.9415, "step": 9080 }, { "epoch": 0.86, "learning_rate": 4.239931934203063e-05, "loss": 1.906, "step": 9090 }, { "epoch": 0.86, "learning_rate": 4.2115711854792966e-05, "loss": 2.0106, "step": 9100 }, { "epoch": 0.86, "learning_rate": 4.18321043675553e-05, "loss": 1.9134, "step": 9110 }, { "epoch": 0.86, "learning_rate": 4.1548496880317635e-05, "loss": 1.849, "step": 9120 }, { "epoch": 0.86, "learning_rate": 4.126488939307997e-05, "loss": 1.839, "step": 9130 }, { "epoch": 0.86, "learning_rate": 4.098128190584232e-05, "loss": 1.8784, "step": 9140 }, { "epoch": 0.87, "learning_rate": 4.069767441860464e-05, "loss": 2.0755, "step": 9150 }, { "epoch": 0.87, "learning_rate": 4.041406693136699e-05, "loss": 1.909, "step": 9160 }, { "epoch": 0.87, "learning_rate": 4.0130459444129325e-05, "loss": 1.966, "step": 9170 }, { "epoch": 0.87, "learning_rate": 3.984685195689166e-05, "loss": 1.8651, "step": 9180 }, { "epoch": 0.87, "learning_rate": 3.9563244469653994e-05, "loss": 2.1033, "step": 9190 }, { "epoch": 0.87, "learning_rate": 3.927963698241633e-05, "loss": 1.9832, "step": 9200 }, { "epoch": 0.87, "learning_rate": 3.899602949517867e-05, "loss": 1.8741, "step": 9210 }, { "epoch": 0.87, "learning_rate": 3.871242200794101e-05, "loss": 1.8932, "step": 9220 }, { "epoch": 0.87, "learning_rate": 3.842881452070334e-05, "loss": 1.845, "step": 9230 }, { "epoch": 0.87, "learning_rate": 3.814520703346568e-05, "loss": 1.8516, "step": 9240 }, { "epoch": 0.87, "learning_rate": 3.786159954622802e-05, "loss": 1.9229, "step": 9250 }, { "epoch": 0.88, "learning_rate": 3.757799205899035e-05, "loss": 1.9784, "step": 9260 }, { "epoch": 0.88, "learning_rate": 3.729438457175269e-05, "loss": 1.8802, "step": 9270 }, { "epoch": 0.88, "learning_rate": 3.701077708451503e-05, "loss": 1.9196, "step": 9280 }, { "epoch": 0.88, "learning_rate": 3.6727169597277366e-05, "loss": 1.9718, "step": 9290 }, { "epoch": 0.88, "learning_rate": 3.6443562110039704e-05, "loss": 1.9188, "step": 9300 }, { "epoch": 0.88, "learning_rate": 3.6159954622802035e-05, "loss": 2.0001, "step": 9310 }, { "epoch": 0.88, "learning_rate": 3.587634713556437e-05, "loss": 1.8871, "step": 9320 }, { "epoch": 0.88, "learning_rate": 3.559273964832671e-05, "loss": 1.909, "step": 9330 }, { "epoch": 0.88, "learning_rate": 3.530913216108905e-05, "loss": 1.9234, "step": 9340 }, { "epoch": 0.88, "learning_rate": 3.502552467385139e-05, "loss": 1.9902, "step": 9350 }, { "epoch": 0.88, "learning_rate": 3.4741917186613725e-05, "loss": 1.8774, "step": 9360 }, { "epoch": 0.89, "learning_rate": 3.4458309699376056e-05, "loss": 2.0578, "step": 9370 }, { "epoch": 0.89, "learning_rate": 3.41747022121384e-05, "loss": 1.9046, "step": 9380 }, { "epoch": 0.89, "learning_rate": 3.389109472490073e-05, "loss": 1.8949, "step": 9390 }, { "epoch": 0.89, "learning_rate": 3.360748723766307e-05, "loss": 1.8696, "step": 9400 }, { "epoch": 0.89, "learning_rate": 3.332387975042541e-05, "loss": 2.0034, "step": 9410 }, { "epoch": 0.89, "learning_rate": 3.3040272263187746e-05, "loss": 1.9803, "step": 9420 }, { "epoch": 0.89, "learning_rate": 3.2756664775950084e-05, "loss": 1.8447, "step": 9430 }, { "epoch": 0.89, "learning_rate": 3.247305728871242e-05, "loss": 1.871, "step": 9440 }, { "epoch": 0.89, "learning_rate": 3.218944980147475e-05, "loss": 1.8718, "step": 9450 }, { "epoch": 0.89, "learning_rate": 3.19058423142371e-05, "loss": 1.7916, "step": 9460 }, { "epoch": 0.9, "learning_rate": 3.162223482699943e-05, "loss": 1.9947, "step": 9470 }, { "epoch": 0.9, "learning_rate": 3.133862733976177e-05, "loss": 1.981, "step": 9480 }, { "epoch": 0.9, "learning_rate": 3.1055019852524105e-05, "loss": 1.9623, "step": 9490 }, { "epoch": 0.9, "learning_rate": 3.077141236528644e-05, "loss": 1.8504, "step": 9500 }, { "epoch": 0.9, "learning_rate": 3.0487804878048777e-05, "loss": 1.909, "step": 9510 }, { "epoch": 0.9, "learning_rate": 3.0204197390811115e-05, "loss": 2.0195, "step": 9520 }, { "epoch": 0.9, "learning_rate": 2.992058990357345e-05, "loss": 1.9324, "step": 9530 }, { "epoch": 0.9, "learning_rate": 2.963698241633579e-05, "loss": 1.9728, "step": 9540 }, { "epoch": 0.9, "learning_rate": 2.9353374929098125e-05, "loss": 1.9669, "step": 9550 }, { "epoch": 0.9, "learning_rate": 2.9069767441860463e-05, "loss": 1.9203, "step": 9560 }, { "epoch": 0.9, "learning_rate": 2.881452070334657e-05, "loss": 1.9111, "step": 9570 }, { "epoch": 0.91, "learning_rate": 2.8530913216108903e-05, "loss": 1.9052, "step": 9580 }, { "epoch": 0.91, "learning_rate": 2.824730572887124e-05, "loss": 1.9317, "step": 9590 }, { "epoch": 0.91, "learning_rate": 2.7963698241633576e-05, "loss": 1.9512, "step": 9600 }, { "epoch": 0.91, "learning_rate": 2.768009075439591e-05, "loss": 1.9274, "step": 9610 }, { "epoch": 0.91, "learning_rate": 2.7396483267158252e-05, "loss": 1.9878, "step": 9620 }, { "epoch": 0.91, "learning_rate": 2.7112875779920586e-05, "loss": 1.8672, "step": 9630 }, { "epoch": 0.91, "learning_rate": 2.6829268292682924e-05, "loss": 1.8437, "step": 9640 }, { "epoch": 0.91, "learning_rate": 2.654566080544526e-05, "loss": 1.9504, "step": 9650 }, { "epoch": 0.91, "learning_rate": 2.62620533182076e-05, "loss": 1.9867, "step": 9660 }, { "epoch": 0.91, "learning_rate": 2.5978445830969935e-05, "loss": 1.9193, "step": 9670 }, { "epoch": 0.92, "learning_rate": 2.5694838343732273e-05, "loss": 1.8887, "step": 9680 }, { "epoch": 0.92, "learning_rate": 2.5411230856494607e-05, "loss": 1.8255, "step": 9690 }, { "epoch": 0.92, "learning_rate": 2.5127623369256945e-05, "loss": 1.8051, "step": 9700 }, { "epoch": 0.92, "learning_rate": 2.4844015882019283e-05, "loss": 1.9183, "step": 9710 }, { "epoch": 0.92, "learning_rate": 2.456040839478162e-05, "loss": 1.9585, "step": 9720 }, { "epoch": 0.92, "learning_rate": 2.4276800907543955e-05, "loss": 1.8288, "step": 9730 }, { "epoch": 0.92, "learning_rate": 2.3993193420306293e-05, "loss": 1.8407, "step": 9740 }, { "epoch": 0.92, "learning_rate": 2.370958593306863e-05, "loss": 1.9804, "step": 9750 }, { "epoch": 0.92, "learning_rate": 2.342597844583097e-05, "loss": 1.9133, "step": 9760 }, { "epoch": 0.92, "learning_rate": 2.3142370958593304e-05, "loss": 2.0379, "step": 9770 }, { "epoch": 0.92, "learning_rate": 2.2858763471355642e-05, "loss": 1.9466, "step": 9780 }, { "epoch": 0.93, "learning_rate": 2.2575155984117976e-05, "loss": 2.023, "step": 9790 }, { "epoch": 0.93, "learning_rate": 2.2291548496880318e-05, "loss": 1.8804, "step": 9800 }, { "epoch": 0.93, "learning_rate": 2.2007941009642652e-05, "loss": 1.8823, "step": 9810 }, { "epoch": 0.93, "learning_rate": 2.172433352240499e-05, "loss": 1.8368, "step": 9820 }, { "epoch": 0.93, "learning_rate": 2.1440726035167325e-05, "loss": 1.8944, "step": 9830 }, { "epoch": 0.93, "learning_rate": 2.1157118547929666e-05, "loss": 1.9715, "step": 9840 }, { "epoch": 0.93, "learning_rate": 2.0873511060692e-05, "loss": 1.8169, "step": 9850 }, { "epoch": 0.93, "learning_rate": 2.058990357345434e-05, "loss": 1.926, "step": 9860 }, { "epoch": 0.93, "learning_rate": 2.0306296086216673e-05, "loss": 1.8995, "step": 9870 }, { "epoch": 0.93, "learning_rate": 2.0022688598979014e-05, "loss": 1.964, "step": 9880 }, { "epoch": 0.93, "learning_rate": 1.973908111174135e-05, "loss": 2.0038, "step": 9890 }, { "epoch": 0.94, "learning_rate": 1.9455473624503687e-05, "loss": 1.8676, "step": 9900 }, { "epoch": 0.94, "learning_rate": 1.917186613726602e-05, "loss": 1.8659, "step": 9910 }, { "epoch": 0.94, "learning_rate": 1.888825865002836e-05, "loss": 1.8943, "step": 9920 }, { "epoch": 0.94, "learning_rate": 1.8604651162790697e-05, "loss": 1.9027, "step": 9930 }, { "epoch": 0.94, "learning_rate": 1.8321043675553032e-05, "loss": 1.9053, "step": 9940 }, { "epoch": 0.94, "learning_rate": 1.803743618831537e-05, "loss": 2.1544, "step": 9950 }, { "epoch": 0.94, "learning_rate": 1.7753828701077708e-05, "loss": 1.9002, "step": 9960 }, { "epoch": 0.94, "learning_rate": 1.7470221213840042e-05, "loss": 1.8899, "step": 9970 }, { "epoch": 0.94, "learning_rate": 1.718661372660238e-05, "loss": 1.7576, "step": 9980 }, { "epoch": 0.94, "learning_rate": 1.6903006239364718e-05, "loss": 2.0429, "step": 9990 }, { "epoch": 0.95, "learning_rate": 1.6619398752127056e-05, "loss": 1.9995, "step": 10000 }, { "epoch": 0.95, "learning_rate": 1.633579126488939e-05, "loss": 1.7999, "step": 10010 }, { "epoch": 0.95, "learning_rate": 1.605218377765173e-05, "loss": 2.0613, "step": 10020 }, { "epoch": 0.95, "learning_rate": 1.5768576290414066e-05, "loss": 1.9488, "step": 10030 }, { "epoch": 0.95, "learning_rate": 1.5484968803176404e-05, "loss": 1.8953, "step": 10040 }, { "epoch": 0.95, "learning_rate": 1.520136131593874e-05, "loss": 1.9878, "step": 10050 }, { "epoch": 0.95, "learning_rate": 1.4917753828701077e-05, "loss": 1.9123, "step": 10060 }, { "epoch": 0.95, "learning_rate": 1.4634146341463413e-05, "loss": 1.9685, "step": 10070 }, { "epoch": 0.95, "learning_rate": 1.4350538854225751e-05, "loss": 1.9945, "step": 10080 }, { "epoch": 0.95, "learning_rate": 1.4066931366988087e-05, "loss": 2.0429, "step": 10090 }, { "epoch": 0.95, "learning_rate": 1.3783323879750425e-05, "loss": 1.9244, "step": 10100 }, { "epoch": 0.96, "learning_rate": 1.3499716392512761e-05, "loss": 1.8477, "step": 10110 }, { "epoch": 0.96, "learning_rate": 1.32161089052751e-05, "loss": 2.0131, "step": 10120 }, { "epoch": 0.96, "learning_rate": 1.2932501418037436e-05, "loss": 1.9577, "step": 10130 }, { "epoch": 0.96, "learning_rate": 1.2648893930799774e-05, "loss": 1.893, "step": 10140 }, { "epoch": 0.96, "learning_rate": 1.236528644356211e-05, "loss": 1.9509, "step": 10150 }, { "epoch": 0.96, "learning_rate": 1.2081678956324448e-05, "loss": 1.8446, "step": 10160 }, { "epoch": 0.96, "learning_rate": 1.1798071469086782e-05, "loss": 1.9316, "step": 10170 }, { "epoch": 0.96, "learning_rate": 1.1514463981849118e-05, "loss": 1.9555, "step": 10180 }, { "epoch": 0.96, "learning_rate": 1.1230856494611456e-05, "loss": 1.944, "step": 10190 }, { "epoch": 0.96, "learning_rate": 1.0947249007373793e-05, "loss": 2.0048, "step": 10200 }, { "epoch": 0.97, "learning_rate": 1.066364152013613e-05, "loss": 1.9539, "step": 10210 }, { "epoch": 0.97, "learning_rate": 1.0380034032898467e-05, "loss": 1.8775, "step": 10220 }, { "epoch": 0.97, "learning_rate": 1.0096426545660803e-05, "loss": 1.8658, "step": 10230 }, { "epoch": 0.97, "learning_rate": 9.812819058423141e-06, "loss": 1.8844, "step": 10240 }, { "epoch": 0.97, "learning_rate": 9.529211571185477e-06, "loss": 1.8888, "step": 10250 }, { "epoch": 0.97, "learning_rate": 9.245604083947815e-06, "loss": 1.93, "step": 10260 }, { "epoch": 0.97, "learning_rate": 8.961996596710151e-06, "loss": 1.9094, "step": 10270 }, { "epoch": 0.97, "learning_rate": 8.67838910947249e-06, "loss": 1.9298, "step": 10280 }, { "epoch": 0.97, "learning_rate": 8.394781622234826e-06, "loss": 1.9718, "step": 10290 }, { "epoch": 0.97, "learning_rate": 8.111174134997164e-06, "loss": 1.9241, "step": 10300 }, { "epoch": 0.97, "learning_rate": 7.8275666477595e-06, "loss": 1.9156, "step": 10310 }, { "epoch": 0.98, "learning_rate": 7.543959160521837e-06, "loss": 1.9464, "step": 10320 }, { "epoch": 0.98, "learning_rate": 7.260351673284174e-06, "loss": 1.8982, "step": 10330 }, { "epoch": 0.98, "learning_rate": 6.976744186046511e-06, "loss": 2.0217, "step": 10340 }, { "epoch": 0.98, "learning_rate": 6.693136698808848e-06, "loss": 1.9342, "step": 10350 }, { "epoch": 0.98, "learning_rate": 6.409529211571185e-06, "loss": 1.8258, "step": 10360 }, { "epoch": 0.98, "learning_rate": 6.125921724333522e-06, "loss": 1.9249, "step": 10370 }, { "epoch": 0.98, "learning_rate": 5.8423142370958585e-06, "loss": 1.8313, "step": 10380 }, { "epoch": 0.98, "learning_rate": 5.558706749858196e-06, "loss": 1.917, "step": 10390 }, { "epoch": 0.98, "learning_rate": 5.275099262620533e-06, "loss": 1.8811, "step": 10400 }, { "epoch": 0.98, "learning_rate": 4.99149177538287e-06, "loss": 1.8812, "step": 10410 }, { "epoch": 0.99, "learning_rate": 4.707884288145207e-06, "loss": 1.9099, "step": 10420 }, { "epoch": 0.99, "learning_rate": 4.424276800907543e-06, "loss": 1.8592, "step": 10430 }, { "epoch": 0.99, "learning_rate": 4.14066931366988e-06, "loss": 1.738, "step": 10440 }, { "epoch": 0.99, "learning_rate": 3.857061826432217e-06, "loss": 1.8415, "step": 10450 }, { "epoch": 0.99, "learning_rate": 3.5734543391945544e-06, "loss": 1.9281, "step": 10460 }, { "epoch": 0.99, "learning_rate": 3.2898468519568915e-06, "loss": 1.9303, "step": 10470 }, { "epoch": 0.99, "learning_rate": 3.0062393647192286e-06, "loss": 1.9314, "step": 10480 }, { "epoch": 0.99, "learning_rate": 2.7226318774815657e-06, "loss": 1.8479, "step": 10490 }, { "epoch": 0.99, "learning_rate": 2.4390243902439023e-06, "loss": 1.8967, "step": 10500 } ], "logging_steps": 10, "max_steps": 10578, "num_train_epochs": 1, "save_steps": 500, "total_flos": 4.3967525098082304e+17, "trial_name": null, "trial_params": null }