{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.2163565556036348, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.9999999999999997e-05, "loss": 2.9132, "step": 10 }, { "epoch": 0.0, "learning_rate": 5.9999999999999995e-05, "loss": 2.5796, "step": 20 }, { "epoch": 0.0, "learning_rate": 8.999999999999999e-05, "loss": 2.3848, "step": 30 }, { "epoch": 0.0, "learning_rate": 0.00011999999999999999, "loss": 2.0514, "step": 40 }, { "epoch": 0.0, "learning_rate": 0.000147, "loss": 1.9692, "step": 50 }, { "epoch": 0.0, "learning_rate": 0.00017699999999999997, "loss": 1.7979, "step": 60 }, { "epoch": 0.0, "learning_rate": 0.000204, "loss": 1.7851, "step": 70 }, { "epoch": 0.0, "learning_rate": 0.000234, "loss": 1.7217, "step": 80 }, { "epoch": 0.0, "learning_rate": 0.00025499999999999996, "loss": 1.3989, "step": 90 }, { "epoch": 0.0, "learning_rate": 0.000285, "loss": 1.654, "step": 100 }, { "epoch": 0.0, "learning_rate": 0.00029996088657105604, "loss": 1.4094, "step": 110 }, { "epoch": 0.01, "learning_rate": 0.0002998565840938722, "loss": 1.8543, "step": 120 }, { "epoch": 0.01, "learning_rate": 0.0002997522816166884, "loss": 1.7782, "step": 130 }, { "epoch": 0.01, "learning_rate": 0.0002996219035202086, "loss": 1.7068, "step": 140 }, { "epoch": 0.01, "learning_rate": 0.00029951760104302477, "loss": 1.4387, "step": 150 }, { "epoch": 0.01, "learning_rate": 0.00029940026075619296, "loss": 1.7555, "step": 160 }, { "epoch": 0.01, "learning_rate": 0.00029928292046936116, "loss": 1.5488, "step": 170 }, { "epoch": 0.01, "learning_rate": 0.0002991655801825293, "loss": 1.5043, "step": 180 }, { "epoch": 0.01, "learning_rate": 0.00029906127770534547, "loss": 1.5463, "step": 190 }, { "epoch": 0.01, "learning_rate": 0.0002989830508474576, "loss": 1.1729, "step": 200 }, { "epoch": 0.01, "learning_rate": 0.00029887874837027377, "loss": 1.5994, "step": 210 }, { "epoch": 0.01, "learning_rate": 0.000298748370273794, "loss": 1.6526, "step": 220 }, { "epoch": 0.01, "learning_rate": 0.0002986179921773142, "loss": 5.4035, "step": 230 }, { "epoch": 0.01, "learning_rate": 0.0002984876140808344, "loss": 7.2882, "step": 240 }, { "epoch": 0.01, "learning_rate": 0.0002983572359843546, "loss": 7.2899, "step": 250 }, { "epoch": 0.01, "learning_rate": 0.0002982268578878748, "loss": 7.5663, "step": 260 }, { "epoch": 0.01, "learning_rate": 0.000298096479791395, "loss": 7.7473, "step": 270 }, { "epoch": 0.01, "learning_rate": 0.0002979661016949152, "loss": 7.2212, "step": 280 }, { "epoch": 0.01, "learning_rate": 0.00029783572359843544, "loss": 7.3973, "step": 290 }, { "epoch": 0.01, "learning_rate": 0.00029770534550195566, "loss": 7.8749, "step": 300 }, { "epoch": 0.01, "learning_rate": 0.00029757496740547587, "loss": 7.6257, "step": 310 }, { "epoch": 0.01, "learning_rate": 0.00029744458930899603, "loss": 7.7121, "step": 320 }, { "epoch": 0.01, "learning_rate": 0.00029731421121251625, "loss": 7.6982, "step": 330 }, { "epoch": 0.01, "learning_rate": 0.0002971838331160365, "loss": 7.9487, "step": 340 }, { "epoch": 0.02, "learning_rate": 0.0002970534550195567, "loss": 7.5477, "step": 350 }, { "epoch": 0.02, "learning_rate": 0.0002969230769230769, "loss": 7.5343, "step": 360 }, { "epoch": 0.02, "learning_rate": 0.0002967926988265971, "loss": 7.5523, "step": 370 }, { "epoch": 0.02, "learning_rate": 0.0002966623207301173, "loss": 7.4696, "step": 380 }, { "epoch": 0.02, "learning_rate": 0.0002965319426336375, "loss": 7.6125, "step": 390 }, { "epoch": 0.02, "learning_rate": 0.00029640156453715775, "loss": 7.4849, "step": 400 }, { "epoch": 0.02, "learning_rate": 0.00029627118644067797, "loss": 7.9908, "step": 410 }, { "epoch": 0.02, "learning_rate": 0.00029614080834419813, "loss": 7.7173, "step": 420 }, { "epoch": 0.02, "learning_rate": 0.00029601043024771835, "loss": 7.7963, "step": 430 }, { "epoch": 0.02, "learning_rate": 0.00029588005215123856, "loss": 7.9082, "step": 440 }, { "epoch": 0.02, "learning_rate": 0.0002957496740547588, "loss": 7.673, "step": 450 }, { "epoch": 0.02, "learning_rate": 0.000295619295958279, "loss": 7.6576, "step": 460 }, { "epoch": 0.02, "learning_rate": 0.0002954889178617992, "loss": 7.7349, "step": 470 }, { "epoch": 0.02, "learning_rate": 0.0002953585397653194, "loss": 8.1483, "step": 480 }, { "epoch": 0.02, "learning_rate": 0.0002952281616688396, "loss": 7.8854, "step": 490 }, { "epoch": 0.02, "learning_rate": 0.0002950977835723598, "loss": 7.8698, "step": 500 }, { "epoch": 0.02, "learning_rate": 0.00029496740547588, "loss": 7.9662, "step": 510 }, { "epoch": 0.02, "learning_rate": 0.00029483702737940023, "loss": 7.7332, "step": 520 }, { "epoch": 0.02, "learning_rate": 0.00029470664928292045, "loss": 7.9352, "step": 530 }, { "epoch": 0.02, "learning_rate": 0.00029457627118644066, "loss": 7.8444, "step": 540 }, { "epoch": 0.02, "learning_rate": 0.0002944458930899609, "loss": 7.9594, "step": 550 }, { "epoch": 0.02, "learning_rate": 0.00029431551499348104, "loss": 7.6888, "step": 560 }, { "epoch": 0.02, "learning_rate": 0.0002941851368970013, "loss": 7.9143, "step": 570 }, { "epoch": 0.03, "learning_rate": 0.00029405475880052147, "loss": 7.7642, "step": 580 }, { "epoch": 0.03, "learning_rate": 0.0002939243807040417, "loss": 7.6396, "step": 590 }, { "epoch": 0.03, "learning_rate": 0.0002937940026075619, "loss": 7.6659, "step": 600 }, { "epoch": 0.03, "learning_rate": 0.0002936636245110821, "loss": 7.9758, "step": 610 }, { "epoch": 0.03, "learning_rate": 0.00029353324641460233, "loss": 7.7407, "step": 620 }, { "epoch": 0.03, "learning_rate": 0.00029340286831812254, "loss": 7.9381, "step": 630 }, { "epoch": 0.03, "learning_rate": 0.00029327249022164276, "loss": 8.0261, "step": 640 }, { "epoch": 0.03, "learning_rate": 0.0002931421121251629, "loss": 7.7693, "step": 650 }, { "epoch": 0.03, "learning_rate": 0.00029301173402868314, "loss": 7.9575, "step": 660 }, { "epoch": 0.03, "learning_rate": 0.00029288135593220335, "loss": 7.6038, "step": 670 }, { "epoch": 0.03, "learning_rate": 0.00029275097783572357, "loss": 7.7868, "step": 680 }, { "epoch": 0.03, "learning_rate": 0.0002926205997392438, "loss": 7.9182, "step": 690 }, { "epoch": 0.03, "learning_rate": 0.000292490221642764, "loss": 7.8777, "step": 700 }, { "epoch": 0.03, "learning_rate": 0.0002923598435462842, "loss": 7.915, "step": 710 }, { "epoch": 0.03, "learning_rate": 0.00029222946544980443, "loss": 7.8993, "step": 720 }, { "epoch": 0.03, "learning_rate": 0.0002920990873533246, "loss": 7.9536, "step": 730 }, { "epoch": 0.03, "learning_rate": 0.00029196870925684486, "loss": 7.8386, "step": 740 }, { "epoch": 0.03, "learning_rate": 0.000291838331160365, "loss": 7.9146, "step": 750 }, { "epoch": 0.03, "learning_rate": 0.00029170795306388524, "loss": 7.938, "step": 760 }, { "epoch": 0.03, "learning_rate": 0.00029157757496740545, "loss": 7.9984, "step": 770 }, { "epoch": 0.03, "learning_rate": 0.00029144719687092567, "loss": 7.6733, "step": 780 }, { "epoch": 0.03, "learning_rate": 0.0002913168187744459, "loss": 7.8783, "step": 790 }, { "epoch": 0.03, "learning_rate": 0.0002911864406779661, "loss": 8.1666, "step": 800 }, { "epoch": 0.04, "learning_rate": 0.0002910560625814863, "loss": 7.9801, "step": 810 }, { "epoch": 0.04, "learning_rate": 0.00029092568448500647, "loss": 8.0533, "step": 820 }, { "epoch": 0.04, "learning_rate": 0.0002907953063885267, "loss": 8.0206, "step": 830 }, { "epoch": 0.04, "learning_rate": 0.0002906649282920469, "loss": 7.7818, "step": 840 }, { "epoch": 0.04, "learning_rate": 0.0002905345501955671, "loss": 8.021, "step": 850 }, { "epoch": 0.04, "learning_rate": 0.00029040417209908733, "loss": 7.9063, "step": 860 }, { "epoch": 0.04, "learning_rate": 0.00029027379400260755, "loss": 7.8719, "step": 870 }, { "epoch": 0.04, "learning_rate": 0.00029014341590612776, "loss": 8.1405, "step": 880 }, { "epoch": 0.04, "learning_rate": 0.0002900130378096479, "loss": 7.8046, "step": 890 }, { "epoch": 0.04, "learning_rate": 0.00028988265971316814, "loss": 7.9293, "step": 900 }, { "epoch": 0.04, "learning_rate": 0.0002897522816166884, "loss": 8.0217, "step": 910 }, { "epoch": 0.04, "learning_rate": 0.00028962190352020857, "loss": 7.8202, "step": 920 }, { "epoch": 0.04, "learning_rate": 0.0002894915254237288, "loss": 7.8705, "step": 930 }, { "epoch": 0.04, "learning_rate": 0.000289361147327249, "loss": 7.9237, "step": 940 }, { "epoch": 0.04, "learning_rate": 0.0002892307692307692, "loss": 8.1584, "step": 950 }, { "epoch": 0.04, "learning_rate": 0.0002891003911342894, "loss": 8.0448, "step": 960 }, { "epoch": 0.04, "learning_rate": 0.00028897001303780965, "loss": 8.0041, "step": 970 }, { "epoch": 0.04, "learning_rate": 0.00028883963494132986, "loss": 7.9727, "step": 980 }, { "epoch": 0.04, "learning_rate": 0.00028870925684485, "loss": 7.8942, "step": 990 }, { "epoch": 0.04, "learning_rate": 0.00028857887874837024, "loss": 7.8319, "step": 1000 }, { "epoch": 0.04, "learning_rate": 0.00028844850065189046, "loss": 7.898, "step": 1010 }, { "epoch": 0.04, "learning_rate": 0.00028831812255541067, "loss": 7.899, "step": 1020 }, { "epoch": 0.04, "learning_rate": 0.00028818774445893083, "loss": 7.7232, "step": 1030 }, { "epoch": 0.05, "learning_rate": 0.0002880573663624511, "loss": 8.2552, "step": 1040 }, { "epoch": 0.05, "learning_rate": 0.0002879269882659713, "loss": 8.0233, "step": 1050 }, { "epoch": 0.05, "learning_rate": 0.0002877966101694915, "loss": 8.1904, "step": 1060 }, { "epoch": 0.05, "learning_rate": 0.0002876662320730117, "loss": 7.9744, "step": 1070 }, { "epoch": 0.05, "learning_rate": 0.0002875358539765319, "loss": 7.9458, "step": 1080 }, { "epoch": 0.05, "learning_rate": 0.0002874054758800521, "loss": 8.1225, "step": 1090 }, { "epoch": 0.05, "learning_rate": 0.00028727509778357234, "loss": 8.2103, "step": 1100 }, { "epoch": 0.05, "learning_rate": 0.00028714471968709255, "loss": 8.1877, "step": 1110 }, { "epoch": 0.05, "learning_rate": 0.00028701434159061277, "loss": 8.1925, "step": 1120 }, { "epoch": 0.05, "learning_rate": 0.00028688396349413293, "loss": 8.1942, "step": 1130 }, { "epoch": 0.05, "learning_rate": 0.0002867535853976532, "loss": 8.268, "step": 1140 }, { "epoch": 0.05, "learning_rate": 0.00028662320730117336, "loss": 8.0643, "step": 1150 }, { "epoch": 0.05, "learning_rate": 0.0002864928292046936, "loss": 8.1105, "step": 1160 }, { "epoch": 0.05, "learning_rate": 0.0002863624511082138, "loss": 8.3186, "step": 1170 }, { "epoch": 0.05, "learning_rate": 0.000286232073011734, "loss": 8.2267, "step": 1180 }, { "epoch": 0.05, "learning_rate": 0.0002861016949152542, "loss": 8.4379, "step": 1190 }, { "epoch": 0.05, "learning_rate": 0.0002859713168187744, "loss": 8.2195, "step": 1200 }, { "epoch": 0.05, "learning_rate": 0.00028584093872229465, "loss": 8.1754, "step": 1210 }, { "epoch": 0.05, "learning_rate": 0.0002857105606258148, "loss": 8.2161, "step": 1220 }, { "epoch": 0.05, "learning_rate": 0.00028558018252933503, "loss": 8.3426, "step": 1230 }, { "epoch": 0.05, "learning_rate": 0.00028544980443285525, "loss": 8.3226, "step": 1240 }, { "epoch": 0.05, "learning_rate": 0.00028531942633637546, "loss": 8.1448, "step": 1250 }, { "epoch": 0.05, "learning_rate": 0.0002851890482398957, "loss": 8.3116, "step": 1260 }, { "epoch": 0.05, "learning_rate": 0.0002850586701434159, "loss": 8.2342, "step": 1270 }, { "epoch": 0.06, "learning_rate": 0.0002849282920469361, "loss": 8.6083, "step": 1280 }, { "epoch": 0.06, "learning_rate": 0.0002847979139504563, "loss": 8.4491, "step": 1290 }, { "epoch": 0.06, "learning_rate": 0.0002846675358539765, "loss": 8.2441, "step": 1300 }, { "epoch": 0.06, "learning_rate": 0.0002845371577574967, "loss": 8.4184, "step": 1310 }, { "epoch": 0.06, "learning_rate": 0.0002844067796610169, "loss": 8.2803, "step": 1320 }, { "epoch": 0.06, "learning_rate": 0.00028427640156453713, "loss": 8.1456, "step": 1330 }, { "epoch": 0.06, "learning_rate": 0.00028414602346805734, "loss": 8.3464, "step": 1340 }, { "epoch": 0.06, "learning_rate": 0.00028401564537157756, "loss": 8.2956, "step": 1350 }, { "epoch": 0.06, "learning_rate": 0.0002838852672750978, "loss": 8.5188, "step": 1360 }, { "epoch": 0.06, "learning_rate": 0.00028375488917861794, "loss": 8.2586, "step": 1370 }, { "epoch": 0.06, "learning_rate": 0.0002836245110821382, "loss": 8.2999, "step": 1380 }, { "epoch": 0.06, "learning_rate": 0.00028349413298565837, "loss": 8.4213, "step": 1390 }, { "epoch": 0.06, "learning_rate": 0.0002833637548891786, "loss": 8.2921, "step": 1400 }, { "epoch": 0.06, "learning_rate": 0.0002832333767926988, "loss": 8.4024, "step": 1410 }, { "epoch": 0.06, "learning_rate": 0.000283102998696219, "loss": 8.4043, "step": 1420 }, { "epoch": 0.06, "learning_rate": 0.00028297262059973923, "loss": 8.3759, "step": 1430 }, { "epoch": 0.06, "learning_rate": 0.00028284224250325944, "loss": 8.3432, "step": 1440 }, { "epoch": 0.06, "learning_rate": 0.00028271186440677966, "loss": 8.3263, "step": 1450 }, { "epoch": 0.06, "learning_rate": 0.0002825814863102998, "loss": 8.348, "step": 1460 }, { "epoch": 0.06, "learning_rate": 0.00028245110821382004, "loss": 8.2902, "step": 1470 }, { "epoch": 0.06, "learning_rate": 0.00028232073011734025, "loss": 8.5699, "step": 1480 }, { "epoch": 0.06, "learning_rate": 0.00028219035202086047, "loss": 8.4766, "step": 1490 }, { "epoch": 0.06, "learning_rate": 0.0002820599739243807, "loss": 8.5017, "step": 1500 }, { "epoch": 0.07, "learning_rate": 0.0002819295958279009, "loss": 8.6927, "step": 1510 }, { "epoch": 0.07, "learning_rate": 0.0002817992177314211, "loss": 8.5978, "step": 1520 }, { "epoch": 0.07, "learning_rate": 0.0002816688396349413, "loss": 8.4803, "step": 1530 }, { "epoch": 0.07, "learning_rate": 0.0002815384615384615, "loss": 8.5869, "step": 1540 }, { "epoch": 0.07, "learning_rate": 0.00028140808344198176, "loss": 8.4203, "step": 1550 }, { "epoch": 0.07, "learning_rate": 0.0002812777053455019, "loss": 8.7451, "step": 1560 }, { "epoch": 0.07, "learning_rate": 0.00028114732724902213, "loss": 8.6679, "step": 1570 }, { "epoch": 0.07, "learning_rate": 0.00028101694915254235, "loss": 8.6733, "step": 1580 }, { "epoch": 0.07, "learning_rate": 0.00028088657105606257, "loss": 8.5751, "step": 1590 }, { "epoch": 0.07, "learning_rate": 0.0002807561929595828, "loss": 8.7441, "step": 1600 }, { "epoch": 0.07, "learning_rate": 0.000280625814863103, "loss": 8.6364, "step": 1610 }, { "epoch": 0.07, "learning_rate": 0.0002804954367666232, "loss": 8.3853, "step": 1620 }, { "epoch": 0.07, "learning_rate": 0.00028036505867014337, "loss": 8.6059, "step": 1630 }, { "epoch": 0.07, "learning_rate": 0.0002802346805736636, "loss": 8.6964, "step": 1640 }, { "epoch": 0.07, "learning_rate": 0.0002801043024771838, "loss": 8.4077, "step": 1650 }, { "epoch": 0.07, "learning_rate": 0.000279973924380704, "loss": 8.5408, "step": 1660 }, { "epoch": 0.07, "learning_rate": 0.00027984354628422423, "loss": 8.6068, "step": 1670 }, { "epoch": 0.07, "learning_rate": 0.00027971316818774445, "loss": 8.5852, "step": 1680 }, { "epoch": 0.07, "learning_rate": 0.00027958279009126466, "loss": 8.6365, "step": 1690 }, { "epoch": 0.07, "learning_rate": 0.0002794524119947848, "loss": 8.3437, "step": 1700 }, { "epoch": 0.07, "learning_rate": 0.00027932203389830504, "loss": 8.5662, "step": 1710 }, { "epoch": 0.07, "learning_rate": 0.00027919165580182526, "loss": 8.6039, "step": 1720 }, { "epoch": 0.07, "learning_rate": 0.00027906127770534547, "loss": 8.666, "step": 1730 }, { "epoch": 0.08, "learning_rate": 0.0002789308996088657, "loss": 8.5818, "step": 1740 }, { "epoch": 0.08, "learning_rate": 0.0002788005215123859, "loss": 8.4733, "step": 1750 }, { "epoch": 0.08, "learning_rate": 0.0002786701434159061, "loss": 8.6056, "step": 1760 }, { "epoch": 0.08, "learning_rate": 0.0002785397653194263, "loss": 8.7309, "step": 1770 }, { "epoch": 0.08, "learning_rate": 0.00027840938722294655, "loss": 8.4525, "step": 1780 }, { "epoch": 0.08, "learning_rate": 0.00027827900912646676, "loss": 8.5793, "step": 1790 }, { "epoch": 0.08, "learning_rate": 0.0002781486310299869, "loss": 8.4926, "step": 1800 }, { "epoch": 0.08, "learning_rate": 0.00027801825293350714, "loss": 8.5963, "step": 1810 }, { "epoch": 0.08, "learning_rate": 0.00027788787483702736, "loss": 8.5549, "step": 1820 }, { "epoch": 0.08, "learning_rate": 0.00027775749674054757, "loss": 8.7375, "step": 1830 }, { "epoch": 0.08, "learning_rate": 0.00027762711864406773, "loss": 8.3517, "step": 1840 }, { "epoch": 0.08, "learning_rate": 0.000277496740547588, "loss": 8.6929, "step": 1850 }, { "epoch": 0.08, "learning_rate": 0.0002773663624511082, "loss": 8.4538, "step": 1860 }, { "epoch": 0.08, "learning_rate": 0.0002772359843546284, "loss": 8.7503, "step": 1870 }, { "epoch": 0.08, "learning_rate": 0.0002771056062581486, "loss": 8.6003, "step": 1880 }, { "epoch": 0.08, "learning_rate": 0.0002769752281616688, "loss": 8.6623, "step": 1890 }, { "epoch": 0.08, "learning_rate": 0.000276844850065189, "loss": 8.6156, "step": 1900 }, { "epoch": 0.08, "learning_rate": 0.00027671447196870924, "loss": 8.6077, "step": 1910 }, { "epoch": 0.08, "learning_rate": 0.00027658409387222945, "loss": 8.6117, "step": 1920 }, { "epoch": 0.08, "learning_rate": 0.00027645371577574967, "loss": 8.6552, "step": 1930 }, { "epoch": 0.08, "learning_rate": 0.00027632333767926983, "loss": 8.6804, "step": 1940 }, { "epoch": 0.08, "learning_rate": 0.0002761929595827901, "loss": 8.8094, "step": 1950 }, { "epoch": 0.08, "learning_rate": 0.00027606258148631026, "loss": 8.6628, "step": 1960 }, { "epoch": 0.09, "learning_rate": 0.0002759322033898305, "loss": 8.5159, "step": 1970 }, { "epoch": 0.09, "learning_rate": 0.0002758018252933507, "loss": 8.396, "step": 1980 }, { "epoch": 0.09, "learning_rate": 0.0002756714471968709, "loss": 8.8074, "step": 1990 }, { "epoch": 0.09, "learning_rate": 0.0002755410691003911, "loss": 8.773, "step": 2000 }, { "epoch": 0.09, "learning_rate": 0.0002754106910039113, "loss": 8.7639, "step": 2010 }, { "epoch": 0.09, "learning_rate": 0.00027528031290743155, "loss": 8.4934, "step": 2020 }, { "epoch": 0.09, "learning_rate": 0.0002751499348109517, "loss": 8.5592, "step": 2030 }, { "epoch": 0.09, "learning_rate": 0.00027501955671447193, "loss": 8.5546, "step": 2040 }, { "epoch": 0.09, "learning_rate": 0.00027488917861799215, "loss": 8.5575, "step": 2050 }, { "epoch": 0.09, "learning_rate": 0.00027475880052151236, "loss": 8.6952, "step": 2060 }, { "epoch": 0.09, "learning_rate": 0.0002746284224250326, "loss": 8.6958, "step": 2070 }, { "epoch": 0.09, "learning_rate": 0.0002744980443285528, "loss": 8.7037, "step": 2080 }, { "epoch": 0.09, "learning_rate": 0.000274367666232073, "loss": 8.9301, "step": 2090 }, { "epoch": 0.09, "learning_rate": 0.00027423728813559317, "loss": 8.6533, "step": 2100 }, { "epoch": 0.09, "learning_rate": 0.0002741069100391134, "loss": 8.7503, "step": 2110 }, { "epoch": 0.09, "learning_rate": 0.00027397653194263365, "loss": 8.725, "step": 2120 }, { "epoch": 0.09, "learning_rate": 0.0002738461538461538, "loss": 8.5035, "step": 2130 }, { "epoch": 0.09, "learning_rate": 0.00027371577574967403, "loss": 8.802, "step": 2140 }, { "epoch": 0.09, "learning_rate": 0.00027358539765319424, "loss": 8.9058, "step": 2150 }, { "epoch": 0.09, "learning_rate": 0.00027345501955671446, "loss": 8.7056, "step": 2160 }, { "epoch": 0.09, "learning_rate": 0.0002733246414602347, "loss": 8.7894, "step": 2170 }, { "epoch": 0.09, "learning_rate": 0.00027319426336375484, "loss": 8.669, "step": 2180 }, { "epoch": 0.09, "learning_rate": 0.0002730638852672751, "loss": 8.708, "step": 2190 }, { "epoch": 0.1, "learning_rate": 0.00027293350717079527, "loss": 8.8639, "step": 2200 }, { "epoch": 0.1, "learning_rate": 0.0002728031290743155, "loss": 8.8779, "step": 2210 }, { "epoch": 0.1, "learning_rate": 0.0002726727509778357, "loss": 8.8422, "step": 2220 }, { "epoch": 0.1, "learning_rate": 0.0002725423728813559, "loss": 8.7745, "step": 2230 }, { "epoch": 0.1, "learning_rate": 0.00027241199478487613, "loss": 9.0292, "step": 2240 }, { "epoch": 0.1, "learning_rate": 0.00027228161668839634, "loss": 9.051, "step": 2250 }, { "epoch": 0.1, "learning_rate": 0.00027215123859191656, "loss": 9.0168, "step": 2260 }, { "epoch": 0.1, "learning_rate": 0.0002720208604954367, "loss": 9.1592, "step": 2270 }, { "epoch": 0.1, "learning_rate": 0.00027189048239895694, "loss": 9.1905, "step": 2280 }, { "epoch": 0.1, "learning_rate": 0.00027176010430247715, "loss": 9.216, "step": 2290 }, { "epoch": 0.1, "learning_rate": 0.00027162972620599737, "loss": 9.2634, "step": 2300 }, { "epoch": 0.1, "learning_rate": 0.0002714993481095176, "loss": 9.249, "step": 2310 }, { "epoch": 0.1, "learning_rate": 0.0002713689700130378, "loss": 9.3536, "step": 2320 }, { "epoch": 0.1, "learning_rate": 0.000271238591916558, "loss": 9.4016, "step": 2330 }, { "epoch": 0.1, "learning_rate": 0.0002711082138200782, "loss": 9.2968, "step": 2340 }, { "epoch": 0.1, "learning_rate": 0.0002709778357235984, "loss": 9.4278, "step": 2350 }, { "epoch": 0.1, "learning_rate": 0.00027084745762711866, "loss": 9.4604, "step": 2360 }, { "epoch": 0.1, "learning_rate": 0.0002707170795306388, "loss": 9.5698, "step": 2370 }, { "epoch": 0.1, "learning_rate": 0.00027058670143415903, "loss": 9.4086, "step": 2380 }, { "epoch": 0.1, "learning_rate": 0.00027045632333767925, "loss": 9.5957, "step": 2390 }, { "epoch": 0.1, "learning_rate": 0.00027032594524119947, "loss": 9.4389, "step": 2400 }, { "epoch": 0.1, "learning_rate": 0.0002701955671447196, "loss": 9.6004, "step": 2410 }, { "epoch": 0.1, "learning_rate": 0.0002700651890482399, "loss": 9.6392, "step": 2420 }, { "epoch": 0.11, "learning_rate": 0.0002699348109517601, "loss": 9.5309, "step": 2430 }, { "epoch": 0.11, "learning_rate": 0.00026980443285528027, "loss": 9.6664, "step": 2440 }, { "epoch": 0.11, "learning_rate": 0.0002696740547588005, "loss": 9.4468, "step": 2450 }, { "epoch": 0.11, "learning_rate": 0.0002695436766623207, "loss": 9.6085, "step": 2460 }, { "epoch": 0.11, "learning_rate": 0.0002694132985658409, "loss": 9.5151, "step": 2470 }, { "epoch": 0.11, "learning_rate": 0.00026928292046936113, "loss": 9.6408, "step": 2480 }, { "epoch": 0.11, "learning_rate": 0.00026915254237288135, "loss": 9.6165, "step": 2490 }, { "epoch": 0.11, "learning_rate": 0.00026902216427640156, "loss": 9.7855, "step": 2500 }, { "epoch": 0.11, "learning_rate": 0.0002688917861799217, "loss": 9.6457, "step": 2510 }, { "epoch": 0.11, "learning_rate": 0.00026876140808344194, "loss": 9.7363, "step": 2520 }, { "epoch": 0.11, "learning_rate": 0.00026863102998696216, "loss": 9.8018, "step": 2530 }, { "epoch": 0.11, "learning_rate": 0.00026850065189048237, "loss": 9.7463, "step": 2540 }, { "epoch": 0.11, "learning_rate": 0.0002683702737940026, "loss": 9.8131, "step": 2550 }, { "epoch": 0.11, "learning_rate": 0.0002682398956975228, "loss": 9.7435, "step": 2560 }, { "epoch": 0.11, "learning_rate": 0.000268109517601043, "loss": 9.9082, "step": 2570 }, { "epoch": 0.11, "learning_rate": 0.0002679791395045632, "loss": 9.9361, "step": 2580 }, { "epoch": 0.11, "learning_rate": 0.00026784876140808345, "loss": 9.7746, "step": 2590 }, { "epoch": 0.11, "learning_rate": 0.0002677183833116036, "loss": 9.8179, "step": 2600 }, { "epoch": 0.11, "learning_rate": 0.0002675880052151238, "loss": 9.7547, "step": 2610 }, { "epoch": 0.11, "learning_rate": 0.00026745762711864404, "loss": 9.884, "step": 2620 }, { "epoch": 0.11, "learning_rate": 0.00026732724902216426, "loss": 9.9553, "step": 2630 }, { "epoch": 0.11, "learning_rate": 0.00026719687092568447, "loss": 9.8555, "step": 2640 }, { "epoch": 0.11, "learning_rate": 0.0002670664928292047, "loss": 9.7841, "step": 2650 }, { "epoch": 0.12, "learning_rate": 0.0002669361147327249, "loss": 9.6766, "step": 2660 }, { "epoch": 0.12, "learning_rate": 0.00026680573663624506, "loss": 9.8294, "step": 2670 }, { "epoch": 0.12, "learning_rate": 0.0002666753585397653, "loss": 9.8066, "step": 2680 }, { "epoch": 0.12, "learning_rate": 0.0002665449804432855, "loss": 9.8608, "step": 2690 }, { "epoch": 0.12, "learning_rate": 0.0002664146023468057, "loss": 9.972, "step": 2700 }, { "epoch": 0.12, "learning_rate": 0.0002662842242503259, "loss": 9.9361, "step": 2710 }, { "epoch": 0.12, "learning_rate": 0.00026615384615384614, "loss": 10.0587, "step": 2720 }, { "epoch": 0.12, "learning_rate": 0.00026602346805736635, "loss": 9.9388, "step": 2730 }, { "epoch": 0.12, "learning_rate": 0.00026589308996088657, "loss": 9.9727, "step": 2740 }, { "epoch": 0.12, "learning_rate": 0.00026576271186440673, "loss": 9.9163, "step": 2750 }, { "epoch": 0.12, "learning_rate": 0.000265632333767927, "loss": 9.8902, "step": 2760 }, { "epoch": 0.12, "learning_rate": 0.00026550195567144716, "loss": 9.885, "step": 2770 }, { "epoch": 0.12, "learning_rate": 0.0002653715775749674, "loss": 9.8856, "step": 2780 }, { "epoch": 0.12, "learning_rate": 0.0002652411994784876, "loss": 9.9419, "step": 2790 }, { "epoch": 0.12, "learning_rate": 0.0002651108213820078, "loss": 10.0771, "step": 2800 }, { "epoch": 0.12, "learning_rate": 0.000264980443285528, "loss": 9.9506, "step": 2810 }, { "epoch": 0.12, "learning_rate": 0.0002648500651890482, "loss": 10.0638, "step": 2820 }, { "epoch": 0.12, "learning_rate": 0.00026471968709256845, "loss": 10.0016, "step": 2830 }, { "epoch": 0.12, "learning_rate": 0.0002645893089960886, "loss": 10.0485, "step": 2840 }, { "epoch": 0.12, "learning_rate": 0.00026445893089960883, "loss": 9.936, "step": 2850 }, { "epoch": 0.12, "learning_rate": 0.00026432855280312905, "loss": 10.0238, "step": 2860 }, { "epoch": 0.12, "learning_rate": 0.00026419817470664926, "loss": 10.0553, "step": 2870 }, { "epoch": 0.12, "learning_rate": 0.0002640677966101695, "loss": 10.0423, "step": 2880 }, { "epoch": 0.13, "learning_rate": 0.0002639374185136897, "loss": 9.9627, "step": 2890 }, { "epoch": 0.13, "learning_rate": 0.0002638070404172099, "loss": 10.0819, "step": 2900 }, { "epoch": 0.13, "learning_rate": 0.00026367666232073007, "loss": 9.9082, "step": 2910 }, { "epoch": 0.13, "learning_rate": 0.0002635462842242503, "loss": 10.0401, "step": 2920 }, { "epoch": 0.13, "learning_rate": 0.00026341590612777055, "loss": 9.9946, "step": 2930 }, { "epoch": 0.13, "learning_rate": 0.0002632855280312907, "loss": 10.0913, "step": 2940 }, { "epoch": 0.13, "learning_rate": 0.00026315514993481093, "loss": 10.1353, "step": 2950 }, { "epoch": 0.13, "learning_rate": 0.00026302477183833114, "loss": 10.0295, "step": 2960 }, { "epoch": 0.13, "learning_rate": 0.00026289439374185136, "loss": 10.0846, "step": 2970 }, { "epoch": 0.13, "learning_rate": 0.0002627640156453715, "loss": 10.0067, "step": 2980 }, { "epoch": 0.13, "learning_rate": 0.00026263363754889174, "loss": 10.0299, "step": 2990 }, { "epoch": 0.13, "learning_rate": 0.000262503259452412, "loss": 10.0556, "step": 3000 }, { "epoch": 0.13, "learning_rate": 0.00026237288135593217, "loss": 10.0429, "step": 3010 }, { "epoch": 0.13, "learning_rate": 0.0002622425032594524, "loss": 10.02, "step": 3020 }, { "epoch": 0.13, "learning_rate": 0.0002621121251629726, "loss": 10.0642, "step": 3030 }, { "epoch": 0.13, "learning_rate": 0.0002619817470664928, "loss": 10.1297, "step": 3040 }, { "epoch": 0.13, "learning_rate": 0.00026185136897001303, "loss": 10.0012, "step": 3050 }, { "epoch": 0.13, "learning_rate": 0.00026172099087353324, "loss": 9.9938, "step": 3060 }, { "epoch": 0.13, "learning_rate": 0.00026159061277705346, "loss": 10.0385, "step": 3070 }, { "epoch": 0.13, "learning_rate": 0.0002614602346805736, "loss": 10.0903, "step": 3080 }, { "epoch": 0.13, "learning_rate": 0.00026132985658409384, "loss": 10.1171, "step": 3090 }, { "epoch": 0.13, "learning_rate": 0.00026119947848761405, "loss": 10.1059, "step": 3100 }, { "epoch": 0.13, "learning_rate": 0.00026106910039113427, "loss": 10.0862, "step": 3110 }, { "epoch": 0.14, "learning_rate": 0.0002609387222946545, "loss": 9.9758, "step": 3120 }, { "epoch": 0.14, "learning_rate": 0.0002608083441981747, "loss": 9.897, "step": 3130 }, { "epoch": 0.14, "learning_rate": 0.0002606779661016949, "loss": 10.1126, "step": 3140 }, { "epoch": 0.14, "learning_rate": 0.00026054758800521507, "loss": 9.9863, "step": 3150 }, { "epoch": 0.14, "learning_rate": 0.0002604172099087353, "loss": 10.0276, "step": 3160 }, { "epoch": 0.14, "learning_rate": 0.0002602868318122555, "loss": 10.0507, "step": 3170 }, { "epoch": 0.14, "learning_rate": 0.0002601564537157757, "loss": 10.0196, "step": 3180 }, { "epoch": 0.14, "learning_rate": 0.00026002607561929593, "loss": 10.0326, "step": 3190 }, { "epoch": 0.14, "learning_rate": 0.00025989569752281615, "loss": 10.0175, "step": 3200 }, { "epoch": 0.14, "learning_rate": 0.00025976531942633636, "loss": 10.0518, "step": 3210 }, { "epoch": 0.14, "learning_rate": 0.0002596349413298565, "loss": 10.006, "step": 3220 }, { "epoch": 0.14, "learning_rate": 0.0002595045632333768, "loss": 10.0784, "step": 3230 }, { "epoch": 0.14, "learning_rate": 0.000259374185136897, "loss": 10.0334, "step": 3240 }, { "epoch": 0.14, "learning_rate": 0.00025924380704041717, "loss": 9.8772, "step": 3250 }, { "epoch": 0.14, "learning_rate": 0.0002591134289439374, "loss": 10.0744, "step": 3260 }, { "epoch": 0.14, "learning_rate": 0.0002589830508474576, "loss": 9.9749, "step": 3270 }, { "epoch": 0.14, "learning_rate": 0.0002588526727509778, "loss": 10.082, "step": 3280 }, { "epoch": 0.14, "learning_rate": 0.00025872229465449803, "loss": 9.979, "step": 3290 }, { "epoch": 0.14, "learning_rate": 0.00025859191655801825, "loss": 10.0619, "step": 3300 }, { "epoch": 0.14, "learning_rate": 0.00025846153846153846, "loss": 10.1153, "step": 3310 }, { "epoch": 0.14, "learning_rate": 0.0002583311603650586, "loss": 10.1584, "step": 3320 }, { "epoch": 0.14, "learning_rate": 0.00025820078226857884, "loss": 10.0608, "step": 3330 }, { "epoch": 0.14, "learning_rate": 0.00025807040417209906, "loss": 10.1515, "step": 3340 }, { "epoch": 0.14, "learning_rate": 0.00025794002607561927, "loss": 10.1096, "step": 3350 }, { "epoch": 0.15, "learning_rate": 0.0002578096479791395, "loss": 10.1689, "step": 3360 }, { "epoch": 0.15, "learning_rate": 0.0002576792698826597, "loss": 10.2237, "step": 3370 }, { "epoch": 0.15, "learning_rate": 0.0002575488917861799, "loss": 10.1274, "step": 3380 }, { "epoch": 0.15, "learning_rate": 0.0002574185136897001, "loss": 10.0753, "step": 3390 }, { "epoch": 0.15, "learning_rate": 0.00025728813559322035, "loss": 10.0884, "step": 3400 }, { "epoch": 0.15, "learning_rate": 0.0002571577574967405, "loss": 10.0873, "step": 3410 }, { "epoch": 0.15, "learning_rate": 0.0002570273794002607, "loss": 10.222, "step": 3420 }, { "epoch": 0.15, "learning_rate": 0.00025689700130378094, "loss": 10.1173, "step": 3430 }, { "epoch": 0.15, "learning_rate": 0.00025676662320730115, "loss": 10.0776, "step": 3440 }, { "epoch": 0.15, "learning_rate": 0.00025663624511082137, "loss": 10.1509, "step": 3450 }, { "epoch": 0.15, "learning_rate": 0.0002565058670143416, "loss": 10.1337, "step": 3460 }, { "epoch": 0.15, "learning_rate": 0.0002563754889178618, "loss": 10.2193, "step": 3470 }, { "epoch": 0.15, "learning_rate": 0.00025624511082138196, "loss": 10.1478, "step": 3480 }, { "epoch": 0.15, "learning_rate": 0.0002561147327249022, "loss": 10.2988, "step": 3490 }, { "epoch": 0.15, "learning_rate": 0.0002559843546284224, "loss": 10.1737, "step": 3500 }, { "epoch": 0.15, "learning_rate": 0.0002558539765319426, "loss": 10.1102, "step": 3510 }, { "epoch": 0.15, "learning_rate": 0.0002557235984354628, "loss": 10.1505, "step": 3520 }, { "epoch": 0.15, "learning_rate": 0.00025559322033898304, "loss": 10.1551, "step": 3530 }, { "epoch": 0.15, "learning_rate": 0.00025546284224250325, "loss": 10.1628, "step": 3540 }, { "epoch": 0.15, "learning_rate": 0.0002553324641460234, "loss": 10.227, "step": 3550 }, { "epoch": 0.15, "learning_rate": 0.00025520208604954363, "loss": 10.1379, "step": 3560 }, { "epoch": 0.15, "learning_rate": 0.0002550717079530639, "loss": 10.1795, "step": 3570 }, { "epoch": 0.15, "learning_rate": 0.00025494132985658406, "loss": 10.1938, "step": 3580 }, { "epoch": 0.16, "learning_rate": 0.0002548109517601043, "loss": 10.2941, "step": 3590 }, { "epoch": 0.16, "learning_rate": 0.0002546805736636245, "loss": 10.2581, "step": 3600 }, { "epoch": 0.16, "learning_rate": 0.0002545501955671447, "loss": 10.1737, "step": 3610 }, { "epoch": 0.16, "learning_rate": 0.0002544198174706649, "loss": 10.0775, "step": 3620 }, { "epoch": 0.16, "learning_rate": 0.00025428943937418514, "loss": 10.1794, "step": 3630 }, { "epoch": 0.16, "learning_rate": 0.00025415906127770535, "loss": 10.1894, "step": 3640 }, { "epoch": 0.16, "learning_rate": 0.0002540286831812255, "loss": 10.247, "step": 3650 }, { "epoch": 0.16, "learning_rate": 0.00025389830508474573, "loss": 10.2817, "step": 3660 }, { "epoch": 0.16, "learning_rate": 0.00025376792698826594, "loss": 10.2589, "step": 3670 }, { "epoch": 0.16, "learning_rate": 0.00025363754889178616, "loss": 10.232, "step": 3680 }, { "epoch": 0.16, "learning_rate": 0.0002535071707953064, "loss": 10.312, "step": 3690 }, { "epoch": 0.16, "learning_rate": 0.0002533767926988266, "loss": 10.1569, "step": 3700 }, { "epoch": 0.16, "learning_rate": 0.0002532464146023468, "loss": 10.1389, "step": 3710 }, { "epoch": 0.16, "learning_rate": 0.00025311603650586697, "loss": 10.2286, "step": 3720 }, { "epoch": 0.16, "learning_rate": 0.0002529856584093872, "loss": 10.2886, "step": 3730 }, { "epoch": 0.16, "learning_rate": 0.0002528552803129074, "loss": 10.2117, "step": 3740 }, { "epoch": 0.16, "learning_rate": 0.0002527249022164276, "loss": 10.1231, "step": 3750 }, { "epoch": 0.16, "learning_rate": 0.00025259452411994783, "loss": 10.2635, "step": 3760 }, { "epoch": 0.16, "learning_rate": 0.00025246414602346804, "loss": 10.1321, "step": 3770 }, { "epoch": 0.16, "learning_rate": 0.00025233376792698826, "loss": 10.269, "step": 3780 }, { "epoch": 0.16, "learning_rate": 0.0002522033898305084, "loss": 10.2287, "step": 3790 }, { "epoch": 0.16, "learning_rate": 0.00025207301173402864, "loss": 10.243, "step": 3800 }, { "epoch": 0.16, "learning_rate": 0.0002519426336375489, "loss": 10.1994, "step": 3810 }, { "epoch": 0.17, "learning_rate": 0.00025181225554106907, "loss": 10.3732, "step": 3820 }, { "epoch": 0.17, "learning_rate": 0.0002516818774445893, "loss": 10.2368, "step": 3830 }, { "epoch": 0.17, "learning_rate": 0.0002515514993481095, "loss": 10.2235, "step": 3840 }, { "epoch": 0.17, "learning_rate": 0.0002514211212516297, "loss": 10.2138, "step": 3850 }, { "epoch": 0.17, "learning_rate": 0.0002512907431551499, "loss": 10.1026, "step": 3860 }, { "epoch": 0.17, "learning_rate": 0.00025116036505867014, "loss": 10.2148, "step": 3870 }, { "epoch": 0.17, "learning_rate": 0.00025102998696219036, "loss": 10.328, "step": 3880 }, { "epoch": 0.17, "learning_rate": 0.0002508996088657105, "loss": 10.2609, "step": 3890 }, { "epoch": 0.17, "learning_rate": 0.00025076923076923073, "loss": 10.2178, "step": 3900 }, { "epoch": 0.17, "learning_rate": 0.00025063885267275095, "loss": 10.1765, "step": 3910 }, { "epoch": 0.17, "learning_rate": 0.00025050847457627117, "loss": 10.1472, "step": 3920 }, { "epoch": 0.17, "learning_rate": 0.0002503780964797914, "loss": 10.2004, "step": 3930 }, { "epoch": 0.17, "learning_rate": 0.0002502477183833116, "loss": 10.334, "step": 3940 }, { "epoch": 0.17, "learning_rate": 0.0002501173402868318, "loss": 10.3113, "step": 3950 }, { "epoch": 0.17, "learning_rate": 0.00024998696219035197, "loss": 10.29, "step": 3960 }, { "epoch": 0.17, "learning_rate": 0.0002498565840938722, "loss": 10.2834, "step": 3970 }, { "epoch": 0.17, "learning_rate": 0.0002497262059973924, "loss": 10.1185, "step": 3980 }, { "epoch": 0.17, "learning_rate": 0.0002495958279009126, "loss": 10.2562, "step": 3990 }, { "epoch": 0.17, "learning_rate": 0.00024946544980443283, "loss": 10.1536, "step": 4000 }, { "epoch": 0.17, "learning_rate": 0.00024933507170795305, "loss": 10.215, "step": 4010 }, { "epoch": 0.17, "learning_rate": 0.00024920469361147326, "loss": 10.2364, "step": 4020 }, { "epoch": 0.17, "learning_rate": 0.0002490743155149934, "loss": 10.2292, "step": 4030 }, { "epoch": 0.17, "learning_rate": 0.0002489439374185137, "loss": 10.2103, "step": 4040 }, { "epoch": 0.18, "learning_rate": 0.00024881355932203386, "loss": 10.2176, "step": 4050 }, { "epoch": 0.18, "learning_rate": 0.00024868318122555407, "loss": 10.2494, "step": 4060 }, { "epoch": 0.18, "learning_rate": 0.0002485528031290743, "loss": 10.2538, "step": 4070 }, { "epoch": 0.18, "learning_rate": 0.0002484224250325945, "loss": 10.1897, "step": 4080 }, { "epoch": 0.18, "learning_rate": 0.0002482920469361147, "loss": 10.2138, "step": 4090 }, { "epoch": 0.18, "learning_rate": 0.00024816166883963493, "loss": 10.2346, "step": 4100 }, { "epoch": 0.18, "learning_rate": 0.00024803129074315515, "loss": 10.2503, "step": 4110 }, { "epoch": 0.18, "learning_rate": 0.0002479009126466753, "loss": 10.1818, "step": 4120 }, { "epoch": 0.18, "learning_rate": 0.0002477705345501955, "loss": 10.252, "step": 4130 }, { "epoch": 0.18, "learning_rate": 0.00024764015645371574, "loss": 10.1853, "step": 4140 }, { "epoch": 0.18, "learning_rate": 0.00024750977835723596, "loss": 10.3086, "step": 4150 }, { "epoch": 0.18, "learning_rate": 0.00024737940026075617, "loss": 10.1909, "step": 4160 }, { "epoch": 0.18, "learning_rate": 0.0002472490221642764, "loss": 10.114, "step": 4170 }, { "epoch": 0.18, "learning_rate": 0.0002471186440677966, "loss": 10.284, "step": 4180 }, { "epoch": 0.18, "learning_rate": 0.0002469882659713168, "loss": 10.224, "step": 4190 }, { "epoch": 0.18, "learning_rate": 0.000246857887874837, "loss": 10.2025, "step": 4200 }, { "epoch": 0.18, "learning_rate": 0.00024672750977835725, "loss": 10.1293, "step": 4210 }, { "epoch": 0.18, "learning_rate": 0.0002465971316818774, "loss": 10.3302, "step": 4220 }, { "epoch": 0.18, "learning_rate": 0.0002464667535853976, "loss": 10.4794, "step": 4230 }, { "epoch": 0.18, "learning_rate": 0.00024633637548891784, "loss": 10.4658, "step": 4240 }, { "epoch": 0.18, "learning_rate": 0.00024620599739243805, "loss": 10.515, "step": 4250 }, { "epoch": 0.18, "learning_rate": 0.00024607561929595827, "loss": 10.4624, "step": 4260 }, { "epoch": 0.18, "learning_rate": 0.0002459452411994785, "loss": 10.5324, "step": 4270 }, { "epoch": 0.19, "learning_rate": 0.0002458148631029987, "loss": 10.5538, "step": 4280 }, { "epoch": 0.19, "learning_rate": 0.00024568448500651886, "loss": 10.6497, "step": 4290 }, { "epoch": 0.19, "learning_rate": 0.0002455541069100391, "loss": 10.8531, "step": 4300 }, { "epoch": 0.19, "learning_rate": 0.0002454237288135593, "loss": 10.8161, "step": 4310 }, { "epoch": 0.19, "learning_rate": 0.0002452933507170795, "loss": 10.636, "step": 4320 }, { "epoch": 0.19, "learning_rate": 0.0002451629726205997, "loss": 10.6457, "step": 4330 }, { "epoch": 0.19, "learning_rate": 0.00024503259452411994, "loss": 10.6817, "step": 4340 }, { "epoch": 0.19, "learning_rate": 0.00024490221642764015, "loss": 10.5572, "step": 4350 }, { "epoch": 0.19, "learning_rate": 0.0002447718383311603, "loss": 10.5855, "step": 4360 }, { "epoch": 0.19, "learning_rate": 0.00024464146023468053, "loss": 10.5547, "step": 4370 }, { "epoch": 0.19, "learning_rate": 0.0002445110821382008, "loss": 10.5638, "step": 4380 }, { "epoch": 0.19, "learning_rate": 0.00024438070404172096, "loss": 10.5428, "step": 4390 }, { "epoch": 0.19, "learning_rate": 0.0002442503259452412, "loss": 10.5823, "step": 4400 }, { "epoch": 0.19, "learning_rate": 0.00024411994784876136, "loss": 10.5564, "step": 4410 }, { "epoch": 0.19, "learning_rate": 0.0002439895697522816, "loss": 10.5738, "step": 4420 }, { "epoch": 0.19, "learning_rate": 0.0002438591916558018, "loss": 10.729, "step": 4430 }, { "epoch": 0.19, "learning_rate": 0.000243728813559322, "loss": 10.4963, "step": 4440 }, { "epoch": 0.19, "learning_rate": 0.00024359843546284223, "loss": 10.6297, "step": 4450 }, { "epoch": 0.19, "learning_rate": 0.00024346805736636241, "loss": 10.5185, "step": 4460 }, { "epoch": 0.19, "learning_rate": 0.00024333767926988266, "loss": 10.5761, "step": 4470 }, { "epoch": 0.19, "learning_rate": 0.00024320730117340284, "loss": 10.6883, "step": 4480 }, { "epoch": 0.19, "learning_rate": 0.00024307692307692306, "loss": 10.6906, "step": 4490 }, { "epoch": 0.19, "learning_rate": 0.00024294654498044328, "loss": 10.5998, "step": 4500 }, { "epoch": 0.2, "learning_rate": 0.00024281616688396346, "loss": 10.6782, "step": 4510 }, { "epoch": 0.2, "learning_rate": 0.0002426857887874837, "loss": 10.6462, "step": 4520 }, { "epoch": 0.2, "learning_rate": 0.0002425554106910039, "loss": 10.7396, "step": 4530 }, { "epoch": 0.2, "learning_rate": 0.0002424250325945241, "loss": 10.6588, "step": 4540 }, { "epoch": 0.2, "learning_rate": 0.0002422946544980443, "loss": 10.7514, "step": 4550 }, { "epoch": 0.2, "learning_rate": 0.0002421642764015645, "loss": 10.7364, "step": 4560 }, { "epoch": 0.2, "learning_rate": 0.00024203389830508473, "loss": 10.8158, "step": 4570 }, { "epoch": 0.2, "learning_rate": 0.00024190352020860492, "loss": 10.7492, "step": 4580 }, { "epoch": 0.2, "learning_rate": 0.00024177314211212516, "loss": 10.6646, "step": 4590 }, { "epoch": 0.2, "learning_rate": 0.00024164276401564535, "loss": 10.7252, "step": 4600 }, { "epoch": 0.2, "learning_rate": 0.00024151238591916556, "loss": 10.7439, "step": 4610 }, { "epoch": 0.2, "learning_rate": 0.00024138200782268575, "loss": 10.6968, "step": 4620 }, { "epoch": 0.2, "learning_rate": 0.00024125162972620597, "loss": 10.775, "step": 4630 }, { "epoch": 0.2, "learning_rate": 0.0002411212516297262, "loss": 10.7529, "step": 4640 }, { "epoch": 0.2, "learning_rate": 0.0002409908735332464, "loss": 10.7829, "step": 4650 }, { "epoch": 0.2, "learning_rate": 0.0002408604954367666, "loss": 10.7423, "step": 4660 }, { "epoch": 0.2, "learning_rate": 0.0002407301173402868, "loss": 10.8141, "step": 4670 }, { "epoch": 0.2, "learning_rate": 0.00024059973924380702, "loss": 10.7217, "step": 4680 }, { "epoch": 0.2, "learning_rate": 0.00024046936114732723, "loss": 10.7555, "step": 4690 }, { "epoch": 0.2, "learning_rate": 0.00024033898305084745, "loss": 10.8166, "step": 4700 }, { "epoch": 0.2, "learning_rate": 0.00024020860495436766, "loss": 10.7251, "step": 4710 }, { "epoch": 0.2, "learning_rate": 0.00024007822685788785, "loss": 10.7733, "step": 4720 }, { "epoch": 0.2, "learning_rate": 0.00023994784876140807, "loss": 10.8039, "step": 4730 }, { "epoch": 0.21, "learning_rate": 0.00023981747066492825, "loss": 10.8321, "step": 4740 }, { "epoch": 0.21, "learning_rate": 0.00023968709256844847, "loss": 10.7657, "step": 4750 }, { "epoch": 0.21, "learning_rate": 0.0002395567144719687, "loss": 10.8044, "step": 4760 }, { "epoch": 0.21, "learning_rate": 0.0002394263363754889, "loss": 10.8451, "step": 4770 }, { "epoch": 0.21, "learning_rate": 0.00023929595827900911, "loss": 10.8282, "step": 4780 }, { "epoch": 0.21, "learning_rate": 0.0002391655801825293, "loss": 10.8957, "step": 4790 }, { "epoch": 0.21, "learning_rate": 0.00023903520208604952, "loss": 10.8406, "step": 4800 }, { "epoch": 0.21, "learning_rate": 0.0002389048239895697, "loss": 10.7582, "step": 4810 }, { "epoch": 0.21, "learning_rate": 0.00023877444589308995, "loss": 10.7831, "step": 4820 }, { "epoch": 0.21, "learning_rate": 0.00023864406779661016, "loss": 10.8734, "step": 4830 }, { "epoch": 0.21, "learning_rate": 0.00023851368970013035, "loss": 10.7978, "step": 4840 }, { "epoch": 0.21, "learning_rate": 0.00023838331160365057, "loss": 10.829, "step": 4850 }, { "epoch": 0.21, "learning_rate": 0.00023825293350717076, "loss": 10.8092, "step": 4860 }, { "epoch": 0.21, "learning_rate": 0.000238122555410691, "loss": 10.8564, "step": 4870 }, { "epoch": 0.21, "learning_rate": 0.00023799217731421121, "loss": 10.8201, "step": 4880 }, { "epoch": 0.21, "learning_rate": 0.0002378617992177314, "loss": 10.87, "step": 4890 }, { "epoch": 0.21, "learning_rate": 0.00023773142112125162, "loss": 10.849, "step": 4900 }, { "epoch": 0.21, "learning_rate": 0.0002376010430247718, "loss": 10.8149, "step": 4910 }, { "epoch": 0.21, "learning_rate": 0.00023747066492829202, "loss": 10.7457, "step": 4920 }, { "epoch": 0.21, "learning_rate": 0.0002373402868318122, "loss": 10.856, "step": 4930 }, { "epoch": 0.21, "learning_rate": 0.00023720990873533245, "loss": 10.8324, "step": 4940 }, { "epoch": 0.21, "learning_rate": 0.00023707953063885267, "loss": 10.8251, "step": 4950 }, { "epoch": 0.21, "learning_rate": 0.00023694915254237286, "loss": 10.8374, "step": 4960 }, { "epoch": 0.22, "learning_rate": 0.00023681877444589307, "loss": 10.8555, "step": 4970 }, { "epoch": 0.22, "learning_rate": 0.00023668839634941326, "loss": 10.8281, "step": 4980 }, { "epoch": 0.22, "learning_rate": 0.0002365580182529335, "loss": 10.8369, "step": 4990 }, { "epoch": 0.22, "learning_rate": 0.0002364276401564537, "loss": 10.8378, "step": 5000 } ], "max_steps": 23110, "num_train_epochs": 1, "total_flos": 7.846668409602048e+16, "trial_name": null, "trial_params": null }