{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.67373142203852, "global_step": 110000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 5.885815185403178e-07, "loss": 0.6708, "step": 200 }, { "epoch": 0.04, "learning_rate": 1.1771630370806356e-06, "loss": 0.6457, "step": 400 }, { "epoch": 0.05, "learning_rate": 1.7657445556209538e-06, "loss": 0.6149, "step": 600 }, { "epoch": 0.07, "learning_rate": 2.3543260741612712e-06, "loss": 0.5516, "step": 800 }, { "epoch": 0.09, "learning_rate": 2.942907592701589e-06, "loss": 0.5014, "step": 1000 }, { "epoch": 0.11, "learning_rate": 3.5314891112419075e-06, "loss": 0.484, "step": 1200 }, { "epoch": 0.12, "learning_rate": 4.120070629782225e-06, "loss": 0.4428, "step": 1400 }, { "epoch": 0.14, "learning_rate": 4.7086521483225425e-06, "loss": 0.4323, "step": 1600 }, { "epoch": 0.16, "learning_rate": 5.297233666862861e-06, "loss": 0.4234, "step": 1800 }, { "epoch": 0.18, "learning_rate": 5.885815185403178e-06, "loss": 0.402, "step": 2000 }, { "epoch": 0.19, "learning_rate": 6.474396703943496e-06, "loss": 0.4142, "step": 2200 }, { "epoch": 0.21, "learning_rate": 7.062978222483815e-06, "loss": 0.39, "step": 2400 }, { "epoch": 0.23, "learning_rate": 7.651559741024132e-06, "loss": 0.3842, "step": 2600 }, { "epoch": 0.25, "learning_rate": 8.24014125956445e-06, "loss": 0.3747, "step": 2800 }, { "epoch": 0.26, "learning_rate": 8.828722778104768e-06, "loss": 0.37, "step": 3000 }, { "epoch": 0.28, "learning_rate": 9.417304296645085e-06, "loss": 0.3688, "step": 3200 }, { "epoch": 0.3, "learning_rate": 1.0005885815185404e-05, "loss": 0.3656, "step": 3400 }, { "epoch": 0.32, "learning_rate": 1.0594467333725723e-05, "loss": 0.3488, "step": 3600 }, { "epoch": 0.33, "learning_rate": 1.1183048852266041e-05, "loss": 0.3532, "step": 3800 }, { "epoch": 0.35, "learning_rate": 1.1771630370806357e-05, "loss": 0.3553, "step": 4000 }, { "epoch": 0.37, "learning_rate": 1.2360211889346675e-05, "loss": 0.3597, "step": 4200 }, { "epoch": 0.39, "learning_rate": 1.2948793407886992e-05, "loss": 0.3427, "step": 4400 }, { "epoch": 0.4, "learning_rate": 1.3537374926427311e-05, "loss": 0.35, "step": 4600 }, { "epoch": 0.42, "learning_rate": 1.412595644496763e-05, "loss": 0.342, "step": 4800 }, { "epoch": 0.44, "learning_rate": 1.4714537963507947e-05, "loss": 0.3304, "step": 5000 }, { "epoch": 0.46, "learning_rate": 1.5303119482048264e-05, "loss": 0.3326, "step": 5200 }, { "epoch": 0.47, "learning_rate": 1.589170100058858e-05, "loss": 0.3273, "step": 5400 }, { "epoch": 0.49, "learning_rate": 1.64802825191289e-05, "loss": 0.3226, "step": 5600 }, { "epoch": 0.51, "learning_rate": 1.706886403766922e-05, "loss": 0.329, "step": 5800 }, { "epoch": 0.53, "learning_rate": 1.7657445556209536e-05, "loss": 0.3096, "step": 6000 }, { "epoch": 0.55, "learning_rate": 1.8246027074749856e-05, "loss": 0.3196, "step": 6200 }, { "epoch": 0.56, "learning_rate": 1.883460859329017e-05, "loss": 0.3228, "step": 6400 }, { "epoch": 0.58, "learning_rate": 1.942319011183049e-05, "loss": 0.3164, "step": 6600 }, { "epoch": 0.6, "learning_rate": 1.999925173503938e-05, "loss": 0.2999, "step": 6800 }, { "epoch": 0.62, "learning_rate": 1.996183848700825e-05, "loss": 0.3205, "step": 7000 }, { "epoch": 0.63, "learning_rate": 1.9924425238977122e-05, "loss": 0.3053, "step": 7200 }, { "epoch": 0.65, "learning_rate": 1.9887011990945994e-05, "loss": 0.2995, "step": 7400 }, { "epoch": 0.67, "learning_rate": 1.984959874291487e-05, "loss": 0.313, "step": 7600 }, { "epoch": 0.69, "learning_rate": 1.981218549488374e-05, "loss": 0.2996, "step": 7800 }, { "epoch": 0.7, "learning_rate": 1.977477224685261e-05, "loss": 0.3289, "step": 8000 }, { "epoch": 0.72, "learning_rate": 1.9737358998821485e-05, "loss": 0.3015, "step": 8200 }, { "epoch": 0.74, "learning_rate": 1.9699945750790357e-05, "loss": 0.3029, "step": 8400 }, { "epoch": 0.76, "learning_rate": 1.966253250275923e-05, "loss": 0.3075, "step": 8600 }, { "epoch": 0.77, "learning_rate": 1.96251192547281e-05, "loss": 0.2972, "step": 8800 }, { "epoch": 0.79, "learning_rate": 1.9587706006696973e-05, "loss": 0.2984, "step": 9000 }, { "epoch": 0.81, "learning_rate": 1.9550292758665845e-05, "loss": 0.2894, "step": 9200 }, { "epoch": 0.83, "learning_rate": 1.9512879510634717e-05, "loss": 0.2857, "step": 9400 }, { "epoch": 0.84, "learning_rate": 1.9475466262603592e-05, "loss": 0.2812, "step": 9600 }, { "epoch": 0.86, "learning_rate": 1.943805301457246e-05, "loss": 0.2872, "step": 9800 }, { "epoch": 0.88, "learning_rate": 1.9400639766541333e-05, "loss": 0.289, "step": 10000 }, { "epoch": 0.9, "learning_rate": 1.9363226518510205e-05, "loss": 0.2791, "step": 10200 }, { "epoch": 0.91, "learning_rate": 1.932581327047908e-05, "loss": 0.2912, "step": 10400 }, { "epoch": 0.93, "learning_rate": 1.9288400022447952e-05, "loss": 0.2872, "step": 10600 }, { "epoch": 0.95, "learning_rate": 1.9250986774416824e-05, "loss": 0.2766, "step": 10800 }, { "epoch": 0.97, "learning_rate": 1.9213573526385692e-05, "loss": 0.2819, "step": 11000 }, { "epoch": 0.98, "learning_rate": 1.9176160278354568e-05, "loss": 0.2795, "step": 11200 }, { "epoch": 1.0, "learning_rate": 1.913874703032344e-05, "loss": 0.2677, "step": 11400 }, { "epoch": 1.02, "learning_rate": 1.910133378229231e-05, "loss": 0.2488, "step": 11600 }, { "epoch": 1.04, "learning_rate": 1.9063920534261183e-05, "loss": 0.256, "step": 11800 }, { "epoch": 1.06, "learning_rate": 1.9026507286230055e-05, "loss": 0.2489, "step": 12000 }, { "epoch": 1.07, "learning_rate": 1.8989094038198927e-05, "loss": 0.2525, "step": 12200 }, { "epoch": 1.09, "learning_rate": 1.89516807901678e-05, "loss": 0.2483, "step": 12400 }, { "epoch": 1.11, "learning_rate": 1.8914267542136674e-05, "loss": 0.2528, "step": 12600 }, { "epoch": 1.13, "learning_rate": 1.8876854294105543e-05, "loss": 0.2481, "step": 12800 }, { "epoch": 1.14, "learning_rate": 1.8839441046074415e-05, "loss": 0.2517, "step": 13000 }, { "epoch": 1.16, "learning_rate": 1.880202779804329e-05, "loss": 0.2514, "step": 13200 }, { "epoch": 1.18, "learning_rate": 1.8764614550012162e-05, "loss": 0.2464, "step": 13400 }, { "epoch": 1.2, "learning_rate": 1.8727201301981034e-05, "loss": 0.2586, "step": 13600 }, { "epoch": 1.21, "learning_rate": 1.8689788053949906e-05, "loss": 0.2507, "step": 13800 }, { "epoch": 1.23, "learning_rate": 1.8652374805918778e-05, "loss": 0.2609, "step": 14000 }, { "epoch": 1.25, "learning_rate": 1.861496155788765e-05, "loss": 0.2368, "step": 14200 }, { "epoch": 1.27, "learning_rate": 1.857754830985652e-05, "loss": 0.2473, "step": 14400 }, { "epoch": 1.28, "learning_rate": 1.8540135061825394e-05, "loss": 0.2379, "step": 14600 }, { "epoch": 1.3, "learning_rate": 1.8502721813794265e-05, "loss": 0.2431, "step": 14800 }, { "epoch": 1.32, "learning_rate": 1.8465308565763137e-05, "loss": 0.2521, "step": 15000 }, { "epoch": 1.34, "learning_rate": 1.842789531773201e-05, "loss": 0.2473, "step": 15200 }, { "epoch": 1.35, "learning_rate": 1.8390482069700885e-05, "loss": 0.2496, "step": 15400 }, { "epoch": 1.37, "learning_rate": 1.8353068821669757e-05, "loss": 0.2529, "step": 15600 }, { "epoch": 1.39, "learning_rate": 1.8315655573638625e-05, "loss": 0.2484, "step": 15800 }, { "epoch": 1.41, "learning_rate": 1.8278242325607497e-05, "loss": 0.2393, "step": 16000 }, { "epoch": 1.42, "learning_rate": 1.8240829077576372e-05, "loss": 0.2394, "step": 16200 }, { "epoch": 1.44, "learning_rate": 1.8203415829545244e-05, "loss": 0.2265, "step": 16400 }, { "epoch": 1.46, "learning_rate": 1.8166002581514116e-05, "loss": 0.2435, "step": 16600 }, { "epoch": 1.48, "learning_rate": 1.8128589333482988e-05, "loss": 0.2513, "step": 16800 }, { "epoch": 1.5, "learning_rate": 1.809117608545186e-05, "loss": 0.2478, "step": 17000 }, { "epoch": 1.51, "learning_rate": 1.8053762837420732e-05, "loss": 0.2601, "step": 17200 }, { "epoch": 1.53, "learning_rate": 1.8016349589389604e-05, "loss": 0.2371, "step": 17400 }, { "epoch": 1.55, "learning_rate": 1.7978936341358476e-05, "loss": 0.2504, "step": 17600 }, { "epoch": 1.57, "learning_rate": 1.7941523093327348e-05, "loss": 0.2414, "step": 17800 }, { "epoch": 1.58, "learning_rate": 1.790410984529622e-05, "loss": 0.2296, "step": 18000 }, { "epoch": 1.6, "learning_rate": 1.7866696597265095e-05, "loss": 0.2413, "step": 18200 }, { "epoch": 1.62, "learning_rate": 1.7829283349233967e-05, "loss": 0.2447, "step": 18400 }, { "epoch": 1.64, "learning_rate": 1.779187010120284e-05, "loss": 0.2392, "step": 18600 }, { "epoch": 1.65, "learning_rate": 1.7754456853171707e-05, "loss": 0.2515, "step": 18800 }, { "epoch": 1.67, "learning_rate": 1.7717043605140583e-05, "loss": 0.2383, "step": 19000 }, { "epoch": 1.69, "learning_rate": 1.7679630357109454e-05, "loss": 0.2522, "step": 19200 }, { "epoch": 1.71, "learning_rate": 1.7642217109078326e-05, "loss": 0.244, "step": 19400 }, { "epoch": 1.72, "learning_rate": 1.7604803861047198e-05, "loss": 0.2368, "step": 19600 }, { "epoch": 1.74, "learning_rate": 1.756739061301607e-05, "loss": 0.2416, "step": 19800 }, { "epoch": 1.76, "learning_rate": 1.7529977364984942e-05, "loss": 0.2428, "step": 20000 }, { "epoch": 1.78, "learning_rate": 1.7492564116953814e-05, "loss": 0.2387, "step": 20200 }, { "epoch": 1.79, "learning_rate": 1.745515086892269e-05, "loss": 0.2363, "step": 20400 }, { "epoch": 1.81, "learning_rate": 1.7417737620891558e-05, "loss": 0.2433, "step": 20600 }, { "epoch": 1.83, "learning_rate": 1.738032437286043e-05, "loss": 0.2395, "step": 20800 }, { "epoch": 1.85, "learning_rate": 1.7342911124829302e-05, "loss": 0.237, "step": 21000 }, { "epoch": 1.86, "learning_rate": 1.7305497876798177e-05, "loss": 0.2382, "step": 21200 }, { "epoch": 1.88, "learning_rate": 1.726808462876705e-05, "loss": 0.2306, "step": 21400 }, { "epoch": 1.9, "learning_rate": 1.723067138073592e-05, "loss": 0.2356, "step": 21600 }, { "epoch": 1.92, "learning_rate": 1.7193258132704793e-05, "loss": 0.2473, "step": 21800 }, { "epoch": 1.93, "learning_rate": 1.7155844884673665e-05, "loss": 0.2428, "step": 22000 }, { "epoch": 1.95, "learning_rate": 1.7118431636642537e-05, "loss": 0.2302, "step": 22200 }, { "epoch": 1.97, "learning_rate": 1.708101838861141e-05, "loss": 0.2235, "step": 22400 }, { "epoch": 1.99, "learning_rate": 1.704360514058028e-05, "loss": 0.2271, "step": 22600 }, { "epoch": 2.01, "learning_rate": 1.7006191892549152e-05, "loss": 0.2229, "step": 22800 }, { "epoch": 2.02, "learning_rate": 1.6968778644518024e-05, "loss": 0.2, "step": 23000 }, { "epoch": 2.04, "learning_rate": 1.69313653964869e-05, "loss": 0.1905, "step": 23200 }, { "epoch": 2.06, "learning_rate": 1.689395214845577e-05, "loss": 0.1908, "step": 23400 }, { "epoch": 2.08, "learning_rate": 1.685653890042464e-05, "loss": 0.2062, "step": 23600 }, { "epoch": 2.09, "learning_rate": 1.6819125652393512e-05, "loss": 0.1926, "step": 23800 }, { "epoch": 2.11, "learning_rate": 1.6781712404362387e-05, "loss": 0.1867, "step": 24000 }, { "epoch": 2.13, "learning_rate": 1.674429915633126e-05, "loss": 0.1892, "step": 24200 }, { "epoch": 2.15, "learning_rate": 1.670688590830013e-05, "loss": 0.1853, "step": 24400 }, { "epoch": 2.16, "learning_rate": 1.6669472660269003e-05, "loss": 0.1972, "step": 24600 }, { "epoch": 2.18, "learning_rate": 1.6632059412237875e-05, "loss": 0.1916, "step": 24800 }, { "epoch": 2.2, "learning_rate": 1.6594646164206747e-05, "loss": 0.1836, "step": 25000 }, { "epoch": 2.22, "learning_rate": 1.655723291617562e-05, "loss": 0.1855, "step": 25200 }, { "epoch": 2.23, "learning_rate": 1.651981966814449e-05, "loss": 0.2007, "step": 25400 }, { "epoch": 2.25, "learning_rate": 1.6482406420113363e-05, "loss": 0.1857, "step": 25600 }, { "epoch": 2.27, "learning_rate": 1.6444993172082235e-05, "loss": 0.2067, "step": 25800 }, { "epoch": 2.29, "learning_rate": 1.6407579924051106e-05, "loss": 0.2096, "step": 26000 }, { "epoch": 2.3, "learning_rate": 1.6370166676019982e-05, "loss": 0.2039, "step": 26200 }, { "epoch": 2.32, "learning_rate": 1.6332753427988854e-05, "loss": 0.1985, "step": 26400 }, { "epoch": 2.34, "learning_rate": 1.6295340179957722e-05, "loss": 0.1927, "step": 26600 }, { "epoch": 2.36, "learning_rate": 1.6257926931926597e-05, "loss": 0.2015, "step": 26800 }, { "epoch": 2.37, "learning_rate": 1.622051368389547e-05, "loss": 0.1822, "step": 27000 }, { "epoch": 2.39, "learning_rate": 1.618310043586434e-05, "loss": 0.1957, "step": 27200 }, { "epoch": 2.41, "learning_rate": 1.6145687187833213e-05, "loss": 0.1936, "step": 27400 }, { "epoch": 2.43, "learning_rate": 1.6108273939802085e-05, "loss": 0.1896, "step": 27600 }, { "epoch": 2.44, "learning_rate": 1.6070860691770957e-05, "loss": 0.199, "step": 27800 }, { "epoch": 2.46, "learning_rate": 1.603344744373983e-05, "loss": 0.1954, "step": 28000 }, { "epoch": 2.48, "learning_rate": 1.5996034195708704e-05, "loss": 0.1918, "step": 28200 }, { "epoch": 2.5, "learning_rate": 1.5958620947677573e-05, "loss": 0.1902, "step": 28400 }, { "epoch": 2.52, "learning_rate": 1.5921207699646445e-05, "loss": 0.201, "step": 28600 }, { "epoch": 2.53, "learning_rate": 1.5883794451615317e-05, "loss": 0.1888, "step": 28800 }, { "epoch": 2.55, "learning_rate": 1.5846381203584192e-05, "loss": 0.1932, "step": 29000 }, { "epoch": 2.57, "learning_rate": 1.5808967955553064e-05, "loss": 0.2016, "step": 29200 }, { "epoch": 2.59, "learning_rate": 1.5771554707521936e-05, "loss": 0.199, "step": 29400 }, { "epoch": 2.6, "learning_rate": 1.5734141459490804e-05, "loss": 0.1854, "step": 29600 }, { "epoch": 2.62, "learning_rate": 1.569672821145968e-05, "loss": 0.1935, "step": 29800 }, { "epoch": 2.64, "learning_rate": 1.565931496342855e-05, "loss": 0.1902, "step": 30000 }, { "epoch": 2.66, "learning_rate": 1.5621901715397423e-05, "loss": 0.1831, "step": 30200 }, { "epoch": 2.67, "learning_rate": 1.5584488467366295e-05, "loss": 0.1978, "step": 30400 }, { "epoch": 2.69, "learning_rate": 1.5547075219335167e-05, "loss": 0.1938, "step": 30600 }, { "epoch": 2.71, "learning_rate": 1.550966197130404e-05, "loss": 0.1911, "step": 30800 }, { "epoch": 2.73, "learning_rate": 1.547224872327291e-05, "loss": 0.1847, "step": 31000 }, { "epoch": 2.74, "learning_rate": 1.5434835475241786e-05, "loss": 0.1919, "step": 31200 }, { "epoch": 2.76, "learning_rate": 1.5397422227210655e-05, "loss": 0.1944, "step": 31400 }, { "epoch": 2.78, "learning_rate": 1.5360008979179527e-05, "loss": 0.1809, "step": 31600 }, { "epoch": 2.8, "learning_rate": 1.5322595731148402e-05, "loss": 0.1986, "step": 31800 }, { "epoch": 2.81, "learning_rate": 1.5285182483117274e-05, "loss": 0.1927, "step": 32000 }, { "epoch": 2.83, "learning_rate": 1.5247769235086144e-05, "loss": 0.1955, "step": 32200 }, { "epoch": 2.85, "learning_rate": 1.5210355987055016e-05, "loss": 0.1909, "step": 32400 }, { "epoch": 2.87, "learning_rate": 1.517294273902389e-05, "loss": 0.2026, "step": 32600 }, { "epoch": 2.88, "learning_rate": 1.5135529490992762e-05, "loss": 0.1922, "step": 32800 }, { "epoch": 2.9, "learning_rate": 1.5098116242961634e-05, "loss": 0.1892, "step": 33000 }, { "epoch": 2.92, "learning_rate": 1.5060702994930506e-05, "loss": 0.1962, "step": 33200 }, { "epoch": 2.94, "learning_rate": 1.502328974689938e-05, "loss": 0.1987, "step": 33400 }, { "epoch": 2.95, "learning_rate": 1.4985876498868251e-05, "loss": 0.2, "step": 33600 }, { "epoch": 2.97, "learning_rate": 1.4948463250837121e-05, "loss": 0.1784, "step": 33800 }, { "epoch": 2.99, "learning_rate": 1.4911050002805995e-05, "loss": 0.1998, "step": 34000 }, { "epoch": 3.01, "learning_rate": 1.4873636754774867e-05, "loss": 0.1794, "step": 34200 }, { "epoch": 3.03, "learning_rate": 1.4836223506743739e-05, "loss": 0.1545, "step": 34400 }, { "epoch": 3.04, "learning_rate": 1.479881025871261e-05, "loss": 0.1596, "step": 34600 }, { "epoch": 3.06, "learning_rate": 1.4761397010681484e-05, "loss": 0.1522, "step": 34800 }, { "epoch": 3.08, "learning_rate": 1.4723983762650356e-05, "loss": 0.1633, "step": 35000 }, { "epoch": 3.1, "learning_rate": 1.4686570514619227e-05, "loss": 0.1421, "step": 35200 }, { "epoch": 3.11, "learning_rate": 1.4649157266588102e-05, "loss": 0.1506, "step": 35400 }, { "epoch": 3.13, "learning_rate": 1.4611744018556972e-05, "loss": 0.1563, "step": 35600 }, { "epoch": 3.15, "learning_rate": 1.4574330770525844e-05, "loss": 0.1557, "step": 35800 }, { "epoch": 3.17, "learning_rate": 1.4536917522494716e-05, "loss": 0.1641, "step": 36000 }, { "epoch": 3.18, "learning_rate": 1.449950427446359e-05, "loss": 0.1545, "step": 36200 }, { "epoch": 3.2, "learning_rate": 1.4462091026432461e-05, "loss": 0.1625, "step": 36400 }, { "epoch": 3.22, "learning_rate": 1.4424677778401333e-05, "loss": 0.165, "step": 36600 }, { "epoch": 3.24, "learning_rate": 1.4387264530370207e-05, "loss": 0.157, "step": 36800 }, { "epoch": 3.25, "learning_rate": 1.4349851282339077e-05, "loss": 0.1485, "step": 37000 }, { "epoch": 3.27, "learning_rate": 1.4312438034307949e-05, "loss": 0.1571, "step": 37200 }, { "epoch": 3.29, "learning_rate": 1.4275024786276821e-05, "loss": 0.1608, "step": 37400 }, { "epoch": 3.31, "learning_rate": 1.4237611538245695e-05, "loss": 0.162, "step": 37600 }, { "epoch": 3.32, "learning_rate": 1.4200198290214567e-05, "loss": 0.154, "step": 37800 }, { "epoch": 3.34, "learning_rate": 1.4162785042183438e-05, "loss": 0.1536, "step": 38000 }, { "epoch": 3.36, "learning_rate": 1.4125371794152309e-05, "loss": 0.164, "step": 38200 }, { "epoch": 3.38, "learning_rate": 1.4087958546121184e-05, "loss": 0.1655, "step": 38400 }, { "epoch": 3.39, "learning_rate": 1.4050545298090054e-05, "loss": 0.1606, "step": 38600 }, { "epoch": 3.41, "learning_rate": 1.4013132050058926e-05, "loss": 0.1533, "step": 38800 }, { "epoch": 3.43, "learning_rate": 1.39757188020278e-05, "loss": 0.1515, "step": 39000 }, { "epoch": 3.45, "learning_rate": 1.3938305553996672e-05, "loss": 0.1624, "step": 39200 }, { "epoch": 3.46, "learning_rate": 1.3900892305965544e-05, "loss": 0.1467, "step": 39400 }, { "epoch": 3.48, "learning_rate": 1.3863479057934415e-05, "loss": 0.1582, "step": 39600 }, { "epoch": 3.5, "learning_rate": 1.3826065809903289e-05, "loss": 0.163, "step": 39800 }, { "epoch": 3.52, "learning_rate": 1.378865256187216e-05, "loss": 0.1561, "step": 40000 }, { "epoch": 3.54, "learning_rate": 1.3751239313841031e-05, "loss": 0.1558, "step": 40200 }, { "epoch": 3.55, "learning_rate": 1.3713826065809905e-05, "loss": 0.1686, "step": 40400 }, { "epoch": 3.57, "learning_rate": 1.3676412817778777e-05, "loss": 0.161, "step": 40600 }, { "epoch": 3.59, "learning_rate": 1.3638999569747649e-05, "loss": 0.1574, "step": 40800 }, { "epoch": 3.61, "learning_rate": 1.360158632171652e-05, "loss": 0.1591, "step": 41000 }, { "epoch": 3.62, "learning_rate": 1.3564173073685394e-05, "loss": 0.1618, "step": 41200 }, { "epoch": 3.64, "learning_rate": 1.3526759825654266e-05, "loss": 0.1545, "step": 41400 }, { "epoch": 3.66, "learning_rate": 1.3489346577623136e-05, "loss": 0.1626, "step": 41600 }, { "epoch": 3.68, "learning_rate": 1.345193332959201e-05, "loss": 0.1588, "step": 41800 }, { "epoch": 3.69, "learning_rate": 1.3414520081560882e-05, "loss": 0.1538, "step": 42000 }, { "epoch": 3.71, "learning_rate": 1.3377106833529754e-05, "loss": 0.1557, "step": 42200 }, { "epoch": 3.73, "learning_rate": 1.3339693585498626e-05, "loss": 0.1554, "step": 42400 }, { "epoch": 3.75, "learning_rate": 1.33022803374675e-05, "loss": 0.1639, "step": 42600 }, { "epoch": 3.76, "learning_rate": 1.3264867089436371e-05, "loss": 0.1519, "step": 42800 }, { "epoch": 3.78, "learning_rate": 1.3227453841405241e-05, "loss": 0.1671, "step": 43000 }, { "epoch": 3.8, "learning_rate": 1.3190040593374113e-05, "loss": 0.1614, "step": 43200 }, { "epoch": 3.82, "learning_rate": 1.3152627345342987e-05, "loss": 0.1523, "step": 43400 }, { "epoch": 3.83, "learning_rate": 1.3115214097311859e-05, "loss": 0.1564, "step": 43600 }, { "epoch": 3.85, "learning_rate": 1.307780084928073e-05, "loss": 0.1662, "step": 43800 }, { "epoch": 3.87, "learning_rate": 1.3040387601249604e-05, "loss": 0.1667, "step": 44000 }, { "epoch": 3.89, "learning_rate": 1.3002974353218476e-05, "loss": 0.1631, "step": 44200 }, { "epoch": 3.9, "learning_rate": 1.2965561105187348e-05, "loss": 0.1561, "step": 44400 }, { "epoch": 3.92, "learning_rate": 1.2928147857156219e-05, "loss": 0.159, "step": 44600 }, { "epoch": 3.94, "learning_rate": 1.2890734609125092e-05, "loss": 0.1618, "step": 44800 }, { "epoch": 3.96, "learning_rate": 1.2853321361093964e-05, "loss": 0.1538, "step": 45000 }, { "epoch": 3.98, "learning_rate": 1.2815908113062836e-05, "loss": 0.1586, "step": 45200 }, { "epoch": 3.99, "learning_rate": 1.277849486503171e-05, "loss": 0.16, "step": 45400 }, { "epoch": 4.01, "learning_rate": 1.2741081617000581e-05, "loss": 0.1483, "step": 45600 }, { "epoch": 4.03, "learning_rate": 1.2703668368969453e-05, "loss": 0.1201, "step": 45800 }, { "epoch": 4.05, "learning_rate": 1.2666255120938324e-05, "loss": 0.1278, "step": 46000 }, { "epoch": 4.06, "learning_rate": 1.2628841872907199e-05, "loss": 0.1348, "step": 46200 }, { "epoch": 4.08, "learning_rate": 1.2591428624876069e-05, "loss": 0.1238, "step": 46400 }, { "epoch": 4.1, "learning_rate": 1.2554015376844941e-05, "loss": 0.1215, "step": 46600 }, { "epoch": 4.12, "learning_rate": 1.2516602128813815e-05, "loss": 0.132, "step": 46800 }, { "epoch": 4.13, "learning_rate": 1.2479188880782687e-05, "loss": 0.1244, "step": 47000 }, { "epoch": 4.15, "learning_rate": 1.2441775632751559e-05, "loss": 0.1235, "step": 47200 }, { "epoch": 4.17, "learning_rate": 1.240436238472043e-05, "loss": 0.1312, "step": 47400 }, { "epoch": 4.19, "learning_rate": 1.2366949136689304e-05, "loss": 0.1226, "step": 47600 }, { "epoch": 4.2, "learning_rate": 1.2329535888658174e-05, "loss": 0.1292, "step": 47800 }, { "epoch": 4.22, "learning_rate": 1.2292122640627046e-05, "loss": 0.1255, "step": 48000 }, { "epoch": 4.24, "learning_rate": 1.2254709392595918e-05, "loss": 0.1258, "step": 48200 }, { "epoch": 4.26, "learning_rate": 1.2217296144564792e-05, "loss": 0.1292, "step": 48400 }, { "epoch": 4.27, "learning_rate": 1.2179882896533664e-05, "loss": 0.1298, "step": 48600 }, { "epoch": 4.29, "learning_rate": 1.2142469648502536e-05, "loss": 0.1254, "step": 48800 }, { "epoch": 4.31, "learning_rate": 1.210505640047141e-05, "loss": 0.1241, "step": 49000 }, { "epoch": 4.33, "learning_rate": 1.2067643152440281e-05, "loss": 0.1302, "step": 49200 }, { "epoch": 4.34, "learning_rate": 1.2030229904409151e-05, "loss": 0.1309, "step": 49400 }, { "epoch": 4.36, "learning_rate": 1.1992816656378023e-05, "loss": 0.1182, "step": 49600 }, { "epoch": 4.38, "learning_rate": 1.1955403408346897e-05, "loss": 0.1331, "step": 49800 }, { "epoch": 4.4, "learning_rate": 1.1917990160315769e-05, "loss": 0.1289, "step": 50000 }, { "epoch": 4.41, "learning_rate": 1.188057691228464e-05, "loss": 0.1149, "step": 50200 }, { "epoch": 4.43, "learning_rate": 1.1843163664253514e-05, "loss": 0.1201, "step": 50400 }, { "epoch": 4.45, "learning_rate": 1.1805750416222386e-05, "loss": 0.1218, "step": 50600 }, { "epoch": 4.47, "learning_rate": 1.1768337168191256e-05, "loss": 0.1278, "step": 50800 }, { "epoch": 4.49, "learning_rate": 1.1730923920160128e-05, "loss": 0.1275, "step": 51000 }, { "epoch": 4.5, "learning_rate": 1.1693510672129002e-05, "loss": 0.1379, "step": 51200 }, { "epoch": 4.52, "learning_rate": 1.1656097424097874e-05, "loss": 0.1298, "step": 51400 }, { "epoch": 4.54, "learning_rate": 1.1618684176066746e-05, "loss": 0.1375, "step": 51600 }, { "epoch": 4.56, "learning_rate": 1.1581270928035618e-05, "loss": 0.1271, "step": 51800 }, { "epoch": 4.57, "learning_rate": 1.1543857680004491e-05, "loss": 0.1487, "step": 52000 }, { "epoch": 4.59, "learning_rate": 1.1506444431973363e-05, "loss": 0.1341, "step": 52200 }, { "epoch": 4.61, "learning_rate": 1.1469031183942233e-05, "loss": 0.1314, "step": 52400 }, { "epoch": 4.63, "learning_rate": 1.1431617935911107e-05, "loss": 0.1187, "step": 52600 }, { "epoch": 4.64, "learning_rate": 1.1394204687879979e-05, "loss": 0.1308, "step": 52800 }, { "epoch": 4.66, "learning_rate": 1.1356791439848851e-05, "loss": 0.1357, "step": 53000 }, { "epoch": 4.68, "learning_rate": 1.1319378191817723e-05, "loss": 0.1346, "step": 53200 }, { "epoch": 4.7, "learning_rate": 1.1281964943786596e-05, "loss": 0.1302, "step": 53400 }, { "epoch": 4.71, "learning_rate": 1.1244551695755468e-05, "loss": 0.1318, "step": 53600 }, { "epoch": 4.73, "learning_rate": 1.120713844772434e-05, "loss": 0.1406, "step": 53800 }, { "epoch": 4.75, "learning_rate": 1.1169725199693214e-05, "loss": 0.1287, "step": 54000 }, { "epoch": 4.77, "learning_rate": 1.1132311951662084e-05, "loss": 0.1296, "step": 54200 }, { "epoch": 4.78, "learning_rate": 1.1094898703630956e-05, "loss": 0.1239, "step": 54400 }, { "epoch": 4.8, "learning_rate": 1.1057485455599828e-05, "loss": 0.1289, "step": 54600 }, { "epoch": 4.82, "learning_rate": 1.1020072207568702e-05, "loss": 0.1371, "step": 54800 }, { "epoch": 4.84, "learning_rate": 1.0982658959537573e-05, "loss": 0.1371, "step": 55000 }, { "epoch": 4.85, "learning_rate": 1.0945245711506445e-05, "loss": 0.1197, "step": 55200 }, { "epoch": 4.87, "learning_rate": 1.0907832463475319e-05, "loss": 0.1316, "step": 55400 }, { "epoch": 4.89, "learning_rate": 1.0870419215444191e-05, "loss": 0.1275, "step": 55600 }, { "epoch": 4.91, "learning_rate": 1.0833005967413061e-05, "loss": 0.1287, "step": 55800 }, { "epoch": 4.92, "learning_rate": 1.0795592719381933e-05, "loss": 0.1266, "step": 56000 }, { "epoch": 4.94, "learning_rate": 1.0758179471350807e-05, "loss": 0.1275, "step": 56200 }, { "epoch": 4.96, "learning_rate": 1.0720766223319679e-05, "loss": 0.1244, "step": 56400 }, { "epoch": 4.98, "learning_rate": 1.068335297528855e-05, "loss": 0.1453, "step": 56600 }, { "epoch": 5.0, "learning_rate": 1.0645939727257422e-05, "loss": 0.1343, "step": 56800 }, { "epoch": 5.01, "learning_rate": 1.0608526479226296e-05, "loss": 0.114, "step": 57000 }, { "epoch": 5.03, "learning_rate": 1.0571113231195166e-05, "loss": 0.1038, "step": 57200 }, { "epoch": 5.05, "learning_rate": 1.0533699983164038e-05, "loss": 0.1064, "step": 57400 }, { "epoch": 5.07, "learning_rate": 1.0496286735132912e-05, "loss": 0.0928, "step": 57600 }, { "epoch": 5.08, "learning_rate": 1.0458873487101784e-05, "loss": 0.1079, "step": 57800 }, { "epoch": 5.1, "learning_rate": 1.0421460239070656e-05, "loss": 0.0976, "step": 58000 }, { "epoch": 5.12, "learning_rate": 1.0384046991039528e-05, "loss": 0.1086, "step": 58200 }, { "epoch": 5.14, "learning_rate": 1.0346633743008401e-05, "loss": 0.105, "step": 58400 }, { "epoch": 5.15, "learning_rate": 1.0309220494977273e-05, "loss": 0.1086, "step": 58600 }, { "epoch": 5.17, "learning_rate": 1.0271807246946143e-05, "loss": 0.0972, "step": 58800 }, { "epoch": 5.19, "learning_rate": 1.0234393998915017e-05, "loss": 0.1086, "step": 59000 }, { "epoch": 5.21, "learning_rate": 1.0196980750883889e-05, "loss": 0.1151, "step": 59200 }, { "epoch": 5.22, "learning_rate": 1.015956750285276e-05, "loss": 0.1076, "step": 59400 }, { "epoch": 5.24, "learning_rate": 1.0122154254821633e-05, "loss": 0.1061, "step": 59600 }, { "epoch": 5.26, "learning_rate": 1.0084741006790506e-05, "loss": 0.1087, "step": 59800 }, { "epoch": 5.28, "learning_rate": 1.0047327758759378e-05, "loss": 0.1081, "step": 60000 }, { "epoch": 5.29, "learning_rate": 1.0009914510728248e-05, "loss": 0.1051, "step": 60200 }, { "epoch": 5.31, "learning_rate": 9.972501262697122e-06, "loss": 0.1096, "step": 60400 }, { "epoch": 5.33, "learning_rate": 9.935088014665994e-06, "loss": 0.1067, "step": 60600 }, { "epoch": 5.35, "learning_rate": 9.897674766634866e-06, "loss": 0.1106, "step": 60800 }, { "epoch": 5.36, "learning_rate": 9.86026151860374e-06, "loss": 0.1019, "step": 61000 }, { "epoch": 5.38, "learning_rate": 9.82284827057261e-06, "loss": 0.1107, "step": 61200 }, { "epoch": 5.4, "learning_rate": 9.785435022541483e-06, "loss": 0.1045, "step": 61400 }, { "epoch": 5.42, "learning_rate": 9.748021774510355e-06, "loss": 0.1101, "step": 61600 }, { "epoch": 5.43, "learning_rate": 9.710608526479227e-06, "loss": 0.1001, "step": 61800 }, { "epoch": 5.45, "learning_rate": 9.673195278448099e-06, "loss": 0.1093, "step": 62000 }, { "epoch": 5.47, "learning_rate": 9.635782030416971e-06, "loss": 0.1091, "step": 62200 }, { "epoch": 5.49, "learning_rate": 9.598368782385845e-06, "loss": 0.1073, "step": 62400 }, { "epoch": 5.51, "learning_rate": 9.560955534354715e-06, "loss": 0.1175, "step": 62600 }, { "epoch": 5.52, "learning_rate": 9.523542286323588e-06, "loss": 0.1018, "step": 62800 }, { "epoch": 5.54, "learning_rate": 9.48612903829246e-06, "loss": 0.1015, "step": 63000 }, { "epoch": 5.56, "learning_rate": 9.448715790261332e-06, "loss": 0.1159, "step": 63200 }, { "epoch": 5.58, "learning_rate": 9.411302542230204e-06, "loss": 0.1104, "step": 63400 }, { "epoch": 5.59, "learning_rate": 9.373889294199076e-06, "loss": 0.1105, "step": 63600 }, { "epoch": 5.61, "learning_rate": 9.33647604616795e-06, "loss": 0.1037, "step": 63800 }, { "epoch": 5.63, "learning_rate": 9.299062798136822e-06, "loss": 0.103, "step": 64000 }, { "epoch": 5.65, "learning_rate": 9.261649550105694e-06, "loss": 0.1129, "step": 64200 }, { "epoch": 5.66, "learning_rate": 9.224236302074565e-06, "loss": 0.1005, "step": 64400 }, { "epoch": 5.68, "learning_rate": 9.186823054043437e-06, "loss": 0.1082, "step": 64600 }, { "epoch": 5.7, "learning_rate": 9.14940980601231e-06, "loss": 0.1157, "step": 64800 }, { "epoch": 5.72, "learning_rate": 9.111996557981181e-06, "loss": 0.1139, "step": 65000 }, { "epoch": 5.73, "learning_rate": 9.074583309950053e-06, "loss": 0.1101, "step": 65200 }, { "epoch": 5.75, "learning_rate": 9.037170061918927e-06, "loss": 0.1139, "step": 65400 }, { "epoch": 5.77, "learning_rate": 8.999756813887799e-06, "loss": 0.1107, "step": 65600 }, { "epoch": 5.79, "learning_rate": 8.96234356585667e-06, "loss": 0.1095, "step": 65800 }, { "epoch": 5.8, "learning_rate": 8.924930317825543e-06, "loss": 0.1127, "step": 66000 }, { "epoch": 5.82, "learning_rate": 8.887517069794414e-06, "loss": 0.1118, "step": 66200 }, { "epoch": 5.84, "learning_rate": 8.850103821763288e-06, "loss": 0.1042, "step": 66400 }, { "epoch": 5.86, "learning_rate": 8.812690573732158e-06, "loss": 0.1112, "step": 66600 }, { "epoch": 5.87, "learning_rate": 8.775277325701032e-06, "loss": 0.1116, "step": 66800 }, { "epoch": 5.89, "learning_rate": 8.737864077669904e-06, "loss": 0.1139, "step": 67000 }, { "epoch": 5.91, "learning_rate": 8.700450829638776e-06, "loss": 0.1082, "step": 67200 }, { "epoch": 5.93, "learning_rate": 8.663037581607648e-06, "loss": 0.1056, "step": 67400 }, { "epoch": 5.94, "learning_rate": 8.62562433357652e-06, "loss": 0.102, "step": 67600 }, { "epoch": 5.96, "learning_rate": 8.588211085545393e-06, "loss": 0.1026, "step": 67800 }, { "epoch": 5.98, "learning_rate": 8.550797837514263e-06, "loss": 0.1103, "step": 68000 }, { "epoch": 6.0, "learning_rate": 8.513384589483137e-06, "loss": 0.1147, "step": 68200 }, { "epoch": 6.02, "learning_rate": 8.475971341452009e-06, "loss": 0.0773, "step": 68400 }, { "epoch": 6.03, "learning_rate": 8.43855809342088e-06, "loss": 0.0812, "step": 68600 }, { "epoch": 6.05, "learning_rate": 8.401144845389754e-06, "loss": 0.0801, "step": 68800 }, { "epoch": 6.07, "learning_rate": 8.363731597358625e-06, "loss": 0.0884, "step": 69000 }, { "epoch": 6.09, "learning_rate": 8.326318349327498e-06, "loss": 0.0914, "step": 69200 }, { "epoch": 6.1, "learning_rate": 8.28890510129637e-06, "loss": 0.0868, "step": 69400 }, { "epoch": 6.12, "learning_rate": 8.251491853265242e-06, "loss": 0.0948, "step": 69600 }, { "epoch": 6.14, "learning_rate": 8.214078605234114e-06, "loss": 0.0808, "step": 69800 }, { "epoch": 6.16, "learning_rate": 8.176665357202986e-06, "loss": 0.092, "step": 70000 }, { "epoch": 6.17, "learning_rate": 8.139252109171858e-06, "loss": 0.0841, "step": 70200 }, { "epoch": 6.19, "learning_rate": 8.10183886114073e-06, "loss": 0.0951, "step": 70400 }, { "epoch": 6.21, "learning_rate": 8.064425613109603e-06, "loss": 0.0928, "step": 70600 }, { "epoch": 6.23, "learning_rate": 8.027012365078475e-06, "loss": 0.0935, "step": 70800 }, { "epoch": 6.24, "learning_rate": 7.989599117047347e-06, "loss": 0.0927, "step": 71000 }, { "epoch": 6.26, "learning_rate": 7.952185869016219e-06, "loss": 0.0923, "step": 71200 }, { "epoch": 6.28, "learning_rate": 7.914772620985091e-06, "loss": 0.0801, "step": 71400 }, { "epoch": 6.3, "learning_rate": 7.877359372953963e-06, "loss": 0.0937, "step": 71600 }, { "epoch": 6.31, "learning_rate": 7.839946124922837e-06, "loss": 0.0865, "step": 71800 }, { "epoch": 6.33, "learning_rate": 7.802532876891707e-06, "loss": 0.0871, "step": 72000 }, { "epoch": 6.35, "learning_rate": 7.76511962886058e-06, "loss": 0.0786, "step": 72200 }, { "epoch": 6.37, "learning_rate": 7.727706380829452e-06, "loss": 0.0934, "step": 72400 }, { "epoch": 6.38, "learning_rate": 7.690293132798324e-06, "loss": 0.0838, "step": 72600 }, { "epoch": 6.4, "learning_rate": 7.652879884767196e-06, "loss": 0.097, "step": 72800 }, { "epoch": 6.42, "learning_rate": 7.615466636736069e-06, "loss": 0.0885, "step": 73000 }, { "epoch": 6.44, "learning_rate": 7.578053388704941e-06, "loss": 0.0919, "step": 73200 }, { "epoch": 6.46, "learning_rate": 7.540640140673813e-06, "loss": 0.0822, "step": 73400 }, { "epoch": 6.47, "learning_rate": 7.5032268926426856e-06, "loss": 0.0837, "step": 73600 }, { "epoch": 6.49, "learning_rate": 7.465813644611558e-06, "loss": 0.0879, "step": 73800 }, { "epoch": 6.51, "learning_rate": 7.428400396580429e-06, "loss": 0.0927, "step": 74000 }, { "epoch": 6.53, "learning_rate": 7.390987148549302e-06, "loss": 0.0929, "step": 74200 }, { "epoch": 6.54, "learning_rate": 7.353573900518174e-06, "loss": 0.0871, "step": 74400 }, { "epoch": 6.56, "learning_rate": 7.316160652487047e-06, "loss": 0.0886, "step": 74600 }, { "epoch": 6.58, "learning_rate": 7.278747404455918e-06, "loss": 0.0887, "step": 74800 }, { "epoch": 6.6, "learning_rate": 7.241334156424791e-06, "loss": 0.0924, "step": 75000 }, { "epoch": 6.61, "learning_rate": 7.203920908393663e-06, "loss": 0.0971, "step": 75200 }, { "epoch": 6.63, "learning_rate": 7.166507660362535e-06, "loss": 0.0922, "step": 75400 }, { "epoch": 6.65, "learning_rate": 7.129094412331407e-06, "loss": 0.0866, "step": 75600 }, { "epoch": 6.67, "learning_rate": 7.091681164300279e-06, "loss": 0.0822, "step": 75800 }, { "epoch": 6.68, "learning_rate": 7.054267916269152e-06, "loss": 0.102, "step": 76000 }, { "epoch": 6.7, "learning_rate": 7.016854668238023e-06, "loss": 0.091, "step": 76200 }, { "epoch": 6.72, "learning_rate": 6.979441420206896e-06, "loss": 0.0937, "step": 76400 }, { "epoch": 6.74, "learning_rate": 6.942028172175768e-06, "loss": 0.0795, "step": 76600 }, { "epoch": 6.75, "learning_rate": 6.9046149241446405e-06, "loss": 0.0917, "step": 76800 }, { "epoch": 6.77, "learning_rate": 6.8672016761135115e-06, "loss": 0.0987, "step": 77000 }, { "epoch": 6.79, "learning_rate": 6.829788428082384e-06, "loss": 0.0946, "step": 77200 }, { "epoch": 6.81, "learning_rate": 6.792375180051257e-06, "loss": 0.0915, "step": 77400 }, { "epoch": 6.82, "learning_rate": 6.754961932020129e-06, "loss": 0.0889, "step": 77600 }, { "epoch": 6.84, "learning_rate": 6.717548683989002e-06, "loss": 0.0884, "step": 77800 }, { "epoch": 6.86, "learning_rate": 6.680135435957873e-06, "loss": 0.0854, "step": 78000 }, { "epoch": 6.88, "learning_rate": 6.642722187926746e-06, "loss": 0.0847, "step": 78200 }, { "epoch": 6.89, "learning_rate": 6.6053089398956175e-06, "loss": 0.0907, "step": 78400 }, { "epoch": 6.91, "learning_rate": 6.5678956918644894e-06, "loss": 0.0955, "step": 78600 }, { "epoch": 6.93, "learning_rate": 6.530482443833361e-06, "loss": 0.095, "step": 78800 }, { "epoch": 6.95, "learning_rate": 6.493069195802234e-06, "loss": 0.0948, "step": 79000 }, { "epoch": 6.97, "learning_rate": 6.455655947771107e-06, "loss": 0.0774, "step": 79200 }, { "epoch": 6.98, "learning_rate": 6.418242699739978e-06, "loss": 0.0973, "step": 79400 }, { "epoch": 7.0, "learning_rate": 6.380829451708851e-06, "loss": 0.0976, "step": 79600 }, { "epoch": 7.02, "learning_rate": 6.343416203677723e-06, "loss": 0.0674, "step": 79800 }, { "epoch": 7.04, "learning_rate": 6.306002955646595e-06, "loss": 0.0735, "step": 80000 }, { "epoch": 7.05, "learning_rate": 6.2685897076154665e-06, "loss": 0.0777, "step": 80200 }, { "epoch": 7.07, "learning_rate": 6.231176459584339e-06, "loss": 0.0688, "step": 80400 }, { "epoch": 7.09, "learning_rate": 6.193763211553212e-06, "loss": 0.0721, "step": 80600 }, { "epoch": 7.11, "learning_rate": 6.156349963522084e-06, "loss": 0.0787, "step": 80800 }, { "epoch": 7.12, "learning_rate": 6.118936715490956e-06, "loss": 0.0755, "step": 81000 }, { "epoch": 7.14, "learning_rate": 6.081523467459828e-06, "loss": 0.072, "step": 81200 }, { "epoch": 7.16, "learning_rate": 6.0441102194287005e-06, "loss": 0.0695, "step": 81400 }, { "epoch": 7.18, "learning_rate": 6.0066969713975724e-06, "loss": 0.0713, "step": 81600 }, { "epoch": 7.19, "learning_rate": 5.969283723366444e-06, "loss": 0.0771, "step": 81800 }, { "epoch": 7.21, "learning_rate": 5.931870475335316e-06, "loss": 0.0695, "step": 82000 }, { "epoch": 7.23, "learning_rate": 5.894457227304189e-06, "loss": 0.0676, "step": 82200 }, { "epoch": 7.25, "learning_rate": 5.857043979273062e-06, "loss": 0.0766, "step": 82400 }, { "epoch": 7.26, "learning_rate": 5.819630731241933e-06, "loss": 0.0766, "step": 82600 }, { "epoch": 7.28, "learning_rate": 5.782217483210806e-06, "loss": 0.0807, "step": 82800 }, { "epoch": 7.3, "learning_rate": 5.7448042351796775e-06, "loss": 0.0834, "step": 83000 }, { "epoch": 7.32, "learning_rate": 5.70739098714855e-06, "loss": 0.0789, "step": 83200 }, { "epoch": 7.33, "learning_rate": 5.669977739117421e-06, "loss": 0.0691, "step": 83400 }, { "epoch": 7.35, "learning_rate": 5.632564491086294e-06, "loss": 0.0777, "step": 83600 }, { "epoch": 7.37, "learning_rate": 5.595151243055166e-06, "loss": 0.0725, "step": 83800 }, { "epoch": 7.39, "learning_rate": 5.557737995024039e-06, "loss": 0.0788, "step": 84000 }, { "epoch": 7.4, "learning_rate": 5.520324746992911e-06, "loss": 0.075, "step": 84200 }, { "epoch": 7.42, "learning_rate": 5.482911498961783e-06, "loss": 0.0742, "step": 84400 }, { "epoch": 7.44, "learning_rate": 5.4454982509306554e-06, "loss": 0.0666, "step": 84600 }, { "epoch": 7.46, "learning_rate": 5.4080850028995265e-06, "loss": 0.0688, "step": 84800 }, { "epoch": 7.48, "learning_rate": 5.370671754868399e-06, "loss": 0.0747, "step": 85000 }, { "epoch": 7.49, "learning_rate": 5.333258506837271e-06, "loss": 0.0741, "step": 85200 }, { "epoch": 7.51, "learning_rate": 5.295845258806144e-06, "loss": 0.0657, "step": 85400 }, { "epoch": 7.53, "learning_rate": 5.258432010775017e-06, "loss": 0.0788, "step": 85600 }, { "epoch": 7.55, "learning_rate": 5.221018762743888e-06, "loss": 0.0791, "step": 85800 }, { "epoch": 7.56, "learning_rate": 5.1836055147127605e-06, "loss": 0.0752, "step": 86000 }, { "epoch": 7.58, "learning_rate": 5.1461922666816325e-06, "loss": 0.0762, "step": 86200 }, { "epoch": 7.6, "learning_rate": 5.108779018650505e-06, "loss": 0.0771, "step": 86400 }, { "epoch": 7.62, "learning_rate": 5.071365770619376e-06, "loss": 0.0727, "step": 86600 }, { "epoch": 7.63, "learning_rate": 5.033952522588249e-06, "loss": 0.0843, "step": 86800 }, { "epoch": 7.65, "learning_rate": 4.996539274557121e-06, "loss": 0.0741, "step": 87000 }, { "epoch": 7.67, "learning_rate": 4.959126026525993e-06, "loss": 0.0721, "step": 87200 }, { "epoch": 7.69, "learning_rate": 4.921712778494866e-06, "loss": 0.0814, "step": 87400 }, { "epoch": 7.7, "learning_rate": 4.884299530463738e-06, "loss": 0.0703, "step": 87600 }, { "epoch": 7.72, "learning_rate": 4.8468862824326095e-06, "loss": 0.0745, "step": 87800 }, { "epoch": 7.74, "learning_rate": 4.809473034401481e-06, "loss": 0.0788, "step": 88000 }, { "epoch": 7.76, "learning_rate": 4.772059786370354e-06, "loss": 0.0721, "step": 88200 }, { "epoch": 7.77, "learning_rate": 4.734646538339226e-06, "loss": 0.0689, "step": 88400 }, { "epoch": 7.79, "learning_rate": 4.697233290308099e-06, "loss": 0.0651, "step": 88600 }, { "epoch": 7.81, "learning_rate": 4.659820042276971e-06, "loss": 0.0775, "step": 88800 }, { "epoch": 7.83, "learning_rate": 4.622406794245843e-06, "loss": 0.069, "step": 89000 }, { "epoch": 7.84, "learning_rate": 4.584993546214715e-06, "loss": 0.0807, "step": 89200 }, { "epoch": 7.86, "learning_rate": 4.547580298183587e-06, "loss": 0.0817, "step": 89400 }, { "epoch": 7.88, "learning_rate": 4.510167050152459e-06, "loss": 0.0771, "step": 89600 }, { "epoch": 7.9, "learning_rate": 4.472753802121331e-06, "loss": 0.0683, "step": 89800 }, { "epoch": 7.91, "learning_rate": 4.435340554090204e-06, "loss": 0.0704, "step": 90000 }, { "epoch": 7.93, "learning_rate": 4.397927306059076e-06, "loss": 0.0852, "step": 90200 }, { "epoch": 7.95, "learning_rate": 4.360514058027948e-06, "loss": 0.0773, "step": 90400 }, { "epoch": 7.97, "learning_rate": 4.323100809996821e-06, "loss": 0.0694, "step": 90600 }, { "epoch": 7.99, "learning_rate": 4.2856875619656925e-06, "loss": 0.0771, "step": 90800 }, { "epoch": 8.0, "learning_rate": 4.2482743139345644e-06, "loss": 0.0702, "step": 91000 }, { "epoch": 8.02, "learning_rate": 4.210861065903436e-06, "loss": 0.0533, "step": 91200 }, { "epoch": 8.04, "learning_rate": 4.173447817872308e-06, "loss": 0.0666, "step": 91400 }, { "epoch": 8.06, "learning_rate": 4.136034569841181e-06, "loss": 0.0566, "step": 91600 }, { "epoch": 8.07, "learning_rate": 4.098621321810054e-06, "loss": 0.0611, "step": 91800 }, { "epoch": 8.09, "learning_rate": 4.061208073778926e-06, "loss": 0.0589, "step": 92000 }, { "epoch": 8.11, "learning_rate": 4.023794825747798e-06, "loss": 0.0602, "step": 92200 }, { "epoch": 8.13, "learning_rate": 3.9863815777166695e-06, "loss": 0.0615, "step": 92400 }, { "epoch": 8.14, "learning_rate": 3.948968329685542e-06, "loss": 0.0682, "step": 92600 }, { "epoch": 8.16, "learning_rate": 3.911555081654414e-06, "loss": 0.0678, "step": 92800 }, { "epoch": 8.18, "learning_rate": 3.874141833623286e-06, "loss": 0.0586, "step": 93000 }, { "epoch": 8.2, "learning_rate": 3.836728585592159e-06, "loss": 0.0641, "step": 93200 }, { "epoch": 8.21, "learning_rate": 3.799315337561031e-06, "loss": 0.0586, "step": 93400 }, { "epoch": 8.23, "learning_rate": 3.761902089529903e-06, "loss": 0.0684, "step": 93600 }, { "epoch": 8.25, "learning_rate": 3.724488841498775e-06, "loss": 0.0594, "step": 93800 }, { "epoch": 8.27, "learning_rate": 3.687075593467647e-06, "loss": 0.061, "step": 94000 }, { "epoch": 8.28, "learning_rate": 3.6496623454365193e-06, "loss": 0.0587, "step": 94200 }, { "epoch": 8.3, "learning_rate": 3.6122490974053913e-06, "loss": 0.0675, "step": 94400 }, { "epoch": 8.32, "learning_rate": 3.5748358493742636e-06, "loss": 0.0621, "step": 94600 }, { "epoch": 8.34, "learning_rate": 3.5374226013431355e-06, "loss": 0.0556, "step": 94800 }, { "epoch": 8.35, "learning_rate": 3.5000093533120083e-06, "loss": 0.0728, "step": 95000 }, { "epoch": 8.37, "learning_rate": 3.46259610528088e-06, "loss": 0.0554, "step": 95200 }, { "epoch": 8.39, "learning_rate": 3.4251828572497525e-06, "loss": 0.0634, "step": 95400 }, { "epoch": 8.41, "learning_rate": 3.3877696092186245e-06, "loss": 0.063, "step": 95600 }, { "epoch": 8.42, "learning_rate": 3.350356361187497e-06, "loss": 0.0613, "step": 95800 }, { "epoch": 8.44, "learning_rate": 3.3129431131563687e-06, "loss": 0.0755, "step": 96000 }, { "epoch": 8.46, "learning_rate": 3.275529865125241e-06, "loss": 0.0616, "step": 96200 }, { "epoch": 8.48, "learning_rate": 3.238116617094113e-06, "loss": 0.0513, "step": 96400 }, { "epoch": 8.5, "learning_rate": 3.2007033690629857e-06, "loss": 0.0609, "step": 96600 }, { "epoch": 8.51, "learning_rate": 3.1632901210318577e-06, "loss": 0.0625, "step": 96800 }, { "epoch": 8.53, "learning_rate": 3.12587687300073e-06, "loss": 0.057, "step": 97000 }, { "epoch": 8.55, "learning_rate": 3.088463624969602e-06, "loss": 0.0655, "step": 97200 }, { "epoch": 8.57, "learning_rate": 3.0510503769384743e-06, "loss": 0.0543, "step": 97400 }, { "epoch": 8.58, "learning_rate": 3.013637128907346e-06, "loss": 0.077, "step": 97600 }, { "epoch": 8.6, "learning_rate": 2.9762238808762185e-06, "loss": 0.0487, "step": 97800 }, { "epoch": 8.62, "learning_rate": 2.9388106328450904e-06, "loss": 0.0655, "step": 98000 }, { "epoch": 8.64, "learning_rate": 2.9013973848139628e-06, "loss": 0.0655, "step": 98200 }, { "epoch": 8.65, "learning_rate": 2.863984136782835e-06, "loss": 0.0596, "step": 98400 }, { "epoch": 8.67, "learning_rate": 2.8265708887517075e-06, "loss": 0.0594, "step": 98600 }, { "epoch": 8.69, "learning_rate": 2.7891576407205794e-06, "loss": 0.0737, "step": 98800 }, { "epoch": 8.71, "learning_rate": 2.7517443926894517e-06, "loss": 0.0616, "step": 99000 }, { "epoch": 8.72, "learning_rate": 2.7143311446583236e-06, "loss": 0.0531, "step": 99200 }, { "epoch": 8.74, "learning_rate": 2.676917896627196e-06, "loss": 0.0717, "step": 99400 }, { "epoch": 8.76, "learning_rate": 2.639504648596068e-06, "loss": 0.0643, "step": 99600 }, { "epoch": 8.78, "learning_rate": 2.60209140056494e-06, "loss": 0.0515, "step": 99800 }, { "epoch": 8.79, "learning_rate": 2.5646781525338126e-06, "loss": 0.0612, "step": 100000 }, { "epoch": 8.81, "learning_rate": 2.527264904502685e-06, "loss": 0.0562, "step": 100200 }, { "epoch": 8.83, "learning_rate": 2.489851656471557e-06, "loss": 0.0605, "step": 100400 }, { "epoch": 8.85, "learning_rate": 2.452438408440429e-06, "loss": 0.0621, "step": 100600 }, { "epoch": 8.86, "learning_rate": 2.415025160409301e-06, "loss": 0.0639, "step": 100800 }, { "epoch": 8.88, "learning_rate": 2.3776119123781734e-06, "loss": 0.0565, "step": 101000 }, { "epoch": 8.9, "learning_rate": 2.3401986643470454e-06, "loss": 0.0609, "step": 101200 }, { "epoch": 8.92, "learning_rate": 2.3027854163159177e-06, "loss": 0.0613, "step": 101400 }, { "epoch": 8.94, "learning_rate": 2.26537216828479e-06, "loss": 0.0673, "step": 101600 }, { "epoch": 8.95, "learning_rate": 2.227958920253662e-06, "loss": 0.0598, "step": 101800 }, { "epoch": 8.97, "learning_rate": 2.190545672222534e-06, "loss": 0.0649, "step": 102000 }, { "epoch": 8.99, "learning_rate": 2.1531324241914066e-06, "loss": 0.0615, "step": 102200 }, { "epoch": 9.01, "learning_rate": 2.1157191761602786e-06, "loss": 0.0538, "step": 102400 }, { "epoch": 9.02, "learning_rate": 2.0783059281291505e-06, "loss": 0.0462, "step": 102600 }, { "epoch": 9.04, "learning_rate": 2.040892680098023e-06, "loss": 0.0542, "step": 102800 }, { "epoch": 9.06, "learning_rate": 2.003479432066895e-06, "loss": 0.0529, "step": 103000 }, { "epoch": 9.08, "learning_rate": 1.966066184035767e-06, "loss": 0.0585, "step": 103200 }, { "epoch": 9.09, "learning_rate": 1.9286529360046394e-06, "loss": 0.0462, "step": 103400 }, { "epoch": 9.11, "learning_rate": 1.8912396879735116e-06, "loss": 0.0496, "step": 103600 }, { "epoch": 9.13, "learning_rate": 1.8538264399423839e-06, "loss": 0.0477, "step": 103800 }, { "epoch": 9.15, "learning_rate": 1.816413191911256e-06, "loss": 0.048, "step": 104000 }, { "epoch": 9.16, "learning_rate": 1.7789999438801282e-06, "loss": 0.0591, "step": 104200 }, { "epoch": 9.18, "learning_rate": 1.7415866958490003e-06, "loss": 0.0445, "step": 104400 }, { "epoch": 9.2, "learning_rate": 1.7041734478178726e-06, "loss": 0.0508, "step": 104600 }, { "epoch": 9.22, "learning_rate": 1.6667601997867448e-06, "loss": 0.0534, "step": 104800 }, { "epoch": 9.23, "learning_rate": 1.6293469517556169e-06, "loss": 0.046, "step": 105000 }, { "epoch": 9.25, "learning_rate": 1.591933703724489e-06, "loss": 0.0454, "step": 105200 }, { "epoch": 9.27, "learning_rate": 1.554520455693361e-06, "loss": 0.0601, "step": 105400 }, { "epoch": 9.29, "learning_rate": 1.5171072076622335e-06, "loss": 0.0543, "step": 105600 }, { "epoch": 9.3, "learning_rate": 1.4796939596311056e-06, "loss": 0.0587, "step": 105800 }, { "epoch": 9.32, "learning_rate": 1.4422807115999775e-06, "loss": 0.0526, "step": 106000 }, { "epoch": 9.34, "learning_rate": 1.4048674635688497e-06, "loss": 0.065, "step": 106200 }, { "epoch": 9.36, "learning_rate": 1.3674542155377222e-06, "loss": 0.0531, "step": 106400 }, { "epoch": 9.37, "learning_rate": 1.3300409675065941e-06, "loss": 0.0607, "step": 106600 }, { "epoch": 9.39, "learning_rate": 1.2926277194754663e-06, "loss": 0.0425, "step": 106800 }, { "epoch": 9.41, "learning_rate": 1.2552144714443384e-06, "loss": 0.0541, "step": 107000 }, { "epoch": 9.43, "learning_rate": 1.2178012234132107e-06, "loss": 0.0592, "step": 107200 }, { "epoch": 9.45, "learning_rate": 1.1803879753820829e-06, "loss": 0.0494, "step": 107400 }, { "epoch": 9.46, "learning_rate": 1.142974727350955e-06, "loss": 0.0548, "step": 107600 }, { "epoch": 9.48, "learning_rate": 1.1055614793198273e-06, "loss": 0.0439, "step": 107800 }, { "epoch": 9.5, "learning_rate": 1.0681482312886995e-06, "loss": 0.0543, "step": 108000 }, { "epoch": 9.52, "learning_rate": 1.0307349832575716e-06, "loss": 0.0604, "step": 108200 }, { "epoch": 9.53, "learning_rate": 9.933217352264437e-07, "loss": 0.0546, "step": 108400 }, { "epoch": 9.55, "learning_rate": 9.559084871953159e-07, "loss": 0.0576, "step": 108600 }, { "epoch": 9.57, "learning_rate": 9.184952391641881e-07, "loss": 0.0441, "step": 108800 }, { "epoch": 9.59, "learning_rate": 8.810819911330602e-07, "loss": 0.05, "step": 109000 }, { "epoch": 9.6, "learning_rate": 8.436687431019325e-07, "loss": 0.0523, "step": 109200 }, { "epoch": 9.62, "learning_rate": 8.062554950708046e-07, "loss": 0.053, "step": 109400 }, { "epoch": 9.64, "learning_rate": 7.688422470396768e-07, "loss": 0.0456, "step": 109600 }, { "epoch": 9.66, "learning_rate": 7.314289990085489e-07, "loss": 0.0508, "step": 109800 }, { "epoch": 9.67, "learning_rate": 6.940157509774212e-07, "loss": 0.0415, "step": 110000 } ], "max_steps": 113710, "num_train_epochs": 10, "total_flos": 2.6986684097812992e+17, "trial_name": null, "trial_params": null }