{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999715839665373, "eval_steps": 500, "global_step": 21994, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.977266527234701e-05, "loss": 2.6296, "step": 50 }, { "epoch": 0.0, "learning_rate": 9.954533054469402e-05, "loss": 2.6156, "step": 100 }, { "epoch": 0.01, "learning_rate": 9.931799581704102e-05, "loss": 2.6035, "step": 150 }, { "epoch": 0.01, "learning_rate": 9.909066108938801e-05, "loss": 2.5399, "step": 200 }, { "epoch": 0.01, "learning_rate": 9.886332636173502e-05, "loss": 2.5857, "step": 250 }, { "epoch": 0.01, "learning_rate": 9.863599163408202e-05, "loss": 2.6078, "step": 300 }, { "epoch": 0.02, "learning_rate": 9.840865690642903e-05, "loss": 2.5931, "step": 350 }, { "epoch": 0.02, "learning_rate": 9.818132217877604e-05, "loss": 2.5919, "step": 400 }, { "epoch": 0.02, "learning_rate": 9.795398745112304e-05, "loss": 2.59, "step": 450 }, { "epoch": 0.02, "learning_rate": 9.772665272347005e-05, "loss": 2.605, "step": 500 }, { "epoch": 0.03, "learning_rate": 9.749931799581704e-05, "loss": 2.6026, "step": 550 }, { "epoch": 0.03, "learning_rate": 9.727198326816404e-05, "loss": 2.5839, "step": 600 }, { "epoch": 0.03, "learning_rate": 9.704464854051105e-05, "loss": 2.5862, "step": 650 }, { "epoch": 0.03, "learning_rate": 9.681731381285806e-05, "loss": 2.609, "step": 700 }, { "epoch": 0.03, "learning_rate": 9.658997908520506e-05, "loss": 2.5759, "step": 750 }, { "epoch": 0.04, "learning_rate": 9.636264435755207e-05, "loss": 2.6046, "step": 800 }, { "epoch": 0.04, "learning_rate": 9.613530962989907e-05, "loss": 2.5811, "step": 850 }, { "epoch": 0.04, "learning_rate": 9.590797490224606e-05, "loss": 2.5797, "step": 900 }, { "epoch": 0.04, "learning_rate": 9.568064017459307e-05, "loss": 2.5867, "step": 950 }, { "epoch": 0.05, "learning_rate": 9.545330544694008e-05, "loss": 2.5927, "step": 1000 }, { "epoch": 0.05, "learning_rate": 9.522597071928708e-05, "loss": 2.568, "step": 1050 }, { "epoch": 0.05, "learning_rate": 9.499863599163409e-05, "loss": 2.6024, "step": 1100 }, { "epoch": 0.05, "learning_rate": 9.477130126398109e-05, "loss": 2.5936, "step": 1150 }, { "epoch": 0.05, "learning_rate": 9.45439665363281e-05, "loss": 2.605, "step": 1200 }, { "epoch": 0.06, "learning_rate": 9.431663180867509e-05, "loss": 2.5775, "step": 1250 }, { "epoch": 0.06, "learning_rate": 9.40892970810221e-05, "loss": 2.5752, "step": 1300 }, { "epoch": 0.06, "learning_rate": 9.38619623533691e-05, "loss": 2.5679, "step": 1350 }, { "epoch": 0.06, "learning_rate": 9.36346276257161e-05, "loss": 2.5856, "step": 1400 }, { "epoch": 0.07, "learning_rate": 9.340729289806311e-05, "loss": 2.5787, "step": 1450 }, { "epoch": 0.07, "learning_rate": 9.317995817041012e-05, "loss": 2.5875, "step": 1500 }, { "epoch": 0.07, "learning_rate": 9.295262344275712e-05, "loss": 2.5631, "step": 1550 }, { "epoch": 0.07, "learning_rate": 9.272528871510412e-05, "loss": 2.583, "step": 1600 }, { "epoch": 0.08, "learning_rate": 9.249795398745112e-05, "loss": 2.5609, "step": 1650 }, { "epoch": 0.08, "learning_rate": 9.227061925979813e-05, "loss": 2.587, "step": 1700 }, { "epoch": 0.08, "learning_rate": 9.204328453214513e-05, "loss": 2.5555, "step": 1750 }, { "epoch": 0.08, "learning_rate": 9.181594980449214e-05, "loss": 2.5488, "step": 1800 }, { "epoch": 0.08, "learning_rate": 9.158861507683914e-05, "loss": 2.5554, "step": 1850 }, { "epoch": 0.09, "learning_rate": 9.136128034918615e-05, "loss": 2.5408, "step": 1900 }, { "epoch": 0.09, "learning_rate": 9.113394562153314e-05, "loss": 2.582, "step": 1950 }, { "epoch": 0.09, "learning_rate": 9.090661089388015e-05, "loss": 2.5533, "step": 2000 }, { "epoch": 0.09, "learning_rate": 9.067927616622715e-05, "loss": 2.5432, "step": 2050 }, { "epoch": 0.1, "learning_rate": 9.045194143857416e-05, "loss": 2.5867, "step": 2100 }, { "epoch": 0.1, "learning_rate": 9.022460671092116e-05, "loss": 2.5343, "step": 2150 }, { "epoch": 0.1, "learning_rate": 8.999727198326817e-05, "loss": 2.585, "step": 2200 }, { "epoch": 0.1, "learning_rate": 8.976993725561517e-05, "loss": 2.5679, "step": 2250 }, { "epoch": 0.1, "learning_rate": 8.954260252796217e-05, "loss": 2.5515, "step": 2300 }, { "epoch": 0.11, "learning_rate": 8.931526780030917e-05, "loss": 2.5713, "step": 2350 }, { "epoch": 0.11, "learning_rate": 8.908793307265618e-05, "loss": 2.5587, "step": 2400 }, { "epoch": 0.11, "learning_rate": 8.886059834500318e-05, "loss": 2.5774, "step": 2450 }, { "epoch": 0.11, "learning_rate": 8.863326361735019e-05, "loss": 2.551, "step": 2500 }, { "epoch": 0.12, "learning_rate": 8.84059288896972e-05, "loss": 2.5685, "step": 2550 }, { "epoch": 0.12, "learning_rate": 8.81785941620442e-05, "loss": 2.5707, "step": 2600 }, { "epoch": 0.12, "learning_rate": 8.795125943439119e-05, "loss": 2.568, "step": 2650 }, { "epoch": 0.12, "learning_rate": 8.77239247067382e-05, "loss": 2.5536, "step": 2700 }, { "epoch": 0.13, "learning_rate": 8.74965899790852e-05, "loss": 2.5406, "step": 2750 }, { "epoch": 0.13, "learning_rate": 8.726925525143221e-05, "loss": 2.5572, "step": 2800 }, { "epoch": 0.13, "learning_rate": 8.704192052377921e-05, "loss": 2.5749, "step": 2850 }, { "epoch": 0.13, "learning_rate": 8.681458579612622e-05, "loss": 2.5607, "step": 2900 }, { "epoch": 0.13, "learning_rate": 8.658725106847322e-05, "loss": 2.5612, "step": 2950 }, { "epoch": 0.14, "learning_rate": 8.635991634082023e-05, "loss": 2.5626, "step": 3000 }, { "epoch": 0.14, "learning_rate": 8.613258161316724e-05, "loss": 2.5677, "step": 3050 }, { "epoch": 0.14, "learning_rate": 8.590524688551423e-05, "loss": 2.5072, "step": 3100 }, { "epoch": 0.14, "learning_rate": 8.567791215786123e-05, "loss": 2.562, "step": 3150 }, { "epoch": 0.15, "learning_rate": 8.545057743020824e-05, "loss": 2.5786, "step": 3200 }, { "epoch": 0.15, "learning_rate": 8.522324270255524e-05, "loss": 2.5388, "step": 3250 }, { "epoch": 0.15, "learning_rate": 8.499590797490225e-05, "loss": 2.538, "step": 3300 }, { "epoch": 0.15, "learning_rate": 8.476857324724926e-05, "loss": 2.5448, "step": 3350 }, { "epoch": 0.15, "learning_rate": 8.454123851959626e-05, "loss": 2.5306, "step": 3400 }, { "epoch": 0.16, "learning_rate": 8.431390379194327e-05, "loss": 2.5647, "step": 3450 }, { "epoch": 0.16, "learning_rate": 8.408656906429027e-05, "loss": 2.5386, "step": 3500 }, { "epoch": 0.16, "learning_rate": 8.385923433663728e-05, "loss": 2.5376, "step": 3550 }, { "epoch": 0.16, "learning_rate": 8.363189960898427e-05, "loss": 2.535, "step": 3600 }, { "epoch": 0.17, "learning_rate": 8.340456488133128e-05, "loss": 2.5476, "step": 3650 }, { "epoch": 0.17, "learning_rate": 8.317723015367828e-05, "loss": 2.5462, "step": 3700 }, { "epoch": 0.17, "learning_rate": 8.294989542602529e-05, "loss": 2.5795, "step": 3750 }, { "epoch": 0.17, "learning_rate": 8.272256069837229e-05, "loss": 2.5425, "step": 3800 }, { "epoch": 0.18, "learning_rate": 8.24952259707193e-05, "loss": 2.5663, "step": 3850 }, { "epoch": 0.18, "learning_rate": 8.22678912430663e-05, "loss": 2.5376, "step": 3900 }, { "epoch": 0.18, "learning_rate": 8.204055651541331e-05, "loss": 2.5695, "step": 3950 }, { "epoch": 0.18, "learning_rate": 8.18132217877603e-05, "loss": 2.5112, "step": 4000 }, { "epoch": 0.18, "learning_rate": 8.15858870601073e-05, "loss": 2.5255, "step": 4050 }, { "epoch": 0.19, "learning_rate": 8.135855233245431e-05, "loss": 2.5448, "step": 4100 }, { "epoch": 0.19, "learning_rate": 8.113121760480132e-05, "loss": 2.5483, "step": 4150 }, { "epoch": 0.19, "learning_rate": 8.090388287714832e-05, "loss": 2.5319, "step": 4200 }, { "epoch": 0.19, "learning_rate": 8.067654814949533e-05, "loss": 2.5655, "step": 4250 }, { "epoch": 0.2, "learning_rate": 8.044921342184233e-05, "loss": 2.5399, "step": 4300 }, { "epoch": 0.2, "learning_rate": 8.022187869418933e-05, "loss": 2.5485, "step": 4350 }, { "epoch": 0.2, "learning_rate": 7.999454396653633e-05, "loss": 2.5196, "step": 4400 }, { "epoch": 0.2, "learning_rate": 7.976720923888334e-05, "loss": 2.5554, "step": 4450 }, { "epoch": 0.2, "learning_rate": 7.953987451123034e-05, "loss": 2.583, "step": 4500 }, { "epoch": 0.21, "learning_rate": 7.931253978357735e-05, "loss": 2.5643, "step": 4550 }, { "epoch": 0.21, "learning_rate": 7.908520505592435e-05, "loss": 2.5345, "step": 4600 }, { "epoch": 0.21, "learning_rate": 7.885787032827136e-05, "loss": 2.5393, "step": 4650 }, { "epoch": 0.21, "learning_rate": 7.863053560061835e-05, "loss": 2.5349, "step": 4700 }, { "epoch": 0.22, "learning_rate": 7.840320087296536e-05, "loss": 2.54, "step": 4750 }, { "epoch": 0.22, "learning_rate": 7.817586614531236e-05, "loss": 2.5526, "step": 4800 }, { "epoch": 0.22, "learning_rate": 7.794853141765937e-05, "loss": 2.5419, "step": 4850 }, { "epoch": 0.22, "learning_rate": 7.772119669000637e-05, "loss": 2.5122, "step": 4900 }, { "epoch": 0.23, "learning_rate": 7.749386196235338e-05, "loss": 2.5247, "step": 4950 }, { "epoch": 0.23, "learning_rate": 7.726652723470039e-05, "loss": 2.5516, "step": 5000 }, { "epoch": 0.23, "learning_rate": 7.703919250704738e-05, "loss": 2.5321, "step": 5050 }, { "epoch": 0.23, "learning_rate": 7.681185777939438e-05, "loss": 2.5453, "step": 5100 }, { "epoch": 0.23, "learning_rate": 7.658452305174139e-05, "loss": 2.5453, "step": 5150 }, { "epoch": 0.24, "learning_rate": 7.63571883240884e-05, "loss": 2.5522, "step": 5200 }, { "epoch": 0.24, "learning_rate": 7.61298535964354e-05, "loss": 2.5417, "step": 5250 }, { "epoch": 0.24, "learning_rate": 7.59025188687824e-05, "loss": 2.5241, "step": 5300 }, { "epoch": 0.24, "learning_rate": 7.567518414112941e-05, "loss": 2.5574, "step": 5350 }, { "epoch": 0.25, "learning_rate": 7.54478494134764e-05, "loss": 2.5127, "step": 5400 }, { "epoch": 0.25, "learning_rate": 7.522051468582341e-05, "loss": 2.5346, "step": 5450 }, { "epoch": 0.25, "learning_rate": 7.499317995817041e-05, "loss": 2.5164, "step": 5500 }, { "epoch": 0.25, "learning_rate": 7.476584523051742e-05, "loss": 2.5571, "step": 5550 }, { "epoch": 0.25, "learning_rate": 7.453851050286442e-05, "loss": 2.5455, "step": 5600 }, { "epoch": 0.26, "learning_rate": 7.431117577521143e-05, "loss": 2.544, "step": 5650 }, { "epoch": 0.26, "learning_rate": 7.408384104755844e-05, "loss": 2.5271, "step": 5700 }, { "epoch": 0.26, "learning_rate": 7.385650631990543e-05, "loss": 2.525, "step": 5750 }, { "epoch": 0.26, "learning_rate": 7.362917159225243e-05, "loss": 2.5278, "step": 5800 }, { "epoch": 0.27, "learning_rate": 7.340183686459944e-05, "loss": 2.5161, "step": 5850 }, { "epoch": 0.27, "learning_rate": 7.317450213694644e-05, "loss": 2.5296, "step": 5900 }, { "epoch": 0.27, "learning_rate": 7.294716740929345e-05, "loss": 2.5454, "step": 5950 }, { "epoch": 0.27, "learning_rate": 7.271983268164046e-05, "loss": 2.5319, "step": 6000 }, { "epoch": 0.28, "learning_rate": 7.249249795398746e-05, "loss": 2.5282, "step": 6050 }, { "epoch": 0.28, "learning_rate": 7.226516322633445e-05, "loss": 2.5359, "step": 6100 }, { "epoch": 0.28, "learning_rate": 7.203782849868146e-05, "loss": 2.494, "step": 6150 }, { "epoch": 0.28, "learning_rate": 7.181049377102846e-05, "loss": 2.5289, "step": 6200 }, { "epoch": 0.28, "learning_rate": 7.158315904337547e-05, "loss": 2.4985, "step": 6250 }, { "epoch": 0.29, "learning_rate": 7.135582431572248e-05, "loss": 2.5156, "step": 6300 }, { "epoch": 0.29, "learning_rate": 7.112848958806948e-05, "loss": 2.53, "step": 6350 }, { "epoch": 0.29, "learning_rate": 7.090115486041649e-05, "loss": 2.5157, "step": 6400 }, { "epoch": 0.29, "learning_rate": 7.067382013276348e-05, "loss": 2.5303, "step": 6450 }, { "epoch": 0.3, "learning_rate": 7.044648540511048e-05, "loss": 2.5286, "step": 6500 }, { "epoch": 0.3, "learning_rate": 7.021915067745749e-05, "loss": 2.5039, "step": 6550 }, { "epoch": 0.3, "learning_rate": 6.99918159498045e-05, "loss": 2.5161, "step": 6600 }, { "epoch": 0.3, "learning_rate": 6.97644812221515e-05, "loss": 2.5105, "step": 6650 }, { "epoch": 0.3, "learning_rate": 6.95371464944985e-05, "loss": 2.5151, "step": 6700 }, { "epoch": 0.31, "learning_rate": 6.930981176684551e-05, "loss": 2.5425, "step": 6750 }, { "epoch": 0.31, "learning_rate": 6.90824770391925e-05, "loss": 2.5357, "step": 6800 }, { "epoch": 0.31, "learning_rate": 6.885514231153951e-05, "loss": 2.4989, "step": 6850 }, { "epoch": 0.31, "learning_rate": 6.862780758388652e-05, "loss": 2.5413, "step": 6900 }, { "epoch": 0.32, "learning_rate": 6.840047285623352e-05, "loss": 2.4909, "step": 6950 }, { "epoch": 0.32, "learning_rate": 6.817313812858053e-05, "loss": 2.5177, "step": 7000 }, { "epoch": 0.32, "learning_rate": 6.794580340092753e-05, "loss": 2.5107, "step": 7050 }, { "epoch": 0.32, "learning_rate": 6.771846867327454e-05, "loss": 2.5343, "step": 7100 }, { "epoch": 0.33, "learning_rate": 6.749113394562153e-05, "loss": 2.5247, "step": 7150 }, { "epoch": 0.33, "learning_rate": 6.726379921796854e-05, "loss": 2.5202, "step": 7200 }, { "epoch": 0.33, "learning_rate": 6.703646449031554e-05, "loss": 2.5156, "step": 7250 }, { "epoch": 0.33, "learning_rate": 6.680912976266255e-05, "loss": 2.5431, "step": 7300 }, { "epoch": 0.33, "learning_rate": 6.658179503500955e-05, "loss": 2.5221, "step": 7350 }, { "epoch": 0.34, "learning_rate": 6.635446030735656e-05, "loss": 2.516, "step": 7400 }, { "epoch": 0.34, "learning_rate": 6.612712557970356e-05, "loss": 2.5297, "step": 7450 }, { "epoch": 0.34, "learning_rate": 6.589979085205056e-05, "loss": 2.5052, "step": 7500 }, { "epoch": 0.34, "learning_rate": 6.567245612439756e-05, "loss": 2.4981, "step": 7550 }, { "epoch": 0.35, "learning_rate": 6.544512139674457e-05, "loss": 2.5292, "step": 7600 }, { "epoch": 0.35, "learning_rate": 6.521778666909157e-05, "loss": 2.4853, "step": 7650 }, { "epoch": 0.35, "learning_rate": 6.499045194143858e-05, "loss": 2.5181, "step": 7700 }, { "epoch": 0.35, "learning_rate": 6.476311721378558e-05, "loss": 2.5599, "step": 7750 }, { "epoch": 0.35, "learning_rate": 6.453578248613259e-05, "loss": 2.5093, "step": 7800 }, { "epoch": 0.36, "learning_rate": 6.430844775847958e-05, "loss": 2.5449, "step": 7850 }, { "epoch": 0.36, "learning_rate": 6.408111303082659e-05, "loss": 2.5013, "step": 7900 }, { "epoch": 0.36, "learning_rate": 6.385377830317359e-05, "loss": 2.5366, "step": 7950 }, { "epoch": 0.36, "learning_rate": 6.36264435755206e-05, "loss": 2.49, "step": 8000 }, { "epoch": 0.37, "learning_rate": 6.33991088478676e-05, "loss": 2.5496, "step": 8050 }, { "epoch": 0.37, "learning_rate": 6.317177412021461e-05, "loss": 2.5071, "step": 8100 }, { "epoch": 0.37, "learning_rate": 6.294443939256161e-05, "loss": 2.5374, "step": 8150 }, { "epoch": 0.37, "learning_rate": 6.27171046649086e-05, "loss": 2.5284, "step": 8200 }, { "epoch": 0.38, "learning_rate": 6.248976993725561e-05, "loss": 2.4792, "step": 8250 }, { "epoch": 0.38, "learning_rate": 6.226243520960262e-05, "loss": 2.5061, "step": 8300 }, { "epoch": 0.38, "learning_rate": 6.203510048194962e-05, "loss": 2.5183, "step": 8350 }, { "epoch": 0.38, "learning_rate": 6.180776575429663e-05, "loss": 2.4886, "step": 8400 }, { "epoch": 0.38, "learning_rate": 6.158043102664363e-05, "loss": 2.5116, "step": 8450 }, { "epoch": 0.39, "learning_rate": 6.135309629899064e-05, "loss": 2.5364, "step": 8500 }, { "epoch": 0.39, "learning_rate": 6.112576157133763e-05, "loss": 2.5205, "step": 8550 }, { "epoch": 0.39, "learning_rate": 6.0898426843684644e-05, "loss": 2.5125, "step": 8600 }, { "epoch": 0.39, "learning_rate": 6.067109211603165e-05, "loss": 2.5089, "step": 8650 }, { "epoch": 0.4, "learning_rate": 6.0443757388378655e-05, "loss": 2.5088, "step": 8700 }, { "epoch": 0.4, "learning_rate": 6.021642266072566e-05, "loss": 2.5264, "step": 8750 }, { "epoch": 0.4, "learning_rate": 5.9989087933072666e-05, "loss": 2.5045, "step": 8800 }, { "epoch": 0.4, "learning_rate": 5.976175320541967e-05, "loss": 2.5085, "step": 8850 }, { "epoch": 0.4, "learning_rate": 5.9534418477766663e-05, "loss": 2.4801, "step": 8900 }, { "epoch": 0.41, "learning_rate": 5.930708375011367e-05, "loss": 2.5017, "step": 8950 }, { "epoch": 0.41, "learning_rate": 5.9079749022460675e-05, "loss": 2.5109, "step": 9000 }, { "epoch": 0.41, "learning_rate": 5.885241429480768e-05, "loss": 2.5052, "step": 9050 }, { "epoch": 0.41, "learning_rate": 5.8625079567154686e-05, "loss": 2.5139, "step": 9100 }, { "epoch": 0.42, "learning_rate": 5.839774483950169e-05, "loss": 2.4941, "step": 9150 }, { "epoch": 0.42, "learning_rate": 5.817041011184868e-05, "loss": 2.5137, "step": 9200 }, { "epoch": 0.42, "learning_rate": 5.794307538419569e-05, "loss": 2.5101, "step": 9250 }, { "epoch": 0.42, "learning_rate": 5.7715740656542694e-05, "loss": 2.5009, "step": 9300 }, { "epoch": 0.43, "learning_rate": 5.74884059288897e-05, "loss": 2.5395, "step": 9350 }, { "epoch": 0.43, "learning_rate": 5.7261071201236706e-05, "loss": 2.5108, "step": 9400 }, { "epoch": 0.43, "learning_rate": 5.703373647358371e-05, "loss": 2.5238, "step": 9450 }, { "epoch": 0.43, "learning_rate": 5.680640174593072e-05, "loss": 2.5037, "step": 9500 }, { "epoch": 0.43, "learning_rate": 5.657906701827771e-05, "loss": 2.5038, "step": 9550 }, { "epoch": 0.44, "learning_rate": 5.6351732290624714e-05, "loss": 2.5324, "step": 9600 }, { "epoch": 0.44, "learning_rate": 5.612439756297172e-05, "loss": 2.5054, "step": 9650 }, { "epoch": 0.44, "learning_rate": 5.5897062835318725e-05, "loss": 2.5119, "step": 9700 }, { "epoch": 0.44, "learning_rate": 5.566972810766573e-05, "loss": 2.5214, "step": 9750 }, { "epoch": 0.45, "learning_rate": 5.5442393380012737e-05, "loss": 2.5404, "step": 9800 }, { "epoch": 0.45, "learning_rate": 5.521505865235974e-05, "loss": 2.516, "step": 9850 }, { "epoch": 0.45, "learning_rate": 5.4987723924706734e-05, "loss": 2.5166, "step": 9900 }, { "epoch": 0.45, "learning_rate": 5.476038919705374e-05, "loss": 2.4983, "step": 9950 }, { "epoch": 0.45, "learning_rate": 5.4533054469400745e-05, "loss": 2.5101, "step": 10000 }, { "epoch": 0.46, "learning_rate": 5.430571974174775e-05, "loss": 2.4998, "step": 10050 }, { "epoch": 0.46, "learning_rate": 5.4078385014094756e-05, "loss": 2.5116, "step": 10100 }, { "epoch": 0.46, "learning_rate": 5.385105028644176e-05, "loss": 2.5136, "step": 10150 }, { "epoch": 0.46, "learning_rate": 5.362371555878877e-05, "loss": 2.5313, "step": 10200 }, { "epoch": 0.47, "learning_rate": 5.339638083113576e-05, "loss": 2.4989, "step": 10250 }, { "epoch": 0.47, "learning_rate": 5.3169046103482765e-05, "loss": 2.5062, "step": 10300 }, { "epoch": 0.47, "learning_rate": 5.294171137582977e-05, "loss": 2.531, "step": 10350 }, { "epoch": 0.47, "learning_rate": 5.2714376648176776e-05, "loss": 2.4975, "step": 10400 }, { "epoch": 0.48, "learning_rate": 5.248704192052378e-05, "loss": 2.4922, "step": 10450 }, { "epoch": 0.48, "learning_rate": 5.225970719287079e-05, "loss": 2.5128, "step": 10500 }, { "epoch": 0.48, "learning_rate": 5.203237246521779e-05, "loss": 2.504, "step": 10550 }, { "epoch": 0.48, "learning_rate": 5.1805037737564785e-05, "loss": 2.5093, "step": 10600 }, { "epoch": 0.48, "learning_rate": 5.157770300991179e-05, "loss": 2.491, "step": 10650 }, { "epoch": 0.49, "learning_rate": 5.1350368282258796e-05, "loss": 2.5008, "step": 10700 }, { "epoch": 0.49, "learning_rate": 5.11230335546058e-05, "loss": 2.5103, "step": 10750 }, { "epoch": 0.49, "learning_rate": 5.089569882695281e-05, "loss": 2.5167, "step": 10800 }, { "epoch": 0.49, "learning_rate": 5.066836409929981e-05, "loss": 2.5062, "step": 10850 }, { "epoch": 0.5, "learning_rate": 5.044102937164682e-05, "loss": 2.5135, "step": 10900 }, { "epoch": 0.5, "learning_rate": 5.021369464399382e-05, "loss": 2.489, "step": 10950 }, { "epoch": 0.5, "learning_rate": 4.998635991634082e-05, "loss": 2.5071, "step": 11000 }, { "epoch": 0.5, "learning_rate": 4.975902518868782e-05, "loss": 2.5181, "step": 11050 }, { "epoch": 0.5, "learning_rate": 4.953169046103483e-05, "loss": 2.4997, "step": 11100 }, { "epoch": 0.51, "learning_rate": 4.930435573338183e-05, "loss": 2.5127, "step": 11150 }, { "epoch": 0.51, "learning_rate": 4.907702100572884e-05, "loss": 2.4906, "step": 11200 }, { "epoch": 0.51, "learning_rate": 4.884968627807584e-05, "loss": 2.5129, "step": 11250 }, { "epoch": 0.51, "learning_rate": 4.862235155042284e-05, "loss": 2.5015, "step": 11300 }, { "epoch": 0.52, "learning_rate": 4.839501682276985e-05, "loss": 2.5049, "step": 11350 }, { "epoch": 0.52, "learning_rate": 4.8167682095116854e-05, "loss": 2.4971, "step": 11400 }, { "epoch": 0.52, "learning_rate": 4.794034736746386e-05, "loss": 2.5177, "step": 11450 }, { "epoch": 0.52, "learning_rate": 4.771301263981086e-05, "loss": 2.5056, "step": 11500 }, { "epoch": 0.53, "learning_rate": 4.7485677912157864e-05, "loss": 2.4831, "step": 11550 }, { "epoch": 0.53, "learning_rate": 4.725834318450487e-05, "loss": 2.4972, "step": 11600 }, { "epoch": 0.53, "learning_rate": 4.7031008456851875e-05, "loss": 2.5103, "step": 11650 }, { "epoch": 0.53, "learning_rate": 4.680367372919888e-05, "loss": 2.5083, "step": 11700 }, { "epoch": 0.53, "learning_rate": 4.657633900154588e-05, "loss": 2.5027, "step": 11750 }, { "epoch": 0.54, "learning_rate": 4.6349004273892885e-05, "loss": 2.4846, "step": 11800 }, { "epoch": 0.54, "learning_rate": 4.612166954623989e-05, "loss": 2.5193, "step": 11850 }, { "epoch": 0.54, "learning_rate": 4.589433481858689e-05, "loss": 2.5123, "step": 11900 }, { "epoch": 0.54, "learning_rate": 4.5667000090933895e-05, "loss": 2.5219, "step": 11950 }, { "epoch": 0.55, "learning_rate": 4.54396653632809e-05, "loss": 2.4979, "step": 12000 }, { "epoch": 0.55, "learning_rate": 4.5212330635627906e-05, "loss": 2.4849, "step": 12050 }, { "epoch": 0.55, "learning_rate": 4.4984995907974905e-05, "loss": 2.4783, "step": 12100 }, { "epoch": 0.55, "learning_rate": 4.475766118032191e-05, "loss": 2.5035, "step": 12150 }, { "epoch": 0.55, "learning_rate": 4.4530326452668916e-05, "loss": 2.4879, "step": 12200 }, { "epoch": 0.56, "learning_rate": 4.4302991725015914e-05, "loss": 2.4972, "step": 12250 }, { "epoch": 0.56, "learning_rate": 4.407565699736292e-05, "loss": 2.5043, "step": 12300 }, { "epoch": 0.56, "learning_rate": 4.3848322269709926e-05, "loss": 2.491, "step": 12350 }, { "epoch": 0.56, "learning_rate": 4.362098754205693e-05, "loss": 2.5032, "step": 12400 }, { "epoch": 0.57, "learning_rate": 4.339365281440393e-05, "loss": 2.5227, "step": 12450 }, { "epoch": 0.57, "learning_rate": 4.3166318086750935e-05, "loss": 2.5245, "step": 12500 }, { "epoch": 0.57, "learning_rate": 4.293898335909794e-05, "loss": 2.4927, "step": 12550 }, { "epoch": 0.57, "learning_rate": 4.271164863144494e-05, "loss": 2.5002, "step": 12600 }, { "epoch": 0.58, "learning_rate": 4.2484313903791945e-05, "loss": 2.4997, "step": 12650 }, { "epoch": 0.58, "learning_rate": 4.225697917613895e-05, "loss": 2.4939, "step": 12700 }, { "epoch": 0.58, "learning_rate": 4.2029644448485957e-05, "loss": 2.5223, "step": 12750 }, { "epoch": 0.58, "learning_rate": 4.1802309720832955e-05, "loss": 2.4963, "step": 12800 }, { "epoch": 0.58, "learning_rate": 4.157497499317996e-05, "loss": 2.5334, "step": 12850 }, { "epoch": 0.59, "learning_rate": 4.1347640265526966e-05, "loss": 2.5085, "step": 12900 }, { "epoch": 0.59, "learning_rate": 4.1120305537873965e-05, "loss": 2.4901, "step": 12950 }, { "epoch": 0.59, "learning_rate": 4.089297081022097e-05, "loss": 2.5268, "step": 13000 }, { "epoch": 0.59, "learning_rate": 4.0665636082567976e-05, "loss": 2.5237, "step": 13050 }, { "epoch": 0.6, "learning_rate": 4.043830135491498e-05, "loss": 2.4928, "step": 13100 }, { "epoch": 0.6, "learning_rate": 4.021096662726198e-05, "loss": 2.4852, "step": 13150 }, { "epoch": 0.6, "learning_rate": 3.9983631899608986e-05, "loss": 2.5139, "step": 13200 }, { "epoch": 0.6, "learning_rate": 3.975629717195599e-05, "loss": 2.5336, "step": 13250 }, { "epoch": 0.6, "learning_rate": 3.952896244430299e-05, "loss": 2.527, "step": 13300 }, { "epoch": 0.61, "learning_rate": 3.9301627716649996e-05, "loss": 2.4877, "step": 13350 }, { "epoch": 0.61, "learning_rate": 3.9074292988997e-05, "loss": 2.4992, "step": 13400 }, { "epoch": 0.61, "learning_rate": 3.884695826134401e-05, "loss": 2.4909, "step": 13450 }, { "epoch": 0.61, "learning_rate": 3.8619623533691006e-05, "loss": 2.4983, "step": 13500 }, { "epoch": 0.62, "learning_rate": 3.839228880603801e-05, "loss": 2.5146, "step": 13550 }, { "epoch": 0.62, "learning_rate": 3.816495407838502e-05, "loss": 2.5058, "step": 13600 }, { "epoch": 0.62, "learning_rate": 3.7937619350732016e-05, "loss": 2.4943, "step": 13650 }, { "epoch": 0.62, "learning_rate": 3.771028462307902e-05, "loss": 2.5002, "step": 13700 }, { "epoch": 0.63, "learning_rate": 3.748294989542603e-05, "loss": 2.4918, "step": 13750 }, { "epoch": 0.63, "learning_rate": 3.7255615167773026e-05, "loss": 2.4915, "step": 13800 }, { "epoch": 0.63, "learning_rate": 3.702828044012003e-05, "loss": 2.5089, "step": 13850 }, { "epoch": 0.63, "learning_rate": 3.680094571246704e-05, "loss": 2.5048, "step": 13900 }, { "epoch": 0.63, "learning_rate": 3.657361098481404e-05, "loss": 2.5108, "step": 13950 }, { "epoch": 0.64, "learning_rate": 3.634627625716104e-05, "loss": 2.4959, "step": 14000 }, { "epoch": 0.64, "learning_rate": 3.611894152950805e-05, "loss": 2.5154, "step": 14050 }, { "epoch": 0.64, "learning_rate": 3.589160680185505e-05, "loss": 2.5092, "step": 14100 }, { "epoch": 0.64, "learning_rate": 3.566427207420205e-05, "loss": 2.5265, "step": 14150 }, { "epoch": 0.65, "learning_rate": 3.543693734654906e-05, "loss": 2.4678, "step": 14200 }, { "epoch": 0.65, "learning_rate": 3.520960261889606e-05, "loss": 2.5236, "step": 14250 }, { "epoch": 0.65, "learning_rate": 3.498226789124307e-05, "loss": 2.5156, "step": 14300 }, { "epoch": 0.65, "learning_rate": 3.475493316359007e-05, "loss": 2.508, "step": 14350 }, { "epoch": 0.65, "learning_rate": 3.452759843593707e-05, "loss": 2.4949, "step": 14400 }, { "epoch": 0.66, "learning_rate": 3.430026370828408e-05, "loss": 2.4898, "step": 14450 }, { "epoch": 0.66, "learning_rate": 3.4072928980631084e-05, "loss": 2.5006, "step": 14500 }, { "epoch": 0.66, "learning_rate": 3.384559425297808e-05, "loss": 2.4878, "step": 14550 }, { "epoch": 0.66, "learning_rate": 3.361825952532509e-05, "loss": 2.5073, "step": 14600 }, { "epoch": 0.67, "learning_rate": 3.3390924797672094e-05, "loss": 2.5176, "step": 14650 }, { "epoch": 0.67, "learning_rate": 3.31635900700191e-05, "loss": 2.5078, "step": 14700 }, { "epoch": 0.67, "learning_rate": 3.2936255342366105e-05, "loss": 2.5101, "step": 14750 }, { "epoch": 0.67, "learning_rate": 3.2708920614713103e-05, "loss": 2.5076, "step": 14800 }, { "epoch": 0.68, "learning_rate": 3.248158588706011e-05, "loss": 2.4916, "step": 14850 }, { "epoch": 0.68, "learning_rate": 3.2254251159407115e-05, "loss": 2.4919, "step": 14900 }, { "epoch": 0.68, "learning_rate": 3.202691643175412e-05, "loss": 2.5042, "step": 14950 }, { "epoch": 0.68, "learning_rate": 3.1799581704101126e-05, "loss": 2.5191, "step": 15000 }, { "epoch": 0.68, "learning_rate": 3.1572246976448124e-05, "loss": 2.5034, "step": 15050 }, { "epoch": 0.69, "learning_rate": 3.134491224879513e-05, "loss": 2.4878, "step": 15100 }, { "epoch": 0.69, "learning_rate": 3.1117577521142136e-05, "loss": 2.5072, "step": 15150 }, { "epoch": 0.69, "learning_rate": 3.0890242793489134e-05, "loss": 2.506, "step": 15200 }, { "epoch": 0.69, "learning_rate": 3.066290806583614e-05, "loss": 2.4885, "step": 15250 }, { "epoch": 0.7, "learning_rate": 3.0435573338183142e-05, "loss": 2.488, "step": 15300 }, { "epoch": 0.7, "learning_rate": 3.0208238610530148e-05, "loss": 2.4939, "step": 15350 }, { "epoch": 0.7, "learning_rate": 2.998090388287715e-05, "loss": 2.5397, "step": 15400 }, { "epoch": 0.7, "learning_rate": 2.9753569155224152e-05, "loss": 2.5131, "step": 15450 }, { "epoch": 0.7, "learning_rate": 2.9526234427571158e-05, "loss": 2.5287, "step": 15500 }, { "epoch": 0.71, "learning_rate": 2.929889969991816e-05, "loss": 2.4852, "step": 15550 }, { "epoch": 0.71, "learning_rate": 2.9071564972265165e-05, "loss": 2.4941, "step": 15600 }, { "epoch": 0.71, "learning_rate": 2.884423024461217e-05, "loss": 2.508, "step": 15650 }, { "epoch": 0.71, "learning_rate": 2.8616895516959173e-05, "loss": 2.5011, "step": 15700 }, { "epoch": 0.72, "learning_rate": 2.8389560789306175e-05, "loss": 2.5029, "step": 15750 }, { "epoch": 0.72, "learning_rate": 2.816222606165318e-05, "loss": 2.4956, "step": 15800 }, { "epoch": 0.72, "learning_rate": 2.7934891334000186e-05, "loss": 2.4998, "step": 15850 }, { "epoch": 0.72, "learning_rate": 2.7707556606347185e-05, "loss": 2.4954, "step": 15900 }, { "epoch": 0.73, "learning_rate": 2.748022187869419e-05, "loss": 2.5171, "step": 15950 }, { "epoch": 0.73, "learning_rate": 2.7252887151041196e-05, "loss": 2.476, "step": 16000 }, { "epoch": 0.73, "learning_rate": 2.7025552423388202e-05, "loss": 2.506, "step": 16050 }, { "epoch": 0.73, "learning_rate": 2.67982176957352e-05, "loss": 2.5201, "step": 16100 }, { "epoch": 0.73, "learning_rate": 2.6570882968082206e-05, "loss": 2.5205, "step": 16150 }, { "epoch": 0.74, "learning_rate": 2.6343548240429212e-05, "loss": 2.4971, "step": 16200 }, { "epoch": 0.74, "learning_rate": 2.611621351277621e-05, "loss": 2.5135, "step": 16250 }, { "epoch": 0.74, "learning_rate": 2.5888878785123216e-05, "loss": 2.4894, "step": 16300 }, { "epoch": 0.74, "learning_rate": 2.5661544057470222e-05, "loss": 2.5127, "step": 16350 }, { "epoch": 0.75, "learning_rate": 2.5434209329817227e-05, "loss": 2.4999, "step": 16400 }, { "epoch": 0.75, "learning_rate": 2.5206874602164226e-05, "loss": 2.5048, "step": 16450 }, { "epoch": 0.75, "learning_rate": 2.4979539874511232e-05, "loss": 2.5208, "step": 16500 }, { "epoch": 0.75, "learning_rate": 2.4752205146858234e-05, "loss": 2.5155, "step": 16550 }, { "epoch": 0.75, "learning_rate": 2.452487041920524e-05, "loss": 2.5196, "step": 16600 }, { "epoch": 0.76, "learning_rate": 2.429753569155224e-05, "loss": 2.5205, "step": 16650 }, { "epoch": 0.76, "learning_rate": 2.4070200963899247e-05, "loss": 2.5083, "step": 16700 }, { "epoch": 0.76, "learning_rate": 2.384286623624625e-05, "loss": 2.506, "step": 16750 }, { "epoch": 0.76, "learning_rate": 2.361553150859325e-05, "loss": 2.5251, "step": 16800 }, { "epoch": 0.77, "learning_rate": 2.3388196780940257e-05, "loss": 2.5124, "step": 16850 }, { "epoch": 0.77, "learning_rate": 2.316086205328726e-05, "loss": 2.4869, "step": 16900 }, { "epoch": 0.77, "learning_rate": 2.2933527325634265e-05, "loss": 2.5066, "step": 16950 }, { "epoch": 0.77, "learning_rate": 2.2706192597981267e-05, "loss": 2.4888, "step": 17000 }, { "epoch": 0.78, "learning_rate": 2.2478857870328273e-05, "loss": 2.5086, "step": 17050 }, { "epoch": 0.78, "learning_rate": 2.2251523142675275e-05, "loss": 2.5449, "step": 17100 }, { "epoch": 0.78, "learning_rate": 2.202418841502228e-05, "loss": 2.5186, "step": 17150 }, { "epoch": 0.78, "learning_rate": 2.1796853687369283e-05, "loss": 2.482, "step": 17200 }, { "epoch": 0.78, "learning_rate": 2.1569518959716288e-05, "loss": 2.4895, "step": 17250 }, { "epoch": 0.79, "learning_rate": 2.1342184232063294e-05, "loss": 2.4988, "step": 17300 }, { "epoch": 0.79, "learning_rate": 2.1114849504410296e-05, "loss": 2.5107, "step": 17350 }, { "epoch": 0.79, "learning_rate": 2.0887514776757298e-05, "loss": 2.5257, "step": 17400 }, { "epoch": 0.79, "learning_rate": 2.0660180049104304e-05, "loss": 2.5139, "step": 17450 }, { "epoch": 0.8, "learning_rate": 2.0432845321451306e-05, "loss": 2.5364, "step": 17500 }, { "epoch": 0.8, "learning_rate": 2.020551059379831e-05, "loss": 2.5242, "step": 17550 }, { "epoch": 0.8, "learning_rate": 1.9978175866145313e-05, "loss": 2.482, "step": 17600 }, { "epoch": 0.8, "learning_rate": 1.975084113849232e-05, "loss": 2.4981, "step": 17650 }, { "epoch": 0.8, "learning_rate": 1.952350641083932e-05, "loss": 2.5049, "step": 17700 }, { "epoch": 0.81, "learning_rate": 1.9296171683186323e-05, "loss": 2.5089, "step": 17750 }, { "epoch": 0.81, "learning_rate": 1.906883695553333e-05, "loss": 2.4937, "step": 17800 }, { "epoch": 0.81, "learning_rate": 1.884150222788033e-05, "loss": 2.4983, "step": 17850 }, { "epoch": 0.81, "learning_rate": 1.8614167500227337e-05, "loss": 2.5152, "step": 17900 }, { "epoch": 0.82, "learning_rate": 1.838683277257434e-05, "loss": 2.5198, "step": 17950 }, { "epoch": 0.82, "learning_rate": 1.8159498044921344e-05, "loss": 2.5108, "step": 18000 }, { "epoch": 0.82, "learning_rate": 1.7932163317268347e-05, "loss": 2.5362, "step": 18050 }, { "epoch": 0.82, "learning_rate": 1.770482858961535e-05, "loss": 2.5186, "step": 18100 }, { "epoch": 0.83, "learning_rate": 1.7477493861962354e-05, "loss": 2.5331, "step": 18150 }, { "epoch": 0.83, "learning_rate": 1.7250159134309357e-05, "loss": 2.506, "step": 18200 }, { "epoch": 0.83, "learning_rate": 1.7022824406656362e-05, "loss": 2.4932, "step": 18250 }, { "epoch": 0.83, "learning_rate": 1.6795489679003364e-05, "loss": 2.4975, "step": 18300 }, { "epoch": 0.83, "learning_rate": 1.656815495135037e-05, "loss": 2.4996, "step": 18350 }, { "epoch": 0.84, "learning_rate": 1.6340820223697372e-05, "loss": 2.4987, "step": 18400 }, { "epoch": 0.84, "learning_rate": 1.6113485496044374e-05, "loss": 2.5013, "step": 18450 }, { "epoch": 0.84, "learning_rate": 1.588615076839138e-05, "loss": 2.4971, "step": 18500 }, { "epoch": 0.84, "learning_rate": 1.5658816040738382e-05, "loss": 2.5349, "step": 18550 }, { "epoch": 0.85, "learning_rate": 1.5431481313085388e-05, "loss": 2.5176, "step": 18600 }, { "epoch": 0.85, "learning_rate": 1.5204146585432391e-05, "loss": 2.4829, "step": 18650 }, { "epoch": 0.85, "learning_rate": 1.4976811857779397e-05, "loss": 2.5258, "step": 18700 }, { "epoch": 0.85, "learning_rate": 1.47494771301264e-05, "loss": 2.5232, "step": 18750 }, { "epoch": 0.85, "learning_rate": 1.4522142402473401e-05, "loss": 2.5032, "step": 18800 }, { "epoch": 0.86, "learning_rate": 1.4294807674820407e-05, "loss": 2.5197, "step": 18850 }, { "epoch": 0.86, "learning_rate": 1.4067472947167409e-05, "loss": 2.5035, "step": 18900 }, { "epoch": 0.86, "learning_rate": 1.3840138219514415e-05, "loss": 2.5016, "step": 18950 }, { "epoch": 0.86, "learning_rate": 1.3612803491861417e-05, "loss": 2.5291, "step": 19000 }, { "epoch": 0.87, "learning_rate": 1.3385468764208419e-05, "loss": 2.5092, "step": 19050 }, { "epoch": 0.87, "learning_rate": 1.3158134036555425e-05, "loss": 2.4956, "step": 19100 }, { "epoch": 0.87, "learning_rate": 1.2930799308902428e-05, "loss": 2.4947, "step": 19150 }, { "epoch": 0.87, "learning_rate": 1.2703464581249432e-05, "loss": 2.519, "step": 19200 }, { "epoch": 0.88, "learning_rate": 1.2476129853596436e-05, "loss": 2.5452, "step": 19250 }, { "epoch": 0.88, "learning_rate": 1.224879512594344e-05, "loss": 2.5248, "step": 19300 }, { "epoch": 0.88, "learning_rate": 1.2021460398290444e-05, "loss": 2.5179, "step": 19350 }, { "epoch": 0.88, "learning_rate": 1.1794125670637448e-05, "loss": 2.5047, "step": 19400 }, { "epoch": 0.88, "learning_rate": 1.1566790942984452e-05, "loss": 2.5084, "step": 19450 }, { "epoch": 0.89, "learning_rate": 1.1339456215331456e-05, "loss": 2.5044, "step": 19500 }, { "epoch": 0.89, "learning_rate": 1.1112121487678458e-05, "loss": 2.4969, "step": 19550 }, { "epoch": 0.89, "learning_rate": 1.0884786760025462e-05, "loss": 2.5069, "step": 19600 }, { "epoch": 0.89, "learning_rate": 1.0657452032372465e-05, "loss": 2.4756, "step": 19650 }, { "epoch": 0.9, "learning_rate": 1.043011730471947e-05, "loss": 2.489, "step": 19700 }, { "epoch": 0.9, "learning_rate": 1.0202782577066473e-05, "loss": 2.4985, "step": 19750 }, { "epoch": 0.9, "learning_rate": 9.975447849413477e-06, "loss": 2.5375, "step": 19800 }, { "epoch": 0.9, "learning_rate": 9.748113121760481e-06, "loss": 2.4924, "step": 19850 }, { "epoch": 0.9, "learning_rate": 9.520778394107483e-06, "loss": 2.4879, "step": 19900 }, { "epoch": 0.91, "learning_rate": 9.293443666454487e-06, "loss": 2.5486, "step": 19950 }, { "epoch": 0.91, "learning_rate": 9.066108938801491e-06, "loss": 2.5187, "step": 20000 }, { "epoch": 0.91, "learning_rate": 8.838774211148495e-06, "loss": 2.5102, "step": 20050 }, { "epoch": 0.91, "learning_rate": 8.6114394834955e-06, "loss": 2.4975, "step": 20100 }, { "epoch": 0.92, "learning_rate": 8.384104755842504e-06, "loss": 2.5036, "step": 20150 }, { "epoch": 0.92, "learning_rate": 8.156770028189506e-06, "loss": 2.5323, "step": 20200 }, { "epoch": 0.92, "learning_rate": 7.92943530053651e-06, "loss": 2.5228, "step": 20250 }, { "epoch": 0.92, "learning_rate": 7.702100572883514e-06, "loss": 2.5411, "step": 20300 }, { "epoch": 0.93, "learning_rate": 7.474765845230518e-06, "loss": 2.4923, "step": 20350 }, { "epoch": 0.93, "learning_rate": 7.247431117577522e-06, "loss": 2.5178, "step": 20400 }, { "epoch": 0.93, "learning_rate": 7.020096389924526e-06, "loss": 2.5121, "step": 20450 }, { "epoch": 0.93, "learning_rate": 6.79276166227153e-06, "loss": 2.5414, "step": 20500 }, { "epoch": 0.93, "learning_rate": 6.565426934618532e-06, "loss": 2.5222, "step": 20550 }, { "epoch": 0.94, "learning_rate": 6.338092206965536e-06, "loss": 2.5261, "step": 20600 }, { "epoch": 0.94, "learning_rate": 6.11075747931254e-06, "loss": 2.5274, "step": 20650 }, { "epoch": 0.94, "learning_rate": 5.883422751659544e-06, "loss": 2.4965, "step": 20700 }, { "epoch": 0.94, "learning_rate": 5.656088024006548e-06, "loss": 2.5141, "step": 20750 }, { "epoch": 0.95, "learning_rate": 5.428753296353551e-06, "loss": 2.5101, "step": 20800 }, { "epoch": 0.95, "learning_rate": 5.201418568700555e-06, "loss": 2.5011, "step": 20850 }, { "epoch": 0.95, "learning_rate": 4.974083841047559e-06, "loss": 2.5091, "step": 20900 }, { "epoch": 0.95, "learning_rate": 4.746749113394562e-06, "loss": 2.5237, "step": 20950 }, { "epoch": 0.95, "learning_rate": 4.519414385741566e-06, "loss": 2.4949, "step": 21000 }, { "epoch": 0.96, "learning_rate": 4.29207965808857e-06, "loss": 2.503, "step": 21050 }, { "epoch": 0.96, "learning_rate": 4.0647449304355735e-06, "loss": 2.5068, "step": 21100 }, { "epoch": 0.96, "learning_rate": 3.837410202782577e-06, "loss": 2.4922, "step": 21150 }, { "epoch": 0.96, "learning_rate": 3.6100754751295813e-06, "loss": 2.5199, "step": 21200 }, { "epoch": 0.97, "learning_rate": 3.382740747476585e-06, "loss": 2.5058, "step": 21250 }, { "epoch": 0.97, "learning_rate": 3.155406019823588e-06, "loss": 2.5294, "step": 21300 }, { "epoch": 0.97, "learning_rate": 2.928071292170592e-06, "loss": 2.4969, "step": 21350 }, { "epoch": 0.97, "learning_rate": 2.700736564517596e-06, "loss": 2.5419, "step": 21400 }, { "epoch": 0.98, "learning_rate": 2.4734018368645998e-06, "loss": 2.5299, "step": 21450 }, { "epoch": 0.98, "learning_rate": 2.2460671092116032e-06, "loss": 2.5275, "step": 21500 }, { "epoch": 0.98, "learning_rate": 2.0187323815586067e-06, "loss": 2.4891, "step": 21550 }, { "epoch": 0.98, "learning_rate": 1.7913976539056108e-06, "loss": 2.5108, "step": 21600 }, { "epoch": 0.98, "learning_rate": 1.5640629262526144e-06, "loss": 2.5246, "step": 21650 }, { "epoch": 0.99, "learning_rate": 1.336728198599618e-06, "loss": 2.5304, "step": 21700 }, { "epoch": 0.99, "learning_rate": 1.109393470946622e-06, "loss": 2.5159, "step": 21750 }, { "epoch": 0.99, "learning_rate": 8.820587432936256e-07, "loss": 2.5071, "step": 21800 }, { "epoch": 0.99, "learning_rate": 6.547240156406293e-07, "loss": 2.5091, "step": 21850 }, { "epoch": 1.0, "learning_rate": 4.2738928798763303e-07, "loss": 2.5386, "step": 21900 }, { "epoch": 1.0, "learning_rate": 2.0005456033463672e-07, "loss": 2.5228, "step": 21950 }, { "epoch": 1.0, "step": 21994, "total_flos": 3.604860407937761e+17, "train_loss": 2.5232514588412474, "train_runtime": 5052.6511, "train_samples_per_second": 34.825, "train_steps_per_second": 4.353 } ], "logging_steps": 50, "max_steps": 21994, "num_train_epochs": 1, "save_steps": 2500, "total_flos": 3.604860407937761e+17, "trial_name": null, "trial_params": null }