| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.999856278819091, |
| "global_step": 83492, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "learning_rate": 2.9826330666411153e-06, |
| "loss": 1.4668, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 2.9652661332822306e-06, |
| "loss": 1.4671, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 2.9478991999233463e-06, |
| "loss": 1.467, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 2.930532266564461e-06, |
| "loss": 1.4669, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 2.913165333205577e-06, |
| "loss": 1.4671, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 2.8957983998466917e-06, |
| "loss": 1.4675, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 2.8784314664878074e-06, |
| "loss": 1.467, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 2.8610645331289227e-06, |
| "loss": 1.4663, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 2.843697599770038e-06, |
| "loss": 1.4664, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 2.826330666411153e-06, |
| "loss": 1.4667, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 2.8089637330522685e-06, |
| "loss": 1.467, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 2.791596799693384e-06, |
| "loss": 1.4669, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 2.774229866334499e-06, |
| "loss": 1.4668, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 2.7568629329756147e-06, |
| "loss": 1.4663, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 2.7394959996167295e-06, |
| "loss": 1.4668, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 2.7221290662578452e-06, |
| "loss": 1.4665, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 2.7047621328989605e-06, |
| "loss": 1.4665, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 2.6873951995400758e-06, |
| "loss": 1.4663, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 2.670028266181191e-06, |
| "loss": 1.4663, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 2.6526613328223063e-06, |
| "loss": 1.4663, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 2.635294399463422e-06, |
| "loss": 1.4658, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 2.617927466104537e-06, |
| "loss": 1.4657, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 2.6005605327456525e-06, |
| "loss": 1.4657, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 2.5831935993867674e-06, |
| "loss": 1.4659, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 2.565826666027883e-06, |
| "loss": 1.4661, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 2.5484597326689983e-06, |
| "loss": 1.4657, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 2.5310927993101136e-06, |
| "loss": 1.4661, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.513725865951229e-06, |
| "loss": 1.4654, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2.496358932592344e-06, |
| "loss": 1.466, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 2.4789919992334594e-06, |
| "loss": 1.4661, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 2.4616250658745747e-06, |
| "loss": 1.4655, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.4442581325156904e-06, |
| "loss": 1.4654, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.4268911991568052e-06, |
| "loss": 1.4654, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 2.409524265797921e-06, |
| "loss": 1.4647, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 2.392157332439036e-06, |
| "loss": 1.4651, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 2.3747903990801515e-06, |
| "loss": 1.4656, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 2.3574234657212667e-06, |
| "loss": 1.4652, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 2.340056532362382e-06, |
| "loss": 1.4649, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.3226895990034977e-06, |
| "loss": 1.4654, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 2.3053226656446125e-06, |
| "loss": 1.4649, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 2.2879557322857282e-06, |
| "loss": 1.4648, |
| "step": 20500 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 1.4683704376220703, |
| "eval_runtime": 319.8553, |
| "eval_samples_per_second": 93.792, |
| "eval_steps_per_second": 1.466, |
| "step": 20873 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 2.270588798926843e-06, |
| "loss": 1.4643, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 2.2532218655679588e-06, |
| "loss": 1.4633, |
| "step": 21500 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 2.235854932209074e-06, |
| "loss": 1.4635, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 2.2184879988501893e-06, |
| "loss": 1.4633, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 2.2011210654913046e-06, |
| "loss": 1.4634, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 2.18375413213242e-06, |
| "loss": 1.4632, |
| "step": 23500 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 2.166387198773535e-06, |
| "loss": 1.4632, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 2.1490202654146504e-06, |
| "loss": 1.4632, |
| "step": 24500 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 2.131653332055766e-06, |
| "loss": 1.4634, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 2.114286398696881e-06, |
| "loss": 1.463, |
| "step": 25500 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 2.0969194653379966e-06, |
| "loss": 1.4635, |
| "step": 26000 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 2.079552531979112e-06, |
| "loss": 1.4636, |
| "step": 26500 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 2.062185598620227e-06, |
| "loss": 1.4629, |
| "step": 27000 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 2.0448186652613424e-06, |
| "loss": 1.463, |
| "step": 27500 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 2.0274517319024577e-06, |
| "loss": 1.4627, |
| "step": 28000 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 2.0100847985435734e-06, |
| "loss": 1.4635, |
| "step": 28500 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 1.9927178651846882e-06, |
| "loss": 1.463, |
| "step": 29000 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 1.9753509318258035e-06, |
| "loss": 1.4631, |
| "step": 29500 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 1.957983998466919e-06, |
| "loss": 1.463, |
| "step": 30000 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 1.9406170651080345e-06, |
| "loss": 1.4632, |
| "step": 30500 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 1.9232501317491497e-06, |
| "loss": 1.4626, |
| "step": 31000 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 1.9058831983902652e-06, |
| "loss": 1.463, |
| "step": 31500 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 1.8885162650313807e-06, |
| "loss": 1.4632, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 1.8711493316724955e-06, |
| "loss": 1.4629, |
| "step": 32500 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 1.853782398313611e-06, |
| "loss": 1.4628, |
| "step": 33000 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 1.836415464954726e-06, |
| "loss": 1.463, |
| "step": 33500 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 1.8190485315958416e-06, |
| "loss": 1.4627, |
| "step": 34000 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 1.801681598236957e-06, |
| "loss": 1.4626, |
| "step": 34500 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 1.7843146648780719e-06, |
| "loss": 1.4631, |
| "step": 35000 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 1.766947731519188e-06, |
| "loss": 1.4628, |
| "step": 35500 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 1.7495807981603028e-06, |
| "loss": 1.4625, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 1.7322138648014181e-06, |
| "loss": 1.4627, |
| "step": 36500 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1.7148469314425332e-06, |
| "loss": 1.4628, |
| "step": 37000 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 1.697479998083649e-06, |
| "loss": 1.4623, |
| "step": 37500 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 1.680113064724764e-06, |
| "loss": 1.4626, |
| "step": 38000 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 1.662746131365879e-06, |
| "loss": 1.4621, |
| "step": 38500 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 1.6453791980069949e-06, |
| "loss": 1.4626, |
| "step": 39000 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 1.62801226464811e-06, |
| "loss": 1.4623, |
| "step": 39500 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 1.6106453312892254e-06, |
| "loss": 1.4622, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.5932783979303407e-06, |
| "loss": 1.4624, |
| "step": 40500 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 1.5759114645714566e-06, |
| "loss": 1.4626, |
| "step": 41000 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 1.5585445312125714e-06, |
| "loss": 1.4619, |
| "step": 41500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 1.467372179031372, |
| "eval_runtime": 318.5344, |
| "eval_samples_per_second": 94.181, |
| "eval_steps_per_second": 1.472, |
| "step": 41747 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 1.5411775978536865e-06, |
| "loss": 1.4618, |
| "step": 42000 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 1.5238106644948016e-06, |
| "loss": 1.4609, |
| "step": 42500 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 1.5064437311359175e-06, |
| "loss": 1.461, |
| "step": 43000 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 1.4890767977770327e-06, |
| "loss": 1.4613, |
| "step": 43500 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 1.471709864418148e-06, |
| "loss": 1.4607, |
| "step": 44000 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 1.4543429310592633e-06, |
| "loss": 1.4611, |
| "step": 44500 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 1.4369759977003785e-06, |
| "loss": 1.4614, |
| "step": 45000 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 1.4196090643414938e-06, |
| "loss": 1.4608, |
| "step": 45500 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 1.402242130982609e-06, |
| "loss": 1.4609, |
| "step": 46000 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 1.3848751976237243e-06, |
| "loss": 1.4618, |
| "step": 46500 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 1.3675082642648396e-06, |
| "loss": 1.4609, |
| "step": 47000 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 1.3501413309059553e-06, |
| "loss": 1.4606, |
| "step": 47500 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 1.3327743975470706e-06, |
| "loss": 1.461, |
| "step": 48000 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 1.3154074641881858e-06, |
| "loss": 1.4609, |
| "step": 48500 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 1.2980405308293011e-06, |
| "loss": 1.4611, |
| "step": 49000 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 1.2806735974704164e-06, |
| "loss": 1.4608, |
| "step": 49500 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 1.2633066641115317e-06, |
| "loss": 1.4604, |
| "step": 50000 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 1.245939730752647e-06, |
| "loss": 1.4608, |
| "step": 50500 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 1.2285727973937622e-06, |
| "loss": 1.4607, |
| "step": 51000 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 1.2112058640348775e-06, |
| "loss": 1.4606, |
| "step": 51500 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 1.1938389306759932e-06, |
| "loss": 1.4606, |
| "step": 52000 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 1.1764719973171084e-06, |
| "loss": 1.4607, |
| "step": 52500 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 1.1591050639582237e-06, |
| "loss": 1.4606, |
| "step": 53000 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 1.141738130599339e-06, |
| "loss": 1.4609, |
| "step": 53500 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 1.1243711972404542e-06, |
| "loss": 1.4609, |
| "step": 54000 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 1.1070042638815695e-06, |
| "loss": 1.4604, |
| "step": 54500 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 1.0896373305226848e-06, |
| "loss": 1.4608, |
| "step": 55000 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 1.0722703971638e-06, |
| "loss": 1.4604, |
| "step": 55500 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 1.0549034638049153e-06, |
| "loss": 1.4607, |
| "step": 56000 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 1.037536530446031e-06, |
| "loss": 1.4607, |
| "step": 56500 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 1.020169597087146e-06, |
| "loss": 1.4609, |
| "step": 57000 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 1.0028026637282615e-06, |
| "loss": 1.461, |
| "step": 57500 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 9.854357303693768e-07, |
| "loss": 1.4609, |
| "step": 58000 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 9.68068797010492e-07, |
| "loss": 1.4608, |
| "step": 58500 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 9.507018636516072e-07, |
| "loss": 1.4603, |
| "step": 59000 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 9.333349302927227e-07, |
| "loss": 1.4606, |
| "step": 59500 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 9.159679969338379e-07, |
| "loss": 1.4605, |
| "step": 60000 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 8.986010635749534e-07, |
| "loss": 1.4603, |
| "step": 60500 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 8.81234130216069e-07, |
| "loss": 1.4606, |
| "step": 61000 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 8.638671968571839e-07, |
| "loss": 1.4611, |
| "step": 61500 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 8.465002634982994e-07, |
| "loss": 1.4603, |
| "step": 62000 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 8.291333301394146e-07, |
| "loss": 1.4606, |
| "step": 62500 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 1.4666800498962402, |
| "eval_runtime": 320.1083, |
| "eval_samples_per_second": 93.718, |
| "eval_steps_per_second": 1.465, |
| "step": 62621 |
| }, |
| { |
| "epoch": 3.02, |
| "learning_rate": 8.1176639678053e-07, |
| "loss": 1.4591, |
| "step": 63000 |
| }, |
| { |
| "epoch": 3.04, |
| "learning_rate": 7.943994634216451e-07, |
| "loss": 1.46, |
| "step": 63500 |
| }, |
| { |
| "epoch": 3.07, |
| "learning_rate": 7.770325300627606e-07, |
| "loss": 1.4594, |
| "step": 64000 |
| }, |
| { |
| "epoch": 3.09, |
| "learning_rate": 7.596655967038757e-07, |
| "loss": 1.4603, |
| "step": 64500 |
| }, |
| { |
| "epoch": 3.11, |
| "learning_rate": 7.422986633449912e-07, |
| "loss": 1.4599, |
| "step": 65000 |
| }, |
| { |
| "epoch": 3.14, |
| "learning_rate": 7.249317299861067e-07, |
| "loss": 1.4596, |
| "step": 65500 |
| }, |
| { |
| "epoch": 3.16, |
| "learning_rate": 7.075647966272218e-07, |
| "loss": 1.4593, |
| "step": 66000 |
| }, |
| { |
| "epoch": 3.19, |
| "learning_rate": 6.901978632683372e-07, |
| "loss": 1.4594, |
| "step": 66500 |
| }, |
| { |
| "epoch": 3.21, |
| "learning_rate": 6.728309299094524e-07, |
| "loss": 1.4595, |
| "step": 67000 |
| }, |
| { |
| "epoch": 3.23, |
| "learning_rate": 6.554639965505679e-07, |
| "loss": 1.4602, |
| "step": 67500 |
| }, |
| { |
| "epoch": 3.26, |
| "learning_rate": 6.380970631916829e-07, |
| "loss": 1.4592, |
| "step": 68000 |
| }, |
| { |
| "epoch": 3.28, |
| "learning_rate": 6.207301298327984e-07, |
| "loss": 1.4592, |
| "step": 68500 |
| }, |
| { |
| "epoch": 3.31, |
| "learning_rate": 6.033631964739136e-07, |
| "loss": 1.46, |
| "step": 69000 |
| }, |
| { |
| "epoch": 3.33, |
| "learning_rate": 5.859962631150291e-07, |
| "loss": 1.4596, |
| "step": 69500 |
| }, |
| { |
| "epoch": 3.35, |
| "learning_rate": 5.686293297561441e-07, |
| "loss": 1.4597, |
| "step": 70000 |
| }, |
| { |
| "epoch": 3.38, |
| "learning_rate": 5.512623963972596e-07, |
| "loss": 1.4594, |
| "step": 70500 |
| }, |
| { |
| "epoch": 3.4, |
| "learning_rate": 5.338954630383751e-07, |
| "loss": 1.4598, |
| "step": 71000 |
| }, |
| { |
| "epoch": 3.43, |
| "learning_rate": 5.165285296794902e-07, |
| "loss": 1.4596, |
| "step": 71500 |
| }, |
| { |
| "epoch": 3.45, |
| "learning_rate": 4.991615963206056e-07, |
| "loss": 1.4596, |
| "step": 72000 |
| }, |
| { |
| "epoch": 3.47, |
| "learning_rate": 4.817946629617209e-07, |
| "loss": 1.4594, |
| "step": 72500 |
| }, |
| { |
| "epoch": 3.5, |
| "learning_rate": 4.6442772960283626e-07, |
| "loss": 1.4593, |
| "step": 73000 |
| }, |
| { |
| "epoch": 3.52, |
| "learning_rate": 4.4706079624395143e-07, |
| "loss": 1.4597, |
| "step": 73500 |
| }, |
| { |
| "epoch": 3.55, |
| "learning_rate": 4.296938628850668e-07, |
| "loss": 1.4601, |
| "step": 74000 |
| }, |
| { |
| "epoch": 3.57, |
| "learning_rate": 4.1232692952618197e-07, |
| "loss": 1.4592, |
| "step": 74500 |
| }, |
| { |
| "epoch": 3.59, |
| "learning_rate": 3.9495999616729745e-07, |
| "loss": 1.4599, |
| "step": 75000 |
| }, |
| { |
| "epoch": 3.62, |
| "learning_rate": 3.77593062808413e-07, |
| "loss": 1.4591, |
| "step": 75500 |
| }, |
| { |
| "epoch": 3.64, |
| "learning_rate": 3.602261294495281e-07, |
| "loss": 1.4598, |
| "step": 76000 |
| }, |
| { |
| "epoch": 3.66, |
| "learning_rate": 3.428591960906435e-07, |
| "loss": 1.4592, |
| "step": 76500 |
| }, |
| { |
| "epoch": 3.69, |
| "learning_rate": 3.2549226273175863e-07, |
| "loss": 1.4597, |
| "step": 77000 |
| }, |
| { |
| "epoch": 3.71, |
| "learning_rate": 3.081253293728741e-07, |
| "loss": 1.4598, |
| "step": 77500 |
| }, |
| { |
| "epoch": 3.74, |
| "learning_rate": 2.907583960139893e-07, |
| "loss": 1.4594, |
| "step": 78000 |
| }, |
| { |
| "epoch": 3.76, |
| "learning_rate": 2.7339146265510476e-07, |
| "loss": 1.4596, |
| "step": 78500 |
| }, |
| { |
| "epoch": 3.78, |
| "learning_rate": 2.5602452929621987e-07, |
| "loss": 1.4592, |
| "step": 79000 |
| }, |
| { |
| "epoch": 3.81, |
| "learning_rate": 2.386575959373353e-07, |
| "loss": 1.4597, |
| "step": 79500 |
| }, |
| { |
| "epoch": 3.83, |
| "learning_rate": 2.2129066257845077e-07, |
| "loss": 1.4593, |
| "step": 80000 |
| }, |
| { |
| "epoch": 3.86, |
| "learning_rate": 2.0392372921956589e-07, |
| "loss": 1.4598, |
| "step": 80500 |
| }, |
| { |
| "epoch": 3.88, |
| "learning_rate": 1.8655679586068137e-07, |
| "loss": 1.4593, |
| "step": 81000 |
| }, |
| { |
| "epoch": 3.9, |
| "learning_rate": 1.691898625017965e-07, |
| "loss": 1.4592, |
| "step": 81500 |
| }, |
| { |
| "epoch": 3.93, |
| "learning_rate": 1.5182292914291196e-07, |
| "loss": 1.4593, |
| "step": 82000 |
| }, |
| { |
| "epoch": 3.95, |
| "learning_rate": 1.344559957840271e-07, |
| "loss": 1.4594, |
| "step": 82500 |
| }, |
| { |
| "epoch": 3.98, |
| "learning_rate": 1.1708906242514258e-07, |
| "loss": 1.4596, |
| "step": 83000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 1.4663872718811035, |
| "eval_runtime": 321.2818, |
| "eval_samples_per_second": 93.376, |
| "eval_steps_per_second": 1.46, |
| "step": 83492 |
| }, |
| { |
| "epoch": 4.0, |
| "step": 83492, |
| "total_flos": 2.080820519714685e+18, |
| "train_loss": 1.4623237360947567, |
| "train_runtime": 299973.1617, |
| "train_samples_per_second": 71.255, |
| "train_steps_per_second": 0.278 |
| } |
| ], |
| "max_steps": 83492, |
| "num_train_epochs": 4, |
| "total_flos": 2.080820519714685e+18, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|