| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.999856278819091, | |
| "global_step": 83492, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.9826330666411153e-06, | |
| "loss": 1.4668, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.9652661332822306e-06, | |
| "loss": 1.4671, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 2.9478991999233463e-06, | |
| "loss": 1.467, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 2.930532266564461e-06, | |
| "loss": 1.4669, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.913165333205577e-06, | |
| "loss": 1.4671, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.8957983998466917e-06, | |
| "loss": 1.4675, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.8784314664878074e-06, | |
| "loss": 1.467, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 2.8610645331289227e-06, | |
| "loss": 1.4663, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 2.843697599770038e-06, | |
| "loss": 1.4664, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 2.826330666411153e-06, | |
| "loss": 1.4667, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 2.8089637330522685e-06, | |
| "loss": 1.467, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 2.791596799693384e-06, | |
| "loss": 1.4669, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 2.774229866334499e-06, | |
| "loss": 1.4668, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 2.7568629329756147e-06, | |
| "loss": 1.4663, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 2.7394959996167295e-06, | |
| "loss": 1.4668, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 2.7221290662578452e-06, | |
| "loss": 1.4665, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.7047621328989605e-06, | |
| "loss": 1.4665, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.6873951995400758e-06, | |
| "loss": 1.4663, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.670028266181191e-06, | |
| "loss": 1.4663, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.6526613328223063e-06, | |
| "loss": 1.4663, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.635294399463422e-06, | |
| "loss": 1.4658, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.617927466104537e-06, | |
| "loss": 1.4657, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.6005605327456525e-06, | |
| "loss": 1.4657, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.5831935993867674e-06, | |
| "loss": 1.4659, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.565826666027883e-06, | |
| "loss": 1.4661, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 2.5484597326689983e-06, | |
| "loss": 1.4657, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 2.5310927993101136e-06, | |
| "loss": 1.4661, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 2.513725865951229e-06, | |
| "loss": 1.4654, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 2.496358932592344e-06, | |
| "loss": 1.466, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 2.4789919992334594e-06, | |
| "loss": 1.4661, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 2.4616250658745747e-06, | |
| "loss": 1.4655, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.4442581325156904e-06, | |
| "loss": 1.4654, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.4268911991568052e-06, | |
| "loss": 1.4654, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 2.409524265797921e-06, | |
| "loss": 1.4647, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 2.392157332439036e-06, | |
| "loss": 1.4651, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 2.3747903990801515e-06, | |
| "loss": 1.4656, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 2.3574234657212667e-06, | |
| "loss": 1.4652, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.340056532362382e-06, | |
| "loss": 1.4649, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.3226895990034977e-06, | |
| "loss": 1.4654, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.3053226656446125e-06, | |
| "loss": 1.4649, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.2879557322857282e-06, | |
| "loss": 1.4648, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 1.4683704376220703, | |
| "eval_runtime": 319.8553, | |
| "eval_samples_per_second": 93.792, | |
| "eval_steps_per_second": 1.466, | |
| "step": 20873 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 2.270588798926843e-06, | |
| "loss": 1.4643, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 2.2532218655679588e-06, | |
| "loss": 1.4633, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 2.235854932209074e-06, | |
| "loss": 1.4635, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 2.2184879988501893e-06, | |
| "loss": 1.4633, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 2.2011210654913046e-06, | |
| "loss": 1.4634, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 2.18375413213242e-06, | |
| "loss": 1.4632, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 2.166387198773535e-06, | |
| "loss": 1.4632, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 2.1490202654146504e-06, | |
| "loss": 1.4632, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 2.131653332055766e-06, | |
| "loss": 1.4634, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 2.114286398696881e-06, | |
| "loss": 1.463, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.0969194653379966e-06, | |
| "loss": 1.4635, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.079552531979112e-06, | |
| "loss": 1.4636, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.062185598620227e-06, | |
| "loss": 1.4629, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.0448186652613424e-06, | |
| "loss": 1.463, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.0274517319024577e-06, | |
| "loss": 1.4627, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.0100847985435734e-06, | |
| "loss": 1.4635, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 1.9927178651846882e-06, | |
| "loss": 1.463, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 1.9753509318258035e-06, | |
| "loss": 1.4631, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 1.957983998466919e-06, | |
| "loss": 1.463, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 1.9406170651080345e-06, | |
| "loss": 1.4632, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 1.9232501317491497e-06, | |
| "loss": 1.4626, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 1.9058831983902652e-06, | |
| "loss": 1.463, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 1.8885162650313807e-06, | |
| "loss": 1.4632, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 1.8711493316724955e-06, | |
| "loss": 1.4629, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 1.853782398313611e-06, | |
| "loss": 1.4628, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 1.836415464954726e-06, | |
| "loss": 1.463, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 1.8190485315958416e-06, | |
| "loss": 1.4627, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 1.801681598236957e-06, | |
| "loss": 1.4626, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 1.7843146648780719e-06, | |
| "loss": 1.4631, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 1.766947731519188e-06, | |
| "loss": 1.4628, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 1.7495807981603028e-06, | |
| "loss": 1.4625, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 1.7322138648014181e-06, | |
| "loss": 1.4627, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 1.7148469314425332e-06, | |
| "loss": 1.4628, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 1.697479998083649e-06, | |
| "loss": 1.4623, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 1.680113064724764e-06, | |
| "loss": 1.4626, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.662746131365879e-06, | |
| "loss": 1.4621, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.6453791980069949e-06, | |
| "loss": 1.4626, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.62801226464811e-06, | |
| "loss": 1.4623, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.6106453312892254e-06, | |
| "loss": 1.4622, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.5932783979303407e-06, | |
| "loss": 1.4624, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.5759114645714566e-06, | |
| "loss": 1.4626, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.5585445312125714e-06, | |
| "loss": 1.4619, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 1.467372179031372, | |
| "eval_runtime": 318.5344, | |
| "eval_samples_per_second": 94.181, | |
| "eval_steps_per_second": 1.472, | |
| "step": 41747 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.5411775978536865e-06, | |
| "loss": 1.4618, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.5238106644948016e-06, | |
| "loss": 1.4609, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.5064437311359175e-06, | |
| "loss": 1.461, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.4890767977770327e-06, | |
| "loss": 1.4613, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.471709864418148e-06, | |
| "loss": 1.4607, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.4543429310592633e-06, | |
| "loss": 1.4611, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.4369759977003785e-06, | |
| "loss": 1.4614, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.4196090643414938e-06, | |
| "loss": 1.4608, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.402242130982609e-06, | |
| "loss": 1.4609, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.3848751976237243e-06, | |
| "loss": 1.4618, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.3675082642648396e-06, | |
| "loss": 1.4609, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.3501413309059553e-06, | |
| "loss": 1.4606, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.3327743975470706e-06, | |
| "loss": 1.461, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.3154074641881858e-06, | |
| "loss": 1.4609, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.2980405308293011e-06, | |
| "loss": 1.4611, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.2806735974704164e-06, | |
| "loss": 1.4608, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.2633066641115317e-06, | |
| "loss": 1.4604, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 1.245939730752647e-06, | |
| "loss": 1.4608, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 1.2285727973937622e-06, | |
| "loss": 1.4607, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 1.2112058640348775e-06, | |
| "loss": 1.4606, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 1.1938389306759932e-06, | |
| "loss": 1.4606, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 1.1764719973171084e-06, | |
| "loss": 1.4607, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 1.1591050639582237e-06, | |
| "loss": 1.4606, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 1.141738130599339e-06, | |
| "loss": 1.4609, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 1.1243711972404542e-06, | |
| "loss": 1.4609, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 1.1070042638815695e-06, | |
| "loss": 1.4604, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 1.0896373305226848e-06, | |
| "loss": 1.4608, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 1.0722703971638e-06, | |
| "loss": 1.4604, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 1.0549034638049153e-06, | |
| "loss": 1.4607, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 1.037536530446031e-06, | |
| "loss": 1.4607, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 1.020169597087146e-06, | |
| "loss": 1.4609, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 1.0028026637282615e-06, | |
| "loss": 1.461, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 9.854357303693768e-07, | |
| "loss": 1.4609, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 9.68068797010492e-07, | |
| "loss": 1.4608, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 9.507018636516072e-07, | |
| "loss": 1.4603, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 9.333349302927227e-07, | |
| "loss": 1.4606, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 9.159679969338379e-07, | |
| "loss": 1.4605, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 8.986010635749534e-07, | |
| "loss": 1.4603, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 8.81234130216069e-07, | |
| "loss": 1.4606, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 8.638671968571839e-07, | |
| "loss": 1.4611, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 8.465002634982994e-07, | |
| "loss": 1.4603, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 8.291333301394146e-07, | |
| "loss": 1.4606, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 1.4666800498962402, | |
| "eval_runtime": 320.1083, | |
| "eval_samples_per_second": 93.718, | |
| "eval_steps_per_second": 1.465, | |
| "step": 62621 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 8.1176639678053e-07, | |
| "loss": 1.4591, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 7.943994634216451e-07, | |
| "loss": 1.46, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 7.770325300627606e-07, | |
| "loss": 1.4594, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 7.596655967038757e-07, | |
| "loss": 1.4603, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 7.422986633449912e-07, | |
| "loss": 1.4599, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 7.249317299861067e-07, | |
| "loss": 1.4596, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 7.075647966272218e-07, | |
| "loss": 1.4593, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 6.901978632683372e-07, | |
| "loss": 1.4594, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 6.728309299094524e-07, | |
| "loss": 1.4595, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 6.554639965505679e-07, | |
| "loss": 1.4602, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 6.380970631916829e-07, | |
| "loss": 1.4592, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 6.207301298327984e-07, | |
| "loss": 1.4592, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 6.033631964739136e-07, | |
| "loss": 1.46, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 5.859962631150291e-07, | |
| "loss": 1.4596, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 5.686293297561441e-07, | |
| "loss": 1.4597, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 5.512623963972596e-07, | |
| "loss": 1.4594, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 5.338954630383751e-07, | |
| "loss": 1.4598, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "learning_rate": 5.165285296794902e-07, | |
| "loss": 1.4596, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 4.991615963206056e-07, | |
| "loss": 1.4596, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 4.817946629617209e-07, | |
| "loss": 1.4594, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 4.6442772960283626e-07, | |
| "loss": 1.4593, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 4.4706079624395143e-07, | |
| "loss": 1.4597, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 4.296938628850668e-07, | |
| "loss": 1.4601, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 4.1232692952618197e-07, | |
| "loss": 1.4592, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 3.9495999616729745e-07, | |
| "loss": 1.4599, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 3.77593062808413e-07, | |
| "loss": 1.4591, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 3.602261294495281e-07, | |
| "loss": 1.4598, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 3.428591960906435e-07, | |
| "loss": 1.4592, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 3.2549226273175863e-07, | |
| "loss": 1.4597, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "learning_rate": 3.081253293728741e-07, | |
| "loss": 1.4598, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 2.907583960139893e-07, | |
| "loss": 1.4594, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 2.7339146265510476e-07, | |
| "loss": 1.4596, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 2.5602452929621987e-07, | |
| "loss": 1.4592, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "learning_rate": 2.386575959373353e-07, | |
| "loss": 1.4597, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 2.2129066257845077e-07, | |
| "loss": 1.4593, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "learning_rate": 2.0392372921956589e-07, | |
| "loss": 1.4598, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 1.8655679586068137e-07, | |
| "loss": 1.4593, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 1.691898625017965e-07, | |
| "loss": 1.4592, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "learning_rate": 1.5182292914291196e-07, | |
| "loss": 1.4593, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "learning_rate": 1.344559957840271e-07, | |
| "loss": 1.4594, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 1.1708906242514258e-07, | |
| "loss": 1.4596, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 1.4663872718811035, | |
| "eval_runtime": 321.2818, | |
| "eval_samples_per_second": 93.376, | |
| "eval_steps_per_second": 1.46, | |
| "step": 83492 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 83492, | |
| "total_flos": 2.080820519714685e+18, | |
| "train_loss": 1.4623237360947567, | |
| "train_runtime": 299973.1617, | |
| "train_samples_per_second": 71.255, | |
| "train_steps_per_second": 0.278 | |
| } | |
| ], | |
| "max_steps": 83492, | |
| "num_train_epochs": 4, | |
| "total_flos": 2.080820519714685e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |