| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 18.0, | |
| "global_step": 15678, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 9.68108177063401e-05, | |
| "loss": 2.3589, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 2.2804882526397705, | |
| "eval_runtime": 46.5333, | |
| "eval_samples_per_second": 63.03, | |
| "eval_steps_per_second": 7.887, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 9.36216354126802e-05, | |
| "loss": 2.3359, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 9.043245311902028e-05, | |
| "loss": 2.2563, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 2.2500908374786377, | |
| "eval_runtime": 46.4819, | |
| "eval_samples_per_second": 63.1, | |
| "eval_steps_per_second": 7.896, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 8.724327082536038e-05, | |
| "loss": 2.2043, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 8.405408853170048e-05, | |
| "loss": 2.1936, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 2.2418746948242188, | |
| "eval_runtime": 46.4009, | |
| "eval_samples_per_second": 63.21, | |
| "eval_steps_per_second": 7.909, | |
| "step": 2613 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 8.086490623804057e-05, | |
| "loss": 2.11, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 2.230112314224243, | |
| "eval_runtime": 46.3837, | |
| "eval_samples_per_second": 63.233, | |
| "eval_steps_per_second": 7.912, | |
| "step": 3484 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 7.767572394438066e-05, | |
| "loss": 2.1018, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "learning_rate": 7.448654165072075e-05, | |
| "loss": 2.0311, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 2.231987476348877, | |
| "eval_runtime": 46.4154, | |
| "eval_samples_per_second": 63.19, | |
| "eval_steps_per_second": 7.907, | |
| "step": 4355 | |
| }, | |
| { | |
| "epoch": 5.17, | |
| "learning_rate": 7.129735935706085e-05, | |
| "loss": 2.0174, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 6.810817706340095e-05, | |
| "loss": 1.969, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 2.227581262588501, | |
| "eval_runtime": 46.0273, | |
| "eval_samples_per_second": 63.723, | |
| "eval_steps_per_second": 7.974, | |
| "step": 5226 | |
| }, | |
| { | |
| "epoch": 6.31, | |
| "learning_rate": 6.491899476974103e-05, | |
| "loss": 1.9427, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 6.89, | |
| "learning_rate": 6.172981247608114e-05, | |
| "loss": 1.9148, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 2.1621322631835938, | |
| "eval_runtime": 45.9847, | |
| "eval_samples_per_second": 63.782, | |
| "eval_steps_per_second": 7.981, | |
| "step": 6097 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "learning_rate": 5.854063018242123e-05, | |
| "loss": 1.8569, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 2.1876232624053955, | |
| "eval_runtime": 45.9436, | |
| "eval_samples_per_second": 63.839, | |
| "eval_steps_per_second": 7.988, | |
| "step": 6968 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "learning_rate": 5.535144788876132e-05, | |
| "loss": 1.8523, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 8.61, | |
| "learning_rate": 5.216226559510142e-05, | |
| "loss": 1.7978, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 2.201099395751953, | |
| "eval_runtime": 45.9305, | |
| "eval_samples_per_second": 63.857, | |
| "eval_steps_per_second": 7.99, | |
| "step": 7839 | |
| }, | |
| { | |
| "epoch": 9.18, | |
| "learning_rate": 4.897308330144152e-05, | |
| "loss": 1.7922, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 9.76, | |
| "learning_rate": 4.578390100778161e-05, | |
| "loss": 1.7602, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 2.1279566287994385, | |
| "eval_runtime": 45.9065, | |
| "eval_samples_per_second": 63.891, | |
| "eval_steps_per_second": 7.995, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 10.33, | |
| "learning_rate": 4.2594718714121704e-05, | |
| "loss": 1.7371, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 10.91, | |
| "learning_rate": 3.9405536420461794e-05, | |
| "loss": 1.7166, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_loss": 2.1643788814544678, | |
| "eval_runtime": 45.9375, | |
| "eval_samples_per_second": 63.848, | |
| "eval_steps_per_second": 7.989, | |
| "step": 9581 | |
| }, | |
| { | |
| "epoch": 11.48, | |
| "learning_rate": 3.621635412680189e-05, | |
| "loss": 1.6651, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_loss": 2.1245827674865723, | |
| "eval_runtime": 45.9431, | |
| "eval_samples_per_second": 63.84, | |
| "eval_steps_per_second": 7.988, | |
| "step": 10452 | |
| }, | |
| { | |
| "epoch": 12.06, | |
| "learning_rate": 3.302717183314198e-05, | |
| "loss": 1.6508, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 12.63, | |
| "learning_rate": 2.9837989539482082e-05, | |
| "loss": 1.6141, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_loss": 2.126392126083374, | |
| "eval_runtime": 45.9202, | |
| "eval_samples_per_second": 63.872, | |
| "eval_steps_per_second": 7.992, | |
| "step": 11323 | |
| }, | |
| { | |
| "epoch": 13.2, | |
| "learning_rate": 2.6648807245822172e-05, | |
| "loss": 1.6312, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 13.78, | |
| "learning_rate": 2.345962495216227e-05, | |
| "loss": 1.5759, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_loss": 2.114293098449707, | |
| "eval_runtime": 45.9021, | |
| "eval_samples_per_second": 63.897, | |
| "eval_steps_per_second": 7.995, | |
| "step": 12194 | |
| }, | |
| { | |
| "epoch": 14.35, | |
| "learning_rate": 2.0270442658502363e-05, | |
| "loss": 1.572, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 14.93, | |
| "learning_rate": 1.7081260364842456e-05, | |
| "loss": 1.5478, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_loss": 2.098154306411743, | |
| "eval_runtime": 45.9315, | |
| "eval_samples_per_second": 63.856, | |
| "eval_steps_per_second": 7.99, | |
| "step": 13065 | |
| }, | |
| { | |
| "epoch": 15.5, | |
| "learning_rate": 1.3892078071182548e-05, | |
| "loss": 1.5311, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_loss": 2.099287509918213, | |
| "eval_runtime": 45.9245, | |
| "eval_samples_per_second": 63.866, | |
| "eval_steps_per_second": 7.991, | |
| "step": 13936 | |
| }, | |
| { | |
| "epoch": 16.07, | |
| "learning_rate": 1.0702895777522644e-05, | |
| "loss": 1.5187, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 16.65, | |
| "learning_rate": 7.513713483862737e-06, | |
| "loss": 1.5187, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_loss": 2.0979349613189697, | |
| "eval_runtime": 45.9022, | |
| "eval_samples_per_second": 63.897, | |
| "eval_steps_per_second": 7.995, | |
| "step": 14807 | |
| }, | |
| { | |
| "epoch": 17.22, | |
| "learning_rate": 4.324531190202833e-06, | |
| "loss": 1.4819, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 17.8, | |
| "learning_rate": 1.1353488965429266e-06, | |
| "loss": 1.4809, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_loss": 2.0337767601013184, | |
| "eval_runtime": 45.9061, | |
| "eval_samples_per_second": 63.891, | |
| "eval_steps_per_second": 7.995, | |
| "step": 15678 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "step": 15678, | |
| "total_flos": 2.6405831822391706e+17, | |
| "train_loss": 1.8265393278187887, | |
| "train_runtime": 59534.5244, | |
| "train_samples_per_second": 16.852, | |
| "train_steps_per_second": 0.263 | |
| } | |
| ], | |
| "max_steps": 15678, | |
| "num_train_epochs": 18, | |
| "total_flos": 2.6405831822391706e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |