| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.936347103143674, |
| "global_step": 19000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9480384515458563e-05, |
| "loss": 2.3177, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.8960769030917124e-05, |
| "loss": 1.998, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.8441153546375682e-05, |
| "loss": 1.8707, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 1.7921538061834244e-05, |
| "loss": 1.8186, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 1.7401922577292805e-05, |
| "loss": 1.766, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.6882307092751363e-05, |
| "loss": 1.7102, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 1.6362691608209928e-05, |
| "loss": 1.6835, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 1.5456233024597168, |
| "eval_runtime": 14.4448, |
| "eval_samples_per_second": 1065.508, |
| "eval_steps_per_second": 133.197, |
| "step": 3849 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 1.5843076123668486e-05, |
| "loss": 1.6502, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 1.5323460639127047e-05, |
| "loss": 1.6152, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 1.4803845154585607e-05, |
| "loss": 1.6067, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 1.4284229670044167e-05, |
| "loss": 1.5855, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 1.376461418550273e-05, |
| "loss": 1.5687, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 1.324499870096129e-05, |
| "loss": 1.5541, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 1.272538321641985e-05, |
| "loss": 1.5415, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 1.220576773187841e-05, |
| "loss": 1.5347, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 1.433572769165039, |
| "eval_runtime": 12.6009, |
| "eval_samples_per_second": 1221.422, |
| "eval_steps_per_second": 152.688, |
| "step": 7698 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 1.1686152247336972e-05, |
| "loss": 1.513, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 1.1166536762795532e-05, |
| "loss": 1.5097, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 1.0646921278254091e-05, |
| "loss": 1.5, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 1.0127305793712654e-05, |
| "loss": 1.4931, |
| "step": 9500 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 9.607690309171214e-06, |
| "loss": 1.4868, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 9.088074824629775e-06, |
| "loss": 1.4732, |
| "step": 10500 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 8.568459340088335e-06, |
| "loss": 1.4793, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 8.048843855546896e-06, |
| "loss": 1.4488, |
| "step": 11500 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 1.3860989809036255, |
| "eval_runtime": 14.3478, |
| "eval_samples_per_second": 1072.711, |
| "eval_steps_per_second": 134.098, |
| "step": 11547 |
| }, |
| { |
| "epoch": 3.12, |
| "learning_rate": 7.529228371005457e-06, |
| "loss": 1.4531, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.25, |
| "learning_rate": 7.009612886464017e-06, |
| "loss": 1.4495, |
| "step": 12500 |
| }, |
| { |
| "epoch": 3.38, |
| "learning_rate": 6.489997401922577e-06, |
| "loss": 1.4453, |
| "step": 13000 |
| }, |
| { |
| "epoch": 3.51, |
| "learning_rate": 5.9703819173811386e-06, |
| "loss": 1.4387, |
| "step": 13500 |
| }, |
| { |
| "epoch": 3.64, |
| "learning_rate": 5.450766432839699e-06, |
| "loss": 1.4305, |
| "step": 14000 |
| }, |
| { |
| "epoch": 3.77, |
| "learning_rate": 4.93115094829826e-06, |
| "loss": 1.4403, |
| "step": 14500 |
| }, |
| { |
| "epoch": 3.9, |
| "learning_rate": 4.41153546375682e-06, |
| "loss": 1.4434, |
| "step": 15000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 1.3605549335479736, |
| "eval_runtime": 14.4, |
| "eval_samples_per_second": 1068.818, |
| "eval_steps_per_second": 133.611, |
| "step": 15396 |
| }, |
| { |
| "epoch": 4.03, |
| "learning_rate": 3.8919199792153815e-06, |
| "loss": 1.4402, |
| "step": 15500 |
| }, |
| { |
| "epoch": 4.16, |
| "learning_rate": 3.3723044946739412e-06, |
| "loss": 1.4151, |
| "step": 16000 |
| }, |
| { |
| "epoch": 4.29, |
| "learning_rate": 2.852689010132502e-06, |
| "loss": 1.4172, |
| "step": 16500 |
| }, |
| { |
| "epoch": 4.42, |
| "learning_rate": 2.3330735255910627e-06, |
| "loss": 1.4282, |
| "step": 17000 |
| }, |
| { |
| "epoch": 4.55, |
| "learning_rate": 1.8134580410496232e-06, |
| "loss": 1.4168, |
| "step": 17500 |
| }, |
| { |
| "epoch": 4.68, |
| "learning_rate": 1.2938425565081842e-06, |
| "loss": 1.4168, |
| "step": 18000 |
| }, |
| { |
| "epoch": 4.81, |
| "learning_rate": 7.742270719667447e-07, |
| "loss": 1.4327, |
| "step": 18500 |
| }, |
| { |
| "epoch": 4.94, |
| "learning_rate": 2.546115874253053e-07, |
| "loss": 1.4071, |
| "step": 19000 |
| } |
| ], |
| "max_steps": 19245, |
| "num_train_epochs": 5, |
| "total_flos": 969604875878400.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|