| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 22.0, | |
| "eval_steps": 500, | |
| "global_step": 2354, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.5887850467289719, | |
| "eval_loss": 1.2691198587417603, | |
| "eval_runtime": 4.8417, | |
| "eval_samples_per_second": 44.199, | |
| "eval_steps_per_second": 5.577, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.6214953271028038, | |
| "eval_loss": 1.0963133573532104, | |
| "eval_runtime": 5.0021, | |
| "eval_samples_per_second": 42.782, | |
| "eval_steps_per_second": 5.398, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.6308411214953271, | |
| "eval_loss": 0.860569179058075, | |
| "eval_runtime": 4.6891, | |
| "eval_samples_per_second": 45.638, | |
| "eval_steps_per_second": 5.758, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.7990654205607477, | |
| "eval_loss": 0.672334611415863, | |
| "eval_runtime": 4.717, | |
| "eval_samples_per_second": 45.368, | |
| "eval_steps_per_second": 5.724, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 4.0654205607476636e-05, | |
| "loss": 1.1331, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.8457943925233645, | |
| "eval_loss": 0.48274144530296326, | |
| "eval_runtime": 4.6309, | |
| "eval_samples_per_second": 46.212, | |
| "eval_steps_per_second": 5.83, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.8551401869158879, | |
| "eval_loss": 0.35962656140327454, | |
| "eval_runtime": 4.7112, | |
| "eval_samples_per_second": 45.424, | |
| "eval_steps_per_second": 5.731, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.8925233644859814, | |
| "eval_loss": 0.26292115449905396, | |
| "eval_runtime": 5.4422, | |
| "eval_samples_per_second": 39.322, | |
| "eval_steps_per_second": 4.961, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.9158878504672897, | |
| "eval_loss": 0.22579917311668396, | |
| "eval_runtime": 4.8554, | |
| "eval_samples_per_second": 44.074, | |
| "eval_steps_per_second": 5.561, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.9158878504672897, | |
| "eval_loss": 0.19785191118717194, | |
| "eval_runtime": 4.9901, | |
| "eval_samples_per_second": 42.885, | |
| "eval_steps_per_second": 5.411, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 9.35, | |
| "learning_rate": 3.130841121495327e-05, | |
| "loss": 0.6031, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.9719626168224299, | |
| "eval_loss": 0.16760671138763428, | |
| "eval_runtime": 4.8143, | |
| "eval_samples_per_second": 44.451, | |
| "eval_steps_per_second": 5.608, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.1423913985490799, | |
| "eval_runtime": 4.7195, | |
| "eval_samples_per_second": 45.343, | |
| "eval_steps_per_second": 5.721, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.985981308411215, | |
| "eval_loss": 0.12256418913602829, | |
| "eval_runtime": 5.1046, | |
| "eval_samples_per_second": 41.923, | |
| "eval_steps_per_second": 5.289, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.9953271028037384, | |
| "eval_loss": 0.11292136460542679, | |
| "eval_runtime": 4.8426, | |
| "eval_samples_per_second": 44.191, | |
| "eval_steps_per_second": 5.576, | |
| "step": 1391 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.9906542056074766, | |
| "eval_loss": 0.106930673122406, | |
| "eval_runtime": 4.9097, | |
| "eval_samples_per_second": 43.587, | |
| "eval_steps_per_second": 5.499, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 14.02, | |
| "learning_rate": 2.196261682242991e-05, | |
| "loss": 0.4317, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.9953271028037384, | |
| "eval_loss": 0.09224073588848114, | |
| "eval_runtime": 4.854, | |
| "eval_samples_per_second": 44.087, | |
| "eval_steps_per_second": 5.562, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.9953271028037384, | |
| "eval_loss": 0.08621260523796082, | |
| "eval_runtime": 4.7695, | |
| "eval_samples_per_second": 44.868, | |
| "eval_steps_per_second": 5.661, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.07688089460134506, | |
| "eval_runtime": 4.8878, | |
| "eval_samples_per_second": 43.782, | |
| "eval_steps_per_second": 5.524, | |
| "step": 1819 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.061450209468603134, | |
| "eval_runtime": 4.7171, | |
| "eval_samples_per_second": 45.367, | |
| "eval_steps_per_second": 5.724, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 18.69, | |
| "learning_rate": 1.2616822429906542e-05, | |
| "loss": 0.3584, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.0667119175195694, | |
| "eval_runtime": 4.8093, | |
| "eval_samples_per_second": 44.498, | |
| "eval_steps_per_second": 5.614, | |
| "step": 2033 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.9953271028037384, | |
| "eval_loss": 0.05547282472252846, | |
| "eval_runtime": 4.7518, | |
| "eval_samples_per_second": 45.036, | |
| "eval_steps_per_second": 5.682, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.054027605801820755, | |
| "eval_runtime": 4.7251, | |
| "eval_samples_per_second": 45.29, | |
| "eval_steps_per_second": 5.714, | |
| "step": 2247 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.04496881738305092, | |
| "eval_runtime": 4.6636, | |
| "eval_samples_per_second": 45.887, | |
| "eval_steps_per_second": 5.79, | |
| "step": 2354 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 2675, | |
| "num_train_epochs": 25, | |
| "save_steps": 200, | |
| "total_flos": 3.9904029741839155e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |