{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 15.0,
  "global_step": 13065,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.57,
      "learning_rate": 1.9234596249521625e-05,
      "loss": 2.2122,
      "step": 500
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.0468506813049316,
      "eval_runtime": 45.9615,
      "eval_samples_per_second": 63.814,
      "eval_steps_per_second": 7.985,
      "step": 871
    },
    {
      "epoch": 1.15,
      "learning_rate": 1.8469192499043248e-05,
      "loss": 2.1507,
      "step": 1000
    },
    {
      "epoch": 1.72,
      "learning_rate": 1.770378874856487e-05,
      "loss": 2.0961,
      "step": 1500
    },
    {
      "epoch": 2.0,
      "eval_loss": 2.0117030143737793,
      "eval_runtime": 45.9973,
      "eval_samples_per_second": 63.765,
      "eval_steps_per_second": 7.979,
      "step": 1742
    },
    {
      "epoch": 2.3,
      "learning_rate": 1.693838499808649e-05,
      "loss": 2.0572,
      "step": 2000
    },
    {
      "epoch": 2.87,
      "learning_rate": 1.6172981247608114e-05,
      "loss": 2.0628,
      "step": 2500
    },
    {
      "epoch": 3.0,
      "eval_loss": 2.004037380218506,
      "eval_runtime": 45.987,
      "eval_samples_per_second": 63.779,
      "eval_steps_per_second": 7.981,
      "step": 2613
    },
    {
      "epoch": 3.44,
      "learning_rate": 1.5407577497129737e-05,
      "loss": 2.0173,
      "step": 3000
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.990085244178772,
      "eval_runtime": 45.9573,
      "eval_samples_per_second": 63.82,
      "eval_steps_per_second": 7.986,
      "step": 3484
    },
    {
      "epoch": 4.02,
      "learning_rate": 1.464217374665136e-05,
      "loss": 2.0011,
      "step": 3500
    },
    {
      "epoch": 4.59,
      "learning_rate": 1.3876769996172983e-05,
      "loss": 1.9772,
      "step": 4000
    },
    {
      "epoch": 5.0,
      "eval_loss": 1.9711157083511353,
      "eval_runtime": 45.9361,
      "eval_samples_per_second": 63.85,
      "eval_steps_per_second": 7.989,
      "step": 4355
    },
    {
      "epoch": 5.17,
      "learning_rate": 1.3111366245694605e-05,
      "loss": 1.9615,
      "step": 4500
    },
    {
      "epoch": 5.74,
      "learning_rate": 1.2345962495216228e-05,
      "loss": 1.9455,
      "step": 5000
    },
    {
      "epoch": 6.0,
      "eval_loss": 1.9784963130950928,
      "eval_runtime": 45.9339,
      "eval_samples_per_second": 63.853,
      "eval_steps_per_second": 7.99,
      "step": 5226
    },
    {
      "epoch": 6.31,
      "learning_rate": 1.1580558744737851e-05,
      "loss": 1.9343,
      "step": 5500
    },
    {
      "epoch": 6.89,
      "learning_rate": 1.0815154994259474e-05,
      "loss": 1.917,
      "step": 6000
    },
    {
      "epoch": 7.0,
      "eval_loss": 1.9379661083221436,
      "eval_runtime": 45.9382,
      "eval_samples_per_second": 63.847,
      "eval_steps_per_second": 7.989,
      "step": 6097
    },
    {
      "epoch": 7.46,
      "learning_rate": 1.0049751243781096e-05,
      "loss": 1.8933,
      "step": 6500
    },
    {
      "epoch": 8.0,
      "eval_loss": 1.9650969505310059,
      "eval_runtime": 45.9336,
      "eval_samples_per_second": 63.853,
      "eval_steps_per_second": 7.99,
      "step": 6968
    },
    {
      "epoch": 8.04,
      "learning_rate": 9.284347493302717e-06,
      "loss": 1.8992,
      "step": 7000
    },
    {
      "epoch": 8.61,
      "learning_rate": 8.51894374282434e-06,
      "loss": 1.8708,
      "step": 7500
    },
    {
      "epoch": 9.0,
      "eval_loss": 1.9915274381637573,
      "eval_runtime": 45.9645,
      "eval_samples_per_second": 63.81,
      "eval_steps_per_second": 7.984,
      "step": 7839
    },
    {
      "epoch": 9.18,
      "learning_rate": 7.753539992345964e-06,
      "loss": 1.8698,
      "step": 8000
    },
    {
      "epoch": 9.76,
      "learning_rate": 6.988136241867586e-06,
      "loss": 1.862,
      "step": 8500
    },
    {
      "epoch": 10.0,
      "eval_loss": 1.9309507608413696,
      "eval_runtime": 45.9471,
      "eval_samples_per_second": 63.834,
      "eval_steps_per_second": 7.987,
      "step": 8710
    },
    {
      "epoch": 10.33,
      "learning_rate": 6.222732491389208e-06,
      "loss": 1.862,
      "step": 9000
    },
    {
      "epoch": 10.91,
      "learning_rate": 5.457328740910831e-06,
      "loss": 1.8545,
      "step": 9500
    },
    {
      "epoch": 11.0,
      "eval_loss": 1.9422342777252197,
      "eval_runtime": 45.9437,
      "eval_samples_per_second": 63.839,
      "eval_steps_per_second": 7.988,
      "step": 9581
    },
    {
      "epoch": 11.48,
      "learning_rate": 4.691924990432454e-06,
      "loss": 1.8231,
      "step": 10000
    },
    {
      "epoch": 12.0,
      "eval_loss": 1.931045651435852,
      "eval_runtime": 45.9553,
      "eval_samples_per_second": 63.823,
      "eval_steps_per_second": 7.986,
      "step": 10452
    },
    {
      "epoch": 12.06,
      "learning_rate": 3.926521239954076e-06,
      "loss": 1.8175,
      "step": 10500
    },
    {
      "epoch": 12.63,
      "learning_rate": 3.1611174894756987e-06,
      "loss": 1.8141,
      "step": 11000
    },
    {
      "epoch": 13.0,
      "eval_loss": 1.936198115348816,
      "eval_runtime": 45.947,
      "eval_samples_per_second": 63.834,
      "eval_steps_per_second": 7.987,
      "step": 11323
    },
    {
      "epoch": 13.2,
      "learning_rate": 2.395713738997321e-06,
      "loss": 1.84,
      "step": 11500
    },
    {
      "epoch": 13.78,
      "learning_rate": 1.630309988518944e-06,
      "loss": 1.7939,
      "step": 12000
    },
    {
      "epoch": 14.0,
      "eval_loss": 1.9334322214126587,
      "eval_runtime": 45.9955,
      "eval_samples_per_second": 63.767,
      "eval_steps_per_second": 7.979,
      "step": 12194
    },
    {
      "epoch": 14.35,
      "learning_rate": 8.649062380405665e-07,
      "loss": 1.8165,
      "step": 12500
    },
    {
      "epoch": 14.93,
      "learning_rate": 9.950248756218906e-08,
      "loss": 1.8035,
      "step": 13000
    },
    {
      "epoch": 15.0,
      "eval_loss": 1.9196784496307373,
      "eval_runtime": 45.9399,
      "eval_samples_per_second": 63.844,
      "eval_steps_per_second": 7.989,
      "step": 13065
    },
    {
      "epoch": 15.0,
      "step": 13065,
      "total_flos": 2.2004859851993088e+17,
      "train_loss": 1.9284902631757455,
      "train_runtime": 49525.3053,
      "train_samples_per_second": 16.881,
      "train_steps_per_second": 0.264
    }
  ],
  "max_steps": 13065,
  "num_train_epochs": 15,
  "total_flos": 2.2004859851993088e+17,
  "trial_name": null,
  "trial_params": null
}