| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "global_step": 3174, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.09, | |
| "eval_loss": 0.34024038910865784, | |
| "eval_runtime": 11.8832, | |
| "eval_samples_per_second": 93.914, | |
| "eval_steps_per_second": 11.781, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_loss": 0.2754386365413666, | |
| "eval_runtime": 11.9246, | |
| "eval_samples_per_second": 93.588, | |
| "eval_steps_per_second": 11.74, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_loss": 0.26367756724357605, | |
| "eval_runtime": 11.939, | |
| "eval_samples_per_second": 93.475, | |
| "eval_steps_per_second": 11.726, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_loss": 0.2583855092525482, | |
| "eval_runtime": 11.9202, | |
| "eval_samples_per_second": 93.622, | |
| "eval_steps_per_second": 11.745, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.2123503465658476e-05, | |
| "loss": 0.3046, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_loss": 0.2609529197216034, | |
| "eval_runtime": 11.9342, | |
| "eval_samples_per_second": 93.513, | |
| "eval_steps_per_second": 11.731, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_loss": 0.2814560830593109, | |
| "eval_runtime": 11.9316, | |
| "eval_samples_per_second": 93.533, | |
| "eval_steps_per_second": 11.734, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_loss": 0.2598992884159088, | |
| "eval_runtime": 11.9444, | |
| "eval_samples_per_second": 93.433, | |
| "eval_steps_per_second": 11.721, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_loss": 0.23917993903160095, | |
| "eval_runtime": 11.9432, | |
| "eval_samples_per_second": 93.442, | |
| "eval_steps_per_second": 11.722, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_loss": 0.24270252883434296, | |
| "eval_runtime": 11.9464, | |
| "eval_samples_per_second": 93.417, | |
| "eval_steps_per_second": 11.719, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.424700693131695e-05, | |
| "loss": 0.2239, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_loss": 0.22673173248767853, | |
| "eval_runtime": 11.9446, | |
| "eval_samples_per_second": 93.432, | |
| "eval_steps_per_second": 11.721, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "eval_loss": 0.2705618441104889, | |
| "eval_runtime": 11.9287, | |
| "eval_samples_per_second": 93.556, | |
| "eval_steps_per_second": 11.736, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "eval_loss": 0.2692383825778961, | |
| "eval_runtime": 11.9394, | |
| "eval_samples_per_second": 93.472, | |
| "eval_steps_per_second": 11.726, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "eval_loss": 0.2735636830329895, | |
| "eval_runtime": 11.9434, | |
| "eval_samples_per_second": 93.441, | |
| "eval_steps_per_second": 11.722, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "eval_loss": 0.26235586404800415, | |
| "eval_runtime": 11.9462, | |
| "eval_samples_per_second": 93.419, | |
| "eval_steps_per_second": 11.719, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.637051039697543e-05, | |
| "loss": 0.1439, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "eval_loss": 0.2414882779121399, | |
| "eval_runtime": 11.9488, | |
| "eval_samples_per_second": 93.399, | |
| "eval_steps_per_second": 11.717, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "eval_loss": 0.24062982201576233, | |
| "eval_runtime": 11.9467, | |
| "eval_samples_per_second": 93.415, | |
| "eval_steps_per_second": 11.719, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "eval_loss": 0.25898364186286926, | |
| "eval_runtime": 11.9478, | |
| "eval_samples_per_second": 93.407, | |
| "eval_steps_per_second": 11.718, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "eval_loss": 0.2588403522968292, | |
| "eval_runtime": 11.9444, | |
| "eval_samples_per_second": 93.433, | |
| "eval_steps_per_second": 11.721, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "eval_loss": 0.2335430383682251, | |
| "eval_runtime": 11.9527, | |
| "eval_samples_per_second": 93.368, | |
| "eval_steps_per_second": 11.713, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.8494013862633903e-05, | |
| "loss": 0.1284, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "eval_loss": 0.22313761711120605, | |
| "eval_runtime": 11.9452, | |
| "eval_samples_per_second": 93.427, | |
| "eval_steps_per_second": 11.72, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "eval_loss": 0.23307541012763977, | |
| "eval_runtime": 11.9484, | |
| "eval_samples_per_second": 93.401, | |
| "eval_steps_per_second": 11.717, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "eval_loss": 0.2677692770957947, | |
| "eval_runtime": 11.9343, | |
| "eval_samples_per_second": 93.512, | |
| "eval_steps_per_second": 11.731, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "eval_loss": 0.26433369517326355, | |
| "eval_runtime": 11.9466, | |
| "eval_samples_per_second": 93.416, | |
| "eval_steps_per_second": 11.719, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "eval_loss": 0.23752279579639435, | |
| "eval_runtime": 11.9483, | |
| "eval_samples_per_second": 93.403, | |
| "eval_steps_per_second": 11.717, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.0617517328292375e-05, | |
| "loss": 0.0864, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "eval_loss": 0.26130592823028564, | |
| "eval_runtime": 11.949, | |
| "eval_samples_per_second": 93.397, | |
| "eval_steps_per_second": 11.716, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "eval_loss": 0.26423731446266174, | |
| "eval_runtime": 11.9518, | |
| "eval_samples_per_second": 93.375, | |
| "eval_steps_per_second": 11.714, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "eval_loss": 0.2653162181377411, | |
| "eval_runtime": 11.952, | |
| "eval_samples_per_second": 93.374, | |
| "eval_steps_per_second": 11.714, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "eval_loss": 0.24894019961357117, | |
| "eval_runtime": 11.9606, | |
| "eval_samples_per_second": 93.306, | |
| "eval_steps_per_second": 11.705, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "eval_loss": 0.2676887512207031, | |
| "eval_runtime": 11.9594, | |
| "eval_samples_per_second": 93.316, | |
| "eval_steps_per_second": 11.706, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.741020793950851e-06, | |
| "loss": 0.069, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "eval_loss": 0.2585600018501282, | |
| "eval_runtime": 11.954, | |
| "eval_samples_per_second": 93.358, | |
| "eval_steps_per_second": 11.712, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "eval_loss": 0.25155311822891235, | |
| "eval_runtime": 11.9543, | |
| "eval_samples_per_second": 93.356, | |
| "eval_steps_per_second": 11.711, | |
| "step": 3100 | |
| } | |
| ], | |
| "max_steps": 3174, | |
| "num_train_epochs": 3, | |
| "total_flos": 6635087188033536.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |