{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "global_step": 26580,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09,
      "learning_rate": 4.90594431903687e-05,
      "loss": 3.0406,
      "step": 500
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.81188863807374e-05,
      "loss": 2.9246,
      "step": 1000
    },
    {
      "epoch": 0.28,
      "learning_rate": 4.71783295711061e-05,
      "loss": 2.8939,
      "step": 1500
    },
    {
      "epoch": 0.38,
      "learning_rate": 4.6237772761474796e-05,
      "loss": 2.8394,
      "step": 2000
    },
    {
      "epoch": 0.47,
      "learning_rate": 4.5297215951843495e-05,
      "loss": 2.8208,
      "step": 2500
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.435665914221219e-05,
      "loss": 2.7974,
      "step": 3000
    },
    {
      "epoch": 0.66,
      "learning_rate": 4.3416102332580885e-05,
      "loss": 2.775,
      "step": 3500
    },
    {
      "epoch": 0.75,
      "learning_rate": 4.247554552294959e-05,
      "loss": 2.7597,
      "step": 4000
    },
    {
      "epoch": 0.85,
      "learning_rate": 4.153498871331828e-05,
      "loss": 2.7433,
      "step": 4500
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.059443190368699e-05,
      "loss": 2.7478,
      "step": 5000
    },
    {
      "epoch": 1.03,
      "learning_rate": 3.965387509405568e-05,
      "loss": 2.6777,
      "step": 5500
    },
    {
      "epoch": 1.13,
      "learning_rate": 3.8713318284424384e-05,
      "loss": 2.5825,
      "step": 6000
    },
    {
      "epoch": 1.22,
      "learning_rate": 3.7772761474793075e-05,
      "loss": 2.5918,
      "step": 6500
    },
    {
      "epoch": 1.32,
      "learning_rate": 3.683220466516178e-05,
      "loss": 2.5835,
      "step": 7000
    },
    {
      "epoch": 1.41,
      "learning_rate": 3.589164785553047e-05,
      "loss": 2.5557,
      "step": 7500
    },
    {
      "epoch": 1.5,
      "learning_rate": 3.495109104589918e-05,
      "loss": 2.5774,
      "step": 8000
    },
    {
      "epoch": 1.6,
      "learning_rate": 3.401053423626787e-05,
      "loss": 2.5853,
      "step": 8500
    },
    {
      "epoch": 1.69,
      "learning_rate": 3.3069977426636574e-05,
      "loss": 2.552,
      "step": 9000
    },
    {
      "epoch": 1.79,
      "learning_rate": 3.2129420617005266e-05,
      "loss": 2.5412,
      "step": 9500
    },
    {
      "epoch": 1.88,
      "learning_rate": 3.118886380737397e-05,
      "loss": 2.5345,
      "step": 10000
    },
    {
      "epoch": 1.98,
      "learning_rate": 3.0248306997742666e-05,
      "loss": 2.5333,
      "step": 10500
    },
    {
      "epoch": 2.07,
      "learning_rate": 2.9307750188111365e-05,
      "loss": 2.463,
      "step": 11000
    },
    {
      "epoch": 2.16,
      "learning_rate": 2.8367193378480063e-05,
      "loss": 2.4103,
      "step": 11500
    },
    {
      "epoch": 2.26,
      "learning_rate": 2.742663656884876e-05,
      "loss": 2.4347,
      "step": 12000
    },
    {
      "epoch": 2.35,
      "learning_rate": 2.6486079759217457e-05,
      "loss": 2.4205,
      "step": 12500
    },
    {
      "epoch": 2.45,
      "learning_rate": 2.5545522949586155e-05,
      "loss": 2.4301,
      "step": 13000
    },
    {
      "epoch": 2.54,
      "learning_rate": 2.4604966139954853e-05,
      "loss": 2.4104,
      "step": 13500
    },
    {
      "epoch": 2.63,
      "learning_rate": 2.3664409330323552e-05,
      "loss": 2.42,
      "step": 14000
    },
    {
      "epoch": 2.73,
      "learning_rate": 2.272385252069225e-05,
      "loss": 2.4195,
      "step": 14500
    },
    {
      "epoch": 2.82,
      "learning_rate": 2.178329571106095e-05,
      "loss": 2.4112,
      "step": 15000
    },
    {
      "epoch": 2.92,
      "learning_rate": 2.0842738901429647e-05,
      "loss": 2.3915,
      "step": 15500
    },
    {
      "epoch": 3.01,
      "learning_rate": 1.9902182091798346e-05,
      "loss": 2.3849,
      "step": 16000
    },
    {
      "epoch": 3.1,
      "learning_rate": 1.8961625282167044e-05,
      "loss": 2.3143,
      "step": 16500
    },
    {
      "epoch": 3.2,
      "learning_rate": 1.8021068472535743e-05,
      "loss": 2.304,
      "step": 17000
    },
    {
      "epoch": 3.29,
      "learning_rate": 1.708051166290444e-05,
      "loss": 2.3193,
      "step": 17500
    },
    {
      "epoch": 3.39,
      "learning_rate": 1.613995485327314e-05,
      "loss": 2.318,
      "step": 18000
    },
    {
      "epoch": 3.48,
      "learning_rate": 1.5199398043641836e-05,
      "loss": 2.308,
      "step": 18500
    },
    {
      "epoch": 3.57,
      "learning_rate": 1.4258841234010533e-05,
      "loss": 2.3242,
      "step": 19000
    },
    {
      "epoch": 3.67,
      "learning_rate": 1.3318284424379231e-05,
      "loss": 2.3143,
      "step": 19500
    },
    {
      "epoch": 3.76,
      "learning_rate": 1.2377727614747931e-05,
      "loss": 2.3192,
      "step": 20000
    },
    {
      "epoch": 3.86,
      "learning_rate": 1.143717080511663e-05,
      "loss": 2.3153,
      "step": 20500
    },
    {
      "epoch": 3.95,
      "learning_rate": 1.0496613995485328e-05,
      "loss": 2.2939,
      "step": 21000
    },
    {
      "epoch": 4.04,
      "learning_rate": 9.556057185854025e-06,
      "loss": 2.2786,
      "step": 21500
    },
    {
      "epoch": 4.14,
      "learning_rate": 8.615500376222724e-06,
      "loss": 2.2562,
      "step": 22000
    },
    {
      "epoch": 4.23,
      "learning_rate": 7.674943566591422e-06,
      "loss": 2.2488,
      "step": 22500
    },
    {
      "epoch": 4.33,
      "learning_rate": 6.73438675696012e-06,
      "loss": 2.2515,
      "step": 23000
    },
    {
      "epoch": 4.42,
      "learning_rate": 5.793829947328819e-06,
      "loss": 2.2375,
      "step": 23500
    },
    {
      "epoch": 4.51,
      "learning_rate": 4.853273137697517e-06,
      "loss": 2.2446,
      "step": 24000
    },
    {
      "epoch": 4.61,
      "learning_rate": 3.912716328066216e-06,
      "loss": 2.2226,
      "step": 24500
    },
    {
      "epoch": 4.7,
      "learning_rate": 2.9721595184349133e-06,
      "loss": 2.2476,
      "step": 25000
    },
    {
      "epoch": 4.8,
      "learning_rate": 2.0316027088036117e-06,
      "loss": 2.2568,
      "step": 25500
    },
    {
      "epoch": 4.89,
      "learning_rate": 1.0910458991723102e-06,
      "loss": 2.2529,
      "step": 26000
    },
    {
      "epoch": 4.98,
      "learning_rate": 1.5048908954100828e-07,
      "loss": 2.245,
      "step": 26500
    },
    {
      "epoch": 5.0,
      "step": 26580,
      "total_flos": 20321956433756160,
      "train_runtime": 15554.6344,
      "train_samples_per_second": 1.709
    }
  ],
  "max_steps": 26580,
  "num_train_epochs": 5,
  "total_flos": 20321956433756160,
  "trial_name": null,
  "trial_params": null
}