{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.929233772571987, "eval_steps": 50, "global_step": 1280, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3904343582235237, "grad_norm": 316011.5625, "learning_rate": 9.609375e-05, "loss": 14505.3388, "step": 50 }, { "epoch": 0.3904343582235237, "eval_runtime": 19.78, "eval_samples_per_second": 21.84, "eval_steps_per_second": 5.46, "step": 50 }, { "epoch": 0.7808687164470474, "grad_norm": 484251.53125, "learning_rate": 9.21875e-05, "loss": 4453.3172, "step": 100 }, { "epoch": 0.7808687164470474, "eval_runtime": 19.7737, "eval_samples_per_second": 21.847, "eval_steps_per_second": 5.462, "step": 100 }, { "epoch": 1.16398243045388, "grad_norm": 502497.4375, "learning_rate": 8.828125000000001e-05, "loss": 3495.4003, "step": 150 }, { "epoch": 1.16398243045388, "eval_runtime": 19.7678, "eval_samples_per_second": 21.854, "eval_steps_per_second": 5.463, "step": 150 }, { "epoch": 1.5544167886774036, "grad_norm": 733164.8125, "learning_rate": 8.4375e-05, "loss": 3503.1944, "step": 200 }, { "epoch": 1.5544167886774036, "eval_runtime": 19.8051, "eval_samples_per_second": 21.813, "eval_steps_per_second": 5.453, "step": 200 }, { "epoch": 1.9448511469009273, "grad_norm": 385944.6875, "learning_rate": 8.046875e-05, "loss": 2722.6881, "step": 250 }, { "epoch": 1.9448511469009273, "eval_runtime": 19.7807, "eval_samples_per_second": 21.839, "eval_steps_per_second": 5.46, "step": 250 }, { "epoch": 2.32796486090776, "grad_norm": 331782.8125, "learning_rate": 7.65625e-05, "loss": 2230.8694, "step": 300 }, { "epoch": 2.32796486090776, "eval_runtime": 19.7643, "eval_samples_per_second": 21.858, "eval_steps_per_second": 5.464, "step": 300 }, { "epoch": 2.7183992191312836, "grad_norm": 321584.84375, "learning_rate": 7.265625000000001e-05, "loss": 1869.6005, "step": 350 }, { "epoch": 2.7183992191312836, "eval_runtime": 19.7383, "eval_samples_per_second": 21.886, "eval_steps_per_second": 5.472, "step": 350 }, { "epoch": 3.101512933138116, "grad_norm": 535160.875, "learning_rate": 6.875e-05, "loss": 1711.6647, "step": 400 }, { "epoch": 3.101512933138116, "eval_runtime": 19.7699, "eval_samples_per_second": 21.851, "eval_steps_per_second": 5.463, "step": 400 }, { "epoch": 3.49194729136164, "grad_norm": 411710.09375, "learning_rate": 6.484375e-05, "loss": 1542.0261, "step": 450 }, { "epoch": 3.49194729136164, "eval_runtime": 19.7891, "eval_samples_per_second": 21.83, "eval_steps_per_second": 5.458, "step": 450 }, { "epoch": 3.8823816495851635, "grad_norm": 426711.1875, "learning_rate": 6.0937500000000004e-05, "loss": 1512.678, "step": 500 }, { "epoch": 3.8823816495851635, "eval_runtime": 19.7847, "eval_samples_per_second": 21.835, "eval_steps_per_second": 5.459, "step": 500 }, { "epoch": 4.265495363591996, "grad_norm": 605412.375, "learning_rate": 5.703125e-05, "loss": 1325.2478, "step": 550 }, { "epoch": 4.265495363591996, "eval_runtime": 19.7746, "eval_samples_per_second": 21.846, "eval_steps_per_second": 5.462, "step": 550 }, { "epoch": 4.65592972181552, "grad_norm": 409170.1875, "learning_rate": 5.3125000000000004e-05, "loss": 1282.363, "step": 600 }, { "epoch": 4.65592972181552, "eval_runtime": 19.7404, "eval_samples_per_second": 21.884, "eval_steps_per_second": 5.471, "step": 600 }, { "epoch": 5.039043435822352, "grad_norm": 328730.40625, "learning_rate": 4.921875e-05, "loss": 971.7115, "step": 650 }, { "epoch": 5.039043435822352, "eval_runtime": 19.734, "eval_samples_per_second": 21.891, "eval_steps_per_second": 5.473, "step": 650 }, { "epoch": 5.4294777940458765, "grad_norm": 145752.921875, "learning_rate": 4.5312500000000004e-05, "loss": 901.4456, "step": 700 }, { "epoch": 5.4294777940458765, "eval_runtime": 19.7781, "eval_samples_per_second": 21.842, "eval_steps_per_second": 5.461, "step": 700 }, { "epoch": 5.819912152269399, "grad_norm": 190886.453125, "learning_rate": 4.140625e-05, "loss": 991.2295, "step": 750 }, { "epoch": 5.819912152269399, "eval_runtime": 19.7898, "eval_samples_per_second": 21.829, "eval_steps_per_second": 5.457, "step": 750 }, { "epoch": 6.203025866276232, "grad_norm": 195982.890625, "learning_rate": 3.7500000000000003e-05, "loss": 829.7355, "step": 800 }, { "epoch": 6.203025866276232, "eval_runtime": 19.7721, "eval_samples_per_second": 21.849, "eval_steps_per_second": 5.462, "step": 800 }, { "epoch": 6.593460224499756, "grad_norm": 427655.71875, "learning_rate": 3.359375e-05, "loss": 684.6569, "step": 850 }, { "epoch": 6.593460224499756, "eval_runtime": 19.7731, "eval_samples_per_second": 21.848, "eval_steps_per_second": 5.462, "step": 850 }, { "epoch": 6.98389458272328, "grad_norm": 516619.03125, "learning_rate": 2.96875e-05, "loss": 778.275, "step": 900 }, { "epoch": 6.98389458272328, "eval_runtime": 19.7592, "eval_samples_per_second": 21.863, "eval_steps_per_second": 5.466, "step": 900 }, { "epoch": 7.367008296730113, "grad_norm": 240667.90625, "learning_rate": 2.578125e-05, "loss": 658.1772, "step": 950 }, { "epoch": 7.367008296730113, "eval_runtime": 19.7525, "eval_samples_per_second": 21.871, "eval_steps_per_second": 5.468, "step": 950 }, { "epoch": 7.7574426549536355, "grad_norm": 319554.6875, "learning_rate": 2.1875e-05, "loss": 457.791, "step": 1000 }, { "epoch": 7.7574426549536355, "eval_runtime": 19.8005, "eval_samples_per_second": 21.818, "eval_steps_per_second": 5.454, "step": 1000 }, { "epoch": 8.140556368960468, "grad_norm": 406190.0625, "learning_rate": 1.796875e-05, "loss": 536.1774, "step": 1050 }, { "epoch": 8.140556368960468, "eval_runtime": 19.767, "eval_samples_per_second": 21.855, "eval_steps_per_second": 5.464, "step": 1050 }, { "epoch": 8.530990727183992, "grad_norm": 325491.78125, "learning_rate": 1.4062500000000001e-05, "loss": 373.9504, "step": 1100 }, { "epoch": 8.530990727183992, "eval_runtime": 19.7611, "eval_samples_per_second": 21.861, "eval_steps_per_second": 5.465, "step": 1100 }, { "epoch": 8.921425085407517, "grad_norm": 98692.546875, "learning_rate": 1.0156250000000001e-05, "loss": 313.6299, "step": 1150 }, { "epoch": 8.921425085407517, "eval_runtime": 19.766, "eval_samples_per_second": 21.856, "eval_steps_per_second": 5.464, "step": 1150 }, { "epoch": 9.304538799414349, "grad_norm": 154307.0, "learning_rate": 6.25e-06, "loss": 275.3413, "step": 1200 }, { "epoch": 9.304538799414349, "eval_runtime": 19.7645, "eval_samples_per_second": 21.857, "eval_steps_per_second": 5.464, "step": 1200 }, { "epoch": 9.694973157637872, "grad_norm": 77167.2265625, "learning_rate": 2.3437500000000002e-06, "loss": 241.9685, "step": 1250 }, { "epoch": 9.694973157637872, "eval_runtime": 19.7657, "eval_samples_per_second": 21.856, "eval_steps_per_second": 5.464, "step": 1250 } ], "logging_steps": 50, "max_steps": 1280, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }