{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.14201183431952663, "eval_steps": 20, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001183431952662722, "eval_loss": 10.566378593444824, "eval_runtime": 1.9917, "eval_samples_per_second": 754.145, "eval_steps_per_second": 47.197, "step": 1 }, { "epoch": 0.011834319526627219, "grad_norm": 6.460504531860352, "learning_rate": 1.6000000000000003e-05, "loss": 10.5689, "step": 10 }, { "epoch": 0.023668639053254437, "grad_norm": 4.217194557189941, "learning_rate": 3.2000000000000005e-05, "loss": 10.5293, "step": 20 }, { "epoch": 0.023668639053254437, "eval_loss": 10.470946311950684, "eval_runtime": 1.962, "eval_samples_per_second": 765.537, "eval_steps_per_second": 47.91, "step": 20 }, { "epoch": 0.03550295857988166, "grad_norm": 7.088407039642334, "learning_rate": 4.8e-05, "loss": 10.3793, "step": 30 }, { "epoch": 0.047337278106508875, "grad_norm": 4.761963844299316, "learning_rate": 6.400000000000001e-05, "loss": 9.924, "step": 40 }, { "epoch": 0.047337278106508875, "eval_loss": 9.448060035705566, "eval_runtime": 2.0352, "eval_samples_per_second": 738.016, "eval_steps_per_second": 46.187, "step": 40 }, { "epoch": 0.05917159763313609, "grad_norm": 5.449029922485352, "learning_rate": 8e-05, "loss": 8.9682, "step": 50 }, { "epoch": 0.07100591715976332, "grad_norm": 2.087066173553467, "learning_rate": 9.6e-05, "loss": 8.2576, "step": 60 }, { "epoch": 0.07100591715976332, "eval_loss": 7.836277961730957, "eval_runtime": 2.012, "eval_samples_per_second": 746.529, "eval_steps_per_second": 46.72, "step": 60 }, { "epoch": 0.08284023668639054, "grad_norm": 1.812099575996399, "learning_rate": 0.00011200000000000001, "loss": 7.6596, "step": 70 }, { "epoch": 0.09467455621301775, "grad_norm": 2.425971269607544, "learning_rate": 0.00012800000000000002, "loss": 7.4757, "step": 80 }, { "epoch": 0.09467455621301775, "eval_loss": 7.445075988769531, "eval_runtime": 2.0477, "eval_samples_per_second": 733.507, "eval_steps_per_second": 45.905, "step": 80 }, { "epoch": 0.10650887573964497, "grad_norm": 1.7427077293395996, "learning_rate": 0.000144, "loss": 7.4227, "step": 90 }, { "epoch": 0.11834319526627218, "grad_norm": 5.246922492980957, "learning_rate": 0.00016, "loss": 7.2513, "step": 100 }, { "epoch": 0.11834319526627218, "eval_loss": 7.342855930328369, "eval_runtime": 1.997, "eval_samples_per_second": 752.143, "eval_steps_per_second": 47.072, "step": 100 }, { "epoch": 0.1301775147928994, "grad_norm": 2.3408617973327637, "learning_rate": 0.00017600000000000002, "loss": 7.3941, "step": 110 }, { "epoch": 0.14201183431952663, "grad_norm": 2.138099431991577, "learning_rate": 0.000192, "loss": 7.1604, "step": 120 }, { "epoch": 0.14201183431952663, "eval_loss": 7.194538593292236, "eval_runtime": 1.9979, "eval_samples_per_second": 751.783, "eval_steps_per_second": 47.049, "step": 120 } ], "logging_steps": 10, "max_steps": 2500, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 40, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5058714486702080.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }