{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 154, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06527947776417789, "grad_norm": 0.3659420311450958, "learning_rate": 4.999421254949728e-05, "loss": 0.5493, "step": 10 }, { "epoch": 0.13055895552835578, "grad_norm": 0.3503095209598541, "learning_rate": 4.930295491572653e-05, "loss": 0.5735, "step": 20 }, { "epoch": 0.19583843329253367, "grad_norm": 0.42860716581344604, "learning_rate": 4.749077174130608e-05, "loss": 0.5553, "step": 30 }, { "epoch": 0.26111791105671156, "grad_norm": 0.4349290430545807, "learning_rate": 4.464124637696786e-05, "loss": 0.5709, "step": 40 }, { "epoch": 0.3263973888208894, "grad_norm": 0.4874759018421173, "learning_rate": 4.088580752225444e-05, "loss": 0.5424, "step": 50 }, { "epoch": 0.39167686658506734, "grad_norm": 0.39663174748420715, "learning_rate": 3.639766733771147e-05, "loss": 0.5106, "step": 60 }, { "epoch": 0.4569563443492452, "grad_norm": 0.3940886855125427, "learning_rate": 3.1383832377904675e-05, "loss": 0.5367, "step": 70 }, { "epoch": 0.5222358221134231, "grad_norm": 0.4165942966938019, "learning_rate": 2.607555582511326e-05, "loss": 0.5536, "step": 80 }, { "epoch": 0.587515299877601, "grad_norm": 0.37471580505371094, "learning_rate": 2.071767139546306e-05, "loss": 0.5372, "step": 90 }, { "epoch": 0.6527947776417788, "grad_norm": 0.37760552763938904, "learning_rate": 1.5557300869900876e-05, "loss": 0.5255, "step": 100 }, { "epoch": 0.7180742554059567, "grad_norm": 0.3830636143684387, "learning_rate": 1.0832456092722062e-05, "loss": 0.5326, "step": 110 }, { "epoch": 0.7833537331701347, "grad_norm": 0.3630097210407257, "learning_rate": 6.761061147837808e-06, "loss": 0.5391, "step": 120 }, { "epoch": 0.8486332109343125, "grad_norm": 0.4070269465446472, "learning_rate": 3.5309010431049283e-06, "loss": 0.5158, "step": 130 }, { "epoch": 0.9139126886984904, "grad_norm": 0.3441726267337799, "learning_rate": 1.2909604996957092e-06, "loss": 0.5276, "step": 140 }, { "epoch": 0.9791921664626683, "grad_norm": 0.34463751316070557, "learning_rate": 1.4455232765120396e-07, "loss": 0.5374, "step": 150 } ], "logging_steps": 10, "max_steps": 154, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.526035444970086e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }