{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 155, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0645682001614205, "grad_norm": 0.8069110898977796, "learning_rate": 5.625e-06, "loss": 0.6695831418037415, "step": 10 }, { "epoch": 0.129136400322841, "grad_norm": 0.4330663610600958, "learning_rate": 9.9885108930818e-06, "loss": 0.5777108073234558, "step": 20 }, { "epoch": 0.1937046004842615, "grad_norm": 0.2754324714654337, "learning_rate": 9.785725615782262e-06, "loss": 0.5425556500752767, "step": 30 }, { "epoch": 0.258272800645682, "grad_norm": 0.22721648542536138, "learning_rate": 9.33951351964525e-06, "loss": 0.5174514055252075, "step": 40 }, { "epoch": 0.3228410008071025, "grad_norm": 0.23342764181076078, "learning_rate": 8.672571265492944e-06, "loss": 0.5092498779296875, "step": 50 }, { "epoch": 0.387409200968523, "grad_norm": 0.24311945587345685, "learning_rate": 7.818822994471504e-06, "loss": 0.4979246457417806, "step": 60 }, { "epoch": 0.4519774011299435, "grad_norm": 0.3383109105717731, "learning_rate": 6.8216947703354815e-06, "loss": 0.49263341086251394, "step": 70 }, { "epoch": 0.516545601291364, "grad_norm": 0.4587467243203668, "learning_rate": 5.731905704680834e-06, "loss": 0.48938480019569397, "step": 80 }, { "epoch": 0.5811138014527845, "grad_norm": 0.2835512697183779, "learning_rate": 4.6048881199502265e-06, "loss": 0.4843639267815484, "step": 90 }, { "epoch": 0.645682001614205, "grad_norm": 0.2506103669101136, "learning_rate": 3.497967974096647e-06, "loss": 0.48187875747680664, "step": 100 }, { "epoch": 0.7102502017756255, "grad_norm": 0.2406265586311052, "learning_rate": 2.4674489651264433e-06, "loss": 0.48817190527915955, "step": 110 }, { "epoch": 0.774818401937046, "grad_norm": 0.23065794023843042, "learning_rate": 1.5657486330836786e-06, "loss": 0.48073236147562665, "step": 120 }, { "epoch": 0.8393866020984665, "grad_norm": 0.22029963575446374, "learning_rate": 8.387321321781977e-07, "loss": 0.4764113128185272, "step": 130 }, { "epoch": 0.903954802259887, "grad_norm": 0.2151258135358492, "learning_rate": 3.233792912313943e-07, "loss": 0.47532830238342283, "step": 140 }, { "epoch": 0.9685230024213075, "grad_norm": 0.22455673688429373, "learning_rate": 4.590362784169022e-08, "loss": 0.48181764284769696, "step": 150 } ], "logging_steps": 10, "max_steps": 155, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.4414173445100667e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }