{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9957081545064378, "eval_steps": 1000, "global_step": 174, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05722460658082976, "grad_norm": 2.8843040386286187, "learning_rate": 1e-05, "loss": 1.2427, "step": 10 }, { "epoch": 0.11444921316165951, "grad_norm": 0.9017664305111632, "learning_rate": 9.909643486313533e-06, "loss": 0.5328, "step": 20 }, { "epoch": 0.17167381974248927, "grad_norm": 0.797068637259448, "learning_rate": 9.641839665080363e-06, "loss": 0.4447, "step": 30 }, { "epoch": 0.22889842632331903, "grad_norm": 0.6941745476559829, "learning_rate": 9.206267664155906e-06, "loss": 0.4017, "step": 40 }, { "epoch": 0.2861230329041488, "grad_norm": 0.6250591769825486, "learning_rate": 8.61867019052535e-06, "loss": 0.3776, "step": 50 }, { "epoch": 0.34334763948497854, "grad_norm": 0.6177949791797019, "learning_rate": 7.900284547855992e-06, "loss": 0.3662, "step": 60 }, { "epoch": 0.4005722460658083, "grad_norm": 0.6776944915753278, "learning_rate": 7.0770750650094335e-06, "loss": 0.3658, "step": 70 }, { "epoch": 0.45779685264663805, "grad_norm": 0.7027875485112575, "learning_rate": 6.178794677547138e-06, "loss": 0.3553, "step": 80 }, { "epoch": 0.5150214592274678, "grad_norm": 0.6301404025031555, "learning_rate": 5.237909579118713e-06, "loss": 0.3317, "step": 90 }, { "epoch": 0.5722460658082976, "grad_norm": 0.6840316408079424, "learning_rate": 4.2884258086335755e-06, "loss": 0.3428, "step": 100 }, { "epoch": 0.6294706723891274, "grad_norm": 0.6028809547153681, "learning_rate": 3.3646601834128924e-06, "loss": 0.331, "step": 110 }, { "epoch": 0.6866952789699571, "grad_norm": 0.6463003155720976, "learning_rate": 2.5000000000000015e-06, "loss": 0.3252, "step": 120 }, { "epoch": 0.7439198855507868, "grad_norm": 0.5511586120399353, "learning_rate": 1.7256963302735752e-06, "loss": 0.3167, "step": 130 }, { "epoch": 0.8011444921316166, "grad_norm": 0.6770595845177203, "learning_rate": 1.0697345262860638e-06, "loss": 0.326, "step": 140 }, { "epoch": 0.8583690987124464, "grad_norm": 0.5474842641924246, "learning_rate": 5.558227567253832e-07, "loss": 0.3133, "step": 150 }, { "epoch": 0.9155937052932761, "grad_norm": 0.5872523545390601, "learning_rate": 2.0253513192751374e-07, "loss": 0.3195, "step": 160 }, { "epoch": 0.9728183118741058, "grad_norm": 0.5924289357218945, "learning_rate": 2.264038713457706e-08, "loss": 0.3216, "step": 170 }, { "epoch": 0.9957081545064378, "step": 174, "total_flos": 599683779002368.0, "train_loss": 0.40995535357245083, "train_runtime": 10019.3178, "train_samples_per_second": 2.232, "train_steps_per_second": 0.017 } ], "logging_steps": 10, "max_steps": 174, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 599683779002368.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }