{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 80, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.012578616352201259, "grad_norm": 9.856545448303223, "learning_rate": 6.25e-08, "loss": 0.1027, "num_input_tokens_seen": 16384, "step": 1 }, { "epoch": 0.06289308176100629, "grad_norm": 11.089835166931152, "learning_rate": 3.1249999999999997e-07, "loss": 0.1198, "num_input_tokens_seen": 76168, "step": 5 }, { "epoch": 0.12578616352201258, "grad_norm": 15.803651809692383, "learning_rate": 6.249999999999999e-07, "loss": 0.1383, "num_input_tokens_seen": 149912, "step": 10 }, { "epoch": 0.18867924528301888, "grad_norm": 22.760234832763672, "learning_rate": 9.374999999999999e-07, "loss": 0.248, "num_input_tokens_seen": 226896, "step": 15 }, { "epoch": 0.25157232704402516, "grad_norm": 19.20156478881836, "learning_rate": 9.980434110374724e-07, "loss": 0.1197, "num_input_tokens_seen": 300528, "step": 20 }, { "epoch": 0.31446540880503143, "grad_norm": 7.172516345977783, "learning_rate": 9.901210054809014e-07, "loss": 0.0924, "num_input_tokens_seen": 374912, "step": 25 }, { "epoch": 0.37735849056603776, "grad_norm": 25.888011932373047, "learning_rate": 9.762072666790656e-07, "loss": 0.1603, "num_input_tokens_seen": 453440, "step": 30 }, { "epoch": 0.44025157232704404, "grad_norm": 15.700849533081055, "learning_rate": 9.564722788456943e-07, "loss": 0.1709, "num_input_tokens_seen": 528608, "step": 35 }, { "epoch": 0.5031446540880503, "grad_norm": 5.688323497772217, "learning_rate": 9.311572862600138e-07, "loss": 0.0813, "num_input_tokens_seen": 602320, "step": 40 }, { "epoch": 0.5660377358490566, "grad_norm": 7.077651023864746, "learning_rate": 9.005717442503739e-07, "loss": 0.173, "num_input_tokens_seen": 677240, "step": 45 }, { "epoch": 0.6289308176100629, "grad_norm": 7.772736549377441, "learning_rate": 8.650895363529172e-07, "loss": 0.1132, "num_input_tokens_seen": 754944, "step": 50 }, { "epoch": 0.6918238993710691, "grad_norm": 10.398797035217285, "learning_rate": 8.251444038874685e-07, "loss": 0.1637, "num_input_tokens_seen": 832976, "step": 55 }, { "epoch": 0.7547169811320755, "grad_norm": 8.86781120300293, "learning_rate": 7.812246438203903e-07, "loss": 0.0767, "num_input_tokens_seen": 909744, "step": 60 }, { "epoch": 0.8176100628930818, "grad_norm": 7.527681827545166, "learning_rate": 7.338671397287408e-07, "loss": 0.1128, "num_input_tokens_seen": 986672, "step": 65 }, { "epoch": 0.8805031446540881, "grad_norm": 12.296772956848145, "learning_rate": 6.836507988323784e-07, "loss": 0.155, "num_input_tokens_seen": 1063256, "step": 70 }, { "epoch": 0.9433962264150944, "grad_norm": 5.3815460205078125, "learning_rate": 6.311894753209895e-07, "loss": 0.1218, "num_input_tokens_seen": 1139944, "step": 75 }, { "epoch": 1.0, "grad_norm": 3.86342191696167, "learning_rate": 5.771244664826511e-07, "loss": 0.0685, "num_input_tokens_seen": 1209864, "step": 80 } ], "logging_steps": 5, "max_steps": 158, "num_input_tokens_seen": 1209864, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6516860485632.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }