| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 80, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.012578616352201259, | |
| "grad_norm": 9.856545448303223, | |
| "learning_rate": 6.25e-08, | |
| "loss": 0.1027, | |
| "num_input_tokens_seen": 16384, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.06289308176100629, | |
| "grad_norm": 11.089835166931152, | |
| "learning_rate": 3.1249999999999997e-07, | |
| "loss": 0.1198, | |
| "num_input_tokens_seen": 76168, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.12578616352201258, | |
| "grad_norm": 15.803651809692383, | |
| "learning_rate": 6.249999999999999e-07, | |
| "loss": 0.1383, | |
| "num_input_tokens_seen": 149912, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.18867924528301888, | |
| "grad_norm": 22.760234832763672, | |
| "learning_rate": 9.374999999999999e-07, | |
| "loss": 0.248, | |
| "num_input_tokens_seen": 226896, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.25157232704402516, | |
| "grad_norm": 19.20156478881836, | |
| "learning_rate": 9.980434110374724e-07, | |
| "loss": 0.1197, | |
| "num_input_tokens_seen": 300528, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.31446540880503143, | |
| "grad_norm": 7.172516345977783, | |
| "learning_rate": 9.901210054809014e-07, | |
| "loss": 0.0924, | |
| "num_input_tokens_seen": 374912, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.37735849056603776, | |
| "grad_norm": 25.888011932373047, | |
| "learning_rate": 9.762072666790656e-07, | |
| "loss": 0.1603, | |
| "num_input_tokens_seen": 453440, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.44025157232704404, | |
| "grad_norm": 15.700849533081055, | |
| "learning_rate": 9.564722788456943e-07, | |
| "loss": 0.1709, | |
| "num_input_tokens_seen": 528608, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.5031446540880503, | |
| "grad_norm": 5.688323497772217, | |
| "learning_rate": 9.311572862600138e-07, | |
| "loss": 0.0813, | |
| "num_input_tokens_seen": 602320, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.5660377358490566, | |
| "grad_norm": 7.077651023864746, | |
| "learning_rate": 9.005717442503739e-07, | |
| "loss": 0.173, | |
| "num_input_tokens_seen": 677240, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.6289308176100629, | |
| "grad_norm": 7.772736549377441, | |
| "learning_rate": 8.650895363529172e-07, | |
| "loss": 0.1132, | |
| "num_input_tokens_seen": 754944, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.6918238993710691, | |
| "grad_norm": 10.398797035217285, | |
| "learning_rate": 8.251444038874685e-07, | |
| "loss": 0.1637, | |
| "num_input_tokens_seen": 832976, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.7547169811320755, | |
| "grad_norm": 8.86781120300293, | |
| "learning_rate": 7.812246438203903e-07, | |
| "loss": 0.0767, | |
| "num_input_tokens_seen": 909744, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.8176100628930818, | |
| "grad_norm": 7.527681827545166, | |
| "learning_rate": 7.338671397287408e-07, | |
| "loss": 0.1128, | |
| "num_input_tokens_seen": 986672, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.8805031446540881, | |
| "grad_norm": 12.296772956848145, | |
| "learning_rate": 6.836507988323784e-07, | |
| "loss": 0.155, | |
| "num_input_tokens_seen": 1063256, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.9433962264150944, | |
| "grad_norm": 5.3815460205078125, | |
| "learning_rate": 6.311894753209895e-07, | |
| "loss": 0.1218, | |
| "num_input_tokens_seen": 1139944, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 3.86342191696167, | |
| "learning_rate": 5.771244664826511e-07, | |
| "loss": 0.0685, | |
| "num_input_tokens_seen": 1209864, | |
| "step": 80 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 158, | |
| "num_input_tokens_seen": 1209864, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6516860485632.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |