{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.12975523444411677, "eval_steps": 500, "global_step": 110, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011795930404010616, "grad_norm": 1.6082771771683364, "learning_rate": 1.0588235294117648e-06, "loss": 0.7619158267974854, "step": 10 }, { "epoch": 0.023591860808021232, "grad_norm": 0.7435107194033177, "learning_rate": 2.2352941176470592e-06, "loss": 0.6887937545776367, "step": 20 }, { "epoch": 0.03538779121203185, "grad_norm": 0.3929338257386395, "learning_rate": 3.4117647058823532e-06, "loss": 0.6154314994812011, "step": 30 }, { "epoch": 0.047183721616042465, "grad_norm": 0.294584784052743, "learning_rate": 4.588235294117647e-06, "loss": 0.570392656326294, "step": 40 }, { "epoch": 0.058979652020053085, "grad_norm": 0.26241333881617407, "learning_rate": 5.764705882352941e-06, "loss": 0.5440690040588378, "step": 50 }, { "epoch": 0.0707755824240637, "grad_norm": 0.22176527857340889, "learning_rate": 6.941176470588236e-06, "loss": 0.52490234375, "step": 60 }, { "epoch": 0.08257151282807432, "grad_norm": 0.48251093193969485, "learning_rate": 8.11764705882353e-06, "loss": 0.5174517631530762, "step": 70 }, { "epoch": 0.09436744323208493, "grad_norm": 0.2301578989148145, "learning_rate": 9.294117647058824e-06, "loss": 0.5046623229980469, "step": 80 }, { "epoch": 0.10616337363609554, "grad_norm": 0.24175747960955796, "learning_rate": 9.999321888914837e-06, "loss": 0.4992623805999756, "step": 90 }, { "epoch": 0.11795930404010617, "grad_norm": 0.2661087745214427, "learning_rate": 9.991695251414584e-06, "loss": 0.493220853805542, "step": 100 }, { "epoch": 0.12975523444411677, "grad_norm": 0.2088421316748935, "learning_rate": 9.975607308402101e-06, "loss": 0.488851261138916, "step": 110 } ], "logging_steps": 10, "max_steps": 848, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.5436128954498417e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }