{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 330, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 2.6287968158721924, "learning_rate": 9.783837236868609e-05, "loss": 2.0146, "step": 33 }, { "epoch": 2.0, "grad_norm": 1.897132158279419, "learning_rate": 9.100305426420956e-05, "loss": 0.8409, "step": 66 }, { "epoch": 3.0, "grad_norm": 2.1679365634918213, "learning_rate": 8.015407152327448e-05, "loss": 0.5425, "step": 99 }, { "epoch": 4.0, "grad_norm": 2.982478380203247, "learning_rate": 6.635339816587109e-05, "loss": 0.3347, "step": 132 }, { "epoch": 5.0, "grad_norm": 2.2915990352630615, "learning_rate": 5.095194025516733e-05, "loss": 0.2133, "step": 165 }, { "epoch": 6.0, "grad_norm": 2.5713918209075928, "learning_rate": 3.545729979973005e-05, "loss": 0.1354, "step": 198 }, { "epoch": 7.0, "grad_norm": 1.6528334617614746, "learning_rate": 2.1386200164845526e-05, "loss": 0.0824, "step": 231 }, { "epoch": 8.0, "grad_norm": 1.591257095336914, "learning_rate": 1.0116018621892237e-05, "loss": 0.0583, "step": 264 }, { "epoch": 9.0, "grad_norm": 0.5333567261695862, "learning_rate": 2.7499590642665774e-06, "loss": 0.0476, "step": 297 }, { "epoch": 10.0, "grad_norm": 0.3626488447189331, "learning_rate": 9.06272382371065e-09, "loss": 0.0434, "step": 330 } ], "logging_steps": 500, "max_steps": 330, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 920382499768320.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }