{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 18, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18181818181818182, "grad_norm": 0.8233810663223267, "learning_rate": 0.0, "loss": 1.2926392555236816, "step": 1 }, { "epoch": 0.36363636363636365, "grad_norm": 0.8667462468147278, "learning_rate": 4e-05, "loss": 1.2933003902435303, "step": 2 }, { "epoch": 0.5454545454545454, "grad_norm": 0.9416549205780029, "learning_rate": 8e-05, "loss": 1.2474114894866943, "step": 3 }, { "epoch": 0.7272727272727273, "grad_norm": 0.9050902724266052, "learning_rate": 0.00012, "loss": 1.0754895210266113, "step": 4 }, { "epoch": 0.9090909090909091, "grad_norm": 0.6808173060417175, "learning_rate": 0.00016, "loss": 1.2081325054168701, "step": 5 }, { "epoch": 1.0, "grad_norm": 0.7349768280982971, "learning_rate": 0.0002, "loss": 0.9636205434799194, "step": 6 }, { "epoch": 1.1818181818181819, "grad_norm": 0.33241593837738037, "learning_rate": 0.00018461538461538463, "loss": 0.9576319456100464, "step": 7 }, { "epoch": 1.3636363636363638, "grad_norm": 0.334722101688385, "learning_rate": 0.00016923076923076923, "loss": 0.9906941056251526, "step": 8 }, { "epoch": 1.5454545454545454, "grad_norm": 0.32507219910621643, "learning_rate": 0.00015384615384615385, "loss": 0.8973167538642883, "step": 9 }, { "epoch": 1.7272727272727273, "grad_norm": 0.3215590715408325, "learning_rate": 0.00013846153846153847, "loss": 0.9241716265678406, "step": 10 }, { "epoch": 1.9090909090909092, "grad_norm": 0.32502657175064087, "learning_rate": 0.0001230769230769231, "loss": 0.8657209277153015, "step": 11 }, { "epoch": 2.0, "grad_norm": 0.4819266200065613, "learning_rate": 0.0001076923076923077, "loss": 0.9260194897651672, "step": 12 }, { "epoch": 2.1818181818181817, "grad_norm": 0.320608526468277, "learning_rate": 9.230769230769232e-05, "loss": 0.7931567430496216, "step": 13 }, { "epoch": 2.3636363636363638, "grad_norm": 0.34196072816848755, "learning_rate": 7.692307692307693e-05, "loss": 0.839251697063446, "step": 14 }, { "epoch": 2.5454545454545454, "grad_norm": 0.3709123432636261, "learning_rate": 6.153846153846155e-05, "loss": 0.9486470818519592, "step": 15 }, { "epoch": 2.7272727272727275, "grad_norm": 0.3574763536453247, "learning_rate": 4.615384615384616e-05, "loss": 0.7083936929702759, "step": 16 }, { "epoch": 2.909090909090909, "grad_norm": 0.4191477298736572, "learning_rate": 3.0769230769230774e-05, "loss": 0.6311583518981934, "step": 17 }, { "epoch": 3.0, "grad_norm": 0.5564912557601929, "learning_rate": 1.5384615384615387e-05, "loss": 0.7128921151161194, "step": 18 } ], "logging_steps": 1, "max_steps": 18, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 542381054054400.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }