{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 74, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 1.0259285002946854, "epoch": 0.273972602739726, "grad_norm": 3.722426652908325, "learning_rate": 9e-06, "loss": 1.0839, "mean_token_accuracy": 0.761720848083496, "num_tokens": 584826.0, "step": 10 }, { "entropy": 0.9608730539679528, "epoch": 0.547945205479452, "grad_norm": 0.8895509839057922, "learning_rate": 9.934881598487478e-06, "loss": 0.9788, "mean_token_accuracy": 0.7810790002346039, "num_tokens": 1174174.0, "step": 20 }, { "entropy": 0.9163148060441018, "epoch": 0.821917808219178, "grad_norm": 0.802199125289917, "learning_rate": 9.711957702320176e-06, "loss": 0.9307, "mean_token_accuracy": 0.7890266820788383, "num_tokens": 1762094.0, "step": 30 }, { "entropy": 0.8868917273847681, "epoch": 1.0821917808219177, "grad_norm": 0.7817137837409973, "learning_rate": 9.337587608588588e-06, "loss": 0.8963, "mean_token_accuracy": 0.7947045470538893, "num_tokens": 2322415.0, "step": 40 }, { "entropy": 0.8300882771611213, "epoch": 1.356164383561644, "grad_norm": 0.6832650899887085, "learning_rate": 8.823803880137993e-06, "loss": 0.8464, "mean_token_accuracy": 0.8045218542218209, "num_tokens": 2910028.0, "step": 50 }, { "entropy": 0.8211456254124642, "epoch": 1.6301369863013697, "grad_norm": 0.6951064467430115, "learning_rate": 8.18711994874345e-06, "loss": 0.8352, "mean_token_accuracy": 0.8072828114032745, "num_tokens": 3495988.0, "step": 60 }, { "entropy": 0.8173969030380249, "epoch": 1.904109589041096, "grad_norm": 0.762572705745697, "learning_rate": 7.447999359825263e-06, "loss": 0.8315, "mean_token_accuracy": 0.8067627891898155, "num_tokens": 4083547.0, "step": 70 } ], "logging_steps": 10, "max_steps": 185, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.831815282786304e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }