{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5877457310948486, "eval_steps": 32, "global_step": 8, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07346821638685608, "grad_norm": 52224.03643740955, "learning_rate": 3e-06, "loss": 1232.5903, "mean_token_accuracy": 0.5698598268500064, "num_tokens": 2439700.0, "step": 1 }, { "epoch": 0.14693643277371216, "grad_norm": 24564.28249818258, "learning_rate": 2.7692307692307693e-06, "loss": 1012.342, "mean_token_accuracy": 0.5935460063046776, "num_tokens": 4818537.0, "step": 2 }, { "epoch": 0.22040464916056823, "grad_norm": 10030.685256076777, "learning_rate": 2.5384615384615385e-06, "loss": 771.7506, "mean_token_accuracy": 0.6206552012590691, "num_tokens": 7222999.0, "step": 3 }, { "epoch": 0.2938728655474243, "grad_norm": 9797.578497343955, "learning_rate": 2.307692307692308e-06, "loss": 673.8949, "mean_token_accuracy": 0.6229124862584285, "num_tokens": 9603627.0, "step": 4 }, { "epoch": 0.3673410819342804, "grad_norm": 6906.536059477202, "learning_rate": 2.076923076923077e-06, "loss": 647.7118, "mean_token_accuracy": 0.6252844646223821, "num_tokens": 12031078.0, "step": 5 }, { "epoch": 0.44080929832113647, "grad_norm": 4121.501080449312, "learning_rate": 1.8461538461538462e-06, "loss": 616.8495, "mean_token_accuracy": 0.6341593922115862, "num_tokens": 14403431.0, "step": 6 }, { "epoch": 0.5142775147079925, "grad_norm": 2026.657249791317, "learning_rate": 1.6153846153846154e-06, "loss": 601.5251, "mean_token_accuracy": 0.6352307246997952, "num_tokens": 16811979.0, "step": 7 }, { "epoch": 0.5877457310948486, "grad_norm": 1892.3110141436696, "learning_rate": 1.3846153846153846e-06, "loss": 577.3281, "mean_token_accuracy": 0.6342741788248532, "num_tokens": 19217660.0, "step": 8 } ], "logging_steps": 1, "max_steps": 13, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 8, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 128626689310720.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }