{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.022222222222222223, "grad_norm": 33.61671447753906, "learning_rate": 2e-05, "loss": 5.4203, "mean_token_accuracy": 0.21561338007450104, "num_tokens": 1071.0, "step": 1 }, { "epoch": 1.0, "grad_norm": 39.851768493652344, "learning_rate": 1.8044444444444445e-05, "loss": 2.8638, "mean_token_accuracy": 0.4754302335733717, "num_tokens": 72222.0, "step": 45 }, { "epoch": 2.0, "grad_norm": 23.199281692504883, "learning_rate": 1.6044444444444444e-05, "loss": 1.3918, "mean_token_accuracy": 0.6957559704780578, "num_tokens": 144444.0, "step": 90 }, { "epoch": 3.0, "grad_norm": 11.660584449768066, "learning_rate": 1.4044444444444445e-05, "loss": 0.6698, "mean_token_accuracy": 0.8350067456563314, "num_tokens": 216666.0, "step": 135 }, { "epoch": 4.0, "grad_norm": 4.399273872375488, "learning_rate": 1.2044444444444445e-05, "loss": 0.3166, "mean_token_accuracy": 0.9196972608566284, "num_tokens": 288888.0, "step": 180 }, { "epoch": 5.0, "grad_norm": 13.878437995910645, "learning_rate": 1.0044444444444446e-05, "loss": 0.1585, "mean_token_accuracy": 0.9616732305950588, "num_tokens": 361110.0, "step": 225 }, { "epoch": 6.0, "grad_norm": 7.022050380706787, "learning_rate": 8.044444444444444e-06, "loss": 0.0867, "mean_token_accuracy": 0.9774630414115058, "num_tokens": 433332.0, "step": 270 }, { "epoch": 7.0, "grad_norm": 2.5981574058532715, "learning_rate": 6.044444444444445e-06, "loss": 0.0484, "mean_token_accuracy": 0.9874586674902174, "num_tokens": 505554.0, "step": 315 }, { "epoch": 8.0, "grad_norm": 9.60135555267334, "learning_rate": 4.044444444444445e-06, "loss": 0.0327, "mean_token_accuracy": 0.9892550269762675, "num_tokens": 577776.0, "step": 360 }, { "epoch": 9.0, "grad_norm": 4.281480312347412, "learning_rate": 2.0444444444444447e-06, "loss": 0.0256, "mean_token_accuracy": 0.9917015499538846, "num_tokens": 649998.0, "step": 405 }, { "epoch": 10.0, "grad_norm": 0.3038378059864044, "learning_rate": 4.444444444444445e-08, "loss": 0.0162, "mean_token_accuracy": 0.9933217287063598, "num_tokens": 722220.0, "step": 450 }, { "epoch": 10.0, "step": 450, "total_flos": 3625341809836032.0, "train_loss": 0.5666879651281569, "train_runtime": 426.1414, "train_samples_per_second": 8.354, "train_steps_per_second": 1.056 } ], "logging_steps": 10, "max_steps": 450, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3625341809836032.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }