{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 587, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03411513859275053, "grad_norm": 8.578770637512207, "learning_rate": 4.838160136286201e-05, "loss": 3.8842, "step": 20 }, { "epoch": 0.06823027718550106, "grad_norm": 8.367802619934082, "learning_rate": 4.667802385008518e-05, "loss": 2.9618, "step": 40 }, { "epoch": 0.1023454157782516, "grad_norm": 0.27688243985176086, "learning_rate": 4.4974446337308354e-05, "loss": 1.5431, "step": 60 }, { "epoch": 0.13646055437100213, "grad_norm": 0.3149465024471283, "learning_rate": 4.3270868824531515e-05, "loss": 1.6255, "step": 80 }, { "epoch": 0.17057569296375266, "grad_norm": 0.41583290696144104, "learning_rate": 4.1567291311754684e-05, "loss": 1.5965, "step": 100 }, { "epoch": 0.2046908315565032, "grad_norm": 0.26143890619277954, "learning_rate": 3.986371379897785e-05, "loss": 1.6496, "step": 120 }, { "epoch": 0.23880597014925373, "grad_norm": 0.2798439562320709, "learning_rate": 3.816013628620103e-05, "loss": 1.7672, "step": 140 }, { "epoch": 0.27292110874200426, "grad_norm": 0.387648344039917, "learning_rate": 3.6456558773424196e-05, "loss": 1.5012, "step": 160 }, { "epoch": 0.3070362473347548, "grad_norm": 0.4054558277130127, "learning_rate": 3.475298126064736e-05, "loss": 1.4026, "step": 180 }, { "epoch": 0.3411513859275053, "grad_norm": 0.29433342814445496, "learning_rate": 3.3049403747870526e-05, "loss": 1.4018, "step": 200 }, { "epoch": 0.3752665245202559, "grad_norm": 0.3762003481388092, "learning_rate": 3.13458262350937e-05, "loss": 1.6192, "step": 220 }, { "epoch": 0.4093816631130064, "grad_norm": 0.46877652406692505, "learning_rate": 2.9642248722316867e-05, "loss": 1.6039, "step": 240 }, { "epoch": 0.44349680170575695, "grad_norm": 0.39935043454170227, "learning_rate": 2.7938671209540035e-05, "loss": 1.7791, "step": 260 }, { "epoch": 0.47761194029850745, "grad_norm": 0.4038810729980469, "learning_rate": 2.6235093696763204e-05, "loss": 1.8276, "step": 280 }, { "epoch": 0.511727078891258, "grad_norm": 0.4830745458602905, "learning_rate": 2.4531516183986372e-05, "loss": 1.5808, "step": 300 }, { "epoch": 0.5458422174840085, "grad_norm": 0.3768492639064789, "learning_rate": 2.2827938671209544e-05, "loss": 1.5571, "step": 320 }, { "epoch": 0.579957356076759, "grad_norm": 0.4571619927883148, "learning_rate": 2.112436115843271e-05, "loss": 1.8501, "step": 340 }, { "epoch": 0.6140724946695096, "grad_norm": 0.4635985791683197, "learning_rate": 1.942078364565588e-05, "loss": 1.9233, "step": 360 }, { "epoch": 0.6481876332622601, "grad_norm": 0.3889450132846832, "learning_rate": 1.7717206132879046e-05, "loss": 1.8055, "step": 380 }, { "epoch": 0.6823027718550106, "grad_norm": 0.4540878236293793, "learning_rate": 1.6013628620102218e-05, "loss": 1.6003, "step": 400 }, { "epoch": 0.7164179104477612, "grad_norm": 0.33580684661865234, "learning_rate": 1.4310051107325385e-05, "loss": 1.8662, "step": 420 }, { "epoch": 0.7505330490405118, "grad_norm": 0.35709238052368164, "learning_rate": 1.2606473594548551e-05, "loss": 1.3334, "step": 440 }, { "epoch": 0.7846481876332623, "grad_norm": 0.3105335235595703, "learning_rate": 1.0902896081771721e-05, "loss": 1.6746, "step": 460 }, { "epoch": 0.8187633262260128, "grad_norm": 0.35830241441726685, "learning_rate": 9.19931856899489e-06, "loss": 1.5783, "step": 480 }, { "epoch": 0.8528784648187633, "grad_norm": 0.47154855728149414, "learning_rate": 7.495741056218058e-06, "loss": 1.4601, "step": 500 }, { "epoch": 0.8869936034115139, "grad_norm": 0.4667692184448242, "learning_rate": 5.792163543441227e-06, "loss": 1.4256, "step": 520 }, { "epoch": 0.9211087420042644, "grad_norm": 0.48983854055404663, "learning_rate": 4.088586030664395e-06, "loss": 1.6231, "step": 540 }, { "epoch": 0.9552238805970149, "grad_norm": 0.40086817741394043, "learning_rate": 2.3850085178875642e-06, "loss": 1.6176, "step": 560 }, { "epoch": 0.9893390191897654, "grad_norm": 0.46825674176216125, "learning_rate": 6.814310051107326e-07, "loss": 1.7415, "step": 580 } ], "logging_steps": 20, "max_steps": 587, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.367904884228096e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }