| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 587, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03411513859275053, |
| "grad_norm": 8.578770637512207, |
| "learning_rate": 4.838160136286201e-05, |
| "loss": 3.8842, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06823027718550106, |
| "grad_norm": 8.367802619934082, |
| "learning_rate": 4.667802385008518e-05, |
| "loss": 2.9618, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.1023454157782516, |
| "grad_norm": 0.27688243985176086, |
| "learning_rate": 4.4974446337308354e-05, |
| "loss": 1.5431, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.13646055437100213, |
| "grad_norm": 0.3149465024471283, |
| "learning_rate": 4.3270868824531515e-05, |
| "loss": 1.6255, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.17057569296375266, |
| "grad_norm": 0.41583290696144104, |
| "learning_rate": 4.1567291311754684e-05, |
| "loss": 1.5965, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2046908315565032, |
| "grad_norm": 0.26143890619277954, |
| "learning_rate": 3.986371379897785e-05, |
| "loss": 1.6496, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.23880597014925373, |
| "grad_norm": 0.2798439562320709, |
| "learning_rate": 3.816013628620103e-05, |
| "loss": 1.7672, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.27292110874200426, |
| "grad_norm": 0.387648344039917, |
| "learning_rate": 3.6456558773424196e-05, |
| "loss": 1.5012, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.3070362473347548, |
| "grad_norm": 0.4054558277130127, |
| "learning_rate": 3.475298126064736e-05, |
| "loss": 1.4026, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.3411513859275053, |
| "grad_norm": 0.29433342814445496, |
| "learning_rate": 3.3049403747870526e-05, |
| "loss": 1.4018, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3752665245202559, |
| "grad_norm": 0.3762003481388092, |
| "learning_rate": 3.13458262350937e-05, |
| "loss": 1.6192, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.4093816631130064, |
| "grad_norm": 0.46877652406692505, |
| "learning_rate": 2.9642248722316867e-05, |
| "loss": 1.6039, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.44349680170575695, |
| "grad_norm": 0.39935043454170227, |
| "learning_rate": 2.7938671209540035e-05, |
| "loss": 1.7791, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.47761194029850745, |
| "grad_norm": 0.4038810729980469, |
| "learning_rate": 2.6235093696763204e-05, |
| "loss": 1.8276, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.511727078891258, |
| "grad_norm": 0.4830745458602905, |
| "learning_rate": 2.4531516183986372e-05, |
| "loss": 1.5808, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5458422174840085, |
| "grad_norm": 0.3768492639064789, |
| "learning_rate": 2.2827938671209544e-05, |
| "loss": 1.5571, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.579957356076759, |
| "grad_norm": 0.4571619927883148, |
| "learning_rate": 2.112436115843271e-05, |
| "loss": 1.8501, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.6140724946695096, |
| "grad_norm": 0.4635985791683197, |
| "learning_rate": 1.942078364565588e-05, |
| "loss": 1.9233, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.6481876332622601, |
| "grad_norm": 0.3889450132846832, |
| "learning_rate": 1.7717206132879046e-05, |
| "loss": 1.8055, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.6823027718550106, |
| "grad_norm": 0.4540878236293793, |
| "learning_rate": 1.6013628620102218e-05, |
| "loss": 1.6003, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.7164179104477612, |
| "grad_norm": 0.33580684661865234, |
| "learning_rate": 1.4310051107325385e-05, |
| "loss": 1.8662, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.7505330490405118, |
| "grad_norm": 0.35709238052368164, |
| "learning_rate": 1.2606473594548551e-05, |
| "loss": 1.3334, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.7846481876332623, |
| "grad_norm": 0.3105335235595703, |
| "learning_rate": 1.0902896081771721e-05, |
| "loss": 1.6746, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.8187633262260128, |
| "grad_norm": 0.35830241441726685, |
| "learning_rate": 9.19931856899489e-06, |
| "loss": 1.5783, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.8528784648187633, |
| "grad_norm": 0.47154855728149414, |
| "learning_rate": 7.495741056218058e-06, |
| "loss": 1.4601, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8869936034115139, |
| "grad_norm": 0.4667692184448242, |
| "learning_rate": 5.792163543441227e-06, |
| "loss": 1.4256, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.9211087420042644, |
| "grad_norm": 0.48983854055404663, |
| "learning_rate": 4.088586030664395e-06, |
| "loss": 1.6231, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.9552238805970149, |
| "grad_norm": 0.40086817741394043, |
| "learning_rate": 2.3850085178875642e-06, |
| "loss": 1.6176, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.9893390191897654, |
| "grad_norm": 0.46825674176216125, |
| "learning_rate": 6.814310051107326e-07, |
| "loss": 1.7415, |
| "step": 580 |
| } |
| ], |
| "logging_steps": 20, |
| "max_steps": 587, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.367904884228096e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|