{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 129, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.007751937984496124, "grad_norm": 18.670406341552734, "learning_rate": 1.5384615384615387e-06, "loss": 0.7022, "step": 1 }, { "epoch": 0.03875968992248062, "grad_norm": 27.956096649169922, "learning_rate": 7.692307692307694e-06, "loss": 0.6412, "step": 5 }, { "epoch": 0.07751937984496124, "grad_norm": 6.442818641662598, "learning_rate": 1.5384615384615387e-05, "loss": 0.5704, "step": 10 }, { "epoch": 0.11627906976744186, "grad_norm": 5.311695098876953, "learning_rate": 1.998533413851124e-05, "loss": 0.5628, "step": 15 }, { "epoch": 0.15503875968992248, "grad_norm": 2.338909149169922, "learning_rate": 1.982083682742156e-05, "loss": 0.5298, "step": 20 }, { "epoch": 0.1937984496124031, "grad_norm": 3.053166151046753, "learning_rate": 1.9476531711828027e-05, "loss": 0.5377, "step": 25 }, { "epoch": 0.23255813953488372, "grad_norm": 1.823050618171692, "learning_rate": 1.8958722607586883e-05, "loss": 0.5015, "step": 30 }, { "epoch": 0.2713178294573643, "grad_norm": 2.082890033721924, "learning_rate": 1.827688998156891e-05, "loss": 0.5019, "step": 35 }, { "epoch": 0.31007751937984496, "grad_norm": 1.6335759162902832, "learning_rate": 1.7443517375622706e-05, "loss": 0.4828, "step": 40 }, { "epoch": 0.3488372093023256, "grad_norm": 1.522133469581604, "learning_rate": 1.647386284781828e-05, "loss": 0.4751, "step": 45 }, { "epoch": 0.3875968992248062, "grad_norm": 1.3869783878326416, "learning_rate": 1.5385679615609045e-05, "loss": 0.4726, "step": 50 }, { "epoch": 0.4263565891472868, "grad_norm": 1.2799592018127441, "learning_rate": 1.4198891015602648e-05, "loss": 0.4608, "step": 55 }, { "epoch": 0.46511627906976744, "grad_norm": 1.2317698001861572, "learning_rate": 1.2935225731039349e-05, "loss": 0.4507, "step": 60 }, { "epoch": 0.5038759689922481, "grad_norm": 13.605137825012207, "learning_rate": 1.161781996552765e-05, "loss": 0.4503, "step": 65 }, { "epoch": 0.5426356589147286, "grad_norm": 1.201204776763916, "learning_rate": 1.0270793846761347e-05, "loss": 0.4423, "step": 70 }, { "epoch": 0.5813953488372093, "grad_norm": 1.115617036819458, "learning_rate": 8.918809815760585e-06, "loss": 0.4404, "step": 75 }, { "epoch": 0.6201550387596899, "grad_norm": 1.1243541240692139, "learning_rate": 7.586621087002945e-06, "loss": 0.4309, "step": 80 }, { "epoch": 0.6589147286821705, "grad_norm": 1.0983189344406128, "learning_rate": 6.298618446600856e-06, "loss": 0.4217, "step": 85 }, { "epoch": 0.6976744186046512, "grad_norm": 0.9838166832923889, "learning_rate": 5.078383686109927e-06, "loss": 0.4165, "step": 90 }, { "epoch": 0.7364341085271318, "grad_norm": 1.0262510776519775, "learning_rate": 3.948257848062351e-06, "loss": 0.4057, "step": 95 }, { "epoch": 0.7751937984496124, "grad_norm": 1.020778775215149, "learning_rate": 2.9289321881345257e-06, "loss": 0.3977, "step": 100 }, { "epoch": 0.813953488372093, "grad_norm": 0.9869725108146667, "learning_rate": 2.0390693429435626e-06, "loss": 0.4028, "step": 105 }, { "epoch": 0.8527131782945736, "grad_norm": 0.9537738561630249, "learning_rate": 1.2949616394382802e-06, "loss": 0.3927, "step": 110 }, { "epoch": 0.8914728682170543, "grad_norm": 0.9723293781280518, "learning_rate": 7.102328018320859e-07, "loss": 0.3931, "step": 115 }, { "epoch": 0.9302325581395349, "grad_norm": 0.9556717872619629, "learning_rate": 2.955885174678852e-07, "loss": 0.3904, "step": 120 }, { "epoch": 0.9689922480620154, "grad_norm": 0.9231972694396973, "learning_rate": 5.862042845640403e-08, "loss": 0.3911, "step": 125 }, { "epoch": 1.0, "eval_loss": 0.30268725752830505, "eval_runtime": 1.3209, "eval_samples_per_second": 0.757, "eval_steps_per_second": 0.757, "step": 129 }, { "epoch": 1.0, "step": 129, "total_flos": 54019951165440.0, "train_loss": 0.46054963455643766, "train_runtime": 2340.6782, "train_samples_per_second": 7.036, "train_steps_per_second": 0.055 } ], "logging_steps": 5, "max_steps": 129, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 54019951165440.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }