{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9982905982905983, "eval_steps": 500, "global_step": 438, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.022792022792022793, "grad_norm": 2.218597567152377, "learning_rate": 5e-06, "loss": 0.7544, "step": 10 }, { "epoch": 0.045584045584045586, "grad_norm": 0.8186028537973438, "learning_rate": 5e-06, "loss": 0.6997, "step": 20 }, { "epoch": 0.06837606837606838, "grad_norm": 0.9212941016288362, "learning_rate": 5e-06, "loss": 0.6798, "step": 30 }, { "epoch": 0.09116809116809117, "grad_norm": 0.8748927154749253, "learning_rate": 5e-06, "loss": 0.68, "step": 40 }, { "epoch": 0.11396011396011396, "grad_norm": 0.8185828012496023, "learning_rate": 5e-06, "loss": 0.6762, "step": 50 }, { "epoch": 0.13675213675213677, "grad_norm": 0.7047820637148428, "learning_rate": 5e-06, "loss": 0.6559, "step": 60 }, { "epoch": 0.15954415954415954, "grad_norm": 0.4630577367491141, "learning_rate": 5e-06, "loss": 0.6526, "step": 70 }, { "epoch": 0.18233618233618235, "grad_norm": 0.37711272448868094, "learning_rate": 5e-06, "loss": 0.6554, "step": 80 }, { "epoch": 0.20512820512820512, "grad_norm": 0.3202737721386268, "learning_rate": 5e-06, "loss": 0.64, "step": 90 }, { "epoch": 0.22792022792022792, "grad_norm": 0.29895290557822196, "learning_rate": 5e-06, "loss": 0.6413, "step": 100 }, { "epoch": 0.25071225071225073, "grad_norm": 0.3339564719104408, "learning_rate": 5e-06, "loss": 0.6326, "step": 110 }, { "epoch": 0.27350427350427353, "grad_norm": 0.3089711327253267, "learning_rate": 5e-06, "loss": 0.6408, "step": 120 }, { "epoch": 0.2962962962962963, "grad_norm": 0.2880064692869082, "learning_rate": 5e-06, "loss": 0.6417, "step": 130 }, { "epoch": 0.3190883190883191, "grad_norm": 0.3066866041749207, "learning_rate": 5e-06, "loss": 0.6439, "step": 140 }, { "epoch": 0.3418803418803419, "grad_norm": 0.3183377069071228, "learning_rate": 5e-06, "loss": 0.6364, "step": 150 }, { "epoch": 0.3646723646723647, "grad_norm": 0.30389279648516754, "learning_rate": 5e-06, "loss": 0.6415, "step": 160 }, { "epoch": 0.38746438746438744, "grad_norm": 0.34515965846333546, "learning_rate": 5e-06, "loss": 0.6333, "step": 170 }, { "epoch": 0.41025641025641024, "grad_norm": 0.3010238903973123, "learning_rate": 5e-06, "loss": 0.6389, "step": 180 }, { "epoch": 0.43304843304843305, "grad_norm": 0.305044869326132, "learning_rate": 5e-06, "loss": 0.6314, "step": 190 }, { "epoch": 0.45584045584045585, "grad_norm": 0.30840519078259393, "learning_rate": 5e-06, "loss": 0.6395, "step": 200 }, { "epoch": 0.47863247863247865, "grad_norm": 0.30681357495275924, "learning_rate": 5e-06, "loss": 0.6358, "step": 210 }, { "epoch": 0.5014245014245015, "grad_norm": 0.30336186343842153, "learning_rate": 5e-06, "loss": 0.6395, "step": 220 }, { "epoch": 0.5242165242165242, "grad_norm": 0.3283645936629147, "learning_rate": 5e-06, "loss": 0.6351, "step": 230 }, { "epoch": 0.5470085470085471, "grad_norm": 0.3041964089929852, "learning_rate": 5e-06, "loss": 0.631, "step": 240 }, { "epoch": 0.5698005698005698, "grad_norm": 0.33949867440584647, "learning_rate": 5e-06, "loss": 0.6359, "step": 250 }, { "epoch": 0.5925925925925926, "grad_norm": 0.30499195934646295, "learning_rate": 5e-06, "loss": 0.6341, "step": 260 }, { "epoch": 0.6153846153846154, "grad_norm": 0.31613496109824796, "learning_rate": 5e-06, "loss": 0.6313, "step": 270 }, { "epoch": 0.6381766381766382, "grad_norm": 0.31969719335542396, "learning_rate": 5e-06, "loss": 0.642, "step": 280 }, { "epoch": 0.6609686609686609, "grad_norm": 0.3186872465072314, "learning_rate": 5e-06, "loss": 0.6307, "step": 290 }, { "epoch": 0.6837606837606838, "grad_norm": 0.2888007951280724, "learning_rate": 5e-06, "loss": 0.6287, "step": 300 }, { "epoch": 0.7065527065527065, "grad_norm": 0.2960253626480404, "learning_rate": 5e-06, "loss": 0.6286, "step": 310 }, { "epoch": 0.7293447293447294, "grad_norm": 0.33915618291310873, "learning_rate": 5e-06, "loss": 0.6292, "step": 320 }, { "epoch": 0.7521367521367521, "grad_norm": 0.30116887815816673, "learning_rate": 5e-06, "loss": 0.6258, "step": 330 }, { "epoch": 0.7749287749287749, "grad_norm": 0.3333518580221403, "learning_rate": 5e-06, "loss": 0.6317, "step": 340 }, { "epoch": 0.7977207977207977, "grad_norm": 0.33224367385448017, "learning_rate": 5e-06, "loss": 0.6387, "step": 350 }, { "epoch": 0.8205128205128205, "grad_norm": 0.3181916905648656, "learning_rate": 5e-06, "loss": 0.6305, "step": 360 }, { "epoch": 0.8433048433048433, "grad_norm": 0.33030362566649507, "learning_rate": 5e-06, "loss": 0.6242, "step": 370 }, { "epoch": 0.8660968660968661, "grad_norm": 0.3162880358649072, "learning_rate": 5e-06, "loss": 0.6365, "step": 380 }, { "epoch": 0.8888888888888888, "grad_norm": 0.3263181921886909, "learning_rate": 5e-06, "loss": 0.6351, "step": 390 }, { "epoch": 0.9116809116809117, "grad_norm": 0.2982532466684275, "learning_rate": 5e-06, "loss": 0.6383, "step": 400 }, { "epoch": 0.9344729344729344, "grad_norm": 0.2890804672214108, "learning_rate": 5e-06, "loss": 0.6346, "step": 410 }, { "epoch": 0.9572649572649573, "grad_norm": 0.300069760789381, "learning_rate": 5e-06, "loss": 0.6214, "step": 420 }, { "epoch": 0.98005698005698, "grad_norm": 0.34145032165166, "learning_rate": 5e-06, "loss": 0.6345, "step": 430 }, { "epoch": 0.9982905982905983, "eval_loss": 0.6251269578933716, "eval_runtime": 442.6816, "eval_samples_per_second": 26.708, "eval_steps_per_second": 0.418, "step": 438 }, { "epoch": 0.9982905982905983, "step": 438, "total_flos": 918231661412352.0, "train_loss": 0.6429911312991625, "train_runtime": 23746.3651, "train_samples_per_second": 9.459, "train_steps_per_second": 0.018 } ], "logging_steps": 10, "max_steps": 438, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 918231661412352.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }