| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9982905982905983, |
| "eval_steps": 500, |
| "global_step": 438, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.022792022792022793, |
| "grad_norm": 2.218597567152377, |
| "learning_rate": 5e-06, |
| "loss": 0.7544, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.045584045584045586, |
| "grad_norm": 0.8186028537973438, |
| "learning_rate": 5e-06, |
| "loss": 0.6997, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06837606837606838, |
| "grad_norm": 0.9212941016288362, |
| "learning_rate": 5e-06, |
| "loss": 0.6798, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.09116809116809117, |
| "grad_norm": 0.8748927154749253, |
| "learning_rate": 5e-06, |
| "loss": 0.68, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.11396011396011396, |
| "grad_norm": 0.8185828012496023, |
| "learning_rate": 5e-06, |
| "loss": 0.6762, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.13675213675213677, |
| "grad_norm": 0.7047820637148428, |
| "learning_rate": 5e-06, |
| "loss": 0.6559, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.15954415954415954, |
| "grad_norm": 0.4630577367491141, |
| "learning_rate": 5e-06, |
| "loss": 0.6526, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.18233618233618235, |
| "grad_norm": 0.37711272448868094, |
| "learning_rate": 5e-06, |
| "loss": 0.6554, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.20512820512820512, |
| "grad_norm": 0.3202737721386268, |
| "learning_rate": 5e-06, |
| "loss": 0.64, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.22792022792022792, |
| "grad_norm": 0.29895290557822196, |
| "learning_rate": 5e-06, |
| "loss": 0.6413, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.25071225071225073, |
| "grad_norm": 0.3339564719104408, |
| "learning_rate": 5e-06, |
| "loss": 0.6326, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.27350427350427353, |
| "grad_norm": 0.3089711327253267, |
| "learning_rate": 5e-06, |
| "loss": 0.6408, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2962962962962963, |
| "grad_norm": 0.2880064692869082, |
| "learning_rate": 5e-06, |
| "loss": 0.6417, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.3190883190883191, |
| "grad_norm": 0.3066866041749207, |
| "learning_rate": 5e-06, |
| "loss": 0.6439, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3418803418803419, |
| "grad_norm": 0.3183377069071228, |
| "learning_rate": 5e-06, |
| "loss": 0.6364, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.3646723646723647, |
| "grad_norm": 0.30389279648516754, |
| "learning_rate": 5e-06, |
| "loss": 0.6415, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.38746438746438744, |
| "grad_norm": 0.34515965846333546, |
| "learning_rate": 5e-06, |
| "loss": 0.6333, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.41025641025641024, |
| "grad_norm": 0.3010238903973123, |
| "learning_rate": 5e-06, |
| "loss": 0.6389, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.43304843304843305, |
| "grad_norm": 0.305044869326132, |
| "learning_rate": 5e-06, |
| "loss": 0.6314, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.45584045584045585, |
| "grad_norm": 0.30840519078259393, |
| "learning_rate": 5e-06, |
| "loss": 0.6395, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.47863247863247865, |
| "grad_norm": 0.30681357495275924, |
| "learning_rate": 5e-06, |
| "loss": 0.6358, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.5014245014245015, |
| "grad_norm": 0.30336186343842153, |
| "learning_rate": 5e-06, |
| "loss": 0.6395, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.5242165242165242, |
| "grad_norm": 0.3283645936629147, |
| "learning_rate": 5e-06, |
| "loss": 0.6351, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5470085470085471, |
| "grad_norm": 0.3041964089929852, |
| "learning_rate": 5e-06, |
| "loss": 0.631, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5698005698005698, |
| "grad_norm": 0.33949867440584647, |
| "learning_rate": 5e-06, |
| "loss": 0.6359, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5925925925925926, |
| "grad_norm": 0.30499195934646295, |
| "learning_rate": 5e-06, |
| "loss": 0.6341, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.6153846153846154, |
| "grad_norm": 0.31613496109824796, |
| "learning_rate": 5e-06, |
| "loss": 0.6313, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6381766381766382, |
| "grad_norm": 0.31969719335542396, |
| "learning_rate": 5e-06, |
| "loss": 0.642, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.6609686609686609, |
| "grad_norm": 0.3186872465072314, |
| "learning_rate": 5e-06, |
| "loss": 0.6307, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6837606837606838, |
| "grad_norm": 0.2888007951280724, |
| "learning_rate": 5e-06, |
| "loss": 0.6287, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.7065527065527065, |
| "grad_norm": 0.2960253626480404, |
| "learning_rate": 5e-06, |
| "loss": 0.6286, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.7293447293447294, |
| "grad_norm": 0.33915618291310873, |
| "learning_rate": 5e-06, |
| "loss": 0.6292, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.7521367521367521, |
| "grad_norm": 0.30116887815816673, |
| "learning_rate": 5e-06, |
| "loss": 0.6258, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.7749287749287749, |
| "grad_norm": 0.3333518580221403, |
| "learning_rate": 5e-06, |
| "loss": 0.6317, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.7977207977207977, |
| "grad_norm": 0.33224367385448017, |
| "learning_rate": 5e-06, |
| "loss": 0.6387, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.8205128205128205, |
| "grad_norm": 0.3181916905648656, |
| "learning_rate": 5e-06, |
| "loss": 0.6305, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.8433048433048433, |
| "grad_norm": 0.33030362566649507, |
| "learning_rate": 5e-06, |
| "loss": 0.6242, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.8660968660968661, |
| "grad_norm": 0.3162880358649072, |
| "learning_rate": 5e-06, |
| "loss": 0.6365, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 0.3263181921886909, |
| "learning_rate": 5e-06, |
| "loss": 0.6351, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.9116809116809117, |
| "grad_norm": 0.2982532466684275, |
| "learning_rate": 5e-06, |
| "loss": 0.6383, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.9344729344729344, |
| "grad_norm": 0.2890804672214108, |
| "learning_rate": 5e-06, |
| "loss": 0.6346, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.9572649572649573, |
| "grad_norm": 0.300069760789381, |
| "learning_rate": 5e-06, |
| "loss": 0.6214, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.98005698005698, |
| "grad_norm": 0.34145032165166, |
| "learning_rate": 5e-06, |
| "loss": 0.6345, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.9982905982905983, |
| "eval_loss": 0.6251269578933716, |
| "eval_runtime": 442.6816, |
| "eval_samples_per_second": 26.708, |
| "eval_steps_per_second": 0.418, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.9982905982905983, |
| "step": 438, |
| "total_flos": 918231661412352.0, |
| "train_loss": 0.6429911312991625, |
| "train_runtime": 23746.3651, |
| "train_samples_per_second": 9.459, |
| "train_steps_per_second": 0.018 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 438, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 918231661412352.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|