{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 180, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08403361344537816, "grad_norm": 0.5671328902244568, "learning_rate": 8e-06, "loss": 1.9146, "step": 5 }, { "epoch": 0.16806722689075632, "grad_norm": 0.6045827269554138, "learning_rate": 1.8e-05, "loss": 1.8831, "step": 10 }, { "epoch": 0.25210084033613445, "grad_norm": 0.4676426351070404, "learning_rate": 2.8e-05, "loss": 1.8656, "step": 15 }, { "epoch": 0.33613445378151263, "grad_norm": 0.6086534857749939, "learning_rate": 2.998542122917149e-05, "loss": 1.7756, "step": 20 }, { "epoch": 0.42016806722689076, "grad_norm": 0.5000975131988525, "learning_rate": 2.9926243538175172e-05, "loss": 1.7644, "step": 25 }, { "epoch": 0.5042016806722689, "grad_norm": 0.4531285762786865, "learning_rate": 2.9821735336128774e-05, "loss": 1.7086, "step": 30 }, { "epoch": 0.5882352941176471, "grad_norm": 0.503158688545227, "learning_rate": 2.9672214011007087e-05, "loss": 1.6228, "step": 35 }, { "epoch": 0.6722689075630253, "grad_norm": 0.5503730177879333, "learning_rate": 2.947813365416023e-05, "loss": 1.541, "step": 40 }, { "epoch": 0.7563025210084033, "grad_norm": 0.5910710692405701, "learning_rate": 2.9240083681253192e-05, "loss": 1.5533, "step": 45 }, { "epoch": 0.8403361344537815, "grad_norm": 0.5865580439567566, "learning_rate": 2.895878704222978e-05, "loss": 1.5369, "step": 50 }, { "epoch": 0.9243697478991597, "grad_norm": 0.694772481918335, "learning_rate": 2.863509802573744e-05, "loss": 1.4103, "step": 55 }, { "epoch": 1.0, "grad_norm": 1.0787880420684814, "learning_rate": 2.826999966468069e-05, "loss": 1.4139, "step": 60 }, { "epoch": 1.084033613445378, "grad_norm": 0.8692999482154846, "learning_rate": 2.7864600750782507e-05, "loss": 1.2387, "step": 65 }, { "epoch": 1.1680672268907564, "grad_norm": 0.8634143471717834, "learning_rate": 2.74201324672203e-05, "loss": 1.23, "step": 70 }, { "epoch": 1.2521008403361344, "grad_norm": 1.0803030729293823, "learning_rate": 2.6937944649563078e-05, "loss": 1.2203, "step": 75 }, { "epoch": 1.3361344537815127, "grad_norm": 1.4019559621810913, "learning_rate": 2.641950168636517e-05, "loss": 1.1067, "step": 80 }, { "epoch": 1.4201680672268908, "grad_norm": 1.0336651802062988, "learning_rate": 2.5866378071866338e-05, "loss": 1.0889, "step": 85 }, { "epoch": 1.504201680672269, "grad_norm": 1.1765518188476562, "learning_rate": 2.52802536243045e-05, "loss": 1.05, "step": 90 }, { "epoch": 1.5882352941176472, "grad_norm": 1.146044135093689, "learning_rate": 2.4662908384362964e-05, "loss": 0.9963, "step": 95 }, { "epoch": 1.6722689075630253, "grad_norm": 1.2770051956176758, "learning_rate": 2.4016217209245377e-05, "loss": 0.9403, "step": 100 }, { "epoch": 1.7563025210084033, "grad_norm": 1.9435651302337646, "learning_rate": 2.3342144078796007e-05, "loss": 0.9242, "step": 105 }, { "epoch": 1.8403361344537816, "grad_norm": 1.3785291910171509, "learning_rate": 2.2642736130957522e-05, "loss": 0.8651, "step": 110 }, { "epoch": 1.9243697478991597, "grad_norm": 1.5004098415374756, "learning_rate": 2.1920117444680317e-05, "loss": 0.8922, "step": 115 }, { "epoch": 2.0, "grad_norm": 1.874428153038025, "learning_rate": 2.1176482589164575e-05, "loss": 0.8208, "step": 120 }, { "epoch": 2.0840336134453783, "grad_norm": 1.3628363609313965, "learning_rate": 2.0414089959025724e-05, "loss": 0.7341, "step": 125 }, { "epoch": 2.168067226890756, "grad_norm": 2.062274694442749, "learning_rate": 1.963525491562421e-05, "loss": 0.7337, "step": 130 }, { "epoch": 2.2521008403361344, "grad_norm": 1.80815589427948, "learning_rate": 1.8842342755389172e-05, "loss": 0.6989, "step": 135 }, { "epoch": 2.3361344537815127, "grad_norm": 1.5047976970672607, "learning_rate": 1.803776152649088e-05, "loss": 0.6134, "step": 140 }, { "epoch": 2.4201680672268906, "grad_norm": 1.5067673921585083, "learning_rate": 1.722395471567763e-05, "loss": 0.6403, "step": 145 }, { "epoch": 2.504201680672269, "grad_norm": 1.7410961389541626, "learning_rate": 1.6403393827486768e-05, "loss": 0.659, "step": 150 }, { "epoch": 2.588235294117647, "grad_norm": 1.5989471673965454, "learning_rate": 1.5578570878366656e-05, "loss": 0.6186, "step": 155 }, { "epoch": 2.6722689075630255, "grad_norm": 1.7921215295791626, "learning_rate": 1.4751990828504623e-05, "loss": 0.6041, "step": 160 }, { "epoch": 2.7563025210084033, "grad_norm": 1.5869600772857666, "learning_rate": 1.3926163974345199e-05, "loss": 0.5408, "step": 165 }, { "epoch": 2.8403361344537816, "grad_norm": 1.6724202632904053, "learning_rate": 1.3103598324902307e-05, "loss": 0.5608, "step": 170 }, { "epoch": 2.92436974789916, "grad_norm": 1.8233758211135864, "learning_rate": 1.2286791985018356e-05, "loss": 0.5338, "step": 175 }, { "epoch": 3.0, "grad_norm": 2.5403754711151123, "learning_rate": 1.1478225568701888e-05, "loss": 0.5532, "step": 180 } ], "logging_steps": 5, "max_steps": 300, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.8358614913581056e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }