{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 292, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.017123287671232876, "grad_norm": 1.0587148666381836, "learning_rate": 1.6438356164383561e-06, "loss": 1.2908, "step": 5 }, { "epoch": 0.03424657534246575, "grad_norm": 0.957391619682312, "learning_rate": 3.6986301369863014e-06, "loss": 1.408, "step": 10 }, { "epoch": 0.05136986301369863, "grad_norm": 0.8232783675193787, "learning_rate": 5.753424657534246e-06, "loss": 1.2972, "step": 15 }, { "epoch": 0.0684931506849315, "grad_norm": 0.707105278968811, "learning_rate": 7.808219178082192e-06, "loss": 1.2907, "step": 20 }, { "epoch": 0.08561643835616438, "grad_norm": 0.5033673048019409, "learning_rate": 9.863013698630136e-06, "loss": 1.28, "step": 25 }, { "epoch": 0.10273972602739725, "grad_norm": 0.51893150806427, "learning_rate": 1.1917808219178083e-05, "loss": 1.2497, "step": 30 }, { "epoch": 0.11986301369863013, "grad_norm": 0.5541372299194336, "learning_rate": 1.3972602739726027e-05, "loss": 1.2018, "step": 35 }, { "epoch": 0.136986301369863, "grad_norm": 0.4447920620441437, "learning_rate": 1.6027397260273974e-05, "loss": 1.218, "step": 40 }, { "epoch": 0.1541095890410959, "grad_norm": 0.5761701464653015, "learning_rate": 1.8082191780821916e-05, "loss": 1.2215, "step": 45 }, { "epoch": 0.17123287671232876, "grad_norm": 0.46446430683135986, "learning_rate": 2.0136986301369863e-05, "loss": 1.1981, "step": 50 }, { "epoch": 0.18835616438356165, "grad_norm": 0.4923893213272095, "learning_rate": 2.219178082191781e-05, "loss": 1.2212, "step": 55 }, { "epoch": 0.2054794520547945, "grad_norm": 0.4145517945289612, "learning_rate": 2.4246575342465755e-05, "loss": 1.1524, "step": 60 }, { "epoch": 0.2226027397260274, "grad_norm": 0.5622988939285278, "learning_rate": 2.6301369863013698e-05, "loss": 1.15, "step": 65 }, { "epoch": 0.23972602739726026, "grad_norm": 0.45440879464149475, "learning_rate": 2.8356164383561644e-05, "loss": 1.1336, "step": 70 }, { "epoch": 0.2568493150684932, "grad_norm": 0.5431708693504333, "learning_rate": 2.999996152240661e-05, "loss": 1.1332, "step": 75 }, { "epoch": 0.273972602739726, "grad_norm": 0.5097510814666748, "learning_rate": 2.9998614827365136e-05, "loss": 1.0534, "step": 80 }, { "epoch": 0.2910958904109589, "grad_norm": 0.523253858089447, "learning_rate": 2.999534445005289e-05, "loss": 1.0262, "step": 85 }, { "epoch": 0.3082191780821918, "grad_norm": 0.5461484789848328, "learning_rate": 2.9990150809919714e-05, "loss": 1.0322, "step": 90 }, { "epoch": 0.3253424657534247, "grad_norm": 0.5159561634063721, "learning_rate": 2.998303457308803e-05, "loss": 1.0268, "step": 95 }, { "epoch": 0.3424657534246575, "grad_norm": 0.5679484009742737, "learning_rate": 2.997399665226736e-05, "loss": 1.0459, "step": 100 }, { "epoch": 0.3595890410958904, "grad_norm": 0.659304678440094, "learning_rate": 2.9963038206637277e-05, "loss": 1.0858, "step": 105 }, { "epoch": 0.3767123287671233, "grad_norm": 0.5805976390838623, "learning_rate": 2.9950160641698755e-05, "loss": 1.0286, "step": 110 }, { "epoch": 0.3938356164383562, "grad_norm": 0.6749956011772156, "learning_rate": 2.993536560909387e-05, "loss": 0.9836, "step": 115 }, { "epoch": 0.410958904109589, "grad_norm": 0.6401397585868835, "learning_rate": 2.991865500639398e-05, "loss": 0.996, "step": 120 }, { "epoch": 0.4280821917808219, "grad_norm": 0.6153554916381836, "learning_rate": 2.990003097685634e-05, "loss": 0.9677, "step": 125 }, { "epoch": 0.4452054794520548, "grad_norm": 0.6153773069381714, "learning_rate": 2.987949590914923e-05, "loss": 0.9196, "step": 130 }, { "epoch": 0.4623287671232877, "grad_norm": 0.6787427067756653, "learning_rate": 2.985705243704559e-05, "loss": 0.9613, "step": 135 }, { "epoch": 0.4794520547945205, "grad_norm": 0.6827302575111389, "learning_rate": 2.9832703439085174e-05, "loss": 0.9559, "step": 140 }, { "epoch": 0.4965753424657534, "grad_norm": 0.7343211770057678, "learning_rate": 2.9806452038205437e-05, "loss": 0.9245, "step": 145 }, { "epoch": 0.5136986301369864, "grad_norm": 0.7253044247627258, "learning_rate": 2.977830160134091e-05, "loss": 0.9052, "step": 150 }, { "epoch": 0.5308219178082192, "grad_norm": 0.7713239789009094, "learning_rate": 2.974825573899144e-05, "loss": 0.9172, "step": 155 }, { "epoch": 0.547945205479452, "grad_norm": 0.7671046257019043, "learning_rate": 2.9716318304759057e-05, "loss": 0.9238, "step": 160 }, { "epoch": 0.565068493150685, "grad_norm": 0.8231728076934814, "learning_rate": 2.9682493394853763e-05, "loss": 0.9248, "step": 165 }, { "epoch": 0.5821917808219178, "grad_norm": 0.8311623930931091, "learning_rate": 2.9646785347568143e-05, "loss": 0.8765, "step": 170 }, { "epoch": 0.5993150684931506, "grad_norm": 0.7531796097755432, "learning_rate": 2.9609198742720957e-05, "loss": 0.8939, "step": 175 }, { "epoch": 0.6164383561643836, "grad_norm": 1.0523626804351807, "learning_rate": 2.9569738401069728e-05, "loss": 0.8488, "step": 180 }, { "epoch": 0.6335616438356164, "grad_norm": 0.9050348401069641, "learning_rate": 2.9528409383692465e-05, "loss": 0.8467, "step": 185 }, { "epoch": 0.6506849315068494, "grad_norm": 0.8227254748344421, "learning_rate": 2.948521699133853e-05, "loss": 0.83, "step": 190 }, { "epoch": 0.6678082191780822, "grad_norm": 0.8400980830192566, "learning_rate": 2.9440166763748782e-05, "loss": 0.7697, "step": 195 }, { "epoch": 0.684931506849315, "grad_norm": 0.854492723941803, "learning_rate": 2.9393264478945073e-05, "loss": 0.8413, "step": 200 }, { "epoch": 0.702054794520548, "grad_norm": 0.9226430654525757, "learning_rate": 2.934451615248915e-05, "loss": 0.8085, "step": 205 }, { "epoch": 0.7191780821917808, "grad_norm": 0.8370131850242615, "learning_rate": 2.929392803671114e-05, "loss": 0.8028, "step": 210 }, { "epoch": 0.7363013698630136, "grad_norm": 0.927441418170929, "learning_rate": 2.9241506619907636e-05, "loss": 0.8509, "step": 215 }, { "epoch": 0.7534246575342466, "grad_norm": 0.9028705358505249, "learning_rate": 2.9187258625509518e-05, "loss": 0.7967, "step": 220 }, { "epoch": 0.7705479452054794, "grad_norm": 0.8795896768569946, "learning_rate": 2.9131191011219634e-05, "loss": 0.7865, "step": 225 }, { "epoch": 0.7876712328767124, "grad_norm": 0.8585197329521179, "learning_rate": 2.907331096812041e-05, "loss": 0.764, "step": 230 }, { "epoch": 0.8047945205479452, "grad_norm": 0.8900719881057739, "learning_rate": 2.9013625919751557e-05, "loss": 0.8205, "step": 235 }, { "epoch": 0.821917808219178, "grad_norm": 0.9867483377456665, "learning_rate": 2.8952143521157933e-05, "loss": 0.7868, "step": 240 }, { "epoch": 0.839041095890411, "grad_norm": 1.04231595993042, "learning_rate": 2.888887165790775e-05, "loss": 0.7418, "step": 245 }, { "epoch": 0.8561643835616438, "grad_norm": 0.9595353007316589, "learning_rate": 2.8823818445081152e-05, "loss": 0.7532, "step": 250 }, { "epoch": 0.8732876712328768, "grad_norm": 1.2640131711959839, "learning_rate": 2.8756992226229443e-05, "loss": 0.6791, "step": 255 }, { "epoch": 0.8904109589041096, "grad_norm": 1.0312385559082031, "learning_rate": 2.8688401572304927e-05, "loss": 0.7609, "step": 260 }, { "epoch": 0.9075342465753424, "grad_norm": 0.9013313055038452, "learning_rate": 2.8618055280561656e-05, "loss": 0.7723, "step": 265 }, { "epoch": 0.9246575342465754, "grad_norm": 1.1069859266281128, "learning_rate": 2.854596237342708e-05, "loss": 0.6889, "step": 270 }, { "epoch": 0.9417808219178082, "grad_norm": 1.0332902669906616, "learning_rate": 2.8472132097344877e-05, "loss": 0.7521, "step": 275 }, { "epoch": 0.958904109589041, "grad_norm": 1.0214784145355225, "learning_rate": 2.839657392158904e-05, "loss": 0.6667, "step": 280 }, { "epoch": 0.976027397260274, "grad_norm": 1.0733942985534668, "learning_rate": 2.8319297537049338e-05, "loss": 0.6784, "step": 285 }, { "epoch": 0.9931506849315068, "grad_norm": 0.9576444029808044, "learning_rate": 2.8240312854988424e-05, "loss": 0.7012, "step": 290 } ], "logging_steps": 5, "max_steps": 1460, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.2721250868881e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }