{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016025641025641024, "grad_norm": 1.0291906595230103, "learning_rate": 1.5384615384615385e-06, "loss": 1.3399, "step": 5 }, { "epoch": 0.03205128205128205, "grad_norm": 1.1106044054031372, "learning_rate": 3.4615384615384617e-06, "loss": 1.3951, "step": 10 }, { "epoch": 0.04807692307692308, "grad_norm": 0.6151779890060425, "learning_rate": 5.384615384615385e-06, "loss": 1.3654, "step": 15 }, { "epoch": 0.0641025641025641, "grad_norm": 0.6062663793563843, "learning_rate": 7.307692307692308e-06, "loss": 1.3207, "step": 20 }, { "epoch": 0.08012820512820513, "grad_norm": 0.5593442320823669, "learning_rate": 9.230769230769232e-06, "loss": 1.3146, "step": 25 }, { "epoch": 0.09615384615384616, "grad_norm": 0.5808208584785461, "learning_rate": 1.1153846153846154e-05, "loss": 1.3007, "step": 30 }, { "epoch": 0.11217948717948718, "grad_norm": 0.600754976272583, "learning_rate": 1.3076923076923078e-05, "loss": 1.2647, "step": 35 }, { "epoch": 0.1282051282051282, "grad_norm": 0.4767054617404938, "learning_rate": 1.5e-05, "loss": 1.276, "step": 40 }, { "epoch": 0.14423076923076922, "grad_norm": 0.4327481687068939, "learning_rate": 1.6923076923076924e-05, "loss": 1.2434, "step": 45 }, { "epoch": 0.16025641025641027, "grad_norm": 0.4272952079772949, "learning_rate": 1.8846153846153846e-05, "loss": 1.2425, "step": 50 }, { "epoch": 0.1762820512820513, "grad_norm": 0.4927898943424225, "learning_rate": 2.076923076923077e-05, "loss": 1.233, "step": 55 }, { "epoch": 0.19230769230769232, "grad_norm": 0.39667901396751404, "learning_rate": 2.269230769230769e-05, "loss": 1.2061, "step": 60 }, { "epoch": 0.20833333333333334, "grad_norm": 0.5480871200561523, "learning_rate": 2.4615384615384616e-05, "loss": 1.2113, "step": 65 }, { "epoch": 0.22435897435897437, "grad_norm": 0.4961429536342621, "learning_rate": 2.6538461538461538e-05, "loss": 1.1851, "step": 70 }, { "epoch": 0.2403846153846154, "grad_norm": 0.47594135999679565, "learning_rate": 2.846153846153846e-05, "loss": 1.2022, "step": 75 }, { "epoch": 0.2564102564102564, "grad_norm": 0.5065383911132812, "learning_rate": 2.9999966297319848e-05, "loss": 1.1301, "step": 80 }, { "epoch": 0.2724358974358974, "grad_norm": 0.594122052192688, "learning_rate": 2.9998786719416534e-05, "loss": 1.1598, "step": 85 }, { "epoch": 0.28846153846153844, "grad_norm": 0.6093367338180542, "learning_rate": 2.9995922158951827e-05, "loss": 1.1405, "step": 90 }, { "epoch": 0.30448717948717946, "grad_norm": 0.5758622288703918, "learning_rate": 2.9991372937734057e-05, "loss": 1.1319, "step": 95 }, { "epoch": 0.32051282051282054, "grad_norm": 0.5893705487251282, "learning_rate": 2.9985139566828457e-05, "loss": 1.1455, "step": 100 }, { "epoch": 0.33653846153846156, "grad_norm": 0.541392982006073, "learning_rate": 2.997722274649974e-05, "loss": 1.1353, "step": 105 }, { "epoch": 0.3525641025641026, "grad_norm": 0.5347727537155151, "learning_rate": 2.9967623366133475e-05, "loss": 1.0905, "step": 110 }, { "epoch": 0.3685897435897436, "grad_norm": 0.6084627509117126, "learning_rate": 2.995634250413612e-05, "loss": 1.1164, "step": 115 }, { "epoch": 0.38461538461538464, "grad_norm": 0.6210500597953796, "learning_rate": 2.9943381427813882e-05, "loss": 1.0631, "step": 120 }, { "epoch": 0.40064102564102566, "grad_norm": 0.618479311466217, "learning_rate": 2.9928741593230393e-05, "loss": 1.0156, "step": 125 }, { "epoch": 0.4166666666666667, "grad_norm": 0.6391302943229675, "learning_rate": 2.9912424645043064e-05, "loss": 1.036, "step": 130 }, { "epoch": 0.4326923076923077, "grad_norm": 0.7931795120239258, "learning_rate": 2.989443241631839e-05, "loss": 1.0432, "step": 135 }, { "epoch": 0.44871794871794873, "grad_norm": 0.7722377181053162, "learning_rate": 2.987476692832596e-05, "loss": 1.0204, "step": 140 }, { "epoch": 0.46474358974358976, "grad_norm": 0.6615847945213318, "learning_rate": 2.9853430390311434e-05, "loss": 1.0592, "step": 145 }, { "epoch": 0.4807692307692308, "grad_norm": 0.7195934057235718, "learning_rate": 2.983042519924831e-05, "loss": 1.0199, "step": 150 }, { "epoch": 0.4967948717948718, "grad_norm": 0.731874942779541, "learning_rate": 2.980575393956869e-05, "loss": 1.003, "step": 155 }, { "epoch": 0.5128205128205128, "grad_norm": 0.7320418953895569, "learning_rate": 2.977941938287292e-05, "loss": 0.9789, "step": 160 }, { "epoch": 0.5288461538461539, "grad_norm": 0.6661425828933716, "learning_rate": 2.9751424487618196e-05, "loss": 0.9349, "step": 165 }, { "epoch": 0.5448717948717948, "grad_norm": 0.8029599189758301, "learning_rate": 2.972177239878627e-05, "loss": 0.9327, "step": 170 }, { "epoch": 0.5608974358974359, "grad_norm": 0.8484524488449097, "learning_rate": 2.969046644753008e-05, "loss": 0.9744, "step": 175 }, { "epoch": 0.5769230769230769, "grad_norm": 0.7641316652297974, "learning_rate": 2.965751015079957e-05, "loss": 0.9159, "step": 180 }, { "epoch": 0.592948717948718, "grad_norm": 0.7425558567047119, "learning_rate": 2.962290721094655e-05, "loss": 0.9187, "step": 185 }, { "epoch": 0.6089743589743589, "grad_norm": 0.8162761926651001, "learning_rate": 2.9586661515308793e-05, "loss": 0.9121, "step": 190 }, { "epoch": 0.625, "grad_norm": 0.9251567125320435, "learning_rate": 2.9548777135773338e-05, "loss": 0.8566, "step": 195 }, { "epoch": 0.6410256410256411, "grad_norm": 0.8238964676856995, "learning_rate": 2.950925832831901e-05, "loss": 0.8903, "step": 200 }, { "epoch": 0.657051282051282, "grad_norm": 0.8461718559265137, "learning_rate": 2.9468109532538346e-05, "loss": 0.8331, "step": 205 }, { "epoch": 0.6730769230769231, "grad_norm": 1.0172253847122192, "learning_rate": 2.9425335371138802e-05, "loss": 0.8476, "step": 210 }, { "epoch": 0.6891025641025641, "grad_norm": 0.8224555253982544, "learning_rate": 2.9380940649423462e-05, "loss": 0.8956, "step": 215 }, { "epoch": 0.7051282051282052, "grad_norm": 0.9176145195960999, "learning_rate": 2.933493035475119e-05, "loss": 0.834, "step": 220 }, { "epoch": 0.7211538461538461, "grad_norm": 0.8288151025772095, "learning_rate": 2.928730965597635e-05, "loss": 0.8746, "step": 225 }, { "epoch": 0.7371794871794872, "grad_norm": 1.015427589416504, "learning_rate": 2.9238083902868123e-05, "loss": 0.8229, "step": 230 }, { "epoch": 0.7532051282051282, "grad_norm": 0.8535773158073425, "learning_rate": 2.9187258625509518e-05, "loss": 0.8432, "step": 235 }, { "epoch": 0.7692307692307693, "grad_norm": 0.8888025283813477, "learning_rate": 2.9134839533676115e-05, "loss": 0.7881, "step": 240 }, { "epoch": 0.7852564102564102, "grad_norm": 0.9770839810371399, "learning_rate": 2.9080832516194596e-05, "loss": 0.7904, "step": 245 }, { "epoch": 0.8012820512820513, "grad_norm": 1.187432050704956, "learning_rate": 2.9025243640281226e-05, "loss": 0.7799, "step": 250 }, { "epoch": 0.8173076923076923, "grad_norm": 0.9135069847106934, "learning_rate": 2.8968079150860228e-05, "loss": 0.7853, "step": 255 }, { "epoch": 0.8333333333333334, "grad_norm": 1.066171407699585, "learning_rate": 2.8909345469862228e-05, "loss": 0.7577, "step": 260 }, { "epoch": 0.8493589743589743, "grad_norm": 0.9215983152389526, "learning_rate": 2.8849049195502812e-05, "loss": 0.8291, "step": 265 }, { "epoch": 0.8653846153846154, "grad_norm": 0.940805196762085, "learning_rate": 2.8787197101541266e-05, "loss": 0.7549, "step": 270 }, { "epoch": 0.8814102564102564, "grad_norm": 0.9688215851783752, "learning_rate": 2.8723796136519604e-05, "loss": 0.7478, "step": 275 }, { "epoch": 0.8974358974358975, "grad_norm": 0.9502094984054565, "learning_rate": 2.8658853422981964e-05, "loss": 0.7425, "step": 280 }, { "epoch": 0.9134615384615384, "grad_norm": 1.0309516191482544, "learning_rate": 2.8592376256674455e-05, "loss": 0.7053, "step": 285 }, { "epoch": 0.9294871794871795, "grad_norm": 1.0961329936981201, "learning_rate": 2.852437210572553e-05, "loss": 0.721, "step": 290 }, { "epoch": 0.9455128205128205, "grad_norm": 1.02060067653656, "learning_rate": 2.845484860980703e-05, "loss": 0.6888, "step": 295 }, { "epoch": 0.9615384615384616, "grad_norm": 0.9542692303657532, "learning_rate": 2.8383813579275912e-05, "loss": 0.6602, "step": 300 }, { "epoch": 0.9775641025641025, "grad_norm": 1.0014078617095947, "learning_rate": 2.8311274994296835e-05, "loss": 0.7161, "step": 305 }, { "epoch": 0.9935897435897436, "grad_norm": 0.9263032078742981, "learning_rate": 2.823724100394565e-05, "loss": 0.7149, "step": 310 } ], "logging_steps": 5, "max_steps": 1560, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.397092298219848e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }