{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 303, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01652892561983471, "grad_norm": 1.4201081991195679, "learning_rate": 1.5789473684210526e-06, "loss": 1.3341, "step": 5 }, { "epoch": 0.03305785123966942, "grad_norm": 0.8825254440307617, "learning_rate": 3.5526315789473683e-06, "loss": 1.3559, "step": 10 }, { "epoch": 0.049586776859504134, "grad_norm": 0.6610410213470459, "learning_rate": 5.526315789473684e-06, "loss": 1.3433, "step": 15 }, { "epoch": 0.06611570247933884, "grad_norm": 0.6865679025650024, "learning_rate": 7.5e-06, "loss": 1.2905, "step": 20 }, { "epoch": 0.08264462809917356, "grad_norm": 0.5552710294723511, "learning_rate": 9.473684210526315e-06, "loss": 1.3332, "step": 25 }, { "epoch": 0.09917355371900827, "grad_norm": 0.5776343941688538, "learning_rate": 1.1447368421052632e-05, "loss": 1.2557, "step": 30 }, { "epoch": 0.11570247933884298, "grad_norm": 0.4994240701198578, "learning_rate": 1.3421052631578948e-05, "loss": 1.2805, "step": 35 }, { "epoch": 0.1322314049586777, "grad_norm": 0.5451419353485107, "learning_rate": 1.5394736842105264e-05, "loss": 1.2875, "step": 40 }, { "epoch": 0.1487603305785124, "grad_norm": 0.5521426200866699, "learning_rate": 1.736842105263158e-05, "loss": 1.2729, "step": 45 }, { "epoch": 0.1652892561983471, "grad_norm": 0.448354572057724, "learning_rate": 1.9342105263157896e-05, "loss": 1.2043, "step": 50 }, { "epoch": 0.18181818181818182, "grad_norm": 0.5871981978416443, "learning_rate": 2.1315789473684212e-05, "loss": 1.1982, "step": 55 }, { "epoch": 0.19834710743801653, "grad_norm": 0.5270593762397766, "learning_rate": 2.3289473684210525e-05, "loss": 1.2128, "step": 60 }, { "epoch": 0.21487603305785125, "grad_norm": 0.513990044593811, "learning_rate": 2.526315789473684e-05, "loss": 1.1809, "step": 65 }, { "epoch": 0.23140495867768596, "grad_norm": 0.5345117449760437, "learning_rate": 2.723684210526316e-05, "loss": 1.2193, "step": 70 }, { "epoch": 0.24793388429752067, "grad_norm": 0.4606321454048157, "learning_rate": 2.9210526315789474e-05, "loss": 1.1448, "step": 75 }, { "epoch": 0.2644628099173554, "grad_norm": 0.46585503220558167, "learning_rate": 2.9999678278282968e-05, "loss": 1.1272, "step": 80 }, { "epoch": 0.2809917355371901, "grad_norm": 0.5272030234336853, "learning_rate": 2.9997712251100747e-05, "loss": 1.1208, "step": 85 }, { "epoch": 0.2975206611570248, "grad_norm": 0.5860956311225891, "learning_rate": 2.9993959165002407e-05, "loss": 1.0957, "step": 90 }, { "epoch": 0.3140495867768595, "grad_norm": 0.7299471497535706, "learning_rate": 2.998841946718855e-05, "loss": 1.1043, "step": 95 }, { "epoch": 0.3305785123966942, "grad_norm": 0.5312178134918213, "learning_rate": 2.9981093817744277e-05, "loss": 1.1348, "step": 100 }, { "epoch": 0.34710743801652894, "grad_norm": 0.7180105447769165, "learning_rate": 2.997198308956052e-05, "loss": 1.0544, "step": 105 }, { "epoch": 0.36363636363636365, "grad_norm": 0.6310957670211792, "learning_rate": 2.9961088368230065e-05, "loss": 1.0619, "step": 110 }, { "epoch": 0.38016528925619836, "grad_norm": 0.586087703704834, "learning_rate": 2.9948410951918154e-05, "loss": 1.0515, "step": 115 }, { "epoch": 0.39669421487603307, "grad_norm": 0.6914042830467224, "learning_rate": 2.993395235120784e-05, "loss": 1.0209, "step": 120 }, { "epoch": 0.4132231404958678, "grad_norm": 0.6734358668327332, "learning_rate": 2.991771428891996e-05, "loss": 0.9988, "step": 125 }, { "epoch": 0.4297520661157025, "grad_norm": 0.68830806016922, "learning_rate": 2.989969869990789e-05, "loss": 1.0326, "step": 130 }, { "epoch": 0.4462809917355372, "grad_norm": 0.7114909291267395, "learning_rate": 2.9879907730826946e-05, "loss": 1.0411, "step": 135 }, { "epoch": 0.4628099173553719, "grad_norm": 0.6947762370109558, "learning_rate": 2.9858343739878657e-05, "loss": 0.945, "step": 140 }, { "epoch": 0.4793388429752066, "grad_norm": 0.7105550765991211, "learning_rate": 2.9835009296529717e-05, "loss": 0.9779, "step": 145 }, { "epoch": 0.49586776859504134, "grad_norm": 0.9549520015716553, "learning_rate": 2.980990718120587e-05, "loss": 0.9691, "step": 150 }, { "epoch": 0.512396694214876, "grad_norm": 0.7530831694602966, "learning_rate": 2.978304038496056e-05, "loss": 0.961, "step": 155 }, { "epoch": 0.5289256198347108, "grad_norm": 0.7150148153305054, "learning_rate": 2.975441210911856e-05, "loss": 0.9344, "step": 160 }, { "epoch": 0.5454545454545454, "grad_norm": 0.6854411959648132, "learning_rate": 2.9724025764894513e-05, "loss": 0.9801, "step": 165 }, { "epoch": 0.5619834710743802, "grad_norm": 0.8350316286087036, "learning_rate": 2.9691884972986458e-05, "loss": 0.898, "step": 170 }, { "epoch": 0.5785123966942148, "grad_norm": 0.8690942525863647, "learning_rate": 2.965799356314441e-05, "loss": 0.911, "step": 175 }, { "epoch": 0.5950413223140496, "grad_norm": 0.796727180480957, "learning_rate": 2.9622355573714036e-05, "loss": 0.8893, "step": 180 }, { "epoch": 0.6115702479338843, "grad_norm": 0.8369293212890625, "learning_rate": 2.9584975251155434e-05, "loss": 0.8583, "step": 185 }, { "epoch": 0.628099173553719, "grad_norm": 0.9035501480102539, "learning_rate": 2.954585704953717e-05, "loss": 0.9182, "step": 190 }, { "epoch": 0.6446280991735537, "grad_norm": 0.8341655135154724, "learning_rate": 2.950500563000555e-05, "loss": 0.8838, "step": 195 }, { "epoch": 0.6611570247933884, "grad_norm": 0.8167555332183838, "learning_rate": 2.946242586022921e-05, "loss": 0.8592, "step": 200 }, { "epoch": 0.6776859504132231, "grad_norm": 0.958501398563385, "learning_rate": 2.94181228138191e-05, "loss": 0.8798, "step": 205 }, { "epoch": 0.6942148760330579, "grad_norm": 0.8715956807136536, "learning_rate": 2.9372101769723958e-05, "loss": 0.8307, "step": 210 }, { "epoch": 0.7107438016528925, "grad_norm": 0.8931337594985962, "learning_rate": 2.9324368211601284e-05, "loss": 0.8488, "step": 215 }, { "epoch": 0.7272727272727273, "grad_norm": 0.9372528791427612, "learning_rate": 2.9274927827163913e-05, "loss": 0.8181, "step": 220 }, { "epoch": 0.743801652892562, "grad_norm": 0.9352923631668091, "learning_rate": 2.9223786507502333e-05, "loss": 0.7852, "step": 225 }, { "epoch": 0.7603305785123967, "grad_norm": 1.0228369235992432, "learning_rate": 2.917095034638269e-05, "loss": 0.7731, "step": 230 }, { "epoch": 0.7768595041322314, "grad_norm": 1.1870708465576172, "learning_rate": 2.9116425639520713e-05, "loss": 0.8079, "step": 235 }, { "epoch": 0.7933884297520661, "grad_norm": 0.933369517326355, "learning_rate": 2.9060218883831517e-05, "loss": 0.7646, "step": 240 }, { "epoch": 0.8099173553719008, "grad_norm": 1.1031955480575562, "learning_rate": 2.9002336776655494e-05, "loss": 0.7794, "step": 245 }, { "epoch": 0.8264462809917356, "grad_norm": 0.9256858229637146, "learning_rate": 2.894278621496025e-05, "loss": 0.7541, "step": 250 }, { "epoch": 0.8429752066115702, "grad_norm": 1.17303466796875, "learning_rate": 2.888157429451883e-05, "loss": 0.7456, "step": 255 }, { "epoch": 0.859504132231405, "grad_norm": 1.0275311470031738, "learning_rate": 2.88187083090642e-05, "loss": 0.7497, "step": 260 }, { "epoch": 0.8760330578512396, "grad_norm": 0.9848041534423828, "learning_rate": 2.8754195749420145e-05, "loss": 0.7138, "step": 265 }, { "epoch": 0.8925619834710744, "grad_norm": 1.0821123123168945, "learning_rate": 2.8688044302608735e-05, "loss": 0.7326, "step": 270 }, { "epoch": 0.9090909090909091, "grad_norm": 1.0524564981460571, "learning_rate": 2.8620261850934338e-05, "loss": 0.6945, "step": 275 }, { "epoch": 0.9256198347107438, "grad_norm": 1.1582773923873901, "learning_rate": 2.8550856471044415e-05, "loss": 0.7245, "step": 280 }, { "epoch": 0.9421487603305785, "grad_norm": 0.9988569617271423, "learning_rate": 2.847983643296715e-05, "loss": 0.7613, "step": 285 }, { "epoch": 0.9586776859504132, "grad_norm": 1.1030054092407227, "learning_rate": 2.840721019912602e-05, "loss": 0.6793, "step": 290 }, { "epoch": 0.9752066115702479, "grad_norm": 1.0013433694839478, "learning_rate": 2.833298642333146e-05, "loss": 0.7052, "step": 295 }, { "epoch": 0.9917355371900827, "grad_norm": 1.1024919748306274, "learning_rate": 2.8257173949749703e-05, "loss": 0.7322, "step": 300 } ], "logging_steps": 5, "max_steps": 1515, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.2486090752537395e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }