{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 178, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.028089887640449437, "grad_norm": 1.2830733060836792, "learning_rate": 2.666666666666667e-06, "loss": 1.3035, "step": 5 }, { "epoch": 0.056179775280898875, "grad_norm": 0.6879112124443054, "learning_rate": 6e-06, "loss": 1.4105, "step": 10 }, { "epoch": 0.08426966292134831, "grad_norm": 0.5197162628173828, "learning_rate": 9.333333333333334e-06, "loss": 1.3754, "step": 15 }, { "epoch": 0.11235955056179775, "grad_norm": 0.5167145133018494, "learning_rate": 1.2666666666666667e-05, "loss": 1.3014, "step": 20 }, { "epoch": 0.1404494382022472, "grad_norm": 0.5114685297012329, "learning_rate": 1.6e-05, "loss": 1.3148, "step": 25 }, { "epoch": 0.16853932584269662, "grad_norm": 0.542929470539093, "learning_rate": 1.9333333333333333e-05, "loss": 1.2024, "step": 30 }, { "epoch": 0.19662921348314608, "grad_norm": 0.47515320777893066, "learning_rate": 2.2666666666666668e-05, "loss": 1.2537, "step": 35 }, { "epoch": 0.2247191011235955, "grad_norm": 0.4169822633266449, "learning_rate": 2.6000000000000002e-05, "loss": 1.2063, "step": 40 }, { "epoch": 0.25280898876404495, "grad_norm": 0.42015039920806885, "learning_rate": 2.9333333333333333e-05, "loss": 1.1947, "step": 45 }, { "epoch": 0.2808988764044944, "grad_norm": 0.5036826133728027, "learning_rate": 2.9998341331605073e-05, "loss": 1.1333, "step": 50 }, { "epoch": 0.3089887640449438, "grad_norm": 0.47650378942489624, "learning_rate": 2.9991603619933566e-05, "loss": 1.1118, "step": 55 }, { "epoch": 0.33707865168539325, "grad_norm": 0.5294891595840454, "learning_rate": 2.99796855246516e-05, "loss": 1.0854, "step": 60 }, { "epoch": 0.3651685393258427, "grad_norm": 0.4981288015842438, "learning_rate": 2.9962591164084806e-05, "loss": 1.0895, "step": 65 }, { "epoch": 0.39325842696629215, "grad_norm": 0.6262781620025635, "learning_rate": 2.9940326445229367e-05, "loss": 1.0693, "step": 70 }, { "epoch": 0.42134831460674155, "grad_norm": 0.6170726418495178, "learning_rate": 2.991289906171083e-05, "loss": 1.0707, "step": 75 }, { "epoch": 0.449438202247191, "grad_norm": 0.4974953830242157, "learning_rate": 2.9880318491125568e-05, "loss": 1.0228, "step": 80 }, { "epoch": 0.47752808988764045, "grad_norm": 0.6400043964385986, "learning_rate": 2.9842595991765766e-05, "loss": 1.0408, "step": 85 }, { "epoch": 0.5056179775280899, "grad_norm": 0.6472094058990479, "learning_rate": 2.9799744598729097e-05, "loss": 1.0381, "step": 90 }, { "epoch": 0.5337078651685393, "grad_norm": 0.6478850245475769, "learning_rate": 2.97517791194144e-05, "loss": 0.9523, "step": 95 }, { "epoch": 0.5617977528089888, "grad_norm": 0.6548949480056763, "learning_rate": 2.9698716128404985e-05, "loss": 0.9321, "step": 100 }, { "epoch": 0.5898876404494382, "grad_norm": 0.7128049731254578, "learning_rate": 2.964057396174119e-05, "loss": 0.9353, "step": 105 }, { "epoch": 0.6179775280898876, "grad_norm": 0.7917237281799316, "learning_rate": 2.9577372710584375e-05, "loss": 0.9522, "step": 110 }, { "epoch": 0.6460674157303371, "grad_norm": 0.7267556190490723, "learning_rate": 2.9509134214274343e-05, "loss": 0.8641, "step": 115 }, { "epoch": 0.6741573033707865, "grad_norm": 0.8102006912231445, "learning_rate": 2.9435882052782717e-05, "loss": 0.869, "step": 120 }, { "epoch": 0.702247191011236, "grad_norm": 0.740906298160553, "learning_rate": 2.935764153856481e-05, "loss": 0.8267, "step": 125 }, { "epoch": 0.7303370786516854, "grad_norm": 0.8049883246421814, "learning_rate": 2.927443970781287e-05, "loss": 0.8282, "step": 130 }, { "epoch": 0.7584269662921348, "grad_norm": 0.8820163607597351, "learning_rate": 2.918630531111365e-05, "loss": 0.8037, "step": 135 }, { "epoch": 0.7865168539325843, "grad_norm": 0.9033377766609192, "learning_rate": 2.9093268803513564e-05, "loss": 0.8585, "step": 140 }, { "epoch": 0.8146067415730337, "grad_norm": 0.9345238208770752, "learning_rate": 2.8995362333994906e-05, "loss": 0.8272, "step": 145 }, { "epoch": 0.8426966292134831, "grad_norm": 1.106722354888916, "learning_rate": 2.889261973436665e-05, "loss": 0.8008, "step": 150 }, { "epoch": 0.8707865168539326, "grad_norm": 0.9598326683044434, "learning_rate": 2.8785076507573825e-05, "loss": 0.7377, "step": 155 }, { "epoch": 0.898876404494382, "grad_norm": 0.873813271522522, "learning_rate": 2.8672769815429385e-05, "loss": 0.7478, "step": 160 }, { "epoch": 0.9269662921348315, "grad_norm": 0.90259850025177, "learning_rate": 2.855573846577284e-05, "loss": 0.7459, "step": 165 }, { "epoch": 0.9550561797752809, "grad_norm": 1.0914926528930664, "learning_rate": 2.843402289906013e-05, "loss": 0.7069, "step": 170 }, { "epoch": 0.9831460674157303, "grad_norm": 1.1495745182037354, "learning_rate": 2.8307665174389323e-05, "loss": 0.7393, "step": 175 } ], "logging_steps": 5, "max_steps": 890, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.5878203703964467e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }