{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 178,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.028089887640449437,
      "grad_norm": 1.2830733060836792,
      "learning_rate": 2.666666666666667e-06,
      "loss": 1.3035,
      "step": 5
    },
    {
      "epoch": 0.056179775280898875,
      "grad_norm": 0.6879112124443054,
      "learning_rate": 6e-06,
      "loss": 1.4105,
      "step": 10
    },
    {
      "epoch": 0.08426966292134831,
      "grad_norm": 0.5197162628173828,
      "learning_rate": 9.333333333333334e-06,
      "loss": 1.3754,
      "step": 15
    },
    {
      "epoch": 0.11235955056179775,
      "grad_norm": 0.5167145133018494,
      "learning_rate": 1.2666666666666667e-05,
      "loss": 1.3014,
      "step": 20
    },
    {
      "epoch": 0.1404494382022472,
      "grad_norm": 0.5114685297012329,
      "learning_rate": 1.6e-05,
      "loss": 1.3148,
      "step": 25
    },
    {
      "epoch": 0.16853932584269662,
      "grad_norm": 0.542929470539093,
      "learning_rate": 1.9333333333333333e-05,
      "loss": 1.2024,
      "step": 30
    },
    {
      "epoch": 0.19662921348314608,
      "grad_norm": 0.47515320777893066,
      "learning_rate": 2.2666666666666668e-05,
      "loss": 1.2537,
      "step": 35
    },
    {
      "epoch": 0.2247191011235955,
      "grad_norm": 0.4169822633266449,
      "learning_rate": 2.6000000000000002e-05,
      "loss": 1.2063,
      "step": 40
    },
    {
      "epoch": 0.25280898876404495,
      "grad_norm": 0.42015039920806885,
      "learning_rate": 2.9333333333333333e-05,
      "loss": 1.1947,
      "step": 45
    },
    {
      "epoch": 0.2808988764044944,
      "grad_norm": 0.5036826133728027,
      "learning_rate": 2.9998341331605073e-05,
      "loss": 1.1333,
      "step": 50
    },
    {
      "epoch": 0.3089887640449438,
      "grad_norm": 0.47650378942489624,
      "learning_rate": 2.9991603619933566e-05,
      "loss": 1.1118,
      "step": 55
    },
    {
      "epoch": 0.33707865168539325,
      "grad_norm": 0.5294891595840454,
      "learning_rate": 2.99796855246516e-05,
      "loss": 1.0854,
      "step": 60
    },
    {
      "epoch": 0.3651685393258427,
      "grad_norm": 0.4981288015842438,
      "learning_rate": 2.9962591164084806e-05,
      "loss": 1.0895,
      "step": 65
    },
    {
      "epoch": 0.39325842696629215,
      "grad_norm": 0.6262781620025635,
      "learning_rate": 2.9940326445229367e-05,
      "loss": 1.0693,
      "step": 70
    },
    {
      "epoch": 0.42134831460674155,
      "grad_norm": 0.6170726418495178,
      "learning_rate": 2.991289906171083e-05,
      "loss": 1.0707,
      "step": 75
    },
    {
      "epoch": 0.449438202247191,
      "grad_norm": 0.4974953830242157,
      "learning_rate": 2.9880318491125568e-05,
      "loss": 1.0228,
      "step": 80
    },
    {
      "epoch": 0.47752808988764045,
      "grad_norm": 0.6400043964385986,
      "learning_rate": 2.9842595991765766e-05,
      "loss": 1.0408,
      "step": 85
    },
    {
      "epoch": 0.5056179775280899,
      "grad_norm": 0.6472094058990479,
      "learning_rate": 2.9799744598729097e-05,
      "loss": 1.0381,
      "step": 90
    },
    {
      "epoch": 0.5337078651685393,
      "grad_norm": 0.6478850245475769,
      "learning_rate": 2.97517911941444e-05,
      "loss": 0.9523,
      "step": 95
    },
    {
      "epoch": 0.5617977528089888,
      "grad_norm": 0.6548949480056763,
      "learning_rate": 2.9698716128404985e-05,
      "loss": 0.9321,
      "step": 100
    },
    {
      "epoch": 0.5898876404494382,
      "grad_norm": 0.7128049731254578,
      "learning_rate": 2.964057396174119e-05,
      "loss": 0.9353,
      "step": 105
    },
    {
      "epoch": 0.6179775280898876,
      "grad_norm": 0.7917237281799316,
      "learning_rate": 2.9577372710584375e-05,
      "loss": 0.9522,
      "step": 110
    },
    {
      "epoch": 0.6460674157303371,
      "grad_norm": 0.7267556190490723,
      "learning_rate": 2.9509134214274343e-05,
      "loss": 0.8641,
      "step": 115
    },
    {
      "epoch": 0.6741573033707865,
      "grad_norm": 0.8102006912231445,
      "learning_rate": 2.9435882052782717e-05,
      "loss": 0.869,
      "step": 120
    },
    {
      "epoch": 0.702247191011236,
      "grad_norm": 0.740906298160553,
      "learning_rate": 2.935764153856481e-05,
      "loss": 0.8267,
      "step": 125
    },
    {
      "epoch": 0.7303370786516854,
      "grad_norm": 0.8049883246421814,
      "learning_rate": 2.927443970781287e-05,
      "loss": 0.8282,
      "step": 130
    },
    {
      "epoch": 0.7584269662921348,
      "grad_norm": 0.8820163607597351,
      "learning_rate": 2.918630531111365e-05,
      "loss": 0.8037,
      "step": 135
    },
    {
      "epoch": 0.7865168539325843,
      "grad_norm": 0.9033377766609192,
      "learning_rate": 2.9093268803513564e-05,
      "loss": 0.8585,
      "step": 140
    },
    {
      "epoch": 0.8146067415730337,
      "grad_norm": 0.9345238208770752,
      "learning_rate": 2.8995362333994906e-05,
      "loss": 0.8272,
      "step": 145
    },
    {
      "epoch": 0.8426966292134831,
      "grad_norm": 1.106722354888916,
      "learning_rate": 2.889261973436665e-05,
      "loss": 0.8008,
      "step": 150
    },
    {
      "epoch": 0.8707865168539326,
      "grad_norm": 0.9598326683044434,
      "learning_rate": 2.8785076507573825e-05,
      "loss": 0.7377,
      "step": 155
    },
    {
      "epoch": 0.898876404494382,
      "grad_norm": 0.873813271522522,
      "learning_rate": 2.8672769815429385e-05,
      "loss": 0.7478,
      "step": 160
    },
    {
      "epoch": 0.9269662921348315,
      "grad_norm": 0.90259850025177,
      "learning_rate": 2.855573846577284e-05,
      "loss": 0.7459,
      "step": 165
    },
    {
      "epoch": 0.9550561797752809,
      "grad_norm": 1.0914926528930664,
      "learning_rate": 2.843402289906013e-05,
      "loss": 0.7069,
      "step": 170
    },
    {
      "epoch": 0.9831460674157303,
      "grad_norm": 1.1495745182037354,
      "learning_rate": 2.8307665174389323e-05,
      "loss": 0.7393,
      "step": 175
    }
  ],
  "logging_steps": 5,
  "max_steps": 890,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.5878203703964467e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}