| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.8, |
| "eval_steps": 500, |
| "global_step": 500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.016, |
| "grad_norm": 2.7287635803222656, |
| "learning_rate": 2.3936170212765957e-06, |
| "loss": 1.8321, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 2.5078585147857666, |
| "learning_rate": 5.053191489361702e-06, |
| "loss": 1.7408, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 2.1067042350769043, |
| "learning_rate": 7.712765957446808e-06, |
| "loss": 1.5053, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 1.6803735494613647, |
| "learning_rate": 1.0372340425531916e-05, |
| "loss": 1.176, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 1.358530044555664, |
| "learning_rate": 1.3031914893617023e-05, |
| "loss": 0.7815, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 0.7375353574752808, |
| "learning_rate": 1.5691489361702127e-05, |
| "loss": 0.4287, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 0.5832469463348389, |
| "learning_rate": 1.8351063829787234e-05, |
| "loss": 0.3052, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.41397199034690857, |
| "learning_rate": 2.1010638297872342e-05, |
| "loss": 0.1874, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 0.4094902575016022, |
| "learning_rate": 2.3670212765957446e-05, |
| "loss": 0.1135, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.2328539937734604, |
| "learning_rate": 2.6329787234042553e-05, |
| "loss": 0.047, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 0.1451612263917923, |
| "learning_rate": 2.898936170212766e-05, |
| "loss": 0.0175, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.02662522904574871, |
| "learning_rate": 3.164893617021277e-05, |
| "loss": 0.0036, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 0.01232597604393959, |
| "learning_rate": 3.430851063829787e-05, |
| "loss": 0.0014, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 0.0077546993270516396, |
| "learning_rate": 3.696808510638298e-05, |
| "loss": 0.0007, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.005459086503833532, |
| "learning_rate": 3.962765957446809e-05, |
| "loss": 0.0006, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.005705484189093113, |
| "learning_rate": 4.228723404255319e-05, |
| "loss": 0.0005, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 0.0035389093682169914, |
| "learning_rate": 4.49468085106383e-05, |
| "loss": 0.0004, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 0.003010712331160903, |
| "learning_rate": 4.7606382978723405e-05, |
| "loss": 0.0003, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 0.002946693217381835, |
| "learning_rate": 4.999995665096164e-05, |
| "loss": 0.0003, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.002301169792190194, |
| "learning_rate": 4.9994754948256304e-05, |
| "loss": 0.0002, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 0.002140331780537963, |
| "learning_rate": 4.998088550481357e-05, |
| "loss": 0.0002, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.0022257098462432623, |
| "learning_rate": 4.9958353130312106e-05, |
| "loss": 0.0002, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 0.001662266324274242, |
| "learning_rate": 4.99271656385825e-05, |
| "loss": 0.0002, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.002042123582214117, |
| "learning_rate": 4.9887333844897506e-05, |
| "loss": 0.0002, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.0013473546132445335, |
| "learning_rate": 4.983887156222155e-05, |
| "loss": 0.0002, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 0.0018928167410194874, |
| "learning_rate": 4.978179559642061e-05, |
| "loss": 0.0002, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 0.0016440442996099591, |
| "learning_rate": 4.9716125740434235e-05, |
| "loss": 0.0002, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.001509015099145472, |
| "learning_rate": 4.9641884767411714e-05, |
| "loss": 0.0002, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 0.002279238309711218, |
| "learning_rate": 4.955909842281477e-05, |
| "loss": 0.0001, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.0011063262354582548, |
| "learning_rate": 4.946779541548942e-05, |
| "loss": 0.0001, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 0.0013478458859026432, |
| "learning_rate": 4.936800740771033e-05, |
| "loss": 0.0001, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.0010506451362743974, |
| "learning_rate": 4.925976900420083e-05, |
| "loss": 0.0001, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 0.0013516925973817706, |
| "learning_rate": 4.9143117740132667e-05, |
| "loss": 0.0001, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 0.001162629690952599, |
| "learning_rate": 4.901809406810942e-05, |
| "loss": 0.0001, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.0008260276517830789, |
| "learning_rate": 4.8884741344138294e-05, |
| "loss": 0.0001, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 0.0009220085921697319, |
| "learning_rate": 4.8743105812594944e-05, |
| "loss": 0.0001, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 0.0009574664290994406, |
| "learning_rate": 4.8593236590186855e-05, |
| "loss": 0.0001, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 0.0009008324705064297, |
| "learning_rate": 4.8435185648920403e-05, |
| "loss": 0.0001, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 0.0007590301102027297, |
| "learning_rate": 4.8269007798077994e-05, |
| "loss": 0.0001, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.0007175425416789949, |
| "learning_rate": 4.809476066521111e-05, |
| "loss": 0.0001, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 0.0008309377590194345, |
| "learning_rate": 4.791250467615608e-05, |
| "loss": 0.0001, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 0.0006082377512939274, |
| "learning_rate": 4.77223030340795e-05, |
| "loss": 0.0001, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 0.0007051264983601868, |
| "learning_rate": 4.752422169756048e-05, |
| "loss": 0.0001, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.000669407716486603, |
| "learning_rate": 4.7318329357717345e-05, |
| "loss": 0.0001, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.0006657831836491823, |
| "learning_rate": 4.710469741438679e-05, |
| "loss": 0.0001, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 0.0005902393022552133, |
| "learning_rate": 4.688339995136368e-05, |
| "loss": 0.0001, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 0.0006702489918097854, |
| "learning_rate": 4.6654513710710056e-05, |
| "loss": 0.0001, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 0.0005900064134038985, |
| "learning_rate": 4.6418118066142395e-05, |
| "loss": 0.0001, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 0.0005075543303973973, |
| "learning_rate": 4.6174294995506154e-05, |
| "loss": 0.0001, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.0005521568236872554, |
| "learning_rate": 4.5923129052347334e-05, |
| "loss": 0.0001, |
| "step": 500 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1875, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.9952307416334336e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|