| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 5000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "grad_norm": 1.9574642181396484, |
| "learning_rate": 4.901e-05, |
| "loss": 4.8716, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 1.562890648841858, |
| "learning_rate": 4.801e-05, |
| "loss": 2.5735, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.5503687858581543, |
| "learning_rate": 4.7010000000000006e-05, |
| "loss": 2.2182, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 1.3021109104156494, |
| "learning_rate": 4.601e-05, |
| "loss": 1.9191, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.3246175050735474, |
| "learning_rate": 4.5010000000000004e-05, |
| "loss": 1.8126, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.04691481590271, |
| "learning_rate": 4.401e-05, |
| "loss": 1.7728, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.9935975670814514, |
| "learning_rate": 4.301e-05, |
| "loss": 1.6751, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.1450852155685425, |
| "learning_rate": 4.201e-05, |
| "loss": 1.5686, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.1858118772506714, |
| "learning_rate": 4.101e-05, |
| "loss": 1.6261, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.2318594455718994, |
| "learning_rate": 4.0010000000000005e-05, |
| "loss": 1.6018, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.0691641569137573, |
| "learning_rate": 3.901e-05, |
| "loss": 1.5865, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.8661352396011353, |
| "learning_rate": 3.8010000000000004e-05, |
| "loss": 1.4648, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.9778139591217041, |
| "learning_rate": 3.701e-05, |
| "loss": 1.4765, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.3476413488388062, |
| "learning_rate": 3.601e-05, |
| "loss": 1.5541, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.7631675004959106, |
| "learning_rate": 3.5010000000000005e-05, |
| "loss": 1.5048, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.1490070819854736, |
| "learning_rate": 3.401e-05, |
| "loss": 1.5052, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.077760934829712, |
| "learning_rate": 3.3010000000000004e-05, |
| "loss": 1.5638, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.8567506670951843, |
| "learning_rate": 3.201e-05, |
| "loss": 1.5387, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.842096209526062, |
| "learning_rate": 3.101e-05, |
| "loss": 1.5037, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.8851768374443054, |
| "learning_rate": 3.001e-05, |
| "loss": 1.5145, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.7667313814163208, |
| "learning_rate": 2.9010000000000005e-05, |
| "loss": 1.526, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.079816460609436, |
| "learning_rate": 2.8010000000000005e-05, |
| "loss": 1.5332, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.9301439523696899, |
| "learning_rate": 2.701e-05, |
| "loss": 1.4011, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.9722267985343933, |
| "learning_rate": 2.601e-05, |
| "loss": 1.4697, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.8725349307060242, |
| "learning_rate": 2.501e-05, |
| "loss": 1.4873, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.0267070531845093, |
| "learning_rate": 2.4010000000000002e-05, |
| "loss": 1.4103, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.70062655210495, |
| "learning_rate": 2.301e-05, |
| "loss": 1.3659, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.8035644888877869, |
| "learning_rate": 2.201e-05, |
| "loss": 1.4009, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.8089845776557922, |
| "learning_rate": 2.101e-05, |
| "loss": 1.4293, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.8427271246910095, |
| "learning_rate": 2.001e-05, |
| "loss": 1.4011, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 0.994111180305481, |
| "learning_rate": 1.901e-05, |
| "loss": 1.3785, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.0096153020858765, |
| "learning_rate": 1.8010000000000002e-05, |
| "loss": 1.4122, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.7762922644615173, |
| "learning_rate": 1.701e-05, |
| "loss": 1.3849, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.1585968732833862, |
| "learning_rate": 1.601e-05, |
| "loss": 1.3961, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.9618488550186157, |
| "learning_rate": 1.5010000000000002e-05, |
| "loss": 1.3458, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.007106065750122, |
| "learning_rate": 1.4010000000000001e-05, |
| "loss": 1.3952, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.9712181091308594, |
| "learning_rate": 1.301e-05, |
| "loss": 1.3275, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.9360325336456299, |
| "learning_rate": 1.201e-05, |
| "loss": 1.3479, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.889412522315979, |
| "learning_rate": 1.1010000000000001e-05, |
| "loss": 1.4002, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.4792468547821045, |
| "learning_rate": 1.001e-05, |
| "loss": 1.2896, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.8564038276672363, |
| "learning_rate": 9.01e-06, |
| "loss": 1.3501, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.8316518664360046, |
| "learning_rate": 8.010000000000001e-06, |
| "loss": 1.3673, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 0.8671555519104004, |
| "learning_rate": 7.01e-06, |
| "loss": 1.328, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.9001341462135315, |
| "learning_rate": 6.01e-06, |
| "loss": 1.3363, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.8028829097747803, |
| "learning_rate": 5.01e-06, |
| "loss": 1.3272, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.923729419708252, |
| "learning_rate": 4.01e-06, |
| "loss": 1.3645, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.2373661994934082, |
| "learning_rate": 3.01e-06, |
| "loss": 1.3762, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.1914544105529785, |
| "learning_rate": 2.0100000000000002e-06, |
| "loss": 1.2622, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 0.8515170216560364, |
| "learning_rate": 1.01e-06, |
| "loss": 1.3327, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.9752984046936035, |
| "learning_rate": 1e-08, |
| "loss": 1.2896, |
| "step": 5000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 5000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 5000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.464359559168e+16, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|