| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9917355371900827, |
| "eval_steps": 500, |
| "global_step": 80, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.024793388429752067, |
| "grad_norm": 0.07427436731705139, |
| "learning_rate": 1.25e-06, |
| "loss": 0.4198, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.12396694214876033, |
| "grad_norm": 0.06996875359362274, |
| "learning_rate": 6.25e-06, |
| "loss": 0.4294, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.24793388429752067, |
| "grad_norm": 0.07459353999937281, |
| "learning_rate": 9.980973490458728e-06, |
| "loss": 0.3886, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.371900826446281, |
| "grad_norm": 0.08674231581833301, |
| "learning_rate": 9.768584753741134e-06, |
| "loss": 0.4137, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.49586776859504134, |
| "grad_norm": 0.08495439793697043, |
| "learning_rate": 9.330127018922195e-06, |
| "loss": 0.4006, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.6198347107438017, |
| "grad_norm": 0.06829258686322341, |
| "learning_rate": 8.68638668405062e-06, |
| "loss": 0.3411, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.743801652892562, |
| "grad_norm": 0.07146602412363501, |
| "learning_rate": 7.86788218175523e-06, |
| "loss": 0.3623, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.8677685950413223, |
| "grad_norm": 0.0684124113756464, |
| "learning_rate": 6.913417161825449e-06, |
| "loss": 0.3255, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.9917355371900827, |
| "grad_norm": 0.08832822341085747, |
| "learning_rate": 5.8682408883346535e-06, |
| "loss": 0.3393, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.9917355371900827, |
| "eval_loss": 0.432477205991745, |
| "eval_runtime": 28.4008, |
| "eval_samples_per_second": 19.33, |
| "eval_steps_per_second": 4.859, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.1239669421487604, |
| "grad_norm": 0.07536649029047712, |
| "learning_rate": 4.781903063173321e-06, |
| "loss": 0.3563, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.2479338842975207, |
| "grad_norm": 0.10590347165075693, |
| "learning_rate": 3.705904774487396e-06, |
| "loss": 0.2761, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.3719008264462809, |
| "grad_norm": 0.0905785145173113, |
| "learning_rate": 2.6912569338248317e-06, |
| "loss": 0.2485, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.4958677685950414, |
| "grad_norm": 0.09878550778765635, |
| "learning_rate": 1.7860619515673034e-06, |
| "loss": 0.2721, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.6198347107438016, |
| "grad_norm": 0.10195708282293185, |
| "learning_rate": 1.0332332985438248e-06, |
| "loss": 0.2466, |
| "step": 65 |
| }, |
| { |
| "epoch": 1.743801652892562, |
| "grad_norm": 0.0922397167738883, |
| "learning_rate": 4.6846106481675035e-07, |
| "loss": 0.2357, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.8677685950413223, |
| "grad_norm": 0.09959951208433296, |
| "learning_rate": 1.185199644003332e-07, |
| "loss": 0.228, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.9917355371900827, |
| "grad_norm": 0.10210514342941603, |
| "learning_rate": 0.0, |
| "loss": 0.2399, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.9917355371900827, |
| "eval_loss": 0.44621971249580383, |
| "eval_runtime": 28.3617, |
| "eval_samples_per_second": 19.357, |
| "eval_steps_per_second": 4.866, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.9917355371900827, |
| "step": 80, |
| "total_flos": 1.979713786478592e+17, |
| "train_loss": 0.3188592839986086, |
| "train_runtime": 1045.198, |
| "train_samples_per_second": 3.703, |
| "train_steps_per_second": 0.077 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 80, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.979713786478592e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|