| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 15375, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0975609756097561, |
| "grad_norm": 19.442411422729492, |
| "learning_rate": 4.8373983739837406e-05, |
| "loss": 6.7559, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1951219512195122, |
| "grad_norm": 22.672739028930664, |
| "learning_rate": 4.6747967479674795e-05, |
| "loss": 6.6932, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2926829268292683, |
| "grad_norm": 21.795516967773438, |
| "learning_rate": 4.51219512195122e-05, |
| "loss": 6.6652, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.3902439024390244, |
| "grad_norm": 19.84381866455078, |
| "learning_rate": 4.3495934959349595e-05, |
| "loss": 6.6335, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.4878048780487805, |
| "grad_norm": 14.22912883758545, |
| "learning_rate": 4.186991869918699e-05, |
| "loss": 6.6248, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.5853658536585366, |
| "grad_norm": 14.391462326049805, |
| "learning_rate": 4.0243902439024395e-05, |
| "loss": 6.5929, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.6829268292682927, |
| "grad_norm": 19.81720733642578, |
| "learning_rate": 3.861788617886179e-05, |
| "loss": 6.5589, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.7804878048780488, |
| "grad_norm": 15.33761978149414, |
| "learning_rate": 3.699186991869919e-05, |
| "loss": 6.5327, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.8780487804878049, |
| "grad_norm": 14.190281867980957, |
| "learning_rate": 3.5365853658536584e-05, |
| "loss": 6.5175, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.975609756097561, |
| "grad_norm": 16.57828712463379, |
| "learning_rate": 3.373983739837399e-05, |
| "loss": 6.5137, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.0731707317073171, |
| "grad_norm": 16.75761604309082, |
| "learning_rate": 3.2113821138211384e-05, |
| "loss": 6.495, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.170731707317073, |
| "grad_norm": 18.840726852416992, |
| "learning_rate": 3.048780487804878e-05, |
| "loss": 6.4757, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.2682926829268293, |
| "grad_norm": 17.630483627319336, |
| "learning_rate": 2.886178861788618e-05, |
| "loss": 6.4633, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.3658536585365852, |
| "grad_norm": 16.721818923950195, |
| "learning_rate": 2.7235772357723577e-05, |
| "loss": 6.4462, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.4634146341463414, |
| "grad_norm": 14.650636672973633, |
| "learning_rate": 2.5609756097560977e-05, |
| "loss": 6.4404, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.5609756097560976, |
| "grad_norm": 13.825970649719238, |
| "learning_rate": 2.3983739837398377e-05, |
| "loss": 6.4326, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.6585365853658538, |
| "grad_norm": 11.85326862335205, |
| "learning_rate": 2.2357723577235773e-05, |
| "loss": 6.4239, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.7560975609756098, |
| "grad_norm": 13.92196273803711, |
| "learning_rate": 2.073170731707317e-05, |
| "loss": 6.4098, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.8536585365853657, |
| "grad_norm": 12.077308654785156, |
| "learning_rate": 1.9105691056910573e-05, |
| "loss": 6.3987, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.951219512195122, |
| "grad_norm": 12.406614303588867, |
| "learning_rate": 1.747967479674797e-05, |
| "loss": 6.3957, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.048780487804878, |
| "grad_norm": 14.001736640930176, |
| "learning_rate": 1.5853658536585366e-05, |
| "loss": 6.3752, |
| "step": 10500 |
| }, |
| { |
| "epoch": 2.1463414634146343, |
| "grad_norm": 12.691810607910156, |
| "learning_rate": 1.4227642276422764e-05, |
| "loss": 6.3566, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.2439024390243905, |
| "grad_norm": 10.062420845031738, |
| "learning_rate": 1.2601626016260162e-05, |
| "loss": 6.3492, |
| "step": 11500 |
| }, |
| { |
| "epoch": 2.341463414634146, |
| "grad_norm": 11.78906536102295, |
| "learning_rate": 1.0975609756097562e-05, |
| "loss": 6.3447, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.4390243902439024, |
| "grad_norm": 13.368131637573242, |
| "learning_rate": 9.34959349593496e-06, |
| "loss": 6.339, |
| "step": 12500 |
| }, |
| { |
| "epoch": 2.5365853658536586, |
| "grad_norm": 12.125652313232422, |
| "learning_rate": 7.723577235772358e-06, |
| "loss": 6.3305, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.6341463414634148, |
| "grad_norm": 13.748695373535156, |
| "learning_rate": 6.0975609756097564e-06, |
| "loss": 6.3205, |
| "step": 13500 |
| }, |
| { |
| "epoch": 2.7317073170731705, |
| "grad_norm": 13.787367820739746, |
| "learning_rate": 4.471544715447155e-06, |
| "loss": 6.3196, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.8292682926829267, |
| "grad_norm": 15.013029098510742, |
| "learning_rate": 2.8455284552845528e-06, |
| "loss": 6.3116, |
| "step": 14500 |
| }, |
| { |
| "epoch": 2.926829268292683, |
| "grad_norm": 15.244904518127441, |
| "learning_rate": 1.2195121951219514e-06, |
| "loss": 6.3107, |
| "step": 15000 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 15375, |
| "total_flos": 5764753863475200.0, |
| "train_loss": 6.457221655868903, |
| "train_runtime": 1165.3935, |
| "train_samples_per_second": 105.536, |
| "train_steps_per_second": 13.193 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 15375, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5764753863475200.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|