| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 10.015873015873016, |
| "eval_steps": 126, |
| "global_step": 630, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0015873015873015873, |
| "grad_norm": 1.802138090133667, |
| "learning_rate": 1e-05, |
| "loss": 2.7056, |
| "step": 1 |
| }, |
| { |
| "epoch": 1.0015873015873016, |
| "grad_norm": 1.3845086097717285, |
| "learning_rate": 0.00063, |
| "loss": 1.2986, |
| "step": 63 |
| }, |
| { |
| "epoch": 2.003174603174603, |
| "grad_norm": 0.29827240109443665, |
| "learning_rate": 0.0009509433962264152, |
| "loss": 0.369, |
| "step": 126 |
| }, |
| { |
| "epoch": 2.003174603174603, |
| "eval_loss": 0.2990573048591614, |
| "eval_runtime": 76.7071, |
| "eval_samples_per_second": 6.479, |
| "eval_steps_per_second": 0.821, |
| "step": 126 |
| }, |
| { |
| "epoch": 3.0047619047619047, |
| "grad_norm": 0.28911444544792175, |
| "learning_rate": 0.0008320754716981132, |
| "loss": 0.2589, |
| "step": 189 |
| }, |
| { |
| "epoch": 4.006349206349206, |
| "grad_norm": 0.2769618332386017, |
| "learning_rate": 0.0007132075471698113, |
| "loss": 0.2183, |
| "step": 252 |
| }, |
| { |
| "epoch": 4.006349206349206, |
| "eval_loss": 0.24794502556324005, |
| "eval_runtime": 75.6398, |
| "eval_samples_per_second": 6.571, |
| "eval_steps_per_second": 0.833, |
| "step": 252 |
| }, |
| { |
| "epoch": 5.007936507936508, |
| "grad_norm": 0.2538459897041321, |
| "learning_rate": 0.0005943396226415095, |
| "loss": 0.1887, |
| "step": 315 |
| }, |
| { |
| "epoch": 6.0095238095238095, |
| "grad_norm": 0.28301894664764404, |
| "learning_rate": 0.0004754716981132076, |
| "loss": 0.1622, |
| "step": 378 |
| }, |
| { |
| "epoch": 6.0095238095238095, |
| "eval_loss": 0.253131628036499, |
| "eval_runtime": 75.5929, |
| "eval_samples_per_second": 6.575, |
| "eval_steps_per_second": 0.833, |
| "step": 378 |
| }, |
| { |
| "epoch": 7.011111111111111, |
| "grad_norm": 0.29330751299858093, |
| "learning_rate": 0.00035660377358490565, |
| "loss": 0.138, |
| "step": 441 |
| }, |
| { |
| "epoch": 8.012698412698413, |
| "grad_norm": 0.2697054147720337, |
| "learning_rate": 0.0002377358490566038, |
| "loss": 0.1124, |
| "step": 504 |
| }, |
| { |
| "epoch": 8.012698412698413, |
| "eval_loss": 0.2732747197151184, |
| "eval_runtime": 75.8397, |
| "eval_samples_per_second": 6.553, |
| "eval_steps_per_second": 0.831, |
| "step": 504 |
| }, |
| { |
| "epoch": 9.014285714285714, |
| "grad_norm": 0.21484734117984772, |
| "learning_rate": 0.0001188679245283019, |
| "loss": 0.0883, |
| "step": 567 |
| }, |
| { |
| "epoch": 10.015873015873016, |
| "grad_norm": 0.20847243070602417, |
| "learning_rate": 0.0, |
| "loss": 0.0692, |
| "step": 630 |
| }, |
| { |
| "epoch": 10.015873015873016, |
| "eval_loss": 0.2962268590927124, |
| "eval_runtime": 75.5859, |
| "eval_samples_per_second": 6.575, |
| "eval_steps_per_second": 0.833, |
| "step": 630 |
| } |
| ], |
| "logging_steps": 63, |
| "max_steps": 630, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 126, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.1425470562304e+19, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|