| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 7.6923076923076925, |
| "eval_steps": 5, |
| "global_step": 50, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.15384615384615385, |
| "grad_norm": 0.69048468430386, |
| "learning_rate": 4e-05, |
| "loss": 1.4309, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "grad_norm": 0.6485095380668653, |
| "learning_rate": 0.0002, |
| "loss": 1.3264, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "eval_loss": 1.070548415184021, |
| "eval_runtime": 2.6729, |
| "eval_samples_per_second": 7.482, |
| "eval_steps_per_second": 1.122, |
| "step": 5 |
| }, |
| { |
| "epoch": 1.5384615384615383, |
| "grad_norm": 0.3824447243065964, |
| "learning_rate": 0.00019396926207859084, |
| "loss": 0.9466, |
| "step": 10 |
| }, |
| { |
| "epoch": 1.5384615384615383, |
| "eval_loss": 0.7976406812667847, |
| "eval_runtime": 1.4008, |
| "eval_samples_per_second": 14.277, |
| "eval_steps_per_second": 2.142, |
| "step": 10 |
| }, |
| { |
| "epoch": 2.3076923076923075, |
| "grad_norm": 0.19520499730972615, |
| "learning_rate": 0.0001766044443118978, |
| "loss": 0.7184, |
| "step": 15 |
| }, |
| { |
| "epoch": 2.3076923076923075, |
| "eval_loss": 0.685447096824646, |
| "eval_runtime": 1.4079, |
| "eval_samples_per_second": 14.205, |
| "eval_steps_per_second": 2.131, |
| "step": 15 |
| }, |
| { |
| "epoch": 3.076923076923077, |
| "grad_norm": 0.29294971119218416, |
| "learning_rate": 0.00015000000000000001, |
| "loss": 0.6259, |
| "step": 20 |
| }, |
| { |
| "epoch": 3.076923076923077, |
| "eval_loss": 0.6346420645713806, |
| "eval_runtime": 1.4026, |
| "eval_samples_per_second": 14.259, |
| "eval_steps_per_second": 2.139, |
| "step": 20 |
| }, |
| { |
| "epoch": 3.8461538461538463, |
| "grad_norm": 0.1806256227517497, |
| "learning_rate": 0.00011736481776669306, |
| "loss": 0.574, |
| "step": 25 |
| }, |
| { |
| "epoch": 3.8461538461538463, |
| "eval_loss": 0.6089328527450562, |
| "eval_runtime": 1.4058, |
| "eval_samples_per_second": 14.227, |
| "eval_steps_per_second": 2.134, |
| "step": 25 |
| }, |
| { |
| "epoch": 4.615384615384615, |
| "grad_norm": 0.20442514442238752, |
| "learning_rate": 8.263518223330697e-05, |
| "loss": 0.549, |
| "step": 30 |
| }, |
| { |
| "epoch": 4.615384615384615, |
| "eval_loss": 0.6004151701927185, |
| "eval_runtime": 1.4031, |
| "eval_samples_per_second": 14.254, |
| "eval_steps_per_second": 2.138, |
| "step": 30 |
| }, |
| { |
| "epoch": 5.384615384615385, |
| "grad_norm": 0.1586388379410015, |
| "learning_rate": 5.000000000000002e-05, |
| "loss": 0.5003, |
| "step": 35 |
| }, |
| { |
| "epoch": 5.384615384615385, |
| "eval_loss": 0.5947655439376831, |
| "eval_runtime": 1.4034, |
| "eval_samples_per_second": 14.251, |
| "eval_steps_per_second": 2.138, |
| "step": 35 |
| }, |
| { |
| "epoch": 6.153846153846154, |
| "grad_norm": 0.14673467983546293, |
| "learning_rate": 2.339555568810221e-05, |
| "loss": 0.5101, |
| "step": 40 |
| }, |
| { |
| "epoch": 6.153846153846154, |
| "eval_loss": 0.5922083854675293, |
| "eval_runtime": 1.401, |
| "eval_samples_per_second": 14.276, |
| "eval_steps_per_second": 2.141, |
| "step": 40 |
| }, |
| { |
| "epoch": 6.923076923076923, |
| "grad_norm": 0.14279932168066684, |
| "learning_rate": 6.030737921409169e-06, |
| "loss": 0.481, |
| "step": 45 |
| }, |
| { |
| "epoch": 6.923076923076923, |
| "eval_loss": 0.5918287038803101, |
| "eval_runtime": 1.4019, |
| "eval_samples_per_second": 14.267, |
| "eval_steps_per_second": 2.14, |
| "step": 45 |
| }, |
| { |
| "epoch": 7.6923076923076925, |
| "grad_norm": 0.14269099669299248, |
| "learning_rate": 0.0, |
| "loss": 0.4784, |
| "step": 50 |
| }, |
| { |
| "epoch": 7.6923076923076925, |
| "eval_loss": 0.5915887951850891, |
| "eval_runtime": 1.4016, |
| "eval_samples_per_second": 14.27, |
| "eval_steps_per_second": 2.14, |
| "step": 50 |
| }, |
| { |
| "epoch": 7.6923076923076925, |
| "step": 50, |
| "total_flos": 1505882701824.0, |
| "train_loss": 0.673114869594574, |
| "train_runtime": 181.0651, |
| "train_samples_per_second": 4.418, |
| "train_steps_per_second": 0.276 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 50, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1505882701824.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|