| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 10.0, |
| "eval_steps": 500, |
| "global_step": 8510, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.1492145181693354, |
| "eval_loss": 6.3642778396606445, |
| "eval_runtime": 4.1353, |
| "eval_samples_per_second": 43.77, |
| "eval_steps_per_second": 1.451, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.1750881316098707, |
| "grad_norm": 10913.1396484375, |
| "learning_rate": 0.0005993999999999999, |
| "loss": 1.5936, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.16200495183315133, |
| "eval_loss": 6.032303333282471, |
| "eval_runtime": 3.7669, |
| "eval_samples_per_second": 48.05, |
| "eval_steps_per_second": 1.593, |
| "step": 1702 |
| }, |
| { |
| "epoch": 2.3501762632197414, |
| "grad_norm": 9681.1015625, |
| "learning_rate": 0.0005201864181091877, |
| "loss": 1.4068, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.18210420473343353, |
| "eval_loss": 5.742514610290527, |
| "eval_runtime": 3.6239, |
| "eval_samples_per_second": 49.946, |
| "eval_steps_per_second": 1.656, |
| "step": 2553 |
| }, |
| { |
| "epoch": 3.525264394829612, |
| "grad_norm": 10410.8232421875, |
| "learning_rate": 0.0004402929427430093, |
| "loss": 1.3224, |
| "step": 3000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.19439729270956094, |
| "eval_loss": 5.552978038787842, |
| "eval_runtime": 3.6717, |
| "eval_samples_per_second": 49.296, |
| "eval_steps_per_second": 1.634, |
| "step": 3404 |
| }, |
| { |
| "epoch": 4.700352526439483, |
| "grad_norm": 10486.552734375, |
| "learning_rate": 0.0003603994673768309, |
| "loss": 1.2597, |
| "step": 4000 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.20436583018888324, |
| "eval_loss": 5.43747091293335, |
| "eval_runtime": 3.6239, |
| "eval_samples_per_second": 49.946, |
| "eval_steps_per_second": 1.656, |
| "step": 4255 |
| }, |
| { |
| "epoch": 5.875440658049354, |
| "grad_norm": 10774.3271484375, |
| "learning_rate": 0.00028050599201065245, |
| "loss": 1.2168, |
| "step": 5000 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.2138262101177412, |
| "eval_loss": 5.3256096839904785, |
| "eval_runtime": 3.6215, |
| "eval_samples_per_second": 49.979, |
| "eval_steps_per_second": 1.657, |
| "step": 5106 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.2211242174914316, |
| "eval_loss": 5.247638702392578, |
| "eval_runtime": 3.6307, |
| "eval_samples_per_second": 49.852, |
| "eval_steps_per_second": 1.653, |
| "step": 5957 |
| }, |
| { |
| "epoch": 7.050528789659224, |
| "grad_norm": 11328.8466796875, |
| "learning_rate": 0.00020061251664447402, |
| "loss": 1.1808, |
| "step": 6000 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.2280438096679677, |
| "eval_loss": 5.182240962982178, |
| "eval_runtime": 3.6289, |
| "eval_samples_per_second": 49.878, |
| "eval_steps_per_second": 1.653, |
| "step": 6808 |
| }, |
| { |
| "epoch": 8.225616921269095, |
| "grad_norm": 11665.4501953125, |
| "learning_rate": 0.0001207190412782956, |
| "loss": 1.1503, |
| "step": 7000 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.23310376144705972, |
| "eval_loss": 5.128500461578369, |
| "eval_runtime": 3.6237, |
| "eval_samples_per_second": 49.949, |
| "eval_steps_per_second": 1.656, |
| "step": 7659 |
| }, |
| { |
| "epoch": 9.400705052878966, |
| "grad_norm": 12125.7978515625, |
| "learning_rate": 4.082556591211717e-05, |
| "loss": 1.1255, |
| "step": 8000 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.23701765577191294, |
| "eval_loss": 5.099877834320068, |
| "eval_runtime": 3.6231, |
| "eval_samples_per_second": 49.957, |
| "eval_steps_per_second": 1.656, |
| "step": 8510 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 8510, |
| "total_flos": 7.112107819008e+16, |
| "train_loss": 1.2718536484535656, |
| "train_runtime": 8434.8319, |
| "train_samples_per_second": 32.27, |
| "train_steps_per_second": 1.009 |
| } |
| ], |
| "logging_steps": 1000, |
| "max_steps": 8510, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.112107819008e+16, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|