| { |
| "best_global_step": 2000, |
| "best_metric": 0.07912886142730713, |
| "best_model_checkpoint": "./training_output/checkpoint-2000", |
| "epoch": 0.5, |
| "eval_steps": 1000, |
| "global_step": 4000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0625, |
| "grad_norm": 1.5915584564208984, |
| "learning_rate": 1.8752500000000004e-05, |
| "loss": 0.1267, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.125, |
| "grad_norm": 9.455477714538574, |
| "learning_rate": 1.7502500000000004e-05, |
| "loss": 0.0979, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.125, |
| "eval_accuracy": 0.900375, |
| "eval_loss": 0.16814623773097992, |
| "eval_runtime": 520.3566, |
| "eval_samples_per_second": 15.374, |
| "eval_steps_per_second": 0.961, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.1875, |
| "grad_norm": 0.07351929694414139, |
| "learning_rate": 1.62525e-05, |
| "loss": 0.0782, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.13799075782299042, |
| "learning_rate": 1.5002500000000002e-05, |
| "loss": 0.0618, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.25, |
| "eval_accuracy": 0.923125, |
| "eval_loss": 0.07912886142730713, |
| "eval_runtime": 529.7101, |
| "eval_samples_per_second": 15.103, |
| "eval_steps_per_second": 0.944, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.3125, |
| "grad_norm": 1.579990029335022, |
| "learning_rate": 1.3752500000000003e-05, |
| "loss": 0.056, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.375, |
| "grad_norm": 0.19930818676948547, |
| "learning_rate": 1.2502500000000003e-05, |
| "loss": 0.0598, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.375, |
| "eval_accuracy": 0.9135, |
| "eval_loss": 0.08944346010684967, |
| "eval_runtime": 526.2119, |
| "eval_samples_per_second": 15.203, |
| "eval_steps_per_second": 0.95, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.4375, |
| "grad_norm": 10.261446952819824, |
| "learning_rate": 1.1252500000000001e-05, |
| "loss": 0.0581, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 6.691280364990234, |
| "learning_rate": 1.0002500000000001e-05, |
| "loss": 0.1161, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.5, |
| "eval_accuracy": 0.350625, |
| "eval_loss": 0.22534234821796417, |
| "eval_runtime": 518.3611, |
| "eval_samples_per_second": 15.433, |
| "eval_steps_per_second": 0.965, |
| "step": 4000 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 8000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.4910901813248e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|