| { |
| "best_global_step": 1984, |
| "best_metric": 3.17850399017334, |
| "best_model_checkpoint": null, |
| "epoch": 4.032258064516129, |
| "eval_steps": 500, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0020161290322580645, |
| "grad_norm": 28.973712921142578, |
| "learning_rate": 0.0, |
| "loss": 10.4319, |
| "step": 1 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 5.415546417236328, |
| "learning_rate": 9.900000000000001e-05, |
| "loss": 4.585, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 3.5159876346588135, |
| "eval_runtime": 19.7164, |
| "eval_samples_per_second": 1614.748, |
| "eval_steps_per_second": 6.34, |
| "step": 496 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 3.859699249267578, |
| "learning_rate": 9.477659574468086e-05, |
| "loss": 3.3235, |
| "step": 992 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 3.300523519515991, |
| "eval_runtime": 19.7153, |
| "eval_samples_per_second": 1614.835, |
| "eval_steps_per_second": 6.34, |
| "step": 992 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 4.334104061126709, |
| "learning_rate": 8.950000000000001e-05, |
| "loss": 3.1692, |
| "step": 1488 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 3.223573923110962, |
| "eval_runtime": 19.6395, |
| "eval_samples_per_second": 1621.068, |
| "eval_steps_per_second": 6.365, |
| "step": 1488 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 3.5572965145111084, |
| "learning_rate": 8.422340425531914e-05, |
| "loss": 3.0793, |
| "step": 1984 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 3.17850399017334, |
| "eval_runtime": 19.9743, |
| "eval_samples_per_second": 1593.897, |
| "eval_steps_per_second": 6.258, |
| "step": 1984 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 9920, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.3379534503936e+16, |
| "train_batch_size": 256, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|