| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 20.0, | |
| "eval_steps": 500, | |
| "global_step": 4840, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 2.0661157024793386, | |
| "grad_norm": 4.206299781799316, | |
| "learning_rate": 4.4834710743801654e-05, | |
| "loss": 4.2802, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 4.132231404958677, | |
| "grad_norm": 4.235241889953613, | |
| "learning_rate": 3.9669421487603306e-05, | |
| "loss": 3.9317, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 6.198347107438017, | |
| "grad_norm": 4.2089643478393555, | |
| "learning_rate": 3.4504132231404964e-05, | |
| "loss": 3.7086, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 8.264462809917354, | |
| "grad_norm": 3.783989191055298, | |
| "learning_rate": 2.9338842975206616e-05, | |
| "loss": 3.5136, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 10.330578512396695, | |
| "grad_norm": 3.957587242126465, | |
| "learning_rate": 2.4173553719008264e-05, | |
| "loss": 3.3546, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 12.396694214876034, | |
| "grad_norm": 4.028080463409424, | |
| "learning_rate": 1.900826446280992e-05, | |
| "loss": 3.2309, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 14.462809917355372, | |
| "grad_norm": 4.280130863189697, | |
| "learning_rate": 1.3842975206611573e-05, | |
| "loss": 3.1278, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 16.52892561983471, | |
| "grad_norm": 3.9069178104400635, | |
| "learning_rate": 8.677685950413224e-06, | |
| "loss": 3.0433, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 18.59504132231405, | |
| "grad_norm": 4.312071800231934, | |
| "learning_rate": 3.5123966942148763e-06, | |
| "loss": 3.0038, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "step": 4840, | |
| "total_flos": 5058613739520000.0, | |
| "train_loss": 3.4317847007562308, | |
| "train_runtime": 1978.8511, | |
| "train_samples_per_second": 4.892, | |
| "train_steps_per_second": 2.446 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 4840, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5058613739520000.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |