| { | |
| "best_metric": 2.5536394119262695, | |
| "best_model_checkpoint": "shawgpt-ft/checkpoint-13", | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 13, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.9230769230769231, | |
| "grad_norm": 2.1487667560577393, | |
| "learning_rate": 0.00019285714285714286, | |
| "loss": 4.5923, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.9230769230769231, | |
| "eval_loss": 3.957515239715576, | |
| "eval_runtime": 5.3861, | |
| "eval_samples_per_second": 1.671, | |
| "eval_steps_per_second": 0.557, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 1.8461538461538463, | |
| "grad_norm": 2.26593279838562, | |
| "learning_rate": 0.00017142857142857143, | |
| "loss": 4.0416, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 1.8461538461538463, | |
| "eval_loss": 3.429060697555542, | |
| "eval_runtime": 5.4348, | |
| "eval_samples_per_second": 1.656, | |
| "eval_steps_per_second": 0.552, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 2.769230769230769, | |
| "grad_norm": 2.352947950363159, | |
| "learning_rate": 0.00015000000000000001, | |
| "loss": 3.4652, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 2.769230769230769, | |
| "eval_loss": 2.976196527481079, | |
| "eval_runtime": 5.4009, | |
| "eval_samples_per_second": 1.666, | |
| "eval_steps_per_second": 0.555, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 2.3249692916870117, | |
| "learning_rate": 0.00012142857142857143, | |
| "loss": 2.2554, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 2.5536394119262695, | |
| "eval_runtime": 5.4046, | |
| "eval_samples_per_second": 1.665, | |
| "eval_steps_per_second": 0.555, | |
| "step": 13 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 30, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "total_flos": 43039318818816.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |