| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.05763688760806916, |
| "eval_steps": 10, |
| "global_step": 20, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01440922190201729, |
| "grad_norm": 0.888121485710144, |
| "learning_rate": 0.0002988472622478386, |
| "loss": 2.4115, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.02881844380403458, |
| "grad_norm": 0.8970298767089844, |
| "learning_rate": 0.00029740634005763684, |
| "loss": 2.23, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02881844380403458, |
| "eval_loss": 2.165903091430664, |
| "eval_runtime": 1.8407, |
| "eval_samples_per_second": 84.207, |
| "eval_steps_per_second": 10.865, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.043227665706051875, |
| "grad_norm": 1.078068494796753, |
| "learning_rate": 0.00029596541786743513, |
| "loss": 2.0155, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.05763688760806916, |
| "grad_norm": 1.2069385051727295, |
| "learning_rate": 0.0002945244956772334, |
| "loss": 1.9346, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.05763688760806916, |
| "eval_loss": 1.7943660020828247, |
| "eval_runtime": 1.7809, |
| "eval_samples_per_second": 87.035, |
| "eval_steps_per_second": 11.23, |
| "step": 20 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 1041, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 10, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 254518587555840.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|