| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.9948253557567917, |
| "eval_steps": 500, |
| "global_step": 2316, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.5227811336517334, |
| "learning_rate": 0.0001501296456352636, |
| "loss": 0.8744, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.7734729051589966, |
| "eval_runtime": 6.5998, |
| "eval_samples_per_second": 7.576, |
| "eval_steps_per_second": 1.97, |
| "step": 579 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.5702121257781982, |
| "learning_rate": 0.0001, |
| "loss": 0.718, |
| "step": 1159 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.7661585211753845, |
| "eval_runtime": 6.6092, |
| "eval_samples_per_second": 7.565, |
| "eval_steps_per_second": 1.967, |
| "step": 1159 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.8033292889595032, |
| "learning_rate": 4.987035436473639e-05, |
| "loss": 0.6211, |
| "step": 1739 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 0.8084017038345337, |
| "eval_runtime": 6.6152, |
| "eval_samples_per_second": 7.558, |
| "eval_steps_per_second": 1.965, |
| "step": 1739 |
| }, |
| { |
| "epoch": 3.99, |
| "grad_norm": 1.1490143537521362, |
| "learning_rate": 0.0, |
| "loss": 0.4965, |
| "step": 2316 |
| }, |
| { |
| "epoch": 3.99, |
| "eval_loss": 0.9265226721763611, |
| "eval_runtime": 6.5997, |
| "eval_samples_per_second": 7.576, |
| "eval_steps_per_second": 1.97, |
| "step": 2316 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 2316, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 500, |
| "total_flos": 1.0206182844745728e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|