| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 2000, |
| "global_step": 10710, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0001868460388639761, |
| "grad_norm": 8.692222595214844, |
| "learning_rate": 0.0, |
| "loss": 2.3163, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.37369207772795215, |
| "grad_norm": 5.377971172332764, |
| "learning_rate": 7.46776303494674e-06, |
| "loss": 1.7964, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.37369207772795215, |
| "eval_cosine_accuracy": 0.9531564116477966, |
| "eval_loss": 0.7834969162940979, |
| "eval_runtime": 24.5425, |
| "eval_samples_per_second": 385.332, |
| "eval_steps_per_second": 1.508, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.7473841554559043, |
| "grad_norm": 4.06288480758667, |
| "learning_rate": 1.4943001308166699e-05, |
| "loss": 1.1503, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.7473841554559043, |
| "eval_cosine_accuracy": 0.9590779542922974, |
| "eval_loss": 0.7469472885131836, |
| "eval_runtime": 24.2666, |
| "eval_samples_per_second": 389.712, |
| "eval_steps_per_second": 1.525, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.1209406494960805, |
| "grad_norm": 2.527675151824951, |
| "learning_rate": 1.9820278599859833e-05, |
| "loss": 0.8568, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.1209406494960805, |
| "eval_cosine_accuracy": 0.9657396674156189, |
| "eval_loss": 0.6957386136054993, |
| "eval_runtime": 24.2697, |
| "eval_samples_per_second": 389.662, |
| "eval_steps_per_second": 1.525, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.494214259051885, |
| "grad_norm": 4.718974590301514, |
| "learning_rate": 1.7133561457259503e-05, |
| "loss": 0.6922, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.494214259051885, |
| "eval_cosine_accuracy": 0.9674315452575684, |
| "eval_loss": 0.6658662557601929, |
| "eval_runtime": 24.1832, |
| "eval_samples_per_second": 391.057, |
| "eval_steps_per_second": 1.53, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.8674878686076894, |
| "grad_norm": 4.274268627166748, |
| "learning_rate": 1.2061411687112223e-05, |
| "loss": 0.6074, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.8674878686076894, |
| "eval_cosine_accuracy": 0.9702865481376648, |
| "eval_loss": 0.6324575543403625, |
| "eval_runtime": 24.354, |
| "eval_samples_per_second": 388.315, |
| "eval_steps_per_second": 1.519, |
| "step": 10000 |
| } |
| ], |
| "logging_steps": 2000, |
| "max_steps": 16056, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 256, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|