{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 10510, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9514747859181731, "grad_norm": 12745.587890625, "learning_rate": 0.0005993999999999999, "loss": 1.2234, "step": 1000 }, { "epoch": 1.0, "eval_accuracy": 0.22268764859051285, "eval_loss": 4.631209373474121, "eval_runtime": 6.498, "eval_samples_per_second": 37.242, "eval_steps_per_second": 1.231, "step": 1051 }, { "epoch": 1.9029495718363463, "grad_norm": 11723.8271484375, "learning_rate": 0.0005369716088328075, "loss": 1.015, "step": 2000 }, { "epoch": 2.0, "eval_accuracy": 0.26512590771619415, "eval_loss": 4.201041221618652, "eval_runtime": 5.77, "eval_samples_per_second": 41.941, "eval_steps_per_second": 1.386, "step": 2102 }, { "epoch": 2.8544243577545196, "grad_norm": 10506.5712890625, "learning_rate": 0.00047388012618296526, "loss": 0.9468, "step": 3000 }, { "epoch": 3.0, "eval_accuracy": 0.28578706474098753, "eval_loss": 4.014913558959961, "eval_runtime": 5.9155, "eval_samples_per_second": 40.91, "eval_steps_per_second": 1.352, "step": 3153 }, { "epoch": 3.8058991436726926, "grad_norm": 9833.72265625, "learning_rate": 0.000410788643533123, "loss": 0.9083, "step": 4000 }, { "epoch": 4.0, "eval_accuracy": 0.30322977147385616, "eval_loss": 3.8843796253204346, "eval_runtime": 5.9819, "eval_samples_per_second": 40.455, "eval_steps_per_second": 1.337, "step": 4204 }, { "epoch": 4.757373929590866, "grad_norm": 9816.0380859375, "learning_rate": 0.0003476971608832807, "loss": 0.8769, "step": 5000 }, { "epoch": 5.0, "eval_accuracy": 0.3162653038120037, "eval_loss": 3.772235870361328, "eval_runtime": 5.9202, "eval_samples_per_second": 40.877, "eval_steps_per_second": 1.351, "step": 5255 }, { "epoch": 5.708848715509039, "grad_norm": 9225.9716796875, "learning_rate": 0.00028460567823343844, "loss": 0.8502, "step": 6000 }, { "epoch": 6.0, "eval_accuracy": 0.32605003962413676, "eval_loss": 3.6949210166931152, "eval_runtime": 5.9591, "eval_samples_per_second": 40.61, "eval_steps_per_second": 1.342, "step": 6306 }, { "epoch": 6.660323501427213, "grad_norm": 9708.4267578125, "learning_rate": 0.00022151419558359621, "loss": 0.8276, "step": 7000 }, { "epoch": 7.0, "eval_accuracy": 0.33672429687373645, "eval_loss": 3.613666296005249, "eval_runtime": 5.9886, "eval_samples_per_second": 40.41, "eval_steps_per_second": 1.336, "step": 7357 }, { "epoch": 7.611798287345385, "grad_norm": 9902.1474609375, "learning_rate": 0.00015842271293375394, "loss": 0.8078, "step": 8000 }, { "epoch": 8.0, "eval_accuracy": 0.3435170060325727, "eval_loss": 3.560777425765991, "eval_runtime": 5.8919, "eval_samples_per_second": 41.073, "eval_steps_per_second": 1.358, "step": 8408 }, { "epoch": 8.56327307326356, "grad_norm": 9983.5361328125, "learning_rate": 9.533123028391166e-05, "loss": 0.7895, "step": 9000 }, { "epoch": 9.0, "eval_accuracy": 0.3506978700004852, "eval_loss": 3.5129082202911377, "eval_runtime": 6.0614, "eval_samples_per_second": 39.925, "eval_steps_per_second": 1.32, "step": 9459 }, { "epoch": 9.514747859181732, "grad_norm": 10257.3037109375, "learning_rate": 3.22397476340694e-05, "loss": 0.7746, "step": 10000 }, { "epoch": 10.0, "eval_accuracy": 0.35421552295773967, "eval_loss": 3.491698741912842, "eval_runtime": 5.9119, "eval_samples_per_second": 40.935, "eval_steps_per_second": 1.353, "step": 10510 }, { "epoch": 10.0, "step": 10510, "total_flos": 8.78333165568e+16, "train_loss": 0.8954192244133191, "train_runtime": 9693.5758, "train_samples_per_second": 34.678, "train_steps_per_second": 1.084 } ], "logging_steps": 1000, "max_steps": 10510, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.78333165568e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }