{ "best_global_step": 18750, "best_metric": 0.9103205410380567, "best_model_checkpoint": "./results_colab/checkpoint-18750", "epoch": 3.0, "eval_steps": 500, "global_step": 18750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14992, "grad_norm": 14.149627685546875, "learning_rate": 9.930666666666668e-06, "loss": 0.4914, "step": 937 }, { "epoch": 0.29984, "grad_norm": 8.850069046020508, "learning_rate": 1.9925333333333334e-05, "loss": 0.3457, "step": 1874 }, { "epoch": 0.44976, "grad_norm": 6.119201183319092, "learning_rate": 1.889777777777778e-05, "loss": 0.3199, "step": 2811 }, { "epoch": 0.59968, "grad_norm": 10.599540710449219, "learning_rate": 1.778725925925926e-05, "loss": 0.3056, "step": 3748 }, { "epoch": 0.7496, "grad_norm": 9.924508094787598, "learning_rate": 1.6677925925925925e-05, "loss": 0.2968, "step": 4685 }, { "epoch": 0.89952, "grad_norm": 7.44782018661499, "learning_rate": 1.556740740740741e-05, "loss": 0.2951, "step": 5622 }, { "epoch": 1.0, "eval_accuracy": 0.89456, "eval_f1": 0.8978136145138781, "eval_loss": 0.28082841634750366, "eval_runtime": 40.5573, "eval_samples_per_second": 616.412, "eval_steps_per_second": 19.281, "step": 6250 }, { "epoch": 1.04944, "grad_norm": 29.62865447998047, "learning_rate": 1.445688888888889e-05, "loss": 0.2643, "step": 6559 }, { "epoch": 1.19936, "grad_norm": 3.2325656414031982, "learning_rate": 1.3348740740740742e-05, "loss": 0.2375, "step": 7496 }, { "epoch": 1.34928, "grad_norm": 13.932371139526367, "learning_rate": 1.2238222222222222e-05, "loss": 0.2456, "step": 8433 }, { "epoch": 1.4992, "grad_norm": 21.482507705688477, "learning_rate": 1.1127703703703705e-05, "loss": 0.2427, "step": 9370 }, { "epoch": 1.64912, "grad_norm": 13.899144172668457, "learning_rate": 1.0017185185185185e-05, "loss": 0.2296, "step": 10307 }, { "epoch": 1.79904, "grad_norm": 10.691402435302734, "learning_rate": 8.906666666666667e-06, "loss": 0.2344, "step": 11244 }, { "epoch": 1.94896, "grad_norm": 24.025405883789062, "learning_rate": 7.797333333333334e-06, "loss": 0.2338, "step": 12181 }, { "epoch": 2.0, "eval_accuracy": 0.90812, "eval_f1": 0.9074797599387764, "eval_loss": 0.2600798010826111, "eval_runtime": 40.4948, "eval_samples_per_second": 617.364, "eval_steps_per_second": 19.311, "step": 12500 }, { "epoch": 2.09888, "grad_norm": 19.96436882019043, "learning_rate": 6.686814814814815e-06, "loss": 0.2024, "step": 13118 }, { "epoch": 2.2488, "grad_norm": 16.892995834350586, "learning_rate": 5.577481481481481e-06, "loss": 0.1841, "step": 14055 }, { "epoch": 2.39872, "grad_norm": 32.59613037109375, "learning_rate": 4.468148148148149e-06, "loss": 0.1809, "step": 14992 }, { "epoch": 2.54864, "grad_norm": 21.280284881591797, "learning_rate": 3.35762962962963e-06, "loss": 0.1873, "step": 15929 }, { "epoch": 2.69856, "grad_norm": 59.44868850708008, "learning_rate": 2.2471111111111115e-06, "loss": 0.1905, "step": 16866 }, { "epoch": 2.84848, "grad_norm": 0.4454441964626312, "learning_rate": 1.1365925925925928e-06, "loss": 0.1787, "step": 17803 }, { "epoch": 2.9984, "grad_norm": 19.224084854125977, "learning_rate": 2.725925925925926e-08, "loss": 0.1906, "step": 18740 }, { "epoch": 3.0, "eval_accuracy": 0.91036, "eval_f1": 0.9103205410380567, "eval_loss": 0.33635860681533813, "eval_runtime": 40.5996, "eval_samples_per_second": 615.77, "eval_steps_per_second": 19.261, "step": 18750 }, { "epoch": 3.0, "step": 18750, "total_flos": 1.955528518972608e+16, "train_loss": 0.25284169124603273, "train_runtime": 4088.9553, "train_samples_per_second": 73.368, "train_steps_per_second": 4.586 } ], "logging_steps": 937, "max_steps": 18750, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.955528518972608e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }