{ "best_global_step": 4906, "best_metric": 0.8757205622114191, "best_model_checkpoint": "outputs/mbert-spam-binary/checkpoint-4906", "epoch": 11.0, "eval_steps": 500, "global_step": 4906, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.5657051801681519, "learning_rate": 4.998771926094749e-05, "loss": 0.3358, "step": 446 }, { "epoch": 1.0, "eval_accuracy": 0.8972904851921865, "eval_f1": 0.8565157558602947, "eval_loss": 0.3799394369125366, "eval_precision": 0.9093794413813263, "eval_recall": 0.8265391650419087, "eval_runtime": 1.8927, "eval_samples_per_second": 838.483, "eval_steps_per_second": 26.417, "step": 446 }, { "epoch": 2.0, "grad_norm": 9.706869125366211, "learning_rate": 4.995077872180951e-05, "loss": 0.239, "step": 892 }, { "epoch": 2.0, "eval_accuracy": 0.8853182104599874, "eval_f1": 0.8486677201394943, "eval_loss": 0.3240993022918701, "eval_precision": 0.86617355096017, "eval_recall": 0.8351329621515146, "eval_runtime": 1.8802, "eval_samples_per_second": 844.062, "eval_steps_per_second": 26.593, "step": 892 }, { "epoch": 3.0, "grad_norm": 5.252721309661865, "learning_rate": 4.988921477641713e-05, "loss": 0.1874, "step": 1338 }, { "epoch": 3.0, "eval_accuracy": 0.8859483301827347, "eval_f1": 0.8551266047827633, "eval_loss": 0.31036534905433655, "eval_precision": 0.8564533765313355, "eval_recall": 0.853831078772286, "eval_runtime": 1.8827, "eval_samples_per_second": 842.955, "eval_steps_per_second": 26.558, "step": 1338 }, { "epoch": 4.0, "grad_norm": 7.976099967956543, "learning_rate": 4.980308818095173e-05, "loss": 0.1425, "step": 1784 }, { "epoch": 4.0, "eval_accuracy": 0.8645242596093258, "eval_f1": 0.833393799625288, "eval_loss": 0.3800713121891022, "eval_precision": 0.8251404013423738, "eval_recall": 0.8435217382565174, "eval_runtime": 1.8889, "eval_samples_per_second": 840.172, "eval_steps_per_second": 26.47, "step": 1784 }, { "epoch": 5.0, "grad_norm": 19.17702293395996, "learning_rate": 4.9692483931964865e-05, "loss": 0.1182, "step": 2230 }, { "epoch": 5.0, "eval_accuracy": 0.8928796471329553, "eval_f1": 0.8623913575713062, "eval_loss": 0.38209661841392517, "eval_precision": 0.8681797459435936, "eval_recall": 0.857123474905027, "eval_runtime": 1.891, "eval_samples_per_second": 839.244, "eval_steps_per_second": 26.441, "step": 2230 }, { "epoch": 6.0, "grad_norm": 0.38656169176101685, "learning_rate": 4.9557511182496865e-05, "loss": 0.0966, "step": 2676 }, { "epoch": 6.0, "eval_accuracy": 0.9042218021424071, "eval_f1": 0.8754265647593472, "eval_loss": 0.439815878868103, "eval_precision": 0.8869994596227571, "eval_recall": 0.8656328516009728, "eval_runtime": 1.889, "eval_samples_per_second": 840.135, "eval_steps_per_second": 26.469, "step": 2676 }, { "epoch": 7.0, "grad_norm": 15.262068748474121, "learning_rate": 4.939830313435599e-05, "loss": 0.0787, "step": 3122 }, { "epoch": 7.0, "eval_accuracy": 0.8985507246376812, "eval_f1": 0.8709438719717468, "eval_loss": 0.500139594078064, "eval_precision": 0.8729009773098388, "eval_recall": 0.8690498683443548, "eval_runtime": 1.8913, "eval_samples_per_second": 839.098, "eval_steps_per_second": 26.437, "step": 3122 }, { "epoch": 8.0, "grad_norm": 2.0876944065093994, "learning_rate": 4.9215016906664344e-05, "loss": 0.0827, "step": 3568 }, { "epoch": 8.0, "eval_accuracy": 0.8979206049149339, "eval_f1": 0.8670210785043945, "eval_loss": 0.4308617115020752, "eval_precision": 0.8790773631628384, "eval_recall": 0.8569274989447448, "eval_runtime": 1.8901, "eval_samples_per_second": 839.645, "eval_steps_per_second": 26.454, "step": 3568 }, { "epoch": 9.0, "grad_norm": 0.03602065145969391, "learning_rate": 4.900783338080024e-05, "loss": 0.0695, "step": 4014 }, { "epoch": 9.0, "eval_accuracy": 0.9010712035286704, "eval_f1": 0.8671410632063625, "eval_loss": 0.650521457195282, "eval_precision": 0.8951755990488386, "eval_recall": 0.847398042250407, "eval_runtime": 1.8893, "eval_samples_per_second": 840.014, "eval_steps_per_second": 26.465, "step": 4014 }, { "epoch": 10.0, "grad_norm": 0.15067948400974274, "learning_rate": 4.877695702189014e-05, "loss": 0.0551, "step": 4460 }, { "epoch": 10.0, "eval_accuracy": 0.8821676118462508, "eval_f1": 0.8511983345308572, "eval_loss": 0.53841632604599, "eval_precision": 0.8504388791271249, "eval_recall": 0.8519688046471428, "eval_runtime": 1.8908, "eval_samples_per_second": 839.349, "eval_steps_per_second": 26.445, "step": 4460 }, { "epoch": 11.0, "grad_norm": 0.37308648228645325, "learning_rate": 4.852261567702625e-05, "loss": 0.0544, "step": 4906 }, { "epoch": 11.0, "eval_accuracy": 0.9029615626969124, "eval_f1": 0.8757205622114191, "eval_loss": 0.5222230553627014, "eval_precision": 0.8804347826086957, "eval_recall": 0.8713442945870435, "eval_runtime": 1.8866, "eval_samples_per_second": 841.185, "eval_steps_per_second": 26.502, "step": 4906 } ], "logging_steps": 446, "max_steps": 44600, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }