| { | |
| "best_global_step": 4906, | |
| "best_metric": 0.8757205622114191, | |
| "best_model_checkpoint": "outputs/mbert-spam-binary/checkpoint-4906", | |
| "epoch": 11.0, | |
| "eval_steps": 500, | |
| "global_step": 4906, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.5657051801681519, | |
| "learning_rate": 4.998771926094749e-05, | |
| "loss": 0.3358, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.8972904851921865, | |
| "eval_f1": 0.8565157558602947, | |
| "eval_loss": 0.3799394369125366, | |
| "eval_precision": 0.9093794413813263, | |
| "eval_recall": 0.8265391650419087, | |
| "eval_runtime": 1.8927, | |
| "eval_samples_per_second": 838.483, | |
| "eval_steps_per_second": 26.417, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 9.706869125366211, | |
| "learning_rate": 4.995077872180951e-05, | |
| "loss": 0.239, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.8853182104599874, | |
| "eval_f1": 0.8486677201394943, | |
| "eval_loss": 0.3240993022918701, | |
| "eval_precision": 0.86617355096017, | |
| "eval_recall": 0.8351329621515146, | |
| "eval_runtime": 1.8802, | |
| "eval_samples_per_second": 844.062, | |
| "eval_steps_per_second": 26.593, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 5.252721309661865, | |
| "learning_rate": 4.988921477641713e-05, | |
| "loss": 0.1874, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.8859483301827347, | |
| "eval_f1": 0.8551266047827633, | |
| "eval_loss": 0.31036534905433655, | |
| "eval_precision": 0.8564533765313355, | |
| "eval_recall": 0.853831078772286, | |
| "eval_runtime": 1.8827, | |
| "eval_samples_per_second": 842.955, | |
| "eval_steps_per_second": 26.558, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 7.976099967956543, | |
| "learning_rate": 4.980308818095173e-05, | |
| "loss": 0.1425, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.8645242596093258, | |
| "eval_f1": 0.833393799625288, | |
| "eval_loss": 0.3800713121891022, | |
| "eval_precision": 0.8251404013423738, | |
| "eval_recall": 0.8435217382565174, | |
| "eval_runtime": 1.8889, | |
| "eval_samples_per_second": 840.172, | |
| "eval_steps_per_second": 26.47, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 19.17702293395996, | |
| "learning_rate": 4.9692483931964865e-05, | |
| "loss": 0.1182, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.8928796471329553, | |
| "eval_f1": 0.8623913575713062, | |
| "eval_loss": 0.38209661841392517, | |
| "eval_precision": 0.8681797459435936, | |
| "eval_recall": 0.857123474905027, | |
| "eval_runtime": 1.891, | |
| "eval_samples_per_second": 839.244, | |
| "eval_steps_per_second": 26.441, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.38656169176101685, | |
| "learning_rate": 4.9557511182496865e-05, | |
| "loss": 0.0966, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.9042218021424071, | |
| "eval_f1": 0.8754265647593472, | |
| "eval_loss": 0.439815878868103, | |
| "eval_precision": 0.8869994596227571, | |
| "eval_recall": 0.8656328516009728, | |
| "eval_runtime": 1.889, | |
| "eval_samples_per_second": 840.135, | |
| "eval_steps_per_second": 26.469, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 15.262068748474121, | |
| "learning_rate": 4.939830313435599e-05, | |
| "loss": 0.0787, | |
| "step": 3122 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.8985507246376812, | |
| "eval_f1": 0.8709438719717468, | |
| "eval_loss": 0.500139594078064, | |
| "eval_precision": 0.8729009773098388, | |
| "eval_recall": 0.8690498683443548, | |
| "eval_runtime": 1.8913, | |
| "eval_samples_per_second": 839.098, | |
| "eval_steps_per_second": 26.437, | |
| "step": 3122 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 2.0876944065093994, | |
| "learning_rate": 4.9215016906664344e-05, | |
| "loss": 0.0827, | |
| "step": 3568 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.8979206049149339, | |
| "eval_f1": 0.8670210785043945, | |
| "eval_loss": 0.4308617115020752, | |
| "eval_precision": 0.8790773631628384, | |
| "eval_recall": 0.8569274989447448, | |
| "eval_runtime": 1.8901, | |
| "eval_samples_per_second": 839.645, | |
| "eval_steps_per_second": 26.454, | |
| "step": 3568 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 0.03602065145969391, | |
| "learning_rate": 4.900783338080024e-05, | |
| "loss": 0.0695, | |
| "step": 4014 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.9010712035286704, | |
| "eval_f1": 0.8671410632063625, | |
| "eval_loss": 0.650521457195282, | |
| "eval_precision": 0.8951755990488386, | |
| "eval_recall": 0.847398042250407, | |
| "eval_runtime": 1.8893, | |
| "eval_samples_per_second": 840.014, | |
| "eval_steps_per_second": 26.465, | |
| "step": 4014 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.15067948400974274, | |
| "learning_rate": 4.877695702189014e-05, | |
| "loss": 0.0551, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.8821676118462508, | |
| "eval_f1": 0.8511983345308572, | |
| "eval_loss": 0.53841632604599, | |
| "eval_precision": 0.8504388791271249, | |
| "eval_recall": 0.8519688046471428, | |
| "eval_runtime": 1.8908, | |
| "eval_samples_per_second": 839.349, | |
| "eval_steps_per_second": 26.445, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 0.37308648228645325, | |
| "learning_rate": 4.852261567702625e-05, | |
| "loss": 0.0544, | |
| "step": 4906 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.9029615626969124, | |
| "eval_f1": 0.8757205622114191, | |
| "eval_loss": 0.5222230553627014, | |
| "eval_precision": 0.8804347826086957, | |
| "eval_recall": 0.8713442945870435, | |
| "eval_runtime": 1.8866, | |
| "eval_samples_per_second": 841.185, | |
| "eval_steps_per_second": 26.502, | |
| "step": 4906 | |
| } | |
| ], | |
| "logging_steps": 446, | |
| "max_steps": 44600, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 100, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |