{ "best_metric": 0.9136428329001622, "best_model_checkpoint": "./nlu_finetuned_models/stsb/roberta-base_lr1e-05/checkpoint-3240", "epoch": 10.0, "eval_steps": 500, "global_step": 3240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_averaged_scores": 0.8816061461419562, "eval_loss": 0.5334980487823486, "eval_pearson": 0.8896182775497437, "eval_runtime": 0.3519, "eval_samples_per_second": 1634.113, "eval_spearmanr": 0.8735940147341689, "eval_steps_per_second": 102.31, "step": 324 }, { "epoch": 1.5432098765432098, "grad_norm": 32.22338104248047, "learning_rate": 8.998357963875205e-06, "loss": 1.956, "step": 500 }, { "epoch": 2.0, "eval_averaged_scores": 0.8949135047425898, "eval_loss": 0.5024552345275879, "eval_pearson": 0.9004297852516174, "eval_runtime": 0.3515, "eval_samples_per_second": 1635.643, "eval_spearmanr": 0.889397224233562, "eval_steps_per_second": 102.405, "step": 648 }, { "epoch": 3.0, "eval_averaged_scores": 0.904582944984353, "eval_loss": 0.46397966146469116, "eval_pearson": 0.909096360206604, "eval_runtime": 0.3633, "eval_samples_per_second": 1582.51, "eval_spearmanr": 0.900069529762102, "eval_steps_per_second": 99.079, "step": 972 }, { "epoch": 3.0864197530864197, "grad_norm": 13.003278732299805, "learning_rate": 7.35632183908046e-06, "loss": 0.3528, "step": 1000 }, { "epoch": 4.0, "eval_averaged_scores": 0.912462184900082, "eval_loss": 0.41295871138572693, "eval_pearson": 0.9181002974510193, "eval_runtime": 0.3567, "eval_samples_per_second": 1611.944, "eval_spearmanr": 0.9068240723491446, "eval_steps_per_second": 100.922, "step": 1296 }, { "epoch": 4.62962962962963, "grad_norm": 21.318891525268555, "learning_rate": 5.7142857142857145e-06, "loss": 0.2328, "step": 1500 }, { "epoch": 5.0, "eval_averaged_scores": 0.9084589737456479, "eval_loss": 0.4895615577697754, "eval_pearson": 0.9136067628860474, "eval_runtime": 0.3493, "eval_samples_per_second": 1646.18, "eval_spearmanr": 0.9033111846052485, "eval_steps_per_second": 103.065, "step": 1620 }, { "epoch": 6.0, "eval_averaged_scores": 0.9130023419959661, "eval_loss": 0.3939087986946106, "eval_pearson": 0.9179745316505432, "eval_runtime": 0.3505, "eval_samples_per_second": 1640.644, "eval_spearmanr": 0.908030152341389, "eval_steps_per_second": 102.719, "step": 1944 }, { "epoch": 6.172839506172839, "grad_norm": 8.538512229919434, "learning_rate": 4.072249589490969e-06, "loss": 0.1757, "step": 2000 }, { "epoch": 7.0, "eval_averaged_scores": 0.9115858672183137, "eval_loss": 0.40987882018089294, "eval_pearson": 0.9166287183761597, "eval_runtime": 0.3535, "eval_samples_per_second": 1626.735, "eval_spearmanr": 0.9065430160604677, "eval_steps_per_second": 101.848, "step": 2268 }, { "epoch": 7.716049382716049, "grad_norm": 9.155678749084473, "learning_rate": 2.4302134646962236e-06, "loss": 0.143, "step": 2500 }, { "epoch": 8.0, "eval_averaged_scores": 0.9127395750571425, "eval_loss": 0.39697495102882385, "eval_pearson": 0.9181188344955444, "eval_runtime": 0.3605, "eval_samples_per_second": 1594.808, "eval_spearmanr": 0.9073603156187405, "eval_steps_per_second": 99.849, "step": 2592 }, { "epoch": 9.0, "eval_averaged_scores": 0.9129500638808765, "eval_loss": 0.4033946394920349, "eval_pearson": 0.9185035824775696, "eval_runtime": 0.3524, "eval_samples_per_second": 1631.444, "eval_spearmanr": 0.9073965452841836, "eval_steps_per_second": 102.143, "step": 2916 }, { "epoch": 9.25925925925926, "grad_norm": 13.51919174194336, "learning_rate": 7.881773399014779e-07, "loss": 0.1187, "step": 3000 }, { "epoch": 10.0, "eval_averaged_scores": 0.9136428329001622, "eval_loss": 0.4020698368549347, "eval_pearson": 0.919518232345581, "eval_runtime": 0.3311, "eval_samples_per_second": 1736.654, "eval_spearmanr": 0.9077674334547435, "eval_steps_per_second": 108.73, "step": 3240 }, { "epoch": 10.0, "step": 3240, "total_flos": 1687665489390576.0, "train_loss": 0.4677616125271644, "train_runtime": 225.5275, "train_samples_per_second": 229.418, "train_steps_per_second": 14.366 } ], "logging_steps": 500, "max_steps": 3240, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1687665489390576.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }