{
  "best_metric": 0.9136428329001622,
  "best_model_checkpoint": "./nlu_finetuned_models/stsb/roberta-base_lr1e-05/checkpoint-3240",
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 3240,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_averaged_scores": 0.8816061461419562,
      "eval_loss": 0.5334980487823486,
      "eval_pearson": 0.8896182775497437,
      "eval_runtime": 0.3519,
      "eval_samples_per_second": 1634.113,
      "eval_spearmanr": 0.8735940147341689,
      "eval_steps_per_second": 102.31,
      "step": 324
    },
    {
      "epoch": 1.5432098765432098,
      "grad_norm": 32.22338104248047,
      "learning_rate": 8.998357963875205e-06,
      "loss": 1.956,
      "step": 500
    },
    {
      "epoch": 2.0,
      "eval_averaged_scores": 0.8949135047425898,
      "eval_loss": 0.5024552345275879,
      "eval_pearson": 0.9004297852516174,
      "eval_runtime": 0.3515,
      "eval_samples_per_second": 1635.643,
      "eval_spearmanr": 0.889397224233562,
      "eval_steps_per_second": 102.405,
      "step": 648
    },
    {
      "epoch": 3.0,
      "eval_averaged_scores": 0.904582944984353,
      "eval_loss": 0.46397966146469116,
      "eval_pearson": 0.909096360206604,
      "eval_runtime": 0.3633,
      "eval_samples_per_second": 1582.51,
      "eval_spearmanr": 0.900069529762102,
      "eval_steps_per_second": 99.079,
      "step": 972
    },
    {
      "epoch": 3.0864197530864197,
      "grad_norm": 13.003278732299805,
      "learning_rate": 7.35632183908046e-06,
      "loss": 0.3528,
      "step": 1000
    },
    {
      "epoch": 4.0,
      "eval_averaged_scores": 0.912462184900082,
      "eval_loss": 0.41295871138572693,
      "eval_pearson": 0.9181002974510193,
      "eval_runtime": 0.3567,
      "eval_samples_per_second": 1611.944,
      "eval_spearmanr": 0.9068240723491446,
      "eval_steps_per_second": 100.922,
      "step": 1296
    },
    {
      "epoch": 4.62962962962963,
      "grad_norm": 21.318891525268555,
      "learning_rate": 5.7142857142857145e-06,
      "loss": 0.2328,
      "step": 1500
    },
    {
      "epoch": 5.0,
      "eval_averaged_scores": 0.9084589737456479,
      "eval_loss": 0.4895615577697754,
      "eval_pearson": 0.9136067628860474,
      "eval_runtime": 0.3493,
      "eval_samples_per_second": 1646.18,
      "eval_spearmanr": 0.9033111846052485,
      "eval_steps_per_second": 103.065,
      "step": 1620
    },
    {
      "epoch": 6.0,
      "eval_averaged_scores": 0.9130023419959661,
      "eval_loss": 0.3939087986946106,
      "eval_pearson": 0.9179745316505432,
      "eval_runtime": 0.3505,
      "eval_samples_per_second": 1640.644,
      "eval_spearmanr": 0.908030152341389,
      "eval_steps_per_second": 102.719,
      "step": 1944
    },
    {
      "epoch": 6.172839506172839,
      "grad_norm": 8.538512229919434,
      "learning_rate": 4.072249589490969e-06,
      "loss": 0.1757,
      "step": 2000
    },
    {
      "epoch": 7.0,
      "eval_averaged_scores": 0.9115858672183137,
      "eval_loss": 0.40987882018089294,
      "eval_pearson": 0.9166287183761597,
      "eval_runtime": 0.3535,
      "eval_samples_per_second": 1626.735,
      "eval_spearmanr": 0.9065430160604677,
      "eval_steps_per_second": 101.848,
      "step": 2268
    },
    {
      "epoch": 7.716049382716049,
      "grad_norm": 9.155678749084473,
      "learning_rate": 2.4302134646962236e-06,
      "loss": 0.143,
      "step": 2500
    },
    {
      "epoch": 8.0,
      "eval_averaged_scores": 0.9127395750571425,
      "eval_loss": 0.39697495102882385,
      "eval_pearson": 0.9181188344955444,
      "eval_runtime": 0.3605,
      "eval_samples_per_second": 1594.808,
      "eval_spearmanr": 0.9073603156187405,
      "eval_steps_per_second": 99.849,
      "step": 2592
    },
    {
      "epoch": 9.0,
      "eval_averaged_scores": 0.9129500638808765,
      "eval_loss": 0.4033946394920349,
      "eval_pearson": 0.9185035824775696,
      "eval_runtime": 0.3524,
      "eval_samples_per_second": 1631.444,
      "eval_spearmanr": 0.9073965452841836,
      "eval_steps_per_second": 102.143,
      "step": 2916
    },
    {
      "epoch": 9.25925925925926,
      "grad_norm": 13.51919174194336,
      "learning_rate": 7.881773399014779e-07,
      "loss": 0.1187,
      "step": 3000
    },
    {
      "epoch": 10.0,
      "eval_averaged_scores": 0.9136428329001622,
      "eval_loss": 0.4020698368549347,
      "eval_pearson": 0.919518232345581,
      "eval_runtime": 0.3311,
      "eval_samples_per_second": 1736.654,
      "eval_spearmanr": 0.9077674334547435,
      "eval_steps_per_second": 108.73,
      "step": 3240
    },
    {
      "epoch": 10.0,
      "step": 3240,
      "total_flos": 1687665489390576.0,
      "train_loss": 0.4677616125271644,
      "train_runtime": 225.5275,
      "train_samples_per_second": 229.418,
      "train_steps_per_second": 14.366
    }
  ],
  "logging_steps": 500,
  "max_steps": 3240,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1687665489390576.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}