| { |
| "best_metric": 0.9136428329001622, |
| "best_model_checkpoint": "./nlu_finetuned_models/stsb/roberta-base_lr1e-05/checkpoint-3240", |
| "epoch": 10.0, |
| "eval_steps": 500, |
| "global_step": 3240, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "eval_averaged_scores": 0.8816061461419562, |
| "eval_loss": 0.5334980487823486, |
| "eval_pearson": 0.8896182775497437, |
| "eval_runtime": 0.3519, |
| "eval_samples_per_second": 1634.113, |
| "eval_spearmanr": 0.8735940147341689, |
| "eval_steps_per_second": 102.31, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.5432098765432098, |
| "grad_norm": 32.22338104248047, |
| "learning_rate": 8.998357963875205e-06, |
| "loss": 1.956, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_averaged_scores": 0.8949135047425898, |
| "eval_loss": 0.5024552345275879, |
| "eval_pearson": 0.9004297852516174, |
| "eval_runtime": 0.3515, |
| "eval_samples_per_second": 1635.643, |
| "eval_spearmanr": 0.889397224233562, |
| "eval_steps_per_second": 102.405, |
| "step": 648 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_averaged_scores": 0.904582944984353, |
| "eval_loss": 0.46397966146469116, |
| "eval_pearson": 0.909096360206604, |
| "eval_runtime": 0.3633, |
| "eval_samples_per_second": 1582.51, |
| "eval_spearmanr": 0.900069529762102, |
| "eval_steps_per_second": 99.079, |
| "step": 972 |
| }, |
| { |
| "epoch": 3.0864197530864197, |
| "grad_norm": 13.003278732299805, |
| "learning_rate": 7.35632183908046e-06, |
| "loss": 0.3528, |
| "step": 1000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_averaged_scores": 0.912462184900082, |
| "eval_loss": 0.41295871138572693, |
| "eval_pearson": 0.9181002974510193, |
| "eval_runtime": 0.3567, |
| "eval_samples_per_second": 1611.944, |
| "eval_spearmanr": 0.9068240723491446, |
| "eval_steps_per_second": 100.922, |
| "step": 1296 |
| }, |
| { |
| "epoch": 4.62962962962963, |
| "grad_norm": 21.318891525268555, |
| "learning_rate": 5.7142857142857145e-06, |
| "loss": 0.2328, |
| "step": 1500 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_averaged_scores": 0.9084589737456479, |
| "eval_loss": 0.4895615577697754, |
| "eval_pearson": 0.9136067628860474, |
| "eval_runtime": 0.3493, |
| "eval_samples_per_second": 1646.18, |
| "eval_spearmanr": 0.9033111846052485, |
| "eval_steps_per_second": 103.065, |
| "step": 1620 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_averaged_scores": 0.9130023419959661, |
| "eval_loss": 0.3939087986946106, |
| "eval_pearson": 0.9179745316505432, |
| "eval_runtime": 0.3505, |
| "eval_samples_per_second": 1640.644, |
| "eval_spearmanr": 0.908030152341389, |
| "eval_steps_per_second": 102.719, |
| "step": 1944 |
| }, |
| { |
| "epoch": 6.172839506172839, |
| "grad_norm": 8.538512229919434, |
| "learning_rate": 4.072249589490969e-06, |
| "loss": 0.1757, |
| "step": 2000 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_averaged_scores": 0.9115858672183137, |
| "eval_loss": 0.40987882018089294, |
| "eval_pearson": 0.9166287183761597, |
| "eval_runtime": 0.3535, |
| "eval_samples_per_second": 1626.735, |
| "eval_spearmanr": 0.9065430160604677, |
| "eval_steps_per_second": 101.848, |
| "step": 2268 |
| }, |
| { |
| "epoch": 7.716049382716049, |
| "grad_norm": 9.155678749084473, |
| "learning_rate": 2.4302134646962236e-06, |
| "loss": 0.143, |
| "step": 2500 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_averaged_scores": 0.9127395750571425, |
| "eval_loss": 0.39697495102882385, |
| "eval_pearson": 0.9181188344955444, |
| "eval_runtime": 0.3605, |
| "eval_samples_per_second": 1594.808, |
| "eval_spearmanr": 0.9073603156187405, |
| "eval_steps_per_second": 99.849, |
| "step": 2592 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_averaged_scores": 0.9129500638808765, |
| "eval_loss": 0.4033946394920349, |
| "eval_pearson": 0.9185035824775696, |
| "eval_runtime": 0.3524, |
| "eval_samples_per_second": 1631.444, |
| "eval_spearmanr": 0.9073965452841836, |
| "eval_steps_per_second": 102.143, |
| "step": 2916 |
| }, |
| { |
| "epoch": 9.25925925925926, |
| "grad_norm": 13.51919174194336, |
| "learning_rate": 7.881773399014779e-07, |
| "loss": 0.1187, |
| "step": 3000 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_averaged_scores": 0.9136428329001622, |
| "eval_loss": 0.4020698368549347, |
| "eval_pearson": 0.919518232345581, |
| "eval_runtime": 0.3311, |
| "eval_samples_per_second": 1736.654, |
| "eval_spearmanr": 0.9077674334547435, |
| "eval_steps_per_second": 108.73, |
| "step": 3240 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 3240, |
| "total_flos": 1687665489390576.0, |
| "train_loss": 0.4677616125271644, |
| "train_runtime": 225.5275, |
| "train_samples_per_second": 229.418, |
| "train_steps_per_second": 14.366 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 3240, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1687665489390576.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|