{ "best_metric": 0.9248100807204325, "best_model_checkpoint": "./nlu_finetuned_models/stsb/roberta-large_lr1e-05/checkpoint-3240", "epoch": 10.0, "eval_steps": 500, "global_step": 3240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_averaged_scores": 0.8878156916479916, "eval_loss": 0.5759799480438232, "eval_pearson": 0.8917056282057303, "eval_runtime": 0.848, "eval_samples_per_second": 678.07, "eval_spearmanr": 0.8839257550902528, "eval_steps_per_second": 42.453, "step": 324 }, { "epoch": 1.5432098765432098, "grad_norm": 27.049880981445312, "learning_rate": 8.998357963875205e-06, "loss": 1.6206, "step": 500 }, { "epoch": 2.0, "eval_averaged_scores": 0.9113185593048319, "eval_loss": 0.442704439163208, "eval_pearson": 0.9149738888744463, "eval_runtime": 0.8467, "eval_samples_per_second": 679.071, "eval_spearmanr": 0.9076632297352173, "eval_steps_per_second": 42.516, "step": 648 }, { "epoch": 3.0, "eval_averaged_scores": 0.9175431924119682, "eval_loss": 0.36851391196250916, "eval_pearson": 0.9197302736426389, "eval_runtime": 0.8419, "eval_samples_per_second": 682.981, "eval_spearmanr": 0.9153561111812976, "eval_steps_per_second": 42.761, "step": 972 }, { "epoch": 3.0864197530864197, "grad_norm": 10.623714447021484, "learning_rate": 7.35632183908046e-06, "loss": 0.3127, "step": 1000 }, { "epoch": 4.0, "eval_averaged_scores": 0.9176047218597658, "eval_loss": 0.4543749988079071, "eval_pearson": 0.9204334809821679, "eval_runtime": 0.8423, "eval_samples_per_second": 682.619, "eval_spearmanr": 0.9147759627373636, "eval_steps_per_second": 42.738, "step": 1296 }, { "epoch": 4.62962962962963, "grad_norm": 15.085796356201172, "learning_rate": 5.7142857142857145e-06, "loss": 0.1752, "step": 1500 }, { "epoch": 5.0, "eval_averaged_scores": 0.9222599867134713, "eval_loss": 0.34371519088745117, "eval_pearson": 0.9259774205282126, "eval_runtime": 0.8451, "eval_samples_per_second": 680.427, "eval_spearmanr": 0.9185425528987299, "eval_steps_per_second": 42.601, "step": 1620 }, { "epoch": 6.0, "eval_averaged_scores": 0.9226443687740727, "eval_loss": 0.34758156538009644, "eval_pearson": 0.9264967148170972, "eval_runtime": 0.8431, "eval_samples_per_second": 682.036, "eval_spearmanr": 0.9187920227310483, "eval_steps_per_second": 42.701, "step": 1944 }, { "epoch": 6.172839506172839, "grad_norm": 10.050919532775879, "learning_rate": 4.072249589490969e-06, "loss": 0.1245, "step": 2000 }, { "epoch": 7.0, "eval_averaged_scores": 0.9214296418347622, "eval_loss": 0.40468737483024597, "eval_pearson": 0.9246199462616165, "eval_runtime": 0.8455, "eval_samples_per_second": 680.053, "eval_spearmanr": 0.9182393374079079, "eval_steps_per_second": 42.577, "step": 2268 }, { "epoch": 7.716049382716049, "grad_norm": 9.299652099609375, "learning_rate": 2.4302134646962236e-06, "loss": 0.0918, "step": 2500 }, { "epoch": 8.0, "eval_averaged_scores": 0.923742337624883, "eval_loss": 0.34257498383522034, "eval_pearson": 0.9276246828427851, "eval_runtime": 0.841, "eval_samples_per_second": 683.722, "eval_spearmanr": 0.919859992406981, "eval_steps_per_second": 42.807, "step": 2592 }, { "epoch": 9.0, "eval_averaged_scores": 0.924096808880168, "eval_loss": 0.3672405183315277, "eval_pearson": 0.9278834722408952, "eval_runtime": 0.835, "eval_samples_per_second": 688.625, "eval_spearmanr": 0.9203101455194407, "eval_steps_per_second": 43.114, "step": 2916 }, { "epoch": 9.25925925925926, "grad_norm": 9.53686237335205, "learning_rate": 7.881773399014779e-07, "loss": 0.0738, "step": 3000 }, { "epoch": 10.0, "eval_averaged_scores": 0.9248100807204325, "eval_loss": 0.34956303238868713, "eval_pearson": 0.9287178791469706, "eval_runtime": 0.8158, "eval_samples_per_second": 704.793, "eval_spearmanr": 0.9209022822938945, "eval_steps_per_second": 44.126, "step": 3240 }, { "epoch": 10.0, "step": 3240, "total_flos": 5977693356927984.0, "train_loss": 0.3749750914397063, "train_runtime": 417.0961, "train_samples_per_second": 124.048, "train_steps_per_second": 7.768 } ], "logging_steps": 500, "max_steps": 3240, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5977693356927984.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }