{ "best_metric": 0.9103662307301734, "best_model_checkpoint": "./fp32_3e_5/models/stsb-roberta-base/checkpoint-1600", "epoch": 8.88888888888889, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.56, "learning_rate": 2.777777777777778e-05, "loss": 3.7756, "step": 100 }, { "epoch": 0.56, "eval_combined_score": 0.8535264998873677, "eval_loss": 0.6678845286369324, "eval_pearson": 0.852268663307789, "eval_runtime": 7.0932, "eval_samples_per_second": 211.47, "eval_spearmanr": 0.8547843364669463, "eval_steps_per_second": 6.626, "step": 100 }, { "epoch": 1.11, "learning_rate": 2.8368794326241135e-05, "loss": 0.6723, "step": 200 }, { "epoch": 1.11, "eval_combined_score": 0.8880544841766463, "eval_loss": 0.5207411050796509, "eval_pearson": 0.888720755423408, "eval_runtime": 6.6011, "eval_samples_per_second": 227.235, "eval_spearmanr": 0.8873882129298847, "eval_steps_per_second": 7.12, "step": 200 }, { "epoch": 1.67, "learning_rate": 2.6595744680851064e-05, "loss": 0.5148, "step": 300 }, { "epoch": 1.67, "eval_combined_score": 0.8963770244622205, "eval_loss": 0.4456341564655304, "eval_pearson": 0.8974699069640754, "eval_runtime": 4.0841, "eval_samples_per_second": 367.278, "eval_spearmanr": 0.8952841419603657, "eval_steps_per_second": 11.508, "step": 300 }, { "epoch": 2.22, "learning_rate": 2.4822695035460992e-05, "loss": 0.3898, "step": 400 }, { "epoch": 2.22, "eval_combined_score": 0.900132405880169, "eval_loss": 0.44346898794174194, "eval_pearson": 0.9017121586504461, "eval_runtime": 3.8243, "eval_samples_per_second": 392.231, "eval_spearmanr": 0.898552653109892, "eval_steps_per_second": 12.29, "step": 400 }, { "epoch": 2.78, "learning_rate": 2.3049645390070924e-05, "loss": 0.3167, "step": 500 }, { "epoch": 2.78, "eval_combined_score": 0.9007290034038564, "eval_loss": 0.48722508549690247, "eval_pearson": 0.9027466027895519, "eval_runtime": 3.7737, "eval_samples_per_second": 397.491, "eval_spearmanr": 0.898711404018161, "eval_steps_per_second": 12.455, "step": 500 }, { "epoch": 3.33, "learning_rate": 2.1276595744680852e-05, "loss": 0.253, "step": 600 }, { "epoch": 3.33, "eval_combined_score": 0.9052751739076982, "eval_loss": 0.4403521716594696, "eval_pearson": 0.907271329521877, "eval_runtime": 5.4187, "eval_samples_per_second": 276.821, "eval_spearmanr": 0.9032790182935194, "eval_steps_per_second": 8.674, "step": 600 }, { "epoch": 3.89, "learning_rate": 1.950354609929078e-05, "loss": 0.2197, "step": 700 }, { "epoch": 3.89, "eval_combined_score": 0.9059989545154765, "eval_loss": 0.40818288922309875, "eval_pearson": 0.9078220385222855, "eval_runtime": 3.7738, "eval_samples_per_second": 397.48, "eval_spearmanr": 0.9041758705086674, "eval_steps_per_second": 12.454, "step": 700 }, { "epoch": 4.44, "learning_rate": 1.773049645390071e-05, "loss": 0.1904, "step": 800 }, { "epoch": 4.44, "eval_combined_score": 0.9059072653893803, "eval_loss": 0.41569381952285767, "eval_pearson": 0.9063948213345204, "eval_runtime": 4.1987, "eval_samples_per_second": 357.256, "eval_spearmanr": 0.9054197094442402, "eval_steps_per_second": 11.194, "step": 800 }, { "epoch": 5.0, "learning_rate": 1.5957446808510637e-05, "loss": 0.1708, "step": 900 }, { "epoch": 5.0, "eval_combined_score": 0.906494497393801, "eval_loss": 0.46862491965293884, "eval_pearson": 0.9083108072067478, "eval_runtime": 4.0762, "eval_samples_per_second": 367.992, "eval_spearmanr": 0.904678187580854, "eval_steps_per_second": 11.53, "step": 900 }, { "epoch": 5.56, "learning_rate": 1.4184397163120568e-05, "loss": 0.1363, "step": 1000 }, { "epoch": 5.56, "eval_combined_score": 0.9055447457890553, "eval_loss": 0.4190002977848053, "eval_pearson": 0.9069385161043947, "eval_runtime": 3.7515, "eval_samples_per_second": 399.845, "eval_spearmanr": 0.9041509754737158, "eval_steps_per_second": 12.528, "step": 1000 }, { "epoch": 6.11, "learning_rate": 1.2411347517730496e-05, "loss": 0.1291, "step": 1100 }, { "epoch": 6.11, "eval_combined_score": 0.9081783832953019, "eval_loss": 0.4186760187149048, "eval_pearson": 0.9095907875352112, "eval_runtime": 4.6081, "eval_samples_per_second": 325.513, "eval_spearmanr": 0.9067659790553926, "eval_steps_per_second": 10.199, "step": 1100 }, { "epoch": 6.67, "learning_rate": 1.0638297872340426e-05, "loss": 0.1102, "step": 1200 }, { "epoch": 6.67, "eval_combined_score": 0.907499747827053, "eval_loss": 0.4207873046398163, "eval_pearson": 0.9091866720599195, "eval_runtime": 4.3818, "eval_samples_per_second": 342.325, "eval_spearmanr": 0.9058128235941866, "eval_steps_per_second": 10.726, "step": 1200 }, { "epoch": 7.22, "learning_rate": 8.865248226950355e-06, "loss": 0.0984, "step": 1300 }, { "epoch": 7.22, "eval_combined_score": 0.906452303345736, "eval_loss": 0.4338637590408325, "eval_pearson": 0.907816988223447, "eval_runtime": 3.7556, "eval_samples_per_second": 399.402, "eval_spearmanr": 0.905087618468025, "eval_steps_per_second": 12.515, "step": 1300 }, { "epoch": 7.78, "learning_rate": 7.092198581560284e-06, "loss": 0.092, "step": 1400 }, { "epoch": 7.78, "eval_combined_score": 0.9068694360771773, "eval_loss": 0.418102502822876, "eval_pearson": 0.9085830258812985, "eval_runtime": 4.0479, "eval_samples_per_second": 370.565, "eval_spearmanr": 0.9051558462730561, "eval_steps_per_second": 11.611, "step": 1400 }, { "epoch": 8.33, "learning_rate": 5.319148936170213e-06, "loss": 0.0834, "step": 1500 }, { "epoch": 8.33, "eval_combined_score": 0.9068391544864225, "eval_loss": 0.42239323258399963, "eval_pearson": 0.9082395585902145, "eval_runtime": 4.2304, "eval_samples_per_second": 354.575, "eval_spearmanr": 0.9054387503826306, "eval_steps_per_second": 11.11, "step": 1500 }, { "epoch": 8.89, "learning_rate": 3.546099290780142e-06, "loss": 0.0798, "step": 1600 }, { "epoch": 8.89, "eval_combined_score": 0.9087789493455298, "eval_loss": 0.3950077295303345, "eval_pearson": 0.9103662307301734, "eval_runtime": 3.8353, "eval_samples_per_second": 391.105, "eval_spearmanr": 0.9071916679608862, "eval_steps_per_second": 12.255, "step": 1600 } ], "max_steps": 1800, "num_train_epochs": 10, "total_flos": 3362002879051776.0, "trial_name": null, "trial_params": null }