{
  "best_metric": 0.8775343861436576,
  "best_model_checkpoint": "./fp32/models/mnli-roberta-base/checkpoint-57000",
  "epoch": 4.644719687092568,
  "global_step": 57000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08,
      "learning_rate": 1.3579576317218903e-06,
      "loss": 1.0971,
      "step": 1000
    },
    {
      "epoch": 0.08,
      "eval_accuracy": 0.5194090677534386,
      "eval_loss": 1.0465178489685059,
      "eval_runtime": 29.1271,
      "eval_samples_per_second": 336.972,
      "eval_steps_per_second": 10.54,
      "step": 1000
    },
    {
      "epoch": 0.16,
      "learning_rate": 2.7159152634437806e-06,
      "loss": 0.7469,
      "step": 2000
    },
    {
      "epoch": 0.16,
      "eval_accuracy": 0.7842078451349974,
      "eval_loss": 0.5564178228378296,
      "eval_runtime": 27.2282,
      "eval_samples_per_second": 360.471,
      "eval_steps_per_second": 11.275,
      "step": 2000
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.073872895165672e-06,
      "loss": 0.5572,
      "step": 3000
    },
    {
      "epoch": 0.24,
      "eval_accuracy": 0.8114111054508405,
      "eval_loss": 0.4900236427783966,
      "eval_runtime": 27.0307,
      "eval_samples_per_second": 363.106,
      "eval_steps_per_second": 11.357,
      "step": 3000
    },
    {
      "epoch": 0.33,
      "learning_rate": 5.431830526887561e-06,
      "loss": 0.5075,
      "step": 4000
    },
    {
      "epoch": 0.33,
      "eval_accuracy": 0.834538970962812,
      "eval_loss": 0.4278336763381958,
      "eval_runtime": 25.1831,
      "eval_samples_per_second": 389.746,
      "eval_steps_per_second": 12.191,
      "step": 4000
    },
    {
      "epoch": 0.41,
      "learning_rate": 6.7897881586094514e-06,
      "loss": 0.4763,
      "step": 5000
    },
    {
      "epoch": 0.41,
      "eval_accuracy": 0.8364747834946511,
      "eval_loss": 0.41645798087120056,
      "eval_runtime": 26.3531,
      "eval_samples_per_second": 372.441,
      "eval_steps_per_second": 11.649,
      "step": 5000
    },
    {
      "epoch": 0.49,
      "learning_rate": 8.147745790331343e-06,
      "loss": 0.4613,
      "step": 6000
    },
    {
      "epoch": 0.49,
      "eval_accuracy": 0.8478858889454916,
      "eval_loss": 0.39705243706703186,
      "eval_runtime": 27.5189,
      "eval_samples_per_second": 356.663,
      "eval_steps_per_second": 11.156,
      "step": 6000
    },
    {
      "epoch": 0.57,
      "learning_rate": 9.505703422053234e-06,
      "loss": 0.4539,
      "step": 7000
    },
    {
      "epoch": 0.57,
      "eval_accuracy": 0.8448293428425879,
      "eval_loss": 0.40062254667282104,
      "eval_runtime": 25.5016,
      "eval_samples_per_second": 384.878,
      "eval_steps_per_second": 12.038,
      "step": 7000
    },
    {
      "epoch": 0.65,
      "learning_rate": 9.94486632684906e-06,
      "loss": 0.4441,
      "step": 8000
    },
    {
      "epoch": 0.65,
      "eval_accuracy": 0.8479877738155884,
      "eval_loss": 0.39510154724121094,
      "eval_runtime": 26.4014,
      "eval_samples_per_second": 371.76,
      "eval_steps_per_second": 11.628,
      "step": 8000
    },
    {
      "epoch": 0.73,
      "learning_rate": 9.85817816151739e-06,
      "loss": 0.4293,
      "step": 9000
    },
    {
      "epoch": 0.73,
      "eval_accuracy": 0.8596026490066225,
      "eval_loss": 0.3864976763725281,
      "eval_runtime": 27.4299,
      "eval_samples_per_second": 357.821,
      "eval_steps_per_second": 11.192,
      "step": 9000
    },
    {
      "epoch": 0.81,
      "learning_rate": 9.771489996185721e-06,
      "loss": 0.4237,
      "step": 10000
    },
    {
      "epoch": 0.81,
      "eval_accuracy": 0.8576668364747835,
      "eval_loss": 0.3852238059043884,
      "eval_runtime": 26.1569,
      "eval_samples_per_second": 375.236,
      "eval_steps_per_second": 11.737,
      "step": 10000
    },
    {
      "epoch": 0.9,
      "learning_rate": 9.684801830854052e-06,
      "loss": 0.4214,
      "step": 11000
    },
    {
      "epoch": 0.9,
      "eval_accuracy": 0.8607233825776872,
      "eval_loss": 0.3725791275501251,
      "eval_runtime": 28.6257,
      "eval_samples_per_second": 342.874,
      "eval_steps_per_second": 10.725,
      "step": 11000
    },
    {
      "epoch": 0.98,
      "learning_rate": 9.598113665522383e-06,
      "loss": 0.4147,
      "step": 12000
    },
    {
      "epoch": 0.98,
      "eval_accuracy": 0.862761079979623,
      "eval_loss": 0.3637676239013672,
      "eval_runtime": 24.8449,
      "eval_samples_per_second": 395.052,
      "eval_steps_per_second": 12.357,
      "step": 12000
    },
    {
      "epoch": 1.06,
      "learning_rate": 9.511425500190714e-06,
      "loss": 0.3707,
      "step": 13000
    },
    {
      "epoch": 1.06,
      "eval_accuracy": 0.8629648497198166,
      "eval_loss": 0.3756280243396759,
      "eval_runtime": 28.9144,
      "eval_samples_per_second": 339.451,
      "eval_steps_per_second": 10.618,
      "step": 13000
    },
    {
      "epoch": 1.14,
      "learning_rate": 9.424737334859045e-06,
      "loss": 0.3565,
      "step": 14000
    },
    {
      "epoch": 1.14,
      "eval_accuracy": 0.8644931227712684,
      "eval_loss": 0.3675437867641449,
      "eval_runtime": 28.4842,
      "eval_samples_per_second": 344.578,
      "eval_steps_per_second": 10.778,
      "step": 14000
    },
    {
      "epoch": 1.22,
      "learning_rate": 9.338049169527376e-06,
      "loss": 0.3662,
      "step": 15000
    },
    {
      "epoch": 1.22,
      "eval_accuracy": 0.866225165562914,
      "eval_loss": 0.3639264404773712,
      "eval_runtime": 29.9404,
      "eval_samples_per_second": 327.818,
      "eval_steps_per_second": 10.254,
      "step": 15000
    },
    {
      "epoch": 1.3,
      "learning_rate": 9.251361004195707e-06,
      "loss": 0.3561,
      "step": 16000
    },
    {
      "epoch": 1.3,
      "eval_accuracy": 0.8615384615384616,
      "eval_loss": 0.36105281114578247,
      "eval_runtime": 24.6832,
      "eval_samples_per_second": 397.639,
      "eval_steps_per_second": 12.438,
      "step": 16000
    },
    {
      "epoch": 1.39,
      "learning_rate": 9.16467283886404e-06,
      "loss": 0.3605,
      "step": 17000
    },
    {
      "epoch": 1.39,
      "eval_accuracy": 0.8665308201732043,
      "eval_loss": 0.3572224974632263,
      "eval_runtime": 25.556,
      "eval_samples_per_second": 384.059,
      "eval_steps_per_second": 12.013,
      "step": 17000
    },
    {
      "epoch": 1.47,
      "learning_rate": 9.07798467353237e-06,
      "loss": 0.3525,
      "step": 18000
    },
    {
      "epoch": 1.47,
      "eval_accuracy": 0.8622516556291391,
      "eval_loss": 0.3711968660354614,
      "eval_runtime": 27.8478,
      "eval_samples_per_second": 352.452,
      "eval_steps_per_second": 11.024,
      "step": 18000
    },
    {
      "epoch": 1.55,
      "learning_rate": 8.9912965082007e-06,
      "loss": 0.3489,
      "step": 19000
    },
    {
      "epoch": 1.55,
      "eval_accuracy": 0.8704024452368824,
      "eval_loss": 0.371066153049469,
      "eval_runtime": 26.8458,
      "eval_samples_per_second": 365.606,
      "eval_steps_per_second": 11.436,
      "step": 19000
    },
    {
      "epoch": 1.63,
      "learning_rate": 8.904608342869031e-06,
      "loss": 0.3553,
      "step": 20000
    },
    {
      "epoch": 1.63,
      "eval_accuracy": 0.8635761589403973,
      "eval_loss": 0.365778386592865,
      "eval_runtime": 27.8617,
      "eval_samples_per_second": 352.276,
      "eval_steps_per_second": 11.019,
      "step": 20000
    },
    {
      "epoch": 1.71,
      "learning_rate": 8.817920177537364e-06,
      "loss": 0.3478,
      "step": 21000
    },
    {
      "epoch": 1.71,
      "eval_accuracy": 0.8650025471217524,
      "eval_loss": 0.365774542093277,
      "eval_runtime": 25.6814,
      "eval_samples_per_second": 382.183,
      "eval_steps_per_second": 11.954,
      "step": 21000
    },
    {
      "epoch": 1.79,
      "learning_rate": 8.731232012205695e-06,
      "loss": 0.3416,
      "step": 22000
    },
    {
      "epoch": 1.79,
      "eval_accuracy": 0.86571574121243,
      "eval_loss": 0.38820841908454895,
      "eval_runtime": 25.5524,
      "eval_samples_per_second": 384.112,
      "eval_steps_per_second": 12.015,
      "step": 22000
    },
    {
      "epoch": 1.87,
      "learning_rate": 8.644543846874025e-06,
      "loss": 0.3463,
      "step": 23000
    },
    {
      "epoch": 1.87,
      "eval_accuracy": 0.8714212939378503,
      "eval_loss": 0.3499988317489624,
      "eval_runtime": 27.3842,
      "eval_samples_per_second": 358.418,
      "eval_steps_per_second": 11.211,
      "step": 23000
    },
    {
      "epoch": 1.96,
      "learning_rate": 8.557855681542356e-06,
      "loss": 0.3429,
      "step": 24000
    },
    {
      "epoch": 1.96,
      "eval_accuracy": 0.8645950076413652,
      "eval_loss": 0.37212345004081726,
      "eval_runtime": 27.0146,
      "eval_samples_per_second": 363.322,
      "eval_steps_per_second": 11.364,
      "step": 24000
    },
    {
      "epoch": 2.04,
      "learning_rate": 8.471167516210688e-06,
      "loss": 0.3177,
      "step": 25000
    },
    {
      "epoch": 2.04,
      "eval_accuracy": 0.8691798267957208,
      "eval_loss": 0.37507861852645874,
      "eval_runtime": 27.0938,
      "eval_samples_per_second": 362.26,
      "eval_steps_per_second": 11.331,
      "step": 25000
    },
    {
      "epoch": 2.12,
      "learning_rate": 8.38447935087902e-06,
      "loss": 0.2821,
      "step": 26000
    },
    {
      "epoch": 2.12,
      "eval_accuracy": 0.8716250636780438,
      "eval_loss": 0.37794411182403564,
      "eval_runtime": 27.0755,
      "eval_samples_per_second": 362.505,
      "eval_steps_per_second": 11.339,
      "step": 26000
    },
    {
      "epoch": 2.2,
      "learning_rate": 8.29779118554735e-06,
      "loss": 0.2726,
      "step": 27000
    },
    {
      "epoch": 2.2,
      "eval_accuracy": 0.8691798267957208,
      "eval_loss": 0.38138309121131897,
      "eval_runtime": 28.6712,
      "eval_samples_per_second": 342.329,
      "eval_steps_per_second": 10.708,
      "step": 27000
    },
    {
      "epoch": 2.28,
      "learning_rate": 8.211103020215681e-06,
      "loss": 0.2743,
      "step": 28000
    },
    {
      "epoch": 2.28,
      "eval_accuracy": 0.8757004584819155,
      "eval_loss": 0.3701411783695221,
      "eval_runtime": 28.7005,
      "eval_samples_per_second": 341.981,
      "eval_steps_per_second": 10.697,
      "step": 28000
    },
    {
      "epoch": 2.36,
      "learning_rate": 8.124414854884012e-06,
      "loss": 0.2814,
      "step": 29000
    },
    {
      "epoch": 2.36,
      "eval_accuracy": 0.8675496688741722,
      "eval_loss": 0.40641501545906067,
      "eval_runtime": 27.6676,
      "eval_samples_per_second": 354.747,
      "eval_steps_per_second": 11.096,
      "step": 29000
    },
    {
      "epoch": 2.44,
      "learning_rate": 8.037726689552343e-06,
      "loss": 0.2718,
      "step": 30000
    },
    {
      "epoch": 2.44,
      "eval_accuracy": 0.8726439123790117,
      "eval_loss": 0.39712002873420715,
      "eval_runtime": 30.6579,
      "eval_samples_per_second": 320.146,
      "eval_steps_per_second": 10.014,
      "step": 30000
    },
    {
      "epoch": 2.53,
      "learning_rate": 7.951038524220674e-06,
      "loss": 0.2798,
      "step": 31000
    },
    {
      "epoch": 2.53,
      "eval_accuracy": 0.8732552215995925,
      "eval_loss": 0.3579646944999695,
      "eval_runtime": 29.1251,
      "eval_samples_per_second": 336.994,
      "eval_steps_per_second": 10.541,
      "step": 31000
    },
    {
      "epoch": 2.61,
      "learning_rate": 7.864350358889005e-06,
      "loss": 0.2808,
      "step": 32000
    },
    {
      "epoch": 2.61,
      "eval_accuracy": 0.8733571064696892,
      "eval_loss": 0.3730563819408417,
      "eval_runtime": 28.2414,
      "eval_samples_per_second": 347.54,
      "eval_steps_per_second": 10.871,
      "step": 32000
    },
    {
      "epoch": 2.69,
      "learning_rate": 7.777662193557336e-06,
      "loss": 0.2854,
      "step": 33000
    },
    {
      "epoch": 2.69,
      "eval_accuracy": 0.8724401426388181,
      "eval_loss": 0.3838872015476227,
      "eval_runtime": 28.3639,
      "eval_samples_per_second": 346.039,
      "eval_steps_per_second": 10.824,
      "step": 33000
    },
    {
      "epoch": 2.77,
      "learning_rate": 7.690974028225667e-06,
      "loss": 0.2802,
      "step": 34000
    },
    {
      "epoch": 2.77,
      "eval_accuracy": 0.8763117677024962,
      "eval_loss": 0.3689836263656616,
      "eval_runtime": 31.7713,
      "eval_samples_per_second": 308.927,
      "eval_steps_per_second": 9.663,
      "step": 34000
    },
    {
      "epoch": 2.85,
      "learning_rate": 7.604285862893998e-06,
      "loss": 0.2825,
      "step": 35000
    },
    {
      "epoch": 2.85,
      "eval_accuracy": 0.872032603158431,
      "eval_loss": 0.37332993745803833,
      "eval_runtime": 28.2862,
      "eval_samples_per_second": 346.99,
      "eval_steps_per_second": 10.853,
      "step": 35000
    },
    {
      "epoch": 2.93,
      "learning_rate": 7.51759769756233e-06,
      "loss": 0.2811,
      "step": 36000
    },
    {
      "epoch": 2.93,
      "eval_accuracy": 0.872032603158431,
      "eval_loss": 0.3704814016819,
      "eval_runtime": 27.6376,
      "eval_samples_per_second": 355.133,
      "eval_steps_per_second": 11.108,
      "step": 36000
    },
    {
      "epoch": 3.01,
      "learning_rate": 7.43090953223066e-06,
      "loss": 0.2733,
      "step": 37000
    },
    {
      "epoch": 3.01,
      "eval_accuracy": 0.8758023433520122,
      "eval_loss": 0.4095652401447296,
      "eval_runtime": 26.7806,
      "eval_samples_per_second": 366.496,
      "eval_steps_per_second": 11.464,
      "step": 37000
    },
    {
      "epoch": 3.1,
      "learning_rate": 7.344221366898991e-06,
      "loss": 0.2186,
      "step": 38000
    },
    {
      "epoch": 3.1,
      "eval_accuracy": 0.8766174223127865,
      "eval_loss": 0.4097856283187866,
      "eval_runtime": 30.263,
      "eval_samples_per_second": 324.324,
      "eval_steps_per_second": 10.144,
      "step": 38000
    },
    {
      "epoch": 3.18,
      "learning_rate": 7.257533201567322e-06,
      "loss": 0.2228,
      "step": 39000
    },
    {
      "epoch": 3.18,
      "eval_accuracy": 0.8757004584819155,
      "eval_loss": 0.41501158475875854,
      "eval_runtime": 27.8042,
      "eval_samples_per_second": 353.004,
      "eval_steps_per_second": 11.042,
      "step": 39000
    },
    {
      "epoch": 3.26,
      "learning_rate": 7.170845036235654e-06,
      "loss": 0.2247,
      "step": 40000
    },
    {
      "epoch": 3.26,
      "eval_accuracy": 0.8736627610799796,
      "eval_loss": 0.4023166596889496,
      "eval_runtime": 28.6307,
      "eval_samples_per_second": 342.813,
      "eval_steps_per_second": 10.723,
      "step": 40000
    },
    {
      "epoch": 3.34,
      "learning_rate": 7.084156870903984e-06,
      "loss": 0.2236,
      "step": 41000
    },
    {
      "epoch": 3.34,
      "eval_accuracy": 0.873458991339786,
      "eval_loss": 0.401944100856781,
      "eval_runtime": 42.8415,
      "eval_samples_per_second": 229.1,
      "eval_steps_per_second": 7.166,
      "step": 41000
    },
    {
      "epoch": 3.42,
      "learning_rate": 6.997468705572315e-06,
      "loss": 0.2316,
      "step": 42000
    },
    {
      "epoch": 3.42,
      "eval_accuracy": 0.875394803871625,
      "eval_loss": 0.39935216307640076,
      "eval_runtime": 27.9679,
      "eval_samples_per_second": 350.938,
      "eval_steps_per_second": 10.977,
      "step": 42000
    },
    {
      "epoch": 3.5,
      "learning_rate": 6.910780540240647e-06,
      "loss": 0.227,
      "step": 43000
    },
    {
      "epoch": 3.5,
      "eval_accuracy": 0.8735608762098829,
      "eval_loss": 0.40627339482307434,
      "eval_runtime": 27.7905,
      "eval_samples_per_second": 353.179,
      "eval_steps_per_second": 11.047,
      "step": 43000
    },
    {
      "epoch": 3.59,
      "learning_rate": 6.824092374908978e-06,
      "loss": 0.225,
      "step": 44000
    },
    {
      "epoch": 3.59,
      "eval_accuracy": 0.8738665308201732,
      "eval_loss": 0.3787411153316498,
      "eval_runtime": 26.634,
      "eval_samples_per_second": 368.513,
      "eval_steps_per_second": 11.527,
      "step": 44000
    },
    {
      "epoch": 3.67,
      "learning_rate": 6.737404209577309e-06,
      "loss": 0.2213,
      "step": 45000
    },
    {
      "epoch": 3.67,
      "eval_accuracy": 0.8741721854304636,
      "eval_loss": 0.4053351879119873,
      "eval_runtime": 26.9451,
      "eval_samples_per_second": 364.259,
      "eval_steps_per_second": 11.394,
      "step": 45000
    },
    {
      "epoch": 3.75,
      "learning_rate": 6.6507160442456394e-06,
      "loss": 0.228,
      "step": 46000
    },
    {
      "epoch": 3.75,
      "eval_accuracy": 0.8737646459500764,
      "eval_loss": 0.4027246832847595,
      "eval_runtime": 26.7635,
      "eval_samples_per_second": 366.731,
      "eval_steps_per_second": 11.471,
      "step": 46000
    },
    {
      "epoch": 3.83,
      "learning_rate": 6.564027878913971e-06,
      "loss": 0.2227,
      "step": 47000
    },
    {
      "epoch": 3.83,
      "eval_accuracy": 0.87396841569027,
      "eval_loss": 0.4178619384765625,
      "eval_runtime": 29.4635,
      "eval_samples_per_second": 333.124,
      "eval_steps_per_second": 10.42,
      "step": 47000
    },
    {
      "epoch": 3.91,
      "learning_rate": 6.477339713582302e-06,
      "loss": 0.227,
      "step": 48000
    },
    {
      "epoch": 3.91,
      "eval_accuracy": 0.8752929190015283,
      "eval_loss": 0.3969513773918152,
      "eval_runtime": 29.294,
      "eval_samples_per_second": 335.052,
      "eval_steps_per_second": 10.48,
      "step": 48000
    },
    {
      "epoch": 3.99,
      "learning_rate": 6.390651548250633e-06,
      "loss": 0.2257,
      "step": 49000
    },
    {
      "epoch": 3.99,
      "eval_accuracy": 0.8767193071828834,
      "eval_loss": 0.41893526911735535,
      "eval_runtime": 30.0922,
      "eval_samples_per_second": 326.164,
      "eval_steps_per_second": 10.202,
      "step": 49000
    },
    {
      "epoch": 4.07,
      "learning_rate": 6.303963382918964e-06,
      "loss": 0.1808,
      "step": 50000
    },
    {
      "epoch": 4.07,
      "eval_accuracy": 0.8714212939378503,
      "eval_loss": 0.4699897766113281,
      "eval_runtime": 27.6446,
      "eval_samples_per_second": 355.042,
      "eval_steps_per_second": 11.105,
      "step": 50000
    },
    {
      "epoch": 4.16,
      "learning_rate": 6.217275217587295e-06,
      "loss": 0.1821,
      "step": 51000
    },
    {
      "epoch": 4.16,
      "eval_accuracy": 0.87396841569027,
      "eval_loss": 0.4464610815048218,
      "eval_runtime": 26.6712,
      "eval_samples_per_second": 368.001,
      "eval_steps_per_second": 11.511,
      "step": 51000
    },
    {
      "epoch": 4.24,
      "learning_rate": 6.130587052255626e-06,
      "loss": 0.1788,
      "step": 52000
    },
    {
      "epoch": 4.24,
      "eval_accuracy": 0.8727457972491085,
      "eval_loss": 0.46066877245903015,
      "eval_runtime": 27.6226,
      "eval_samples_per_second": 355.326,
      "eval_steps_per_second": 11.114,
      "step": 52000
    },
    {
      "epoch": 4.32,
      "learning_rate": 6.043898886923958e-06,
      "loss": 0.1822,
      "step": 53000
    },
    {
      "epoch": 4.32,
      "eval_accuracy": 0.8737646459500764,
      "eval_loss": 0.46094420552253723,
      "eval_runtime": 27.8257,
      "eval_samples_per_second": 352.732,
      "eval_steps_per_second": 11.033,
      "step": 53000
    },
    {
      "epoch": 4.4,
      "learning_rate": 5.957210721592289e-06,
      "loss": 0.181,
      "step": 54000
    },
    {
      "epoch": 4.4,
      "eval_accuracy": 0.8774325012735609,
      "eval_loss": 0.4538358151912689,
      "eval_runtime": 27.7177,
      "eval_samples_per_second": 354.106,
      "eval_steps_per_second": 11.076,
      "step": 54000
    },
    {
      "epoch": 4.48,
      "learning_rate": 5.870522556260619e-06,
      "loss": 0.1843,
      "step": 55000
    },
    {
      "epoch": 4.48,
      "eval_accuracy": 0.8711156393275599,
      "eval_loss": 0.47502145171165466,
      "eval_runtime": 28.0796,
      "eval_samples_per_second": 349.543,
      "eval_steps_per_second": 10.933,
      "step": 55000
    },
    {
      "epoch": 4.56,
      "learning_rate": 5.78383439092895e-06,
      "loss": 0.1842,
      "step": 56000
    },
    {
      "epoch": 4.56,
      "eval_accuracy": 0.8742740703005604,
      "eval_loss": 0.44675213098526,
      "eval_runtime": 27.0482,
      "eval_samples_per_second": 362.871,
      "eval_steps_per_second": 11.35,
      "step": 56000
    },
    {
      "epoch": 4.64,
      "learning_rate": 5.697146225597282e-06,
      "loss": 0.1828,
      "step": 57000
    },
    {
      "epoch": 4.64,
      "eval_accuracy": 0.8775343861436576,
      "eval_loss": 0.45801690220832825,
      "eval_runtime": 26.6517,
      "eval_samples_per_second": 368.27,
      "eval_steps_per_second": 11.519,
      "step": 57000
    }
  ],
  "max_steps": 122720,
  "num_train_epochs": 10,
  "total_flos": 1.1997919225713254e+17,
  "trial_name": null,
  "trial_params": null
}