| { | |
| "best_metric": 1.3073620796203613, | |
| "best_model_checkpoint": "mobilebert_sa_pre-training-complete/checkpoint-300000", | |
| "epoch": 41.98740377886634, | |
| "global_step": 300000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.882544181393798e-05, | |
| "loss": 1.6028, | |
| "step": 7145 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.6935334549025108, | |
| "eval_loss": 1.4525387287139893, | |
| "eval_runtime": 1.4716, | |
| "eval_samples_per_second": 325.49, | |
| "eval_steps_per_second": 10.193, | |
| "step": 7145 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 4.763421140380127e-05, | |
| "loss": 1.5524, | |
| "step": 14290 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.6992782005371531, | |
| "eval_loss": 1.437490463256836, | |
| "eval_runtime": 1.5211, | |
| "eval_samples_per_second": 314.9, | |
| "eval_steps_per_second": 9.861, | |
| "step": 14290 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 4.6442980993664556e-05, | |
| "loss": 1.5323, | |
| "step": 21435 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.6993441976976554, | |
| "eval_loss": 1.4193694591522217, | |
| "eval_runtime": 1.4759, | |
| "eval_samples_per_second": 324.542, | |
| "eval_steps_per_second": 10.163, | |
| "step": 21435 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 4.5251750583527844e-05, | |
| "loss": 1.5191, | |
| "step": 28580 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.7026513032777716, | |
| "eval_loss": 1.4109910726547241, | |
| "eval_runtime": 1.4968, | |
| "eval_samples_per_second": 320.019, | |
| "eval_steps_per_second": 10.021, | |
| "step": 28580 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 4.406052017339113e-05, | |
| "loss": 1.5025, | |
| "step": 35725 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.7013675690761931, | |
| "eval_loss": 1.4167572259902954, | |
| "eval_runtime": 1.4782, | |
| "eval_samples_per_second": 324.039, | |
| "eval_steps_per_second": 10.147, | |
| "step": 35725 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 4.286928976325442e-05, | |
| "loss": 1.4902, | |
| "step": 42870 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.7011720396863318, | |
| "eval_loss": 1.3931331634521484, | |
| "eval_runtime": 1.4734, | |
| "eval_samples_per_second": 325.107, | |
| "eval_steps_per_second": 10.181, | |
| "step": 42870 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 4.167805935311771e-05, | |
| "loss": 1.4813, | |
| "step": 50015 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.7056545531078995, | |
| "eval_loss": 1.3738043308258057, | |
| "eval_runtime": 1.4644, | |
| "eval_samples_per_second": 327.106, | |
| "eval_steps_per_second": 10.243, | |
| "step": 50015 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 4.0486828942981e-05, | |
| "loss": 1.4751, | |
| "step": 57160 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.6995995407320283, | |
| "eval_loss": 1.4237422943115234, | |
| "eval_runtime": 1.459, | |
| "eval_samples_per_second": 328.317, | |
| "eval_steps_per_second": 10.281, | |
| "step": 57160 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 3.929559853284429e-05, | |
| "loss": 1.4689, | |
| "step": 64305 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.704691011235955, | |
| "eval_loss": 1.3969331979751587, | |
| "eval_runtime": 1.6056, | |
| "eval_samples_per_second": 298.322, | |
| "eval_steps_per_second": 9.342, | |
| "step": 64305 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 3.8104368122707576e-05, | |
| "loss": 1.4626, | |
| "step": 71450 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.7067709060449532, | |
| "eval_loss": 1.391621470451355, | |
| "eval_runtime": 1.4719, | |
| "eval_samples_per_second": 325.421, | |
| "eval_steps_per_second": 10.191, | |
| "step": 71450 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "learning_rate": 3.691313771257086e-05, | |
| "loss": 1.4566, | |
| "step": 78595 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.7071985535088711, | |
| "eval_loss": 1.3686023950576782, | |
| "eval_runtime": 1.4629, | |
| "eval_samples_per_second": 327.432, | |
| "eval_steps_per_second": 10.254, | |
| "step": 78595 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "learning_rate": 3.572190730243415e-05, | |
| "loss": 1.451, | |
| "step": 85740 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.7060222091689743, | |
| "eval_loss": 1.3811498880386353, | |
| "eval_runtime": 1.4641, | |
| "eval_samples_per_second": 327.173, | |
| "eval_steps_per_second": 10.246, | |
| "step": 85740 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "learning_rate": 3.453067689229744e-05, | |
| "loss": 1.4478, | |
| "step": 92885 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.7091579355840124, | |
| "eval_loss": 1.3597520589828491, | |
| "eval_runtime": 1.4632, | |
| "eval_samples_per_second": 327.355, | |
| "eval_steps_per_second": 10.251, | |
| "step": 92885 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "learning_rate": 3.3339446482160726e-05, | |
| "loss": 1.4441, | |
| "step": 100030 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.7054075191330094, | |
| "eval_loss": 1.3789618015289307, | |
| "eval_runtime": 1.4621, | |
| "eval_samples_per_second": 327.608, | |
| "eval_steps_per_second": 10.259, | |
| "step": 100030 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "learning_rate": 3.214821607202401e-05, | |
| "loss": 1.4379, | |
| "step": 107175 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.7065809145017066, | |
| "eval_loss": 1.379388451576233, | |
| "eval_runtime": 1.5875, | |
| "eval_samples_per_second": 301.725, | |
| "eval_steps_per_second": 9.449, | |
| "step": 107175 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "learning_rate": 3.09569856618873e-05, | |
| "loss": 1.4353, | |
| "step": 114320 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.710198236648509, | |
| "eval_loss": 1.3609341382980347, | |
| "eval_runtime": 1.4593, | |
| "eval_samples_per_second": 328.244, | |
| "eval_steps_per_second": 10.279, | |
| "step": 114320 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "learning_rate": 2.976575525175058e-05, | |
| "loss": 1.43, | |
| "step": 121465 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.7083252258512857, | |
| "eval_loss": 1.3685261011123657, | |
| "eval_runtime": 1.4875, | |
| "eval_samples_per_second": 322.019, | |
| "eval_steps_per_second": 10.084, | |
| "step": 121465 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "learning_rate": 2.857452484161387e-05, | |
| "loss": 1.4278, | |
| "step": 128610 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.7036037555518075, | |
| "eval_loss": 1.3953258991241455, | |
| "eval_runtime": 1.4616, | |
| "eval_samples_per_second": 327.715, | |
| "eval_steps_per_second": 10.262, | |
| "step": 128610 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "learning_rate": 2.7383294431477156e-05, | |
| "loss": 1.4219, | |
| "step": 135755 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.7085320020194088, | |
| "eval_loss": 1.3756214380264282, | |
| "eval_runtime": 1.4616, | |
| "eval_samples_per_second": 327.73, | |
| "eval_steps_per_second": 10.263, | |
| "step": 135755 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 2.6192064021340444e-05, | |
| "loss": 1.4197, | |
| "step": 142900 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.7089573167311684, | |
| "eval_loss": 1.3597127199172974, | |
| "eval_runtime": 1.4718, | |
| "eval_samples_per_second": 325.445, | |
| "eval_steps_per_second": 10.191, | |
| "step": 142900 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "learning_rate": 2.5000833611203735e-05, | |
| "loss": 1.4169, | |
| "step": 150045 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.7060544426179265, | |
| "eval_loss": 1.367296576499939, | |
| "eval_runtime": 1.4625, | |
| "eval_samples_per_second": 327.518, | |
| "eval_steps_per_second": 10.256, | |
| "step": 150045 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "learning_rate": 2.3809603201067022e-05, | |
| "loss": 1.4146, | |
| "step": 157190 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.707288269036104, | |
| "eval_loss": 1.3753403425216675, | |
| "eval_runtime": 1.4573, | |
| "eval_samples_per_second": 328.688, | |
| "eval_steps_per_second": 10.293, | |
| "step": 157190 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "learning_rate": 2.2618372790930313e-05, | |
| "loss": 1.4109, | |
| "step": 164335 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.7081938623386121, | |
| "eval_loss": 1.3696134090423584, | |
| "eval_runtime": 1.4581, | |
| "eval_samples_per_second": 328.502, | |
| "eval_steps_per_second": 10.287, | |
| "step": 164335 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "learning_rate": 2.14271423807936e-05, | |
| "loss": 1.4073, | |
| "step": 171480 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.7092472511981956, | |
| "eval_loss": 1.356264352798462, | |
| "eval_runtime": 1.4561, | |
| "eval_samples_per_second": 328.957, | |
| "eval_steps_per_second": 10.301, | |
| "step": 171480 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "learning_rate": 2.0235911970656888e-05, | |
| "loss": 1.4054, | |
| "step": 178625 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.7103286516069584, | |
| "eval_loss": 1.371171474456787, | |
| "eval_runtime": 1.475, | |
| "eval_samples_per_second": 324.736, | |
| "eval_steps_per_second": 10.169, | |
| "step": 178625 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "learning_rate": 1.9044681560520176e-05, | |
| "loss": 1.402, | |
| "step": 185770 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.7112762628520339, | |
| "eval_loss": 1.3528329133987427, | |
| "eval_runtime": 1.467, | |
| "eval_samples_per_second": 326.525, | |
| "eval_steps_per_second": 10.225, | |
| "step": 185770 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "learning_rate": 1.7853451150383463e-05, | |
| "loss": 1.4001, | |
| "step": 192915 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.712307605886979, | |
| "eval_loss": 1.336666226387024, | |
| "eval_runtime": 1.4596, | |
| "eval_samples_per_second": 328.179, | |
| "eval_steps_per_second": 10.277, | |
| "step": 192915 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "learning_rate": 1.666222074024675e-05, | |
| "loss": 1.397, | |
| "step": 200060 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.7117655307810966, | |
| "eval_loss": 1.3508223295211792, | |
| "eval_runtime": 1.458, | |
| "eval_samples_per_second": 328.539, | |
| "eval_steps_per_second": 10.288, | |
| "step": 200060 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "learning_rate": 1.5470990330110038e-05, | |
| "loss": 1.3955, | |
| "step": 207205 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.7116529947185077, | |
| "eval_loss": 1.3571882247924805, | |
| "eval_runtime": 1.6349, | |
| "eval_samples_per_second": 292.987, | |
| "eval_steps_per_second": 9.175, | |
| "step": 207205 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "learning_rate": 1.4279759919973326e-05, | |
| "loss": 1.3937, | |
| "step": 214350 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.7095319458838688, | |
| "eval_loss": 1.356575846672058, | |
| "eval_runtime": 1.4657, | |
| "eval_samples_per_second": 326.804, | |
| "eval_steps_per_second": 10.234, | |
| "step": 214350 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "learning_rate": 1.3088529509836615e-05, | |
| "loss": 1.3901, | |
| "step": 221495 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_accuracy": 0.7116992819935238, | |
| "eval_loss": 1.3515229225158691, | |
| "eval_runtime": 1.461, | |
| "eval_samples_per_second": 327.859, | |
| "eval_steps_per_second": 10.267, | |
| "step": 221495 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "learning_rate": 1.18972990996999e-05, | |
| "loss": 1.3874, | |
| "step": 228640 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.7118393529493795, | |
| "eval_loss": 1.3445274829864502, | |
| "eval_runtime": 1.4728, | |
| "eval_samples_per_second": 325.229, | |
| "eval_steps_per_second": 10.185, | |
| "step": 228640 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "learning_rate": 1.0706068689563188e-05, | |
| "loss": 1.386, | |
| "step": 235785 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_accuracy": 0.7097090095131505, | |
| "eval_loss": 1.361108660697937, | |
| "eval_runtime": 1.4621, | |
| "eval_samples_per_second": 327.607, | |
| "eval_steps_per_second": 10.259, | |
| "step": 235785 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "learning_rate": 9.514838279426476e-06, | |
| "loss": 1.3833, | |
| "step": 242930 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.7086746246959827, | |
| "eval_loss": 1.350243091583252, | |
| "eval_runtime": 1.4812, | |
| "eval_samples_per_second": 323.387, | |
| "eval_steps_per_second": 10.127, | |
| "step": 242930 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "learning_rate": 8.323607869289763e-06, | |
| "loss": 1.3822, | |
| "step": 250075 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_accuracy": 0.7108018854610629, | |
| "eval_loss": 1.3657063245773315, | |
| "eval_runtime": 1.4712, | |
| "eval_samples_per_second": 325.58, | |
| "eval_steps_per_second": 10.196, | |
| "step": 250075 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "learning_rate": 7.132377459153051e-06, | |
| "loss": 1.3797, | |
| "step": 257220 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.7107789319595755, | |
| "eval_loss": 1.3575541973114014, | |
| "eval_runtime": 1.4667, | |
| "eval_samples_per_second": 326.589, | |
| "eval_steps_per_second": 10.227, | |
| "step": 257220 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "learning_rate": 5.941147049016339e-06, | |
| "loss": 1.3793, | |
| "step": 264365 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_accuracy": 0.710604865960802, | |
| "eval_loss": 1.3471879959106445, | |
| "eval_runtime": 1.4747, | |
| "eval_samples_per_second": 324.802, | |
| "eval_steps_per_second": 10.171, | |
| "step": 264365 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "learning_rate": 4.749916638879627e-06, | |
| "loss": 1.3763, | |
| "step": 271510 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_accuracy": 0.7155870445344129, | |
| "eval_loss": 1.3322880268096924, | |
| "eval_runtime": 1.4923, | |
| "eval_samples_per_second": 320.979, | |
| "eval_steps_per_second": 10.052, | |
| "step": 271510 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "learning_rate": 3.5586862287429143e-06, | |
| "loss": 1.3762, | |
| "step": 278655 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_accuracy": 0.7144579664629017, | |
| "eval_loss": 1.3325406312942505, | |
| "eval_runtime": 1.6301, | |
| "eval_samples_per_second": 293.852, | |
| "eval_steps_per_second": 9.202, | |
| "step": 278655 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "learning_rate": 2.3674558186062022e-06, | |
| "loss": 1.3748, | |
| "step": 285800 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.7138002117109589, | |
| "eval_loss": 1.3242748975753784, | |
| "eval_runtime": 1.4707, | |
| "eval_samples_per_second": 325.685, | |
| "eval_steps_per_second": 10.199, | |
| "step": 285800 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "learning_rate": 1.17622540846949e-06, | |
| "loss": 1.3733, | |
| "step": 292945 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_accuracy": 0.7170023313951855, | |
| "eval_loss": 1.3217717409133911, | |
| "eval_runtime": 1.459, | |
| "eval_samples_per_second": 328.301, | |
| "eval_steps_per_second": 10.281, | |
| "step": 292945 | |
| }, | |
| { | |
| "epoch": 41.99, | |
| "learning_rate": 0.0, | |
| "loss": 1.3722, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 41.99, | |
| "eval_accuracy": 0.7186174960946218, | |
| "eval_loss": 1.3073620796203613, | |
| "eval_runtime": 1.4662, | |
| "eval_samples_per_second": 326.688, | |
| "eval_steps_per_second": 10.23, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 41.99, | |
| "step": 300000, | |
| "total_flos": 9.562938924439962e+17, | |
| "train_loss": 1.4300982942708333, | |
| "train_runtime": 103608.4476, | |
| "train_samples_per_second": 92.657, | |
| "train_steps_per_second": 2.896 | |
| } | |
| ], | |
| "max_steps": 300000, | |
| "num_train_epochs": 42, | |
| "total_flos": 9.562938924439962e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |