{ "best_metric": 0.58554959, "best_model_checkpoint": "/data1/wjx/model/swift/output/v1d+v3_prompt/output/internvl2-26b/v0-20240810-170945/checkpoint-6379", "epoch": 4.999706084059959, "eval_steps": 1, "global_step": 6379, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "acc": 0.49760315, "epoch": 0.0, "learning_rate": 1.567398119122257e-07, "loss": 1.98409796, "memory(GiB)": 54.93, "step": 1, "train_speed(iter/s)": 0.023783 }, { "acc": 0.52513188, "epoch": 0.0, "learning_rate": 7.836990595611285e-07, "loss": 1.79844522, "memory(GiB)": 54.93, "step": 5, "train_speed(iter/s)": 0.024289 }, { "acc": 0.52413821, "epoch": 0.01, "learning_rate": 1.567398119122257e-06, "loss": 1.82612877, "memory(GiB)": 58.46, "step": 10, "train_speed(iter/s)": 0.023965 }, { "acc": 0.51515636, "epoch": 0.01, "learning_rate": 2.3510971786833857e-06, "loss": 1.82043953, "memory(GiB)": 58.46, "step": 15, "train_speed(iter/s)": 0.024073 }, { "acc": 0.51839466, "epoch": 0.02, "learning_rate": 3.134796238244514e-06, "loss": 1.80918102, "memory(GiB)": 58.46, "step": 20, "train_speed(iter/s)": 0.024066 }, { "acc": 0.51010175, "epoch": 0.02, "learning_rate": 3.9184952978056436e-06, "loss": 1.8428627, "memory(GiB)": 58.46, "step": 25, "train_speed(iter/s)": 0.024068 }, { "acc": 0.516012, "epoch": 0.02, "learning_rate": 4.7021943573667714e-06, "loss": 1.81627235, "memory(GiB)": 58.46, "step": 30, "train_speed(iter/s)": 0.024143 }, { "acc": 0.50783205, "epoch": 0.03, "learning_rate": 5.4858934169279e-06, "loss": 1.87089043, "memory(GiB)": 58.46, "step": 35, "train_speed(iter/s)": 0.024184 }, { "acc": 0.52221713, "epoch": 0.03, "learning_rate": 6.269592476489028e-06, "loss": 1.78321209, "memory(GiB)": 58.46, "step": 40, "train_speed(iter/s)": 0.024195 }, { "acc": 0.53826532, "epoch": 0.04, "learning_rate": 7.053291536050157e-06, "loss": 1.70393295, "memory(GiB)": 58.46, "step": 45, "train_speed(iter/s)": 0.024212 }, { "acc": 0.52223167, "epoch": 0.04, "learning_rate": 7.836990595611287e-06, "loss": 1.74184074, "memory(GiB)": 58.49, "step": 50, "train_speed(iter/s)": 0.024215 }, { "acc": 0.55080438, "epoch": 0.04, "learning_rate": 8.620689655172414e-06, "loss": 1.66886234, "memory(GiB)": 58.49, "step": 55, "train_speed(iter/s)": 0.024201 }, { "acc": 0.55040584, "epoch": 0.05, "learning_rate": 9.404388714733543e-06, "loss": 1.62168198, "memory(GiB)": 58.49, "step": 60, "train_speed(iter/s)": 0.024203 }, { "acc": 0.56035452, "epoch": 0.05, "learning_rate": 1.0188087774294672e-05, "loss": 1.61962128, "memory(GiB)": 58.49, "step": 65, "train_speed(iter/s)": 0.024223 }, { "acc": 0.5660913, "epoch": 0.05, "learning_rate": 1.09717868338558e-05, "loss": 1.59430513, "memory(GiB)": 58.49, "step": 70, "train_speed(iter/s)": 0.024236 }, { "acc": 0.57812696, "epoch": 0.06, "learning_rate": 1.1755485893416929e-05, "loss": 1.51112862, "memory(GiB)": 58.49, "step": 75, "train_speed(iter/s)": 0.024239 }, { "acc": 0.58022437, "epoch": 0.06, "learning_rate": 1.2539184952978056e-05, "loss": 1.45745325, "memory(GiB)": 58.49, "step": 80, "train_speed(iter/s)": 0.024261 }, { "acc": 0.58660021, "epoch": 0.07, "learning_rate": 1.3322884012539186e-05, "loss": 1.47154408, "memory(GiB)": 58.49, "step": 85, "train_speed(iter/s)": 0.024254 }, { "acc": 0.58845835, "epoch": 0.07, "learning_rate": 1.4106583072100313e-05, "loss": 1.46421566, "memory(GiB)": 60.96, "step": 90, "train_speed(iter/s)": 0.024265 }, { "acc": 0.59027457, "epoch": 0.07, "learning_rate": 1.4890282131661442e-05, "loss": 1.49178133, "memory(GiB)": 60.96, "step": 95, "train_speed(iter/s)": 0.024276 }, { "acc": 0.59797707, "epoch": 0.08, "learning_rate": 1.5673981191222574e-05, "loss": 1.42499933, "memory(GiB)": 60.96, "step": 100, "train_speed(iter/s)": 0.024276 }, { "acc": 0.61143513, "epoch": 0.08, "learning_rate": 1.64576802507837e-05, "loss": 1.35773869, "memory(GiB)": 60.96, "step": 105, "train_speed(iter/s)": 0.024269 }, { "acc": 0.60093813, "epoch": 0.09, "learning_rate": 1.7241379310344828e-05, "loss": 1.42934065, "memory(GiB)": 60.96, "step": 110, "train_speed(iter/s)": 0.024277 }, { "acc": 0.60922484, "epoch": 0.09, "learning_rate": 1.8025078369905957e-05, "loss": 1.38470116, "memory(GiB)": 60.96, "step": 115, "train_speed(iter/s)": 0.024276 }, { "acc": 0.59300284, "epoch": 0.09, "learning_rate": 1.8808777429467086e-05, "loss": 1.40323668, "memory(GiB)": 60.96, "step": 120, "train_speed(iter/s)": 0.02428 }, { "acc": 0.59103971, "epoch": 0.1, "learning_rate": 1.9592476489028214e-05, "loss": 1.42401762, "memory(GiB)": 60.96, "step": 125, "train_speed(iter/s)": 0.024274 }, { "acc": 0.59748178, "epoch": 0.1, "learning_rate": 2.0376175548589343e-05, "loss": 1.43538437, "memory(GiB)": 60.96, "step": 130, "train_speed(iter/s)": 0.024284 }, { "acc": 0.59937601, "epoch": 0.11, "learning_rate": 2.1159874608150472e-05, "loss": 1.37400723, "memory(GiB)": 60.96, "step": 135, "train_speed(iter/s)": 0.024272 }, { "acc": 0.60988913, "epoch": 0.11, "learning_rate": 2.19435736677116e-05, "loss": 1.33568382, "memory(GiB)": 60.96, "step": 140, "train_speed(iter/s)": 0.024269 }, { "acc": 0.59887972, "epoch": 0.11, "learning_rate": 2.272727272727273e-05, "loss": 1.37526703, "memory(GiB)": 60.96, "step": 145, "train_speed(iter/s)": 0.024273 }, { "acc": 0.60464563, "epoch": 0.12, "learning_rate": 2.3510971786833858e-05, "loss": 1.38023567, "memory(GiB)": 60.96, "step": 150, "train_speed(iter/s)": 0.02427 }, { "acc": 0.62468381, "epoch": 0.12, "learning_rate": 2.4294670846394983e-05, "loss": 1.31268759, "memory(GiB)": 60.96, "step": 155, "train_speed(iter/s)": 0.024263 }, { "acc": 0.615977, "epoch": 0.13, "learning_rate": 2.5078369905956112e-05, "loss": 1.31516523, "memory(GiB)": 60.96, "step": 160, "train_speed(iter/s)": 0.024272 }, { "acc": 0.62613187, "epoch": 0.13, "learning_rate": 2.5862068965517244e-05, "loss": 1.30936899, "memory(GiB)": 60.96, "step": 165, "train_speed(iter/s)": 0.02427 }, { "acc": 0.60663314, "epoch": 0.13, "learning_rate": 2.6645768025078373e-05, "loss": 1.34656582, "memory(GiB)": 60.96, "step": 170, "train_speed(iter/s)": 0.024273 }, { "acc": 0.6247479, "epoch": 0.14, "learning_rate": 2.7429467084639498e-05, "loss": 1.30112534, "memory(GiB)": 60.96, "step": 175, "train_speed(iter/s)": 0.024272 }, { "acc": 0.62302966, "epoch": 0.14, "learning_rate": 2.8213166144200627e-05, "loss": 1.3161747, "memory(GiB)": 63.69, "step": 180, "train_speed(iter/s)": 0.024253 }, { "acc": 0.62723155, "epoch": 0.14, "learning_rate": 2.899686520376176e-05, "loss": 1.28483057, "memory(GiB)": 63.69, "step": 185, "train_speed(iter/s)": 0.02424 }, { "acc": 0.61306233, "epoch": 0.15, "learning_rate": 2.9780564263322884e-05, "loss": 1.3089962, "memory(GiB)": 63.69, "step": 190, "train_speed(iter/s)": 0.024238 }, { "acc": 0.60289783, "epoch": 0.15, "learning_rate": 3.056426332288401e-05, "loss": 1.38051376, "memory(GiB)": 63.69, "step": 195, "train_speed(iter/s)": 0.024228 }, { "acc": 0.61080809, "epoch": 0.16, "learning_rate": 3.134796238244515e-05, "loss": 1.34134455, "memory(GiB)": 63.69, "step": 200, "train_speed(iter/s)": 0.024225 }, { "acc": 0.62451038, "epoch": 0.16, "learning_rate": 3.213166144200627e-05, "loss": 1.30470991, "memory(GiB)": 63.69, "step": 205, "train_speed(iter/s)": 0.024222 }, { "acc": 0.61880503, "epoch": 0.16, "learning_rate": 3.29153605015674e-05, "loss": 1.31368427, "memory(GiB)": 63.69, "step": 210, "train_speed(iter/s)": 0.02423 }, { "acc": 0.62762785, "epoch": 0.17, "learning_rate": 3.369905956112853e-05, "loss": 1.25870705, "memory(GiB)": 63.69, "step": 215, "train_speed(iter/s)": 0.024226 }, { "acc": 0.6347024, "epoch": 0.17, "learning_rate": 3.4482758620689657e-05, "loss": 1.25454168, "memory(GiB)": 63.69, "step": 220, "train_speed(iter/s)": 0.024227 }, { "acc": 0.63634839, "epoch": 0.18, "learning_rate": 3.5266457680250785e-05, "loss": 1.24715233, "memory(GiB)": 63.69, "step": 225, "train_speed(iter/s)": 0.024228 }, { "acc": 0.61799154, "epoch": 0.18, "learning_rate": 3.6050156739811914e-05, "loss": 1.33669329, "memory(GiB)": 63.69, "step": 230, "train_speed(iter/s)": 0.024225 }, { "acc": 0.61765265, "epoch": 0.18, "learning_rate": 3.683385579937304e-05, "loss": 1.29450636, "memory(GiB)": 63.69, "step": 235, "train_speed(iter/s)": 0.024226 }, { "acc": 0.62776251, "epoch": 0.19, "learning_rate": 3.761755485893417e-05, "loss": 1.29685411, "memory(GiB)": 63.69, "step": 240, "train_speed(iter/s)": 0.024226 }, { "acc": 0.62579837, "epoch": 0.19, "learning_rate": 3.84012539184953e-05, "loss": 1.29461927, "memory(GiB)": 63.69, "step": 245, "train_speed(iter/s)": 0.024227 }, { "acc": 0.62839761, "epoch": 0.2, "learning_rate": 3.918495297805643e-05, "loss": 1.30260792, "memory(GiB)": 63.69, "step": 250, "train_speed(iter/s)": 0.02423 }, { "acc": 0.62507124, "epoch": 0.2, "learning_rate": 3.996865203761756e-05, "loss": 1.28496952, "memory(GiB)": 63.69, "step": 255, "train_speed(iter/s)": 0.024238 }, { "acc": 0.62980595, "epoch": 0.2, "learning_rate": 4.0752351097178686e-05, "loss": 1.29334612, "memory(GiB)": 63.69, "step": 260, "train_speed(iter/s)": 0.024242 }, { "acc": 0.63067842, "epoch": 0.21, "learning_rate": 4.1536050156739815e-05, "loss": 1.30464935, "memory(GiB)": 63.69, "step": 265, "train_speed(iter/s)": 0.024247 }, { "acc": 0.637749, "epoch": 0.21, "learning_rate": 4.2319749216300944e-05, "loss": 1.26530437, "memory(GiB)": 63.69, "step": 270, "train_speed(iter/s)": 0.024251 }, { "acc": 0.62997618, "epoch": 0.22, "learning_rate": 4.3103448275862066e-05, "loss": 1.27895756, "memory(GiB)": 63.69, "step": 275, "train_speed(iter/s)": 0.02424 }, { "acc": 0.62871423, "epoch": 0.22, "learning_rate": 4.38871473354232e-05, "loss": 1.24851456, "memory(GiB)": 63.69, "step": 280, "train_speed(iter/s)": 0.024242 }, { "acc": 0.62472744, "epoch": 0.22, "learning_rate": 4.467084639498433e-05, "loss": 1.280616, "memory(GiB)": 63.69, "step": 285, "train_speed(iter/s)": 0.024244 }, { "acc": 0.6316843, "epoch": 0.23, "learning_rate": 4.545454545454546e-05, "loss": 1.25226479, "memory(GiB)": 66.18, "step": 290, "train_speed(iter/s)": 0.024242 }, { "acc": 0.63331189, "epoch": 0.23, "learning_rate": 4.623824451410659e-05, "loss": 1.24571609, "memory(GiB)": 66.18, "step": 295, "train_speed(iter/s)": 0.024241 }, { "acc": 0.6343452, "epoch": 0.24, "learning_rate": 4.7021943573667716e-05, "loss": 1.28765945, "memory(GiB)": 66.18, "step": 300, "train_speed(iter/s)": 0.024239 }, { "acc": 0.61879272, "epoch": 0.24, "learning_rate": 4.7805642633228845e-05, "loss": 1.32476501, "memory(GiB)": 66.18, "step": 305, "train_speed(iter/s)": 0.024238 }, { "acc": 0.64381948, "epoch": 0.24, "learning_rate": 4.858934169278997e-05, "loss": 1.21850481, "memory(GiB)": 66.18, "step": 310, "train_speed(iter/s)": 0.024237 }, { "acc": 0.6378974, "epoch": 0.25, "learning_rate": 4.93730407523511e-05, "loss": 1.23893595, "memory(GiB)": 66.18, "step": 315, "train_speed(iter/s)": 0.024236 }, { "acc": 0.64307256, "epoch": 0.25, "learning_rate": 5.0156739811912224e-05, "loss": 1.21721163, "memory(GiB)": 66.18, "step": 320, "train_speed(iter/s)": 0.024236 }, { "acc": 0.63342023, "epoch": 0.25, "learning_rate": 5.094043887147336e-05, "loss": 1.22513857, "memory(GiB)": 66.18, "step": 325, "train_speed(iter/s)": 0.024239 }, { "acc": 0.62956347, "epoch": 0.26, "learning_rate": 5.172413793103449e-05, "loss": 1.273248, "memory(GiB)": 66.18, "step": 330, "train_speed(iter/s)": 0.024236 }, { "acc": 0.6396666, "epoch": 0.26, "learning_rate": 5.250783699059562e-05, "loss": 1.2052968, "memory(GiB)": 66.18, "step": 335, "train_speed(iter/s)": 0.024242 }, { "acc": 0.63923178, "epoch": 0.27, "learning_rate": 5.3291536050156746e-05, "loss": 1.19400892, "memory(GiB)": 66.18, "step": 340, "train_speed(iter/s)": 0.024241 }, { "acc": 0.64041128, "epoch": 0.27, "learning_rate": 5.407523510971787e-05, "loss": 1.19951448, "memory(GiB)": 66.18, "step": 345, "train_speed(iter/s)": 0.02424 }, { "acc": 0.64085617, "epoch": 0.27, "learning_rate": 5.4858934169278996e-05, "loss": 1.21989889, "memory(GiB)": 66.18, "step": 350, "train_speed(iter/s)": 0.024241 }, { "acc": 0.63642988, "epoch": 0.28, "learning_rate": 5.5642633228840125e-05, "loss": 1.26623602, "memory(GiB)": 66.18, "step": 355, "train_speed(iter/s)": 0.024244 }, { "acc": 0.64299493, "epoch": 0.28, "learning_rate": 5.6426332288401254e-05, "loss": 1.22799644, "memory(GiB)": 66.18, "step": 360, "train_speed(iter/s)": 0.024246 }, { "acc": 0.62601166, "epoch": 0.29, "learning_rate": 5.721003134796239e-05, "loss": 1.27594986, "memory(GiB)": 66.18, "step": 365, "train_speed(iter/s)": 0.024248 }, { "acc": 0.63284698, "epoch": 0.29, "learning_rate": 5.799373040752352e-05, "loss": 1.2637042, "memory(GiB)": 66.18, "step": 370, "train_speed(iter/s)": 0.02425 }, { "acc": 0.64749308, "epoch": 0.29, "learning_rate": 5.877742946708465e-05, "loss": 1.17957096, "memory(GiB)": 68.68, "step": 375, "train_speed(iter/s)": 0.024244 }, { "acc": 0.63170052, "epoch": 0.3, "learning_rate": 5.956112852664577e-05, "loss": 1.26501703, "memory(GiB)": 68.68, "step": 380, "train_speed(iter/s)": 0.024248 }, { "acc": 0.64609647, "epoch": 0.3, "learning_rate": 6.03448275862069e-05, "loss": 1.1953764, "memory(GiB)": 68.68, "step": 385, "train_speed(iter/s)": 0.024246 }, { "acc": 0.63454189, "epoch": 0.31, "learning_rate": 6.112852664576803e-05, "loss": 1.22050667, "memory(GiB)": 68.68, "step": 390, "train_speed(iter/s)": 0.024249 }, { "acc": 0.6379528, "epoch": 0.31, "learning_rate": 6.191222570532915e-05, "loss": 1.21711063, "memory(GiB)": 68.68, "step": 395, "train_speed(iter/s)": 0.024252 }, { "acc": 0.64480844, "epoch": 0.31, "learning_rate": 6.26959247648903e-05, "loss": 1.18095789, "memory(GiB)": 68.68, "step": 400, "train_speed(iter/s)": 0.024249 }, { "acc": 0.64001446, "epoch": 0.32, "learning_rate": 6.347962382445141e-05, "loss": 1.20016241, "memory(GiB)": 68.68, "step": 405, "train_speed(iter/s)": 0.02425 }, { "acc": 0.64171805, "epoch": 0.32, "learning_rate": 6.426332288401254e-05, "loss": 1.23874407, "memory(GiB)": 68.68, "step": 410, "train_speed(iter/s)": 0.024253 }, { "acc": 0.62769017, "epoch": 0.33, "learning_rate": 6.504702194357367e-05, "loss": 1.28347731, "memory(GiB)": 68.68, "step": 415, "train_speed(iter/s)": 0.024256 }, { "acc": 0.6374104, "epoch": 0.33, "learning_rate": 6.58307210031348e-05, "loss": 1.20551748, "memory(GiB)": 68.68, "step": 420, "train_speed(iter/s)": 0.024251 }, { "acc": 0.63077922, "epoch": 0.33, "learning_rate": 6.661442006269593e-05, "loss": 1.23985853, "memory(GiB)": 68.68, "step": 425, "train_speed(iter/s)": 0.024251 }, { "acc": 0.63933249, "epoch": 0.34, "learning_rate": 6.739811912225706e-05, "loss": 1.25593414, "memory(GiB)": 68.68, "step": 430, "train_speed(iter/s)": 0.024248 }, { "acc": 0.63769989, "epoch": 0.34, "learning_rate": 6.818181818181818e-05, "loss": 1.22561283, "memory(GiB)": 68.68, "step": 435, "train_speed(iter/s)": 0.024248 }, { "acc": 0.62949352, "epoch": 0.34, "learning_rate": 6.896551724137931e-05, "loss": 1.26371164, "memory(GiB)": 68.68, "step": 440, "train_speed(iter/s)": 0.024252 }, { "acc": 0.64247766, "epoch": 0.35, "learning_rate": 6.974921630094044e-05, "loss": 1.20132828, "memory(GiB)": 68.68, "step": 445, "train_speed(iter/s)": 0.024255 }, { "acc": 0.63743186, "epoch": 0.35, "learning_rate": 7.053291536050157e-05, "loss": 1.25252228, "memory(GiB)": 68.68, "step": 450, "train_speed(iter/s)": 0.024255 }, { "acc": 0.63471522, "epoch": 0.36, "learning_rate": 7.13166144200627e-05, "loss": 1.21878319, "memory(GiB)": 68.68, "step": 455, "train_speed(iter/s)": 0.024252 }, { "acc": 0.64033532, "epoch": 0.36, "learning_rate": 7.210031347962383e-05, "loss": 1.23047905, "memory(GiB)": 68.68, "step": 460, "train_speed(iter/s)": 0.02425 }, { "acc": 0.62159195, "epoch": 0.36, "learning_rate": 7.288401253918496e-05, "loss": 1.26657543, "memory(GiB)": 68.68, "step": 465, "train_speed(iter/s)": 0.024247 }, { "acc": 0.63586564, "epoch": 0.37, "learning_rate": 7.366771159874609e-05, "loss": 1.22125769, "memory(GiB)": 68.68, "step": 470, "train_speed(iter/s)": 0.024251 }, { "acc": 0.62739558, "epoch": 0.37, "learning_rate": 7.445141065830721e-05, "loss": 1.24667158, "memory(GiB)": 68.68, "step": 475, "train_speed(iter/s)": 0.02425 }, { "acc": 0.64560175, "epoch": 0.38, "learning_rate": 7.523510971786834e-05, "loss": 1.19969473, "memory(GiB)": 68.68, "step": 480, "train_speed(iter/s)": 0.024251 }, { "acc": 0.64611869, "epoch": 0.38, "learning_rate": 7.601880877742947e-05, "loss": 1.18089733, "memory(GiB)": 68.68, "step": 485, "train_speed(iter/s)": 0.024253 }, { "acc": 0.6526392, "epoch": 0.38, "learning_rate": 7.68025078369906e-05, "loss": 1.18621178, "memory(GiB)": 68.68, "step": 490, "train_speed(iter/s)": 0.024251 }, { "acc": 0.64487033, "epoch": 0.39, "learning_rate": 7.758620689655173e-05, "loss": 1.22172861, "memory(GiB)": 68.68, "step": 495, "train_speed(iter/s)": 0.024251 }, { "acc": 0.64801369, "epoch": 0.39, "learning_rate": 7.836990595611286e-05, "loss": 1.1807312, "memory(GiB)": 68.68, "step": 500, "train_speed(iter/s)": 0.024252 }, { "acc": 0.65408354, "epoch": 0.4, "learning_rate": 7.915360501567399e-05, "loss": 1.17569618, "memory(GiB)": 68.68, "step": 505, "train_speed(iter/s)": 0.024252 }, { "acc": 0.64917159, "epoch": 0.4, "learning_rate": 7.993730407523512e-05, "loss": 1.18598318, "memory(GiB)": 68.68, "step": 510, "train_speed(iter/s)": 0.024253 }, { "acc": 0.6524426, "epoch": 0.4, "learning_rate": 8.072100313479624e-05, "loss": 1.19354515, "memory(GiB)": 68.68, "step": 515, "train_speed(iter/s)": 0.024252 }, { "acc": 0.63392072, "epoch": 0.41, "learning_rate": 8.150470219435737e-05, "loss": 1.25341883, "memory(GiB)": 68.68, "step": 520, "train_speed(iter/s)": 0.024253 }, { "acc": 0.64778457, "epoch": 0.41, "learning_rate": 8.22884012539185e-05, "loss": 1.18014956, "memory(GiB)": 68.68, "step": 525, "train_speed(iter/s)": 0.024252 }, { "acc": 0.65192814, "epoch": 0.42, "learning_rate": 8.307210031347963e-05, "loss": 1.14602833, "memory(GiB)": 68.68, "step": 530, "train_speed(iter/s)": 0.024252 }, { "acc": 0.6464828, "epoch": 0.42, "learning_rate": 8.385579937304076e-05, "loss": 1.20210142, "memory(GiB)": 68.68, "step": 535, "train_speed(iter/s)": 0.024249 }, { "acc": 0.64848366, "epoch": 0.42, "learning_rate": 8.463949843260189e-05, "loss": 1.16304598, "memory(GiB)": 68.68, "step": 540, "train_speed(iter/s)": 0.024247 }, { "acc": 0.65109825, "epoch": 0.43, "learning_rate": 8.542319749216302e-05, "loss": 1.17073431, "memory(GiB)": 68.68, "step": 545, "train_speed(iter/s)": 0.024245 }, { "acc": 0.64547157, "epoch": 0.43, "learning_rate": 8.620689655172413e-05, "loss": 1.21380081, "memory(GiB)": 68.68, "step": 550, "train_speed(iter/s)": 0.024245 }, { "acc": 0.63410115, "epoch": 0.43, "learning_rate": 8.699059561128527e-05, "loss": 1.25124855, "memory(GiB)": 68.68, "step": 555, "train_speed(iter/s)": 0.024246 }, { "acc": 0.63803411, "epoch": 0.44, "learning_rate": 8.77742946708464e-05, "loss": 1.2044651, "memory(GiB)": 68.68, "step": 560, "train_speed(iter/s)": 0.024247 }, { "acc": 0.64598598, "epoch": 0.44, "learning_rate": 8.855799373040753e-05, "loss": 1.18607101, "memory(GiB)": 68.68, "step": 565, "train_speed(iter/s)": 0.024247 }, { "acc": 0.63589487, "epoch": 0.45, "learning_rate": 8.934169278996866e-05, "loss": 1.21959095, "memory(GiB)": 68.68, "step": 570, "train_speed(iter/s)": 0.024249 }, { "acc": 0.66258736, "epoch": 0.45, "learning_rate": 9.012539184952979e-05, "loss": 1.14576664, "memory(GiB)": 68.68, "step": 575, "train_speed(iter/s)": 0.024249 }, { "acc": 0.644449, "epoch": 0.45, "learning_rate": 9.090909090909092e-05, "loss": 1.17768698, "memory(GiB)": 68.68, "step": 580, "train_speed(iter/s)": 0.02425 }, { "acc": 0.63850074, "epoch": 0.46, "learning_rate": 9.169278996865203e-05, "loss": 1.21261606, "memory(GiB)": 68.68, "step": 585, "train_speed(iter/s)": 0.024251 }, { "acc": 0.66103854, "epoch": 0.46, "learning_rate": 9.247648902821317e-05, "loss": 1.14370327, "memory(GiB)": 68.68, "step": 590, "train_speed(iter/s)": 0.024253 }, { "acc": 0.64929209, "epoch": 0.47, "learning_rate": 9.32601880877743e-05, "loss": 1.17919035, "memory(GiB)": 68.68, "step": 595, "train_speed(iter/s)": 0.024256 }, { "acc": 0.6462472, "epoch": 0.47, "learning_rate": 9.404388714733543e-05, "loss": 1.19705715, "memory(GiB)": 68.68, "step": 600, "train_speed(iter/s)": 0.024256 }, { "acc": 0.65177841, "epoch": 0.47, "learning_rate": 9.482758620689656e-05, "loss": 1.17879667, "memory(GiB)": 68.68, "step": 605, "train_speed(iter/s)": 0.024256 }, { "acc": 0.63742218, "epoch": 0.48, "learning_rate": 9.561128526645769e-05, "loss": 1.22292814, "memory(GiB)": 68.68, "step": 610, "train_speed(iter/s)": 0.024257 }, { "acc": 0.6366725, "epoch": 0.48, "learning_rate": 9.63949843260188e-05, "loss": 1.21893911, "memory(GiB)": 68.68, "step": 615, "train_speed(iter/s)": 0.024256 }, { "acc": 0.6517189, "epoch": 0.49, "learning_rate": 9.717868338557993e-05, "loss": 1.16371479, "memory(GiB)": 68.68, "step": 620, "train_speed(iter/s)": 0.024257 }, { "acc": 0.640413, "epoch": 0.49, "learning_rate": 9.796238244514106e-05, "loss": 1.21149368, "memory(GiB)": 68.68, "step": 625, "train_speed(iter/s)": 0.024256 }, { "acc": 0.64837985, "epoch": 0.49, "learning_rate": 9.87460815047022e-05, "loss": 1.18780231, "memory(GiB)": 68.68, "step": 630, "train_speed(iter/s)": 0.024257 }, { "acc": 0.64622922, "epoch": 0.5, "learning_rate": 9.952978056426333e-05, "loss": 1.19707012, "memory(GiB)": 68.68, "step": 635, "train_speed(iter/s)": 0.024259 }, { "acc": 0.63969393, "epoch": 0.5, "learning_rate": 9.999999327227841e-05, "loss": 1.19009142, "memory(GiB)": 68.68, "step": 640, "train_speed(iter/s)": 0.02426 }, { "acc": 0.66441765, "epoch": 0.51, "learning_rate": 9.999991758543125e-05, "loss": 1.15019789, "memory(GiB)": 68.68, "step": 645, "train_speed(iter/s)": 0.02426 }, { "acc": 0.64071684, "epoch": 0.51, "learning_rate": 9.999975780221265e-05, "loss": 1.21033449, "memory(GiB)": 68.68, "step": 650, "train_speed(iter/s)": 0.024262 }, { "acc": 0.65804129, "epoch": 0.51, "learning_rate": 9.999951392289139e-05, "loss": 1.14957666, "memory(GiB)": 68.68, "step": 655, "train_speed(iter/s)": 0.024264 }, { "acc": 0.64765239, "epoch": 0.52, "learning_rate": 9.999918594787761e-05, "loss": 1.19038534, "memory(GiB)": 68.68, "step": 660, "train_speed(iter/s)": 0.024263 }, { "acc": 0.64099531, "epoch": 0.52, "learning_rate": 9.999877387772296e-05, "loss": 1.21312761, "memory(GiB)": 68.68, "step": 665, "train_speed(iter/s)": 0.024263 }, { "acc": 0.63610272, "epoch": 0.53, "learning_rate": 9.999827771312054e-05, "loss": 1.20392857, "memory(GiB)": 68.68, "step": 670, "train_speed(iter/s)": 0.024261 }, { "acc": 0.65267296, "epoch": 0.53, "learning_rate": 9.999769745490481e-05, "loss": 1.17815151, "memory(GiB)": 68.68, "step": 675, "train_speed(iter/s)": 0.02426 }, { "acc": 0.64978647, "epoch": 0.53, "learning_rate": 9.999703310405178e-05, "loss": 1.17260685, "memory(GiB)": 68.68, "step": 680, "train_speed(iter/s)": 0.024259 }, { "acc": 0.64660273, "epoch": 0.54, "learning_rate": 9.999628466167881e-05, "loss": 1.1884903, "memory(GiB)": 68.68, "step": 685, "train_speed(iter/s)": 0.024255 }, { "acc": 0.64088135, "epoch": 0.54, "learning_rate": 9.999545212904473e-05, "loss": 1.21543169, "memory(GiB)": 71.19, "step": 690, "train_speed(iter/s)": 0.024251 }, { "acc": 0.64940615, "epoch": 0.54, "learning_rate": 9.999453550754981e-05, "loss": 1.16802692, "memory(GiB)": 71.19, "step": 695, "train_speed(iter/s)": 0.024251 }, { "acc": 0.64227314, "epoch": 0.55, "learning_rate": 9.999353479873575e-05, "loss": 1.19985647, "memory(GiB)": 71.19, "step": 700, "train_speed(iter/s)": 0.024253 }, { "acc": 0.65743675, "epoch": 0.55, "learning_rate": 9.999245000428563e-05, "loss": 1.13964968, "memory(GiB)": 71.19, "step": 705, "train_speed(iter/s)": 0.024253 }, { "acc": 0.65150938, "epoch": 0.56, "learning_rate": 9.999128112602406e-05, "loss": 1.13540983, "memory(GiB)": 71.19, "step": 710, "train_speed(iter/s)": 0.024254 }, { "acc": 0.65529537, "epoch": 0.56, "learning_rate": 9.999002816591696e-05, "loss": 1.148839, "memory(GiB)": 71.19, "step": 715, "train_speed(iter/s)": 0.024255 }, { "acc": 0.65492792, "epoch": 0.56, "learning_rate": 9.998869112607177e-05, "loss": 1.16663084, "memory(GiB)": 71.19, "step": 720, "train_speed(iter/s)": 0.024255 }, { "acc": 0.64847426, "epoch": 0.57, "learning_rate": 9.998727000873724e-05, "loss": 1.19009991, "memory(GiB)": 71.19, "step": 725, "train_speed(iter/s)": 0.024256 }, { "acc": 0.65983605, "epoch": 0.57, "learning_rate": 9.998576481630364e-05, "loss": 1.17517796, "memory(GiB)": 71.19, "step": 730, "train_speed(iter/s)": 0.024256 }, { "acc": 0.66514916, "epoch": 0.58, "learning_rate": 9.998417555130258e-05, "loss": 1.11018133, "memory(GiB)": 71.19, "step": 735, "train_speed(iter/s)": 0.024256 }, { "acc": 0.65130382, "epoch": 0.58, "learning_rate": 9.998250221640708e-05, "loss": 1.18467026, "memory(GiB)": 71.19, "step": 740, "train_speed(iter/s)": 0.024256 }, { "acc": 0.64631677, "epoch": 0.58, "learning_rate": 9.99807448144316e-05, "loss": 1.16014452, "memory(GiB)": 71.19, "step": 745, "train_speed(iter/s)": 0.024258 }, { "acc": 0.64330449, "epoch": 0.59, "learning_rate": 9.997890334833194e-05, "loss": 1.16912346, "memory(GiB)": 71.19, "step": 750, "train_speed(iter/s)": 0.02426 }, { "acc": 0.64438591, "epoch": 0.59, "learning_rate": 9.997697782120535e-05, "loss": 1.18001528, "memory(GiB)": 71.19, "step": 755, "train_speed(iter/s)": 0.024262 }, { "acc": 0.64493594, "epoch": 0.6, "learning_rate": 9.997496823629038e-05, "loss": 1.18717165, "memory(GiB)": 71.19, "step": 760, "train_speed(iter/s)": 0.024261 }, { "acc": 0.64458404, "epoch": 0.6, "learning_rate": 9.997287459696707e-05, "loss": 1.2015686, "memory(GiB)": 71.19, "step": 765, "train_speed(iter/s)": 0.024262 }, { "acc": 0.65334735, "epoch": 0.6, "learning_rate": 9.997069690675673e-05, "loss": 1.13985786, "memory(GiB)": 71.19, "step": 770, "train_speed(iter/s)": 0.024263 }, { "acc": 0.65646448, "epoch": 0.61, "learning_rate": 9.996843516932212e-05, "loss": 1.14643917, "memory(GiB)": 71.19, "step": 775, "train_speed(iter/s)": 0.024261 }, { "acc": 0.63831444, "epoch": 0.61, "learning_rate": 9.99660893884673e-05, "loss": 1.20351677, "memory(GiB)": 71.19, "step": 780, "train_speed(iter/s)": 0.024261 }, { "acc": 0.64026852, "epoch": 0.62, "learning_rate": 9.996365956813771e-05, "loss": 1.21742287, "memory(GiB)": 71.19, "step": 785, "train_speed(iter/s)": 0.024261 }, { "acc": 0.65602589, "epoch": 0.62, "learning_rate": 9.996114571242014e-05, "loss": 1.1601553, "memory(GiB)": 71.19, "step": 790, "train_speed(iter/s)": 0.024259 }, { "acc": 0.64420905, "epoch": 0.62, "learning_rate": 9.995854782554275e-05, "loss": 1.19600077, "memory(GiB)": 71.19, "step": 795, "train_speed(iter/s)": 0.02426 }, { "acc": 0.64605112, "epoch": 0.63, "learning_rate": 9.995586591187496e-05, "loss": 1.18536654, "memory(GiB)": 71.19, "step": 800, "train_speed(iter/s)": 0.02426 }, { "acc": 0.63376851, "epoch": 0.63, "learning_rate": 9.995309997592757e-05, "loss": 1.2327877, "memory(GiB)": 71.19, "step": 805, "train_speed(iter/s)": 0.024261 }, { "acc": 0.63126373, "epoch": 0.63, "learning_rate": 9.995025002235272e-05, "loss": 1.24970627, "memory(GiB)": 71.19, "step": 810, "train_speed(iter/s)": 0.024261 }, { "acc": 0.64044433, "epoch": 0.64, "learning_rate": 9.994731605594381e-05, "loss": 1.19621964, "memory(GiB)": 71.19, "step": 815, "train_speed(iter/s)": 0.024259 }, { "acc": 0.64977989, "epoch": 0.64, "learning_rate": 9.994429808163556e-05, "loss": 1.17283697, "memory(GiB)": 71.19, "step": 820, "train_speed(iter/s)": 0.024256 }, { "acc": 0.64557714, "epoch": 0.65, "learning_rate": 9.994119610450401e-05, "loss": 1.18671465, "memory(GiB)": 71.19, "step": 825, "train_speed(iter/s)": 0.024257 }, { "acc": 0.66244955, "epoch": 0.65, "learning_rate": 9.993801012976647e-05, "loss": 1.1419055, "memory(GiB)": 71.19, "step": 830, "train_speed(iter/s)": 0.024255 }, { "acc": 0.66743221, "epoch": 0.65, "learning_rate": 9.99347401627815e-05, "loss": 1.10382252, "memory(GiB)": 71.19, "step": 835, "train_speed(iter/s)": 0.024253 }, { "acc": 0.65264039, "epoch": 0.66, "learning_rate": 9.993138620904901e-05, "loss": 1.14855709, "memory(GiB)": 71.19, "step": 840, "train_speed(iter/s)": 0.024251 }, { "acc": 0.65064726, "epoch": 0.66, "learning_rate": 9.992794827421004e-05, "loss": 1.15438948, "memory(GiB)": 71.19, "step": 845, "train_speed(iter/s)": 0.024251 }, { "acc": 0.64738302, "epoch": 0.67, "learning_rate": 9.992442636404701e-05, "loss": 1.18792486, "memory(GiB)": 71.19, "step": 850, "train_speed(iter/s)": 0.024245 }, { "acc": 0.63004446, "epoch": 0.67, "learning_rate": 9.992082048448353e-05, "loss": 1.24233475, "memory(GiB)": 71.19, "step": 855, "train_speed(iter/s)": 0.024243 }, { "acc": 0.65914078, "epoch": 0.67, "learning_rate": 9.991713064158442e-05, "loss": 1.13226604, "memory(GiB)": 71.19, "step": 860, "train_speed(iter/s)": 0.024242 }, { "acc": 0.65967417, "epoch": 0.68, "learning_rate": 9.991335684155574e-05, "loss": 1.1242136, "memory(GiB)": 71.19, "step": 865, "train_speed(iter/s)": 0.024243 }, { "acc": 0.64683952, "epoch": 0.68, "learning_rate": 9.990949909074476e-05, "loss": 1.14824209, "memory(GiB)": 71.19, "step": 870, "train_speed(iter/s)": 0.024245 }, { "acc": 0.64689598, "epoch": 0.69, "learning_rate": 9.990555739563994e-05, "loss": 1.18441563, "memory(GiB)": 71.19, "step": 875, "train_speed(iter/s)": 0.024245 }, { "acc": 0.64474206, "epoch": 0.69, "learning_rate": 9.990153176287094e-05, "loss": 1.17308598, "memory(GiB)": 71.19, "step": 880, "train_speed(iter/s)": 0.024245 }, { "acc": 0.64102311, "epoch": 0.69, "learning_rate": 9.989742219920861e-05, "loss": 1.1535264, "memory(GiB)": 71.19, "step": 885, "train_speed(iter/s)": 0.024246 }, { "acc": 0.63249907, "epoch": 0.7, "learning_rate": 9.989322871156492e-05, "loss": 1.24110155, "memory(GiB)": 71.19, "step": 890, "train_speed(iter/s)": 0.024246 }, { "acc": 0.65820117, "epoch": 0.7, "learning_rate": 9.988895130699305e-05, "loss": 1.14757719, "memory(GiB)": 71.19, "step": 895, "train_speed(iter/s)": 0.024247 }, { "acc": 0.6568809, "epoch": 0.71, "learning_rate": 9.988458999268728e-05, "loss": 1.1805316, "memory(GiB)": 71.19, "step": 900, "train_speed(iter/s)": 0.024247 }, { "acc": 0.65315137, "epoch": 0.71, "learning_rate": 9.988014477598304e-05, "loss": 1.18291893, "memory(GiB)": 71.19, "step": 905, "train_speed(iter/s)": 0.024248 }, { "acc": 0.65974207, "epoch": 0.71, "learning_rate": 9.987561566435689e-05, "loss": 1.11334858, "memory(GiB)": 71.19, "step": 910, "train_speed(iter/s)": 0.024245 }, { "acc": 0.66105995, "epoch": 0.72, "learning_rate": 9.987100266542644e-05, "loss": 1.10593357, "memory(GiB)": 71.19, "step": 915, "train_speed(iter/s)": 0.024243 }, { "acc": 0.65526924, "epoch": 0.72, "learning_rate": 9.986630578695047e-05, "loss": 1.139046, "memory(GiB)": 71.19, "step": 920, "train_speed(iter/s)": 0.024243 }, { "acc": 0.64982562, "epoch": 0.72, "learning_rate": 9.986152503682879e-05, "loss": 1.15638342, "memory(GiB)": 71.19, "step": 925, "train_speed(iter/s)": 0.024244 }, { "acc": 0.65965147, "epoch": 0.73, "learning_rate": 9.985666042310229e-05, "loss": 1.10007477, "memory(GiB)": 71.19, "step": 930, "train_speed(iter/s)": 0.024245 }, { "acc": 0.64175363, "epoch": 0.73, "learning_rate": 9.98517119539529e-05, "loss": 1.2035121, "memory(GiB)": 71.19, "step": 935, "train_speed(iter/s)": 0.024246 }, { "acc": 0.67316985, "epoch": 0.74, "learning_rate": 9.984667963770361e-05, "loss": 1.0980998, "memory(GiB)": 71.19, "step": 940, "train_speed(iter/s)": 0.024244 }, { "acc": 0.65934963, "epoch": 0.74, "learning_rate": 9.984156348281842e-05, "loss": 1.15233898, "memory(GiB)": 71.19, "step": 945, "train_speed(iter/s)": 0.024243 }, { "acc": 0.65463395, "epoch": 0.74, "learning_rate": 9.983636349790235e-05, "loss": 1.17552748, "memory(GiB)": 71.19, "step": 950, "train_speed(iter/s)": 0.024244 }, { "acc": 0.64568629, "epoch": 0.75, "learning_rate": 9.983107969170141e-05, "loss": 1.18645792, "memory(GiB)": 71.19, "step": 955, "train_speed(iter/s)": 0.024244 }, { "acc": 0.64675536, "epoch": 0.75, "learning_rate": 9.982571207310259e-05, "loss": 1.16107082, "memory(GiB)": 71.19, "step": 960, "train_speed(iter/s)": 0.024244 }, { "acc": 0.66028018, "epoch": 0.76, "learning_rate": 9.982026065113386e-05, "loss": 1.14361153, "memory(GiB)": 71.19, "step": 965, "train_speed(iter/s)": 0.024243 }, { "acc": 0.65468411, "epoch": 0.76, "learning_rate": 9.981472543496412e-05, "loss": 1.14505243, "memory(GiB)": 71.19, "step": 970, "train_speed(iter/s)": 0.024243 }, { "acc": 0.65591521, "epoch": 0.76, "learning_rate": 9.980910643390321e-05, "loss": 1.12250948, "memory(GiB)": 71.19, "step": 975, "train_speed(iter/s)": 0.024243 }, { "acc": 0.66190786, "epoch": 0.77, "learning_rate": 9.980340365740193e-05, "loss": 1.12542, "memory(GiB)": 71.19, "step": 980, "train_speed(iter/s)": 0.024243 }, { "acc": 0.65410662, "epoch": 0.77, "learning_rate": 9.979761711505191e-05, "loss": 1.14729557, "memory(GiB)": 71.19, "step": 985, "train_speed(iter/s)": 0.024245 }, { "acc": 0.66674676, "epoch": 0.78, "learning_rate": 9.979174681658574e-05, "loss": 1.10079012, "memory(GiB)": 71.19, "step": 990, "train_speed(iter/s)": 0.024245 }, { "acc": 0.65360141, "epoch": 0.78, "learning_rate": 9.978579277187684e-05, "loss": 1.16522446, "memory(GiB)": 71.19, "step": 995, "train_speed(iter/s)": 0.024244 }, { "acc": 0.66134276, "epoch": 0.78, "learning_rate": 9.97797549909395e-05, "loss": 1.11977797, "memory(GiB)": 71.19, "step": 1000, "train_speed(iter/s)": 0.024243 }, { "acc": 0.65579562, "epoch": 0.79, "learning_rate": 9.977363348392886e-05, "loss": 1.15444326, "memory(GiB)": 71.19, "step": 1005, "train_speed(iter/s)": 0.024244 }, { "acc": 0.66116667, "epoch": 0.79, "learning_rate": 9.976742826114083e-05, "loss": 1.12424622, "memory(GiB)": 71.19, "step": 1010, "train_speed(iter/s)": 0.024245 }, { "acc": 0.6683176, "epoch": 0.8, "learning_rate": 9.97611393330122e-05, "loss": 1.11872005, "memory(GiB)": 71.19, "step": 1015, "train_speed(iter/s)": 0.024243 }, { "acc": 0.65369892, "epoch": 0.8, "learning_rate": 9.975476671012049e-05, "loss": 1.1429491, "memory(GiB)": 71.19, "step": 1020, "train_speed(iter/s)": 0.024243 }, { "acc": 0.64377575, "epoch": 0.8, "learning_rate": 9.974831040318403e-05, "loss": 1.19284143, "memory(GiB)": 71.19, "step": 1025, "train_speed(iter/s)": 0.024243 }, { "acc": 0.65243697, "epoch": 0.81, "learning_rate": 9.974177042306183e-05, "loss": 1.157862, "memory(GiB)": 71.19, "step": 1030, "train_speed(iter/s)": 0.02424 }, { "acc": 0.64958396, "epoch": 0.81, "learning_rate": 9.973514678075372e-05, "loss": 1.18240442, "memory(GiB)": 71.19, "step": 1035, "train_speed(iter/s)": 0.02424 }, { "acc": 0.6682889, "epoch": 0.82, "learning_rate": 9.972843948740019e-05, "loss": 1.11338902, "memory(GiB)": 71.19, "step": 1040, "train_speed(iter/s)": 0.024238 }, { "acc": 0.65257483, "epoch": 0.82, "learning_rate": 9.972164855428244e-05, "loss": 1.13974934, "memory(GiB)": 71.19, "step": 1045, "train_speed(iter/s)": 0.024238 }, { "acc": 0.6569396, "epoch": 0.82, "learning_rate": 9.971477399282236e-05, "loss": 1.13563824, "memory(GiB)": 71.19, "step": 1050, "train_speed(iter/s)": 0.024238 }, { "acc": 0.66365395, "epoch": 0.83, "learning_rate": 9.970781581458246e-05, "loss": 1.13148432, "memory(GiB)": 71.19, "step": 1055, "train_speed(iter/s)": 0.024238 }, { "acc": 0.66655474, "epoch": 0.83, "learning_rate": 9.970077403126592e-05, "loss": 1.12719841, "memory(GiB)": 71.19, "step": 1060, "train_speed(iter/s)": 0.024238 }, { "acc": 0.65451961, "epoch": 0.83, "learning_rate": 9.969364865471654e-05, "loss": 1.17055464, "memory(GiB)": 71.19, "step": 1065, "train_speed(iter/s)": 0.024238 }, { "acc": 0.66282625, "epoch": 0.84, "learning_rate": 9.968643969691868e-05, "loss": 1.11539078, "memory(GiB)": 71.19, "step": 1070, "train_speed(iter/s)": 0.024238 }, { "acc": 0.66324406, "epoch": 0.84, "learning_rate": 9.96791471699973e-05, "loss": 1.12122641, "memory(GiB)": 71.19, "step": 1075, "train_speed(iter/s)": 0.024237 }, { "acc": 0.66220465, "epoch": 0.85, "learning_rate": 9.967177108621798e-05, "loss": 1.1304287, "memory(GiB)": 71.19, "step": 1080, "train_speed(iter/s)": 0.024237 }, { "acc": 0.65334153, "epoch": 0.85, "learning_rate": 9.966431145798672e-05, "loss": 1.15781803, "memory(GiB)": 71.19, "step": 1085, "train_speed(iter/s)": 0.024235 }, { "acc": 0.6635685, "epoch": 0.85, "learning_rate": 9.965676829785011e-05, "loss": 1.10384178, "memory(GiB)": 71.19, "step": 1090, "train_speed(iter/s)": 0.024235 }, { "acc": 0.66880417, "epoch": 0.86, "learning_rate": 9.964914161849522e-05, "loss": 1.1105998, "memory(GiB)": 71.19, "step": 1095, "train_speed(iter/s)": 0.024235 }, { "acc": 0.65008655, "epoch": 0.86, "learning_rate": 9.96414314327496e-05, "loss": 1.15002518, "memory(GiB)": 71.19, "step": 1100, "train_speed(iter/s)": 0.024236 }, { "acc": 0.660391, "epoch": 0.87, "learning_rate": 9.963363775358123e-05, "loss": 1.13650742, "memory(GiB)": 71.19, "step": 1105, "train_speed(iter/s)": 0.024236 }, { "acc": 0.65939445, "epoch": 0.87, "learning_rate": 9.962576059409854e-05, "loss": 1.14001417, "memory(GiB)": 71.19, "step": 1110, "train_speed(iter/s)": 0.024236 }, { "acc": 0.65705018, "epoch": 0.87, "learning_rate": 9.961779996755036e-05, "loss": 1.13998594, "memory(GiB)": 71.19, "step": 1115, "train_speed(iter/s)": 0.024237 }, { "acc": 0.65304127, "epoch": 0.88, "learning_rate": 9.96097558873259e-05, "loss": 1.13927307, "memory(GiB)": 71.19, "step": 1120, "train_speed(iter/s)": 0.024238 }, { "acc": 0.67214541, "epoch": 0.88, "learning_rate": 9.960162836695478e-05, "loss": 1.10992355, "memory(GiB)": 71.19, "step": 1125, "train_speed(iter/s)": 0.024238 }, { "acc": 0.64525504, "epoch": 0.89, "learning_rate": 9.959341742010688e-05, "loss": 1.18042841, "memory(GiB)": 71.19, "step": 1130, "train_speed(iter/s)": 0.024239 }, { "acc": 0.6544466, "epoch": 0.89, "learning_rate": 9.958512306059247e-05, "loss": 1.14733744, "memory(GiB)": 71.19, "step": 1135, "train_speed(iter/s)": 0.024238 }, { "acc": 0.65201364, "epoch": 0.89, "learning_rate": 9.957674530236205e-05, "loss": 1.13061304, "memory(GiB)": 71.19, "step": 1140, "train_speed(iter/s)": 0.024239 }, { "acc": 0.66318994, "epoch": 0.9, "learning_rate": 9.956828415950645e-05, "loss": 1.1062932, "memory(GiB)": 71.19, "step": 1145, "train_speed(iter/s)": 0.02424 }, { "acc": 0.65637937, "epoch": 0.9, "learning_rate": 9.955973964625672e-05, "loss": 1.14308052, "memory(GiB)": 71.19, "step": 1150, "train_speed(iter/s)": 0.024241 }, { "acc": 0.65574675, "epoch": 0.91, "learning_rate": 9.955111177698412e-05, "loss": 1.14501066, "memory(GiB)": 71.19, "step": 1155, "train_speed(iter/s)": 0.024241 }, { "acc": 0.6562243, "epoch": 0.91, "learning_rate": 9.954240056620014e-05, "loss": 1.16590223, "memory(GiB)": 71.19, "step": 1160, "train_speed(iter/s)": 0.024241 }, { "acc": 0.66819267, "epoch": 0.91, "learning_rate": 9.953360602855641e-05, "loss": 1.10895786, "memory(GiB)": 71.19, "step": 1165, "train_speed(iter/s)": 0.024241 }, { "acc": 0.66646504, "epoch": 0.92, "learning_rate": 9.952472817884476e-05, "loss": 1.13673782, "memory(GiB)": 71.19, "step": 1170, "train_speed(iter/s)": 0.024239 }, { "acc": 0.67009692, "epoch": 0.92, "learning_rate": 9.951576703199708e-05, "loss": 1.0828022, "memory(GiB)": 71.19, "step": 1175, "train_speed(iter/s)": 0.024239 }, { "acc": 0.64940991, "epoch": 0.92, "learning_rate": 9.95067226030854e-05, "loss": 1.18824711, "memory(GiB)": 71.19, "step": 1180, "train_speed(iter/s)": 0.024238 }, { "acc": 0.67086515, "epoch": 0.93, "learning_rate": 9.949759490732185e-05, "loss": 1.11339579, "memory(GiB)": 71.19, "step": 1185, "train_speed(iter/s)": 0.024238 }, { "acc": 0.65760446, "epoch": 0.93, "learning_rate": 9.948838396005854e-05, "loss": 1.1283742, "memory(GiB)": 71.19, "step": 1190, "train_speed(iter/s)": 0.024239 }, { "acc": 0.66921711, "epoch": 0.94, "learning_rate": 9.947908977678766e-05, "loss": 1.09572687, "memory(GiB)": 71.19, "step": 1195, "train_speed(iter/s)": 0.024237 }, { "acc": 0.65912962, "epoch": 0.94, "learning_rate": 9.946971237314136e-05, "loss": 1.14600134, "memory(GiB)": 71.19, "step": 1200, "train_speed(iter/s)": 0.024236 }, { "acc": 0.66213255, "epoch": 0.94, "learning_rate": 9.94602517648918e-05, "loss": 1.12797499, "memory(GiB)": 71.19, "step": 1205, "train_speed(iter/s)": 0.024234 }, { "acc": 0.64736218, "epoch": 0.95, "learning_rate": 9.945070796795105e-05, "loss": 1.15010433, "memory(GiB)": 71.19, "step": 1210, "train_speed(iter/s)": 0.024234 }, { "acc": 0.65648313, "epoch": 0.95, "learning_rate": 9.94410809983711e-05, "loss": 1.13810177, "memory(GiB)": 71.19, "step": 1215, "train_speed(iter/s)": 0.024232 }, { "acc": 0.65412345, "epoch": 0.96, "learning_rate": 9.943137087234385e-05, "loss": 1.14729261, "memory(GiB)": 71.19, "step": 1220, "train_speed(iter/s)": 0.024231 }, { "acc": 0.65371003, "epoch": 0.96, "learning_rate": 9.942157760620108e-05, "loss": 1.15231876, "memory(GiB)": 71.19, "step": 1225, "train_speed(iter/s)": 0.024231 }, { "acc": 0.6497436, "epoch": 0.96, "learning_rate": 9.941170121641435e-05, "loss": 1.15340414, "memory(GiB)": 71.19, "step": 1230, "train_speed(iter/s)": 0.024231 }, { "acc": 0.66141253, "epoch": 0.97, "learning_rate": 9.940174171959504e-05, "loss": 1.12856808, "memory(GiB)": 71.19, "step": 1235, "train_speed(iter/s)": 0.024232 }, { "acc": 0.66497359, "epoch": 0.97, "learning_rate": 9.939169913249438e-05, "loss": 1.12229996, "memory(GiB)": 71.19, "step": 1240, "train_speed(iter/s)": 0.024232 }, { "acc": 0.67611599, "epoch": 0.98, "learning_rate": 9.938157347200327e-05, "loss": 1.0430521, "memory(GiB)": 71.19, "step": 1245, "train_speed(iter/s)": 0.024233 }, { "acc": 0.65242562, "epoch": 0.98, "learning_rate": 9.937136475515237e-05, "loss": 1.16849995, "memory(GiB)": 71.19, "step": 1250, "train_speed(iter/s)": 0.02423 }, { "acc": 0.65185585, "epoch": 0.98, "learning_rate": 9.936107299911203e-05, "loss": 1.17074966, "memory(GiB)": 71.19, "step": 1255, "train_speed(iter/s)": 0.024232 }, { "acc": 0.6592226, "epoch": 0.99, "learning_rate": 9.935069822119226e-05, "loss": 1.11288481, "memory(GiB)": 71.19, "step": 1260, "train_speed(iter/s)": 0.024232 }, { "acc": 0.66193151, "epoch": 0.99, "learning_rate": 9.934024043884271e-05, "loss": 1.16120825, "memory(GiB)": 71.19, "step": 1265, "train_speed(iter/s)": 0.024232 }, { "acc": 0.66481395, "epoch": 1.0, "learning_rate": 9.932969966965267e-05, "loss": 1.09738159, "memory(GiB)": 71.19, "step": 1270, "train_speed(iter/s)": 0.024232 }, { "acc": 0.65508566, "epoch": 1.0, "learning_rate": 9.931907593135093e-05, "loss": 1.14331837, "memory(GiB)": 71.19, "step": 1275, "train_speed(iter/s)": 0.024232 }, { "epoch": 1.0, "eval_acc": 0.6856999749561733, "eval_loss": 1.018600344657898, "eval_runtime": 107.9063, "eval_samples_per_second": 0.862, "eval_steps_per_second": 0.862, "step": 1275 }, { "acc": 0.68564477, "epoch": 1.0, "learning_rate": 9.93083692418059e-05, "loss": 1.04102726, "memory(GiB)": 71.19, "step": 1280, "train_speed(iter/s)": 0.024182 }, { "acc": 0.66535759, "epoch": 1.01, "learning_rate": 9.929757961902549e-05, "loss": 1.10843706, "memory(GiB)": 71.19, "step": 1285, "train_speed(iter/s)": 0.02418 }, { "acc": 0.67624335, "epoch": 1.01, "learning_rate": 9.928670708115708e-05, "loss": 1.03700457, "memory(GiB)": 71.19, "step": 1290, "train_speed(iter/s)": 0.02418 }, { "acc": 0.6848104, "epoch": 1.01, "learning_rate": 9.927575164648754e-05, "loss": 1.01924419, "memory(GiB)": 71.19, "step": 1295, "train_speed(iter/s)": 0.024179 }, { "acc": 0.65978332, "epoch": 1.02, "learning_rate": 9.926471333344311e-05, "loss": 1.10564556, "memory(GiB)": 71.19, "step": 1300, "train_speed(iter/s)": 0.024181 }, { "acc": 0.66694851, "epoch": 1.02, "learning_rate": 9.925359216058952e-05, "loss": 1.06940336, "memory(GiB)": 71.19, "step": 1305, "train_speed(iter/s)": 0.024181 }, { "acc": 0.67258253, "epoch": 1.03, "learning_rate": 9.924238814663173e-05, "loss": 1.05483885, "memory(GiB)": 71.19, "step": 1310, "train_speed(iter/s)": 0.024181 }, { "acc": 0.67171688, "epoch": 1.03, "learning_rate": 9.923110131041419e-05, "loss": 1.0581152, "memory(GiB)": 71.19, "step": 1315, "train_speed(iter/s)": 0.024182 }, { "acc": 0.66035485, "epoch": 1.03, "learning_rate": 9.92197316709205e-05, "loss": 1.10017042, "memory(GiB)": 71.19, "step": 1320, "train_speed(iter/s)": 0.024182 }, { "acc": 0.67783785, "epoch": 1.04, "learning_rate": 9.920827924727366e-05, "loss": 1.05424404, "memory(GiB)": 71.19, "step": 1325, "train_speed(iter/s)": 0.024182 }, { "acc": 0.67227278, "epoch": 1.04, "learning_rate": 9.91967440587358e-05, "loss": 1.07807531, "memory(GiB)": 71.19, "step": 1330, "train_speed(iter/s)": 0.024182 }, { "acc": 0.65477858, "epoch": 1.05, "learning_rate": 9.918512612470834e-05, "loss": 1.10319042, "memory(GiB)": 71.19, "step": 1335, "train_speed(iter/s)": 0.024182 }, { "acc": 0.67027154, "epoch": 1.05, "learning_rate": 9.917342546473181e-05, "loss": 1.07305937, "memory(GiB)": 71.19, "step": 1340, "train_speed(iter/s)": 0.024183 }, { "acc": 0.66486812, "epoch": 1.05, "learning_rate": 9.916164209848588e-05, "loss": 1.11207981, "memory(GiB)": 71.19, "step": 1345, "train_speed(iter/s)": 0.024183 }, { "acc": 0.66329069, "epoch": 1.06, "learning_rate": 9.914977604578941e-05, "loss": 1.10326147, "memory(GiB)": 71.19, "step": 1350, "train_speed(iter/s)": 0.024183 }, { "acc": 0.68474355, "epoch": 1.06, "learning_rate": 9.913782732660024e-05, "loss": 1.02233744, "memory(GiB)": 71.19, "step": 1355, "train_speed(iter/s)": 0.024183 }, { "acc": 0.67932301, "epoch": 1.07, "learning_rate": 9.912579596101525e-05, "loss": 1.06197271, "memory(GiB)": 71.19, "step": 1360, "train_speed(iter/s)": 0.024183 }, { "acc": 0.66768179, "epoch": 1.07, "learning_rate": 9.911368196927043e-05, "loss": 1.07399158, "memory(GiB)": 71.19, "step": 1365, "train_speed(iter/s)": 0.024184 }, { "acc": 0.67560496, "epoch": 1.07, "learning_rate": 9.91014853717406e-05, "loss": 1.05643578, "memory(GiB)": 71.19, "step": 1370, "train_speed(iter/s)": 0.024184 }, { "acc": 0.67212873, "epoch": 1.08, "learning_rate": 9.908920618893962e-05, "loss": 1.0583807, "memory(GiB)": 71.19, "step": 1375, "train_speed(iter/s)": 0.024183 }, { "acc": 0.66627192, "epoch": 1.08, "learning_rate": 9.90768444415202e-05, "loss": 1.07695656, "memory(GiB)": 71.19, "step": 1380, "train_speed(iter/s)": 0.024183 }, { "acc": 0.66721344, "epoch": 1.09, "learning_rate": 9.906440015027399e-05, "loss": 1.04808693, "memory(GiB)": 71.19, "step": 1385, "train_speed(iter/s)": 0.024183 }, { "acc": 0.67704191, "epoch": 1.09, "learning_rate": 9.905187333613134e-05, "loss": 1.06024771, "memory(GiB)": 71.19, "step": 1390, "train_speed(iter/s)": 0.024183 }, { "acc": 0.66961083, "epoch": 1.09, "learning_rate": 9.903926402016153e-05, "loss": 1.07331381, "memory(GiB)": 71.19, "step": 1395, "train_speed(iter/s)": 0.024183 }, { "acc": 0.67959518, "epoch": 1.1, "learning_rate": 9.902657222357252e-05, "loss": 1.04364729, "memory(GiB)": 71.19, "step": 1400, "train_speed(iter/s)": 0.024183 }, { "acc": 0.66278548, "epoch": 1.1, "learning_rate": 9.901379796771107e-05, "loss": 1.08651028, "memory(GiB)": 71.19, "step": 1405, "train_speed(iter/s)": 0.024185 }, { "acc": 0.65881038, "epoch": 1.11, "learning_rate": 9.900094127406253e-05, "loss": 1.09732819, "memory(GiB)": 71.19, "step": 1410, "train_speed(iter/s)": 0.024186 }, { "acc": 0.66367035, "epoch": 1.11, "learning_rate": 9.898800216425099e-05, "loss": 1.083144, "memory(GiB)": 71.19, "step": 1415, "train_speed(iter/s)": 0.024187 }, { "acc": 0.65925193, "epoch": 1.11, "learning_rate": 9.897498066003913e-05, "loss": 1.12018261, "memory(GiB)": 71.19, "step": 1420, "train_speed(iter/s)": 0.024187 }, { "acc": 0.67458215, "epoch": 1.12, "learning_rate": 9.89618767833282e-05, "loss": 1.06256332, "memory(GiB)": 71.19, "step": 1425, "train_speed(iter/s)": 0.024187 }, { "acc": 0.66883864, "epoch": 1.12, "learning_rate": 9.894869055615803e-05, "loss": 1.1081831, "memory(GiB)": 71.19, "step": 1430, "train_speed(iter/s)": 0.024187 }, { "acc": 0.66981688, "epoch": 1.12, "learning_rate": 9.893542200070691e-05, "loss": 1.07718334, "memory(GiB)": 71.19, "step": 1435, "train_speed(iter/s)": 0.024188 }, { "acc": 0.67857156, "epoch": 1.13, "learning_rate": 9.892207113929164e-05, "loss": 1.04210625, "memory(GiB)": 71.19, "step": 1440, "train_speed(iter/s)": 0.024188 }, { "acc": 0.66746936, "epoch": 1.13, "learning_rate": 9.890863799436743e-05, "loss": 1.08562384, "memory(GiB)": 71.19, "step": 1445, "train_speed(iter/s)": 0.024188 }, { "acc": 0.65901647, "epoch": 1.14, "learning_rate": 9.889512258852789e-05, "loss": 1.09942312, "memory(GiB)": 71.19, "step": 1450, "train_speed(iter/s)": 0.024188 }, { "acc": 0.67867947, "epoch": 1.14, "learning_rate": 9.888152494450498e-05, "loss": 1.06383791, "memory(GiB)": 71.19, "step": 1455, "train_speed(iter/s)": 0.024188 }, { "acc": 0.66191411, "epoch": 1.14, "learning_rate": 9.886784508516901e-05, "loss": 1.09565401, "memory(GiB)": 71.19, "step": 1460, "train_speed(iter/s)": 0.024187 }, { "acc": 0.67523494, "epoch": 1.15, "learning_rate": 9.885408303352854e-05, "loss": 1.07071381, "memory(GiB)": 71.19, "step": 1465, "train_speed(iter/s)": 0.024188 }, { "acc": 0.68043942, "epoch": 1.15, "learning_rate": 9.884023881273037e-05, "loss": 1.05898991, "memory(GiB)": 71.19, "step": 1470, "train_speed(iter/s)": 0.024187 }, { "acc": 0.66896992, "epoch": 1.16, "learning_rate": 9.882631244605952e-05, "loss": 1.08150454, "memory(GiB)": 71.19, "step": 1475, "train_speed(iter/s)": 0.024187 }, { "acc": 0.66858001, "epoch": 1.16, "learning_rate": 9.881230395693917e-05, "loss": 1.07318954, "memory(GiB)": 71.19, "step": 1480, "train_speed(iter/s)": 0.024189 }, { "acc": 0.67361059, "epoch": 1.16, "learning_rate": 9.879821336893062e-05, "loss": 1.0633007, "memory(GiB)": 71.19, "step": 1485, "train_speed(iter/s)": 0.024189 }, { "acc": 0.66314216, "epoch": 1.17, "learning_rate": 9.878404070573327e-05, "loss": 1.10530052, "memory(GiB)": 71.19, "step": 1490, "train_speed(iter/s)": 0.024189 }, { "acc": 0.66712093, "epoch": 1.17, "learning_rate": 9.876978599118452e-05, "loss": 1.074893, "memory(GiB)": 71.19, "step": 1495, "train_speed(iter/s)": 0.024189 }, { "acc": 0.67996116, "epoch": 1.18, "learning_rate": 9.875544924925981e-05, "loss": 1.03155136, "memory(GiB)": 71.19, "step": 1500, "train_speed(iter/s)": 0.024189 }, { "acc": 0.66647635, "epoch": 1.18, "learning_rate": 9.874103050407256e-05, "loss": 1.07530098, "memory(GiB)": 71.19, "step": 1505, "train_speed(iter/s)": 0.024189 }, { "acc": 0.66543493, "epoch": 1.18, "learning_rate": 9.872652977987408e-05, "loss": 1.0699563, "memory(GiB)": 71.19, "step": 1510, "train_speed(iter/s)": 0.02419 }, { "acc": 0.66304975, "epoch": 1.19, "learning_rate": 9.871194710105357e-05, "loss": 1.10289001, "memory(GiB)": 71.19, "step": 1515, "train_speed(iter/s)": 0.024191 }, { "acc": 0.67143154, "epoch": 1.19, "learning_rate": 9.86972824921381e-05, "loss": 1.07342091, "memory(GiB)": 71.19, "step": 1520, "train_speed(iter/s)": 0.024191 }, { "acc": 0.66971889, "epoch": 1.2, "learning_rate": 9.868253597779249e-05, "loss": 1.08200026, "memory(GiB)": 71.19, "step": 1525, "train_speed(iter/s)": 0.024192 }, { "acc": 0.66585174, "epoch": 1.2, "learning_rate": 9.866770758281937e-05, "loss": 1.07775135, "memory(GiB)": 71.19, "step": 1530, "train_speed(iter/s)": 0.024191 }, { "acc": 0.67415438, "epoch": 1.2, "learning_rate": 9.865279733215905e-05, "loss": 1.03115015, "memory(GiB)": 71.19, "step": 1535, "train_speed(iter/s)": 0.024192 }, { "acc": 0.6658217, "epoch": 1.21, "learning_rate": 9.863780525088955e-05, "loss": 1.11420527, "memory(GiB)": 71.19, "step": 1540, "train_speed(iter/s)": 0.024193 }, { "acc": 0.68223243, "epoch": 1.21, "learning_rate": 9.862273136422648e-05, "loss": 1.03579702, "memory(GiB)": 71.19, "step": 1545, "train_speed(iter/s)": 0.024191 }, { "acc": 0.68120694, "epoch": 1.21, "learning_rate": 9.860757569752309e-05, "loss": 1.06588163, "memory(GiB)": 71.19, "step": 1550, "train_speed(iter/s)": 0.024192 }, { "acc": 0.65559621, "epoch": 1.22, "learning_rate": 9.859233827627013e-05, "loss": 1.12858868, "memory(GiB)": 71.19, "step": 1555, "train_speed(iter/s)": 0.024193 }, { "acc": 0.67324719, "epoch": 1.22, "learning_rate": 9.857701912609589e-05, "loss": 1.10106726, "memory(GiB)": 71.19, "step": 1560, "train_speed(iter/s)": 0.024193 }, { "acc": 0.68056149, "epoch": 1.23, "learning_rate": 9.856161827276613e-05, "loss": 1.03890066, "memory(GiB)": 71.19, "step": 1565, "train_speed(iter/s)": 0.024194 }, { "acc": 0.66202497, "epoch": 1.23, "learning_rate": 9.854613574218396e-05, "loss": 1.10919619, "memory(GiB)": 71.19, "step": 1570, "train_speed(iter/s)": 0.024195 }, { "acc": 0.66488538, "epoch": 1.23, "learning_rate": 9.853057156038998e-05, "loss": 1.07370605, "memory(GiB)": 71.19, "step": 1575, "train_speed(iter/s)": 0.024197 }, { "acc": 0.66696358, "epoch": 1.24, "learning_rate": 9.851492575356201e-05, "loss": 1.0641902, "memory(GiB)": 71.19, "step": 1580, "train_speed(iter/s)": 0.024197 }, { "acc": 0.68181887, "epoch": 1.24, "learning_rate": 9.849919834801522e-05, "loss": 1.04376001, "memory(GiB)": 71.19, "step": 1585, "train_speed(iter/s)": 0.024196 }, { "acc": 0.67546906, "epoch": 1.25, "learning_rate": 9.8483389370202e-05, "loss": 1.03430681, "memory(GiB)": 71.19, "step": 1590, "train_speed(iter/s)": 0.024197 }, { "acc": 0.67184181, "epoch": 1.25, "learning_rate": 9.846749884671198e-05, "loss": 1.09638357, "memory(GiB)": 71.19, "step": 1595, "train_speed(iter/s)": 0.024197 }, { "acc": 0.66743326, "epoch": 1.25, "learning_rate": 9.845152680427186e-05, "loss": 1.08324871, "memory(GiB)": 71.19, "step": 1600, "train_speed(iter/s)": 0.024196 }, { "acc": 0.67404108, "epoch": 1.26, "learning_rate": 9.843547326974555e-05, "loss": 1.05259237, "memory(GiB)": 71.19, "step": 1605, "train_speed(iter/s)": 0.024195 }, { "acc": 0.67859106, "epoch": 1.26, "learning_rate": 9.841933827013394e-05, "loss": 1.02058706, "memory(GiB)": 71.19, "step": 1610, "train_speed(iter/s)": 0.024196 }, { "acc": 0.67385159, "epoch": 1.27, "learning_rate": 9.840312183257498e-05, "loss": 1.0660202, "memory(GiB)": 71.19, "step": 1615, "train_speed(iter/s)": 0.024196 }, { "acc": 0.68775396, "epoch": 1.27, "learning_rate": 9.83868239843436e-05, "loss": 1.01889, "memory(GiB)": 71.19, "step": 1620, "train_speed(iter/s)": 0.024197 }, { "acc": 0.67444701, "epoch": 1.27, "learning_rate": 9.837044475285165e-05, "loss": 1.0787632, "memory(GiB)": 71.19, "step": 1625, "train_speed(iter/s)": 0.024198 }, { "acc": 0.67301526, "epoch": 1.28, "learning_rate": 9.83539841656478e-05, "loss": 1.07971487, "memory(GiB)": 71.19, "step": 1630, "train_speed(iter/s)": 0.024199 }, { "acc": 0.6624773, "epoch": 1.28, "learning_rate": 9.833744225041767e-05, "loss": 1.07760115, "memory(GiB)": 71.19, "step": 1635, "train_speed(iter/s)": 0.024199 }, { "acc": 0.67510743, "epoch": 1.29, "learning_rate": 9.832081903498359e-05, "loss": 1.06419029, "memory(GiB)": 71.19, "step": 1640, "train_speed(iter/s)": 0.0242 }, { "acc": 0.68025455, "epoch": 1.29, "learning_rate": 9.830411454730464e-05, "loss": 1.03052721, "memory(GiB)": 71.19, "step": 1645, "train_speed(iter/s)": 0.0242 }, { "acc": 0.66855297, "epoch": 1.29, "learning_rate": 9.82873288154766e-05, "loss": 1.06540508, "memory(GiB)": 71.19, "step": 1650, "train_speed(iter/s)": 0.024201 }, { "acc": 0.68221936, "epoch": 1.3, "learning_rate": 9.82704618677319e-05, "loss": 1.02048054, "memory(GiB)": 71.19, "step": 1655, "train_speed(iter/s)": 0.024201 }, { "acc": 0.67271628, "epoch": 1.3, "learning_rate": 9.825351373243957e-05, "loss": 1.05244284, "memory(GiB)": 71.19, "step": 1660, "train_speed(iter/s)": 0.024202 }, { "acc": 0.67585492, "epoch": 1.3, "learning_rate": 9.82364844381052e-05, "loss": 1.03952274, "memory(GiB)": 71.19, "step": 1665, "train_speed(iter/s)": 0.024201 }, { "acc": 0.67327933, "epoch": 1.31, "learning_rate": 9.821937401337087e-05, "loss": 1.06375408, "memory(GiB)": 71.19, "step": 1670, "train_speed(iter/s)": 0.024201 }, { "acc": 0.67473063, "epoch": 1.31, "learning_rate": 9.820218248701512e-05, "loss": 1.03807678, "memory(GiB)": 71.19, "step": 1675, "train_speed(iter/s)": 0.0242 }, { "acc": 0.66647797, "epoch": 1.32, "learning_rate": 9.818490988795289e-05, "loss": 1.08794069, "memory(GiB)": 71.19, "step": 1680, "train_speed(iter/s)": 0.0242 }, { "acc": 0.6725491, "epoch": 1.32, "learning_rate": 9.816755624523548e-05, "loss": 1.04948406, "memory(GiB)": 71.19, "step": 1685, "train_speed(iter/s)": 0.024199 }, { "acc": 0.67765465, "epoch": 1.32, "learning_rate": 9.815012158805054e-05, "loss": 1.07902899, "memory(GiB)": 71.19, "step": 1690, "train_speed(iter/s)": 0.024199 }, { "acc": 0.68021631, "epoch": 1.33, "learning_rate": 9.813260594572192e-05, "loss": 1.03020191, "memory(GiB)": 71.19, "step": 1695, "train_speed(iter/s)": 0.0242 }, { "acc": 0.66895852, "epoch": 1.33, "learning_rate": 9.811500934770969e-05, "loss": 1.10171871, "memory(GiB)": 71.19, "step": 1700, "train_speed(iter/s)": 0.0242 }, { "acc": 0.65738878, "epoch": 1.34, "learning_rate": 9.809733182361014e-05, "loss": 1.12308722, "memory(GiB)": 71.19, "step": 1705, "train_speed(iter/s)": 0.0242 }, { "acc": 0.68673587, "epoch": 1.34, "learning_rate": 9.80795734031556e-05, "loss": 1.04205341, "memory(GiB)": 71.19, "step": 1710, "train_speed(iter/s)": 0.024199 }, { "acc": 0.66318197, "epoch": 1.34, "learning_rate": 9.806173411621451e-05, "loss": 1.07137575, "memory(GiB)": 71.19, "step": 1715, "train_speed(iter/s)": 0.024199 }, { "acc": 0.67586851, "epoch": 1.35, "learning_rate": 9.80438139927913e-05, "loss": 1.04353619, "memory(GiB)": 71.19, "step": 1720, "train_speed(iter/s)": 0.024199 }, { "acc": 0.67039175, "epoch": 1.35, "learning_rate": 9.802581306302638e-05, "loss": 1.0834878, "memory(GiB)": 71.19, "step": 1725, "train_speed(iter/s)": 0.024199 }, { "acc": 0.67330465, "epoch": 1.36, "learning_rate": 9.800773135719604e-05, "loss": 1.04026947, "memory(GiB)": 71.19, "step": 1730, "train_speed(iter/s)": 0.024199 }, { "acc": 0.65766926, "epoch": 1.36, "learning_rate": 9.798956890571244e-05, "loss": 1.11574898, "memory(GiB)": 71.19, "step": 1735, "train_speed(iter/s)": 0.024199 }, { "acc": 0.66705904, "epoch": 1.36, "learning_rate": 9.79713257391236e-05, "loss": 1.09899406, "memory(GiB)": 71.19, "step": 1740, "train_speed(iter/s)": 0.024199 }, { "acc": 0.66609573, "epoch": 1.37, "learning_rate": 9.795300188811319e-05, "loss": 1.10992451, "memory(GiB)": 71.19, "step": 1745, "train_speed(iter/s)": 0.0242 }, { "acc": 0.6764576, "epoch": 1.37, "learning_rate": 9.793459738350069e-05, "loss": 1.05841427, "memory(GiB)": 71.19, "step": 1750, "train_speed(iter/s)": 0.0242 }, { "acc": 0.65810442, "epoch": 1.38, "learning_rate": 9.791611225624118e-05, "loss": 1.12776289, "memory(GiB)": 71.19, "step": 1755, "train_speed(iter/s)": 0.024199 }, { "acc": 0.6705864, "epoch": 1.38, "learning_rate": 9.789754653742537e-05, "loss": 1.07466335, "memory(GiB)": 71.19, "step": 1760, "train_speed(iter/s)": 0.0242 }, { "acc": 0.67177191, "epoch": 1.38, "learning_rate": 9.787890025827948e-05, "loss": 1.08449554, "memory(GiB)": 71.19, "step": 1765, "train_speed(iter/s)": 0.024201 }, { "acc": 0.67626877, "epoch": 1.39, "learning_rate": 9.786017345016524e-05, "loss": 1.06005878, "memory(GiB)": 71.19, "step": 1770, "train_speed(iter/s)": 0.024201 }, { "acc": 0.68222189, "epoch": 1.39, "learning_rate": 9.784136614457988e-05, "loss": 1.06014185, "memory(GiB)": 71.19, "step": 1775, "train_speed(iter/s)": 0.024201 }, { "acc": 0.66160769, "epoch": 1.4, "learning_rate": 9.782247837315595e-05, "loss": 1.11386976, "memory(GiB)": 71.19, "step": 1780, "train_speed(iter/s)": 0.0242 }, { "acc": 0.68455191, "epoch": 1.4, "learning_rate": 9.780351016766136e-05, "loss": 1.04914351, "memory(GiB)": 71.19, "step": 1785, "train_speed(iter/s)": 0.0242 }, { "acc": 0.66824789, "epoch": 1.4, "learning_rate": 9.77844615599993e-05, "loss": 1.05908461, "memory(GiB)": 71.19, "step": 1790, "train_speed(iter/s)": 0.024199 }, { "acc": 0.66619282, "epoch": 1.41, "learning_rate": 9.776533258220819e-05, "loss": 1.09789734, "memory(GiB)": 71.19, "step": 1795, "train_speed(iter/s)": 0.024199 }, { "acc": 0.67257457, "epoch": 1.41, "learning_rate": 9.774612326646169e-05, "loss": 1.05887022, "memory(GiB)": 71.19, "step": 1800, "train_speed(iter/s)": 0.024199 }, { "acc": 0.67153988, "epoch": 1.41, "learning_rate": 9.772683364506847e-05, "loss": 1.07383604, "memory(GiB)": 71.19, "step": 1805, "train_speed(iter/s)": 0.024198 }, { "acc": 0.67205477, "epoch": 1.42, "learning_rate": 9.770746375047235e-05, "loss": 1.06401491, "memory(GiB)": 71.19, "step": 1810, "train_speed(iter/s)": 0.024198 }, { "acc": 0.66954546, "epoch": 1.42, "learning_rate": 9.768801361525215e-05, "loss": 1.08306198, "memory(GiB)": 71.19, "step": 1815, "train_speed(iter/s)": 0.024198 }, { "acc": 0.68025694, "epoch": 1.43, "learning_rate": 9.766848327212161e-05, "loss": 1.02885761, "memory(GiB)": 71.19, "step": 1820, "train_speed(iter/s)": 0.024198 }, { "acc": 0.68164105, "epoch": 1.43, "learning_rate": 9.764887275392943e-05, "loss": 1.05446472, "memory(GiB)": 71.19, "step": 1825, "train_speed(iter/s)": 0.024198 }, { "acc": 0.66762023, "epoch": 1.43, "learning_rate": 9.762918209365913e-05, "loss": 1.10211878, "memory(GiB)": 71.19, "step": 1830, "train_speed(iter/s)": 0.024196 }, { "acc": 0.67623882, "epoch": 1.44, "learning_rate": 9.760941132442902e-05, "loss": 1.06018152, "memory(GiB)": 71.19, "step": 1835, "train_speed(iter/s)": 0.024196 }, { "acc": 0.6666759, "epoch": 1.44, "learning_rate": 9.758956047949215e-05, "loss": 1.11666231, "memory(GiB)": 71.19, "step": 1840, "train_speed(iter/s)": 0.024196 }, { "acc": 0.67013221, "epoch": 1.45, "learning_rate": 9.756962959223628e-05, "loss": 1.06344328, "memory(GiB)": 71.19, "step": 1845, "train_speed(iter/s)": 0.024196 }, { "acc": 0.66876597, "epoch": 1.45, "learning_rate": 9.754961869618373e-05, "loss": 1.08094902, "memory(GiB)": 71.19, "step": 1850, "train_speed(iter/s)": 0.024198 }, { "acc": 0.66471753, "epoch": 1.45, "learning_rate": 9.752952782499147e-05, "loss": 1.0886961, "memory(GiB)": 71.19, "step": 1855, "train_speed(iter/s)": 0.024197 }, { "acc": 0.67831697, "epoch": 1.46, "learning_rate": 9.750935701245092e-05, "loss": 1.03748837, "memory(GiB)": 71.19, "step": 1860, "train_speed(iter/s)": 0.024196 }, { "acc": 0.6669693, "epoch": 1.46, "learning_rate": 9.748910629248798e-05, "loss": 1.08023806, "memory(GiB)": 71.19, "step": 1865, "train_speed(iter/s)": 0.024195 }, { "acc": 0.67126279, "epoch": 1.47, "learning_rate": 9.746877569916297e-05, "loss": 1.07919216, "memory(GiB)": 71.19, "step": 1870, "train_speed(iter/s)": 0.024196 }, { "acc": 0.67684112, "epoch": 1.47, "learning_rate": 9.74483652666705e-05, "loss": 1.04165659, "memory(GiB)": 71.19, "step": 1875, "train_speed(iter/s)": 0.024195 }, { "acc": 0.68273034, "epoch": 1.47, "learning_rate": 9.74278750293395e-05, "loss": 1.04425545, "memory(GiB)": 71.19, "step": 1880, "train_speed(iter/s)": 0.024196 }, { "acc": 0.66332841, "epoch": 1.48, "learning_rate": 9.740730502163311e-05, "loss": 1.07659769, "memory(GiB)": 71.19, "step": 1885, "train_speed(iter/s)": 0.024196 }, { "acc": 0.68010182, "epoch": 1.48, "learning_rate": 9.738665527814867e-05, "loss": 1.0395319, "memory(GiB)": 71.19, "step": 1890, "train_speed(iter/s)": 0.024196 }, { "acc": 0.68821034, "epoch": 1.49, "learning_rate": 9.736592583361762e-05, "loss": 1.01133175, "memory(GiB)": 71.19, "step": 1895, "train_speed(iter/s)": 0.024196 }, { "acc": 0.66931405, "epoch": 1.49, "learning_rate": 9.73451167229054e-05, "loss": 1.06255102, "memory(GiB)": 71.19, "step": 1900, "train_speed(iter/s)": 0.024196 }, { "acc": 0.67967229, "epoch": 1.49, "learning_rate": 9.732422798101149e-05, "loss": 1.02841129, "memory(GiB)": 71.19, "step": 1905, "train_speed(iter/s)": 0.024196 }, { "acc": 0.67584767, "epoch": 1.5, "learning_rate": 9.73032596430693e-05, "loss": 1.05436392, "memory(GiB)": 71.19, "step": 1910, "train_speed(iter/s)": 0.024196 }, { "acc": 0.66209011, "epoch": 1.5, "learning_rate": 9.728221174434613e-05, "loss": 1.10967607, "memory(GiB)": 71.19, "step": 1915, "train_speed(iter/s)": 0.024198 }, { "acc": 0.68050909, "epoch": 1.5, "learning_rate": 9.726108432024306e-05, "loss": 1.02168255, "memory(GiB)": 71.19, "step": 1920, "train_speed(iter/s)": 0.024198 }, { "acc": 0.68102293, "epoch": 1.51, "learning_rate": 9.723987740629494e-05, "loss": 1.05781345, "memory(GiB)": 71.19, "step": 1925, "train_speed(iter/s)": 0.024198 }, { "acc": 0.6717145, "epoch": 1.51, "learning_rate": 9.721859103817033e-05, "loss": 1.06613684, "memory(GiB)": 71.19, "step": 1930, "train_speed(iter/s)": 0.024197 }, { "acc": 0.66321344, "epoch": 1.52, "learning_rate": 9.719722525167141e-05, "loss": 1.09566288, "memory(GiB)": 71.19, "step": 1935, "train_speed(iter/s)": 0.024196 }, { "acc": 0.6747292, "epoch": 1.52, "learning_rate": 9.717578008273393e-05, "loss": 1.04526234, "memory(GiB)": 71.19, "step": 1940, "train_speed(iter/s)": 0.024197 }, { "acc": 0.68077927, "epoch": 1.52, "learning_rate": 9.715425556742716e-05, "loss": 1.02439919, "memory(GiB)": 71.19, "step": 1945, "train_speed(iter/s)": 0.024196 }, { "acc": 0.67475858, "epoch": 1.53, "learning_rate": 9.713265174195388e-05, "loss": 1.05720959, "memory(GiB)": 71.19, "step": 1950, "train_speed(iter/s)": 0.024195 }, { "acc": 0.66691504, "epoch": 1.53, "learning_rate": 9.711096864265016e-05, "loss": 1.08331633, "memory(GiB)": 71.19, "step": 1955, "train_speed(iter/s)": 0.024195 }, { "acc": 0.66824608, "epoch": 1.54, "learning_rate": 9.708920630598551e-05, "loss": 1.08959455, "memory(GiB)": 71.19, "step": 1960, "train_speed(iter/s)": 0.024195 }, { "acc": 0.66691604, "epoch": 1.54, "learning_rate": 9.706736476856264e-05, "loss": 1.05755281, "memory(GiB)": 71.19, "step": 1965, "train_speed(iter/s)": 0.024196 }, { "acc": 0.66844997, "epoch": 1.54, "learning_rate": 9.704544406711746e-05, "loss": 1.07190256, "memory(GiB)": 71.19, "step": 1970, "train_speed(iter/s)": 0.024197 }, { "acc": 0.6775053, "epoch": 1.55, "learning_rate": 9.702344423851911e-05, "loss": 1.04533443, "memory(GiB)": 71.19, "step": 1975, "train_speed(iter/s)": 0.024196 }, { "acc": 0.66825333, "epoch": 1.55, "learning_rate": 9.700136531976974e-05, "loss": 1.1034709, "memory(GiB)": 71.19, "step": 1980, "train_speed(iter/s)": 0.024197 }, { "acc": 0.67021503, "epoch": 1.56, "learning_rate": 9.697920734800456e-05, "loss": 1.08550053, "memory(GiB)": 71.19, "step": 1985, "train_speed(iter/s)": 0.024198 }, { "acc": 0.66171827, "epoch": 1.56, "learning_rate": 9.695697036049172e-05, "loss": 1.09154787, "memory(GiB)": 71.19, "step": 1990, "train_speed(iter/s)": 0.024199 }, { "acc": 0.68564267, "epoch": 1.56, "learning_rate": 9.693465439463228e-05, "loss": 1.02461205, "memory(GiB)": 71.19, "step": 1995, "train_speed(iter/s)": 0.024198 }, { "acc": 0.67194061, "epoch": 1.57, "learning_rate": 9.691225948796014e-05, "loss": 1.05746717, "memory(GiB)": 71.19, "step": 2000, "train_speed(iter/s)": 0.024199 }, { "acc": 0.67832704, "epoch": 1.57, "learning_rate": 9.6889785678142e-05, "loss": 1.05458746, "memory(GiB)": 71.19, "step": 2005, "train_speed(iter/s)": 0.0242 }, { "acc": 0.67701178, "epoch": 1.58, "learning_rate": 9.68672330029772e-05, "loss": 1.05478268, "memory(GiB)": 71.19, "step": 2010, "train_speed(iter/s)": 0.0242 }, { "acc": 0.66319699, "epoch": 1.58, "learning_rate": 9.684460150039779e-05, "loss": 1.1452549, "memory(GiB)": 71.19, "step": 2015, "train_speed(iter/s)": 0.0242 }, { "acc": 0.6763217, "epoch": 1.58, "learning_rate": 9.682189120846834e-05, "loss": 1.04298344, "memory(GiB)": 71.19, "step": 2020, "train_speed(iter/s)": 0.024201 }, { "acc": 0.68313031, "epoch": 1.59, "learning_rate": 9.679910216538601e-05, "loss": 1.02893553, "memory(GiB)": 71.19, "step": 2025, "train_speed(iter/s)": 0.024201 }, { "acc": 0.67663183, "epoch": 1.59, "learning_rate": 9.677623440948038e-05, "loss": 1.04474058, "memory(GiB)": 71.19, "step": 2030, "train_speed(iter/s)": 0.024202 }, { "acc": 0.66433477, "epoch": 1.59, "learning_rate": 9.675328797921342e-05, "loss": 1.07190542, "memory(GiB)": 71.19, "step": 2035, "train_speed(iter/s)": 0.024203 }, { "acc": 0.68374462, "epoch": 1.6, "learning_rate": 9.67302629131794e-05, "loss": 1.07545433, "memory(GiB)": 71.19, "step": 2040, "train_speed(iter/s)": 0.024203 }, { "acc": 0.67807226, "epoch": 1.6, "learning_rate": 9.670715925010489e-05, "loss": 1.05842772, "memory(GiB)": 71.19, "step": 2045, "train_speed(iter/s)": 0.024203 }, { "acc": 0.67401872, "epoch": 1.61, "learning_rate": 9.668397702884866e-05, "loss": 1.07067871, "memory(GiB)": 71.19, "step": 2050, "train_speed(iter/s)": 0.024202 }, { "acc": 0.67526712, "epoch": 1.61, "learning_rate": 9.666071628840154e-05, "loss": 1.04234457, "memory(GiB)": 71.19, "step": 2055, "train_speed(iter/s)": 0.024203 }, { "acc": 0.67841988, "epoch": 1.61, "learning_rate": 9.663737706788652e-05, "loss": 1.05335855, "memory(GiB)": 71.19, "step": 2060, "train_speed(iter/s)": 0.024203 }, { "acc": 0.67320924, "epoch": 1.62, "learning_rate": 9.66139594065585e-05, "loss": 1.07291307, "memory(GiB)": 71.19, "step": 2065, "train_speed(iter/s)": 0.024203 }, { "acc": 0.67182488, "epoch": 1.62, "learning_rate": 9.659046334380439e-05, "loss": 1.06681614, "memory(GiB)": 71.19, "step": 2070, "train_speed(iter/s)": 0.024203 }, { "acc": 0.68178086, "epoch": 1.63, "learning_rate": 9.65668889191429e-05, "loss": 1.02115288, "memory(GiB)": 71.19, "step": 2075, "train_speed(iter/s)": 0.024203 }, { "acc": 0.67527838, "epoch": 1.63, "learning_rate": 9.654323617222456e-05, "loss": 1.02676697, "memory(GiB)": 71.19, "step": 2080, "train_speed(iter/s)": 0.024203 }, { "acc": 0.68160815, "epoch": 1.63, "learning_rate": 9.651950514283166e-05, "loss": 1.0352355, "memory(GiB)": 71.19, "step": 2085, "train_speed(iter/s)": 0.024202 }, { "acc": 0.67300172, "epoch": 1.64, "learning_rate": 9.649569587087814e-05, "loss": 1.06639357, "memory(GiB)": 71.19, "step": 2090, "train_speed(iter/s)": 0.024201 }, { "acc": 0.67614121, "epoch": 1.64, "learning_rate": 9.647180839640951e-05, "loss": 1.05420818, "memory(GiB)": 71.19, "step": 2095, "train_speed(iter/s)": 0.024201 }, { "acc": 0.67984071, "epoch": 1.65, "learning_rate": 9.644784275960286e-05, "loss": 1.04375839, "memory(GiB)": 71.19, "step": 2100, "train_speed(iter/s)": 0.024202 }, { "acc": 0.67311459, "epoch": 1.65, "learning_rate": 9.64237990007667e-05, "loss": 1.05994186, "memory(GiB)": 71.19, "step": 2105, "train_speed(iter/s)": 0.024203 }, { "acc": 0.66600237, "epoch": 1.65, "learning_rate": 9.639967716034095e-05, "loss": 1.08946924, "memory(GiB)": 71.19, "step": 2110, "train_speed(iter/s)": 0.024204 }, { "acc": 0.68244076, "epoch": 1.66, "learning_rate": 9.637547727889688e-05, "loss": 0.99340763, "memory(GiB)": 71.19, "step": 2115, "train_speed(iter/s)": 0.024204 }, { "acc": 0.6899344, "epoch": 1.66, "learning_rate": 9.6351199397137e-05, "loss": 1.00486383, "memory(GiB)": 71.19, "step": 2120, "train_speed(iter/s)": 0.024204 }, { "acc": 0.68560667, "epoch": 1.67, "learning_rate": 9.632684355589499e-05, "loss": 1.02728119, "memory(GiB)": 71.19, "step": 2125, "train_speed(iter/s)": 0.024204 }, { "acc": 0.67438531, "epoch": 1.67, "learning_rate": 9.630240979613569e-05, "loss": 1.08428488, "memory(GiB)": 71.19, "step": 2130, "train_speed(iter/s)": 0.024204 }, { "acc": 0.67004066, "epoch": 1.67, "learning_rate": 9.627789815895498e-05, "loss": 1.06729488, "memory(GiB)": 71.19, "step": 2135, "train_speed(iter/s)": 0.024204 }, { "acc": 0.66677608, "epoch": 1.68, "learning_rate": 9.625330868557973e-05, "loss": 1.08347321, "memory(GiB)": 71.19, "step": 2140, "train_speed(iter/s)": 0.024204 }, { "acc": 0.68743095, "epoch": 1.68, "learning_rate": 9.622864141736772e-05, "loss": 1.02547398, "memory(GiB)": 71.19, "step": 2145, "train_speed(iter/s)": 0.024205 }, { "acc": 0.66881833, "epoch": 1.69, "learning_rate": 9.620389639580753e-05, "loss": 1.10209799, "memory(GiB)": 71.19, "step": 2150, "train_speed(iter/s)": 0.024206 }, { "acc": 0.68243523, "epoch": 1.69, "learning_rate": 9.617907366251862e-05, "loss": 1.02223969, "memory(GiB)": 71.19, "step": 2155, "train_speed(iter/s)": 0.024206 }, { "acc": 0.67003131, "epoch": 1.69, "learning_rate": 9.615417325925106e-05, "loss": 1.05287113, "memory(GiB)": 71.19, "step": 2160, "train_speed(iter/s)": 0.024205 }, { "acc": 0.67350674, "epoch": 1.7, "learning_rate": 9.612919522788559e-05, "loss": 1.05360003, "memory(GiB)": 71.19, "step": 2165, "train_speed(iter/s)": 0.024205 }, { "acc": 0.67382431, "epoch": 1.7, "learning_rate": 9.610413961043354e-05, "loss": 1.06978779, "memory(GiB)": 71.19, "step": 2170, "train_speed(iter/s)": 0.024206 }, { "acc": 0.69041929, "epoch": 1.7, "learning_rate": 9.607900644903667e-05, "loss": 0.99977674, "memory(GiB)": 71.19, "step": 2175, "train_speed(iter/s)": 0.024205 }, { "acc": 0.66950564, "epoch": 1.71, "learning_rate": 9.605379578596724e-05, "loss": 1.07388229, "memory(GiB)": 71.19, "step": 2180, "train_speed(iter/s)": 0.024204 }, { "acc": 0.67316561, "epoch": 1.71, "learning_rate": 9.60285076636278e-05, "loss": 1.06690502, "memory(GiB)": 71.19, "step": 2185, "train_speed(iter/s)": 0.024205 }, { "acc": 0.68053436, "epoch": 1.72, "learning_rate": 9.600314212455123e-05, "loss": 1.02924366, "memory(GiB)": 71.19, "step": 2190, "train_speed(iter/s)": 0.024205 }, { "acc": 0.68438997, "epoch": 1.72, "learning_rate": 9.597769921140059e-05, "loss": 1.04086161, "memory(GiB)": 71.19, "step": 2195, "train_speed(iter/s)": 0.024206 }, { "acc": 0.68536568, "epoch": 1.72, "learning_rate": 9.595217896696906e-05, "loss": 1.02934484, "memory(GiB)": 71.19, "step": 2200, "train_speed(iter/s)": 0.024206 }, { "acc": 0.66752796, "epoch": 1.73, "learning_rate": 9.592658143417993e-05, "loss": 1.07700911, "memory(GiB)": 71.19, "step": 2205, "train_speed(iter/s)": 0.024206 }, { "acc": 0.6667316, "epoch": 1.73, "learning_rate": 9.590090665608647e-05, "loss": 1.10646029, "memory(GiB)": 71.19, "step": 2210, "train_speed(iter/s)": 0.024207 }, { "acc": 0.67603645, "epoch": 1.74, "learning_rate": 9.587515467587184e-05, "loss": 1.01645374, "memory(GiB)": 71.19, "step": 2215, "train_speed(iter/s)": 0.024207 }, { "acc": 0.67964444, "epoch": 1.74, "learning_rate": 9.584932553684912e-05, "loss": 1.02113829, "memory(GiB)": 71.19, "step": 2220, "train_speed(iter/s)": 0.024208 }, { "acc": 0.67175584, "epoch": 1.74, "learning_rate": 9.582341928246105e-05, "loss": 1.07317562, "memory(GiB)": 71.19, "step": 2225, "train_speed(iter/s)": 0.024207 }, { "acc": 0.67587581, "epoch": 1.75, "learning_rate": 9.57974359562802e-05, "loss": 1.05869913, "memory(GiB)": 71.19, "step": 2230, "train_speed(iter/s)": 0.024207 }, { "acc": 0.67292519, "epoch": 1.75, "learning_rate": 9.577137560200868e-05, "loss": 1.05732136, "memory(GiB)": 71.19, "step": 2235, "train_speed(iter/s)": 0.024208 }, { "acc": 0.67593732, "epoch": 1.76, "learning_rate": 9.574523826347821e-05, "loss": 1.05148783, "memory(GiB)": 71.19, "step": 2240, "train_speed(iter/s)": 0.024208 }, { "acc": 0.66758423, "epoch": 1.76, "learning_rate": 9.571902398464996e-05, "loss": 1.08099422, "memory(GiB)": 71.19, "step": 2245, "train_speed(iter/s)": 0.024208 }, { "acc": 0.67744985, "epoch": 1.76, "learning_rate": 9.569273280961451e-05, "loss": 1.0564353, "memory(GiB)": 71.19, "step": 2250, "train_speed(iter/s)": 0.024208 }, { "acc": 0.68401761, "epoch": 1.77, "learning_rate": 9.566636478259178e-05, "loss": 1.03796558, "memory(GiB)": 71.19, "step": 2255, "train_speed(iter/s)": 0.024208 }, { "acc": 0.66767297, "epoch": 1.77, "learning_rate": 9.563991994793095e-05, "loss": 1.07227659, "memory(GiB)": 71.19, "step": 2260, "train_speed(iter/s)": 0.024209 }, { "acc": 0.66152043, "epoch": 1.78, "learning_rate": 9.56133983501104e-05, "loss": 1.12642975, "memory(GiB)": 71.19, "step": 2265, "train_speed(iter/s)": 0.024208 }, { "acc": 0.68994551, "epoch": 1.78, "learning_rate": 9.558680003373761e-05, "loss": 1.02917881, "memory(GiB)": 71.19, "step": 2270, "train_speed(iter/s)": 0.024208 }, { "acc": 0.66738191, "epoch": 1.78, "learning_rate": 9.556012504354907e-05, "loss": 1.0579689, "memory(GiB)": 71.19, "step": 2275, "train_speed(iter/s)": 0.024209 }, { "acc": 0.68090143, "epoch": 1.79, "learning_rate": 9.553337342441026e-05, "loss": 1.02114449, "memory(GiB)": 71.19, "step": 2280, "train_speed(iter/s)": 0.024209 }, { "acc": 0.68244548, "epoch": 1.79, "learning_rate": 9.550654522131554e-05, "loss": 1.05710659, "memory(GiB)": 71.19, "step": 2285, "train_speed(iter/s)": 0.024209 }, { "acc": 0.67936034, "epoch": 1.79, "learning_rate": 9.547964047938807e-05, "loss": 1.06786947, "memory(GiB)": 71.19, "step": 2290, "train_speed(iter/s)": 0.024209 }, { "acc": 0.66966805, "epoch": 1.8, "learning_rate": 9.545265924387976e-05, "loss": 1.07189226, "memory(GiB)": 71.19, "step": 2295, "train_speed(iter/s)": 0.024208 }, { "acc": 0.66073766, "epoch": 1.8, "learning_rate": 9.542560156017114e-05, "loss": 1.10265789, "memory(GiB)": 71.19, "step": 2300, "train_speed(iter/s)": 0.024208 }, { "acc": 0.6612524, "epoch": 1.81, "learning_rate": 9.539846747377137e-05, "loss": 1.10375538, "memory(GiB)": 71.19, "step": 2305, "train_speed(iter/s)": 0.024209 }, { "acc": 0.6832036, "epoch": 1.81, "learning_rate": 9.537125703031808e-05, "loss": 1.01659298, "memory(GiB)": 71.19, "step": 2310, "train_speed(iter/s)": 0.024208 }, { "acc": 0.67649841, "epoch": 1.81, "learning_rate": 9.534397027557734e-05, "loss": 1.05449762, "memory(GiB)": 71.19, "step": 2315, "train_speed(iter/s)": 0.024208 }, { "acc": 0.6723896, "epoch": 1.82, "learning_rate": 9.531660725544357e-05, "loss": 1.08471155, "memory(GiB)": 71.19, "step": 2320, "train_speed(iter/s)": 0.024208 }, { "acc": 0.67170587, "epoch": 1.82, "learning_rate": 9.528916801593945e-05, "loss": 1.06638412, "memory(GiB)": 71.19, "step": 2325, "train_speed(iter/s)": 0.024208 }, { "acc": 0.66721401, "epoch": 1.83, "learning_rate": 9.526165260321587e-05, "loss": 1.07490387, "memory(GiB)": 71.19, "step": 2330, "train_speed(iter/s)": 0.024209 }, { "acc": 0.66922231, "epoch": 1.83, "learning_rate": 9.523406106355181e-05, "loss": 1.07525682, "memory(GiB)": 71.19, "step": 2335, "train_speed(iter/s)": 0.024209 }, { "acc": 0.68418026, "epoch": 1.83, "learning_rate": 9.520639344335436e-05, "loss": 1.02238379, "memory(GiB)": 71.19, "step": 2340, "train_speed(iter/s)": 0.024208 }, { "acc": 0.66266646, "epoch": 1.84, "learning_rate": 9.517864978915852e-05, "loss": 1.107903, "memory(GiB)": 71.19, "step": 2345, "train_speed(iter/s)": 0.024208 }, { "acc": 0.66747322, "epoch": 1.84, "learning_rate": 9.515083014762714e-05, "loss": 1.05063229, "memory(GiB)": 71.19, "step": 2350, "train_speed(iter/s)": 0.024207 }, { "acc": 0.68329477, "epoch": 1.85, "learning_rate": 9.512293456555094e-05, "loss": 1.04089613, "memory(GiB)": 71.19, "step": 2355, "train_speed(iter/s)": 0.024208 }, { "acc": 0.67805209, "epoch": 1.85, "learning_rate": 9.509496308984834e-05, "loss": 1.05391035, "memory(GiB)": 71.19, "step": 2360, "train_speed(iter/s)": 0.024208 }, { "acc": 0.68161922, "epoch": 1.85, "learning_rate": 9.506691576756542e-05, "loss": 1.0376071, "memory(GiB)": 71.19, "step": 2365, "train_speed(iter/s)": 0.024208 }, { "acc": 0.68472705, "epoch": 1.86, "learning_rate": 9.50387926458758e-05, "loss": 1.03176441, "memory(GiB)": 71.19, "step": 2370, "train_speed(iter/s)": 0.024208 }, { "acc": 0.67763271, "epoch": 1.86, "learning_rate": 9.501059377208062e-05, "loss": 1.02066383, "memory(GiB)": 71.19, "step": 2375, "train_speed(iter/s)": 0.024208 }, { "acc": 0.67828851, "epoch": 1.87, "learning_rate": 9.49823191936084e-05, "loss": 1.03181, "memory(GiB)": 71.19, "step": 2380, "train_speed(iter/s)": 0.024208 }, { "acc": 0.68185763, "epoch": 1.87, "learning_rate": 9.495396895801504e-05, "loss": 1.01452837, "memory(GiB)": 71.19, "step": 2385, "train_speed(iter/s)": 0.024208 }, { "acc": 0.69047713, "epoch": 1.87, "learning_rate": 9.492554311298363e-05, "loss": 1.03052311, "memory(GiB)": 71.19, "step": 2390, "train_speed(iter/s)": 0.024208 }, { "acc": 0.67350526, "epoch": 1.88, "learning_rate": 9.489704170632448e-05, "loss": 1.05733795, "memory(GiB)": 71.19, "step": 2395, "train_speed(iter/s)": 0.024208 }, { "acc": 0.66548829, "epoch": 1.88, "learning_rate": 9.486846478597493e-05, "loss": 1.10433922, "memory(GiB)": 71.19, "step": 2400, "train_speed(iter/s)": 0.024209 }, { "acc": 0.67878256, "epoch": 1.88, "learning_rate": 9.48398123999994e-05, "loss": 1.04339581, "memory(GiB)": 71.19, "step": 2405, "train_speed(iter/s)": 0.024209 }, { "acc": 0.66891594, "epoch": 1.89, "learning_rate": 9.481108459658918e-05, "loss": 1.06247549, "memory(GiB)": 71.19, "step": 2410, "train_speed(iter/s)": 0.024209 }, { "acc": 0.67175364, "epoch": 1.89, "learning_rate": 9.478228142406245e-05, "loss": 1.08076143, "memory(GiB)": 71.19, "step": 2415, "train_speed(iter/s)": 0.024208 }, { "acc": 0.66775222, "epoch": 1.9, "learning_rate": 9.475340293086414e-05, "loss": 1.07804575, "memory(GiB)": 71.19, "step": 2420, "train_speed(iter/s)": 0.024208 }, { "acc": 0.67420983, "epoch": 1.9, "learning_rate": 9.472444916556582e-05, "loss": 1.06133413, "memory(GiB)": 71.19, "step": 2425, "train_speed(iter/s)": 0.024208 }, { "acc": 0.66697445, "epoch": 1.9, "learning_rate": 9.469542017686574e-05, "loss": 1.08241444, "memory(GiB)": 71.19, "step": 2430, "train_speed(iter/s)": 0.024209 }, { "acc": 0.65952163, "epoch": 1.91, "learning_rate": 9.466631601358865e-05, "loss": 1.12456112, "memory(GiB)": 71.19, "step": 2435, "train_speed(iter/s)": 0.02421 }, { "acc": 0.68122878, "epoch": 1.91, "learning_rate": 9.463713672468566e-05, "loss": 1.02802782, "memory(GiB)": 71.19, "step": 2440, "train_speed(iter/s)": 0.02421 }, { "acc": 0.67782488, "epoch": 1.92, "learning_rate": 9.460788235923434e-05, "loss": 1.04834585, "memory(GiB)": 71.19, "step": 2445, "train_speed(iter/s)": 0.02421 }, { "acc": 0.6813798, "epoch": 1.92, "learning_rate": 9.457855296643847e-05, "loss": 1.03965998, "memory(GiB)": 71.19, "step": 2450, "train_speed(iter/s)": 0.02421 }, { "acc": 0.6787158, "epoch": 1.92, "learning_rate": 9.454914859562806e-05, "loss": 1.03802996, "memory(GiB)": 71.19, "step": 2455, "train_speed(iter/s)": 0.024211 }, { "acc": 0.67819457, "epoch": 1.93, "learning_rate": 9.451966929625921e-05, "loss": 1.04884434, "memory(GiB)": 71.19, "step": 2460, "train_speed(iter/s)": 0.024211 }, { "acc": 0.68044524, "epoch": 1.93, "learning_rate": 9.449011511791403e-05, "loss": 1.00336819, "memory(GiB)": 71.19, "step": 2465, "train_speed(iter/s)": 0.02421 }, { "acc": 0.68315306, "epoch": 1.94, "learning_rate": 9.44604861103006e-05, "loss": 1.0400569, "memory(GiB)": 71.19, "step": 2470, "train_speed(iter/s)": 0.02421 }, { "acc": 0.67919292, "epoch": 1.94, "learning_rate": 9.443078232325283e-05, "loss": 1.0457303, "memory(GiB)": 71.19, "step": 2475, "train_speed(iter/s)": 0.02421 }, { "acc": 0.67261739, "epoch": 1.94, "learning_rate": 9.440100380673041e-05, "loss": 1.08123322, "memory(GiB)": 71.19, "step": 2480, "train_speed(iter/s)": 0.02421 }, { "acc": 0.67910471, "epoch": 1.95, "learning_rate": 9.437115061081873e-05, "loss": 1.06696892, "memory(GiB)": 71.19, "step": 2485, "train_speed(iter/s)": 0.024211 }, { "acc": 0.68002834, "epoch": 1.95, "learning_rate": 9.434122278572881e-05, "loss": 1.01719761, "memory(GiB)": 71.19, "step": 2490, "train_speed(iter/s)": 0.024211 }, { "acc": 0.68830619, "epoch": 1.96, "learning_rate": 9.431122038179713e-05, "loss": 1.02319183, "memory(GiB)": 71.19, "step": 2495, "train_speed(iter/s)": 0.024211 }, { "acc": 0.68742871, "epoch": 1.96, "learning_rate": 9.428114344948566e-05, "loss": 1.01065245, "memory(GiB)": 71.19, "step": 2500, "train_speed(iter/s)": 0.024211 }, { "acc": 0.67193766, "epoch": 1.96, "learning_rate": 9.425099203938169e-05, "loss": 1.062183, "memory(GiB)": 71.19, "step": 2505, "train_speed(iter/s)": 0.024211 }, { "acc": 0.66727209, "epoch": 1.97, "learning_rate": 9.422076620219777e-05, "loss": 1.06330957, "memory(GiB)": 71.19, "step": 2510, "train_speed(iter/s)": 0.024211 }, { "acc": 0.68619056, "epoch": 1.97, "learning_rate": 9.419046598877169e-05, "loss": 1.02488794, "memory(GiB)": 71.19, "step": 2515, "train_speed(iter/s)": 0.024211 }, { "acc": 0.67550688, "epoch": 1.98, "learning_rate": 9.416009145006626e-05, "loss": 1.0441782, "memory(GiB)": 71.19, "step": 2520, "train_speed(iter/s)": 0.024211 }, { "acc": 0.69182601, "epoch": 1.98, "learning_rate": 9.412964263716934e-05, "loss": 0.99197636, "memory(GiB)": 71.19, "step": 2525, "train_speed(iter/s)": 0.02421 }, { "acc": 0.67208972, "epoch": 1.98, "learning_rate": 9.409911960129373e-05, "loss": 1.04292965, "memory(GiB)": 71.19, "step": 2530, "train_speed(iter/s)": 0.024209 }, { "acc": 0.69247909, "epoch": 1.99, "learning_rate": 9.406852239377702e-05, "loss": 1.03010435, "memory(GiB)": 71.19, "step": 2535, "train_speed(iter/s)": 0.02421 }, { "acc": 0.67900863, "epoch": 1.99, "learning_rate": 9.403785106608159e-05, "loss": 1.02731123, "memory(GiB)": 71.19, "step": 2540, "train_speed(iter/s)": 0.024211 }, { "acc": 0.68802767, "epoch": 1.99, "learning_rate": 9.400710566979446e-05, "loss": 1.02137775, "memory(GiB)": 71.19, "step": 2545, "train_speed(iter/s)": 0.02421 }, { "acc": 0.68384156, "epoch": 2.0, "learning_rate": 9.397628625662724e-05, "loss": 1.00198641, "memory(GiB)": 71.19, "step": 2550, "train_speed(iter/s)": 0.02421 }, { "epoch": 2.0, "eval_acc": 0.7063611319809667, "eval_loss": 0.9125774502754211, "eval_runtime": 107.4016, "eval_samples_per_second": 0.866, "eval_steps_per_second": 0.866, "step": 2551 }, { "acc": 0.69561715, "epoch": 2.0, "learning_rate": 9.394539287841606e-05, "loss": 0.96205349, "memory(GiB)": 71.19, "step": 2555, "train_speed(iter/s)": 0.024185 }, { "acc": 0.70156784, "epoch": 2.01, "learning_rate": 9.391442558712141e-05, "loss": 0.95558033, "memory(GiB)": 71.19, "step": 2560, "train_speed(iter/s)": 0.024184 }, { "acc": 0.70194588, "epoch": 2.01, "learning_rate": 9.38833844348281e-05, "loss": 0.94522266, "memory(GiB)": 71.19, "step": 2565, "train_speed(iter/s)": 0.024185 }, { "acc": 0.69583673, "epoch": 2.01, "learning_rate": 9.385226947374519e-05, "loss": 0.95971365, "memory(GiB)": 71.19, "step": 2570, "train_speed(iter/s)": 0.024185 }, { "acc": 0.69883165, "epoch": 2.02, "learning_rate": 9.382108075620588e-05, "loss": 0.95136395, "memory(GiB)": 71.19, "step": 2575, "train_speed(iter/s)": 0.024186 }, { "acc": 0.70211344, "epoch": 2.02, "learning_rate": 9.37898183346674e-05, "loss": 0.95915556, "memory(GiB)": 71.19, "step": 2580, "train_speed(iter/s)": 0.024187 }, { "acc": 0.6914197, "epoch": 2.03, "learning_rate": 9.375848226171097e-05, "loss": 0.97404881, "memory(GiB)": 71.19, "step": 2585, "train_speed(iter/s)": 0.024187 }, { "acc": 0.69058862, "epoch": 2.03, "learning_rate": 9.372707259004168e-05, "loss": 0.97601357, "memory(GiB)": 71.19, "step": 2590, "train_speed(iter/s)": 0.024187 }, { "acc": 0.69468822, "epoch": 2.03, "learning_rate": 9.369558937248841e-05, "loss": 0.94959822, "memory(GiB)": 71.19, "step": 2595, "train_speed(iter/s)": 0.024187 }, { "acc": 0.69152589, "epoch": 2.04, "learning_rate": 9.366403266200372e-05, "loss": 0.98147879, "memory(GiB)": 71.19, "step": 2600, "train_speed(iter/s)": 0.024187 }, { "acc": 0.69578485, "epoch": 2.04, "learning_rate": 9.363240251166381e-05, "loss": 0.96815405, "memory(GiB)": 71.19, "step": 2605, "train_speed(iter/s)": 0.024187 }, { "acc": 0.70924282, "epoch": 2.05, "learning_rate": 9.360069897466837e-05, "loss": 0.92589321, "memory(GiB)": 71.19, "step": 2610, "train_speed(iter/s)": 0.024187 }, { "acc": 0.69980206, "epoch": 2.05, "learning_rate": 9.356892210434054e-05, "loss": 0.93600454, "memory(GiB)": 71.19, "step": 2615, "train_speed(iter/s)": 0.024188 }, { "acc": 0.68358474, "epoch": 2.05, "learning_rate": 9.353707195412682e-05, "loss": 0.98476477, "memory(GiB)": 71.19, "step": 2620, "train_speed(iter/s)": 0.024189 }, { "acc": 0.70984707, "epoch": 2.06, "learning_rate": 9.35051485775969e-05, "loss": 0.92595644, "memory(GiB)": 71.19, "step": 2625, "train_speed(iter/s)": 0.024189 }, { "acc": 0.70100389, "epoch": 2.06, "learning_rate": 9.34731520284437e-05, "loss": 0.97320604, "memory(GiB)": 71.19, "step": 2630, "train_speed(iter/s)": 0.024189 }, { "acc": 0.69202828, "epoch": 2.07, "learning_rate": 9.344108236048317e-05, "loss": 0.9583993, "memory(GiB)": 71.19, "step": 2635, "train_speed(iter/s)": 0.02419 }, { "acc": 0.69916315, "epoch": 2.07, "learning_rate": 9.340893962765427e-05, "loss": 0.97032738, "memory(GiB)": 71.19, "step": 2640, "train_speed(iter/s)": 0.02419 }, { "acc": 0.69505591, "epoch": 2.07, "learning_rate": 9.337672388401882e-05, "loss": 0.9470933, "memory(GiB)": 71.19, "step": 2645, "train_speed(iter/s)": 0.024191 }, { "acc": 0.69918957, "epoch": 2.08, "learning_rate": 9.334443518376144e-05, "loss": 0.94485273, "memory(GiB)": 71.19, "step": 2650, "train_speed(iter/s)": 0.024191 }, { "acc": 0.69890566, "epoch": 2.08, "learning_rate": 9.33120735811895e-05, "loss": 0.95022421, "memory(GiB)": 71.19, "step": 2655, "train_speed(iter/s)": 0.024191 }, { "acc": 0.70103612, "epoch": 2.08, "learning_rate": 9.327963913073292e-05, "loss": 0.93258381, "memory(GiB)": 71.19, "step": 2660, "train_speed(iter/s)": 0.024191 }, { "acc": 0.68382578, "epoch": 2.09, "learning_rate": 9.32471318869442e-05, "loss": 0.98318644, "memory(GiB)": 71.19, "step": 2665, "train_speed(iter/s)": 0.024191 }, { "acc": 0.69849315, "epoch": 2.09, "learning_rate": 9.321455190449826e-05, "loss": 0.95109901, "memory(GiB)": 71.19, "step": 2670, "train_speed(iter/s)": 0.024191 }, { "acc": 0.69872427, "epoch": 2.1, "learning_rate": 9.318189923819237e-05, "loss": 0.95068798, "memory(GiB)": 71.19, "step": 2675, "train_speed(iter/s)": 0.024191 }, { "acc": 0.6956666, "epoch": 2.1, "learning_rate": 9.314917394294601e-05, "loss": 0.94351721, "memory(GiB)": 71.19, "step": 2680, "train_speed(iter/s)": 0.024192 }, { "acc": 0.68996038, "epoch": 2.1, "learning_rate": 9.311637607380087e-05, "loss": 0.98688478, "memory(GiB)": 71.19, "step": 2685, "train_speed(iter/s)": 0.024192 }, { "acc": 0.69613433, "epoch": 2.11, "learning_rate": 9.308350568592062e-05, "loss": 0.96690464, "memory(GiB)": 71.19, "step": 2690, "train_speed(iter/s)": 0.024193 }, { "acc": 0.70577445, "epoch": 2.11, "learning_rate": 9.305056283459101e-05, "loss": 0.94153576, "memory(GiB)": 71.19, "step": 2695, "train_speed(iter/s)": 0.024193 }, { "acc": 0.69807467, "epoch": 2.12, "learning_rate": 9.30175475752196e-05, "loss": 0.95756607, "memory(GiB)": 71.19, "step": 2700, "train_speed(iter/s)": 0.024193 }, { "acc": 0.69821429, "epoch": 2.12, "learning_rate": 9.298445996333576e-05, "loss": 0.94874229, "memory(GiB)": 71.19, "step": 2705, "train_speed(iter/s)": 0.024193 }, { "acc": 0.70757422, "epoch": 2.12, "learning_rate": 9.295130005459053e-05, "loss": 0.92731676, "memory(GiB)": 71.19, "step": 2710, "train_speed(iter/s)": 0.024193 }, { "acc": 0.70590787, "epoch": 2.13, "learning_rate": 9.291806790475659e-05, "loss": 0.91320696, "memory(GiB)": 71.19, "step": 2715, "train_speed(iter/s)": 0.024193 }, { "acc": 0.69287601, "epoch": 2.13, "learning_rate": 9.288476356972806e-05, "loss": 0.96277952, "memory(GiB)": 71.19, "step": 2720, "train_speed(iter/s)": 0.024194 }, { "acc": 0.69249158, "epoch": 2.14, "learning_rate": 9.285138710552053e-05, "loss": 0.9557168, "memory(GiB)": 71.19, "step": 2725, "train_speed(iter/s)": 0.024195 }, { "acc": 0.70517898, "epoch": 2.14, "learning_rate": 9.281793856827086e-05, "loss": 0.95910587, "memory(GiB)": 71.19, "step": 2730, "train_speed(iter/s)": 0.024194 }, { "acc": 0.68601875, "epoch": 2.14, "learning_rate": 9.278441801423718e-05, "loss": 0.98039818, "memory(GiB)": 71.19, "step": 2735, "train_speed(iter/s)": 0.024194 }, { "acc": 0.69862165, "epoch": 2.15, "learning_rate": 9.275082549979872e-05, "loss": 0.94497709, "memory(GiB)": 71.19, "step": 2740, "train_speed(iter/s)": 0.024195 }, { "acc": 0.6958168, "epoch": 2.15, "learning_rate": 9.271716108145574e-05, "loss": 0.95876045, "memory(GiB)": 71.19, "step": 2745, "train_speed(iter/s)": 0.024195 }, { "acc": 0.69509659, "epoch": 2.16, "learning_rate": 9.268342481582944e-05, "loss": 0.98702393, "memory(GiB)": 71.19, "step": 2750, "train_speed(iter/s)": 0.024195 }, { "acc": 0.69033999, "epoch": 2.16, "learning_rate": 9.264961675966186e-05, "loss": 0.991891, "memory(GiB)": 71.19, "step": 2755, "train_speed(iter/s)": 0.024195 }, { "acc": 0.70156898, "epoch": 2.16, "learning_rate": 9.261573696981579e-05, "loss": 0.93319263, "memory(GiB)": 71.19, "step": 2760, "train_speed(iter/s)": 0.024196 }, { "acc": 0.70409079, "epoch": 2.17, "learning_rate": 9.258178550327468e-05, "loss": 0.94536772, "memory(GiB)": 71.19, "step": 2765, "train_speed(iter/s)": 0.024196 }, { "acc": 0.68395271, "epoch": 2.17, "learning_rate": 9.254776241714251e-05, "loss": 0.99986162, "memory(GiB)": 71.19, "step": 2770, "train_speed(iter/s)": 0.024196 }, { "acc": 0.68076348, "epoch": 2.17, "learning_rate": 9.251366776864377e-05, "loss": 1.00616322, "memory(GiB)": 71.19, "step": 2775, "train_speed(iter/s)": 0.024196 }, { "acc": 0.68825445, "epoch": 2.18, "learning_rate": 9.247950161512324e-05, "loss": 0.97691736, "memory(GiB)": 71.19, "step": 2780, "train_speed(iter/s)": 0.024195 }, { "acc": 0.70971127, "epoch": 2.18, "learning_rate": 9.244526401404604e-05, "loss": 0.92741508, "memory(GiB)": 71.19, "step": 2785, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69368434, "epoch": 2.19, "learning_rate": 9.24109550229974e-05, "loss": 0.97553349, "memory(GiB)": 71.19, "step": 2790, "train_speed(iter/s)": 0.024196 }, { "acc": 0.71132045, "epoch": 2.19, "learning_rate": 9.237657469968266e-05, "loss": 0.92014847, "memory(GiB)": 71.19, "step": 2795, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69388194, "epoch": 2.19, "learning_rate": 9.234212310192711e-05, "loss": 0.94383049, "memory(GiB)": 71.19, "step": 2800, "train_speed(iter/s)": 0.024195 }, { "acc": 0.69455805, "epoch": 2.2, "learning_rate": 9.230760028767597e-05, "loss": 0.9620759, "memory(GiB)": 71.19, "step": 2805, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69068451, "epoch": 2.2, "learning_rate": 9.227300631499416e-05, "loss": 0.94675779, "memory(GiB)": 71.19, "step": 2810, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69235501, "epoch": 2.21, "learning_rate": 9.223834124206635e-05, "loss": 0.97269449, "memory(GiB)": 71.19, "step": 2815, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69006801, "epoch": 2.21, "learning_rate": 9.220360512719676e-05, "loss": 0.95500069, "memory(GiB)": 71.19, "step": 2820, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69353352, "epoch": 2.21, "learning_rate": 9.216879802880913e-05, "loss": 0.9617177, "memory(GiB)": 71.19, "step": 2825, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69113631, "epoch": 2.22, "learning_rate": 9.213392000544656e-05, "loss": 0.97730999, "memory(GiB)": 71.19, "step": 2830, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69324679, "epoch": 2.22, "learning_rate": 9.209897111577144e-05, "loss": 0.96714506, "memory(GiB)": 71.19, "step": 2835, "train_speed(iter/s)": 0.024197 }, { "acc": 0.696696, "epoch": 2.23, "learning_rate": 9.206395141856538e-05, "loss": 0.98762465, "memory(GiB)": 71.19, "step": 2840, "train_speed(iter/s)": 0.024196 }, { "acc": 0.70372343, "epoch": 2.23, "learning_rate": 9.202886097272907e-05, "loss": 0.92046738, "memory(GiB)": 71.19, "step": 2845, "train_speed(iter/s)": 0.024197 }, { "acc": 0.70415306, "epoch": 2.23, "learning_rate": 9.199369983728217e-05, "loss": 0.92664623, "memory(GiB)": 71.19, "step": 2850, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69124141, "epoch": 2.24, "learning_rate": 9.195846807136326e-05, "loss": 0.9966094, "memory(GiB)": 71.19, "step": 2855, "train_speed(iter/s)": 0.024196 }, { "acc": 0.68278403, "epoch": 2.24, "learning_rate": 9.192316573422972e-05, "loss": 1.00215902, "memory(GiB)": 71.19, "step": 2860, "train_speed(iter/s)": 0.024196 }, { "acc": 0.70400138, "epoch": 2.25, "learning_rate": 9.188779288525761e-05, "loss": 0.95005407, "memory(GiB)": 71.19, "step": 2865, "train_speed(iter/s)": 0.024196 }, { "acc": 0.70103235, "epoch": 2.25, "learning_rate": 9.18523495839416e-05, "loss": 0.93353863, "memory(GiB)": 71.19, "step": 2870, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69932871, "epoch": 2.25, "learning_rate": 9.181683588989485e-05, "loss": 0.94103365, "memory(GiB)": 71.19, "step": 2875, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69865599, "epoch": 2.26, "learning_rate": 9.17812518628489e-05, "loss": 0.96315451, "memory(GiB)": 71.19, "step": 2880, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69689775, "epoch": 2.26, "learning_rate": 9.174559756265361e-05, "loss": 0.95352697, "memory(GiB)": 71.19, "step": 2885, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69051638, "epoch": 2.27, "learning_rate": 9.170987304927704e-05, "loss": 0.97824144, "memory(GiB)": 71.19, "step": 2890, "train_speed(iter/s)": 0.024197 }, { "acc": 0.70052414, "epoch": 2.27, "learning_rate": 9.167407838280531e-05, "loss": 0.93514223, "memory(GiB)": 71.19, "step": 2895, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69622307, "epoch": 2.27, "learning_rate": 9.163821362344254e-05, "loss": 0.96024828, "memory(GiB)": 71.19, "step": 2900, "train_speed(iter/s)": 0.024197 }, { "acc": 0.68836212, "epoch": 2.28, "learning_rate": 9.160227883151077e-05, "loss": 0.97077913, "memory(GiB)": 71.19, "step": 2905, "train_speed(iter/s)": 0.024197 }, { "acc": 0.6831625, "epoch": 2.28, "learning_rate": 9.15662740674498e-05, "loss": 1.00156384, "memory(GiB)": 71.19, "step": 2910, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69906459, "epoch": 2.28, "learning_rate": 9.153019939181716e-05, "loss": 0.96558504, "memory(GiB)": 71.19, "step": 2915, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69677444, "epoch": 2.29, "learning_rate": 9.149405486528788e-05, "loss": 0.97523499, "memory(GiB)": 71.19, "step": 2920, "train_speed(iter/s)": 0.024196 }, { "acc": 0.68966751, "epoch": 2.29, "learning_rate": 9.145784054865458e-05, "loss": 0.97728767, "memory(GiB)": 71.19, "step": 2925, "train_speed(iter/s)": 0.024196 }, { "acc": 0.7036571, "epoch": 2.3, "learning_rate": 9.14215565028272e-05, "loss": 0.94556446, "memory(GiB)": 71.19, "step": 2930, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69022222, "epoch": 2.3, "learning_rate": 9.138520278883297e-05, "loss": 0.96314068, "memory(GiB)": 71.19, "step": 2935, "train_speed(iter/s)": 0.024196 }, { "acc": 0.68896809, "epoch": 2.3, "learning_rate": 9.13487794678163e-05, "loss": 0.99049673, "memory(GiB)": 71.19, "step": 2940, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69701419, "epoch": 2.31, "learning_rate": 9.131228660103866e-05, "loss": 0.97968254, "memory(GiB)": 71.19, "step": 2945, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69701881, "epoch": 2.31, "learning_rate": 9.127572424987853e-05, "loss": 0.97570438, "memory(GiB)": 71.19, "step": 2950, "train_speed(iter/s)": 0.024197 }, { "acc": 0.70094166, "epoch": 2.32, "learning_rate": 9.12390924758312e-05, "loss": 0.93014622, "memory(GiB)": 71.19, "step": 2955, "train_speed(iter/s)": 0.024198 }, { "acc": 0.70081906, "epoch": 2.32, "learning_rate": 9.12023913405088e-05, "loss": 0.97871437, "memory(GiB)": 71.19, "step": 2960, "train_speed(iter/s)": 0.024198 }, { "acc": 0.70025148, "epoch": 2.32, "learning_rate": 9.116562090564005e-05, "loss": 0.93432608, "memory(GiB)": 71.19, "step": 2965, "train_speed(iter/s)": 0.024198 }, { "acc": 0.68864193, "epoch": 2.33, "learning_rate": 9.112878123307025e-05, "loss": 0.98558102, "memory(GiB)": 71.19, "step": 2970, "train_speed(iter/s)": 0.024198 }, { "acc": 0.69512825, "epoch": 2.33, "learning_rate": 9.109187238476116e-05, "loss": 0.94346056, "memory(GiB)": 71.19, "step": 2975, "train_speed(iter/s)": 0.024198 }, { "acc": 0.70853047, "epoch": 2.34, "learning_rate": 9.105489442279092e-05, "loss": 0.91953535, "memory(GiB)": 71.19, "step": 2980, "train_speed(iter/s)": 0.024198 }, { "acc": 0.70059996, "epoch": 2.34, "learning_rate": 9.101784740935383e-05, "loss": 0.93242407, "memory(GiB)": 71.19, "step": 2985, "train_speed(iter/s)": 0.024198 }, { "acc": 0.6832633, "epoch": 2.34, "learning_rate": 9.098073140676042e-05, "loss": 1.00155811, "memory(GiB)": 71.19, "step": 2990, "train_speed(iter/s)": 0.024198 }, { "acc": 0.69065361, "epoch": 2.35, "learning_rate": 9.09435464774372e-05, "loss": 0.98291121, "memory(GiB)": 71.19, "step": 2995, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69865141, "epoch": 2.35, "learning_rate": 9.090629268392661e-05, "loss": 0.96651316, "memory(GiB)": 71.19, "step": 3000, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69779391, "epoch": 2.36, "learning_rate": 9.086897008888697e-05, "loss": 0.96862831, "memory(GiB)": 71.19, "step": 3005, "train_speed(iter/s)": 0.024197 }, { "acc": 0.7000648, "epoch": 2.36, "learning_rate": 9.083157875509226e-05, "loss": 0.95280199, "memory(GiB)": 71.19, "step": 3010, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69280767, "epoch": 2.36, "learning_rate": 9.079411874543206e-05, "loss": 0.96452456, "memory(GiB)": 71.19, "step": 3015, "train_speed(iter/s)": 0.024196 }, { "acc": 0.70206046, "epoch": 2.37, "learning_rate": 9.075659012291155e-05, "loss": 0.94142323, "memory(GiB)": 71.19, "step": 3020, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69001818, "epoch": 2.37, "learning_rate": 9.071899295065122e-05, "loss": 0.97127142, "memory(GiB)": 71.19, "step": 3025, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69132757, "epoch": 2.37, "learning_rate": 9.068132729188689e-05, "loss": 0.95719604, "memory(GiB)": 71.19, "step": 3030, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69102788, "epoch": 2.38, "learning_rate": 9.064359320996958e-05, "loss": 0.97239933, "memory(GiB)": 71.19, "step": 3035, "train_speed(iter/s)": 0.024196 }, { "acc": 0.68073072, "epoch": 2.38, "learning_rate": 9.060579076836537e-05, "loss": 1.00775642, "memory(GiB)": 71.19, "step": 3040, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69667606, "epoch": 2.39, "learning_rate": 9.056792003065535e-05, "loss": 0.9632431, "memory(GiB)": 71.19, "step": 3045, "train_speed(iter/s)": 0.024197 }, { "acc": 0.70922837, "epoch": 2.39, "learning_rate": 9.052998106053544e-05, "loss": 0.91346598, "memory(GiB)": 71.19, "step": 3050, "train_speed(iter/s)": 0.024197 }, { "acc": 0.68625507, "epoch": 2.39, "learning_rate": 9.049197392181632e-05, "loss": 0.97347794, "memory(GiB)": 71.19, "step": 3055, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69933372, "epoch": 2.4, "learning_rate": 9.045389867842338e-05, "loss": 0.97092943, "memory(GiB)": 71.19, "step": 3060, "train_speed(iter/s)": 0.024197 }, { "acc": 0.70825005, "epoch": 2.4, "learning_rate": 9.041575539439651e-05, "loss": 0.92969103, "memory(GiB)": 71.19, "step": 3065, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69653683, "epoch": 2.41, "learning_rate": 9.037754413389005e-05, "loss": 0.96352663, "memory(GiB)": 71.19, "step": 3070, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69451375, "epoch": 2.41, "learning_rate": 9.033926496117268e-05, "loss": 0.96977873, "memory(GiB)": 71.19, "step": 3075, "train_speed(iter/s)": 0.024197 }, { "acc": 0.687293, "epoch": 2.41, "learning_rate": 9.030091794062728e-05, "loss": 0.98685484, "memory(GiB)": 71.19, "step": 3080, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69902458, "epoch": 2.42, "learning_rate": 9.026250313675086e-05, "loss": 0.95378513, "memory(GiB)": 71.19, "step": 3085, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69570527, "epoch": 2.42, "learning_rate": 9.022402061415448e-05, "loss": 0.94871998, "memory(GiB)": 71.19, "step": 3090, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69046903, "epoch": 2.43, "learning_rate": 9.018547043756299e-05, "loss": 0.97162466, "memory(GiB)": 71.19, "step": 3095, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69036608, "epoch": 2.43, "learning_rate": 9.014685267181515e-05, "loss": 0.97763748, "memory(GiB)": 71.19, "step": 3100, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69294906, "epoch": 2.43, "learning_rate": 9.010816738186335e-05, "loss": 0.98672915, "memory(GiB)": 71.19, "step": 3105, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69603667, "epoch": 2.44, "learning_rate": 9.006941463277349e-05, "loss": 0.96144562, "memory(GiB)": 71.19, "step": 3110, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69827685, "epoch": 2.44, "learning_rate": 9.003059448972504e-05, "loss": 0.94170532, "memory(GiB)": 71.19, "step": 3115, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69537177, "epoch": 2.45, "learning_rate": 8.999170701801076e-05, "loss": 0.94646502, "memory(GiB)": 71.19, "step": 3120, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69200006, "epoch": 2.45, "learning_rate": 8.995275228303667e-05, "loss": 1.00812445, "memory(GiB)": 71.19, "step": 3125, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69033399, "epoch": 2.45, "learning_rate": 8.99137303503219e-05, "loss": 0.99082174, "memory(GiB)": 71.19, "step": 3130, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69509125, "epoch": 2.46, "learning_rate": 8.987464128549862e-05, "loss": 0.96717873, "memory(GiB)": 71.19, "step": 3135, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69468803, "epoch": 2.46, "learning_rate": 8.98354851543119e-05, "loss": 0.93988543, "memory(GiB)": 71.19, "step": 3140, "train_speed(iter/s)": 0.024197 }, { "acc": 0.70322733, "epoch": 2.46, "learning_rate": 8.979626202261966e-05, "loss": 0.92626677, "memory(GiB)": 71.19, "step": 3145, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69417615, "epoch": 2.47, "learning_rate": 8.97569719563924e-05, "loss": 0.95528698, "memory(GiB)": 71.19, "step": 3150, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69935951, "epoch": 2.47, "learning_rate": 8.971761502171335e-05, "loss": 0.94430065, "memory(GiB)": 71.19, "step": 3155, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69067097, "epoch": 2.48, "learning_rate": 8.967819128477807e-05, "loss": 0.97750225, "memory(GiB)": 71.19, "step": 3160, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69903126, "epoch": 2.48, "learning_rate": 8.963870081189454e-05, "loss": 0.94778118, "memory(GiB)": 71.19, "step": 3165, "train_speed(iter/s)": 0.024197 }, { "acc": 0.70578423, "epoch": 2.48, "learning_rate": 8.959914366948302e-05, "loss": 0.92893019, "memory(GiB)": 71.19, "step": 3170, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69360132, "epoch": 2.49, "learning_rate": 8.95595199240758e-05, "loss": 0.94542074, "memory(GiB)": 71.19, "step": 3175, "train_speed(iter/s)": 0.024197 }, { "acc": 0.70106397, "epoch": 2.49, "learning_rate": 8.951982964231728e-05, "loss": 0.94324751, "memory(GiB)": 71.19, "step": 3180, "train_speed(iter/s)": 0.024197 }, { "acc": 0.70175576, "epoch": 2.5, "learning_rate": 8.948007289096379e-05, "loss": 0.95341234, "memory(GiB)": 71.19, "step": 3185, "train_speed(iter/s)": 0.024196 }, { "acc": 0.70615592, "epoch": 2.5, "learning_rate": 8.944024973688334e-05, "loss": 0.92322388, "memory(GiB)": 71.19, "step": 3190, "train_speed(iter/s)": 0.024196 }, { "acc": 0.70504413, "epoch": 2.5, "learning_rate": 8.940036024705574e-05, "loss": 0.95310001, "memory(GiB)": 71.19, "step": 3195, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69084344, "epoch": 2.51, "learning_rate": 8.93604044885723e-05, "loss": 0.97776327, "memory(GiB)": 71.19, "step": 3200, "train_speed(iter/s)": 0.024196 }, { "acc": 0.70420504, "epoch": 2.51, "learning_rate": 8.932038252863583e-05, "loss": 0.9348093, "memory(GiB)": 71.19, "step": 3205, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69500103, "epoch": 2.52, "learning_rate": 8.92802944345605e-05, "loss": 0.9558012, "memory(GiB)": 71.19, "step": 3210, "train_speed(iter/s)": 0.024197 }, { "acc": 0.71531296, "epoch": 2.52, "learning_rate": 8.924014027377164e-05, "loss": 0.9156971, "memory(GiB)": 71.19, "step": 3215, "train_speed(iter/s)": 0.024197 }, { "acc": 0.70045333, "epoch": 2.52, "learning_rate": 8.919992011380576e-05, "loss": 0.93506889, "memory(GiB)": 71.19, "step": 3220, "train_speed(iter/s)": 0.024197 }, { "acc": 0.68789282, "epoch": 2.53, "learning_rate": 8.915963402231038e-05, "loss": 0.97745686, "memory(GiB)": 71.19, "step": 3225, "train_speed(iter/s)": 0.024197 }, { "acc": 0.7019794, "epoch": 2.53, "learning_rate": 8.911928206704387e-05, "loss": 0.950805, "memory(GiB)": 71.19, "step": 3230, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69622092, "epoch": 2.54, "learning_rate": 8.907886431587543e-05, "loss": 0.96481142, "memory(GiB)": 71.19, "step": 3235, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69668698, "epoch": 2.54, "learning_rate": 8.903838083678486e-05, "loss": 0.95254927, "memory(GiB)": 71.19, "step": 3240, "train_speed(iter/s)": 0.024197 }, { "acc": 0.70273995, "epoch": 2.54, "learning_rate": 8.899783169786257e-05, "loss": 0.93340073, "memory(GiB)": 71.19, "step": 3245, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69511542, "epoch": 2.55, "learning_rate": 8.895721696730939e-05, "loss": 0.96184444, "memory(GiB)": 71.19, "step": 3250, "train_speed(iter/s)": 0.024197 }, { "acc": 0.70252714, "epoch": 2.55, "learning_rate": 8.891653671343643e-05, "loss": 0.94699078, "memory(GiB)": 71.19, "step": 3255, "train_speed(iter/s)": 0.024196 }, { "acc": 0.68809547, "epoch": 2.56, "learning_rate": 8.887579100466508e-05, "loss": 0.96380692, "memory(GiB)": 71.19, "step": 3260, "train_speed(iter/s)": 0.024196 }, { "acc": 0.68375864, "epoch": 2.56, "learning_rate": 8.883497990952674e-05, "loss": 0.98428965, "memory(GiB)": 71.19, "step": 3265, "train_speed(iter/s)": 0.024196 }, { "acc": 0.68760471, "epoch": 2.56, "learning_rate": 8.879410349666284e-05, "loss": 0.99547338, "memory(GiB)": 71.19, "step": 3270, "train_speed(iter/s)": 0.024197 }, { "acc": 0.6960813, "epoch": 2.57, "learning_rate": 8.875316183482464e-05, "loss": 0.96283712, "memory(GiB)": 71.19, "step": 3275, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69431438, "epoch": 2.57, "learning_rate": 8.871215499287319e-05, "loss": 0.97081881, "memory(GiB)": 71.19, "step": 3280, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69778886, "epoch": 2.57, "learning_rate": 8.867108303977912e-05, "loss": 0.95300598, "memory(GiB)": 71.19, "step": 3285, "train_speed(iter/s)": 0.024197 }, { "acc": 0.70309801, "epoch": 2.58, "learning_rate": 8.862994604462256e-05, "loss": 0.93476143, "memory(GiB)": 71.19, "step": 3290, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69512172, "epoch": 2.58, "learning_rate": 8.85887440765931e-05, "loss": 0.96246347, "memory(GiB)": 71.19, "step": 3295, "train_speed(iter/s)": 0.024197 }, { "acc": 0.71056523, "epoch": 2.59, "learning_rate": 8.854747720498954e-05, "loss": 0.90803175, "memory(GiB)": 71.19, "step": 3300, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69368334, "epoch": 2.59, "learning_rate": 8.850614549921994e-05, "loss": 0.99619999, "memory(GiB)": 71.19, "step": 3305, "train_speed(iter/s)": 0.024197 }, { "acc": 0.68741622, "epoch": 2.59, "learning_rate": 8.846474902880128e-05, "loss": 1.01268291, "memory(GiB)": 71.19, "step": 3310, "train_speed(iter/s)": 0.024197 }, { "acc": 0.68540864, "epoch": 2.6, "learning_rate": 8.842328786335956e-05, "loss": 1.00386658, "memory(GiB)": 71.19, "step": 3315, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69587579, "epoch": 2.6, "learning_rate": 8.838176207262958e-05, "loss": 0.95101843, "memory(GiB)": 71.19, "step": 3320, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69762154, "epoch": 2.61, "learning_rate": 8.834017172645478e-05, "loss": 0.94804831, "memory(GiB)": 71.19, "step": 3325, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69709311, "epoch": 2.61, "learning_rate": 8.829851689478725e-05, "loss": 0.99054155, "memory(GiB)": 71.19, "step": 3330, "train_speed(iter/s)": 0.024196 }, { "acc": 0.69299603, "epoch": 2.61, "learning_rate": 8.825679764768751e-05, "loss": 0.95626192, "memory(GiB)": 71.19, "step": 3335, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69584079, "epoch": 2.62, "learning_rate": 8.821501405532442e-05, "loss": 0.95285006, "memory(GiB)": 71.19, "step": 3340, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69174972, "epoch": 2.62, "learning_rate": 8.817316618797507e-05, "loss": 0.95945415, "memory(GiB)": 71.19, "step": 3345, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69653916, "epoch": 2.63, "learning_rate": 8.813125411602464e-05, "loss": 0.9533721, "memory(GiB)": 71.19, "step": 3350, "train_speed(iter/s)": 0.024198 }, { "acc": 0.69422874, "epoch": 2.63, "learning_rate": 8.80892779099663e-05, "loss": 0.96425648, "memory(GiB)": 71.19, "step": 3355, "train_speed(iter/s)": 0.024197 }, { "acc": 0.68755012, "epoch": 2.63, "learning_rate": 8.804723764040112e-05, "loss": 0.97660484, "memory(GiB)": 71.19, "step": 3360, "train_speed(iter/s)": 0.024197 }, { "acc": 0.6888967, "epoch": 2.64, "learning_rate": 8.800513337803788e-05, "loss": 0.95651054, "memory(GiB)": 71.19, "step": 3365, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69544282, "epoch": 2.64, "learning_rate": 8.796296519369303e-05, "loss": 0.9503314, "memory(GiB)": 71.19, "step": 3370, "train_speed(iter/s)": 0.024197 }, { "acc": 0.69458733, "epoch": 2.65, "learning_rate": 8.792073315829052e-05, "loss": 0.940135, "memory(GiB)": 71.19, "step": 3375, "train_speed(iter/s)": 0.024198 }, { "acc": 0.69739385, "epoch": 2.65, "learning_rate": 8.787843734286165e-05, "loss": 0.9286274, "memory(GiB)": 71.19, "step": 3380, "train_speed(iter/s)": 0.024198 }, { "acc": 0.71626267, "epoch": 2.65, "learning_rate": 8.783607781854506e-05, "loss": 0.91786966, "memory(GiB)": 71.19, "step": 3385, "train_speed(iter/s)": 0.024198 }, { "acc": 0.70292225, "epoch": 2.66, "learning_rate": 8.77936546565865e-05, "loss": 0.94578028, "memory(GiB)": 71.19, "step": 3390, "train_speed(iter/s)": 0.024198 }, { "acc": 0.70064354, "epoch": 2.66, "learning_rate": 8.775116792833878e-05, "loss": 0.95401373, "memory(GiB)": 71.19, "step": 3395, "train_speed(iter/s)": 0.024198 }, { "acc": 0.68651481, "epoch": 2.66, "learning_rate": 8.770861770526158e-05, "loss": 0.96929026, "memory(GiB)": 71.19, "step": 3400, "train_speed(iter/s)": 0.024198 }, { "acc": 0.68540168, "epoch": 2.67, "learning_rate": 8.766600405892145e-05, "loss": 1.01124077, "memory(GiB)": 71.19, "step": 3405, "train_speed(iter/s)": 0.024198 }, { "acc": 0.70259171, "epoch": 2.67, "learning_rate": 8.762332706099153e-05, "loss": 0.94904728, "memory(GiB)": 71.19, "step": 3410, "train_speed(iter/s)": 0.024198 }, { "acc": 0.69669948, "epoch": 2.68, "learning_rate": 8.758058678325156e-05, "loss": 0.94466686, "memory(GiB)": 71.19, "step": 3415, "train_speed(iter/s)": 0.024198 }, { "acc": 0.699084, "epoch": 2.68, "learning_rate": 8.753778329758773e-05, "loss": 0.96061077, "memory(GiB)": 71.19, "step": 3420, "train_speed(iter/s)": 0.024199 }, { "acc": 0.70037179, "epoch": 2.68, "learning_rate": 8.74949166759925e-05, "loss": 0.95514784, "memory(GiB)": 71.19, "step": 3425, "train_speed(iter/s)": 0.024198 }, { "acc": 0.69292159, "epoch": 2.69, "learning_rate": 8.745198699056451e-05, "loss": 0.98498917, "memory(GiB)": 71.19, "step": 3430, "train_speed(iter/s)": 0.024198 }, { "acc": 0.70176616, "epoch": 2.69, "learning_rate": 8.740899431350852e-05, "loss": 0.95132113, "memory(GiB)": 71.19, "step": 3435, "train_speed(iter/s)": 0.024198 }, { "acc": 0.70519481, "epoch": 2.7, "learning_rate": 8.736593871713523e-05, "loss": 0.95861454, "memory(GiB)": 71.19, "step": 3440, "train_speed(iter/s)": 0.024198 }, { "acc": 0.69500475, "epoch": 2.7, "learning_rate": 8.732282027386111e-05, "loss": 0.96890717, "memory(GiB)": 71.19, "step": 3445, "train_speed(iter/s)": 0.024199 }, { "acc": 0.69789162, "epoch": 2.7, "learning_rate": 8.72796390562084e-05, "loss": 0.93789511, "memory(GiB)": 71.19, "step": 3450, "train_speed(iter/s)": 0.024198 }, { "acc": 0.6822947, "epoch": 2.71, "learning_rate": 8.723639513680486e-05, "loss": 0.99033756, "memory(GiB)": 71.19, "step": 3455, "train_speed(iter/s)": 0.024198 }, { "acc": 0.70193062, "epoch": 2.71, "learning_rate": 8.719308858838377e-05, "loss": 0.95832653, "memory(GiB)": 71.19, "step": 3460, "train_speed(iter/s)": 0.024199 }, { "acc": 0.70000944, "epoch": 2.72, "learning_rate": 8.714971948378374e-05, "loss": 0.94106884, "memory(GiB)": 71.19, "step": 3465, "train_speed(iter/s)": 0.024199 }, { "acc": 0.69467239, "epoch": 2.72, "learning_rate": 8.710628789594855e-05, "loss": 0.97685585, "memory(GiB)": 71.19, "step": 3470, "train_speed(iter/s)": 0.024199 }, { "acc": 0.69117222, "epoch": 2.72, "learning_rate": 8.706279389792708e-05, "loss": 0.9753437, "memory(GiB)": 71.19, "step": 3475, "train_speed(iter/s)": 0.024199 }, { "acc": 0.7065135, "epoch": 2.73, "learning_rate": 8.701923756287325e-05, "loss": 0.92029209, "memory(GiB)": 71.19, "step": 3480, "train_speed(iter/s)": 0.024199 }, { "acc": 0.68631201, "epoch": 2.73, "learning_rate": 8.697561896404573e-05, "loss": 0.97980461, "memory(GiB)": 71.19, "step": 3485, "train_speed(iter/s)": 0.024199 }, { "acc": 0.69038978, "epoch": 2.74, "learning_rate": 8.693193817480798e-05, "loss": 0.96484947, "memory(GiB)": 71.19, "step": 3490, "train_speed(iter/s)": 0.024199 }, { "acc": 0.68747773, "epoch": 2.74, "learning_rate": 8.688819526862803e-05, "loss": 0.97741976, "memory(GiB)": 71.19, "step": 3495, "train_speed(iter/s)": 0.024199 }, { "acc": 0.69307537, "epoch": 2.74, "learning_rate": 8.684439031907843e-05, "loss": 0.97401991, "memory(GiB)": 71.19, "step": 3500, "train_speed(iter/s)": 0.024199 }, { "acc": 0.68593907, "epoch": 2.75, "learning_rate": 8.6800523399836e-05, "loss": 0.98575306, "memory(GiB)": 71.19, "step": 3505, "train_speed(iter/s)": 0.0242 }, { "acc": 0.69273987, "epoch": 2.75, "learning_rate": 8.675659458468184e-05, "loss": 0.97432585, "memory(GiB)": 71.19, "step": 3510, "train_speed(iter/s)": 0.024199 }, { "acc": 0.69315062, "epoch": 2.75, "learning_rate": 8.671260394750119e-05, "loss": 0.95112991, "memory(GiB)": 71.19, "step": 3515, "train_speed(iter/s)": 0.024199 }, { "acc": 0.69168601, "epoch": 2.76, "learning_rate": 8.66685515622832e-05, "loss": 0.98360157, "memory(GiB)": 71.19, "step": 3520, "train_speed(iter/s)": 0.024199 }, { "acc": 0.69696198, "epoch": 2.76, "learning_rate": 8.66244375031209e-05, "loss": 0.95998116, "memory(GiB)": 71.19, "step": 3525, "train_speed(iter/s)": 0.024199 }, { "acc": 0.70211124, "epoch": 2.77, "learning_rate": 8.658026184421108e-05, "loss": 0.91666279, "memory(GiB)": 71.19, "step": 3530, "train_speed(iter/s)": 0.024199 }, { "acc": 0.70166306, "epoch": 2.77, "learning_rate": 8.653602465985411e-05, "loss": 0.94730425, "memory(GiB)": 71.19, "step": 3535, "train_speed(iter/s)": 0.024199 }, { "acc": 0.69360576, "epoch": 2.77, "learning_rate": 8.649172602445384e-05, "loss": 0.96196995, "memory(GiB)": 71.19, "step": 3540, "train_speed(iter/s)": 0.024199 }, { "acc": 0.69113541, "epoch": 2.78, "learning_rate": 8.644736601251749e-05, "loss": 0.97629557, "memory(GiB)": 71.19, "step": 3545, "train_speed(iter/s)": 0.024199 }, { "acc": 0.69766083, "epoch": 2.78, "learning_rate": 8.640294469865549e-05, "loss": 0.97009659, "memory(GiB)": 71.19, "step": 3550, "train_speed(iter/s)": 0.024199 }, { "acc": 0.7054462, "epoch": 2.79, "learning_rate": 8.635846215758141e-05, "loss": 0.94846039, "memory(GiB)": 71.19, "step": 3555, "train_speed(iter/s)": 0.024199 }, { "acc": 0.68731637, "epoch": 2.79, "learning_rate": 8.631391846411177e-05, "loss": 1.00905914, "memory(GiB)": 71.19, "step": 3560, "train_speed(iter/s)": 0.0242 }, { "acc": 0.7051527, "epoch": 2.79, "learning_rate": 8.626931369316594e-05, "loss": 0.93751678, "memory(GiB)": 71.19, "step": 3565, "train_speed(iter/s)": 0.024199 }, { "acc": 0.69043989, "epoch": 2.8, "learning_rate": 8.622464791976604e-05, "loss": 0.97965469, "memory(GiB)": 71.19, "step": 3570, "train_speed(iter/s)": 0.0242 }, { "acc": 0.68530822, "epoch": 2.8, "learning_rate": 8.617992121903679e-05, "loss": 1.03002996, "memory(GiB)": 71.19, "step": 3575, "train_speed(iter/s)": 0.024199 }, { "acc": 0.69462538, "epoch": 2.81, "learning_rate": 8.613513366620538e-05, "loss": 0.958325, "memory(GiB)": 71.19, "step": 3580, "train_speed(iter/s)": 0.024199 }, { "acc": 0.70790119, "epoch": 2.81, "learning_rate": 8.609028533660135e-05, "loss": 0.91673079, "memory(GiB)": 71.19, "step": 3585, "train_speed(iter/s)": 0.024199 }, { "acc": 0.69034023, "epoch": 2.81, "learning_rate": 8.604537630565644e-05, "loss": 0.9714694, "memory(GiB)": 71.19, "step": 3590, "train_speed(iter/s)": 0.024199 }, { "acc": 0.69581952, "epoch": 2.82, "learning_rate": 8.600040664890453e-05, "loss": 0.96100874, "memory(GiB)": 71.19, "step": 3595, "train_speed(iter/s)": 0.024199 }, { "acc": 0.70398974, "epoch": 2.82, "learning_rate": 8.595537644198141e-05, "loss": 0.9286829, "memory(GiB)": 71.19, "step": 3600, "train_speed(iter/s)": 0.024199 }, { "acc": 0.68837748, "epoch": 2.83, "learning_rate": 8.591028576062478e-05, "loss": 0.99620714, "memory(GiB)": 71.19, "step": 3605, "train_speed(iter/s)": 0.0242 }, { "acc": 0.69134026, "epoch": 2.83, "learning_rate": 8.586513468067404e-05, "loss": 0.97660503, "memory(GiB)": 71.19, "step": 3610, "train_speed(iter/s)": 0.0242 }, { "acc": 0.69011774, "epoch": 2.83, "learning_rate": 8.581992327807007e-05, "loss": 0.9757412, "memory(GiB)": 71.19, "step": 3615, "train_speed(iter/s)": 0.0242 }, { "acc": 0.6966249, "epoch": 2.84, "learning_rate": 8.577465162885538e-05, "loss": 0.96154041, "memory(GiB)": 71.19, "step": 3620, "train_speed(iter/s)": 0.024201 }, { "acc": 0.70414891, "epoch": 2.84, "learning_rate": 8.572931980917366e-05, "loss": 0.95618954, "memory(GiB)": 71.19, "step": 3625, "train_speed(iter/s)": 0.024201 }, { "acc": 0.69498696, "epoch": 2.85, "learning_rate": 8.568392789526991e-05, "loss": 0.9771987, "memory(GiB)": 71.19, "step": 3630, "train_speed(iter/s)": 0.024201 }, { "acc": 0.69978442, "epoch": 2.85, "learning_rate": 8.563847596349015e-05, "loss": 0.93518782, "memory(GiB)": 71.19, "step": 3635, "train_speed(iter/s)": 0.024201 }, { "acc": 0.70075121, "epoch": 2.85, "learning_rate": 8.559296409028134e-05, "loss": 0.95782223, "memory(GiB)": 71.19, "step": 3640, "train_speed(iter/s)": 0.024201 }, { "acc": 0.69935665, "epoch": 2.86, "learning_rate": 8.554739235219129e-05, "loss": 0.96709738, "memory(GiB)": 71.19, "step": 3645, "train_speed(iter/s)": 0.024201 }, { "acc": 0.70329223, "epoch": 2.86, "learning_rate": 8.550176082586847e-05, "loss": 0.92794161, "memory(GiB)": 71.19, "step": 3650, "train_speed(iter/s)": 0.024201 }, { "acc": 0.69627366, "epoch": 2.86, "learning_rate": 8.545606958806195e-05, "loss": 0.96420841, "memory(GiB)": 71.19, "step": 3655, "train_speed(iter/s)": 0.024202 }, { "acc": 0.69576621, "epoch": 2.87, "learning_rate": 8.541031871562118e-05, "loss": 0.97613201, "memory(GiB)": 71.19, "step": 3660, "train_speed(iter/s)": 0.024202 }, { "acc": 0.69630737, "epoch": 2.87, "learning_rate": 8.536450828549593e-05, "loss": 0.93979073, "memory(GiB)": 71.19, "step": 3665, "train_speed(iter/s)": 0.024202 }, { "acc": 0.68497229, "epoch": 2.88, "learning_rate": 8.531863837473617e-05, "loss": 0.98675127, "memory(GiB)": 71.19, "step": 3670, "train_speed(iter/s)": 0.024203 }, { "acc": 0.68888111, "epoch": 2.88, "learning_rate": 8.527270906049185e-05, "loss": 0.99011154, "memory(GiB)": 71.19, "step": 3675, "train_speed(iter/s)": 0.024203 }, { "acc": 0.70074887, "epoch": 2.88, "learning_rate": 8.522672042001291e-05, "loss": 0.95177927, "memory(GiB)": 71.19, "step": 3680, "train_speed(iter/s)": 0.024203 }, { "acc": 0.69165268, "epoch": 2.89, "learning_rate": 8.5180672530649e-05, "loss": 0.97305288, "memory(GiB)": 71.19, "step": 3685, "train_speed(iter/s)": 0.024203 }, { "acc": 0.69212561, "epoch": 2.89, "learning_rate": 8.51345654698495e-05, "loss": 0.96181002, "memory(GiB)": 71.19, "step": 3690, "train_speed(iter/s)": 0.024203 }, { "acc": 0.7015995, "epoch": 2.9, "learning_rate": 8.508839931516322e-05, "loss": 0.94027262, "memory(GiB)": 71.19, "step": 3695, "train_speed(iter/s)": 0.024203 }, { "acc": 0.6916369, "epoch": 2.9, "learning_rate": 8.504217414423843e-05, "loss": 0.96168337, "memory(GiB)": 71.19, "step": 3700, "train_speed(iter/s)": 0.024203 }, { "acc": 0.69469385, "epoch": 2.9, "learning_rate": 8.499589003482264e-05, "loss": 0.96138353, "memory(GiB)": 71.19, "step": 3705, "train_speed(iter/s)": 0.024203 }, { "acc": 0.69942169, "epoch": 2.91, "learning_rate": 8.49495470647625e-05, "loss": 0.96778059, "memory(GiB)": 71.19, "step": 3710, "train_speed(iter/s)": 0.024204 }, { "acc": 0.69427686, "epoch": 2.91, "learning_rate": 8.490314531200365e-05, "loss": 0.97816887, "memory(GiB)": 71.19, "step": 3715, "train_speed(iter/s)": 0.024203 }, { "acc": 0.7088768, "epoch": 2.92, "learning_rate": 8.485668485459057e-05, "loss": 0.94817715, "memory(GiB)": 71.19, "step": 3720, "train_speed(iter/s)": 0.024203 }, { "acc": 0.69802999, "epoch": 2.92, "learning_rate": 8.481016577066654e-05, "loss": 0.97512226, "memory(GiB)": 71.19, "step": 3725, "train_speed(iter/s)": 0.024203 }, { "acc": 0.70213175, "epoch": 2.92, "learning_rate": 8.47635881384734e-05, "loss": 0.94330521, "memory(GiB)": 71.19, "step": 3730, "train_speed(iter/s)": 0.024204 }, { "acc": 0.68532333, "epoch": 2.93, "learning_rate": 8.47169520363515e-05, "loss": 0.96857815, "memory(GiB)": 71.19, "step": 3735, "train_speed(iter/s)": 0.024204 }, { "acc": 0.69682803, "epoch": 2.93, "learning_rate": 8.467025754273947e-05, "loss": 0.98179436, "memory(GiB)": 71.19, "step": 3740, "train_speed(iter/s)": 0.024204 }, { "acc": 0.69457374, "epoch": 2.94, "learning_rate": 8.46235047361742e-05, "loss": 0.97254868, "memory(GiB)": 71.19, "step": 3745, "train_speed(iter/s)": 0.024204 }, { "acc": 0.69419403, "epoch": 2.94, "learning_rate": 8.457669369529066e-05, "loss": 0.95455761, "memory(GiB)": 71.19, "step": 3750, "train_speed(iter/s)": 0.024204 }, { "acc": 0.70040722, "epoch": 2.94, "learning_rate": 8.452982449882175e-05, "loss": 0.92892199, "memory(GiB)": 71.19, "step": 3755, "train_speed(iter/s)": 0.024204 }, { "acc": 0.70409389, "epoch": 2.95, "learning_rate": 8.448289722559816e-05, "loss": 0.93102856, "memory(GiB)": 71.19, "step": 3760, "train_speed(iter/s)": 0.024203 }, { "acc": 0.69685011, "epoch": 2.95, "learning_rate": 8.443591195454834e-05, "loss": 0.96960402, "memory(GiB)": 71.19, "step": 3765, "train_speed(iter/s)": 0.024203 }, { "acc": 0.69049578, "epoch": 2.95, "learning_rate": 8.438886876469818e-05, "loss": 0.9981617, "memory(GiB)": 71.19, "step": 3770, "train_speed(iter/s)": 0.024204 }, { "acc": 0.70177493, "epoch": 2.96, "learning_rate": 8.43417677351711e-05, "loss": 0.91999292, "memory(GiB)": 71.19, "step": 3775, "train_speed(iter/s)": 0.024203 }, { "acc": 0.69396958, "epoch": 2.96, "learning_rate": 8.429460894518771e-05, "loss": 0.98489656, "memory(GiB)": 71.19, "step": 3780, "train_speed(iter/s)": 0.024203 }, { "acc": 0.69775419, "epoch": 2.97, "learning_rate": 8.424739247406579e-05, "loss": 0.95434799, "memory(GiB)": 71.19, "step": 3785, "train_speed(iter/s)": 0.024203 }, { "acc": 0.69874148, "epoch": 2.97, "learning_rate": 8.420011840122016e-05, "loss": 0.94881783, "memory(GiB)": 71.19, "step": 3790, "train_speed(iter/s)": 0.024203 }, { "acc": 0.70053377, "epoch": 2.97, "learning_rate": 8.415278680616252e-05, "loss": 0.95457497, "memory(GiB)": 71.19, "step": 3795, "train_speed(iter/s)": 0.024203 }, { "acc": 0.68822541, "epoch": 2.98, "learning_rate": 8.410539776850133e-05, "loss": 0.97871866, "memory(GiB)": 71.19, "step": 3800, "train_speed(iter/s)": 0.024202 }, { "acc": 0.69143753, "epoch": 2.98, "learning_rate": 8.405795136794159e-05, "loss": 0.9598156, "memory(GiB)": 71.19, "step": 3805, "train_speed(iter/s)": 0.024202 }, { "acc": 0.68940425, "epoch": 2.99, "learning_rate": 8.401044768428487e-05, "loss": 0.98408442, "memory(GiB)": 71.19, "step": 3810, "train_speed(iter/s)": 0.024202 }, { "acc": 0.71102018, "epoch": 2.99, "learning_rate": 8.396288679742905e-05, "loss": 0.91080713, "memory(GiB)": 71.19, "step": 3815, "train_speed(iter/s)": 0.024202 }, { "acc": 0.69605985, "epoch": 2.99, "learning_rate": 8.391526878736822e-05, "loss": 0.96173496, "memory(GiB)": 71.19, "step": 3820, "train_speed(iter/s)": 0.024202 }, { "acc": 0.68129706, "epoch": 3.0, "learning_rate": 8.386759373419252e-05, "loss": 1.0301609, "memory(GiB)": 71.19, "step": 3825, "train_speed(iter/s)": 0.024203 }, { "epoch": 3.0, "eval_acc": 0.7299023290758828, "eval_loss": 0.811944305896759, "eval_runtime": 107.6655, "eval_samples_per_second": 0.864, "eval_steps_per_second": 0.864, "step": 3827 }, { "acc": 0.70716329, "epoch": 3.0, "learning_rate": 8.381986171808811e-05, "loss": 0.90910578, "memory(GiB)": 71.19, "step": 3830, "train_speed(iter/s)": 0.024186 }, { "acc": 0.73387895, "epoch": 3.01, "learning_rate": 8.377207281933687e-05, "loss": 0.82784376, "memory(GiB)": 71.19, "step": 3835, "train_speed(iter/s)": 0.024186 }, { "acc": 0.72814317, "epoch": 3.01, "learning_rate": 8.372422711831644e-05, "loss": 0.83022747, "memory(GiB)": 71.19, "step": 3840, "train_speed(iter/s)": 0.024187 }, { "acc": 0.72625008, "epoch": 3.01, "learning_rate": 8.367632469549989e-05, "loss": 0.82957611, "memory(GiB)": 71.19, "step": 3845, "train_speed(iter/s)": 0.024187 }, { "acc": 0.7326128, "epoch": 3.02, "learning_rate": 8.362836563145578e-05, "loss": 0.82128582, "memory(GiB)": 71.19, "step": 3850, "train_speed(iter/s)": 0.024187 }, { "acc": 0.7478919, "epoch": 3.02, "learning_rate": 8.358035000684791e-05, "loss": 0.77482996, "memory(GiB)": 71.19, "step": 3855, "train_speed(iter/s)": 0.024186 }, { "acc": 0.73634334, "epoch": 3.03, "learning_rate": 8.353227790243521e-05, "loss": 0.804041, "memory(GiB)": 71.19, "step": 3860, "train_speed(iter/s)": 0.024186 }, { "acc": 0.73621793, "epoch": 3.03, "learning_rate": 8.34841493990716e-05, "loss": 0.80979652, "memory(GiB)": 71.19, "step": 3865, "train_speed(iter/s)": 0.024186 }, { "acc": 0.7203692, "epoch": 3.03, "learning_rate": 8.343596457770587e-05, "loss": 0.84263067, "memory(GiB)": 71.19, "step": 3870, "train_speed(iter/s)": 0.024186 }, { "acc": 0.71783953, "epoch": 3.04, "learning_rate": 8.338772351938148e-05, "loss": 0.87986736, "memory(GiB)": 71.19, "step": 3875, "train_speed(iter/s)": 0.024187 }, { "acc": 0.74515467, "epoch": 3.04, "learning_rate": 8.333942630523662e-05, "loss": 0.78914156, "memory(GiB)": 71.19, "step": 3880, "train_speed(iter/s)": 0.024187 }, { "acc": 0.71863422, "epoch": 3.04, "learning_rate": 8.329107301650374e-05, "loss": 0.85929756, "memory(GiB)": 71.19, "step": 3885, "train_speed(iter/s)": 0.024187 }, { "acc": 0.73198423, "epoch": 3.05, "learning_rate": 8.324266373450974e-05, "loss": 0.84285383, "memory(GiB)": 71.19, "step": 3890, "train_speed(iter/s)": 0.024188 }, { "acc": 0.71086202, "epoch": 3.05, "learning_rate": 8.319419854067564e-05, "loss": 0.89417582, "memory(GiB)": 71.19, "step": 3895, "train_speed(iter/s)": 0.024188 }, { "acc": 0.73028541, "epoch": 3.06, "learning_rate": 8.314567751651654e-05, "loss": 0.84059143, "memory(GiB)": 71.19, "step": 3900, "train_speed(iter/s)": 0.024187 }, { "acc": 0.73362865, "epoch": 3.06, "learning_rate": 8.309710074364138e-05, "loss": 0.82103605, "memory(GiB)": 71.19, "step": 3905, "train_speed(iter/s)": 0.024187 }, { "acc": 0.74105625, "epoch": 3.06, "learning_rate": 8.304846830375292e-05, "loss": 0.7890769, "memory(GiB)": 71.19, "step": 3910, "train_speed(iter/s)": 0.024187 }, { "acc": 0.73808637, "epoch": 3.07, "learning_rate": 8.299978027864752e-05, "loss": 0.81043692, "memory(GiB)": 71.19, "step": 3915, "train_speed(iter/s)": 0.024188 }, { "acc": 0.71901126, "epoch": 3.07, "learning_rate": 8.295103675021505e-05, "loss": 0.85230951, "memory(GiB)": 71.19, "step": 3920, "train_speed(iter/s)": 0.024188 }, { "acc": 0.7334156, "epoch": 3.08, "learning_rate": 8.290223780043874e-05, "loss": 0.8069334, "memory(GiB)": 71.19, "step": 3925, "train_speed(iter/s)": 0.024188 }, { "acc": 0.72944226, "epoch": 3.08, "learning_rate": 8.285338351139496e-05, "loss": 0.84712934, "memory(GiB)": 71.19, "step": 3930, "train_speed(iter/s)": 0.024188 }, { "acc": 0.73505478, "epoch": 3.08, "learning_rate": 8.280447396525328e-05, "loss": 0.79626627, "memory(GiB)": 71.19, "step": 3935, "train_speed(iter/s)": 0.024189 }, { "acc": 0.72741809, "epoch": 3.09, "learning_rate": 8.275550924427609e-05, "loss": 0.82367277, "memory(GiB)": 71.19, "step": 3940, "train_speed(iter/s)": 0.024188 }, { "acc": 0.72524977, "epoch": 3.09, "learning_rate": 8.270648943081867e-05, "loss": 0.84687901, "memory(GiB)": 71.19, "step": 3945, "train_speed(iter/s)": 0.024189 }, { "acc": 0.73152761, "epoch": 3.1, "learning_rate": 8.26574146073289e-05, "loss": 0.84828215, "memory(GiB)": 71.19, "step": 3950, "train_speed(iter/s)": 0.024189 }, { "acc": 0.72471776, "epoch": 3.1, "learning_rate": 8.260828485634722e-05, "loss": 0.86218882, "memory(GiB)": 71.19, "step": 3955, "train_speed(iter/s)": 0.024189 }, { "acc": 0.7207871, "epoch": 3.1, "learning_rate": 8.255910026050643e-05, "loss": 0.84007759, "memory(GiB)": 71.19, "step": 3960, "train_speed(iter/s)": 0.024189 }, { "acc": 0.73610811, "epoch": 3.11, "learning_rate": 8.250986090253163e-05, "loss": 0.82142134, "memory(GiB)": 71.19, "step": 3965, "train_speed(iter/s)": 0.02419 }, { "acc": 0.73013797, "epoch": 3.11, "learning_rate": 8.246056686523994e-05, "loss": 0.83745384, "memory(GiB)": 71.19, "step": 3970, "train_speed(iter/s)": 0.02419 }, { "acc": 0.73723321, "epoch": 3.12, "learning_rate": 8.241121823154047e-05, "loss": 0.81523333, "memory(GiB)": 71.19, "step": 3975, "train_speed(iter/s)": 0.02419 }, { "acc": 0.7233716, "epoch": 3.12, "learning_rate": 8.236181508443424e-05, "loss": 0.83602371, "memory(GiB)": 71.19, "step": 3980, "train_speed(iter/s)": 0.024189 }, { "acc": 0.72590818, "epoch": 3.12, "learning_rate": 8.231235750701385e-05, "loss": 0.8545907, "memory(GiB)": 71.19, "step": 3985, "train_speed(iter/s)": 0.024189 }, { "acc": 0.7300528, "epoch": 3.13, "learning_rate": 8.226284558246351e-05, "loss": 0.82412634, "memory(GiB)": 71.19, "step": 3990, "train_speed(iter/s)": 0.024189 }, { "acc": 0.72257438, "epoch": 3.13, "learning_rate": 8.221327939405881e-05, "loss": 0.85074911, "memory(GiB)": 71.19, "step": 3995, "train_speed(iter/s)": 0.02419 }, { "acc": 0.73442616, "epoch": 3.14, "learning_rate": 8.216365902516664e-05, "loss": 0.80528517, "memory(GiB)": 71.19, "step": 4000, "train_speed(iter/s)": 0.024189 }, { "acc": 0.72549343, "epoch": 3.14, "learning_rate": 8.211398455924497e-05, "loss": 0.86022482, "memory(GiB)": 71.19, "step": 4005, "train_speed(iter/s)": 0.02419 }, { "acc": 0.72123246, "epoch": 3.14, "learning_rate": 8.206425607984282e-05, "loss": 0.86917925, "memory(GiB)": 71.19, "step": 4010, "train_speed(iter/s)": 0.02419 }, { "acc": 0.73181915, "epoch": 3.15, "learning_rate": 8.201447367059998e-05, "loss": 0.82419157, "memory(GiB)": 71.19, "step": 4015, "train_speed(iter/s)": 0.02419 }, { "acc": 0.72473426, "epoch": 3.15, "learning_rate": 8.196463741524701e-05, "loss": 0.85585718, "memory(GiB)": 71.19, "step": 4020, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72140322, "epoch": 3.15, "learning_rate": 8.1914747397605e-05, "loss": 0.85199308, "memory(GiB)": 71.19, "step": 4025, "train_speed(iter/s)": 0.024191 }, { "acc": 0.73175788, "epoch": 3.16, "learning_rate": 8.186480370158551e-05, "loss": 0.85048962, "memory(GiB)": 71.19, "step": 4030, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72417698, "epoch": 3.16, "learning_rate": 8.181480641119032e-05, "loss": 0.84959297, "memory(GiB)": 71.19, "step": 4035, "train_speed(iter/s)": 0.02419 }, { "acc": 0.72861757, "epoch": 3.17, "learning_rate": 8.176475561051137e-05, "loss": 0.83446856, "memory(GiB)": 71.19, "step": 4040, "train_speed(iter/s)": 0.02419 }, { "acc": 0.73830481, "epoch": 3.17, "learning_rate": 8.171465138373067e-05, "loss": 0.79196048, "memory(GiB)": 71.19, "step": 4045, "train_speed(iter/s)": 0.02419 }, { "acc": 0.73525729, "epoch": 3.17, "learning_rate": 8.166449381511998e-05, "loss": 0.80135098, "memory(GiB)": 71.19, "step": 4050, "train_speed(iter/s)": 0.02419 }, { "acc": 0.72653871, "epoch": 3.18, "learning_rate": 8.161428298904085e-05, "loss": 0.84726629, "memory(GiB)": 71.19, "step": 4055, "train_speed(iter/s)": 0.02419 }, { "acc": 0.73992734, "epoch": 3.18, "learning_rate": 8.156401898994436e-05, "loss": 0.80075979, "memory(GiB)": 71.19, "step": 4060, "train_speed(iter/s)": 0.02419 }, { "acc": 0.7188323, "epoch": 3.19, "learning_rate": 8.151370190237108e-05, "loss": 0.88397989, "memory(GiB)": 71.19, "step": 4065, "train_speed(iter/s)": 0.02419 }, { "acc": 0.73720374, "epoch": 3.19, "learning_rate": 8.146333181095084e-05, "loss": 0.82127218, "memory(GiB)": 71.19, "step": 4070, "train_speed(iter/s)": 0.02419 }, { "acc": 0.72629747, "epoch": 3.19, "learning_rate": 8.14129088004026e-05, "loss": 0.86305199, "memory(GiB)": 71.19, "step": 4075, "train_speed(iter/s)": 0.024189 }, { "acc": 0.73932276, "epoch": 3.2, "learning_rate": 8.136243295553434e-05, "loss": 0.82476034, "memory(GiB)": 71.19, "step": 4080, "train_speed(iter/s)": 0.02419 }, { "acc": 0.72464457, "epoch": 3.2, "learning_rate": 8.131190436124294e-05, "loss": 0.86362638, "memory(GiB)": 71.19, "step": 4085, "train_speed(iter/s)": 0.02419 }, { "acc": 0.72220616, "epoch": 3.21, "learning_rate": 8.126132310251393e-05, "loss": 0.83337736, "memory(GiB)": 71.19, "step": 4090, "train_speed(iter/s)": 0.024191 }, { "acc": 0.73724484, "epoch": 3.21, "learning_rate": 8.121068926442148e-05, "loss": 0.82069883, "memory(GiB)": 71.19, "step": 4095, "train_speed(iter/s)": 0.024191 }, { "acc": 0.7229681, "epoch": 3.21, "learning_rate": 8.116000293212815e-05, "loss": 0.85246668, "memory(GiB)": 71.19, "step": 4100, "train_speed(iter/s)": 0.024191 }, { "acc": 0.73643503, "epoch": 3.22, "learning_rate": 8.110926419088485e-05, "loss": 0.83178596, "memory(GiB)": 71.19, "step": 4105, "train_speed(iter/s)": 0.024191 }, { "acc": 0.73539133, "epoch": 3.22, "learning_rate": 8.105847312603056e-05, "loss": 0.82203569, "memory(GiB)": 71.19, "step": 4110, "train_speed(iter/s)": 0.024191 }, { "acc": 0.73065267, "epoch": 3.23, "learning_rate": 8.100762982299232e-05, "loss": 0.79565067, "memory(GiB)": 71.19, "step": 4115, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72940273, "epoch": 3.23, "learning_rate": 8.095673436728504e-05, "loss": 0.83768406, "memory(GiB)": 71.19, "step": 4120, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72964163, "epoch": 3.23, "learning_rate": 8.090578684451131e-05, "loss": 0.84069805, "memory(GiB)": 71.19, "step": 4125, "train_speed(iter/s)": 0.024192 }, { "acc": 0.73201489, "epoch": 3.24, "learning_rate": 8.085478734036129e-05, "loss": 0.82388248, "memory(GiB)": 71.19, "step": 4130, "train_speed(iter/s)": 0.024192 }, { "acc": 0.72238297, "epoch": 3.24, "learning_rate": 8.080373594061261e-05, "loss": 0.85109663, "memory(GiB)": 71.19, "step": 4135, "train_speed(iter/s)": 0.024191 }, { "acc": 0.74465365, "epoch": 3.24, "learning_rate": 8.075263273113013e-05, "loss": 0.78524365, "memory(GiB)": 71.19, "step": 4140, "train_speed(iter/s)": 0.024191 }, { "acc": 0.71745272, "epoch": 3.25, "learning_rate": 8.070147779786593e-05, "loss": 0.87544708, "memory(GiB)": 71.19, "step": 4145, "train_speed(iter/s)": 0.024191 }, { "acc": 0.71826987, "epoch": 3.25, "learning_rate": 8.0650271226859e-05, "loss": 0.8916872, "memory(GiB)": 71.19, "step": 4150, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72508607, "epoch": 3.26, "learning_rate": 8.05990131042352e-05, "loss": 0.83632765, "memory(GiB)": 71.19, "step": 4155, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72774878, "epoch": 3.26, "learning_rate": 8.054770351620718e-05, "loss": 0.84630947, "memory(GiB)": 71.19, "step": 4160, "train_speed(iter/s)": 0.024191 }, { "acc": 0.71050835, "epoch": 3.26, "learning_rate": 8.049634254907404e-05, "loss": 0.88207722, "memory(GiB)": 71.19, "step": 4165, "train_speed(iter/s)": 0.024192 }, { "acc": 0.73926072, "epoch": 3.27, "learning_rate": 8.044493028922133e-05, "loss": 0.80433979, "memory(GiB)": 71.19, "step": 4170, "train_speed(iter/s)": 0.024191 }, { "acc": 0.7169281, "epoch": 3.27, "learning_rate": 8.03934668231209e-05, "loss": 0.85596704, "memory(GiB)": 71.19, "step": 4175, "train_speed(iter/s)": 0.024192 }, { "acc": 0.7407104, "epoch": 3.28, "learning_rate": 8.034195223733074e-05, "loss": 0.80006132, "memory(GiB)": 71.19, "step": 4180, "train_speed(iter/s)": 0.024192 }, { "acc": 0.72462053, "epoch": 3.28, "learning_rate": 8.029038661849472e-05, "loss": 0.85594482, "memory(GiB)": 71.19, "step": 4185, "train_speed(iter/s)": 0.024192 }, { "acc": 0.72585344, "epoch": 3.28, "learning_rate": 8.023877005334268e-05, "loss": 0.84595318, "memory(GiB)": 71.19, "step": 4190, "train_speed(iter/s)": 0.024191 }, { "acc": 0.7315063, "epoch": 3.29, "learning_rate": 8.018710262869005e-05, "loss": 0.82566252, "memory(GiB)": 71.19, "step": 4195, "train_speed(iter/s)": 0.024192 }, { "acc": 0.7300137, "epoch": 3.29, "learning_rate": 8.013538443143782e-05, "loss": 0.8305336, "memory(GiB)": 71.19, "step": 4200, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72604132, "epoch": 3.3, "learning_rate": 8.008361554857237e-05, "loss": 0.85777216, "memory(GiB)": 71.19, "step": 4205, "train_speed(iter/s)": 0.024191 }, { "acc": 0.71913915, "epoch": 3.3, "learning_rate": 8.003179606716543e-05, "loss": 0.85751801, "memory(GiB)": 71.19, "step": 4210, "train_speed(iter/s)": 0.024191 }, { "acc": 0.71904831, "epoch": 3.3, "learning_rate": 7.997992607437365e-05, "loss": 0.86428804, "memory(GiB)": 71.19, "step": 4215, "train_speed(iter/s)": 0.024191 }, { "acc": 0.71747947, "epoch": 3.31, "learning_rate": 7.992800565743882e-05, "loss": 0.86572142, "memory(GiB)": 71.19, "step": 4220, "train_speed(iter/s)": 0.024192 }, { "acc": 0.72012987, "epoch": 3.31, "learning_rate": 7.987603490368741e-05, "loss": 0.85723791, "memory(GiB)": 71.19, "step": 4225, "train_speed(iter/s)": 0.024191 }, { "acc": 0.73201785, "epoch": 3.32, "learning_rate": 7.98240139005306e-05, "loss": 0.8323741, "memory(GiB)": 71.19, "step": 4230, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72473407, "epoch": 3.32, "learning_rate": 7.977194273546411e-05, "loss": 0.85458097, "memory(GiB)": 71.19, "step": 4235, "train_speed(iter/s)": 0.024191 }, { "acc": 0.73437066, "epoch": 3.32, "learning_rate": 7.971982149606799e-05, "loss": 0.80146246, "memory(GiB)": 71.19, "step": 4240, "train_speed(iter/s)": 0.024191 }, { "acc": 0.71682711, "epoch": 3.33, "learning_rate": 7.966765027000654e-05, "loss": 0.86212845, "memory(GiB)": 71.19, "step": 4245, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72666965, "epoch": 3.33, "learning_rate": 7.961542914502808e-05, "loss": 0.84145832, "memory(GiB)": 71.19, "step": 4250, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72355342, "epoch": 3.33, "learning_rate": 7.956315820896496e-05, "loss": 0.85306997, "memory(GiB)": 71.19, "step": 4255, "train_speed(iter/s)": 0.024191 }, { "acc": 0.73941717, "epoch": 3.34, "learning_rate": 7.951083754973321e-05, "loss": 0.82038488, "memory(GiB)": 71.19, "step": 4260, "train_speed(iter/s)": 0.024191 }, { "acc": 0.71963987, "epoch": 3.34, "learning_rate": 7.945846725533251e-05, "loss": 0.8572114, "memory(GiB)": 71.19, "step": 4265, "train_speed(iter/s)": 0.024191 }, { "acc": 0.73555474, "epoch": 3.35, "learning_rate": 7.940604741384607e-05, "loss": 0.82101889, "memory(GiB)": 71.19, "step": 4270, "train_speed(iter/s)": 0.024191 }, { "acc": 0.74150233, "epoch": 3.35, "learning_rate": 7.935357811344042e-05, "loss": 0.80582638, "memory(GiB)": 71.19, "step": 4275, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72068758, "epoch": 3.35, "learning_rate": 7.93010594423652e-05, "loss": 0.8544672, "memory(GiB)": 71.19, "step": 4280, "train_speed(iter/s)": 0.024191 }, { "acc": 0.73440638, "epoch": 3.36, "learning_rate": 7.924849148895321e-05, "loss": 0.80622263, "memory(GiB)": 71.19, "step": 4285, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72935481, "epoch": 3.36, "learning_rate": 7.919587434162004e-05, "loss": 0.82818518, "memory(GiB)": 71.19, "step": 4290, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72351742, "epoch": 3.37, "learning_rate": 7.914320808886409e-05, "loss": 0.85957403, "memory(GiB)": 71.19, "step": 4295, "train_speed(iter/s)": 0.024191 }, { "acc": 0.7248076, "epoch": 3.37, "learning_rate": 7.909049281926629e-05, "loss": 0.8453124, "memory(GiB)": 71.19, "step": 4300, "train_speed(iter/s)": 0.024192 }, { "acc": 0.72555737, "epoch": 3.37, "learning_rate": 7.903772862149004e-05, "loss": 0.8440258, "memory(GiB)": 71.19, "step": 4305, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72598963, "epoch": 3.38, "learning_rate": 7.898491558428108e-05, "loss": 0.83704453, "memory(GiB)": 71.19, "step": 4310, "train_speed(iter/s)": 0.024191 }, { "acc": 0.71948795, "epoch": 3.38, "learning_rate": 7.893205379646724e-05, "loss": 0.86024466, "memory(GiB)": 71.19, "step": 4315, "train_speed(iter/s)": 0.024191 }, { "acc": 0.73115282, "epoch": 3.39, "learning_rate": 7.887914334695831e-05, "loss": 0.83252287, "memory(GiB)": 71.19, "step": 4320, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72599154, "epoch": 3.39, "learning_rate": 7.882618432474604e-05, "loss": 0.83888359, "memory(GiB)": 71.19, "step": 4325, "train_speed(iter/s)": 0.024191 }, { "acc": 0.73242888, "epoch": 3.39, "learning_rate": 7.877317681890376e-05, "loss": 0.81837931, "memory(GiB)": 71.19, "step": 4330, "train_speed(iter/s)": 0.024191 }, { "acc": 0.742377, "epoch": 3.4, "learning_rate": 7.87201209185864e-05, "loss": 0.80096016, "memory(GiB)": 71.19, "step": 4335, "train_speed(iter/s)": 0.024191 }, { "acc": 0.7232306, "epoch": 3.4, "learning_rate": 7.866701671303032e-05, "loss": 0.86032343, "memory(GiB)": 71.19, "step": 4340, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72954855, "epoch": 3.41, "learning_rate": 7.861386429155304e-05, "loss": 0.82318716, "memory(GiB)": 71.19, "step": 4345, "train_speed(iter/s)": 0.024191 }, { "acc": 0.71807418, "epoch": 3.41, "learning_rate": 7.856066374355326e-05, "loss": 0.88005505, "memory(GiB)": 71.19, "step": 4350, "train_speed(iter/s)": 0.024191 }, { "acc": 0.73638568, "epoch": 3.41, "learning_rate": 7.850741515851057e-05, "loss": 0.81349354, "memory(GiB)": 71.19, "step": 4355, "train_speed(iter/s)": 0.024192 }, { "acc": 0.73490205, "epoch": 3.42, "learning_rate": 7.845411862598537e-05, "loss": 0.83284054, "memory(GiB)": 71.19, "step": 4360, "train_speed(iter/s)": 0.024192 }, { "acc": 0.73105078, "epoch": 3.42, "learning_rate": 7.840077423561871e-05, "loss": 0.84245071, "memory(GiB)": 71.19, "step": 4365, "train_speed(iter/s)": 0.024192 }, { "acc": 0.7196713, "epoch": 3.43, "learning_rate": 7.834738207713213e-05, "loss": 0.86376743, "memory(GiB)": 71.19, "step": 4370, "train_speed(iter/s)": 0.024192 }, { "acc": 0.72638869, "epoch": 3.43, "learning_rate": 7.829394224032753e-05, "loss": 0.82913513, "memory(GiB)": 71.19, "step": 4375, "train_speed(iter/s)": 0.024192 }, { "acc": 0.73445344, "epoch": 3.43, "learning_rate": 7.824045481508696e-05, "loss": 0.8318285, "memory(GiB)": 71.19, "step": 4380, "train_speed(iter/s)": 0.024192 }, { "acc": 0.7262754, "epoch": 3.44, "learning_rate": 7.818691989137255e-05, "loss": 0.84009447, "memory(GiB)": 71.19, "step": 4385, "train_speed(iter/s)": 0.024192 }, { "acc": 0.7386354, "epoch": 3.44, "learning_rate": 7.813333755922632e-05, "loss": 0.81151896, "memory(GiB)": 71.19, "step": 4390, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72992892, "epoch": 3.44, "learning_rate": 7.807970790876997e-05, "loss": 0.81546345, "memory(GiB)": 71.19, "step": 4395, "train_speed(iter/s)": 0.024191 }, { "acc": 0.73798876, "epoch": 3.45, "learning_rate": 7.802603103020487e-05, "loss": 0.80516491, "memory(GiB)": 71.19, "step": 4400, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72618403, "epoch": 3.45, "learning_rate": 7.797230701381177e-05, "loss": 0.83551912, "memory(GiB)": 71.19, "step": 4405, "train_speed(iter/s)": 0.024191 }, { "acc": 0.71417842, "epoch": 3.46, "learning_rate": 7.791853594995072e-05, "loss": 0.84696274, "memory(GiB)": 71.19, "step": 4410, "train_speed(iter/s)": 0.024191 }, { "acc": 0.7323916, "epoch": 3.46, "learning_rate": 7.78647179290609e-05, "loss": 0.82821932, "memory(GiB)": 71.19, "step": 4415, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72546659, "epoch": 3.46, "learning_rate": 7.781085304166042e-05, "loss": 0.84084616, "memory(GiB)": 71.19, "step": 4420, "train_speed(iter/s)": 0.024191 }, { "acc": 0.73983116, "epoch": 3.47, "learning_rate": 7.775694137834632e-05, "loss": 0.80149288, "memory(GiB)": 71.19, "step": 4425, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72577829, "epoch": 3.47, "learning_rate": 7.770298302979421e-05, "loss": 0.85075827, "memory(GiB)": 71.19, "step": 4430, "train_speed(iter/s)": 0.024192 }, { "acc": 0.731323, "epoch": 3.48, "learning_rate": 7.764897808675831e-05, "loss": 0.83516245, "memory(GiB)": 71.19, "step": 4435, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72064219, "epoch": 3.48, "learning_rate": 7.759492664007114e-05, "loss": 0.87227879, "memory(GiB)": 71.19, "step": 4440, "train_speed(iter/s)": 0.024191 }, { "acc": 0.73189783, "epoch": 3.48, "learning_rate": 7.754082878064346e-05, "loss": 0.82408819, "memory(GiB)": 71.19, "step": 4445, "train_speed(iter/s)": 0.024191 }, { "acc": 0.7397872, "epoch": 3.49, "learning_rate": 7.748668459946408e-05, "loss": 0.8035758, "memory(GiB)": 71.19, "step": 4450, "train_speed(iter/s)": 0.024191 }, { "acc": 0.73634014, "epoch": 3.49, "learning_rate": 7.743249418759976e-05, "loss": 0.8225769, "memory(GiB)": 71.19, "step": 4455, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72766705, "epoch": 3.5, "learning_rate": 7.7378257636195e-05, "loss": 0.83043652, "memory(GiB)": 71.19, "step": 4460, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72869205, "epoch": 3.5, "learning_rate": 7.732397503647184e-05, "loss": 0.83498526, "memory(GiB)": 71.19, "step": 4465, "train_speed(iter/s)": 0.024191 }, { "acc": 0.71786594, "epoch": 3.5, "learning_rate": 7.726964647972988e-05, "loss": 0.86003532, "memory(GiB)": 71.19, "step": 4470, "train_speed(iter/s)": 0.024191 }, { "acc": 0.73369732, "epoch": 3.51, "learning_rate": 7.721527205734593e-05, "loss": 0.82165928, "memory(GiB)": 71.19, "step": 4475, "train_speed(iter/s)": 0.024191 }, { "acc": 0.71339808, "epoch": 3.51, "learning_rate": 7.716085186077398e-05, "loss": 0.86701927, "memory(GiB)": 71.19, "step": 4480, "train_speed(iter/s)": 0.024191 }, { "acc": 0.72527127, "epoch": 3.52, "learning_rate": 7.7106385981545e-05, "loss": 0.8485733, "memory(GiB)": 71.19, "step": 4485, "train_speed(iter/s)": 0.024191 }, { "acc": 0.71741381, "epoch": 3.52, "learning_rate": 7.70518745112668e-05, "loss": 0.87526913, "memory(GiB)": 71.19, "step": 4490, "train_speed(iter/s)": 0.024191 }, { "acc": 0.73170438, "epoch": 3.52, "learning_rate": 7.699731754162388e-05, "loss": 0.84059429, "memory(GiB)": 71.19, "step": 4495, "train_speed(iter/s)": 0.024192 }, { "acc": 0.72498207, "epoch": 3.53, "learning_rate": 7.694271516437723e-05, "loss": 0.84459229, "memory(GiB)": 71.19, "step": 4500, "train_speed(iter/s)": 0.024192 }, { "acc": 0.7239903, "epoch": 3.53, "learning_rate": 7.688806747136426e-05, "loss": 0.84521217, "memory(GiB)": 71.19, "step": 4505, "train_speed(iter/s)": 0.024192 }, { "acc": 0.73728113, "epoch": 3.53, "learning_rate": 7.683337455449856e-05, "loss": 0.82545519, "memory(GiB)": 71.19, "step": 4510, "train_speed(iter/s)": 0.024192 }, { "acc": 0.72692027, "epoch": 3.54, "learning_rate": 7.677863650576979e-05, "loss": 0.84334087, "memory(GiB)": 71.19, "step": 4515, "train_speed(iter/s)": 0.024192 }, { "acc": 0.72908607, "epoch": 3.54, "learning_rate": 7.672385341724355e-05, "loss": 0.82269535, "memory(GiB)": 71.19, "step": 4520, "train_speed(iter/s)": 0.024192 }, { "acc": 0.72978554, "epoch": 3.55, "learning_rate": 7.666902538106118e-05, "loss": 0.84637251, "memory(GiB)": 71.19, "step": 4525, "train_speed(iter/s)": 0.024192 }, { "acc": 0.71525011, "epoch": 3.55, "learning_rate": 7.661415248943958e-05, "loss": 0.87230482, "memory(GiB)": 71.19, "step": 4530, "train_speed(iter/s)": 0.024192 }, { "acc": 0.72793851, "epoch": 3.55, "learning_rate": 7.655923483467114e-05, "loss": 0.81432896, "memory(GiB)": 71.19, "step": 4535, "train_speed(iter/s)": 0.024192 }, { "acc": 0.73044486, "epoch": 3.56, "learning_rate": 7.650427250912351e-05, "loss": 0.84025393, "memory(GiB)": 71.19, "step": 4540, "train_speed(iter/s)": 0.024192 }, { "acc": 0.72327971, "epoch": 3.56, "learning_rate": 7.644926560523952e-05, "loss": 0.84944382, "memory(GiB)": 71.19, "step": 4545, "train_speed(iter/s)": 0.024192 }, { "acc": 0.72713952, "epoch": 3.57, "learning_rate": 7.639421421553687e-05, "loss": 0.83744144, "memory(GiB)": 71.19, "step": 4550, "train_speed(iter/s)": 0.024192 }, { "acc": 0.7387239, "epoch": 3.57, "learning_rate": 7.633911843260825e-05, "loss": 0.82736578, "memory(GiB)": 71.19, "step": 4555, "train_speed(iter/s)": 0.024192 }, { "acc": 0.72111745, "epoch": 3.57, "learning_rate": 7.628397834912085e-05, "loss": 0.8412631, "memory(GiB)": 71.19, "step": 4560, "train_speed(iter/s)": 0.024192 }, { "acc": 0.71875992, "epoch": 3.58, "learning_rate": 7.622879405781645e-05, "loss": 0.85906324, "memory(GiB)": 71.19, "step": 4565, "train_speed(iter/s)": 0.024193 }, { "acc": 0.72346978, "epoch": 3.58, "learning_rate": 7.617356565151122e-05, "loss": 0.84273167, "memory(GiB)": 71.19, "step": 4570, "train_speed(iter/s)": 0.024193 }, { "acc": 0.72537384, "epoch": 3.59, "learning_rate": 7.611829322309544e-05, "loss": 0.86372871, "memory(GiB)": 71.19, "step": 4575, "train_speed(iter/s)": 0.024193 }, { "acc": 0.72613001, "epoch": 3.59, "learning_rate": 7.60629768655335e-05, "loss": 0.83706751, "memory(GiB)": 71.19, "step": 4580, "train_speed(iter/s)": 0.024193 }, { "acc": 0.72427754, "epoch": 3.59, "learning_rate": 7.600761667186362e-05, "loss": 0.83796959, "memory(GiB)": 71.19, "step": 4585, "train_speed(iter/s)": 0.024193 }, { "acc": 0.73405333, "epoch": 3.6, "learning_rate": 7.595221273519783e-05, "loss": 0.84556036, "memory(GiB)": 71.19, "step": 4590, "train_speed(iter/s)": 0.024193 }, { "acc": 0.72501197, "epoch": 3.6, "learning_rate": 7.589676514872165e-05, "loss": 0.83936701, "memory(GiB)": 71.19, "step": 4595, "train_speed(iter/s)": 0.024193 }, { "acc": 0.71823664, "epoch": 3.61, "learning_rate": 7.584127400569408e-05, "loss": 0.85496521, "memory(GiB)": 71.19, "step": 4600, "train_speed(iter/s)": 0.024193 }, { "acc": 0.72726426, "epoch": 3.61, "learning_rate": 7.57857393994473e-05, "loss": 0.84609032, "memory(GiB)": 71.19, "step": 4605, "train_speed(iter/s)": 0.024193 }, { "acc": 0.72315588, "epoch": 3.61, "learning_rate": 7.573016142338668e-05, "loss": 0.85114698, "memory(GiB)": 71.19, "step": 4610, "train_speed(iter/s)": 0.024193 }, { "acc": 0.71871891, "epoch": 3.62, "learning_rate": 7.56745401709905e-05, "loss": 0.8591588, "memory(GiB)": 71.19, "step": 4615, "train_speed(iter/s)": 0.024194 }, { "acc": 0.73322353, "epoch": 3.62, "learning_rate": 7.56188757358098e-05, "loss": 0.83172417, "memory(GiB)": 71.19, "step": 4620, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72749004, "epoch": 3.62, "learning_rate": 7.55631682114683e-05, "loss": 0.84570827, "memory(GiB)": 71.19, "step": 4625, "train_speed(iter/s)": 0.024194 }, { "acc": 0.73905587, "epoch": 3.63, "learning_rate": 7.550741769166214e-05, "loss": 0.80478392, "memory(GiB)": 71.19, "step": 4630, "train_speed(iter/s)": 0.024193 }, { "acc": 0.73352895, "epoch": 3.63, "learning_rate": 7.545162427015981e-05, "loss": 0.81894255, "memory(GiB)": 71.19, "step": 4635, "train_speed(iter/s)": 0.024193 }, { "acc": 0.73099337, "epoch": 3.64, "learning_rate": 7.539578804080198e-05, "loss": 0.84308519, "memory(GiB)": 71.19, "step": 4640, "train_speed(iter/s)": 0.024193 }, { "acc": 0.71498265, "epoch": 3.64, "learning_rate": 7.533990909750125e-05, "loss": 0.87364149, "memory(GiB)": 71.19, "step": 4645, "train_speed(iter/s)": 0.024193 }, { "acc": 0.71505318, "epoch": 3.64, "learning_rate": 7.528398753424213e-05, "loss": 0.86194353, "memory(GiB)": 71.19, "step": 4650, "train_speed(iter/s)": 0.024194 }, { "acc": 0.7217392, "epoch": 3.65, "learning_rate": 7.522802344508078e-05, "loss": 0.8368371, "memory(GiB)": 71.19, "step": 4655, "train_speed(iter/s)": 0.024193 }, { "acc": 0.74326339, "epoch": 3.65, "learning_rate": 7.517201692414488e-05, "loss": 0.79576321, "memory(GiB)": 71.19, "step": 4660, "train_speed(iter/s)": 0.024193 }, { "acc": 0.72128105, "epoch": 3.66, "learning_rate": 7.51159680656335e-05, "loss": 0.84689484, "memory(GiB)": 71.19, "step": 4665, "train_speed(iter/s)": 0.024193 }, { "acc": 0.73218131, "epoch": 3.66, "learning_rate": 7.505987696381691e-05, "loss": 0.80757494, "memory(GiB)": 71.19, "step": 4670, "train_speed(iter/s)": 0.024193 }, { "acc": 0.72402844, "epoch": 3.66, "learning_rate": 7.500374371303643e-05, "loss": 0.83772345, "memory(GiB)": 71.19, "step": 4675, "train_speed(iter/s)": 0.024194 }, { "acc": 0.73203745, "epoch": 3.67, "learning_rate": 7.494756840770425e-05, "loss": 0.82900801, "memory(GiB)": 71.19, "step": 4680, "train_speed(iter/s)": 0.024194 }, { "acc": 0.73576927, "epoch": 3.67, "learning_rate": 7.489135114230333e-05, "loss": 0.82100172, "memory(GiB)": 71.19, "step": 4685, "train_speed(iter/s)": 0.024193 }, { "acc": 0.72248497, "epoch": 3.68, "learning_rate": 7.483509201138717e-05, "loss": 0.83817987, "memory(GiB)": 71.19, "step": 4690, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72471027, "epoch": 3.68, "learning_rate": 7.477879110957972e-05, "loss": 0.83272848, "memory(GiB)": 71.19, "step": 4695, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72231956, "epoch": 3.68, "learning_rate": 7.472244853157517e-05, "loss": 0.85366192, "memory(GiB)": 71.19, "step": 4700, "train_speed(iter/s)": 0.024193 }, { "acc": 0.72966657, "epoch": 3.69, "learning_rate": 7.46660643721378e-05, "loss": 0.84410038, "memory(GiB)": 71.19, "step": 4705, "train_speed(iter/s)": 0.024193 }, { "acc": 0.71471176, "epoch": 3.69, "learning_rate": 7.460963872610181e-05, "loss": 0.86216116, "memory(GiB)": 71.19, "step": 4710, "train_speed(iter/s)": 0.024193 }, { "acc": 0.73702974, "epoch": 3.7, "learning_rate": 7.455317168837122e-05, "loss": 0.83383703, "memory(GiB)": 71.19, "step": 4715, "train_speed(iter/s)": 0.024194 }, { "acc": 0.70991087, "epoch": 3.7, "learning_rate": 7.449666335391963e-05, "loss": 0.8940443, "memory(GiB)": 71.19, "step": 4720, "train_speed(iter/s)": 0.024194 }, { "acc": 0.71537218, "epoch": 3.7, "learning_rate": 7.444011381779013e-05, "loss": 0.86577148, "memory(GiB)": 71.19, "step": 4725, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72070432, "epoch": 3.71, "learning_rate": 7.438352317509508e-05, "loss": 0.84208603, "memory(GiB)": 71.19, "step": 4730, "train_speed(iter/s)": 0.024194 }, { "acc": 0.73044333, "epoch": 3.71, "learning_rate": 7.4326891521016e-05, "loss": 0.82190485, "memory(GiB)": 71.19, "step": 4735, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72588086, "epoch": 3.72, "learning_rate": 7.427021895080339e-05, "loss": 0.85073795, "memory(GiB)": 71.19, "step": 4740, "train_speed(iter/s)": 0.024194 }, { "acc": 0.7267787, "epoch": 3.72, "learning_rate": 7.421350555977653e-05, "loss": 0.83103905, "memory(GiB)": 71.19, "step": 4745, "train_speed(iter/s)": 0.024193 }, { "acc": 0.72804618, "epoch": 3.72, "learning_rate": 7.415675144332339e-05, "loss": 0.83636494, "memory(GiB)": 71.19, "step": 4750, "train_speed(iter/s)": 0.024193 }, { "acc": 0.72480588, "epoch": 3.73, "learning_rate": 7.409995669690046e-05, "loss": 0.84966698, "memory(GiB)": 71.19, "step": 4755, "train_speed(iter/s)": 0.024193 }, { "acc": 0.72015243, "epoch": 3.73, "learning_rate": 7.404312141603251e-05, "loss": 0.86545382, "memory(GiB)": 71.19, "step": 4760, "train_speed(iter/s)": 0.024193 }, { "acc": 0.7205214, "epoch": 3.73, "learning_rate": 7.398624569631254e-05, "loss": 0.86042595, "memory(GiB)": 71.19, "step": 4765, "train_speed(iter/s)": 0.024193 }, { "acc": 0.734021, "epoch": 3.74, "learning_rate": 7.392932963340151e-05, "loss": 0.82725487, "memory(GiB)": 71.19, "step": 4770, "train_speed(iter/s)": 0.024194 }, { "acc": 0.71987934, "epoch": 3.74, "learning_rate": 7.38723733230283e-05, "loss": 0.85444155, "memory(GiB)": 71.19, "step": 4775, "train_speed(iter/s)": 0.024194 }, { "acc": 0.7245378, "epoch": 3.75, "learning_rate": 7.381537686098942e-05, "loss": 0.85145273, "memory(GiB)": 71.19, "step": 4780, "train_speed(iter/s)": 0.024194 }, { "acc": 0.71786847, "epoch": 3.75, "learning_rate": 7.375834034314895e-05, "loss": 0.88409557, "memory(GiB)": 71.19, "step": 4785, "train_speed(iter/s)": 0.024194 }, { "acc": 0.71265817, "epoch": 3.75, "learning_rate": 7.370126386543833e-05, "loss": 0.89585934, "memory(GiB)": 71.19, "step": 4790, "train_speed(iter/s)": 0.024194 }, { "acc": 0.7173708, "epoch": 3.76, "learning_rate": 7.364414752385622e-05, "loss": 0.88221331, "memory(GiB)": 71.19, "step": 4795, "train_speed(iter/s)": 0.024194 }, { "acc": 0.73161063, "epoch": 3.76, "learning_rate": 7.358699141446833e-05, "loss": 0.8258852, "memory(GiB)": 71.19, "step": 4800, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72865162, "epoch": 3.77, "learning_rate": 7.35297956334072e-05, "loss": 0.84000244, "memory(GiB)": 71.19, "step": 4805, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72472777, "epoch": 3.77, "learning_rate": 7.34725602768722e-05, "loss": 0.84447889, "memory(GiB)": 71.19, "step": 4810, "train_speed(iter/s)": 0.024194 }, { "acc": 0.7250689, "epoch": 3.77, "learning_rate": 7.341528544112915e-05, "loss": 0.866607, "memory(GiB)": 71.19, "step": 4815, "train_speed(iter/s)": 0.024195 }, { "acc": 0.72776718, "epoch": 3.78, "learning_rate": 7.335797122251038e-05, "loss": 0.84377356, "memory(GiB)": 71.19, "step": 4820, "train_speed(iter/s)": 0.024195 }, { "acc": 0.72724218, "epoch": 3.78, "learning_rate": 7.330061771741436e-05, "loss": 0.85711832, "memory(GiB)": 71.19, "step": 4825, "train_speed(iter/s)": 0.024195 }, { "acc": 0.72393856, "epoch": 3.79, "learning_rate": 7.324322502230571e-05, "loss": 0.8321475, "memory(GiB)": 71.19, "step": 4830, "train_speed(iter/s)": 0.024195 }, { "acc": 0.73015528, "epoch": 3.79, "learning_rate": 7.318579323371493e-05, "loss": 0.84410172, "memory(GiB)": 71.19, "step": 4835, "train_speed(iter/s)": 0.024195 }, { "acc": 0.73202195, "epoch": 3.79, "learning_rate": 7.312832244823827e-05, "loss": 0.81751471, "memory(GiB)": 71.19, "step": 4840, "train_speed(iter/s)": 0.024195 }, { "acc": 0.71658878, "epoch": 3.8, "learning_rate": 7.307081276253761e-05, "loss": 0.87323961, "memory(GiB)": 71.19, "step": 4845, "train_speed(iter/s)": 0.024195 }, { "acc": 0.73308387, "epoch": 3.8, "learning_rate": 7.301326427334019e-05, "loss": 0.82617254, "memory(GiB)": 71.19, "step": 4850, "train_speed(iter/s)": 0.024194 }, { "acc": 0.725809, "epoch": 3.81, "learning_rate": 7.295567707743856e-05, "loss": 0.8599144, "memory(GiB)": 71.19, "step": 4855, "train_speed(iter/s)": 0.024194 }, { "acc": 0.71556239, "epoch": 3.81, "learning_rate": 7.289805127169038e-05, "loss": 0.8742794, "memory(GiB)": 71.19, "step": 4860, "train_speed(iter/s)": 0.024194 }, { "acc": 0.73000875, "epoch": 3.81, "learning_rate": 7.284038695301823e-05, "loss": 0.83999195, "memory(GiB)": 71.19, "step": 4865, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72756143, "epoch": 3.82, "learning_rate": 7.278268421840944e-05, "loss": 0.84345179, "memory(GiB)": 71.19, "step": 4870, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72185001, "epoch": 3.82, "learning_rate": 7.272494316491602e-05, "loss": 0.84758148, "memory(GiB)": 71.19, "step": 4875, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72773247, "epoch": 3.82, "learning_rate": 7.266716388965437e-05, "loss": 0.83907547, "memory(GiB)": 71.19, "step": 4880, "train_speed(iter/s)": 0.024194 }, { "acc": 0.7324759, "epoch": 3.83, "learning_rate": 7.260934648980521e-05, "loss": 0.81709414, "memory(GiB)": 71.19, "step": 4885, "train_speed(iter/s)": 0.024194 }, { "acc": 0.73284607, "epoch": 3.83, "learning_rate": 7.255149106261339e-05, "loss": 0.81797581, "memory(GiB)": 71.19, "step": 4890, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72334728, "epoch": 3.84, "learning_rate": 7.249359770538764e-05, "loss": 0.83954468, "memory(GiB)": 71.19, "step": 4895, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72269335, "epoch": 3.84, "learning_rate": 7.24356665155006e-05, "loss": 0.84613752, "memory(GiB)": 71.19, "step": 4900, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72381783, "epoch": 3.84, "learning_rate": 7.237769759038846e-05, "loss": 0.83016071, "memory(GiB)": 71.19, "step": 4905, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72248049, "epoch": 3.85, "learning_rate": 7.231969102755093e-05, "loss": 0.85668192, "memory(GiB)": 71.19, "step": 4910, "train_speed(iter/s)": 0.024194 }, { "acc": 0.73048677, "epoch": 3.85, "learning_rate": 7.226164692455098e-05, "loss": 0.84103241, "memory(GiB)": 71.19, "step": 4915, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72759271, "epoch": 3.86, "learning_rate": 7.220356537901474e-05, "loss": 0.82246981, "memory(GiB)": 71.19, "step": 4920, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72148042, "epoch": 3.86, "learning_rate": 7.214544648863131e-05, "loss": 0.85546913, "memory(GiB)": 71.19, "step": 4925, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72802148, "epoch": 3.86, "learning_rate": 7.208729035115264e-05, "loss": 0.84229441, "memory(GiB)": 71.19, "step": 4930, "train_speed(iter/s)": 0.024194 }, { "acc": 0.73225174, "epoch": 3.87, "learning_rate": 7.202909706439326e-05, "loss": 0.82962236, "memory(GiB)": 71.19, "step": 4935, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72838197, "epoch": 3.87, "learning_rate": 7.197086672623023e-05, "loss": 0.84410496, "memory(GiB)": 71.19, "step": 4940, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72434216, "epoch": 3.88, "learning_rate": 7.191259943460292e-05, "loss": 0.85847845, "memory(GiB)": 71.19, "step": 4945, "train_speed(iter/s)": 0.024194 }, { "acc": 0.71965685, "epoch": 3.88, "learning_rate": 7.185429528751285e-05, "loss": 0.85441608, "memory(GiB)": 71.19, "step": 4950, "train_speed(iter/s)": 0.024194 }, { "acc": 0.713869, "epoch": 3.88, "learning_rate": 7.179595438302348e-05, "loss": 0.8926239, "memory(GiB)": 71.19, "step": 4955, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72364521, "epoch": 3.89, "learning_rate": 7.173757681926021e-05, "loss": 0.84683857, "memory(GiB)": 71.19, "step": 4960, "train_speed(iter/s)": 0.024194 }, { "acc": 0.73327866, "epoch": 3.89, "learning_rate": 7.167916269440998e-05, "loss": 0.8079627, "memory(GiB)": 71.19, "step": 4965, "train_speed(iter/s)": 0.024194 }, { "acc": 0.7266232, "epoch": 3.9, "learning_rate": 7.162071210672128e-05, "loss": 0.83985033, "memory(GiB)": 71.19, "step": 4970, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72303486, "epoch": 3.9, "learning_rate": 7.156222515450393e-05, "loss": 0.85938587, "memory(GiB)": 71.19, "step": 4975, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72731833, "epoch": 3.9, "learning_rate": 7.150370193612889e-05, "loss": 0.81108408, "memory(GiB)": 71.19, "step": 4980, "train_speed(iter/s)": 0.024194 }, { "acc": 0.72412114, "epoch": 3.91, "learning_rate": 7.144514255002813e-05, "loss": 0.86580133, "memory(GiB)": 71.19, "step": 4985, "train_speed(iter/s)": 0.024195 }, { "acc": 0.73388848, "epoch": 3.91, "learning_rate": 7.138654709469446e-05, "loss": 0.82623234, "memory(GiB)": 71.19, "step": 4990, "train_speed(iter/s)": 0.024195 }, { "acc": 0.73632493, "epoch": 3.91, "learning_rate": 7.132791566868133e-05, "loss": 0.83154993, "memory(GiB)": 71.19, "step": 4995, "train_speed(iter/s)": 0.024195 }, { "acc": 0.71437707, "epoch": 3.92, "learning_rate": 7.126924837060271e-05, "loss": 0.88094139, "memory(GiB)": 71.19, "step": 5000, "train_speed(iter/s)": 0.024195 }, { "acc": 0.71930361, "epoch": 3.92, "learning_rate": 7.121054529913292e-05, "loss": 0.85584583, "memory(GiB)": 71.19, "step": 5005, "train_speed(iter/s)": 0.024195 }, { "acc": 0.73194895, "epoch": 3.93, "learning_rate": 7.11518065530064e-05, "loss": 0.83459311, "memory(GiB)": 71.19, "step": 5010, "train_speed(iter/s)": 0.024195 }, { "acc": 0.73548174, "epoch": 3.93, "learning_rate": 7.109303223101765e-05, "loss": 0.82189837, "memory(GiB)": 71.19, "step": 5015, "train_speed(iter/s)": 0.024195 }, { "acc": 0.72127552, "epoch": 3.93, "learning_rate": 7.103422243202096e-05, "loss": 0.86382265, "memory(GiB)": 71.19, "step": 5020, "train_speed(iter/s)": 0.024196 }, { "acc": 0.73075724, "epoch": 3.94, "learning_rate": 7.09753772549303e-05, "loss": 0.81052485, "memory(GiB)": 71.19, "step": 5025, "train_speed(iter/s)": 0.024196 }, { "acc": 0.72500548, "epoch": 3.94, "learning_rate": 7.091649679871914e-05, "loss": 0.84569874, "memory(GiB)": 71.19, "step": 5030, "train_speed(iter/s)": 0.024196 }, { "acc": 0.72286496, "epoch": 3.95, "learning_rate": 7.085758116242036e-05, "loss": 0.84847412, "memory(GiB)": 71.19, "step": 5035, "train_speed(iter/s)": 0.024196 }, { "acc": 0.72470217, "epoch": 3.95, "learning_rate": 7.079863044512588e-05, "loss": 0.85448866, "memory(GiB)": 71.19, "step": 5040, "train_speed(iter/s)": 0.024196 }, { "acc": 0.71964459, "epoch": 3.95, "learning_rate": 7.07396447459867e-05, "loss": 0.86957884, "memory(GiB)": 71.19, "step": 5045, "train_speed(iter/s)": 0.024197 }, { "acc": 0.72210011, "epoch": 3.96, "learning_rate": 7.06806241642127e-05, "loss": 0.85360498, "memory(GiB)": 71.19, "step": 5050, "train_speed(iter/s)": 0.024197 }, { "acc": 0.71635909, "epoch": 3.96, "learning_rate": 7.062156879907234e-05, "loss": 0.88525906, "memory(GiB)": 71.19, "step": 5055, "train_speed(iter/s)": 0.024197 }, { "acc": 0.71934958, "epoch": 3.97, "learning_rate": 7.05624787498926e-05, "loss": 0.86156912, "memory(GiB)": 71.19, "step": 5060, "train_speed(iter/s)": 0.024197 }, { "acc": 0.7274035, "epoch": 3.97, "learning_rate": 7.050335411605888e-05, "loss": 0.84397354, "memory(GiB)": 71.19, "step": 5065, "train_speed(iter/s)": 0.024197 }, { "acc": 0.72633142, "epoch": 3.97, "learning_rate": 7.044419499701462e-05, "loss": 0.85779991, "memory(GiB)": 71.19, "step": 5070, "train_speed(iter/s)": 0.024197 }, { "acc": 0.7241158, "epoch": 3.98, "learning_rate": 7.038500149226138e-05, "loss": 0.84771671, "memory(GiB)": 71.19, "step": 5075, "train_speed(iter/s)": 0.024197 }, { "acc": 0.71850133, "epoch": 3.98, "learning_rate": 7.032577370135846e-05, "loss": 0.85709019, "memory(GiB)": 71.19, "step": 5080, "train_speed(iter/s)": 0.024198 }, { "acc": 0.71291666, "epoch": 3.99, "learning_rate": 7.026651172392293e-05, "loss": 0.88760433, "memory(GiB)": 71.19, "step": 5085, "train_speed(iter/s)": 0.024198 }, { "acc": 0.72612734, "epoch": 3.99, "learning_rate": 7.020721565962925e-05, "loss": 0.8476985, "memory(GiB)": 71.19, "step": 5090, "train_speed(iter/s)": 0.024198 }, { "acc": 0.72394257, "epoch": 3.99, "learning_rate": 7.014788560820928e-05, "loss": 0.83070583, "memory(GiB)": 71.19, "step": 5095, "train_speed(iter/s)": 0.024198 }, { "acc": 0.72538056, "epoch": 4.0, "learning_rate": 7.0088521669452e-05, "loss": 0.83622665, "memory(GiB)": 71.19, "step": 5100, "train_speed(iter/s)": 0.024198 }, { "epoch": 4.0, "eval_acc": 0.7649636864512898, "eval_loss": 0.6868141293525696, "eval_runtime": 107.3501, "eval_samples_per_second": 0.866, "eval_steps_per_second": 0.866, "step": 5103 }, { "acc": 0.74643955, "epoch": 4.0, "learning_rate": 7.002912394320344e-05, "loss": 0.78384089, "memory(GiB)": 71.19, "step": 5105, "train_speed(iter/s)": 0.024185 }, { "acc": 0.76987772, "epoch": 4.01, "learning_rate": 6.996969252936645e-05, "loss": 0.70066395, "memory(GiB)": 71.19, "step": 5110, "train_speed(iter/s)": 0.024185 }, { "acc": 0.76714759, "epoch": 4.01, "learning_rate": 6.991022752790045e-05, "loss": 0.69841785, "memory(GiB)": 71.19, "step": 5115, "train_speed(iter/s)": 0.024185 }, { "acc": 0.76072993, "epoch": 4.01, "learning_rate": 6.985072903882149e-05, "loss": 0.7248908, "memory(GiB)": 71.19, "step": 5120, "train_speed(iter/s)": 0.024185 }, { "acc": 0.76630473, "epoch": 4.02, "learning_rate": 6.979119716220184e-05, "loss": 0.71821766, "memory(GiB)": 71.19, "step": 5125, "train_speed(iter/s)": 0.024185 }, { "acc": 0.75970073, "epoch": 4.02, "learning_rate": 6.973163199816998e-05, "loss": 0.71612158, "memory(GiB)": 71.19, "step": 5130, "train_speed(iter/s)": 0.024185 }, { "acc": 0.77745566, "epoch": 4.02, "learning_rate": 6.967203364691035e-05, "loss": 0.65998745, "memory(GiB)": 71.19, "step": 5135, "train_speed(iter/s)": 0.024185 }, { "acc": 0.77100053, "epoch": 4.03, "learning_rate": 6.961240220866321e-05, "loss": 0.68110933, "memory(GiB)": 71.19, "step": 5140, "train_speed(iter/s)": 0.024185 }, { "acc": 0.76853023, "epoch": 4.03, "learning_rate": 6.955273778372448e-05, "loss": 0.69694996, "memory(GiB)": 71.19, "step": 5145, "train_speed(iter/s)": 0.024185 }, { "acc": 0.77470398, "epoch": 4.04, "learning_rate": 6.949304047244557e-05, "loss": 0.67210259, "memory(GiB)": 71.19, "step": 5150, "train_speed(iter/s)": 0.024186 }, { "acc": 0.78012462, "epoch": 4.04, "learning_rate": 6.943331037523318e-05, "loss": 0.68669949, "memory(GiB)": 71.19, "step": 5155, "train_speed(iter/s)": 0.024186 }, { "acc": 0.7726408, "epoch": 4.04, "learning_rate": 6.937354759254915e-05, "loss": 0.6830338, "memory(GiB)": 71.19, "step": 5160, "train_speed(iter/s)": 0.024186 }, { "acc": 0.78194818, "epoch": 4.05, "learning_rate": 6.931375222491035e-05, "loss": 0.66556716, "memory(GiB)": 71.19, "step": 5165, "train_speed(iter/s)": 0.024186 }, { "acc": 0.7694963, "epoch": 4.05, "learning_rate": 6.925392437288837e-05, "loss": 0.70391464, "memory(GiB)": 71.19, "step": 5170, "train_speed(iter/s)": 0.024186 }, { "acc": 0.76015844, "epoch": 4.06, "learning_rate": 6.919406413710951e-05, "loss": 0.70853, "memory(GiB)": 71.19, "step": 5175, "train_speed(iter/s)": 0.024186 }, { "acc": 0.78214989, "epoch": 4.06, "learning_rate": 6.91341716182545e-05, "loss": 0.64797435, "memory(GiB)": 71.19, "step": 5180, "train_speed(iter/s)": 0.024186 }, { "acc": 0.76053729, "epoch": 4.06, "learning_rate": 6.907424691705836e-05, "loss": 0.71844401, "memory(GiB)": 71.19, "step": 5185, "train_speed(iter/s)": 0.024185 }, { "acc": 0.76250482, "epoch": 4.07, "learning_rate": 6.90142901343103e-05, "loss": 0.69147439, "memory(GiB)": 71.19, "step": 5190, "train_speed(iter/s)": 0.024185 }, { "acc": 0.76390204, "epoch": 4.07, "learning_rate": 6.89543013708534e-05, "loss": 0.69992247, "memory(GiB)": 71.19, "step": 5195, "train_speed(iter/s)": 0.024185 }, { "acc": 0.77699156, "epoch": 4.08, "learning_rate": 6.889428072758458e-05, "loss": 0.65943475, "memory(GiB)": 71.19, "step": 5200, "train_speed(iter/s)": 0.024186 }, { "acc": 0.77530951, "epoch": 4.08, "learning_rate": 6.883422830545437e-05, "loss": 0.68648615, "memory(GiB)": 71.19, "step": 5205, "train_speed(iter/s)": 0.024186 }, { "acc": 0.7692956, "epoch": 4.08, "learning_rate": 6.87741442054668e-05, "loss": 0.70791373, "memory(GiB)": 71.19, "step": 5210, "train_speed(iter/s)": 0.024186 }, { "acc": 0.75982862, "epoch": 4.09, "learning_rate": 6.871402852867906e-05, "loss": 0.72244291, "memory(GiB)": 71.19, "step": 5215, "train_speed(iter/s)": 0.024186 }, { "acc": 0.76754866, "epoch": 4.09, "learning_rate": 6.865388137620156e-05, "loss": 0.71881785, "memory(GiB)": 71.19, "step": 5220, "train_speed(iter/s)": 0.024186 }, { "acc": 0.76008248, "epoch": 4.1, "learning_rate": 6.859370284919762e-05, "loss": 0.72455072, "memory(GiB)": 71.19, "step": 5225, "train_speed(iter/s)": 0.024186 }, { "acc": 0.76211185, "epoch": 4.1, "learning_rate": 6.853349304888331e-05, "loss": 0.71970272, "memory(GiB)": 71.19, "step": 5230, "train_speed(iter/s)": 0.024186 }, { "acc": 0.76919765, "epoch": 4.1, "learning_rate": 6.847325207652733e-05, "loss": 0.69645362, "memory(GiB)": 71.19, "step": 5235, "train_speed(iter/s)": 0.024186 }, { "acc": 0.78420725, "epoch": 4.11, "learning_rate": 6.841298003345075e-05, "loss": 0.66271205, "memory(GiB)": 71.19, "step": 5240, "train_speed(iter/s)": 0.024186 }, { "acc": 0.77155137, "epoch": 4.11, "learning_rate": 6.835267702102697e-05, "loss": 0.69231195, "memory(GiB)": 71.19, "step": 5245, "train_speed(iter/s)": 0.024186 }, { "acc": 0.76921053, "epoch": 4.11, "learning_rate": 6.829234314068143e-05, "loss": 0.69602065, "memory(GiB)": 71.19, "step": 5250, "train_speed(iter/s)": 0.024186 }, { "acc": 0.7555491, "epoch": 4.12, "learning_rate": 6.823197849389152e-05, "loss": 0.74989166, "memory(GiB)": 71.19, "step": 5255, "train_speed(iter/s)": 0.024186 }, { "acc": 0.77402582, "epoch": 4.12, "learning_rate": 6.817158318218638e-05, "loss": 0.68813601, "memory(GiB)": 71.19, "step": 5260, "train_speed(iter/s)": 0.024186 }, { "acc": 0.77681746, "epoch": 4.13, "learning_rate": 6.811115730714665e-05, "loss": 0.66130342, "memory(GiB)": 71.19, "step": 5265, "train_speed(iter/s)": 0.024187 }, { "acc": 0.76329417, "epoch": 4.13, "learning_rate": 6.80507009704045e-05, "loss": 0.72811098, "memory(GiB)": 71.19, "step": 5270, "train_speed(iter/s)": 0.024187 }, { "acc": 0.76749864, "epoch": 4.13, "learning_rate": 6.799021427364324e-05, "loss": 0.69163623, "memory(GiB)": 71.19, "step": 5275, "train_speed(iter/s)": 0.024187 }, { "acc": 0.77502966, "epoch": 4.14, "learning_rate": 6.792969731859727e-05, "loss": 0.68422966, "memory(GiB)": 71.19, "step": 5280, "train_speed(iter/s)": 0.024187 }, { "acc": 0.76088901, "epoch": 4.14, "learning_rate": 6.786915020705189e-05, "loss": 0.72609415, "memory(GiB)": 71.19, "step": 5285, "train_speed(iter/s)": 0.024187 }, { "acc": 0.77280045, "epoch": 4.15, "learning_rate": 6.780857304084309e-05, "loss": 0.67699676, "memory(GiB)": 71.19, "step": 5290, "train_speed(iter/s)": 0.024187 }, { "acc": 0.76591234, "epoch": 4.15, "learning_rate": 6.774796592185746e-05, "loss": 0.71467628, "memory(GiB)": 71.19, "step": 5295, "train_speed(iter/s)": 0.024187 }, { "acc": 0.76299758, "epoch": 4.15, "learning_rate": 6.768732895203196e-05, "loss": 0.71544986, "memory(GiB)": 71.19, "step": 5300, "train_speed(iter/s)": 0.024187 }, { "acc": 0.77150006, "epoch": 4.16, "learning_rate": 6.762666223335372e-05, "loss": 0.69964447, "memory(GiB)": 71.19, "step": 5305, "train_speed(iter/s)": 0.024187 }, { "acc": 0.75832853, "epoch": 4.16, "learning_rate": 6.756596586785992e-05, "loss": 0.74018202, "memory(GiB)": 71.19, "step": 5310, "train_speed(iter/s)": 0.024187 }, { "acc": 0.77024693, "epoch": 4.17, "learning_rate": 6.750523995763762e-05, "loss": 0.70159526, "memory(GiB)": 71.19, "step": 5315, "train_speed(iter/s)": 0.024187 }, { "acc": 0.77265477, "epoch": 4.17, "learning_rate": 6.744448460482357e-05, "loss": 0.65708079, "memory(GiB)": 71.19, "step": 5320, "train_speed(iter/s)": 0.024187 }, { "acc": 0.7672987, "epoch": 4.17, "learning_rate": 6.738369991160402e-05, "loss": 0.70357451, "memory(GiB)": 71.19, "step": 5325, "train_speed(iter/s)": 0.024187 }, { "acc": 0.78470359, "epoch": 4.18, "learning_rate": 6.732288598021458e-05, "loss": 0.66129317, "memory(GiB)": 71.19, "step": 5330, "train_speed(iter/s)": 0.024187 }, { "acc": 0.75318675, "epoch": 4.18, "learning_rate": 6.726204291294004e-05, "loss": 0.7573597, "memory(GiB)": 71.19, "step": 5335, "train_speed(iter/s)": 0.024187 }, { "acc": 0.76494775, "epoch": 4.19, "learning_rate": 6.720117081211419e-05, "loss": 0.72770386, "memory(GiB)": 71.19, "step": 5340, "train_speed(iter/s)": 0.024187 }, { "acc": 0.77200723, "epoch": 4.19, "learning_rate": 6.714026978011967e-05, "loss": 0.68737392, "memory(GiB)": 71.19, "step": 5345, "train_speed(iter/s)": 0.024187 }, { "acc": 0.75903749, "epoch": 4.19, "learning_rate": 6.707933991938776e-05, "loss": 0.70887847, "memory(GiB)": 71.19, "step": 5350, "train_speed(iter/s)": 0.024187 }, { "acc": 0.76615915, "epoch": 4.2, "learning_rate": 6.701838133239822e-05, "loss": 0.70868678, "memory(GiB)": 71.19, "step": 5355, "train_speed(iter/s)": 0.024187 }, { "acc": 0.76639304, "epoch": 4.2, "learning_rate": 6.695739412167916e-05, "loss": 0.71408496, "memory(GiB)": 71.19, "step": 5360, "train_speed(iter/s)": 0.024187 }, { "acc": 0.7674984, "epoch": 4.2, "learning_rate": 6.689637838980678e-05, "loss": 0.69706955, "memory(GiB)": 71.19, "step": 5365, "train_speed(iter/s)": 0.024187 }, { "acc": 0.77202821, "epoch": 4.21, "learning_rate": 6.683533423940531e-05, "loss": 0.69185266, "memory(GiB)": 71.19, "step": 5370, "train_speed(iter/s)": 0.024187 }, { "acc": 0.77602949, "epoch": 4.21, "learning_rate": 6.677426177314675e-05, "loss": 0.66611705, "memory(GiB)": 71.19, "step": 5375, "train_speed(iter/s)": 0.024187 }, { "acc": 0.76406932, "epoch": 4.22, "learning_rate": 6.67131610937507e-05, "loss": 0.70172687, "memory(GiB)": 71.19, "step": 5380, "train_speed(iter/s)": 0.024187 }, { "acc": 0.76476321, "epoch": 4.22, "learning_rate": 6.665203230398425e-05, "loss": 0.6902421, "memory(GiB)": 71.19, "step": 5385, "train_speed(iter/s)": 0.024187 }, { "acc": 0.75160317, "epoch": 4.22, "learning_rate": 6.659087550666177e-05, "loss": 0.74526944, "memory(GiB)": 71.19, "step": 5390, "train_speed(iter/s)": 0.024187 }, { "acc": 0.76145148, "epoch": 4.23, "learning_rate": 6.652969080464472e-05, "loss": 0.73384714, "memory(GiB)": 71.19, "step": 5395, "train_speed(iter/s)": 0.024187 }, { "acc": 0.76717629, "epoch": 4.23, "learning_rate": 6.646847830084148e-05, "loss": 0.69845848, "memory(GiB)": 71.19, "step": 5400, "train_speed(iter/s)": 0.024187 }, { "acc": 0.78010674, "epoch": 4.24, "learning_rate": 6.640723809820724e-05, "loss": 0.67031679, "memory(GiB)": 71.19, "step": 5405, "train_speed(iter/s)": 0.024187 }, { "acc": 0.76649265, "epoch": 4.24, "learning_rate": 6.634597029974373e-05, "loss": 0.72474589, "memory(GiB)": 71.19, "step": 5410, "train_speed(iter/s)": 0.024187 }, { "acc": 0.77016811, "epoch": 4.24, "learning_rate": 6.628467500849909e-05, "loss": 0.69438682, "memory(GiB)": 71.19, "step": 5415, "train_speed(iter/s)": 0.024187 }, { "acc": 0.76155553, "epoch": 4.25, "learning_rate": 6.622335232756773e-05, "loss": 0.70451875, "memory(GiB)": 71.19, "step": 5420, "train_speed(iter/s)": 0.024187 }, { "acc": 0.75910053, "epoch": 4.25, "learning_rate": 6.616200236009016e-05, "loss": 0.73806357, "memory(GiB)": 71.19, "step": 5425, "train_speed(iter/s)": 0.024187 }, { "acc": 0.76222086, "epoch": 4.26, "learning_rate": 6.61006252092527e-05, "loss": 0.7077971, "memory(GiB)": 71.19, "step": 5430, "train_speed(iter/s)": 0.024187 }, { "acc": 0.76169763, "epoch": 4.26, "learning_rate": 6.603922097828745e-05, "loss": 0.71544447, "memory(GiB)": 71.19, "step": 5435, "train_speed(iter/s)": 0.024187 }, { "acc": 0.77774611, "epoch": 4.26, "learning_rate": 6.597778977047205e-05, "loss": 0.68002701, "memory(GiB)": 71.19, "step": 5440, "train_speed(iter/s)": 0.024188 }, { "acc": 0.76104169, "epoch": 4.27, "learning_rate": 6.591633168912947e-05, "loss": 0.72893152, "memory(GiB)": 71.19, "step": 5445, "train_speed(iter/s)": 0.024188 }, { "acc": 0.77122054, "epoch": 4.27, "learning_rate": 6.585484683762794e-05, "loss": 0.69275894, "memory(GiB)": 71.19, "step": 5450, "train_speed(iter/s)": 0.024188 }, { "acc": 0.77282248, "epoch": 4.28, "learning_rate": 6.57933353193807e-05, "loss": 0.69376159, "memory(GiB)": 71.19, "step": 5455, "train_speed(iter/s)": 0.024188 }, { "acc": 0.7599041, "epoch": 4.28, "learning_rate": 6.57317972378458e-05, "loss": 0.7232058, "memory(GiB)": 71.19, "step": 5460, "train_speed(iter/s)": 0.024189 }, { "acc": 0.77883954, "epoch": 4.28, "learning_rate": 6.567023269652602e-05, "loss": 0.66660323, "memory(GiB)": 71.19, "step": 5465, "train_speed(iter/s)": 0.024188 }, { "acc": 0.76177578, "epoch": 4.29, "learning_rate": 6.56086417989686e-05, "loss": 0.71664767, "memory(GiB)": 71.19, "step": 5470, "train_speed(iter/s)": 0.024188 }, { "acc": 0.76507282, "epoch": 4.29, "learning_rate": 6.554702464876514e-05, "loss": 0.71534958, "memory(GiB)": 71.19, "step": 5475, "train_speed(iter/s)": 0.024188 }, { "acc": 0.75966554, "epoch": 4.3, "learning_rate": 6.54853813495514e-05, "loss": 0.71393213, "memory(GiB)": 71.19, "step": 5480, "train_speed(iter/s)": 0.024188 }, { "acc": 0.76525397, "epoch": 4.3, "learning_rate": 6.54237120050071e-05, "loss": 0.70895991, "memory(GiB)": 71.19, "step": 5485, "train_speed(iter/s)": 0.024188 }, { "acc": 0.77907438, "epoch": 4.3, "learning_rate": 6.536201671885575e-05, "loss": 0.69699798, "memory(GiB)": 71.19, "step": 5490, "train_speed(iter/s)": 0.024188 }, { "acc": 0.77636795, "epoch": 4.31, "learning_rate": 6.530029559486455e-05, "loss": 0.67650938, "memory(GiB)": 71.19, "step": 5495, "train_speed(iter/s)": 0.024188 }, { "acc": 0.75787897, "epoch": 4.31, "learning_rate": 6.523854873684409e-05, "loss": 0.73933854, "memory(GiB)": 71.19, "step": 5500, "train_speed(iter/s)": 0.024187 }, { "acc": 0.75708508, "epoch": 4.31, "learning_rate": 6.517677624864831e-05, "loss": 0.72797713, "memory(GiB)": 71.19, "step": 5505, "train_speed(iter/s)": 0.024188 }, { "acc": 0.76517878, "epoch": 4.32, "learning_rate": 6.511497823417418e-05, "loss": 0.71284537, "memory(GiB)": 71.19, "step": 5510, "train_speed(iter/s)": 0.024187 }, { "acc": 0.77976227, "epoch": 4.32, "learning_rate": 6.50531547973617e-05, "loss": 0.66220131, "memory(GiB)": 71.19, "step": 5515, "train_speed(iter/s)": 0.024187 }, { "acc": 0.7598443, "epoch": 4.33, "learning_rate": 6.499130604219354e-05, "loss": 0.71064129, "memory(GiB)": 71.19, "step": 5520, "train_speed(iter/s)": 0.024187 }, { "acc": 0.76225853, "epoch": 4.33, "learning_rate": 6.492943207269498e-05, "loss": 0.74577045, "memory(GiB)": 71.19, "step": 5525, "train_speed(iter/s)": 0.024188 }, { "acc": 0.78424692, "epoch": 4.33, "learning_rate": 6.486753299293375e-05, "loss": 0.66039305, "memory(GiB)": 71.19, "step": 5530, "train_speed(iter/s)": 0.024188 }, { "acc": 0.76662159, "epoch": 4.34, "learning_rate": 6.480560890701976e-05, "loss": 0.71364965, "memory(GiB)": 71.19, "step": 5535, "train_speed(iter/s)": 0.024187 }, { "acc": 0.76090045, "epoch": 4.34, "learning_rate": 6.474365991910501e-05, "loss": 0.70872817, "memory(GiB)": 71.19, "step": 5540, "train_speed(iter/s)": 0.024187 }, { "acc": 0.75455337, "epoch": 4.35, "learning_rate": 6.468168613338339e-05, "loss": 0.74482007, "memory(GiB)": 71.19, "step": 5545, "train_speed(iter/s)": 0.024187 }, { "acc": 0.76986208, "epoch": 4.35, "learning_rate": 6.461968765409041e-05, "loss": 0.69489803, "memory(GiB)": 71.19, "step": 5550, "train_speed(iter/s)": 0.024187 }, { "acc": 0.77070737, "epoch": 4.35, "learning_rate": 6.455766458550329e-05, "loss": 0.71167393, "memory(GiB)": 71.19, "step": 5555, "train_speed(iter/s)": 0.024188 }, { "acc": 0.76613202, "epoch": 4.36, "learning_rate": 6.449561703194042e-05, "loss": 0.70667567, "memory(GiB)": 71.19, "step": 5560, "train_speed(iter/s)": 0.024188 }, { "acc": 0.76189098, "epoch": 4.36, "learning_rate": 6.44335450977615e-05, "loss": 0.71174026, "memory(GiB)": 71.19, "step": 5565, "train_speed(iter/s)": 0.024188 }, { "acc": 0.7586081, "epoch": 4.37, "learning_rate": 6.437144888736715e-05, "loss": 0.7246048, "memory(GiB)": 71.19, "step": 5570, "train_speed(iter/s)": 0.024188 }, { "acc": 0.7720696, "epoch": 4.37, "learning_rate": 6.43093285051989e-05, "loss": 0.69544797, "memory(GiB)": 71.19, "step": 5575, "train_speed(iter/s)": 0.024188 }, { "acc": 0.77282968, "epoch": 4.37, "learning_rate": 6.424718405573888e-05, "loss": 0.6709199, "memory(GiB)": 71.19, "step": 5580, "train_speed(iter/s)": 0.024188 }, { "acc": 0.76675258, "epoch": 4.38, "learning_rate": 6.418501564350972e-05, "loss": 0.70900588, "memory(GiB)": 71.19, "step": 5585, "train_speed(iter/s)": 0.024188 }, { "acc": 0.77312188, "epoch": 4.38, "learning_rate": 6.412282337307436e-05, "loss": 0.66980944, "memory(GiB)": 71.19, "step": 5590, "train_speed(iter/s)": 0.024188 }, { "acc": 0.76313424, "epoch": 4.39, "learning_rate": 6.406060734903582e-05, "loss": 0.71873169, "memory(GiB)": 71.19, "step": 5595, "train_speed(iter/s)": 0.024188 }, { "acc": 0.76170006, "epoch": 4.39, "learning_rate": 6.399836767603715e-05, "loss": 0.73406935, "memory(GiB)": 71.19, "step": 5600, "train_speed(iter/s)": 0.024188 }, { "acc": 0.77519426, "epoch": 4.39, "learning_rate": 6.393610445876113e-05, "loss": 0.70191274, "memory(GiB)": 71.19, "step": 5605, "train_speed(iter/s)": 0.024188 }, { "acc": 0.76001482, "epoch": 4.4, "learning_rate": 6.387381780193014e-05, "loss": 0.70856462, "memory(GiB)": 71.19, "step": 5610, "train_speed(iter/s)": 0.024188 }, { "acc": 0.76338596, "epoch": 4.4, "learning_rate": 6.381150781030597e-05, "loss": 0.70342155, "memory(GiB)": 71.19, "step": 5615, "train_speed(iter/s)": 0.024188 }, { "acc": 0.77544284, "epoch": 4.4, "learning_rate": 6.37491745886897e-05, "loss": 0.68927674, "memory(GiB)": 71.19, "step": 5620, "train_speed(iter/s)": 0.024188 }, { "acc": 0.7679841, "epoch": 4.41, "learning_rate": 6.368681824192147e-05, "loss": 0.69150701, "memory(GiB)": 71.19, "step": 5625, "train_speed(iter/s)": 0.024188 }, { "acc": 0.7673996, "epoch": 4.41, "learning_rate": 6.362443887488026e-05, "loss": 0.7154283, "memory(GiB)": 71.19, "step": 5630, "train_speed(iter/s)": 0.024188 }, { "acc": 0.76109505, "epoch": 4.42, "learning_rate": 6.356203659248386e-05, "loss": 0.72742548, "memory(GiB)": 71.19, "step": 5635, "train_speed(iter/s)": 0.024188 }, { "acc": 0.76917353, "epoch": 4.42, "learning_rate": 6.349961149968849e-05, "loss": 0.71016984, "memory(GiB)": 71.19, "step": 5640, "train_speed(iter/s)": 0.024189 }, { "acc": 0.77111497, "epoch": 4.42, "learning_rate": 6.343716370148887e-05, "loss": 0.70852461, "memory(GiB)": 71.19, "step": 5645, "train_speed(iter/s)": 0.024189 }, { "acc": 0.76087008, "epoch": 4.43, "learning_rate": 6.337469330291778e-05, "loss": 0.71220889, "memory(GiB)": 71.19, "step": 5650, "train_speed(iter/s)": 0.02419 }, { "acc": 0.7703217, "epoch": 4.43, "learning_rate": 6.331220040904612e-05, "loss": 0.69431052, "memory(GiB)": 71.19, "step": 5655, "train_speed(iter/s)": 0.024189 }, { "acc": 0.76420479, "epoch": 4.44, "learning_rate": 6.324968512498255e-05, "loss": 0.71943445, "memory(GiB)": 71.19, "step": 5660, "train_speed(iter/s)": 0.024189 }, { "acc": 0.76351399, "epoch": 4.44, "learning_rate": 6.318714755587341e-05, "loss": 0.72373419, "memory(GiB)": 71.19, "step": 5665, "train_speed(iter/s)": 0.024189 }, { "acc": 0.76989708, "epoch": 4.44, "learning_rate": 6.312458780690253e-05, "loss": 0.70568027, "memory(GiB)": 71.19, "step": 5670, "train_speed(iter/s)": 0.024189 }, { "acc": 0.7618772, "epoch": 4.45, "learning_rate": 6.306200598329105e-05, "loss": 0.72609386, "memory(GiB)": 71.19, "step": 5675, "train_speed(iter/s)": 0.024189 }, { "acc": 0.76857285, "epoch": 4.45, "learning_rate": 6.299940219029722e-05, "loss": 0.7043541, "memory(GiB)": 71.19, "step": 5680, "train_speed(iter/s)": 0.024189 }, { "acc": 0.7670013, "epoch": 4.46, "learning_rate": 6.293677653321624e-05, "loss": 0.69272251, "memory(GiB)": 71.19, "step": 5685, "train_speed(iter/s)": 0.024189 }, { "acc": 0.76531138, "epoch": 4.46, "learning_rate": 6.287412911738013e-05, "loss": 0.72677112, "memory(GiB)": 71.19, "step": 5690, "train_speed(iter/s)": 0.02419 }, { "acc": 0.76662335, "epoch": 4.46, "learning_rate": 6.281146004815743e-05, "loss": 0.71060648, "memory(GiB)": 71.19, "step": 5695, "train_speed(iter/s)": 0.02419 }, { "acc": 0.76985221, "epoch": 4.47, "learning_rate": 6.274876943095316e-05, "loss": 0.69214835, "memory(GiB)": 71.19, "step": 5700, "train_speed(iter/s)": 0.02419 }, { "acc": 0.75513039, "epoch": 4.47, "learning_rate": 6.268605737120856e-05, "loss": 0.73119974, "memory(GiB)": 71.19, "step": 5705, "train_speed(iter/s)": 0.02419 }, { "acc": 0.7597374, "epoch": 4.48, "learning_rate": 6.262332397440094e-05, "loss": 0.71646504, "memory(GiB)": 71.19, "step": 5710, "train_speed(iter/s)": 0.02419 }, { "acc": 0.76583333, "epoch": 4.48, "learning_rate": 6.256056934604348e-05, "loss": 0.71394172, "memory(GiB)": 71.19, "step": 5715, "train_speed(iter/s)": 0.02419 }, { "acc": 0.77272115, "epoch": 4.48, "learning_rate": 6.24977935916851e-05, "loss": 0.69063115, "memory(GiB)": 71.19, "step": 5720, "train_speed(iter/s)": 0.02419 }, { "acc": 0.76995649, "epoch": 4.49, "learning_rate": 6.243499681691024e-05, "loss": 0.695227, "memory(GiB)": 71.19, "step": 5725, "train_speed(iter/s)": 0.02419 }, { "acc": 0.75672503, "epoch": 4.49, "learning_rate": 6.237217912733869e-05, "loss": 0.73567729, "memory(GiB)": 71.19, "step": 5730, "train_speed(iter/s)": 0.02419 }, { "acc": 0.76756783, "epoch": 4.49, "learning_rate": 6.23093406286254e-05, "loss": 0.69406104, "memory(GiB)": 71.19, "step": 5735, "train_speed(iter/s)": 0.02419 }, { "acc": 0.78130131, "epoch": 4.5, "learning_rate": 6.224648142646037e-05, "loss": 0.68319197, "memory(GiB)": 71.19, "step": 5740, "train_speed(iter/s)": 0.024191 }, { "acc": 0.75013237, "epoch": 4.5, "learning_rate": 6.218360162656836e-05, "loss": 0.73892965, "memory(GiB)": 71.19, "step": 5745, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76402731, "epoch": 4.51, "learning_rate": 6.212070133470884e-05, "loss": 0.71493678, "memory(GiB)": 71.19, "step": 5750, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76769323, "epoch": 4.51, "learning_rate": 6.205778065667566e-05, "loss": 0.71724548, "memory(GiB)": 71.19, "step": 5755, "train_speed(iter/s)": 0.024191 }, { "acc": 0.75835562, "epoch": 4.51, "learning_rate": 6.199483969829705e-05, "loss": 0.7360487, "memory(GiB)": 71.19, "step": 5760, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76376677, "epoch": 4.52, "learning_rate": 6.19318785654353e-05, "loss": 0.71627126, "memory(GiB)": 71.19, "step": 5765, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76958847, "epoch": 4.52, "learning_rate": 6.186889736398664e-05, "loss": 0.71689034, "memory(GiB)": 71.19, "step": 5770, "train_speed(iter/s)": 0.024191 }, { "acc": 0.75499663, "epoch": 4.53, "learning_rate": 6.180589619988103e-05, "loss": 0.74792767, "memory(GiB)": 71.19, "step": 5775, "train_speed(iter/s)": 0.024191 }, { "acc": 0.7811924, "epoch": 4.53, "learning_rate": 6.174287517908207e-05, "loss": 0.6795465, "memory(GiB)": 71.19, "step": 5780, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76298246, "epoch": 4.53, "learning_rate": 6.167983440758672e-05, "loss": 0.71815233, "memory(GiB)": 71.19, "step": 5785, "train_speed(iter/s)": 0.024191 }, { "acc": 0.7640779, "epoch": 4.54, "learning_rate": 6.16167739914251e-05, "loss": 0.71062479, "memory(GiB)": 71.19, "step": 5790, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76794705, "epoch": 4.54, "learning_rate": 6.15536940366605e-05, "loss": 0.70243373, "memory(GiB)": 71.19, "step": 5795, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76325617, "epoch": 4.55, "learning_rate": 6.149059464938893e-05, "loss": 0.70105352, "memory(GiB)": 71.19, "step": 5800, "train_speed(iter/s)": 0.024191 }, { "acc": 0.77031732, "epoch": 4.55, "learning_rate": 6.142747593573922e-05, "loss": 0.70099783, "memory(GiB)": 71.19, "step": 5805, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76253133, "epoch": 4.55, "learning_rate": 6.136433800187262e-05, "loss": 0.72461586, "memory(GiB)": 71.19, "step": 5810, "train_speed(iter/s)": 0.024191 }, { "acc": 0.77606282, "epoch": 4.56, "learning_rate": 6.130118095398269e-05, "loss": 0.67780962, "memory(GiB)": 71.19, "step": 5815, "train_speed(iter/s)": 0.024191 }, { "acc": 0.7617002, "epoch": 4.56, "learning_rate": 6.123800489829523e-05, "loss": 0.70110459, "memory(GiB)": 71.19, "step": 5820, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76943331, "epoch": 4.57, "learning_rate": 6.117480994106793e-05, "loss": 0.71336999, "memory(GiB)": 71.19, "step": 5825, "train_speed(iter/s)": 0.024191 }, { "acc": 0.75526834, "epoch": 4.57, "learning_rate": 6.111159618859031e-05, "loss": 0.74314675, "memory(GiB)": 71.19, "step": 5830, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76729212, "epoch": 4.57, "learning_rate": 6.104836374718347e-05, "loss": 0.6993835, "memory(GiB)": 71.19, "step": 5835, "train_speed(iter/s)": 0.024191 }, { "acc": 0.77066221, "epoch": 4.58, "learning_rate": 6.0985112723199976e-05, "loss": 0.68562093, "memory(GiB)": 71.19, "step": 5840, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76890574, "epoch": 4.58, "learning_rate": 6.0921843223023634e-05, "loss": 0.70060396, "memory(GiB)": 71.19, "step": 5845, "train_speed(iter/s)": 0.024192 }, { "acc": 0.76268287, "epoch": 4.59, "learning_rate": 6.085855535306931e-05, "loss": 0.71980524, "memory(GiB)": 71.19, "step": 5850, "train_speed(iter/s)": 0.024192 }, { "acc": 0.7607635, "epoch": 4.59, "learning_rate": 6.0795249219782814e-05, "loss": 0.7061645, "memory(GiB)": 71.19, "step": 5855, "train_speed(iter/s)": 0.024192 }, { "acc": 0.76794925, "epoch": 4.59, "learning_rate": 6.0731924929640614e-05, "loss": 0.70129228, "memory(GiB)": 71.19, "step": 5860, "train_speed(iter/s)": 0.024192 }, { "acc": 0.76963058, "epoch": 4.6, "learning_rate": 6.066858258914978e-05, "loss": 0.71304092, "memory(GiB)": 71.19, "step": 5865, "train_speed(iter/s)": 0.024192 }, { "acc": 0.76559944, "epoch": 4.6, "learning_rate": 6.060522230484769e-05, "loss": 0.70504079, "memory(GiB)": 71.19, "step": 5870, "train_speed(iter/s)": 0.024192 }, { "acc": 0.77082806, "epoch": 4.6, "learning_rate": 6.054184418330191e-05, "loss": 0.6952466, "memory(GiB)": 71.19, "step": 5875, "train_speed(iter/s)": 0.024192 }, { "acc": 0.76205645, "epoch": 4.61, "learning_rate": 6.0478448331110015e-05, "loss": 0.71116371, "memory(GiB)": 71.19, "step": 5880, "train_speed(iter/s)": 0.024192 }, { "acc": 0.77297888, "epoch": 4.61, "learning_rate": 6.041503485489942e-05, "loss": 0.68239374, "memory(GiB)": 71.19, "step": 5885, "train_speed(iter/s)": 0.024192 }, { "acc": 0.76356063, "epoch": 4.62, "learning_rate": 6.035160386132718e-05, "loss": 0.70998626, "memory(GiB)": 71.19, "step": 5890, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76024551, "epoch": 4.62, "learning_rate": 6.0288155457079754e-05, "loss": 0.72426705, "memory(GiB)": 71.19, "step": 5895, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76642303, "epoch": 4.62, "learning_rate": 6.022468974887295e-05, "loss": 0.72213707, "memory(GiB)": 71.19, "step": 5900, "train_speed(iter/s)": 0.024191 }, { "acc": 0.77551522, "epoch": 4.63, "learning_rate": 6.016120684345167e-05, "loss": 0.68373032, "memory(GiB)": 71.19, "step": 5905, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76769862, "epoch": 4.63, "learning_rate": 6.009770684758973e-05, "loss": 0.71322379, "memory(GiB)": 71.19, "step": 5910, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76745868, "epoch": 4.64, "learning_rate": 6.0034189868089677e-05, "loss": 0.71255426, "memory(GiB)": 71.19, "step": 5915, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76841478, "epoch": 4.64, "learning_rate": 5.9970656011782646e-05, "loss": 0.70099401, "memory(GiB)": 71.19, "step": 5920, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76973267, "epoch": 4.64, "learning_rate": 5.990710538552813e-05, "loss": 0.6992341, "memory(GiB)": 71.19, "step": 5925, "train_speed(iter/s)": 0.02419 }, { "acc": 0.74826236, "epoch": 4.65, "learning_rate": 5.984353809621388e-05, "loss": 0.74642177, "memory(GiB)": 71.19, "step": 5930, "train_speed(iter/s)": 0.02419 }, { "acc": 0.7560431, "epoch": 4.65, "learning_rate": 5.977995425075562e-05, "loss": 0.72930694, "memory(GiB)": 71.19, "step": 5935, "train_speed(iter/s)": 0.02419 }, { "acc": 0.76061263, "epoch": 4.66, "learning_rate": 5.971635395609694e-05, "loss": 0.73713555, "memory(GiB)": 71.19, "step": 5940, "train_speed(iter/s)": 0.02419 }, { "acc": 0.75073433, "epoch": 4.66, "learning_rate": 5.965273731920908e-05, "loss": 0.74310646, "memory(GiB)": 71.19, "step": 5945, "train_speed(iter/s)": 0.02419 }, { "acc": 0.76606636, "epoch": 4.66, "learning_rate": 5.958910444709083e-05, "loss": 0.7026166, "memory(GiB)": 71.19, "step": 5950, "train_speed(iter/s)": 0.02419 }, { "acc": 0.74374471, "epoch": 4.67, "learning_rate": 5.9525455446768194e-05, "loss": 0.77414222, "memory(GiB)": 71.19, "step": 5955, "train_speed(iter/s)": 0.02419 }, { "acc": 0.7663578, "epoch": 4.67, "learning_rate": 5.9461790425294375e-05, "loss": 0.71838512, "memory(GiB)": 71.19, "step": 5960, "train_speed(iter/s)": 0.02419 }, { "acc": 0.76450734, "epoch": 4.68, "learning_rate": 5.939810948974948e-05, "loss": 0.72742691, "memory(GiB)": 71.19, "step": 5965, "train_speed(iter/s)": 0.02419 }, { "acc": 0.7788063, "epoch": 4.68, "learning_rate": 5.933441274724041e-05, "loss": 0.66779647, "memory(GiB)": 71.19, "step": 5970, "train_speed(iter/s)": 0.02419 }, { "acc": 0.77166514, "epoch": 4.68, "learning_rate": 5.927070030490062e-05, "loss": 0.68094988, "memory(GiB)": 71.19, "step": 5975, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76896586, "epoch": 4.69, "learning_rate": 5.9206972269890014e-05, "loss": 0.68136163, "memory(GiB)": 71.19, "step": 5980, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76952062, "epoch": 4.69, "learning_rate": 5.914322874939466e-05, "loss": 0.7040164, "memory(GiB)": 71.19, "step": 5985, "train_speed(iter/s)": 0.024191 }, { "acc": 0.75679116, "epoch": 4.69, "learning_rate": 5.907946985062678e-05, "loss": 0.73868618, "memory(GiB)": 71.19, "step": 5990, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76209521, "epoch": 4.7, "learning_rate": 5.9015695680824325e-05, "loss": 0.72485847, "memory(GiB)": 71.19, "step": 5995, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76353059, "epoch": 4.7, "learning_rate": 5.8951906347251027e-05, "loss": 0.71688752, "memory(GiB)": 71.19, "step": 6000, "train_speed(iter/s)": 0.024191 }, { "acc": 0.75670443, "epoch": 4.71, "learning_rate": 5.888810195719609e-05, "loss": 0.73053293, "memory(GiB)": 71.19, "step": 6005, "train_speed(iter/s)": 0.024191 }, { "acc": 0.75863943, "epoch": 4.71, "learning_rate": 5.8824282617974045e-05, "loss": 0.70380878, "memory(GiB)": 71.19, "step": 6010, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76119242, "epoch": 4.71, "learning_rate": 5.876044843692456e-05, "loss": 0.7133338, "memory(GiB)": 71.19, "step": 6015, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76129284, "epoch": 4.72, "learning_rate": 5.869659952141228e-05, "loss": 0.71880956, "memory(GiB)": 71.19, "step": 6020, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76461582, "epoch": 4.72, "learning_rate": 5.8632735978826626e-05, "loss": 0.71644011, "memory(GiB)": 71.19, "step": 6025, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76767044, "epoch": 4.73, "learning_rate": 5.856885791658158e-05, "loss": 0.69806213, "memory(GiB)": 71.19, "step": 6030, "train_speed(iter/s)": 0.024192 }, { "acc": 0.75022593, "epoch": 4.73, "learning_rate": 5.8504965442115644e-05, "loss": 0.75385232, "memory(GiB)": 71.19, "step": 6035, "train_speed(iter/s)": 0.024191 }, { "acc": 0.75788932, "epoch": 4.73, "learning_rate": 5.844105866289147e-05, "loss": 0.73194909, "memory(GiB)": 71.19, "step": 6040, "train_speed(iter/s)": 0.024192 }, { "acc": 0.75529494, "epoch": 4.74, "learning_rate": 5.83771376863958e-05, "loss": 0.72789454, "memory(GiB)": 71.19, "step": 6045, "train_speed(iter/s)": 0.024192 }, { "acc": 0.77266655, "epoch": 4.74, "learning_rate": 5.831320262013926e-05, "loss": 0.6826066, "memory(GiB)": 71.19, "step": 6050, "train_speed(iter/s)": 0.024192 }, { "acc": 0.75812349, "epoch": 4.75, "learning_rate": 5.824925357165617e-05, "loss": 0.73104687, "memory(GiB)": 71.19, "step": 6055, "train_speed(iter/s)": 0.024191 }, { "acc": 0.76626301, "epoch": 4.75, "learning_rate": 5.818529064850436e-05, "loss": 0.7082016, "memory(GiB)": 71.19, "step": 6060, "train_speed(iter/s)": 0.024191 }, { "acc": 0.7526711, "epoch": 4.75, "learning_rate": 5.8121313958265e-05, "loss": 0.74757395, "memory(GiB)": 71.19, "step": 6065, "train_speed(iter/s)": 0.024192 }, { "acc": 0.76689634, "epoch": 4.76, "learning_rate": 5.8057323608542425e-05, "loss": 0.70414853, "memory(GiB)": 71.19, "step": 6070, "train_speed(iter/s)": 0.024192 }, { "acc": 0.75855989, "epoch": 4.76, "learning_rate": 5.799331970696393e-05, "loss": 0.73491292, "memory(GiB)": 71.19, "step": 6075, "train_speed(iter/s)": 0.024192 }, { "acc": 0.74774814, "epoch": 4.77, "learning_rate": 5.792930236117964e-05, "loss": 0.76034083, "memory(GiB)": 71.19, "step": 6080, "train_speed(iter/s)": 0.024192 }, { "acc": 0.74793382, "epoch": 4.77, "learning_rate": 5.786527167886221e-05, "loss": 0.76183257, "memory(GiB)": 71.19, "step": 6085, "train_speed(iter/s)": 0.024192 }, { "acc": 0.76219697, "epoch": 4.77, "learning_rate": 5.78012277677068e-05, "loss": 0.73324904, "memory(GiB)": 71.19, "step": 6090, "train_speed(iter/s)": 0.024192 }, { "acc": 0.76384172, "epoch": 4.78, "learning_rate": 5.7737170735430825e-05, "loss": 0.71691432, "memory(GiB)": 71.19, "step": 6095, "train_speed(iter/s)": 0.024192 }, { "acc": 0.74610209, "epoch": 4.78, "learning_rate": 5.7673100689773707e-05, "loss": 0.75250793, "memory(GiB)": 71.19, "step": 6100, "train_speed(iter/s)": 0.024193 }, { "acc": 0.75925684, "epoch": 4.78, "learning_rate": 5.760901773849682e-05, "loss": 0.71436172, "memory(GiB)": 71.19, "step": 6105, "train_speed(iter/s)": 0.024193 }, { "acc": 0.76955056, "epoch": 4.79, "learning_rate": 5.754492198938318e-05, "loss": 0.69148941, "memory(GiB)": 71.19, "step": 6110, "train_speed(iter/s)": 0.024193 }, { "acc": 0.75973935, "epoch": 4.79, "learning_rate": 5.748081355023739e-05, "loss": 0.7391295, "memory(GiB)": 71.19, "step": 6115, "train_speed(iter/s)": 0.024193 }, { "acc": 0.75714288, "epoch": 4.8, "learning_rate": 5.741669252888535e-05, "loss": 0.71268778, "memory(GiB)": 71.19, "step": 6120, "train_speed(iter/s)": 0.024193 }, { "acc": 0.77134218, "epoch": 4.8, "learning_rate": 5.735255903317417e-05, "loss": 0.69459882, "memory(GiB)": 71.19, "step": 6125, "train_speed(iter/s)": 0.024193 }, { "acc": 0.76627364, "epoch": 4.8, "learning_rate": 5.72884131709719e-05, "loss": 0.69015708, "memory(GiB)": 71.19, "step": 6130, "train_speed(iter/s)": 0.024193 }, { "acc": 0.75454583, "epoch": 4.81, "learning_rate": 5.7224255050167394e-05, "loss": 0.75072398, "memory(GiB)": 71.19, "step": 6135, "train_speed(iter/s)": 0.024193 }, { "acc": 0.77098956, "epoch": 4.81, "learning_rate": 5.7160084778670156e-05, "loss": 0.69697561, "memory(GiB)": 71.19, "step": 6140, "train_speed(iter/s)": 0.024193 }, { "acc": 0.76857743, "epoch": 4.82, "learning_rate": 5.709590246441007e-05, "loss": 0.71755748, "memory(GiB)": 71.19, "step": 6145, "train_speed(iter/s)": 0.024193 }, { "acc": 0.75801721, "epoch": 4.82, "learning_rate": 5.703170821533733e-05, "loss": 0.71778822, "memory(GiB)": 71.19, "step": 6150, "train_speed(iter/s)": 0.024193 }, { "acc": 0.76996355, "epoch": 4.82, "learning_rate": 5.696750213942218e-05, "loss": 0.67777882, "memory(GiB)": 71.19, "step": 6155, "train_speed(iter/s)": 0.024193 }, { "acc": 0.7731328, "epoch": 4.83, "learning_rate": 5.690328434465475e-05, "loss": 0.70121741, "memory(GiB)": 71.19, "step": 6160, "train_speed(iter/s)": 0.024194 }, { "acc": 0.75167289, "epoch": 4.83, "learning_rate": 5.68390549390449e-05, "loss": 0.73404212, "memory(GiB)": 71.19, "step": 6165, "train_speed(iter/s)": 0.024194 }, { "acc": 0.76081247, "epoch": 4.84, "learning_rate": 5.677481403062199e-05, "loss": 0.72303152, "memory(GiB)": 71.19, "step": 6170, "train_speed(iter/s)": 0.024194 }, { "acc": 0.77228723, "epoch": 4.84, "learning_rate": 5.671056172743479e-05, "loss": 0.68236284, "memory(GiB)": 71.19, "step": 6175, "train_speed(iter/s)": 0.024194 }, { "acc": 0.76532092, "epoch": 4.84, "learning_rate": 5.6646298137551125e-05, "loss": 0.70234327, "memory(GiB)": 71.19, "step": 6180, "train_speed(iter/s)": 0.024194 }, { "acc": 0.76404386, "epoch": 4.85, "learning_rate": 5.658202336905791e-05, "loss": 0.71822534, "memory(GiB)": 71.19, "step": 6185, "train_speed(iter/s)": 0.024194 }, { "acc": 0.75237103, "epoch": 4.85, "learning_rate": 5.651773753006081e-05, "loss": 0.74845624, "memory(GiB)": 71.19, "step": 6190, "train_speed(iter/s)": 0.024194 }, { "acc": 0.7637733, "epoch": 4.86, "learning_rate": 5.64534407286841e-05, "loss": 0.71927757, "memory(GiB)": 71.19, "step": 6195, "train_speed(iter/s)": 0.024194 }, { "acc": 0.7700985, "epoch": 4.86, "learning_rate": 5.638913307307055e-05, "loss": 0.71858625, "memory(GiB)": 71.19, "step": 6200, "train_speed(iter/s)": 0.024194 }, { "acc": 0.76865525, "epoch": 4.86, "learning_rate": 5.632481467138111e-05, "loss": 0.70387897, "memory(GiB)": 71.19, "step": 6205, "train_speed(iter/s)": 0.024194 }, { "acc": 0.76575408, "epoch": 4.87, "learning_rate": 5.626048563179487e-05, "loss": 0.6937983, "memory(GiB)": 71.19, "step": 6210, "train_speed(iter/s)": 0.024194 }, { "acc": 0.76595235, "epoch": 4.87, "learning_rate": 5.619614606250877e-05, "loss": 0.70782719, "memory(GiB)": 71.19, "step": 6215, "train_speed(iter/s)": 0.024194 }, { "acc": 0.77544599, "epoch": 4.88, "learning_rate": 5.613179607173748e-05, "loss": 0.70953703, "memory(GiB)": 71.19, "step": 6220, "train_speed(iter/s)": 0.024194 }, { "acc": 0.76125436, "epoch": 4.88, "learning_rate": 5.6067435767713184e-05, "loss": 0.71401358, "memory(GiB)": 71.19, "step": 6225, "train_speed(iter/s)": 0.024194 }, { "acc": 0.77986512, "epoch": 4.88, "learning_rate": 5.600306525868544e-05, "loss": 0.66382799, "memory(GiB)": 71.19, "step": 6230, "train_speed(iter/s)": 0.024194 }, { "acc": 0.77460809, "epoch": 4.89, "learning_rate": 5.593868465292094e-05, "loss": 0.70628066, "memory(GiB)": 71.19, "step": 6235, "train_speed(iter/s)": 0.024194 }, { "acc": 0.77852488, "epoch": 4.89, "learning_rate": 5.5874294058703346e-05, "loss": 0.68490057, "memory(GiB)": 71.19, "step": 6240, "train_speed(iter/s)": 0.024194 }, { "acc": 0.76132755, "epoch": 4.89, "learning_rate": 5.58098935843332e-05, "loss": 0.71025805, "memory(GiB)": 71.19, "step": 6245, "train_speed(iter/s)": 0.024194 }, { "acc": 0.76081877, "epoch": 4.9, "learning_rate": 5.574548333812753e-05, "loss": 0.70690475, "memory(GiB)": 71.19, "step": 6250, "train_speed(iter/s)": 0.024194 }, { "acc": 0.76902518, "epoch": 4.9, "learning_rate": 5.5681063428419944e-05, "loss": 0.69533243, "memory(GiB)": 71.19, "step": 6255, "train_speed(iter/s)": 0.024194 }, { "acc": 0.77235093, "epoch": 4.91, "learning_rate": 5.5616633963560194e-05, "loss": 0.69402099, "memory(GiB)": 71.19, "step": 6260, "train_speed(iter/s)": 0.024194 }, { "acc": 0.75550814, "epoch": 4.91, "learning_rate": 5.555219505191417e-05, "loss": 0.7272469, "memory(GiB)": 71.19, "step": 6265, "train_speed(iter/s)": 0.024194 }, { "acc": 0.75596237, "epoch": 4.91, "learning_rate": 5.548774680186358e-05, "loss": 0.74081788, "memory(GiB)": 71.19, "step": 6270, "train_speed(iter/s)": 0.024195 }, { "acc": 0.7628664, "epoch": 4.92, "learning_rate": 5.5423289321805914e-05, "loss": 0.71786399, "memory(GiB)": 71.19, "step": 6275, "train_speed(iter/s)": 0.024195 }, { "acc": 0.76951499, "epoch": 4.92, "learning_rate": 5.535882272015417e-05, "loss": 0.70130796, "memory(GiB)": 71.19, "step": 6280, "train_speed(iter/s)": 0.024195 }, { "acc": 0.75305843, "epoch": 4.93, "learning_rate": 5.529434710533664e-05, "loss": 0.74882407, "memory(GiB)": 71.19, "step": 6285, "train_speed(iter/s)": 0.024195 }, { "acc": 0.76734796, "epoch": 4.93, "learning_rate": 5.522986258579681e-05, "loss": 0.71578069, "memory(GiB)": 71.19, "step": 6290, "train_speed(iter/s)": 0.024195 }, { "acc": 0.76613975, "epoch": 4.93, "learning_rate": 5.516536926999316e-05, "loss": 0.70828838, "memory(GiB)": 71.19, "step": 6295, "train_speed(iter/s)": 0.024195 }, { "acc": 0.77031693, "epoch": 4.94, "learning_rate": 5.510086726639894e-05, "loss": 0.70443668, "memory(GiB)": 71.19, "step": 6300, "train_speed(iter/s)": 0.024195 }, { "acc": 0.75785489, "epoch": 4.94, "learning_rate": 5.5036356683502e-05, "loss": 0.7272397, "memory(GiB)": 71.19, "step": 6305, "train_speed(iter/s)": 0.024195 }, { "acc": 0.77772474, "epoch": 4.95, "learning_rate": 5.497183762980467e-05, "loss": 0.67617226, "memory(GiB)": 71.19, "step": 6310, "train_speed(iter/s)": 0.024195 }, { "acc": 0.76613221, "epoch": 4.95, "learning_rate": 5.490731021382346e-05, "loss": 0.71000295, "memory(GiB)": 71.19, "step": 6315, "train_speed(iter/s)": 0.024195 }, { "acc": 0.74996147, "epoch": 4.95, "learning_rate": 5.4842774544089e-05, "loss": 0.75001779, "memory(GiB)": 71.19, "step": 6320, "train_speed(iter/s)": 0.024196 }, { "acc": 0.76159739, "epoch": 4.96, "learning_rate": 5.477823072914579e-05, "loss": 0.71828237, "memory(GiB)": 71.19, "step": 6325, "train_speed(iter/s)": 0.024196 }, { "acc": 0.74771967, "epoch": 4.96, "learning_rate": 5.4713678877552e-05, "loss": 0.77577434, "memory(GiB)": 71.19, "step": 6330, "train_speed(iter/s)": 0.024196 }, { "acc": 0.76517348, "epoch": 4.97, "learning_rate": 5.464911909787934e-05, "loss": 0.72100554, "memory(GiB)": 71.19, "step": 6335, "train_speed(iter/s)": 0.024196 }, { "acc": 0.76637516, "epoch": 4.97, "learning_rate": 5.4584551498712865e-05, "loss": 0.70268941, "memory(GiB)": 71.19, "step": 6340, "train_speed(iter/s)": 0.024196 }, { "acc": 0.76800013, "epoch": 4.97, "learning_rate": 5.451997618865078e-05, "loss": 0.72028384, "memory(GiB)": 71.19, "step": 6345, "train_speed(iter/s)": 0.024196 }, { "acc": 0.76661825, "epoch": 4.98, "learning_rate": 5.445539327630422e-05, "loss": 0.69725647, "memory(GiB)": 71.19, "step": 6350, "train_speed(iter/s)": 0.024196 }, { "acc": 0.75148797, "epoch": 4.98, "learning_rate": 5.439080287029716e-05, "loss": 0.73803744, "memory(GiB)": 71.19, "step": 6355, "train_speed(iter/s)": 0.024197 }, { "acc": 0.75281067, "epoch": 4.98, "learning_rate": 5.432620507926616e-05, "loss": 0.75406656, "memory(GiB)": 71.19, "step": 6360, "train_speed(iter/s)": 0.024197 }, { "acc": 0.76611757, "epoch": 4.99, "learning_rate": 5.426160001186016e-05, "loss": 0.70822902, "memory(GiB)": 71.19, "step": 6365, "train_speed(iter/s)": 0.024197 }, { "acc": 0.77433181, "epoch": 4.99, "learning_rate": 5.41969877767404e-05, "loss": 0.68499122, "memory(GiB)": 71.19, "step": 6370, "train_speed(iter/s)": 0.024197 }, { "acc": 0.7715888, "epoch": 5.0, "learning_rate": 5.413236848258015e-05, "loss": 0.70902772, "memory(GiB)": 71.19, "step": 6375, "train_speed(iter/s)": 0.024197 }, { "epoch": 5.0, "eval_acc": 0.7930127723516154, "eval_loss": 0.5855495929718018, "eval_runtime": 107.3505, "eval_samples_per_second": 0.866, "eval_steps_per_second": 0.866, "step": 6379 } ], "logging_steps": 5, "max_steps": 12750, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 1, "total_flos": 2.781799283988169e+22, "train_batch_size": 1, "trial_name": null, "trial_params": null }