{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 704, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5e-05, "loss": 11.881, "step": 2 }, { "epoch": 0.01, "learning_rate": 4.985795454545455e-05, "loss": 11.0362, "step": 4 }, { "epoch": 0.01, "learning_rate": 4.971590909090909e-05, "loss": 4.7507, "step": 6 }, { "epoch": 0.01, "learning_rate": 4.9644886363636365e-05, "loss": 5.152, "step": 8 }, { "epoch": 0.01, "learning_rate": 4.950284090909092e-05, "loss": 4.4355, "step": 10 }, { "epoch": 0.02, "learning_rate": 4.9360795454545455e-05, "loss": 4.1403, "step": 12 }, { "epoch": 0.02, "learning_rate": 4.921875e-05, "loss": 3.8628, "step": 14 }, { "epoch": 0.02, "learning_rate": 4.907670454545455e-05, "loss": 3.3533, "step": 16 }, { "epoch": 0.03, "learning_rate": 4.893465909090909e-05, "loss": 3.6164, "step": 18 }, { "epoch": 0.03, "learning_rate": 4.8792613636363636e-05, "loss": 4.0321, "step": 20 }, { "epoch": 0.03, "learning_rate": 4.865056818181819e-05, "loss": 3.5433, "step": 22 }, { "epoch": 0.03, "learning_rate": 4.850852272727273e-05, "loss": 4.0187, "step": 24 }, { "epoch": 0.04, "learning_rate": 4.836647727272727e-05, "loss": 3.601, "step": 26 }, { "epoch": 0.04, "learning_rate": 4.822443181818182e-05, "loss": 3.4055, "step": 28 }, { "epoch": 0.04, "learning_rate": 4.808238636363637e-05, "loss": 3.4381, "step": 30 }, { "epoch": 0.05, "learning_rate": 4.794034090909091e-05, "loss": 3.3023, "step": 32 }, { "epoch": 0.05, "learning_rate": 4.779829545454546e-05, "loss": 3.7605, "step": 34 }, { "epoch": 0.05, "learning_rate": 4.765625e-05, "loss": 2.0996, "step": 36 }, { "epoch": 0.05, "learning_rate": 4.751420454545455e-05, "loss": 2.5817, "step": 38 }, { "epoch": 0.06, "learning_rate": 4.737215909090909e-05, "loss": 2.0194, "step": 40 }, { "epoch": 0.06, "learning_rate": 4.723011363636364e-05, "loss": 2.2909, "step": 42 }, { "epoch": 0.06, "learning_rate": 4.708806818181818e-05, "loss": 1.8493, "step": 44 }, { "epoch": 0.07, "learning_rate": 4.694602272727273e-05, "loss": 2.2346, "step": 46 }, { "epoch": 0.07, "learning_rate": 4.6803977272727274e-05, "loss": 2.3284, "step": 48 }, { "epoch": 0.07, "learning_rate": 4.666193181818182e-05, "loss": 2.1243, "step": 50 }, { "epoch": 0.07, "learning_rate": 4.6519886363636364e-05, "loss": 2.0376, "step": 52 }, { "epoch": 0.08, "learning_rate": 4.6377840909090916e-05, "loss": 4.0457, "step": 54 }, { "epoch": 0.08, "learning_rate": 4.6235795454545454e-05, "loss": 2.4413, "step": 56 }, { "epoch": 0.08, "learning_rate": 4.609375e-05, "loss": 1.7908, "step": 58 }, { "epoch": 0.09, "learning_rate": 4.595170454545455e-05, "loss": 2.1725, "step": 60 }, { "epoch": 0.09, "learning_rate": 4.580965909090909e-05, "loss": 1.2263, "step": 62 }, { "epoch": 0.09, "learning_rate": 4.5667613636363634e-05, "loss": 2.2057, "step": 64 }, { "epoch": 0.09, "learning_rate": 4.5525568181818186e-05, "loss": 1.4206, "step": 66 }, { "epoch": 0.1, "learning_rate": 4.538352272727273e-05, "loss": 1.6346, "step": 68 }, { "epoch": 0.1, "learning_rate": 4.5241477272727276e-05, "loss": 1.6167, "step": 70 }, { "epoch": 0.1, "learning_rate": 4.509943181818182e-05, "loss": 1.3222, "step": 72 }, { "epoch": 0.11, "learning_rate": 4.4957386363636366e-05, "loss": 1.1639, "step": 74 }, { "epoch": 0.11, "learning_rate": 4.481534090909091e-05, "loss": 2.0723, "step": 76 }, { "epoch": 0.11, "learning_rate": 4.4673295454545457e-05, "loss": 1.707, "step": 78 }, { "epoch": 0.11, "learning_rate": 4.453125e-05, "loss": 1.8604, "step": 80 }, { "epoch": 0.12, "learning_rate": 4.438920454545455e-05, "loss": 1.9273, "step": 82 }, { "epoch": 0.12, "learning_rate": 4.424715909090909e-05, "loss": 1.8075, "step": 84 }, { "epoch": 0.12, "learning_rate": 4.410511363636364e-05, "loss": 1.4449, "step": 86 }, { "epoch": 0.12, "learning_rate": 4.396306818181818e-05, "loss": 1.0887, "step": 88 }, { "epoch": 0.13, "learning_rate": 4.3821022727272734e-05, "loss": 1.4518, "step": 90 }, { "epoch": 0.13, "learning_rate": 4.367897727272727e-05, "loss": 1.3403, "step": 92 }, { "epoch": 0.13, "learning_rate": 4.353693181818182e-05, "loss": 0.9441, "step": 94 }, { "epoch": 0.14, "learning_rate": 4.339488636363637e-05, "loss": 1.1635, "step": 96 }, { "epoch": 0.14, "learning_rate": 4.3252840909090914e-05, "loss": 1.2348, "step": 98 }, { "epoch": 0.14, "learning_rate": 4.311079545454545e-05, "loss": 1.3606, "step": 100 }, { "epoch": 0.14, "learning_rate": 4.2968750000000004e-05, "loss": 1.0958, "step": 102 }, { "epoch": 0.15, "learning_rate": 4.282670454545455e-05, "loss": 1.6382, "step": 104 }, { "epoch": 0.15, "learning_rate": 4.268465909090909e-05, "loss": 1.9738, "step": 106 }, { "epoch": 0.15, "learning_rate": 4.254261363636364e-05, "loss": 1.2341, "step": 108 }, { "epoch": 0.16, "learning_rate": 4.2400568181818185e-05, "loss": 1.3189, "step": 110 }, { "epoch": 0.16, "learning_rate": 4.225852272727273e-05, "loss": 1.3616, "step": 112 }, { "epoch": 0.16, "learning_rate": 4.2116477272727275e-05, "loss": 1.525, "step": 114 }, { "epoch": 0.16, "learning_rate": 4.197443181818182e-05, "loss": 1.4151, "step": 116 }, { "epoch": 0.17, "learning_rate": 4.1832386363636365e-05, "loss": 1.6387, "step": 118 }, { "epoch": 0.17, "learning_rate": 4.169034090909092e-05, "loss": 1.3773, "step": 120 }, { "epoch": 0.17, "learning_rate": 4.1548295454545455e-05, "loss": 1.4374, "step": 122 }, { "epoch": 0.18, "learning_rate": 4.140625e-05, "loss": 1.3446, "step": 124 }, { "epoch": 0.18, "learning_rate": 4.126420454545455e-05, "loss": 1.8676, "step": 126 }, { "epoch": 0.18, "learning_rate": 4.112215909090909e-05, "loss": 1.8264, "step": 128 }, { "epoch": 0.18, "learning_rate": 4.0980113636363635e-05, "loss": 1.0708, "step": 130 }, { "epoch": 0.19, "learning_rate": 4.083806818181819e-05, "loss": 1.4238, "step": 132 }, { "epoch": 0.19, "learning_rate": 4.069602272727273e-05, "loss": 1.6779, "step": 134 }, { "epoch": 0.19, "learning_rate": 4.055397727272727e-05, "loss": 1.5807, "step": 136 }, { "epoch": 0.2, "learning_rate": 4.041193181818182e-05, "loss": 1.1561, "step": 138 }, { "epoch": 0.2, "learning_rate": 4.026988636363637e-05, "loss": 1.6651, "step": 140 }, { "epoch": 0.2, "learning_rate": 4.012784090909091e-05, "loss": 1.9554, "step": 142 }, { "epoch": 0.2, "learning_rate": 3.998579545454546e-05, "loss": 1.27, "step": 144 }, { "epoch": 0.21, "learning_rate": 3.984375e-05, "loss": 1.9976, "step": 146 }, { "epoch": 0.21, "learning_rate": 3.970170454545455e-05, "loss": 1.0606, "step": 148 }, { "epoch": 0.21, "learning_rate": 3.955965909090909e-05, "loss": 1.2185, "step": 150 }, { "epoch": 0.22, "learning_rate": 3.941761363636364e-05, "loss": 1.6675, "step": 152 }, { "epoch": 0.22, "learning_rate": 3.927556818181818e-05, "loss": 1.0349, "step": 154 }, { "epoch": 0.22, "learning_rate": 3.913352272727273e-05, "loss": 0.9453, "step": 156 }, { "epoch": 0.22, "learning_rate": 3.899147727272727e-05, "loss": 1.0831, "step": 158 }, { "epoch": 0.23, "learning_rate": 3.884943181818182e-05, "loss": 2.1055, "step": 160 }, { "epoch": 0.23, "learning_rate": 3.8707386363636364e-05, "loss": 1.5656, "step": 162 }, { "epoch": 0.23, "learning_rate": 3.8565340909090915e-05, "loss": 1.0042, "step": 164 }, { "epoch": 0.24, "learning_rate": 3.8423295454545454e-05, "loss": 1.8098, "step": 166 }, { "epoch": 0.24, "learning_rate": 3.828125e-05, "loss": 1.4084, "step": 168 }, { "epoch": 0.24, "learning_rate": 3.813920454545455e-05, "loss": 1.5181, "step": 170 }, { "epoch": 0.24, "learning_rate": 3.799715909090909e-05, "loss": 1.334, "step": 172 }, { "epoch": 0.25, "learning_rate": 3.7855113636363634e-05, "loss": 1.1288, "step": 174 }, { "epoch": 0.25, "learning_rate": 3.7713068181818186e-05, "loss": 0.9293, "step": 176 }, { "epoch": 0.25, "learning_rate": 3.757102272727273e-05, "loss": 1.1451, "step": 178 }, { "epoch": 0.26, "learning_rate": 3.742897727272727e-05, "loss": 1.1264, "step": 180 }, { "epoch": 0.26, "learning_rate": 3.728693181818182e-05, "loss": 2.3384, "step": 182 }, { "epoch": 0.26, "learning_rate": 3.7144886363636366e-05, "loss": 0.8733, "step": 184 }, { "epoch": 0.26, "learning_rate": 3.700284090909091e-05, "loss": 1.4847, "step": 186 }, { "epoch": 0.27, "learning_rate": 3.6860795454545456e-05, "loss": 1.8384, "step": 188 }, { "epoch": 0.27, "learning_rate": 3.671875e-05, "loss": 0.973, "step": 190 }, { "epoch": 0.27, "learning_rate": 3.6576704545454547e-05, "loss": 1.5602, "step": 192 }, { "epoch": 0.28, "learning_rate": 3.643465909090909e-05, "loss": 1.459, "step": 194 }, { "epoch": 0.28, "learning_rate": 3.629261363636364e-05, "loss": 1.6167, "step": 196 }, { "epoch": 0.28, "learning_rate": 3.615056818181818e-05, "loss": 0.7387, "step": 198 }, { "epoch": 0.28, "learning_rate": 3.6008522727272734e-05, "loss": 1.1738, "step": 200 }, { "epoch": 0.28, "eval_cer": 0.22616305849838783, "eval_loss": 1.2406114339828491, "eval_runtime": 381.0451, "eval_samples_per_second": 3.698, "eval_steps_per_second": 0.465, "step": 200 }, { "epoch": 0.29, "learning_rate": 3.586647727272727e-05, "loss": 1.1026, "step": 202 }, { "epoch": 0.29, "learning_rate": 3.572443181818182e-05, "loss": 1.2022, "step": 204 }, { "epoch": 0.29, "learning_rate": 3.558238636363637e-05, "loss": 1.2101, "step": 206 }, { "epoch": 0.3, "learning_rate": 3.5440340909090914e-05, "loss": 0.9208, "step": 208 }, { "epoch": 0.3, "learning_rate": 3.529829545454545e-05, "loss": 1.1579, "step": 210 }, { "epoch": 0.3, "learning_rate": 3.5156250000000004e-05, "loss": 0.8407, "step": 212 }, { "epoch": 0.3, "learning_rate": 3.501420454545455e-05, "loss": 1.3193, "step": 214 }, { "epoch": 0.31, "learning_rate": 3.487215909090909e-05, "loss": 1.7611, "step": 216 }, { "epoch": 0.31, "learning_rate": 3.473011363636364e-05, "loss": 0.8605, "step": 218 }, { "epoch": 0.31, "learning_rate": 3.4588068181818184e-05, "loss": 1.2032, "step": 220 }, { "epoch": 0.32, "learning_rate": 3.444602272727273e-05, "loss": 1.5094, "step": 222 }, { "epoch": 0.32, "learning_rate": 3.4303977272727275e-05, "loss": 0.8793, "step": 224 }, { "epoch": 0.32, "learning_rate": 3.416193181818182e-05, "loss": 1.4906, "step": 226 }, { "epoch": 0.32, "learning_rate": 3.4019886363636365e-05, "loss": 1.5084, "step": 228 }, { "epoch": 0.33, "learning_rate": 3.387784090909091e-05, "loss": 1.0827, "step": 230 }, { "epoch": 0.33, "learning_rate": 3.3735795454545455e-05, "loss": 1.9808, "step": 232 }, { "epoch": 0.33, "learning_rate": 3.359375e-05, "loss": 1.022, "step": 234 }, { "epoch": 0.34, "learning_rate": 3.345170454545455e-05, "loss": 0.9199, "step": 236 }, { "epoch": 0.34, "learning_rate": 3.330965909090909e-05, "loss": 1.3656, "step": 238 }, { "epoch": 0.34, "learning_rate": 3.3167613636363635e-05, "loss": 1.1733, "step": 240 }, { "epoch": 0.34, "learning_rate": 3.302556818181819e-05, "loss": 1.098, "step": 242 }, { "epoch": 0.35, "learning_rate": 3.288352272727273e-05, "loss": 1.5639, "step": 244 }, { "epoch": 0.35, "learning_rate": 3.274147727272727e-05, "loss": 1.2712, "step": 246 }, { "epoch": 0.35, "learning_rate": 3.259943181818182e-05, "loss": 1.0763, "step": 248 }, { "epoch": 0.36, "learning_rate": 3.245738636363637e-05, "loss": 0.7826, "step": 250 }, { "epoch": 0.36, "learning_rate": 3.231534090909091e-05, "loss": 1.7579, "step": 252 }, { "epoch": 0.36, "learning_rate": 3.217329545454546e-05, "loss": 1.1492, "step": 254 }, { "epoch": 0.36, "learning_rate": 3.203125e-05, "loss": 0.8802, "step": 256 }, { "epoch": 0.37, "learning_rate": 3.188920454545455e-05, "loss": 0.9283, "step": 258 }, { "epoch": 0.37, "learning_rate": 3.174715909090909e-05, "loss": 1.4768, "step": 260 }, { "epoch": 0.37, "learning_rate": 3.160511363636364e-05, "loss": 1.2097, "step": 262 }, { "epoch": 0.38, "learning_rate": 3.146306818181818e-05, "loss": 1.083, "step": 264 }, { "epoch": 0.38, "learning_rate": 3.132102272727273e-05, "loss": 1.5882, "step": 266 }, { "epoch": 0.38, "learning_rate": 3.117897727272727e-05, "loss": 1.3427, "step": 268 }, { "epoch": 0.38, "learning_rate": 3.103693181818182e-05, "loss": 1.1064, "step": 270 }, { "epoch": 0.39, "learning_rate": 3.089488636363636e-05, "loss": 1.3874, "step": 272 }, { "epoch": 0.39, "learning_rate": 3.0752840909090915e-05, "loss": 2.1061, "step": 274 }, { "epoch": 0.39, "learning_rate": 3.0610795454545454e-05, "loss": 0.8418, "step": 276 }, { "epoch": 0.39, "learning_rate": 3.0468750000000002e-05, "loss": 0.897, "step": 278 }, { "epoch": 0.4, "learning_rate": 3.0326704545454547e-05, "loss": 1.3175, "step": 280 }, { "epoch": 0.4, "learning_rate": 3.018465909090909e-05, "loss": 0.7876, "step": 282 }, { "epoch": 0.4, "learning_rate": 3.0042613636363637e-05, "loss": 1.4139, "step": 284 }, { "epoch": 0.41, "learning_rate": 2.9900568181818182e-05, "loss": 0.8398, "step": 286 }, { "epoch": 0.41, "learning_rate": 2.975852272727273e-05, "loss": 0.7938, "step": 288 }, { "epoch": 0.41, "learning_rate": 2.9616477272727272e-05, "loss": 0.9088, "step": 290 }, { "epoch": 0.41, "learning_rate": 2.9474431818181818e-05, "loss": 1.0675, "step": 292 }, { "epoch": 0.42, "learning_rate": 2.9332386363636366e-05, "loss": 1.2702, "step": 294 }, { "epoch": 0.42, "learning_rate": 2.9190340909090915e-05, "loss": 1.0515, "step": 296 }, { "epoch": 0.42, "learning_rate": 2.9048295454545453e-05, "loss": 0.9444, "step": 298 }, { "epoch": 0.43, "learning_rate": 2.890625e-05, "loss": 1.4437, "step": 300 }, { "epoch": 0.43, "learning_rate": 2.876420454545455e-05, "loss": 0.8352, "step": 302 }, { "epoch": 0.43, "learning_rate": 2.862215909090909e-05, "loss": 0.9627, "step": 304 }, { "epoch": 0.43, "learning_rate": 2.8480113636363637e-05, "loss": 0.964, "step": 306 }, { "epoch": 0.44, "learning_rate": 2.8338068181818185e-05, "loss": 1.0729, "step": 308 }, { "epoch": 0.44, "learning_rate": 2.819602272727273e-05, "loss": 1.094, "step": 310 }, { "epoch": 0.44, "learning_rate": 2.8053977272727272e-05, "loss": 1.3392, "step": 312 }, { "epoch": 0.45, "learning_rate": 2.791193181818182e-05, "loss": 0.9211, "step": 314 }, { "epoch": 0.45, "learning_rate": 2.7769886363636365e-05, "loss": 1.0152, "step": 316 }, { "epoch": 0.45, "learning_rate": 2.7627840909090914e-05, "loss": 1.3656, "step": 318 }, { "epoch": 0.45, "learning_rate": 2.7485795454545455e-05, "loss": 1.6998, "step": 320 }, { "epoch": 0.46, "learning_rate": 2.734375e-05, "loss": 0.9577, "step": 322 }, { "epoch": 0.46, "learning_rate": 2.720170454545455e-05, "loss": 1.7129, "step": 324 }, { "epoch": 0.46, "learning_rate": 2.705965909090909e-05, "loss": 1.3892, "step": 326 }, { "epoch": 0.47, "learning_rate": 2.6917613636363636e-05, "loss": 0.8672, "step": 328 }, { "epoch": 0.47, "learning_rate": 2.6775568181818184e-05, "loss": 0.7077, "step": 330 }, { "epoch": 0.47, "learning_rate": 2.663352272727273e-05, "loss": 1.4751, "step": 332 }, { "epoch": 0.47, "learning_rate": 2.649147727272727e-05, "loss": 1.221, "step": 334 }, { "epoch": 0.48, "learning_rate": 2.634943181818182e-05, "loss": 1.01, "step": 336 }, { "epoch": 0.48, "learning_rate": 2.6207386363636365e-05, "loss": 1.0246, "step": 338 }, { "epoch": 0.48, "learning_rate": 2.6065340909090913e-05, "loss": 0.9274, "step": 340 }, { "epoch": 0.49, "learning_rate": 2.5923295454545455e-05, "loss": 1.5181, "step": 342 }, { "epoch": 0.49, "learning_rate": 2.578125e-05, "loss": 0.6105, "step": 344 }, { "epoch": 0.49, "learning_rate": 2.563920454545455e-05, "loss": 0.8329, "step": 346 }, { "epoch": 0.49, "learning_rate": 2.549715909090909e-05, "loss": 0.707, "step": 348 }, { "epoch": 0.5, "learning_rate": 2.5355113636363635e-05, "loss": 1.2725, "step": 350 }, { "epoch": 0.5, "learning_rate": 2.5213068181818184e-05, "loss": 1.7603, "step": 352 }, { "epoch": 0.5, "learning_rate": 2.5071022727272732e-05, "loss": 0.8998, "step": 354 }, { "epoch": 0.51, "learning_rate": 2.4928977272727274e-05, "loss": 0.8211, "step": 356 }, { "epoch": 0.51, "learning_rate": 2.478693181818182e-05, "loss": 1.2931, "step": 358 }, { "epoch": 0.51, "learning_rate": 2.471590909090909e-05, "loss": 1.1298, "step": 360 }, { "epoch": 0.51, "learning_rate": 2.4573863636363636e-05, "loss": 1.799, "step": 362 }, { "epoch": 0.52, "learning_rate": 2.4431818181818185e-05, "loss": 0.9355, "step": 364 }, { "epoch": 0.52, "learning_rate": 2.4289772727272727e-05, "loss": 0.9555, "step": 366 }, { "epoch": 0.52, "learning_rate": 2.4147727272727275e-05, "loss": 0.5778, "step": 368 }, { "epoch": 0.53, "learning_rate": 2.400568181818182e-05, "loss": 0.9761, "step": 370 }, { "epoch": 0.53, "learning_rate": 2.3863636363636365e-05, "loss": 0.9289, "step": 372 }, { "epoch": 0.53, "learning_rate": 2.372159090909091e-05, "loss": 0.7408, "step": 374 }, { "epoch": 0.53, "learning_rate": 2.3579545454545455e-05, "loss": 0.6301, "step": 376 }, { "epoch": 0.54, "learning_rate": 2.34375e-05, "loss": 0.9952, "step": 378 }, { "epoch": 0.54, "learning_rate": 2.3295454545454546e-05, "loss": 0.8253, "step": 380 }, { "epoch": 0.54, "learning_rate": 2.315340909090909e-05, "loss": 0.6469, "step": 382 }, { "epoch": 0.55, "learning_rate": 2.3011363636363636e-05, "loss": 1.0202, "step": 384 }, { "epoch": 0.55, "learning_rate": 2.2869318181818184e-05, "loss": 0.9247, "step": 386 }, { "epoch": 0.55, "learning_rate": 2.272727272727273e-05, "loss": 0.6159, "step": 388 }, { "epoch": 0.55, "learning_rate": 2.2585227272727274e-05, "loss": 0.6967, "step": 390 }, { "epoch": 0.56, "learning_rate": 2.244318181818182e-05, "loss": 0.6891, "step": 392 }, { "epoch": 0.56, "learning_rate": 2.2301136363636365e-05, "loss": 0.8713, "step": 394 }, { "epoch": 0.56, "learning_rate": 2.215909090909091e-05, "loss": 0.7181, "step": 396 }, { "epoch": 0.57, "learning_rate": 2.2017045454545458e-05, "loss": 0.9892, "step": 398 }, { "epoch": 0.57, "learning_rate": 2.1875e-05, "loss": 0.9181, "step": 400 }, { "epoch": 0.57, "eval_cer": 0.10839858744050361, "eval_loss": 0.8294418454170227, "eval_runtime": 369.2935, "eval_samples_per_second": 3.815, "eval_steps_per_second": 0.479, "step": 400 }, { "epoch": 0.57, "learning_rate": 2.1732954545454545e-05, "loss": 0.8469, "step": 402 }, { "epoch": 0.57, "learning_rate": 2.1590909090909093e-05, "loss": 1.125, "step": 404 }, { "epoch": 0.58, "learning_rate": 2.1448863636363635e-05, "loss": 0.65, "step": 406 }, { "epoch": 0.58, "learning_rate": 2.1306818181818183e-05, "loss": 1.1301, "step": 408 }, { "epoch": 0.58, "learning_rate": 2.116477272727273e-05, "loss": 0.7355, "step": 410 }, { "epoch": 0.59, "learning_rate": 2.1022727272727274e-05, "loss": 0.8691, "step": 412 }, { "epoch": 0.59, "learning_rate": 2.088068181818182e-05, "loss": 1.2798, "step": 414 }, { "epoch": 0.59, "learning_rate": 2.0738636363636367e-05, "loss": 0.9451, "step": 416 }, { "epoch": 0.59, "learning_rate": 2.059659090909091e-05, "loss": 0.8104, "step": 418 }, { "epoch": 0.6, "learning_rate": 2.0454545454545457e-05, "loss": 0.9429, "step": 420 }, { "epoch": 0.6, "learning_rate": 2.0312500000000002e-05, "loss": 1.0933, "step": 422 }, { "epoch": 0.6, "learning_rate": 2.0170454545454544e-05, "loss": 0.8391, "step": 424 }, { "epoch": 0.61, "learning_rate": 2.0028409090909093e-05, "loss": 0.5393, "step": 426 }, { "epoch": 0.61, "learning_rate": 1.9886363636363638e-05, "loss": 0.7289, "step": 428 }, { "epoch": 0.61, "learning_rate": 1.9744318181818183e-05, "loss": 1.4527, "step": 430 }, { "epoch": 0.61, "learning_rate": 1.9602272727272728e-05, "loss": 0.6219, "step": 432 }, { "epoch": 0.62, "learning_rate": 1.9460227272727273e-05, "loss": 0.6851, "step": 434 }, { "epoch": 0.62, "learning_rate": 1.9318181818181818e-05, "loss": 0.6631, "step": 436 }, { "epoch": 0.62, "learning_rate": 1.9176136363636366e-05, "loss": 0.947, "step": 438 }, { "epoch": 0.62, "learning_rate": 1.9034090909090908e-05, "loss": 0.5946, "step": 440 }, { "epoch": 0.63, "learning_rate": 1.8892045454545457e-05, "loss": 0.8884, "step": 442 }, { "epoch": 0.63, "learning_rate": 1.8750000000000002e-05, "loss": 0.8167, "step": 444 }, { "epoch": 0.63, "learning_rate": 1.8607954545454543e-05, "loss": 0.5471, "step": 446 }, { "epoch": 0.64, "learning_rate": 1.8465909090909092e-05, "loss": 0.6034, "step": 448 }, { "epoch": 0.64, "learning_rate": 1.8323863636363637e-05, "loss": 0.604, "step": 450 }, { "epoch": 0.64, "learning_rate": 1.8181818181818182e-05, "loss": 0.6084, "step": 452 }, { "epoch": 0.64, "learning_rate": 1.8039772727272727e-05, "loss": 0.6083, "step": 454 }, { "epoch": 0.65, "learning_rate": 1.7897727272727276e-05, "loss": 0.6998, "step": 456 }, { "epoch": 0.65, "learning_rate": 1.7755681818181817e-05, "loss": 0.9948, "step": 458 }, { "epoch": 0.65, "learning_rate": 1.7613636363636366e-05, "loss": 0.7974, "step": 460 }, { "epoch": 0.66, "learning_rate": 1.747159090909091e-05, "loss": 1.1083, "step": 462 }, { "epoch": 0.66, "learning_rate": 1.7329545454545456e-05, "loss": 1.481, "step": 464 }, { "epoch": 0.66, "learning_rate": 1.71875e-05, "loss": 1.5551, "step": 466 }, { "epoch": 0.66, "learning_rate": 1.7045454545454546e-05, "loss": 0.747, "step": 468 }, { "epoch": 0.67, "learning_rate": 1.690340909090909e-05, "loss": 0.7262, "step": 470 }, { "epoch": 0.67, "learning_rate": 1.6761363636363636e-05, "loss": 0.7682, "step": 472 }, { "epoch": 0.67, "learning_rate": 1.6619318181818185e-05, "loss": 0.8529, "step": 474 }, { "epoch": 0.68, "learning_rate": 1.6477272727272726e-05, "loss": 0.7041, "step": 476 }, { "epoch": 0.68, "learning_rate": 1.6335227272727275e-05, "loss": 0.5702, "step": 478 }, { "epoch": 0.68, "learning_rate": 1.619318181818182e-05, "loss": 1.1103, "step": 480 }, { "epoch": 0.68, "learning_rate": 1.6051136363636365e-05, "loss": 0.7418, "step": 482 }, { "epoch": 0.69, "learning_rate": 1.590909090909091e-05, "loss": 1.1425, "step": 484 }, { "epoch": 0.69, "learning_rate": 1.5767045454545455e-05, "loss": 0.7602, "step": 486 }, { "epoch": 0.69, "learning_rate": 1.5625e-05, "loss": 0.6746, "step": 488 }, { "epoch": 0.7, "learning_rate": 1.5482954545454545e-05, "loss": 0.591, "step": 490 }, { "epoch": 0.7, "learning_rate": 1.534090909090909e-05, "loss": 0.6343, "step": 492 }, { "epoch": 0.7, "learning_rate": 1.5198863636363636e-05, "loss": 0.7024, "step": 494 }, { "epoch": 0.7, "learning_rate": 1.5056818181818182e-05, "loss": 0.8327, "step": 496 }, { "epoch": 0.71, "learning_rate": 1.4914772727272727e-05, "loss": 0.6407, "step": 498 }, { "epoch": 0.71, "learning_rate": 1.4772727272727274e-05, "loss": 0.5521, "step": 500 }, { "epoch": 0.71, "learning_rate": 1.4630681818181818e-05, "loss": 0.6699, "step": 502 }, { "epoch": 0.72, "learning_rate": 1.4488636363636366e-05, "loss": 0.6511, "step": 504 }, { "epoch": 0.72, "learning_rate": 1.434659090909091e-05, "loss": 0.5944, "step": 506 }, { "epoch": 0.72, "learning_rate": 1.4204545454545456e-05, "loss": 0.707, "step": 508 }, { "epoch": 0.72, "learning_rate": 1.4062500000000001e-05, "loss": 0.8783, "step": 510 }, { "epoch": 0.73, "learning_rate": 1.3920454545454545e-05, "loss": 0.6688, "step": 512 }, { "epoch": 0.73, "learning_rate": 1.3778409090909091e-05, "loss": 0.672, "step": 514 }, { "epoch": 0.73, "learning_rate": 1.3636363636363637e-05, "loss": 0.8398, "step": 516 }, { "epoch": 0.74, "learning_rate": 1.3494318181818183e-05, "loss": 0.6358, "step": 518 }, { "epoch": 0.74, "learning_rate": 1.3352272727272727e-05, "loss": 0.8281, "step": 520 }, { "epoch": 0.74, "learning_rate": 1.3210227272727273e-05, "loss": 0.576, "step": 522 }, { "epoch": 0.74, "learning_rate": 1.3068181818181819e-05, "loss": 0.7594, "step": 524 }, { "epoch": 0.75, "learning_rate": 1.2926136363636365e-05, "loss": 0.4573, "step": 526 }, { "epoch": 0.75, "learning_rate": 1.2784090909090909e-05, "loss": 0.6455, "step": 528 }, { "epoch": 0.75, "learning_rate": 1.2642045454545457e-05, "loss": 0.5414, "step": 530 }, { "epoch": 0.76, "learning_rate": 1.25e-05, "loss": 0.6023, "step": 532 }, { "epoch": 0.76, "learning_rate": 1.2357954545454546e-05, "loss": 0.5274, "step": 534 }, { "epoch": 0.76, "learning_rate": 1.2215909090909092e-05, "loss": 0.5246, "step": 536 }, { "epoch": 0.76, "learning_rate": 1.2073863636363638e-05, "loss": 0.54, "step": 538 }, { "epoch": 0.77, "learning_rate": 1.1931818181818183e-05, "loss": 0.6186, "step": 540 }, { "epoch": 0.77, "learning_rate": 1.1789772727272728e-05, "loss": 0.641, "step": 542 }, { "epoch": 0.77, "learning_rate": 1.1647727272727273e-05, "loss": 0.589, "step": 544 }, { "epoch": 0.78, "learning_rate": 1.1505681818181818e-05, "loss": 0.8244, "step": 546 }, { "epoch": 0.78, "learning_rate": 1.1363636363636365e-05, "loss": 0.6111, "step": 548 }, { "epoch": 0.78, "learning_rate": 1.122159090909091e-05, "loss": 1.014, "step": 550 }, { "epoch": 0.78, "learning_rate": 1.1079545454545455e-05, "loss": 0.6972, "step": 552 }, { "epoch": 0.79, "learning_rate": 1.09375e-05, "loss": 0.5662, "step": 554 }, { "epoch": 0.79, "learning_rate": 1.0795454545454547e-05, "loss": 1.0225, "step": 556 }, { "epoch": 0.79, "learning_rate": 1.0653409090909092e-05, "loss": 0.6447, "step": 558 }, { "epoch": 0.8, "learning_rate": 1.0511363636363637e-05, "loss": 0.8831, "step": 560 }, { "epoch": 0.8, "learning_rate": 1.0369318181818184e-05, "loss": 0.5089, "step": 562 }, { "epoch": 0.8, "learning_rate": 1.0227272727272729e-05, "loss": 0.88, "step": 564 }, { "epoch": 0.8, "learning_rate": 1.0085227272727272e-05, "loss": 0.5177, "step": 566 }, { "epoch": 0.81, "learning_rate": 9.943181818181819e-06, "loss": 0.4923, "step": 568 }, { "epoch": 0.81, "learning_rate": 9.801136363636364e-06, "loss": 0.6019, "step": 570 }, { "epoch": 0.81, "learning_rate": 9.659090909090909e-06, "loss": 0.5321, "step": 572 }, { "epoch": 0.82, "learning_rate": 9.517045454545454e-06, "loss": 0.6639, "step": 574 }, { "epoch": 0.82, "learning_rate": 9.375000000000001e-06, "loss": 0.5423, "step": 576 }, { "epoch": 0.82, "learning_rate": 9.232954545454546e-06, "loss": 0.5199, "step": 578 }, { "epoch": 0.82, "learning_rate": 9.090909090909091e-06, "loss": 0.5719, "step": 580 }, { "epoch": 0.83, "learning_rate": 8.948863636363638e-06, "loss": 0.5513, "step": 582 }, { "epoch": 0.83, "learning_rate": 8.806818181818183e-06, "loss": 0.8127, "step": 584 }, { "epoch": 0.83, "learning_rate": 8.664772727272728e-06, "loss": 0.7982, "step": 586 }, { "epoch": 0.84, "learning_rate": 8.522727272727273e-06, "loss": 0.4545, "step": 588 }, { "epoch": 0.84, "learning_rate": 8.380681818181818e-06, "loss": 0.452, "step": 590 }, { "epoch": 0.84, "learning_rate": 8.238636363636363e-06, "loss": 0.6663, "step": 592 }, { "epoch": 0.84, "learning_rate": 8.09659090909091e-06, "loss": 0.6574, "step": 594 }, { "epoch": 0.85, "learning_rate": 7.954545454545455e-06, "loss": 0.4983, "step": 596 }, { "epoch": 0.85, "learning_rate": 7.8125e-06, "loss": 0.5488, "step": 598 }, { "epoch": 0.85, "learning_rate": 7.670454545454545e-06, "loss": 0.6552, "step": 600 }, { "epoch": 0.85, "eval_cer": 0.05650237985567327, "eval_loss": 0.6485163569450378, "eval_runtime": 362.1859, "eval_samples_per_second": 3.89, "eval_steps_per_second": 0.489, "step": 600 }, { "epoch": 0.86, "learning_rate": 7.528409090909091e-06, "loss": 0.6316, "step": 602 }, { "epoch": 0.86, "learning_rate": 7.386363636363637e-06, "loss": 0.4354, "step": 604 }, { "epoch": 0.86, "learning_rate": 7.244318181818183e-06, "loss": 0.7282, "step": 606 }, { "epoch": 0.86, "learning_rate": 7.102272727272728e-06, "loss": 0.4656, "step": 608 }, { "epoch": 0.87, "learning_rate": 6.960227272727272e-06, "loss": 0.7131, "step": 610 }, { "epoch": 0.87, "learning_rate": 6.818181818181818e-06, "loss": 0.5702, "step": 612 }, { "epoch": 0.87, "learning_rate": 6.676136363636363e-06, "loss": 0.7321, "step": 614 }, { "epoch": 0.88, "learning_rate": 6.534090909090909e-06, "loss": 0.8692, "step": 616 }, { "epoch": 0.88, "learning_rate": 6.392045454545454e-06, "loss": 0.6667, "step": 618 }, { "epoch": 0.88, "learning_rate": 6.25e-06, "loss": 0.5151, "step": 620 }, { "epoch": 0.88, "learning_rate": 6.107954545454546e-06, "loss": 0.458, "step": 622 }, { "epoch": 0.89, "learning_rate": 5.965909090909091e-06, "loss": 0.5797, "step": 624 }, { "epoch": 0.89, "learning_rate": 5.823863636363636e-06, "loss": 0.7189, "step": 626 }, { "epoch": 0.89, "learning_rate": 5.681818181818182e-06, "loss": 0.6369, "step": 628 }, { "epoch": 0.89, "learning_rate": 5.539772727272727e-06, "loss": 1.0862, "step": 630 }, { "epoch": 0.9, "learning_rate": 5.397727272727273e-06, "loss": 0.5919, "step": 632 }, { "epoch": 0.9, "learning_rate": 5.255681818181818e-06, "loss": 0.4427, "step": 634 }, { "epoch": 0.9, "learning_rate": 5.113636363636364e-06, "loss": 0.5736, "step": 636 }, { "epoch": 0.91, "learning_rate": 4.9715909090909094e-06, "loss": 1.3038, "step": 638 }, { "epoch": 0.91, "learning_rate": 4.8295454545454545e-06, "loss": 0.5233, "step": 640 }, { "epoch": 0.91, "learning_rate": 4.6875000000000004e-06, "loss": 0.5587, "step": 642 }, { "epoch": 0.91, "learning_rate": 4.5454545454545455e-06, "loss": 0.5579, "step": 644 }, { "epoch": 0.92, "learning_rate": 4.4034090909090914e-06, "loss": 0.501, "step": 646 }, { "epoch": 0.92, "learning_rate": 4.2613636363636365e-06, "loss": 0.5076, "step": 648 }, { "epoch": 0.92, "learning_rate": 4.119318181818182e-06, "loss": 0.6053, "step": 650 }, { "epoch": 0.93, "learning_rate": 3.9772727272727275e-06, "loss": 0.6975, "step": 652 }, { "epoch": 0.93, "learning_rate": 3.835227272727273e-06, "loss": 0.6589, "step": 654 }, { "epoch": 0.93, "learning_rate": 3.6931818181818186e-06, "loss": 0.4327, "step": 656 }, { "epoch": 0.93, "learning_rate": 3.551136363636364e-06, "loss": 0.9146, "step": 658 }, { "epoch": 0.94, "learning_rate": 3.409090909090909e-06, "loss": 0.4549, "step": 660 }, { "epoch": 0.94, "learning_rate": 3.2670454545454546e-06, "loss": 0.3903, "step": 662 }, { "epoch": 0.94, "learning_rate": 3.125e-06, "loss": 0.5719, "step": 664 }, { "epoch": 0.95, "learning_rate": 2.9829545454545457e-06, "loss": 0.4896, "step": 666 }, { "epoch": 0.95, "learning_rate": 2.840909090909091e-06, "loss": 0.6437, "step": 668 }, { "epoch": 0.95, "learning_rate": 2.6988636363636367e-06, "loss": 0.4993, "step": 670 }, { "epoch": 0.95, "learning_rate": 2.556818181818182e-06, "loss": 0.6381, "step": 672 }, { "epoch": 0.96, "learning_rate": 2.4147727272727273e-06, "loss": 0.7237, "step": 674 }, { "epoch": 0.96, "learning_rate": 2.2727272727272728e-06, "loss": 0.7229, "step": 676 }, { "epoch": 0.96, "learning_rate": 2.1306818181818183e-06, "loss": 0.4903, "step": 678 }, { "epoch": 0.97, "learning_rate": 1.9886363636363638e-06, "loss": 0.602, "step": 680 }, { "epoch": 0.97, "learning_rate": 1.8465909090909093e-06, "loss": 0.5082, "step": 682 }, { "epoch": 0.97, "learning_rate": 1.7045454545454546e-06, "loss": 0.5288, "step": 684 }, { "epoch": 0.97, "learning_rate": 1.5625e-06, "loss": 0.7898, "step": 686 }, { "epoch": 0.98, "learning_rate": 1.4204545454545456e-06, "loss": 0.5447, "step": 688 }, { "epoch": 0.98, "learning_rate": 1.278409090909091e-06, "loss": 0.6344, "step": 690 }, { "epoch": 0.98, "learning_rate": 1.1363636363636364e-06, "loss": 0.9671, "step": 692 }, { "epoch": 0.99, "learning_rate": 9.943181818181819e-07, "loss": 0.6259, "step": 694 }, { "epoch": 0.99, "learning_rate": 8.522727272727273e-07, "loss": 0.492, "step": 696 }, { "epoch": 0.99, "learning_rate": 7.102272727272728e-07, "loss": 0.5994, "step": 698 }, { "epoch": 0.99, "learning_rate": 5.681818181818182e-07, "loss": 0.4469, "step": 700 }, { "epoch": 1.0, "learning_rate": 4.2613636363636364e-07, "loss": 0.5537, "step": 702 }, { "epoch": 1.0, "learning_rate": 2.840909090909091e-07, "loss": 0.5373, "step": 704 }, { "epoch": 1.0, "step": 704, "total_flos": 4.983731178121986e+18, "train_loss": 1.219155483286489, "train_runtime": 1812.2679, "train_samples_per_second": 3.108, "train_steps_per_second": 0.388 } ], "max_steps": 704, "num_train_epochs": 1, "total_flos": 4.983731178121986e+18, "trial_name": null, "trial_params": null }