| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "global_step": 704, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 11.881, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.985795454545455e-05, | |
| "loss": 11.0362, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.971590909090909e-05, | |
| "loss": 4.7507, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9644886363636365e-05, | |
| "loss": 5.152, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.950284090909092e-05, | |
| "loss": 4.4355, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9360795454545455e-05, | |
| "loss": 4.1403, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.921875e-05, | |
| "loss": 3.8628, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.907670454545455e-05, | |
| "loss": 3.3533, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.893465909090909e-05, | |
| "loss": 3.6164, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.8792613636363636e-05, | |
| "loss": 4.0321, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.865056818181819e-05, | |
| "loss": 3.5433, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.850852272727273e-05, | |
| "loss": 4.0187, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.836647727272727e-05, | |
| "loss": 3.601, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.822443181818182e-05, | |
| "loss": 3.4055, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.808238636363637e-05, | |
| "loss": 3.4381, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.794034090909091e-05, | |
| "loss": 3.3023, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.779829545454546e-05, | |
| "loss": 3.7605, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.765625e-05, | |
| "loss": 2.0996, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.751420454545455e-05, | |
| "loss": 2.5817, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.737215909090909e-05, | |
| "loss": 2.0194, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.723011363636364e-05, | |
| "loss": 2.2909, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.708806818181818e-05, | |
| "loss": 1.8493, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.694602272727273e-05, | |
| "loss": 2.2346, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.6803977272727274e-05, | |
| "loss": 2.3284, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.666193181818182e-05, | |
| "loss": 2.1243, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.6519886363636364e-05, | |
| "loss": 2.0376, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.6377840909090916e-05, | |
| "loss": 4.0457, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.6235795454545454e-05, | |
| "loss": 2.4413, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.609375e-05, | |
| "loss": 1.7908, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.595170454545455e-05, | |
| "loss": 2.1725, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.580965909090909e-05, | |
| "loss": 1.2263, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.5667613636363634e-05, | |
| "loss": 2.2057, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.5525568181818186e-05, | |
| "loss": 1.4206, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.538352272727273e-05, | |
| "loss": 1.6346, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.5241477272727276e-05, | |
| "loss": 1.6167, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.509943181818182e-05, | |
| "loss": 1.3222, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.4957386363636366e-05, | |
| "loss": 1.1639, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.481534090909091e-05, | |
| "loss": 2.0723, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.4673295454545457e-05, | |
| "loss": 1.707, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.453125e-05, | |
| "loss": 1.8604, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.438920454545455e-05, | |
| "loss": 1.9273, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.424715909090909e-05, | |
| "loss": 1.8075, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.410511363636364e-05, | |
| "loss": 1.4449, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.396306818181818e-05, | |
| "loss": 1.0887, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.3821022727272734e-05, | |
| "loss": 1.4518, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.367897727272727e-05, | |
| "loss": 1.3403, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.353693181818182e-05, | |
| "loss": 0.9441, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.339488636363637e-05, | |
| "loss": 1.1635, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.3252840909090914e-05, | |
| "loss": 1.2348, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.311079545454545e-05, | |
| "loss": 1.3606, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.2968750000000004e-05, | |
| "loss": 1.0958, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.282670454545455e-05, | |
| "loss": 1.6382, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.268465909090909e-05, | |
| "loss": 1.9738, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.254261363636364e-05, | |
| "loss": 1.2341, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.2400568181818185e-05, | |
| "loss": 1.3189, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.225852272727273e-05, | |
| "loss": 1.3616, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.2116477272727275e-05, | |
| "loss": 1.525, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.197443181818182e-05, | |
| "loss": 1.4151, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.1832386363636365e-05, | |
| "loss": 1.6387, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.169034090909092e-05, | |
| "loss": 1.3773, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.1548295454545455e-05, | |
| "loss": 1.4374, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.140625e-05, | |
| "loss": 1.3446, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.126420454545455e-05, | |
| "loss": 1.8676, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.112215909090909e-05, | |
| "loss": 1.8264, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.0980113636363635e-05, | |
| "loss": 1.0708, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.083806818181819e-05, | |
| "loss": 1.4238, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.069602272727273e-05, | |
| "loss": 1.6779, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.055397727272727e-05, | |
| "loss": 1.5807, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.041193181818182e-05, | |
| "loss": 1.1561, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.026988636363637e-05, | |
| "loss": 1.6651, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.012784090909091e-05, | |
| "loss": 1.9554, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 3.998579545454546e-05, | |
| "loss": 1.27, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.984375e-05, | |
| "loss": 1.9976, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.970170454545455e-05, | |
| "loss": 1.0606, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.955965909090909e-05, | |
| "loss": 1.2185, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.941761363636364e-05, | |
| "loss": 1.6675, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.927556818181818e-05, | |
| "loss": 1.0349, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.913352272727273e-05, | |
| "loss": 0.9453, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.899147727272727e-05, | |
| "loss": 1.0831, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.884943181818182e-05, | |
| "loss": 2.1055, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.8707386363636364e-05, | |
| "loss": 1.5656, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.8565340909090915e-05, | |
| "loss": 1.0042, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.8423295454545454e-05, | |
| "loss": 1.8098, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.828125e-05, | |
| "loss": 1.4084, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.813920454545455e-05, | |
| "loss": 1.5181, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.799715909090909e-05, | |
| "loss": 1.334, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.7855113636363634e-05, | |
| "loss": 1.1288, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.7713068181818186e-05, | |
| "loss": 0.9293, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.757102272727273e-05, | |
| "loss": 1.1451, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.742897727272727e-05, | |
| "loss": 1.1264, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.728693181818182e-05, | |
| "loss": 2.3384, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.7144886363636366e-05, | |
| "loss": 0.8733, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.700284090909091e-05, | |
| "loss": 1.4847, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.6860795454545456e-05, | |
| "loss": 1.8384, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.671875e-05, | |
| "loss": 0.973, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.6576704545454547e-05, | |
| "loss": 1.5602, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.643465909090909e-05, | |
| "loss": 1.459, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.629261363636364e-05, | |
| "loss": 1.6167, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.615056818181818e-05, | |
| "loss": 0.7387, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.6008522727272734e-05, | |
| "loss": 1.1738, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_cer": 0.22616305849838783, | |
| "eval_loss": 1.2406114339828491, | |
| "eval_runtime": 381.0451, | |
| "eval_samples_per_second": 3.698, | |
| "eval_steps_per_second": 0.465, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.586647727272727e-05, | |
| "loss": 1.1026, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.572443181818182e-05, | |
| "loss": 1.2022, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.558238636363637e-05, | |
| "loss": 1.2101, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.5440340909090914e-05, | |
| "loss": 0.9208, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.529829545454545e-05, | |
| "loss": 1.1579, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.5156250000000004e-05, | |
| "loss": 0.8407, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.501420454545455e-05, | |
| "loss": 1.3193, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.487215909090909e-05, | |
| "loss": 1.7611, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.473011363636364e-05, | |
| "loss": 0.8605, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.4588068181818184e-05, | |
| "loss": 1.2032, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.444602272727273e-05, | |
| "loss": 1.5094, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.4303977272727275e-05, | |
| "loss": 0.8793, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.416193181818182e-05, | |
| "loss": 1.4906, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.4019886363636365e-05, | |
| "loss": 1.5084, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.387784090909091e-05, | |
| "loss": 1.0827, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.3735795454545455e-05, | |
| "loss": 1.9808, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.359375e-05, | |
| "loss": 1.022, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.345170454545455e-05, | |
| "loss": 0.9199, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.330965909090909e-05, | |
| "loss": 1.3656, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.3167613636363635e-05, | |
| "loss": 1.1733, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.302556818181819e-05, | |
| "loss": 1.098, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.288352272727273e-05, | |
| "loss": 1.5639, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.274147727272727e-05, | |
| "loss": 1.2712, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.259943181818182e-05, | |
| "loss": 1.0763, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.245738636363637e-05, | |
| "loss": 0.7826, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.231534090909091e-05, | |
| "loss": 1.7579, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.217329545454546e-05, | |
| "loss": 1.1492, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.203125e-05, | |
| "loss": 0.8802, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.188920454545455e-05, | |
| "loss": 0.9283, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.174715909090909e-05, | |
| "loss": 1.4768, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.160511363636364e-05, | |
| "loss": 1.2097, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.146306818181818e-05, | |
| "loss": 1.083, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.132102272727273e-05, | |
| "loss": 1.5882, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.117897727272727e-05, | |
| "loss": 1.3427, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.103693181818182e-05, | |
| "loss": 1.1064, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.089488636363636e-05, | |
| "loss": 1.3874, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.0752840909090915e-05, | |
| "loss": 2.1061, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.0610795454545454e-05, | |
| "loss": 0.8418, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.0468750000000002e-05, | |
| "loss": 0.897, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.0326704545454547e-05, | |
| "loss": 1.3175, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.018465909090909e-05, | |
| "loss": 0.7876, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.0042613636363637e-05, | |
| "loss": 1.4139, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.9900568181818182e-05, | |
| "loss": 0.8398, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.975852272727273e-05, | |
| "loss": 0.7938, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.9616477272727272e-05, | |
| "loss": 0.9088, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.9474431818181818e-05, | |
| "loss": 1.0675, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.9332386363636366e-05, | |
| "loss": 1.2702, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.9190340909090915e-05, | |
| "loss": 1.0515, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.9048295454545453e-05, | |
| "loss": 0.9444, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.890625e-05, | |
| "loss": 1.4437, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.876420454545455e-05, | |
| "loss": 0.8352, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.862215909090909e-05, | |
| "loss": 0.9627, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.8480113636363637e-05, | |
| "loss": 0.964, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.8338068181818185e-05, | |
| "loss": 1.0729, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.819602272727273e-05, | |
| "loss": 1.094, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.8053977272727272e-05, | |
| "loss": 1.3392, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.791193181818182e-05, | |
| "loss": 0.9211, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.7769886363636365e-05, | |
| "loss": 1.0152, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.7627840909090914e-05, | |
| "loss": 1.3656, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.7485795454545455e-05, | |
| "loss": 1.6998, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.734375e-05, | |
| "loss": 0.9577, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.720170454545455e-05, | |
| "loss": 1.7129, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.705965909090909e-05, | |
| "loss": 1.3892, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6917613636363636e-05, | |
| "loss": 0.8672, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6775568181818184e-05, | |
| "loss": 0.7077, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.663352272727273e-05, | |
| "loss": 1.4751, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.649147727272727e-05, | |
| "loss": 1.221, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.634943181818182e-05, | |
| "loss": 1.01, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.6207386363636365e-05, | |
| "loss": 1.0246, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.6065340909090913e-05, | |
| "loss": 0.9274, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.5923295454545455e-05, | |
| "loss": 1.5181, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.578125e-05, | |
| "loss": 0.6105, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.563920454545455e-05, | |
| "loss": 0.8329, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.549715909090909e-05, | |
| "loss": 0.707, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.5355113636363635e-05, | |
| "loss": 1.2725, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.5213068181818184e-05, | |
| "loss": 1.7603, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.5071022727272732e-05, | |
| "loss": 0.8998, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.4928977272727274e-05, | |
| "loss": 0.8211, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.478693181818182e-05, | |
| "loss": 1.2931, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.471590909090909e-05, | |
| "loss": 1.1298, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.4573863636363636e-05, | |
| "loss": 1.799, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.4431818181818185e-05, | |
| "loss": 0.9355, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.4289772727272727e-05, | |
| "loss": 0.9555, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.4147727272727275e-05, | |
| "loss": 0.5778, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.400568181818182e-05, | |
| "loss": 0.9761, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.3863636363636365e-05, | |
| "loss": 0.9289, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.372159090909091e-05, | |
| "loss": 0.7408, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.3579545454545455e-05, | |
| "loss": 0.6301, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.34375e-05, | |
| "loss": 0.9952, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.3295454545454546e-05, | |
| "loss": 0.8253, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.315340909090909e-05, | |
| "loss": 0.6469, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.3011363636363636e-05, | |
| "loss": 1.0202, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.2869318181818184e-05, | |
| "loss": 0.9247, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.272727272727273e-05, | |
| "loss": 0.6159, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.2585227272727274e-05, | |
| "loss": 0.6967, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.244318181818182e-05, | |
| "loss": 0.6891, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.2301136363636365e-05, | |
| "loss": 0.8713, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.215909090909091e-05, | |
| "loss": 0.7181, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.2017045454545458e-05, | |
| "loss": 0.9892, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.1875e-05, | |
| "loss": 0.9181, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_cer": 0.10839858744050361, | |
| "eval_loss": 0.8294418454170227, | |
| "eval_runtime": 369.2935, | |
| "eval_samples_per_second": 3.815, | |
| "eval_steps_per_second": 0.479, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.1732954545454545e-05, | |
| "loss": 0.8469, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.1590909090909093e-05, | |
| "loss": 1.125, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.1448863636363635e-05, | |
| "loss": 0.65, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.1306818181818183e-05, | |
| "loss": 1.1301, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.116477272727273e-05, | |
| "loss": 0.7355, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.1022727272727274e-05, | |
| "loss": 0.8691, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.088068181818182e-05, | |
| "loss": 1.2798, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.0738636363636367e-05, | |
| "loss": 0.9451, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.059659090909091e-05, | |
| "loss": 0.8104, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.0454545454545457e-05, | |
| "loss": 0.9429, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.0312500000000002e-05, | |
| "loss": 1.0933, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.0170454545454544e-05, | |
| "loss": 0.8391, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 2.0028409090909093e-05, | |
| "loss": 0.5393, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.9886363636363638e-05, | |
| "loss": 0.7289, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.9744318181818183e-05, | |
| "loss": 1.4527, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.9602272727272728e-05, | |
| "loss": 0.6219, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.9460227272727273e-05, | |
| "loss": 0.6851, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.9318181818181818e-05, | |
| "loss": 0.6631, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.9176136363636366e-05, | |
| "loss": 0.947, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.9034090909090908e-05, | |
| "loss": 0.5946, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.8892045454545457e-05, | |
| "loss": 0.8884, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.8750000000000002e-05, | |
| "loss": 0.8167, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.8607954545454543e-05, | |
| "loss": 0.5471, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.8465909090909092e-05, | |
| "loss": 0.6034, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.8323863636363637e-05, | |
| "loss": 0.604, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.8181818181818182e-05, | |
| "loss": 0.6084, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.8039772727272727e-05, | |
| "loss": 0.6083, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.7897727272727276e-05, | |
| "loss": 0.6998, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.7755681818181817e-05, | |
| "loss": 0.9948, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.7613636363636366e-05, | |
| "loss": 0.7974, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.747159090909091e-05, | |
| "loss": 1.1083, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.7329545454545456e-05, | |
| "loss": 1.481, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.71875e-05, | |
| "loss": 1.5551, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.7045454545454546e-05, | |
| "loss": 0.747, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.690340909090909e-05, | |
| "loss": 0.7262, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.6761363636363636e-05, | |
| "loss": 0.7682, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.6619318181818185e-05, | |
| "loss": 0.8529, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.6477272727272726e-05, | |
| "loss": 0.7041, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.6335227272727275e-05, | |
| "loss": 0.5702, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.619318181818182e-05, | |
| "loss": 1.1103, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.6051136363636365e-05, | |
| "loss": 0.7418, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.590909090909091e-05, | |
| "loss": 1.1425, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.5767045454545455e-05, | |
| "loss": 0.7602, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.5625e-05, | |
| "loss": 0.6746, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.5482954545454545e-05, | |
| "loss": 0.591, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.534090909090909e-05, | |
| "loss": 0.6343, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.5198863636363636e-05, | |
| "loss": 0.7024, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.5056818181818182e-05, | |
| "loss": 0.8327, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.4914772727272727e-05, | |
| "loss": 0.6407, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.4772727272727274e-05, | |
| "loss": 0.5521, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.4630681818181818e-05, | |
| "loss": 0.6699, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.4488636363636366e-05, | |
| "loss": 0.6511, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.434659090909091e-05, | |
| "loss": 0.5944, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.4204545454545456e-05, | |
| "loss": 0.707, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.4062500000000001e-05, | |
| "loss": 0.8783, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.3920454545454545e-05, | |
| "loss": 0.6688, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.3778409090909091e-05, | |
| "loss": 0.672, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.3636363636363637e-05, | |
| "loss": 0.8398, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.3494318181818183e-05, | |
| "loss": 0.6358, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.3352272727272727e-05, | |
| "loss": 0.8281, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.3210227272727273e-05, | |
| "loss": 0.576, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.3068181818181819e-05, | |
| "loss": 0.7594, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.2926136363636365e-05, | |
| "loss": 0.4573, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.2784090909090909e-05, | |
| "loss": 0.6455, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.2642045454545457e-05, | |
| "loss": 0.5414, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.6023, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.2357954545454546e-05, | |
| "loss": 0.5274, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.2215909090909092e-05, | |
| "loss": 0.5246, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.2073863636363638e-05, | |
| "loss": 0.54, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.1931818181818183e-05, | |
| "loss": 0.6186, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.1789772727272728e-05, | |
| "loss": 0.641, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.1647727272727273e-05, | |
| "loss": 0.589, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.1505681818181818e-05, | |
| "loss": 0.8244, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.1363636363636365e-05, | |
| "loss": 0.6111, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.122159090909091e-05, | |
| "loss": 1.014, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.1079545454545455e-05, | |
| "loss": 0.6972, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.09375e-05, | |
| "loss": 0.5662, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.0795454545454547e-05, | |
| "loss": 1.0225, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.0653409090909092e-05, | |
| "loss": 0.6447, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.0511363636363637e-05, | |
| "loss": 0.8831, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.0369318181818184e-05, | |
| "loss": 0.5089, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.0227272727272729e-05, | |
| "loss": 0.88, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.0085227272727272e-05, | |
| "loss": 0.5177, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 9.943181818181819e-06, | |
| "loss": 0.4923, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 9.801136363636364e-06, | |
| "loss": 0.6019, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 9.659090909090909e-06, | |
| "loss": 0.5321, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 9.517045454545454e-06, | |
| "loss": 0.6639, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 9.375000000000001e-06, | |
| "loss": 0.5423, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 9.232954545454546e-06, | |
| "loss": 0.5199, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 9.090909090909091e-06, | |
| "loss": 0.5719, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 8.948863636363638e-06, | |
| "loss": 0.5513, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 8.806818181818183e-06, | |
| "loss": 0.8127, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 8.664772727272728e-06, | |
| "loss": 0.7982, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 8.522727272727273e-06, | |
| "loss": 0.4545, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 8.380681818181818e-06, | |
| "loss": 0.452, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 8.238636363636363e-06, | |
| "loss": 0.6663, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 8.09659090909091e-06, | |
| "loss": 0.6574, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 7.954545454545455e-06, | |
| "loss": 0.4983, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 7.8125e-06, | |
| "loss": 0.5488, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 7.670454545454545e-06, | |
| "loss": 0.6552, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_cer": 0.05650237985567327, | |
| "eval_loss": 0.6485163569450378, | |
| "eval_runtime": 362.1859, | |
| "eval_samples_per_second": 3.89, | |
| "eval_steps_per_second": 0.489, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 7.528409090909091e-06, | |
| "loss": 0.6316, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 7.386363636363637e-06, | |
| "loss": 0.4354, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 7.244318181818183e-06, | |
| "loss": 0.7282, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 7.102272727272728e-06, | |
| "loss": 0.4656, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 6.960227272727272e-06, | |
| "loss": 0.7131, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 6.818181818181818e-06, | |
| "loss": 0.5702, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 6.676136363636363e-06, | |
| "loss": 0.7321, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 6.534090909090909e-06, | |
| "loss": 0.8692, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 6.392045454545454e-06, | |
| "loss": 0.6667, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 6.25e-06, | |
| "loss": 0.5151, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 6.107954545454546e-06, | |
| "loss": 0.458, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5.965909090909091e-06, | |
| "loss": 0.5797, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5.823863636363636e-06, | |
| "loss": 0.7189, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5.681818181818182e-06, | |
| "loss": 0.6369, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5.539772727272727e-06, | |
| "loss": 1.0862, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.397727272727273e-06, | |
| "loss": 0.5919, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.255681818181818e-06, | |
| "loss": 0.4427, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.113636363636364e-06, | |
| "loss": 0.5736, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.9715909090909094e-06, | |
| "loss": 1.3038, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.8295454545454545e-06, | |
| "loss": 0.5233, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.6875000000000004e-06, | |
| "loss": 0.5587, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.5454545454545455e-06, | |
| "loss": 0.5579, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.4034090909090914e-06, | |
| "loss": 0.501, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.2613636363636365e-06, | |
| "loss": 0.5076, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.119318181818182e-06, | |
| "loss": 0.6053, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.9772727272727275e-06, | |
| "loss": 0.6975, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.835227272727273e-06, | |
| "loss": 0.6589, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.6931818181818186e-06, | |
| "loss": 0.4327, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.551136363636364e-06, | |
| "loss": 0.9146, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.409090909090909e-06, | |
| "loss": 0.4549, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.2670454545454546e-06, | |
| "loss": 0.3903, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.125e-06, | |
| "loss": 0.5719, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.9829545454545457e-06, | |
| "loss": 0.4896, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.840909090909091e-06, | |
| "loss": 0.6437, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.6988636363636367e-06, | |
| "loss": 0.4993, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.556818181818182e-06, | |
| "loss": 0.6381, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.4147727272727273e-06, | |
| "loss": 0.7237, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.2727272727272728e-06, | |
| "loss": 0.7229, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.1306818181818183e-06, | |
| "loss": 0.4903, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.9886363636363638e-06, | |
| "loss": 0.602, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.8465909090909093e-06, | |
| "loss": 0.5082, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.7045454545454546e-06, | |
| "loss": 0.5288, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.5625e-06, | |
| "loss": 0.7898, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.4204545454545456e-06, | |
| "loss": 0.5447, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.278409090909091e-06, | |
| "loss": 0.6344, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.1363636363636364e-06, | |
| "loss": 0.9671, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 9.943181818181819e-07, | |
| "loss": 0.6259, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 8.522727272727273e-07, | |
| "loss": 0.492, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 7.102272727272728e-07, | |
| "loss": 0.5994, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 5.681818181818182e-07, | |
| "loss": 0.4469, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.2613636363636364e-07, | |
| "loss": 0.5537, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 2.840909090909091e-07, | |
| "loss": 0.5373, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 704, | |
| "total_flos": 4.983731178121986e+18, | |
| "train_loss": 1.219155483286489, | |
| "train_runtime": 1812.2679, | |
| "train_samples_per_second": 3.108, | |
| "train_steps_per_second": 0.388 | |
| } | |
| ], | |
| "max_steps": 704, | |
| "num_train_epochs": 1, | |
| "total_flos": 4.983731178121986e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |