{ "best_global_step": 415, "best_metric": 0.2885131265635832, "best_model_checkpoint": "trocr\\checkpoint-415", "epoch": 0.603112840466926, "eval_steps": 5, "global_step": 465, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0025940337224383916, "grad_norm": null, "learning_rate": 0.0, "loss": 8.3361, "step": 2 }, { "epoch": 0.005188067444876783, "grad_norm": 42.44731521606445, "learning_rate": 0.0, "loss": 9.0712, "step": 4 }, { "epoch": 0.00648508430609598, "eval_cer": 0.9740181451005214, "eval_loss": 10.673304557800293, "eval_runtime": 121.4924, "eval_samples_per_second": 21.763, "eval_steps_per_second": 2.724, "step": 5 }, { "epoch": 0.007782101167315175, "grad_norm": 50.13338088989258, "learning_rate": 1.2000000000000002e-07, "loss": 9.0523, "step": 6 }, { "epoch": 0.010376134889753566, "grad_norm": 54.91879653930664, "learning_rate": 2.4000000000000003e-07, "loss": 8.6874, "step": 8 }, { "epoch": 0.01297016861219196, "grad_norm": 57.007408142089844, "learning_rate": 3.6e-07, "loss": 9.1085, "step": 10 }, { "epoch": 0.01297016861219196, "eval_cer": 0.9735660246556349, "eval_loss": 10.574353218078613, "eval_runtime": 92.0698, "eval_samples_per_second": 28.717, "eval_steps_per_second": 3.595, "step": 10 }, { "epoch": 0.01556420233463035, "grad_norm": 65.91108703613281, "learning_rate": 4.800000000000001e-07, "loss": 8.1886, "step": 12 }, { "epoch": 0.018158236057068743, "grad_norm": 34.232662200927734, "learning_rate": 6.000000000000001e-07, "loss": 8.3056, "step": 14 }, { "epoch": 0.019455252918287938, "eval_cer": 0.9708231606233234, "eval_loss": 10.372063636779785, "eval_runtime": 93.5905, "eval_samples_per_second": 28.251, "eval_steps_per_second": 3.537, "step": 15 }, { "epoch": 0.020752269779507133, "grad_norm": 42.87216567993164, "learning_rate": 7.2e-07, "loss": 8.7293, "step": 16 }, { "epoch": 0.023346303501945526, "grad_norm": 39.21005630493164, "learning_rate": 8.4e-07, 
"loss": 8.6748, "step": 18 }, { "epoch": 0.02594033722438392, "grad_norm": 94.66182708740234, "learning_rate": 9.600000000000001e-07, "loss": 8.2991, "step": 20 }, { "epoch": 0.02594033722438392, "eval_cer": 0.9659101184555565, "eval_loss": 10.076202392578125, "eval_runtime": 88.5272, "eval_samples_per_second": 29.867, "eval_steps_per_second": 3.739, "step": 20 }, { "epoch": 0.028534370946822308, "grad_norm": 34.152923583984375, "learning_rate": 1.08e-06, "loss": 8.3962, "step": 22 }, { "epoch": 0.0311284046692607, "grad_norm": null, "learning_rate": 1.2000000000000002e-06, "loss": 8.2708, "step": 24 }, { "epoch": 0.0324254215304799, "eval_cer": 0.9612683485547217, "eval_loss": 9.797987937927246, "eval_runtime": 96.7288, "eval_samples_per_second": 27.334, "eval_steps_per_second": 3.422, "step": 25 }, { "epoch": 0.03372243839169909, "grad_norm": 45.9756965637207, "learning_rate": 1.26e-06, "loss": 7.8454, "step": 26 }, { "epoch": 0.03631647211413749, "grad_norm": 32.557674407958984, "learning_rate": 1.38e-06, "loss": 8.2102, "step": 28 }, { "epoch": 0.038910505836575876, "grad_norm": 32.354164123535156, "learning_rate": 1.5e-06, "loss": 8.6184, "step": 30 }, { "epoch": 0.038910505836575876, "eval_cer": 0.9502366096994906, "eval_loss": 9.3967866897583, "eval_runtime": 98.634, "eval_samples_per_second": 26.806, "eval_steps_per_second": 3.356, "step": 30 }, { "epoch": 0.041504539559014265, "grad_norm": 33.6341667175293, "learning_rate": 1.62e-06, "loss": 8.1516, "step": 32 }, { "epoch": 0.04409857328145266, "grad_norm": 39.92112350463867, "learning_rate": 1.74e-06, "loss": 8.0678, "step": 34 }, { "epoch": 0.04539559014267185, "eval_cer": 0.9342315459505078, "eval_loss": 9.007755279541016, "eval_runtime": 81.7686, "eval_samples_per_second": 32.335, "eval_steps_per_second": 4.048, "step": 35 }, { "epoch": 0.04669260700389105, "grad_norm": 29.05873680114746, "learning_rate": 1.86e-06, "loss": 7.6011, "step": 36 }, { "epoch": 0.04928664072632944, "grad_norm": 
25.371105194091797, "learning_rate": 1.98e-06, "loss": 7.2863, "step": 38 }, { "epoch": 0.05188067444876784, "grad_norm": 26.220144271850586, "learning_rate": 2.1000000000000002e-06, "loss": 7.699, "step": 40 }, { "epoch": 0.05188067444876784, "eval_cer": 0.9170509690448202, "eval_loss": 8.67233657836914, "eval_runtime": 84.2938, "eval_samples_per_second": 31.366, "eval_steps_per_second": 3.927, "step": 40 }, { "epoch": 0.054474708171206226, "grad_norm": 29.047372817993164, "learning_rate": 2.22e-06, "loss": 7.8821, "step": 42 }, { "epoch": 0.057068741893644616, "grad_norm": 27.84252166748047, "learning_rate": 2.34e-06, "loss": 7.091, "step": 44 }, { "epoch": 0.058365758754863814, "eval_cer": 0.9047231515809145, "eval_loss": 8.38408088684082, "eval_runtime": 92.3132, "eval_samples_per_second": 28.642, "eval_steps_per_second": 3.586, "step": 45 }, { "epoch": 0.05966277561608301, "grad_norm": 22.684850692749023, "learning_rate": 2.46e-06, "loss": 7.4182, "step": 46 }, { "epoch": 0.0622568093385214, "grad_norm": 33.87125778198242, "learning_rate": 2.58e-06, "loss": 7.2075, "step": 48 }, { "epoch": 0.0648508430609598, "grad_norm": 21.509159088134766, "learning_rate": 2.7e-06, "loss": 7.1052, "step": 50 }, { "epoch": 0.0648508430609598, "eval_cer": 0.899629261235193, "eval_loss": 8.13282585144043, "eval_runtime": 101.5177, "eval_samples_per_second": 26.045, "eval_steps_per_second": 3.261, "step": 50 }, { "epoch": 0.06744487678339818, "grad_norm": null, "learning_rate": 2.82e-06, "loss": 6.7926, "step": 52 }, { "epoch": 0.07003891050583658, "grad_norm": 21.155956268310547, "learning_rate": 2.88e-06, "loss": 6.8734, "step": 54 }, { "epoch": 0.07133592736705577, "eval_cer": 0.8984236067154957, "eval_loss": 7.93735408782959, "eval_runtime": 106.5732, "eval_samples_per_second": 24.809, "eval_steps_per_second": 3.106, "step": 55 }, { "epoch": 0.07263294422827497, "grad_norm": 19.74744415283203, "learning_rate": 3e-06, "loss": 6.8722, "step": 56 }, { "epoch": 
0.07522697795071336, "grad_norm": 319.0416564941406, "learning_rate": 3.1199999999999998e-06, "loss": 6.4253, "step": 58 }, { "epoch": 0.07782101167315175, "grad_norm": 23.68337631225586, "learning_rate": 3.24e-06, "loss": 6.8842, "step": 60 }, { "epoch": 0.07782101167315175, "eval_cer": 0.8882056846610604, "eval_loss": 7.703160285949707, "eval_runtime": 110.1461, "eval_samples_per_second": 24.004, "eval_steps_per_second": 3.005, "step": 60 }, { "epoch": 0.08041504539559015, "grad_norm": 20.635147094726562, "learning_rate": 3.36e-06, "loss": 6.2162, "step": 62 }, { "epoch": 0.08300907911802853, "grad_norm": 19.55179786682129, "learning_rate": 3.48e-06, "loss": 6.7139, "step": 64 }, { "epoch": 0.08430609597924774, "eval_cer": 0.8779576212436326, "eval_loss": 7.470834255218506, "eval_runtime": 111.6785, "eval_samples_per_second": 23.675, "eval_steps_per_second": 2.964, "step": 65 }, { "epoch": 0.08560311284046693, "grad_norm": 17.086580276489258, "learning_rate": 3.6e-06, "loss": 6.6201, "step": 66 }, { "epoch": 0.08819714656290532, "grad_norm": 15.556456565856934, "learning_rate": 3.72e-06, "loss": 6.093, "step": 68 }, { "epoch": 0.0907911802853437, "grad_norm": 49.20164489746094, "learning_rate": 3.8400000000000005e-06, "loss": 5.9421, "step": 70 }, { "epoch": 0.0907911802853437, "eval_cer": 0.8584561593875275, "eval_loss": 7.236043930053711, "eval_runtime": 126.8066, "eval_samples_per_second": 20.851, "eval_steps_per_second": 2.61, "step": 70 }, { "epoch": 0.0933852140077821, "grad_norm": 17.137556076049805, "learning_rate": 3.96e-06, "loss": 6.7995, "step": 72 }, { "epoch": 0.0959792477302205, "grad_norm": 24.586021423339844, "learning_rate": 4.080000000000001e-06, "loss": 6.043, "step": 74 }, { "epoch": 0.09727626459143969, "eval_cer": 0.8255719323627815, "eval_loss": 7.014294624328613, "eval_runtime": 128.9469, "eval_samples_per_second": 20.505, "eval_steps_per_second": 2.567, "step": 75 }, { "epoch": 0.09857328145265888, "grad_norm": 17.460948944091797, 
"learning_rate": 4.2000000000000004e-06, "loss": 6.0706, "step": 76 }, { "epoch": 0.10116731517509728, "grad_norm": 26.46303939819336, "learning_rate": 4.32e-06, "loss": 6.7666, "step": 78 }, { "epoch": 0.10376134889753567, "grad_norm": 32.067054748535156, "learning_rate": 4.44e-06, "loss": 6.0295, "step": 80 }, { "epoch": 0.10376134889753567, "eval_cer": 0.782530066009585, "eval_loss": 6.801568984985352, "eval_runtime": 114.6152, "eval_samples_per_second": 23.069, "eval_steps_per_second": 2.888, "step": 80 }, { "epoch": 0.10635538261997406, "grad_norm": 14.94288158416748, "learning_rate": 4.56e-06, "loss": 5.7654, "step": 82 }, { "epoch": 0.10894941634241245, "grad_norm": 21.89992904663086, "learning_rate": 4.68e-06, "loss": 5.7672, "step": 84 }, { "epoch": 0.11024643320363164, "eval_cer": 0.7390059378485095, "eval_loss": 6.629735469818115, "eval_runtime": 116.8302, "eval_samples_per_second": 22.631, "eval_steps_per_second": 2.833, "step": 85 }, { "epoch": 0.11154345006485085, "grad_norm": 23.829565048217773, "learning_rate": 4.800000000000001e-06, "loss": 5.4628, "step": 86 }, { "epoch": 0.11413748378728923, "grad_norm": 18.05320930480957, "learning_rate": 4.92e-06, "loss": 5.1534, "step": 88 }, { "epoch": 0.11673151750972763, "grad_norm": 25.233701705932617, "learning_rate": 5.04e-06, "loss": 5.3042, "step": 90 }, { "epoch": 0.11673151750972763, "eval_cer": 0.7084124544111885, "eval_loss": 6.476639747619629, "eval_runtime": 118.7446, "eval_samples_per_second": 22.266, "eval_steps_per_second": 2.787, "step": 90 }, { "epoch": 0.11932555123216602, "grad_norm": 35.21253967285156, "learning_rate": 5.16e-06, "loss": 5.4296, "step": 92 }, { "epoch": 0.1219195849546044, "grad_norm": 17.080265045166016, "learning_rate": 5.279999999999999e-06, "loss": 5.3031, "step": 94 }, { "epoch": 0.12321660181582361, "eval_cer": 0.6901467884377731, "eval_loss": 6.346051216125488, "eval_runtime": 119.9281, "eval_samples_per_second": 22.047, "eval_steps_per_second": 2.76, "step": 95 }, 
{ "epoch": 0.1245136186770428, "grad_norm": 14.830784797668457, "learning_rate": 5.4e-06, "loss": 5.4933, "step": 96 }, { "epoch": 0.12710765239948119, "grad_norm": 14.9316987991333, "learning_rate": 5.52e-06, "loss": 5.3361, "step": 98 }, { "epoch": 0.1297016861219196, "grad_norm": 42.79384994506836, "learning_rate": 5.64e-06, "loss": 4.6843, "step": 100 }, { "epoch": 0.1297016861219196, "eval_cer": 0.6729059288061006, "eval_loss": 6.210666179656982, "eval_runtime": 120.5118, "eval_samples_per_second": 21.94, "eval_steps_per_second": 2.747, "step": 100 }, { "epoch": 0.13229571984435798, "grad_norm": 69.97132110595703, "learning_rate": 5.76e-06, "loss": 5.278, "step": 102 }, { "epoch": 0.13488975356679636, "grad_norm": 68.64286041259766, "learning_rate": 5.8800000000000005e-06, "loss": 5.3985, "step": 104 }, { "epoch": 0.13618677042801555, "eval_cer": 0.6567501582421557, "eval_loss": 6.105401039123535, "eval_runtime": 120.3425, "eval_samples_per_second": 21.971, "eval_steps_per_second": 2.75, "step": 105 }, { "epoch": 0.13748378728923477, "grad_norm": 16.881147384643555, "learning_rate": 6e-06, "loss": 5.5177, "step": 106 }, { "epoch": 0.14007782101167315, "grad_norm": 13.392465591430664, "learning_rate": 6.12e-06, "loss": 5.6112, "step": 108 }, { "epoch": 0.14267185473411154, "grad_norm": 12.735469818115234, "learning_rate": 6.2399999999999995e-06, "loss": 4.3825, "step": 110 }, { "epoch": 0.14267185473411154, "eval_cer": 0.6333303191970341, "eval_loss": 5.990973472595215, "eval_runtime": 122.2904, "eval_samples_per_second": 21.621, "eval_steps_per_second": 2.707, "step": 110 }, { "epoch": 0.14526588845654995, "grad_norm": 13.771244049072266, "learning_rate": 6.36e-06, "loss": 4.9807, "step": 112 }, { "epoch": 0.14785992217898833, "grad_norm": 16.60836410522461, "learning_rate": 6.48e-06, "loss": 5.0463, "step": 114 }, { "epoch": 0.14915693904020752, "eval_cer": 0.6150345118606263, "eval_loss": 5.89418363571167, "eval_runtime": 115.3341, "eval_samples_per_second": 
22.925, "eval_steps_per_second": 2.87, "step": 115 }, { "epoch": 0.1504539559014267, "grad_norm": 11.2271146774292, "learning_rate": 6.6e-06, "loss": 5.1669, "step": 116 }, { "epoch": 0.15304798962386512, "grad_norm": 14.511568069458008, "learning_rate": 6.72e-06, "loss": 4.883, "step": 118 }, { "epoch": 0.1556420233463035, "grad_norm": 12.640761375427246, "learning_rate": 6.840000000000001e-06, "loss": 5.3346, "step": 120 }, { "epoch": 0.1556420233463035, "eval_cer": 0.6130451819031256, "eval_loss": 5.80028772354126, "eval_runtime": 113.846, "eval_samples_per_second": 23.224, "eval_steps_per_second": 2.907, "step": 120 }, { "epoch": 0.1582360570687419, "grad_norm": 10.243730545043945, "learning_rate": 6.96e-06, "loss": 4.669, "step": 122 }, { "epoch": 0.1608300907911803, "grad_norm": 19.194486618041992, "learning_rate": 7.08e-06, "loss": 5.2001, "step": 124 }, { "epoch": 0.1621271076523995, "eval_cer": 0.6162703077433161, "eval_loss": 5.716529369354248, "eval_runtime": 113.3314, "eval_samples_per_second": 23.33, "eval_steps_per_second": 2.921, "step": 125 }, { "epoch": 0.16342412451361868, "grad_norm": 25.41460418701172, "learning_rate": 7.2e-06, "loss": 4.8738, "step": 126 }, { "epoch": 0.16601815823605706, "grad_norm": 11.016427040100098, "learning_rate": 7.32e-06, "loss": 5.5428, "step": 128 }, { "epoch": 0.16861219195849547, "grad_norm": 19.337942123413086, "learning_rate": 7.44e-06, "loss": 5.097, "step": 130 }, { "epoch": 0.16861219195849547, "eval_cer": 0.6253127166410465, "eval_loss": 5.637584686279297, "eval_runtime": 111.6419, "eval_samples_per_second": 23.683, "eval_steps_per_second": 2.965, "step": 130 }, { "epoch": 0.17120622568093385, "grad_norm": 14.668910026550293, "learning_rate": 7.5600000000000005e-06, "loss": 5.1702, "step": 132 }, { "epoch": 0.17380025940337224, "grad_norm": 14.700507164001465, "learning_rate": 7.680000000000001e-06, "loss": 5.1842, "step": 134 }, { "epoch": 0.17509727626459143, "eval_cer": 0.6279952979473732, "eval_loss": 
5.5578293800354, "eval_runtime": 115.7363, "eval_samples_per_second": 22.845, "eval_steps_per_second": 2.86, "step": 135 }, { "epoch": 0.17639429312581065, "grad_norm": 11.149736404418945, "learning_rate": 7.8e-06, "loss": 5.1527, "step": 136 }, { "epoch": 0.17898832684824903, "grad_norm": 12.17773723602295, "learning_rate": 7.92e-06, "loss": 5.3414, "step": 138 }, { "epoch": 0.1815823605706874, "grad_norm": 15.777327537536621, "learning_rate": 8.040000000000001e-06, "loss": 5.1606, "step": 140 }, { "epoch": 0.1815823605706874, "eval_cer": 0.611960092835398, "eval_loss": 5.470022201538086, "eval_runtime": 115.9413, "eval_samples_per_second": 22.805, "eval_steps_per_second": 2.855, "step": 140 }, { "epoch": 0.18417639429312582, "grad_norm": 22.217771530151367, "learning_rate": 8.160000000000001e-06, "loss": 5.0028, "step": 142 }, { "epoch": 0.1867704280155642, "grad_norm": 13.488722801208496, "learning_rate": 8.28e-06, "loss": 4.7495, "step": 144 }, { "epoch": 0.1880674448767834, "eval_cer": 0.6009886367061519, "eval_loss": 5.381906509399414, "eval_runtime": 109.493, "eval_samples_per_second": 24.148, "eval_steps_per_second": 3.023, "step": 145 }, { "epoch": 0.1893644617380026, "grad_norm": 12.898096084594727, "learning_rate": 8.400000000000001e-06, "loss": 4.847, "step": 146 }, { "epoch": 0.191958495460441, "grad_norm": 13.653580665588379, "learning_rate": 8.52e-06, "loss": 5.2004, "step": 148 }, { "epoch": 0.19455252918287938, "grad_norm": 10.915148735046387, "learning_rate": 8.64e-06, "loss": 4.9847, "step": 150 }, { "epoch": 0.19455252918287938, "eval_cer": 0.594960364107665, "eval_loss": 5.297786712646484, "eval_runtime": 108.0527, "eval_samples_per_second": 24.47, "eval_steps_per_second": 3.063, "step": 150 }, { "epoch": 0.19714656290531776, "grad_norm": 23.160659790039062, "learning_rate": 8.759999999999999e-06, "loss": 4.7818, "step": 152 }, { "epoch": 0.19974059662775617, "grad_norm": 14.993002891540527, "learning_rate": 8.88e-06, "loss": 4.7727, "step": 
154 }, { "epoch": 0.20103761348897536, "eval_cer": 0.5589715766946981, "eval_loss": 5.1952223777771, "eval_runtime": 111.9206, "eval_samples_per_second": 23.624, "eval_steps_per_second": 2.957, "step": 155 }, { "epoch": 0.20233463035019456, "grad_norm": 13.507874488830566, "learning_rate": 9e-06, "loss": 4.4196, "step": 156 }, { "epoch": 0.20492866407263294, "grad_norm": 12.171915054321289, "learning_rate": 9.12e-06, "loss": 4.088, "step": 158 }, { "epoch": 0.20752269779507135, "grad_norm": 10.198915481567383, "learning_rate": 9.24e-06, "loss": 4.0656, "step": 160 }, { "epoch": 0.20752269779507135, "eval_cer": 0.5371492298881756, "eval_loss": 5.124251365661621, "eval_runtime": 115.3755, "eval_samples_per_second": 22.916, "eval_steps_per_second": 2.869, "step": 160 }, { "epoch": 0.21011673151750973, "grad_norm": 10.800135612487793, "learning_rate": 9.36e-06, "loss": 4.2534, "step": 162 }, { "epoch": 0.2127107652399481, "grad_norm": 28.48563575744629, "learning_rate": 9.48e-06, "loss": 4.0234, "step": 164 }, { "epoch": 0.2140077821011673, "eval_cer": 0.530246857762908, "eval_loss": 5.063826084136963, "eval_runtime": 114.849, "eval_samples_per_second": 23.022, "eval_steps_per_second": 2.882, "step": 165 }, { "epoch": 0.21530479896238652, "grad_norm": 20.067386627197266, "learning_rate": 9.600000000000001e-06, "loss": 4.3631, "step": 166 }, { "epoch": 0.2178988326848249, "grad_norm": 16.29481315612793, "learning_rate": 9.72e-06, "loss": 4.4636, "step": 168 }, { "epoch": 0.2204928664072633, "grad_norm": 101.64680480957031, "learning_rate": 9.84e-06, "loss": 5.033, "step": 170 }, { "epoch": 0.2204928664072633, "eval_cer": 0.5299755854959761, "eval_loss": 4.967648029327393, "eval_runtime": 111.3501, "eval_samples_per_second": 23.745, "eval_steps_per_second": 2.973, "step": 170 }, { "epoch": 0.2230869001297017, "grad_norm": 22.31951904296875, "learning_rate": 9.960000000000001e-06, "loss": 4.0891, "step": 172 }, { "epoch": 0.22568093385214008, "grad_norm": 
56.18452072143555, "learning_rate": 1.008e-05, "loss": 4.284, "step": 174 }, { "epoch": 0.22697795071335927, "eval_cer": 0.520179642523435, "eval_loss": 4.895947456359863, "eval_runtime": 112.6531, "eval_samples_per_second": 23.47, "eval_steps_per_second": 2.938, "step": 175 }, { "epoch": 0.22827496757457846, "grad_norm": 13.42766284942627, "learning_rate": 1.02e-05, "loss": 4.4092, "step": 176 }, { "epoch": 0.23086900129701687, "grad_norm": 21.44829559326172, "learning_rate": 1.032e-05, "loss": 4.1597, "step": 178 }, { "epoch": 0.23346303501945526, "grad_norm": 14.215667724609375, "learning_rate": 1.044e-05, "loss": 3.8574, "step": 180 }, { "epoch": 0.23346303501945526, "eval_cer": 0.5017632697350575, "eval_loss": 4.84341287612915, "eval_runtime": 115.4414, "eval_samples_per_second": 22.903, "eval_steps_per_second": 2.867, "step": 180 }, { "epoch": 0.23605706874189364, "grad_norm": 13.1805419921875, "learning_rate": 1.0559999999999999e-05, "loss": 4.0776, "step": 182 }, { "epoch": 0.23865110246433205, "grad_norm": 98.30623626708984, "learning_rate": 1.068e-05, "loss": 4.2507, "step": 184 }, { "epoch": 0.23994811932555124, "eval_cer": 0.48886276637429543, "eval_loss": 4.78084135055542, "eval_runtime": 123.8484, "eval_samples_per_second": 21.349, "eval_steps_per_second": 2.673, "step": 185 }, { "epoch": 0.24124513618677043, "grad_norm": 35.59674072265625, "learning_rate": 1.08e-05, "loss": 4.1455, "step": 186 }, { "epoch": 0.2438391699092088, "grad_norm": 21.6286563873291, "learning_rate": 1.092e-05, "loss": 5.0675, "step": 188 }, { "epoch": 0.24643320363164722, "grad_norm": 17.0408878326416, "learning_rate": 1.104e-05, "loss": 3.953, "step": 190 }, { "epoch": 0.24643320363164722, "eval_cer": 0.47979021611357264, "eval_loss": 4.711887836456299, "eval_runtime": 135.169, "eval_samples_per_second": 19.561, "eval_steps_per_second": 2.449, "step": 190 }, { "epoch": 0.2490272373540856, "grad_norm": 24.359798431396484, "learning_rate": 1.116e-05, "loss": 4.4154, "step": 
192 }, { "epoch": 0.251621271076524, "grad_norm": 13.193626403808594, "learning_rate": 1.128e-05, "loss": 4.3269, "step": 194 }, { "epoch": 0.2529182879377432, "eval_cer": 0.4659251891370528, "eval_loss": 4.614772319793701, "eval_runtime": 132.7599, "eval_samples_per_second": 19.916, "eval_steps_per_second": 2.493, "step": 195 }, { "epoch": 0.25421530479896237, "grad_norm": 14.497838973999023, "learning_rate": 1.1400000000000001e-05, "loss": 4.0266, "step": 196 }, { "epoch": 0.25680933852140075, "grad_norm": 12.457406997680664, "learning_rate": 1.152e-05, "loss": 4.3069, "step": 198 }, { "epoch": 0.2594033722438392, "grad_norm": 18.889881134033203, "learning_rate": 1.164e-05, "loss": 4.1068, "step": 200 }, { "epoch": 0.2594033722438392, "eval_cer": 0.4684872049914097, "eval_loss": 4.578884601593018, "eval_runtime": 129.8465, "eval_samples_per_second": 20.363, "eval_steps_per_second": 2.549, "step": 200 }, { "epoch": 0.2619974059662776, "grad_norm": 11.648727416992188, "learning_rate": 1.1760000000000001e-05, "loss": 3.7185, "step": 202 }, { "epoch": 0.26459143968871596, "grad_norm": 13.08809757232666, "learning_rate": 1.1880000000000001e-05, "loss": 4.1442, "step": 204 }, { "epoch": 0.26588845654993515, "eval_cer": 0.46004762335352806, "eval_loss": 4.4956889152526855, "eval_runtime": 114.7341, "eval_samples_per_second": 23.045, "eval_steps_per_second": 2.885, "step": 205 }, { "epoch": 0.26718547341115434, "grad_norm": 12.64474105834961, "learning_rate": 1.2e-05, "loss": 3.7967, "step": 206 }, { "epoch": 0.2697795071335927, "grad_norm": 12.794676780700684, "learning_rate": 1.2120000000000001e-05, "loss": 3.8475, "step": 208 }, { "epoch": 0.2723735408560311, "grad_norm": 13.091010093688965, "learning_rate": 1.224e-05, "loss": 3.5213, "step": 210 }, { "epoch": 0.2723735408560311, "eval_cer": 0.43729089429424, "eval_loss": 4.389532089233398, "eval_runtime": 111.3543, "eval_samples_per_second": 23.744, "eval_steps_per_second": 2.972, "step": 210 }, { "epoch": 
0.27496757457846954, "grad_norm": 13.716208457946777, "learning_rate": 1.236e-05, "loss": 4.0241, "step": 212 }, { "epoch": 0.2775616083009079, "grad_norm": 14.271407127380371, "learning_rate": 1.2479999999999999e-05, "loss": 4.1152, "step": 214 }, { "epoch": 0.2788586251621271, "eval_cer": 0.449709135847123, "eval_loss": 4.357320308685303, "eval_runtime": 113.2044, "eval_samples_per_second": 23.356, "eval_steps_per_second": 2.924, "step": 215 }, { "epoch": 0.2801556420233463, "grad_norm": 12.267643928527832, "learning_rate": 1.26e-05, "loss": 3.2922, "step": 216 }, { "epoch": 0.2827496757457847, "grad_norm": 13.182437896728516, "learning_rate": 1.272e-05, "loss": 3.8861, "step": 218 }, { "epoch": 0.2853437094682231, "grad_norm": 12.376786231994629, "learning_rate": 1.284e-05, "loss": 3.5171, "step": 220 }, { "epoch": 0.2853437094682231, "eval_cer": 0.45802815203303493, "eval_loss": 4.331967830657959, "eval_runtime": 114.1848, "eval_samples_per_second": 23.155, "eval_steps_per_second": 2.899, "step": 220 }, { "epoch": 0.28793774319066145, "grad_norm": 11.612021446228027, "learning_rate": 1.296e-05, "loss": 4.1036, "step": 222 }, { "epoch": 0.2905317769130999, "grad_norm": 12.48078727722168, "learning_rate": 1.308e-05, "loss": 3.1498, "step": 224 }, { "epoch": 0.2918287937743191, "eval_cer": 0.4402146065045061, "eval_loss": 4.2296319007873535, "eval_runtime": 118.3956, "eval_samples_per_second": 22.332, "eval_steps_per_second": 2.796, "step": 225 }, { "epoch": 0.2931258106355383, "grad_norm": 28.98529815673828, "learning_rate": 1.32e-05, "loss": 3.7617, "step": 226 }, { "epoch": 0.29571984435797666, "grad_norm": 16.19705581665039, "learning_rate": 1.3320000000000001e-05, "loss": 4.0489, "step": 228 }, { "epoch": 0.29831387808041504, "grad_norm": 12.226841926574707, "learning_rate": 1.344e-05, "loss": 3.6797, "step": 230 }, { "epoch": 0.29831387808041504, "eval_cer": 0.43195587304457905, "eval_loss": 4.158808708190918, "eval_runtime": 112.6694, 
"eval_samples_per_second": 23.467, "eval_steps_per_second": 2.938, "step": 230 }, { "epoch": 0.3009079118028534, "grad_norm": 20.419113159179688, "learning_rate": 1.356e-05, "loss": 3.6434, "step": 232 }, { "epoch": 0.3035019455252918, "grad_norm": 18.819181442260742, "learning_rate": 1.3680000000000001e-05, "loss": 3.2154, "step": 234 }, { "epoch": 0.30479896238651105, "eval_cer": 0.4050697772553275, "eval_loss": 4.025953769683838, "eval_runtime": 113.1476, "eval_samples_per_second": 23.368, "eval_steps_per_second": 2.925, "step": 235 }, { "epoch": 0.30609597924773024, "grad_norm": 12.357544898986816, "learning_rate": 1.3800000000000002e-05, "loss": 3.9309, "step": 236 }, { "epoch": 0.3086900129701686, "grad_norm": 14.584222793579102, "learning_rate": 1.392e-05, "loss": 3.5647, "step": 238 }, { "epoch": 0.311284046692607, "grad_norm": 11.948848724365234, "learning_rate": 1.4040000000000001e-05, "loss": 3.439, "step": 240 }, { "epoch": 0.311284046692607, "eval_cer": 0.3937064834071797, "eval_loss": 3.905667304992676, "eval_runtime": 113.4696, "eval_samples_per_second": 23.301, "eval_steps_per_second": 2.917, "step": 240 }, { "epoch": 0.3138780804150454, "grad_norm": 13.00888729095459, "learning_rate": 1.416e-05, "loss": 3.2106, "step": 242 }, { "epoch": 0.3164721141374838, "grad_norm": 12.678916931152344, "learning_rate": 1.428e-05, "loss": 3.4027, "step": 244 }, { "epoch": 0.31776913099870296, "eval_cer": 0.3925912529764596, "eval_loss": 3.8874895572662354, "eval_runtime": 189.1601, "eval_samples_per_second": 13.978, "eval_steps_per_second": 1.75, "step": 245 }, { "epoch": 0.31906614785992216, "grad_norm": 16.414127349853516, "learning_rate": 1.44e-05, "loss": 2.5058, "step": 246 }, { "epoch": 0.3216601815823606, "grad_norm": 16.07786750793457, "learning_rate": 1.452e-05, "loss": 3.3537, "step": 248 }, { "epoch": 0.324254215304799, "grad_norm": 15.625645637512207, "learning_rate": 1.464e-05, "loss": 3.6318, "step": 250 }, { "epoch": 0.324254215304799, "eval_cer": 
0.3870151008228592, "eval_loss": 3.824657917022705, "eval_runtime": 233.078, "eval_samples_per_second": 11.344, "eval_steps_per_second": 1.42, "step": 250 }, { "epoch": 0.32684824902723736, "grad_norm": 16.048980712890625, "learning_rate": 1.4760000000000001e-05, "loss": 3.6032, "step": 252 }, { "epoch": 0.32944228274967574, "grad_norm": 12.714573860168457, "learning_rate": 1.488e-05, "loss": 3.8301, "step": 254 }, { "epoch": 0.33073929961089493, "eval_cer": 0.374446152455014, "eval_loss": 3.690356969833374, "eval_runtime": 139.2198, "eval_samples_per_second": 18.992, "eval_steps_per_second": 2.378, "step": 255 }, { "epoch": 0.3320363164721141, "grad_norm": 12.425498962402344, "learning_rate": 1.5e-05, "loss": 3.0105, "step": 256 }, { "epoch": 0.3346303501945525, "grad_norm": 13.164816856384277, "learning_rate": 1.5120000000000001e-05, "loss": 3.6224, "step": 258 }, { "epoch": 0.33722438391699094, "grad_norm": 14.977278709411621, "learning_rate": 1.524e-05, "loss": 3.2017, "step": 260 }, { "epoch": 0.33722438391699094, "eval_cer": 0.3727582361274377, "eval_loss": 3.6364212036132812, "eval_runtime": 123.239, "eval_samples_per_second": 21.454, "eval_steps_per_second": 2.686, "step": 260 }, { "epoch": 0.3398184176394293, "grad_norm": 16.265350341796875, "learning_rate": 1.5360000000000002e-05, "loss": 3.9344, "step": 262 }, { "epoch": 0.3424124513618677, "grad_norm": 13.141109466552734, "learning_rate": 1.548e-05, "loss": 2.757, "step": 264 }, { "epoch": 0.3437094682230869, "eval_cer": 0.3830062995448654, "eval_loss": 3.622190475463867, "eval_runtime": 117.2686, "eval_samples_per_second": 22.547, "eval_steps_per_second": 2.823, "step": 265 }, { "epoch": 0.3450064850843061, "grad_norm": 12.868675231933594, "learning_rate": 1.56e-05, "loss": 3.4981, "step": 266 }, { "epoch": 0.3476005188067445, "grad_norm": 12.726391792297363, "learning_rate": 1.5720000000000002e-05, "loss": 2.9291, "step": 268 }, { "epoch": 0.35019455252918286, "grad_norm": 31.09647560119629, 
"learning_rate": 1.584e-05, "loss": 3.1786, "step": 270 }, { "epoch": 0.35019455252918286, "eval_cer": 0.3782741055550532, "eval_loss": 3.598266124725342, "eval_runtime": 115.8076, "eval_samples_per_second": 22.831, "eval_steps_per_second": 2.858, "step": 270 }, { "epoch": 0.3527885862516213, "grad_norm": 12.694645881652832, "learning_rate": 1.596e-05, "loss": 3.2305, "step": 272 }, { "epoch": 0.3553826199740597, "grad_norm": 20.454267501831055, "learning_rate": 1.6080000000000002e-05, "loss": 2.9721, "step": 274 }, { "epoch": 0.35667963683527887, "eval_cer": 0.3755613828857341, "eval_loss": 3.512030839920044, "eval_runtime": 133.3206, "eval_samples_per_second": 19.832, "eval_steps_per_second": 2.483, "step": 275 }, { "epoch": 0.35797665369649806, "grad_norm": 20.09004783630371, "learning_rate": 1.62e-05, "loss": 3.0882, "step": 276 }, { "epoch": 0.36057068741893644, "grad_norm": 16.710346221923828, "learning_rate": 1.6320000000000003e-05, "loss": 2.762, "step": 278 }, { "epoch": 0.3631647211413748, "grad_norm": 19.519004821777344, "learning_rate": 1.6440000000000002e-05, "loss": 3.0841, "step": 280 }, { "epoch": 0.3631647211413748, "eval_cer": 0.36691081170690537, "eval_loss": 3.4976441860198975, "eval_runtime": 135.0965, "eval_samples_per_second": 19.571, "eval_steps_per_second": 2.45, "step": 280 }, { "epoch": 0.3657587548638132, "grad_norm": 13.175090789794922, "learning_rate": 1.656e-05, "loss": 3.5364, "step": 282 }, { "epoch": 0.36835278858625164, "grad_norm": 16.523889541625977, "learning_rate": 1.6680000000000003e-05, "loss": 2.6431, "step": 284 }, { "epoch": 0.36964980544747084, "eval_cer": 0.3738734665581578, "eval_loss": 3.458583116531372, "eval_runtime": 229.2061, "eval_samples_per_second": 11.535, "eval_steps_per_second": 1.444, "step": 285 }, { "epoch": 0.37094682230869, "grad_norm": 14.984639167785645, "learning_rate": 1.6800000000000002e-05, "loss": 4.2237, "step": 286 }, { "epoch": 0.3735408560311284, "grad_norm": 13.299590110778809, 
"learning_rate": 1.6919999999999997e-05, "loss": 2.6747, "step": 288 }, { "epoch": 0.3761348897535668, "grad_norm": 12.275932312011719, "learning_rate": 1.704e-05, "loss": 2.3366, "step": 290 }, { "epoch": 0.3761348897535668, "eval_cer": 0.3661271362691021, "eval_loss": 3.3806421756744385, "eval_runtime": 245.1909, "eval_samples_per_second": 10.783, "eval_steps_per_second": 1.35, "step": 290 }, { "epoch": 0.3787289234760052, "grad_norm": 14.83483600616455, "learning_rate": 1.716e-05, "loss": 3.1826, "step": 292 }, { "epoch": 0.38132295719844356, "grad_norm": 14.161396026611328, "learning_rate": 1.728e-05, "loss": 3.5359, "step": 294 }, { "epoch": 0.38261997405966275, "eval_cer": 0.3429182867649275, "eval_loss": 3.350353956222534, "eval_runtime": 219.4263, "eval_samples_per_second": 12.05, "eval_steps_per_second": 1.508, "step": 295 }, { "epoch": 0.383916990920882, "grad_norm": 12.273178100585938, "learning_rate": 1.74e-05, "loss": 3.6439, "step": 296 }, { "epoch": 0.3865110246433204, "grad_norm": 14.881448745727539, "learning_rate": 1.7519999999999998e-05, "loss": 3.6943, "step": 298 }, { "epoch": 0.38910505836575876, "grad_norm": 14.406302452087402, "learning_rate": 1.764e-05, "loss": 3.4593, "step": 300 }, { "epoch": 0.38910505836575876, "eval_cer": 0.3484040148295506, "eval_loss": 3.2906293869018555, "eval_runtime": 141.4541, "eval_samples_per_second": 18.692, "eval_steps_per_second": 2.34, "step": 300 }, { "epoch": 0.39169909208819714, "grad_norm": 15.321798324584961, "learning_rate": 1.776e-05, "loss": 3.1981, "step": 302 }, { "epoch": 0.3942931258106355, "grad_norm": 12.990147590637207, "learning_rate": 1.7879999999999998e-05, "loss": 3.1501, "step": 304 }, { "epoch": 0.3955901426718547, "eval_cer": 0.34671609850197427, "eval_loss": 3.222804069519043, "eval_runtime": 189.6181, "eval_samples_per_second": 13.944, "eval_steps_per_second": 1.746, "step": 305 }, { "epoch": 0.3968871595330739, "grad_norm": 11.798747062683105, "learning_rate": 1.8e-05, "loss": 
3.6844, "step": 306 }, { "epoch": 0.39948119325551235, "grad_norm": 15.285426139831543, "learning_rate": 1.812e-05, "loss": 2.115, "step": 308 }, { "epoch": 0.40207522697795073, "grad_norm": 14.921792984008789, "learning_rate": 1.824e-05, "loss": 2.8101, "step": 310 }, { "epoch": 0.40207522697795073, "eval_cer": 0.342466166320041, "eval_loss": 3.1945455074310303, "eval_runtime": 145.2418, "eval_samples_per_second": 18.204, "eval_steps_per_second": 2.279, "step": 310 }, { "epoch": 0.4046692607003891, "grad_norm": 12.135457992553711, "learning_rate": 1.836e-05, "loss": 3.4391, "step": 312 }, { "epoch": 0.4072632944228275, "grad_norm": 14.905659675598145, "learning_rate": 1.848e-05, "loss": 2.8493, "step": 314 }, { "epoch": 0.4085603112840467, "eval_cer": 0.35328691563432496, "eval_loss": 3.176107883453369, "eval_runtime": 245.7678, "eval_samples_per_second": 10.758, "eval_steps_per_second": 1.347, "step": 315 }, { "epoch": 0.4098573281452659, "grad_norm": 13.051637649536133, "learning_rate": 1.86e-05, "loss": 2.8454, "step": 316 }, { "epoch": 0.41245136186770426, "grad_norm": 14.108623504638672, "learning_rate": 1.872e-05, "loss": 3.4802, "step": 318 }, { "epoch": 0.4150453955901427, "grad_norm": 19.467906951904297, "learning_rate": 1.884e-05, "loss": 2.8067, "step": 320 }, { "epoch": 0.4150453955901427, "eval_cer": 0.3708593302589143, "eval_loss": 3.1531643867492676, "eval_runtime": 211.2143, "eval_samples_per_second": 12.518, "eval_steps_per_second": 1.567, "step": 320 }, { "epoch": 0.4176394293125811, "grad_norm": 15.744620323181152, "learning_rate": 1.896e-05, "loss": 2.2496, "step": 322 }, { "epoch": 0.42023346303501946, "grad_norm": 14.49579906463623, "learning_rate": 1.908e-05, "loss": 2.7236, "step": 324 }, { "epoch": 0.42153047989623865, "eval_cer": 0.35376917744220393, "eval_loss": 3.1204159259796143, "eval_runtime": 194.3013, "eval_samples_per_second": 13.608, "eval_steps_per_second": 1.704, "step": 325 }, { "epoch": 0.42282749675745784, "grad_norm": 
16.304920196533203, "learning_rate": 1.9200000000000003e-05, "loss": 2.785, "step": 326 }, { "epoch": 0.4254215304798962, "grad_norm": 12.900490760803223, "learning_rate": 1.932e-05, "loss": 2.5259, "step": 328 }, { "epoch": 0.4280155642023346, "grad_norm": 15.345794677734375, "learning_rate": 1.944e-05, "loss": 3.023, "step": 330 }, { "epoch": 0.4280155642023346, "eval_cer": 0.3658558640021702, "eval_loss": 3.054361581802368, "eval_runtime": 234.2094, "eval_samples_per_second": 11.289, "eval_steps_per_second": 1.413, "step": 330 }, { "epoch": 0.43060959792477305, "grad_norm": 17.006378173828125, "learning_rate": 1.9560000000000002e-05, "loss": 2.5932, "step": 332 }, { "epoch": 0.43320363164721143, "grad_norm": 12.188159942626953, "learning_rate": 1.968e-05, "loss": 3.1202, "step": 334 }, { "epoch": 0.4345006485084306, "eval_cer": 0.35904391596588, "eval_loss": 3.008192300796509, "eval_runtime": 161.9322, "eval_samples_per_second": 16.328, "eval_steps_per_second": 2.044, "step": 335 }, { "epoch": 0.4357976653696498, "grad_norm": 13.4000883102417, "learning_rate": 1.98e-05, "loss": 2.9631, "step": 336 }, { "epoch": 0.4383916990920882, "grad_norm": 12.92082691192627, "learning_rate": 1.9920000000000002e-05, "loss": 2.6806, "step": 338 }, { "epoch": 0.4409857328145266, "grad_norm": 19.443449020385742, "learning_rate": 2.004e-05, "loss": 2.6839, "step": 340 }, { "epoch": 0.4409857328145266, "eval_cer": 0.34593242306417094, "eval_loss": 2.986955404281616, "eval_runtime": 209.5827, "eval_samples_per_second": 12.616, "eval_steps_per_second": 1.579, "step": 340 }, { "epoch": 0.44357976653696496, "grad_norm": 13.657390594482422, "learning_rate": 2.016e-05, "loss": 3.3844, "step": 342 }, { "epoch": 0.4461738002594034, "grad_norm": 12.142219543457031, "learning_rate": 2.0280000000000002e-05, "loss": 2.5051, "step": 344 }, { "epoch": 0.4474708171206226, "eval_cer": 0.34991108297917234, "eval_loss": 2.9238357543945312, "eval_runtime": 172.4959, "eval_samples_per_second": 
15.328, "eval_steps_per_second": 1.919, "step": 345 }, { "epoch": 0.4487678339818418, "grad_norm": 11.67302417755127, "learning_rate": 2.04e-05, "loss": 2.6669, "step": 346 }, { "epoch": 0.45136186770428016, "grad_norm": 9.952072143554688, "learning_rate": 2.0520000000000003e-05, "loss": 3.1192, "step": 348 }, { "epoch": 0.45395590142671854, "grad_norm": 13.724016189575195, "learning_rate": 2.064e-05, "loss": 3.026, "step": 350 }, { "epoch": 0.45395590142671854, "eval_cer": 0.35256352292250653, "eval_loss": 2.923769235610962, "eval_runtime": 178.4978, "eval_samples_per_second": 14.813, "eval_steps_per_second": 1.854, "step": 350 }, { "epoch": 0.4565499351491569, "grad_norm": 12.271801948547363, "learning_rate": 2.0759999999999998e-05, "loss": 2.0673, "step": 352 }, { "epoch": 0.4591439688715953, "grad_norm": 11.006880760192871, "learning_rate": 2.088e-05, "loss": 2.1873, "step": 354 }, { "epoch": 0.4604409857328145, "eval_cer": 0.3467763812279591, "eval_loss": 2.878007173538208, "eval_runtime": 239.5277, "eval_samples_per_second": 11.038, "eval_steps_per_second": 1.382, "step": 355 }, { "epoch": 0.46173800259403375, "grad_norm": 20.289621353149414, "learning_rate": 2.1e-05, "loss": 2.8391, "step": 356 }, { "epoch": 0.46433203631647213, "grad_norm": 12.577980995178223, "learning_rate": 2.1119999999999998e-05, "loss": 2.2868, "step": 358 }, { "epoch": 0.4669260700389105, "grad_norm": 12.996379852294922, "learning_rate": 2.124e-05, "loss": 2.8355, "step": 360 }, { "epoch": 0.4669260700389105, "eval_cer": 0.32480332760647435, "eval_loss": 2.820495843887329, "eval_runtime": 183.724, "eval_samples_per_second": 14.391, "eval_steps_per_second": 1.802, "step": 360 }, { "epoch": 0.4695201037613489, "grad_norm": 20.477155685424805, "learning_rate": 2.136e-05, "loss": 2.8462, "step": 362 }, { "epoch": 0.4721141374837873, "grad_norm": 32.31524658203125, "learning_rate": 2.148e-05, "loss": 2.0954, "step": 364 }, { "epoch": 0.47341115434500647, "eval_cer": 0.3083160020496127, 
"eval_loss": 2.8296797275543213, "eval_runtime": 276.3938, "eval_samples_per_second": 9.566, "eval_steps_per_second": 1.198, "step": 365 }, { "epoch": 0.47470817120622566, "grad_norm": 14.220911026000977, "learning_rate": 2.16e-05, "loss": 2.4953, "step": 366 }, { "epoch": 0.4773022049286641, "grad_norm": 16.390596389770508, "learning_rate": 2.172e-05, "loss": 2.624, "step": 368 }, { "epoch": 0.4798962386511025, "grad_norm": 14.07410717010498, "learning_rate": 2.184e-05, "loss": 2.9978, "step": 370 }, { "epoch": 0.4798962386511025, "eval_cer": 0.3032522530668837, "eval_loss": 2.793341875076294, "eval_runtime": 279.1859, "eval_samples_per_second": 9.47, "eval_steps_per_second": 1.186, "step": 370 }, { "epoch": 0.48249027237354086, "grad_norm": 24.049970626831055, "learning_rate": 2.196e-05, "loss": 2.9861, "step": 372 }, { "epoch": 0.48508430609597925, "grad_norm": 10.874021530151367, "learning_rate": 2.208e-05, "loss": 2.1597, "step": 374 }, { "epoch": 0.48638132295719844, "eval_cer": 0.3145251228260542, "eval_loss": 2.7285666465759277, "eval_runtime": 309.9236, "eval_samples_per_second": 8.531, "eval_steps_per_second": 1.068, "step": 375 }, { "epoch": 0.4876783398184176, "grad_norm": 12.51282787322998, "learning_rate": 2.22e-05, "loss": 2.2976, "step": 376 }, { "epoch": 0.490272373540856, "grad_norm": 11.6898775100708, "learning_rate": 2.232e-05, "loss": 2.703, "step": 378 }, { "epoch": 0.49286640726329445, "grad_norm": 14.729179382324219, "learning_rate": 2.2440000000000002e-05, "loss": 2.0108, "step": 380 }, { "epoch": 0.49286640726329445, "eval_cer": 0.31328932694336437, "eval_loss": 2.689061403274536, "eval_runtime": 188.7169, "eval_samples_per_second": 14.01, "eval_steps_per_second": 1.754, "step": 380 }, { "epoch": 0.49546044098573283, "grad_norm": 11.870194435119629, "learning_rate": 2.256e-05, "loss": 2.2467, "step": 382 }, { "epoch": 0.4980544747081712, "grad_norm": 12.500712394714355, "learning_rate": 2.268e-05, "loss": 2.0565, "step": 384 }, { "epoch": 
0.4993514915693904, "eval_cer": 0.3065075202700666, "eval_loss": 2.683504104614258, "eval_runtime": 139.7652, "eval_samples_per_second": 18.917, "eval_steps_per_second": 2.368, "step": 385 }, { "epoch": 0.5006485084306096, "grad_norm": 17.805166244506836, "learning_rate": 2.2800000000000002e-05, "loss": 2.0304, "step": 386 }, { "epoch": 0.503242542153048, "grad_norm": 10.874719619750977, "learning_rate": 2.292e-05, "loss": 2.2385, "step": 388 }, { "epoch": 0.5058365758754864, "grad_norm": 19.65207862854004, "learning_rate": 2.304e-05, "loss": 2.2806, "step": 390 }, { "epoch": 0.5058365758754864, "eval_cer": 0.310697169726015, "eval_loss": 2.64746356010437, "eval_runtime": 158.3003, "eval_samples_per_second": 16.702, "eval_steps_per_second": 2.091, "step": 390 }, { "epoch": 0.5084306095979247, "grad_norm": 11.749553680419922, "learning_rate": 2.3160000000000002e-05, "loss": 2.0118, "step": 392 }, { "epoch": 0.5110246433203631, "grad_norm": 12.608861923217773, "learning_rate": 2.328e-05, "loss": 1.8522, "step": 394 }, { "epoch": 0.5123216601815823, "eval_cer": 0.32664195074901287, "eval_loss": 2.6235055923461914, "eval_runtime": 186.0047, "eval_samples_per_second": 14.215, "eval_steps_per_second": 1.78, "step": 395 }, { "epoch": 0.5136186770428015, "grad_norm": 15.640968322753906, "learning_rate": 2.3400000000000003e-05, "loss": 2.5232, "step": 396 }, { "epoch": 0.51621271076524, "grad_norm": 12.88823127746582, "learning_rate": 2.3520000000000002e-05, "loss": 2.196, "step": 398 }, { "epoch": 0.5188067444876784, "grad_norm": 12.094499588012695, "learning_rate": 2.364e-05, "loss": 2.3092, "step": 400 }, { "epoch": 0.5188067444876784, "eval_cer": 0.3185640654670404, "eval_loss": 2.6280529499053955, "eval_runtime": 155.4528, "eval_samples_per_second": 17.008, "eval_steps_per_second": 2.129, "step": 400 }, { "epoch": 0.5214007782101168, "grad_norm": 27.93305015563965, "learning_rate": 2.3760000000000003e-05, "loss": 2.1069, "step": 402 }, { "epoch": 0.5239948119325551, 
"grad_norm": 14.44329833984375, "learning_rate": 2.3880000000000002e-05, "loss": 2.513, "step": 404 }, { "epoch": 0.5252918287937743, "eval_cer": 0.2939988546282063, "eval_loss": 2.567127227783203, "eval_runtime": 158.9316, "eval_samples_per_second": 16.636, "eval_steps_per_second": 2.083, "step": 405 }, { "epoch": 0.5265888456549935, "grad_norm": 12.40060806274414, "learning_rate": 2.4e-05, "loss": 2.412, "step": 406 }, { "epoch": 0.5291828793774319, "grad_norm": 12.270583152770996, "learning_rate": 2.4120000000000003e-05, "loss": 2.0339, "step": 408 }, { "epoch": 0.5317769130998703, "grad_norm": 17.22001838684082, "learning_rate": 2.4240000000000002e-05, "loss": 2.0117, "step": 410 }, { "epoch": 0.5317769130998703, "eval_cer": 0.2961388914006691, "eval_loss": 2.5344009399414062, "eval_runtime": 164.3351, "eval_samples_per_second": 16.089, "eval_steps_per_second": 2.014, "step": 410 }, { "epoch": 0.5343709468223087, "grad_norm": 10.44601058959961, "learning_rate": 2.4360000000000004e-05, "loss": 1.6976, "step": 412 }, { "epoch": 0.5369649805447471, "grad_norm": 16.720975875854492, "learning_rate": 2.448e-05, "loss": 3.0921, "step": 414 }, { "epoch": 0.5382619974059663, "eval_cer": 0.2885131265635832, "eval_loss": 2.51837158203125, "eval_runtime": 169.9004, "eval_samples_per_second": 15.562, "eval_steps_per_second": 1.948, "step": 415 }, { "epoch": 0.5395590142671854, "grad_norm": 11.013751983642578, "learning_rate": 2.4599999999999998e-05, "loss": 1.8176, "step": 416 }, { "epoch": 0.5421530479896238, "grad_norm": 13.207280158996582, "learning_rate": 2.472e-05, "loss": 2.0911, "step": 418 }, { "epoch": 0.5447470817120622, "grad_norm": 13.864497184753418, "learning_rate": 2.484e-05, "loss": 2.1863, "step": 420 }, { "epoch": 0.5447470817120622, "eval_cer": 0.3223920185670796, "eval_loss": 2.489891767501831, "eval_runtime": 198.7131, "eval_samples_per_second": 13.306, "eval_steps_per_second": 1.666, "step": 420 }, { "epoch": 0.5473411154345007, "grad_norm": 
13.306368827819824, "learning_rate": 2.4959999999999998e-05, "loss": 2.1374, "step": 422 }, { "epoch": 0.5499351491569391, "grad_norm": 14.835345268249512, "learning_rate": 2.508e-05, "loss": 2.0687, "step": 424 }, { "epoch": 0.5512321660181583, "eval_cer": 0.3371612864333725, "eval_loss": 2.540318250656128, "eval_runtime": 177.6188, "eval_samples_per_second": 14.886, "eval_steps_per_second": 1.864, "step": 425 }, { "epoch": 0.5525291828793775, "grad_norm": 15.374982833862305, "learning_rate": 2.52e-05, "loss": 2.0655, "step": 426 }, { "epoch": 0.5551232166018158, "grad_norm": 14.928581237792969, "learning_rate": 2.5319999999999998e-05, "loss": 2.7183, "step": 428 }, { "epoch": 0.5577172503242542, "grad_norm": 14.489096641540527, "learning_rate": 2.544e-05, "loss": 2.0051, "step": 430 }, { "epoch": 0.5577172503242542, "eval_cer": 0.31229466196461403, "eval_loss": 2.4285073280334473, "eval_runtime": 181.8863, "eval_samples_per_second": 14.537, "eval_steps_per_second": 1.82, "step": 430 }, { "epoch": 0.5603112840466926, "grad_norm": 11.531155586242676, "learning_rate": 2.556e-05, "loss": 2.2619, "step": 432 }, { "epoch": 0.562905317769131, "grad_norm": 17.837749481201172, "learning_rate": 2.568e-05, "loss": 2.5056, "step": 434 }, { "epoch": 0.5642023346303502, "eval_cer": 0.3202519817946168, "eval_loss": 2.4498050212860107, "eval_runtime": 230.2751, "eval_samples_per_second": 11.482, "eval_steps_per_second": 1.437, "step": 435 }, { "epoch": 0.5654993514915694, "grad_norm": 13.783636093139648, "learning_rate": 2.58e-05, "loss": 2.0943, "step": 436 }, { "epoch": 0.5680933852140078, "grad_norm": 17.753210067749023, "learning_rate": 2.592e-05, "loss": 2.12, "step": 438 }, { "epoch": 0.5706874189364461, "grad_norm": 15.3496732711792, "learning_rate": 2.604e-05, "loss": 2.2611, "step": 440 }, { "epoch": 0.5706874189364461, "eval_cer": 0.33478011875697017, "eval_loss": 2.4362807273864746, "eval_runtime": 233.7715, "eval_samples_per_second": 11.31, "eval_steps_per_second": 
1.416, "step": 440 }, { "epoch": 0.5732814526588845, "grad_norm": 12.754862785339355, "learning_rate": 2.616e-05, "loss": 1.7839, "step": 442 }, { "epoch": 0.5758754863813229, "grad_norm": 15.386824607849121, "learning_rate": 2.628e-05, "loss": 2.4994, "step": 444 }, { "epoch": 0.5771725032425421, "eval_cer": 0.310908159266962, "eval_loss": 2.458259105682373, "eval_runtime": 167.458, "eval_samples_per_second": 15.789, "eval_steps_per_second": 1.977, "step": 445 }, { "epoch": 0.5784695201037614, "grad_norm": 14.832752227783203, "learning_rate": 2.64e-05, "loss": 2.0773, "step": 446 }, { "epoch": 0.5810635538261998, "grad_norm": 15.057633399963379, "learning_rate": 2.652e-05, "loss": 2.0135, "step": 448 }, { "epoch": 0.5836575875486382, "grad_norm": 17.804443359375, "learning_rate": 2.6640000000000002e-05, "loss": 2.4173, "step": 450 }, { "epoch": 0.5836575875486382, "eval_cer": 0.3094312324803328, "eval_loss": 2.376800060272217, "eval_runtime": 172.2385, "eval_samples_per_second": 15.351, "eval_steps_per_second": 1.922, "step": 450 }, { "epoch": 0.5862516212710766, "grad_norm": 22.79265022277832, "learning_rate": 2.676e-05, "loss": 1.9889, "step": 452 }, { "epoch": 0.5888456549935149, "grad_norm": 11.24325942993164, "learning_rate": 2.688e-05, "loss": 2.9177, "step": 454 }, { "epoch": 0.5901426718547341, "eval_cer": 0.31289748922446275, "eval_loss": 2.4268851280212402, "eval_runtime": 150.7156, "eval_samples_per_second": 17.543, "eval_steps_per_second": 2.196, "step": 455 }, { "epoch": 0.5914396887159533, "grad_norm": 14.807707786560059, "learning_rate": 2.7000000000000002e-05, "loss": 1.6092, "step": 456 }, { "epoch": 0.5940337224383917, "grad_norm": 16.166181564331055, "learning_rate": 2.712e-05, "loss": 2.2921, "step": 458 }, { "epoch": 0.5966277561608301, "grad_norm": 18.733001708984375, "learning_rate": 2.724e-05, "loss": 2.4549, "step": 460 }, { "epoch": 0.5966277561608301, "eval_cer": 0.3229647044639359, "eval_loss": 2.331587076187134, "eval_runtime": 
176.3172, "eval_samples_per_second": 14.996, "eval_steps_per_second": 1.877, "step": 460 }, { "epoch": 0.5992217898832685, "grad_norm": 13.820377349853516, "learning_rate": 2.7360000000000002e-05, "loss": 2.3386, "step": 462 }, { "epoch": 0.6018158236057068, "grad_norm": 11.139546394348145, "learning_rate": 2.748e-05, "loss": 2.3171, "step": 464 }, { "epoch": 0.603112840466926, "eval_cer": 0.3494589625342858, "eval_loss": 2.3250718116760254, "eval_runtime": 227.8398, "eval_samples_per_second": 11.605, "eval_steps_per_second": 1.453, "step": 465 } ], "logging_steps": 2, "max_steps": 77100, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 5, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 10 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.783620910505001e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }