| { | |
| "best_global_step": 415, | |
| "best_metric": 0.2885131265635832, | |
| "best_model_checkpoint": "trocr\\checkpoint-415", | |
| "epoch": 0.603112840466926, | |
| "eval_steps": 5, | |
| "global_step": 465, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0025940337224383916, | |
| "grad_norm": null, | |
| "learning_rate": 0.0, | |
| "loss": 8.3361, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.005188067444876783, | |
| "grad_norm": 42.44731521606445, | |
| "learning_rate": 0.0, | |
| "loss": 9.0712, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.00648508430609598, | |
| "eval_cer": 0.9740181451005214, | |
| "eval_loss": 10.673304557800293, | |
| "eval_runtime": 121.4924, | |
| "eval_samples_per_second": 21.763, | |
| "eval_steps_per_second": 2.724, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.007782101167315175, | |
| "grad_norm": 50.13338088989258, | |
| "learning_rate": 1.2000000000000002e-07, | |
| "loss": 9.0523, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.010376134889753566, | |
| "grad_norm": 54.91879653930664, | |
| "learning_rate": 2.4000000000000003e-07, | |
| "loss": 8.6874, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.01297016861219196, | |
| "grad_norm": 57.007408142089844, | |
| "learning_rate": 3.6e-07, | |
| "loss": 9.1085, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01297016861219196, | |
| "eval_cer": 0.9735660246556349, | |
| "eval_loss": 10.574353218078613, | |
| "eval_runtime": 92.0698, | |
| "eval_samples_per_second": 28.717, | |
| "eval_steps_per_second": 3.595, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01556420233463035, | |
| "grad_norm": 65.91108703613281, | |
| "learning_rate": 4.800000000000001e-07, | |
| "loss": 8.1886, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.018158236057068743, | |
| "grad_norm": 34.232662200927734, | |
| "learning_rate": 6.000000000000001e-07, | |
| "loss": 8.3056, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.019455252918287938, | |
| "eval_cer": 0.9708231606233234, | |
| "eval_loss": 10.372063636779785, | |
| "eval_runtime": 93.5905, | |
| "eval_samples_per_second": 28.251, | |
| "eval_steps_per_second": 3.537, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.020752269779507133, | |
| "grad_norm": 42.87216567993164, | |
| "learning_rate": 7.2e-07, | |
| "loss": 8.7293, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.023346303501945526, | |
| "grad_norm": 39.21005630493164, | |
| "learning_rate": 8.4e-07, | |
| "loss": 8.6748, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.02594033722438392, | |
| "grad_norm": 94.66182708740234, | |
| "learning_rate": 9.600000000000001e-07, | |
| "loss": 8.2991, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02594033722438392, | |
| "eval_cer": 0.9659101184555565, | |
| "eval_loss": 10.076202392578125, | |
| "eval_runtime": 88.5272, | |
| "eval_samples_per_second": 29.867, | |
| "eval_steps_per_second": 3.739, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.028534370946822308, | |
| "grad_norm": 34.152923583984375, | |
| "learning_rate": 1.08e-06, | |
| "loss": 8.3962, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.0311284046692607, | |
| "grad_norm": null, | |
| "learning_rate": 1.2000000000000002e-06, | |
| "loss": 8.2708, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.0324254215304799, | |
| "eval_cer": 0.9612683485547217, | |
| "eval_loss": 9.797987937927246, | |
| "eval_runtime": 96.7288, | |
| "eval_samples_per_second": 27.334, | |
| "eval_steps_per_second": 3.422, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.03372243839169909, | |
| "grad_norm": 45.9756965637207, | |
| "learning_rate": 1.26e-06, | |
| "loss": 7.8454, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.03631647211413749, | |
| "grad_norm": 32.557674407958984, | |
| "learning_rate": 1.38e-06, | |
| "loss": 8.2102, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.038910505836575876, | |
| "grad_norm": 32.354164123535156, | |
| "learning_rate": 1.5e-06, | |
| "loss": 8.6184, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.038910505836575876, | |
| "eval_cer": 0.9502366096994906, | |
| "eval_loss": 9.3967866897583, | |
| "eval_runtime": 98.634, | |
| "eval_samples_per_second": 26.806, | |
| "eval_steps_per_second": 3.356, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.041504539559014265, | |
| "grad_norm": 33.6341667175293, | |
| "learning_rate": 1.62e-06, | |
| "loss": 8.1516, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.04409857328145266, | |
| "grad_norm": 39.92112350463867, | |
| "learning_rate": 1.74e-06, | |
| "loss": 8.0678, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.04539559014267185, | |
| "eval_cer": 0.9342315459505078, | |
| "eval_loss": 9.007755279541016, | |
| "eval_runtime": 81.7686, | |
| "eval_samples_per_second": 32.335, | |
| "eval_steps_per_second": 4.048, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.04669260700389105, | |
| "grad_norm": 29.05873680114746, | |
| "learning_rate": 1.86e-06, | |
| "loss": 7.6011, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.04928664072632944, | |
| "grad_norm": 25.371105194091797, | |
| "learning_rate": 1.98e-06, | |
| "loss": 7.2863, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.05188067444876784, | |
| "grad_norm": 26.220144271850586, | |
| "learning_rate": 2.1000000000000002e-06, | |
| "loss": 7.699, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.05188067444876784, | |
| "eval_cer": 0.9170509690448202, | |
| "eval_loss": 8.67233657836914, | |
| "eval_runtime": 84.2938, | |
| "eval_samples_per_second": 31.366, | |
| "eval_steps_per_second": 3.927, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.054474708171206226, | |
| "grad_norm": 29.047372817993164, | |
| "learning_rate": 2.22e-06, | |
| "loss": 7.8821, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.057068741893644616, | |
| "grad_norm": 27.84252166748047, | |
| "learning_rate": 2.34e-06, | |
| "loss": 7.091, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.058365758754863814, | |
| "eval_cer": 0.9047231515809145, | |
| "eval_loss": 8.38408088684082, | |
| "eval_runtime": 92.3132, | |
| "eval_samples_per_second": 28.642, | |
| "eval_steps_per_second": 3.586, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.05966277561608301, | |
| "grad_norm": 22.684850692749023, | |
| "learning_rate": 2.46e-06, | |
| "loss": 7.4182, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.0622568093385214, | |
| "grad_norm": 33.87125778198242, | |
| "learning_rate": 2.58e-06, | |
| "loss": 7.2075, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.0648508430609598, | |
| "grad_norm": 21.509159088134766, | |
| "learning_rate": 2.7e-06, | |
| "loss": 7.1052, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0648508430609598, | |
| "eval_cer": 0.899629261235193, | |
| "eval_loss": 8.13282585144043, | |
| "eval_runtime": 101.5177, | |
| "eval_samples_per_second": 26.045, | |
| "eval_steps_per_second": 3.261, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.06744487678339818, | |
| "grad_norm": null, | |
| "learning_rate": 2.82e-06, | |
| "loss": 6.7926, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.07003891050583658, | |
| "grad_norm": 21.155956268310547, | |
| "learning_rate": 2.88e-06, | |
| "loss": 6.8734, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.07133592736705577, | |
| "eval_cer": 0.8984236067154957, | |
| "eval_loss": 7.93735408782959, | |
| "eval_runtime": 106.5732, | |
| "eval_samples_per_second": 24.809, | |
| "eval_steps_per_second": 3.106, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.07263294422827497, | |
| "grad_norm": 19.74744415283203, | |
| "learning_rate": 3e-06, | |
| "loss": 6.8722, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.07522697795071336, | |
| "grad_norm": 319.0416564941406, | |
| "learning_rate": 3.1199999999999998e-06, | |
| "loss": 6.4253, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.07782101167315175, | |
| "grad_norm": 23.68337631225586, | |
| "learning_rate": 3.24e-06, | |
| "loss": 6.8842, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.07782101167315175, | |
| "eval_cer": 0.8882056846610604, | |
| "eval_loss": 7.703160285949707, | |
| "eval_runtime": 110.1461, | |
| "eval_samples_per_second": 24.004, | |
| "eval_steps_per_second": 3.005, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.08041504539559015, | |
| "grad_norm": 20.635147094726562, | |
| "learning_rate": 3.36e-06, | |
| "loss": 6.2162, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.08300907911802853, | |
| "grad_norm": 19.55179786682129, | |
| "learning_rate": 3.48e-06, | |
| "loss": 6.7139, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.08430609597924774, | |
| "eval_cer": 0.8779576212436326, | |
| "eval_loss": 7.470834255218506, | |
| "eval_runtime": 111.6785, | |
| "eval_samples_per_second": 23.675, | |
| "eval_steps_per_second": 2.964, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.08560311284046693, | |
| "grad_norm": 17.086580276489258, | |
| "learning_rate": 3.6e-06, | |
| "loss": 6.6201, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.08819714656290532, | |
| "grad_norm": 15.556456565856934, | |
| "learning_rate": 3.72e-06, | |
| "loss": 6.093, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.0907911802853437, | |
| "grad_norm": 49.20164489746094, | |
| "learning_rate": 3.8400000000000005e-06, | |
| "loss": 5.9421, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.0907911802853437, | |
| "eval_cer": 0.8584561593875275, | |
| "eval_loss": 7.236043930053711, | |
| "eval_runtime": 126.8066, | |
| "eval_samples_per_second": 20.851, | |
| "eval_steps_per_second": 2.61, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.0933852140077821, | |
| "grad_norm": 17.137556076049805, | |
| "learning_rate": 3.96e-06, | |
| "loss": 6.7995, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.0959792477302205, | |
| "grad_norm": 24.586021423339844, | |
| "learning_rate": 4.080000000000001e-06, | |
| "loss": 6.043, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.09727626459143969, | |
| "eval_cer": 0.8255719323627815, | |
| "eval_loss": 7.014294624328613, | |
| "eval_runtime": 128.9469, | |
| "eval_samples_per_second": 20.505, | |
| "eval_steps_per_second": 2.567, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.09857328145265888, | |
| "grad_norm": 17.460948944091797, | |
| "learning_rate": 4.2000000000000004e-06, | |
| "loss": 6.0706, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.10116731517509728, | |
| "grad_norm": 26.46303939819336, | |
| "learning_rate": 4.32e-06, | |
| "loss": 6.7666, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.10376134889753567, | |
| "grad_norm": 32.067054748535156, | |
| "learning_rate": 4.44e-06, | |
| "loss": 6.0295, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.10376134889753567, | |
| "eval_cer": 0.782530066009585, | |
| "eval_loss": 6.801568984985352, | |
| "eval_runtime": 114.6152, | |
| "eval_samples_per_second": 23.069, | |
| "eval_steps_per_second": 2.888, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.10635538261997406, | |
| "grad_norm": 14.94288158416748, | |
| "learning_rate": 4.56e-06, | |
| "loss": 5.7654, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.10894941634241245, | |
| "grad_norm": 21.89992904663086, | |
| "learning_rate": 4.68e-06, | |
| "loss": 5.7672, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.11024643320363164, | |
| "eval_cer": 0.7390059378485095, | |
| "eval_loss": 6.629735469818115, | |
| "eval_runtime": 116.8302, | |
| "eval_samples_per_second": 22.631, | |
| "eval_steps_per_second": 2.833, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.11154345006485085, | |
| "grad_norm": 23.829565048217773, | |
| "learning_rate": 4.800000000000001e-06, | |
| "loss": 5.4628, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.11413748378728923, | |
| "grad_norm": 18.05320930480957, | |
| "learning_rate": 4.92e-06, | |
| "loss": 5.1534, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.11673151750972763, | |
| "grad_norm": 25.233701705932617, | |
| "learning_rate": 5.04e-06, | |
| "loss": 5.3042, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.11673151750972763, | |
| "eval_cer": 0.7084124544111885, | |
| "eval_loss": 6.476639747619629, | |
| "eval_runtime": 118.7446, | |
| "eval_samples_per_second": 22.266, | |
| "eval_steps_per_second": 2.787, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.11932555123216602, | |
| "grad_norm": 35.21253967285156, | |
| "learning_rate": 5.16e-06, | |
| "loss": 5.4296, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.1219195849546044, | |
| "grad_norm": 17.080265045166016, | |
| "learning_rate": 5.279999999999999e-06, | |
| "loss": 5.3031, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.12321660181582361, | |
| "eval_cer": 0.6901467884377731, | |
| "eval_loss": 6.346051216125488, | |
| "eval_runtime": 119.9281, | |
| "eval_samples_per_second": 22.047, | |
| "eval_steps_per_second": 2.76, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.1245136186770428, | |
| "grad_norm": 14.830784797668457, | |
| "learning_rate": 5.4e-06, | |
| "loss": 5.4933, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.12710765239948119, | |
| "grad_norm": 14.9316987991333, | |
| "learning_rate": 5.52e-06, | |
| "loss": 5.3361, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.1297016861219196, | |
| "grad_norm": 42.79384994506836, | |
| "learning_rate": 5.64e-06, | |
| "loss": 4.6843, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.1297016861219196, | |
| "eval_cer": 0.6729059288061006, | |
| "eval_loss": 6.210666179656982, | |
| "eval_runtime": 120.5118, | |
| "eval_samples_per_second": 21.94, | |
| "eval_steps_per_second": 2.747, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.13229571984435798, | |
| "grad_norm": 69.97132110595703, | |
| "learning_rate": 5.76e-06, | |
| "loss": 5.278, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.13488975356679636, | |
| "grad_norm": 68.64286041259766, | |
| "learning_rate": 5.8800000000000005e-06, | |
| "loss": 5.3985, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.13618677042801555, | |
| "eval_cer": 0.6567501582421557, | |
| "eval_loss": 6.105401039123535, | |
| "eval_runtime": 120.3425, | |
| "eval_samples_per_second": 21.971, | |
| "eval_steps_per_second": 2.75, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.13748378728923477, | |
| "grad_norm": 16.881147384643555, | |
| "learning_rate": 6e-06, | |
| "loss": 5.5177, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.14007782101167315, | |
| "grad_norm": 13.392465591430664, | |
| "learning_rate": 6.12e-06, | |
| "loss": 5.6112, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.14267185473411154, | |
| "grad_norm": 12.735469818115234, | |
| "learning_rate": 6.2399999999999995e-06, | |
| "loss": 4.3825, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.14267185473411154, | |
| "eval_cer": 0.6333303191970341, | |
| "eval_loss": 5.990973472595215, | |
| "eval_runtime": 122.2904, | |
| "eval_samples_per_second": 21.621, | |
| "eval_steps_per_second": 2.707, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.14526588845654995, | |
| "grad_norm": 13.771244049072266, | |
| "learning_rate": 6.36e-06, | |
| "loss": 4.9807, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.14785992217898833, | |
| "grad_norm": 16.60836410522461, | |
| "learning_rate": 6.48e-06, | |
| "loss": 5.0463, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.14915693904020752, | |
| "eval_cer": 0.6150345118606263, | |
| "eval_loss": 5.89418363571167, | |
| "eval_runtime": 115.3341, | |
| "eval_samples_per_second": 22.925, | |
| "eval_steps_per_second": 2.87, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.1504539559014267, | |
| "grad_norm": 11.2271146774292, | |
| "learning_rate": 6.6e-06, | |
| "loss": 5.1669, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.15304798962386512, | |
| "grad_norm": 14.511568069458008, | |
| "learning_rate": 6.72e-06, | |
| "loss": 4.883, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.1556420233463035, | |
| "grad_norm": 12.640761375427246, | |
| "learning_rate": 6.840000000000001e-06, | |
| "loss": 5.3346, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1556420233463035, | |
| "eval_cer": 0.6130451819031256, | |
| "eval_loss": 5.80028772354126, | |
| "eval_runtime": 113.846, | |
| "eval_samples_per_second": 23.224, | |
| "eval_steps_per_second": 2.907, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1582360570687419, | |
| "grad_norm": 10.243730545043945, | |
| "learning_rate": 6.96e-06, | |
| "loss": 4.669, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.1608300907911803, | |
| "grad_norm": 19.194486618041992, | |
| "learning_rate": 7.08e-06, | |
| "loss": 5.2001, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.1621271076523995, | |
| "eval_cer": 0.6162703077433161, | |
| "eval_loss": 5.716529369354248, | |
| "eval_runtime": 113.3314, | |
| "eval_samples_per_second": 23.33, | |
| "eval_steps_per_second": 2.921, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.16342412451361868, | |
| "grad_norm": 25.41460418701172, | |
| "learning_rate": 7.2e-06, | |
| "loss": 4.8738, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.16601815823605706, | |
| "grad_norm": 11.016427040100098, | |
| "learning_rate": 7.32e-06, | |
| "loss": 5.5428, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.16861219195849547, | |
| "grad_norm": 19.337942123413086, | |
| "learning_rate": 7.44e-06, | |
| "loss": 5.097, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.16861219195849547, | |
| "eval_cer": 0.6253127166410465, | |
| "eval_loss": 5.637584686279297, | |
| "eval_runtime": 111.6419, | |
| "eval_samples_per_second": 23.683, | |
| "eval_steps_per_second": 2.965, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.17120622568093385, | |
| "grad_norm": 14.668910026550293, | |
| "learning_rate": 7.5600000000000005e-06, | |
| "loss": 5.1702, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.17380025940337224, | |
| "grad_norm": 14.700507164001465, | |
| "learning_rate": 7.680000000000001e-06, | |
| "loss": 5.1842, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.17509727626459143, | |
| "eval_cer": 0.6279952979473732, | |
| "eval_loss": 5.5578293800354, | |
| "eval_runtime": 115.7363, | |
| "eval_samples_per_second": 22.845, | |
| "eval_steps_per_second": 2.86, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.17639429312581065, | |
| "grad_norm": 11.149736404418945, | |
| "learning_rate": 7.8e-06, | |
| "loss": 5.1527, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.17898832684824903, | |
| "grad_norm": 12.17773723602295, | |
| "learning_rate": 7.92e-06, | |
| "loss": 5.3414, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.1815823605706874, | |
| "grad_norm": 15.777327537536621, | |
| "learning_rate": 8.040000000000001e-06, | |
| "loss": 5.1606, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.1815823605706874, | |
| "eval_cer": 0.611960092835398, | |
| "eval_loss": 5.470022201538086, | |
| "eval_runtime": 115.9413, | |
| "eval_samples_per_second": 22.805, | |
| "eval_steps_per_second": 2.855, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.18417639429312582, | |
| "grad_norm": 22.217771530151367, | |
| "learning_rate": 8.160000000000001e-06, | |
| "loss": 5.0028, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.1867704280155642, | |
| "grad_norm": 13.488722801208496, | |
| "learning_rate": 8.28e-06, | |
| "loss": 4.7495, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.1880674448767834, | |
| "eval_cer": 0.6009886367061519, | |
| "eval_loss": 5.381906509399414, | |
| "eval_runtime": 109.493, | |
| "eval_samples_per_second": 24.148, | |
| "eval_steps_per_second": 3.023, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.1893644617380026, | |
| "grad_norm": 12.898096084594727, | |
| "learning_rate": 8.400000000000001e-06, | |
| "loss": 4.847, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.191958495460441, | |
| "grad_norm": 13.653580665588379, | |
| "learning_rate": 8.52e-06, | |
| "loss": 5.2004, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.19455252918287938, | |
| "grad_norm": 10.915148735046387, | |
| "learning_rate": 8.64e-06, | |
| "loss": 4.9847, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.19455252918287938, | |
| "eval_cer": 0.594960364107665, | |
| "eval_loss": 5.297786712646484, | |
| "eval_runtime": 108.0527, | |
| "eval_samples_per_second": 24.47, | |
| "eval_steps_per_second": 3.063, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.19714656290531776, | |
| "grad_norm": 23.160659790039062, | |
| "learning_rate": 8.759999999999999e-06, | |
| "loss": 4.7818, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.19974059662775617, | |
| "grad_norm": 14.993002891540527, | |
| "learning_rate": 8.88e-06, | |
| "loss": 4.7727, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.20103761348897536, | |
| "eval_cer": 0.5589715766946981, | |
| "eval_loss": 5.1952223777771, | |
| "eval_runtime": 111.9206, | |
| "eval_samples_per_second": 23.624, | |
| "eval_steps_per_second": 2.957, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.20233463035019456, | |
| "grad_norm": 13.507874488830566, | |
| "learning_rate": 9e-06, | |
| "loss": 4.4196, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.20492866407263294, | |
| "grad_norm": 12.171915054321289, | |
| "learning_rate": 9.12e-06, | |
| "loss": 4.088, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.20752269779507135, | |
| "grad_norm": 10.198915481567383, | |
| "learning_rate": 9.24e-06, | |
| "loss": 4.0656, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.20752269779507135, | |
| "eval_cer": 0.5371492298881756, | |
| "eval_loss": 5.124251365661621, | |
| "eval_runtime": 115.3755, | |
| "eval_samples_per_second": 22.916, | |
| "eval_steps_per_second": 2.869, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.21011673151750973, | |
| "grad_norm": 10.800135612487793, | |
| "learning_rate": 9.36e-06, | |
| "loss": 4.2534, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.2127107652399481, | |
| "grad_norm": 28.48563575744629, | |
| "learning_rate": 9.48e-06, | |
| "loss": 4.0234, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.2140077821011673, | |
| "eval_cer": 0.530246857762908, | |
| "eval_loss": 5.063826084136963, | |
| "eval_runtime": 114.849, | |
| "eval_samples_per_second": 23.022, | |
| "eval_steps_per_second": 2.882, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.21530479896238652, | |
| "grad_norm": 20.067386627197266, | |
| "learning_rate": 9.600000000000001e-06, | |
| "loss": 4.3631, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.2178988326848249, | |
| "grad_norm": 16.29481315612793, | |
| "learning_rate": 9.72e-06, | |
| "loss": 4.4636, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.2204928664072633, | |
| "grad_norm": 101.64680480957031, | |
| "learning_rate": 9.84e-06, | |
| "loss": 5.033, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.2204928664072633, | |
| "eval_cer": 0.5299755854959761, | |
| "eval_loss": 4.967648029327393, | |
| "eval_runtime": 111.3501, | |
| "eval_samples_per_second": 23.745, | |
| "eval_steps_per_second": 2.973, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.2230869001297017, | |
| "grad_norm": 22.31951904296875, | |
| "learning_rate": 9.960000000000001e-06, | |
| "loss": 4.0891, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.22568093385214008, | |
| "grad_norm": 56.18452072143555, | |
| "learning_rate": 1.008e-05, | |
| "loss": 4.284, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.22697795071335927, | |
| "eval_cer": 0.520179642523435, | |
| "eval_loss": 4.895947456359863, | |
| "eval_runtime": 112.6531, | |
| "eval_samples_per_second": 23.47, | |
| "eval_steps_per_second": 2.938, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.22827496757457846, | |
| "grad_norm": 13.42766284942627, | |
| "learning_rate": 1.02e-05, | |
| "loss": 4.4092, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.23086900129701687, | |
| "grad_norm": 21.44829559326172, | |
| "learning_rate": 1.032e-05, | |
| "loss": 4.1597, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.23346303501945526, | |
| "grad_norm": 14.215667724609375, | |
| "learning_rate": 1.044e-05, | |
| "loss": 3.8574, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.23346303501945526, | |
| "eval_cer": 0.5017632697350575, | |
| "eval_loss": 4.84341287612915, | |
| "eval_runtime": 115.4414, | |
| "eval_samples_per_second": 22.903, | |
| "eval_steps_per_second": 2.867, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.23605706874189364, | |
| "grad_norm": 13.1805419921875, | |
| "learning_rate": 1.0559999999999999e-05, | |
| "loss": 4.0776, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.23865110246433205, | |
| "grad_norm": 98.30623626708984, | |
| "learning_rate": 1.068e-05, | |
| "loss": 4.2507, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.23994811932555124, | |
| "eval_cer": 0.48886276637429543, | |
| "eval_loss": 4.78084135055542, | |
| "eval_runtime": 123.8484, | |
| "eval_samples_per_second": 21.349, | |
| "eval_steps_per_second": 2.673, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.24124513618677043, | |
| "grad_norm": 35.59674072265625, | |
| "learning_rate": 1.08e-05, | |
| "loss": 4.1455, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.2438391699092088, | |
| "grad_norm": 21.6286563873291, | |
| "learning_rate": 1.092e-05, | |
| "loss": 5.0675, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.24643320363164722, | |
| "grad_norm": 17.0408878326416, | |
| "learning_rate": 1.104e-05, | |
| "loss": 3.953, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.24643320363164722, | |
| "eval_cer": 0.47979021611357264, | |
| "eval_loss": 4.711887836456299, | |
| "eval_runtime": 135.169, | |
| "eval_samples_per_second": 19.561, | |
| "eval_steps_per_second": 2.449, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.2490272373540856, | |
| "grad_norm": 24.359798431396484, | |
| "learning_rate": 1.116e-05, | |
| "loss": 4.4154, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.251621271076524, | |
| "grad_norm": 13.193626403808594, | |
| "learning_rate": 1.128e-05, | |
| "loss": 4.3269, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.2529182879377432, | |
| "eval_cer": 0.4659251891370528, | |
| "eval_loss": 4.614772319793701, | |
| "eval_runtime": 132.7599, | |
| "eval_samples_per_second": 19.916, | |
| "eval_steps_per_second": 2.493, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.25421530479896237, | |
| "grad_norm": 14.497838973999023, | |
| "learning_rate": 1.1400000000000001e-05, | |
| "loss": 4.0266, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.25680933852140075, | |
| "grad_norm": 12.457406997680664, | |
| "learning_rate": 1.152e-05, | |
| "loss": 4.3069, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.2594033722438392, | |
| "grad_norm": 18.889881134033203, | |
| "learning_rate": 1.164e-05, | |
| "loss": 4.1068, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2594033722438392, | |
| "eval_cer": 0.4684872049914097, | |
| "eval_loss": 4.578884601593018, | |
| "eval_runtime": 129.8465, | |
| "eval_samples_per_second": 20.363, | |
| "eval_steps_per_second": 2.549, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2619974059662776, | |
| "grad_norm": 11.648727416992188, | |
| "learning_rate": 1.1760000000000001e-05, | |
| "loss": 3.7185, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.26459143968871596, | |
| "grad_norm": 13.08809757232666, | |
| "learning_rate": 1.1880000000000001e-05, | |
| "loss": 4.1442, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.26588845654993515, | |
| "eval_cer": 0.46004762335352806, | |
| "eval_loss": 4.4956889152526855, | |
| "eval_runtime": 114.7341, | |
| "eval_samples_per_second": 23.045, | |
| "eval_steps_per_second": 2.885, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.26718547341115434, | |
| "grad_norm": 12.64474105834961, | |
| "learning_rate": 1.2e-05, | |
| "loss": 3.7967, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.2697795071335927, | |
| "grad_norm": 12.794676780700684, | |
| "learning_rate": 1.2120000000000001e-05, | |
| "loss": 3.8475, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.2723735408560311, | |
| "grad_norm": 13.091010093688965, | |
| "learning_rate": 1.224e-05, | |
| "loss": 3.5213, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.2723735408560311, | |
| "eval_cer": 0.43729089429424, | |
| "eval_loss": 4.389532089233398, | |
| "eval_runtime": 111.3543, | |
| "eval_samples_per_second": 23.744, | |
| "eval_steps_per_second": 2.972, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.27496757457846954, | |
| "grad_norm": 13.716208457946777, | |
| "learning_rate": 1.236e-05, | |
| "loss": 4.0241, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.2775616083009079, | |
| "grad_norm": 14.271407127380371, | |
| "learning_rate": 1.2479999999999999e-05, | |
| "loss": 4.1152, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.2788586251621271, | |
| "eval_cer": 0.449709135847123, | |
| "eval_loss": 4.357320308685303, | |
| "eval_runtime": 113.2044, | |
| "eval_samples_per_second": 23.356, | |
| "eval_steps_per_second": 2.924, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.2801556420233463, | |
| "grad_norm": 12.267643928527832, | |
| "learning_rate": 1.26e-05, | |
| "loss": 3.2922, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.2827496757457847, | |
| "grad_norm": 13.182437896728516, | |
| "learning_rate": 1.272e-05, | |
| "loss": 3.8861, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.2853437094682231, | |
| "grad_norm": 12.376786231994629, | |
| "learning_rate": 1.284e-05, | |
| "loss": 3.5171, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.2853437094682231, | |
| "eval_cer": 0.45802815203303493, | |
| "eval_loss": 4.331967830657959, | |
| "eval_runtime": 114.1848, | |
| "eval_samples_per_second": 23.155, | |
| "eval_steps_per_second": 2.899, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.28793774319066145, | |
| "grad_norm": 11.612021446228027, | |
| "learning_rate": 1.296e-05, | |
| "loss": 4.1036, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.2905317769130999, | |
| "grad_norm": 12.48078727722168, | |
| "learning_rate": 1.308e-05, | |
| "loss": 3.1498, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.2918287937743191, | |
| "eval_cer": 0.4402146065045061, | |
| "eval_loss": 4.2296319007873535, | |
| "eval_runtime": 118.3956, | |
| "eval_samples_per_second": 22.332, | |
| "eval_steps_per_second": 2.796, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.2931258106355383, | |
| "grad_norm": 28.98529815673828, | |
| "learning_rate": 1.32e-05, | |
| "loss": 3.7617, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.29571984435797666, | |
| "grad_norm": 16.19705581665039, | |
| "learning_rate": 1.3320000000000001e-05, | |
| "loss": 4.0489, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.29831387808041504, | |
| "grad_norm": 12.226841926574707, | |
| "learning_rate": 1.344e-05, | |
| "loss": 3.6797, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.29831387808041504, | |
| "eval_cer": 0.43195587304457905, | |
| "eval_loss": 4.158808708190918, | |
| "eval_runtime": 112.6694, | |
| "eval_samples_per_second": 23.467, | |
| "eval_steps_per_second": 2.938, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.3009079118028534, | |
| "grad_norm": 20.419113159179688, | |
| "learning_rate": 1.356e-05, | |
| "loss": 3.6434, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.3035019455252918, | |
| "grad_norm": 18.819181442260742, | |
| "learning_rate": 1.3680000000000001e-05, | |
| "loss": 3.2154, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.30479896238651105, | |
| "eval_cer": 0.4050697772553275, | |
| "eval_loss": 4.025953769683838, | |
| "eval_runtime": 113.1476, | |
| "eval_samples_per_second": 23.368, | |
| "eval_steps_per_second": 2.925, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.30609597924773024, | |
| "grad_norm": 12.357544898986816, | |
| "learning_rate": 1.3800000000000002e-05, | |
| "loss": 3.9309, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.3086900129701686, | |
| "grad_norm": 14.584222793579102, | |
| "learning_rate": 1.392e-05, | |
| "loss": 3.5647, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.311284046692607, | |
| "grad_norm": 11.948848724365234, | |
| "learning_rate": 1.4040000000000001e-05, | |
| "loss": 3.439, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.311284046692607, | |
| "eval_cer": 0.3937064834071797, | |
| "eval_loss": 3.905667304992676, | |
| "eval_runtime": 113.4696, | |
| "eval_samples_per_second": 23.301, | |
| "eval_steps_per_second": 2.917, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.3138780804150454, | |
| "grad_norm": 13.00888729095459, | |
| "learning_rate": 1.416e-05, | |
| "loss": 3.2106, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.3164721141374838, | |
| "grad_norm": 12.678916931152344, | |
| "learning_rate": 1.428e-05, | |
| "loss": 3.4027, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.31776913099870296, | |
| "eval_cer": 0.3925912529764596, | |
| "eval_loss": 3.8874895572662354, | |
| "eval_runtime": 189.1601, | |
| "eval_samples_per_second": 13.978, | |
| "eval_steps_per_second": 1.75, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.31906614785992216, | |
| "grad_norm": 16.414127349853516, | |
| "learning_rate": 1.44e-05, | |
| "loss": 2.5058, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.3216601815823606, | |
| "grad_norm": 16.07786750793457, | |
| "learning_rate": 1.452e-05, | |
| "loss": 3.3537, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.324254215304799, | |
| "grad_norm": 15.625645637512207, | |
| "learning_rate": 1.464e-05, | |
| "loss": 3.6318, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.324254215304799, | |
| "eval_cer": 0.3870151008228592, | |
| "eval_loss": 3.824657917022705, | |
| "eval_runtime": 233.078, | |
| "eval_samples_per_second": 11.344, | |
| "eval_steps_per_second": 1.42, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.32684824902723736, | |
| "grad_norm": 16.048980712890625, | |
| "learning_rate": 1.4760000000000001e-05, | |
| "loss": 3.6032, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.32944228274967574, | |
| "grad_norm": 12.714573860168457, | |
| "learning_rate": 1.488e-05, | |
| "loss": 3.8301, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.33073929961089493, | |
| "eval_cer": 0.374446152455014, | |
| "eval_loss": 3.690356969833374, | |
| "eval_runtime": 139.2198, | |
| "eval_samples_per_second": 18.992, | |
| "eval_steps_per_second": 2.378, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.3320363164721141, | |
| "grad_norm": 12.425498962402344, | |
| "learning_rate": 1.5e-05, | |
| "loss": 3.0105, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.3346303501945525, | |
| "grad_norm": 13.164816856384277, | |
| "learning_rate": 1.5120000000000001e-05, | |
| "loss": 3.6224, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.33722438391699094, | |
| "grad_norm": 14.977278709411621, | |
| "learning_rate": 1.524e-05, | |
| "loss": 3.2017, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.33722438391699094, | |
| "eval_cer": 0.3727582361274377, | |
| "eval_loss": 3.6364212036132812, | |
| "eval_runtime": 123.239, | |
| "eval_samples_per_second": 21.454, | |
| "eval_steps_per_second": 2.686, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.3398184176394293, | |
| "grad_norm": 16.265350341796875, | |
| "learning_rate": 1.5360000000000002e-05, | |
| "loss": 3.9344, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.3424124513618677, | |
| "grad_norm": 13.141109466552734, | |
| "learning_rate": 1.548e-05, | |
| "loss": 2.757, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.3437094682230869, | |
| "eval_cer": 0.3830062995448654, | |
| "eval_loss": 3.622190475463867, | |
| "eval_runtime": 117.2686, | |
| "eval_samples_per_second": 22.547, | |
| "eval_steps_per_second": 2.823, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.3450064850843061, | |
| "grad_norm": 12.868675231933594, | |
| "learning_rate": 1.56e-05, | |
| "loss": 3.4981, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.3476005188067445, | |
| "grad_norm": 12.726391792297363, | |
| "learning_rate": 1.5720000000000002e-05, | |
| "loss": 2.9291, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.35019455252918286, | |
| "grad_norm": 31.09647560119629, | |
| "learning_rate": 1.584e-05, | |
| "loss": 3.1786, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.35019455252918286, | |
| "eval_cer": 0.3782741055550532, | |
| "eval_loss": 3.598266124725342, | |
| "eval_runtime": 115.8076, | |
| "eval_samples_per_second": 22.831, | |
| "eval_steps_per_second": 2.858, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3527885862516213, | |
| "grad_norm": 12.694645881652832, | |
| "learning_rate": 1.596e-05, | |
| "loss": 3.2305, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.3553826199740597, | |
| "grad_norm": 20.454267501831055, | |
| "learning_rate": 1.6080000000000002e-05, | |
| "loss": 2.9721, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.35667963683527887, | |
| "eval_cer": 0.3755613828857341, | |
| "eval_loss": 3.512030839920044, | |
| "eval_runtime": 133.3206, | |
| "eval_samples_per_second": 19.832, | |
| "eval_steps_per_second": 2.483, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.35797665369649806, | |
| "grad_norm": 20.09004783630371, | |
| "learning_rate": 1.62e-05, | |
| "loss": 3.0882, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.36057068741893644, | |
| "grad_norm": 16.710346221923828, | |
| "learning_rate": 1.6320000000000003e-05, | |
| "loss": 2.762, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.3631647211413748, | |
| "grad_norm": 19.519004821777344, | |
| "learning_rate": 1.6440000000000002e-05, | |
| "loss": 3.0841, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.3631647211413748, | |
| "eval_cer": 0.36691081170690537, | |
| "eval_loss": 3.4976441860198975, | |
| "eval_runtime": 135.0965, | |
| "eval_samples_per_second": 19.571, | |
| "eval_steps_per_second": 2.45, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.3657587548638132, | |
| "grad_norm": 13.175090789794922, | |
| "learning_rate": 1.656e-05, | |
| "loss": 3.5364, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.36835278858625164, | |
| "grad_norm": 16.523889541625977, | |
| "learning_rate": 1.6680000000000003e-05, | |
| "loss": 2.6431, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.36964980544747084, | |
| "eval_cer": 0.3738734665581578, | |
| "eval_loss": 3.458583116531372, | |
| "eval_runtime": 229.2061, | |
| "eval_samples_per_second": 11.535, | |
| "eval_steps_per_second": 1.444, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.37094682230869, | |
| "grad_norm": 14.984639167785645, | |
| "learning_rate": 1.6800000000000002e-05, | |
| "loss": 4.2237, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.3735408560311284, | |
| "grad_norm": 13.299590110778809, | |
| "learning_rate": 1.6919999999999997e-05, | |
| "loss": 2.6747, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.3761348897535668, | |
| "grad_norm": 12.275932312011719, | |
| "learning_rate": 1.704e-05, | |
| "loss": 2.3366, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.3761348897535668, | |
| "eval_cer": 0.3661271362691021, | |
| "eval_loss": 3.3806421756744385, | |
| "eval_runtime": 245.1909, | |
| "eval_samples_per_second": 10.783, | |
| "eval_steps_per_second": 1.35, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.3787289234760052, | |
| "grad_norm": 14.83483600616455, | |
| "learning_rate": 1.716e-05, | |
| "loss": 3.1826, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.38132295719844356, | |
| "grad_norm": 14.161396026611328, | |
| "learning_rate": 1.728e-05, | |
| "loss": 3.5359, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.38261997405966275, | |
| "eval_cer": 0.3429182867649275, | |
| "eval_loss": 3.350353956222534, | |
| "eval_runtime": 219.4263, | |
| "eval_samples_per_second": 12.05, | |
| "eval_steps_per_second": 1.508, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.383916990920882, | |
| "grad_norm": 12.273178100585938, | |
| "learning_rate": 1.74e-05, | |
| "loss": 3.6439, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.3865110246433204, | |
| "grad_norm": 14.881448745727539, | |
| "learning_rate": 1.7519999999999998e-05, | |
| "loss": 3.6943, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.38910505836575876, | |
| "grad_norm": 14.406302452087402, | |
| "learning_rate": 1.764e-05, | |
| "loss": 3.4593, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.38910505836575876, | |
| "eval_cer": 0.3484040148295506, | |
| "eval_loss": 3.2906293869018555, | |
| "eval_runtime": 141.4541, | |
| "eval_samples_per_second": 18.692, | |
| "eval_steps_per_second": 2.34, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.39169909208819714, | |
| "grad_norm": 15.321798324584961, | |
| "learning_rate": 1.776e-05, | |
| "loss": 3.1981, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.3942931258106355, | |
| "grad_norm": 12.990147590637207, | |
| "learning_rate": 1.7879999999999998e-05, | |
| "loss": 3.1501, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.3955901426718547, | |
| "eval_cer": 0.34671609850197427, | |
| "eval_loss": 3.222804069519043, | |
| "eval_runtime": 189.6181, | |
| "eval_samples_per_second": 13.944, | |
| "eval_steps_per_second": 1.746, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.3968871595330739, | |
| "grad_norm": 11.798747062683105, | |
| "learning_rate": 1.8e-05, | |
| "loss": 3.6844, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.39948119325551235, | |
| "grad_norm": 15.285426139831543, | |
| "learning_rate": 1.812e-05, | |
| "loss": 2.115, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.40207522697795073, | |
| "grad_norm": 14.921792984008789, | |
| "learning_rate": 1.824e-05, | |
| "loss": 2.8101, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.40207522697795073, | |
| "eval_cer": 0.342466166320041, | |
| "eval_loss": 3.1945455074310303, | |
| "eval_runtime": 145.2418, | |
| "eval_samples_per_second": 18.204, | |
| "eval_steps_per_second": 2.279, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.4046692607003891, | |
| "grad_norm": 12.135457992553711, | |
| "learning_rate": 1.836e-05, | |
| "loss": 3.4391, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.4072632944228275, | |
| "grad_norm": 14.905659675598145, | |
| "learning_rate": 1.848e-05, | |
| "loss": 2.8493, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.4085603112840467, | |
| "eval_cer": 0.35328691563432496, | |
| "eval_loss": 3.176107883453369, | |
| "eval_runtime": 245.7678, | |
| "eval_samples_per_second": 10.758, | |
| "eval_steps_per_second": 1.347, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.4098573281452659, | |
| "grad_norm": 13.051637649536133, | |
| "learning_rate": 1.86e-05, | |
| "loss": 2.8454, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.41245136186770426, | |
| "grad_norm": 14.108623504638672, | |
| "learning_rate": 1.872e-05, | |
| "loss": 3.4802, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.4150453955901427, | |
| "grad_norm": 19.467906951904297, | |
| "learning_rate": 1.884e-05, | |
| "loss": 2.8067, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.4150453955901427, | |
| "eval_cer": 0.3708593302589143, | |
| "eval_loss": 3.1531643867492676, | |
| "eval_runtime": 211.2143, | |
| "eval_samples_per_second": 12.518, | |
| "eval_steps_per_second": 1.567, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.4176394293125811, | |
| "grad_norm": 15.744620323181152, | |
| "learning_rate": 1.896e-05, | |
| "loss": 2.2496, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.42023346303501946, | |
| "grad_norm": 14.49579906463623, | |
| "learning_rate": 1.908e-05, | |
| "loss": 2.7236, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.42153047989623865, | |
| "eval_cer": 0.35376917744220393, | |
| "eval_loss": 3.1204159259796143, | |
| "eval_runtime": 194.3013, | |
| "eval_samples_per_second": 13.608, | |
| "eval_steps_per_second": 1.704, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.42282749675745784, | |
| "grad_norm": 16.304920196533203, | |
| "learning_rate": 1.9200000000000003e-05, | |
| "loss": 2.785, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.4254215304798962, | |
| "grad_norm": 12.900490760803223, | |
| "learning_rate": 1.932e-05, | |
| "loss": 2.5259, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.4280155642023346, | |
| "grad_norm": 15.345794677734375, | |
| "learning_rate": 1.944e-05, | |
| "loss": 3.023, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.4280155642023346, | |
| "eval_cer": 0.3658558640021702, | |
| "eval_loss": 3.054361581802368, | |
| "eval_runtime": 234.2094, | |
| "eval_samples_per_second": 11.289, | |
| "eval_steps_per_second": 1.413, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.43060959792477305, | |
| "grad_norm": 17.006378173828125, | |
| "learning_rate": 1.9560000000000002e-05, | |
| "loss": 2.5932, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.43320363164721143, | |
| "grad_norm": 12.188159942626953, | |
| "learning_rate": 1.968e-05, | |
| "loss": 3.1202, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.4345006485084306, | |
| "eval_cer": 0.35904391596588, | |
| "eval_loss": 3.008192300796509, | |
| "eval_runtime": 161.9322, | |
| "eval_samples_per_second": 16.328, | |
| "eval_steps_per_second": 2.044, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.4357976653696498, | |
| "grad_norm": 13.4000883102417, | |
| "learning_rate": 1.98e-05, | |
| "loss": 2.9631, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.4383916990920882, | |
| "grad_norm": 12.92082691192627, | |
| "learning_rate": 1.9920000000000002e-05, | |
| "loss": 2.6806, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.4409857328145266, | |
| "grad_norm": 19.443449020385742, | |
| "learning_rate": 2.004e-05, | |
| "loss": 2.6839, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.4409857328145266, | |
| "eval_cer": 0.34593242306417094, | |
| "eval_loss": 2.986955404281616, | |
| "eval_runtime": 209.5827, | |
| "eval_samples_per_second": 12.616, | |
| "eval_steps_per_second": 1.579, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.44357976653696496, | |
| "grad_norm": 13.657390594482422, | |
| "learning_rate": 2.016e-05, | |
| "loss": 3.3844, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.4461738002594034, | |
| "grad_norm": 12.142219543457031, | |
| "learning_rate": 2.0280000000000002e-05, | |
| "loss": 2.5051, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.4474708171206226, | |
| "eval_cer": 0.34991108297917234, | |
| "eval_loss": 2.9238357543945312, | |
| "eval_runtime": 172.4959, | |
| "eval_samples_per_second": 15.328, | |
| "eval_steps_per_second": 1.919, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.4487678339818418, | |
| "grad_norm": 11.67302417755127, | |
| "learning_rate": 2.04e-05, | |
| "loss": 2.6669, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.45136186770428016, | |
| "grad_norm": 9.952072143554688, | |
| "learning_rate": 2.0520000000000003e-05, | |
| "loss": 3.1192, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.45395590142671854, | |
| "grad_norm": 13.724016189575195, | |
| "learning_rate": 2.064e-05, | |
| "loss": 3.026, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.45395590142671854, | |
| "eval_cer": 0.35256352292250653, | |
| "eval_loss": 2.923769235610962, | |
| "eval_runtime": 178.4978, | |
| "eval_samples_per_second": 14.813, | |
| "eval_steps_per_second": 1.854, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.4565499351491569, | |
| "grad_norm": 12.271801948547363, | |
| "learning_rate": 2.0759999999999998e-05, | |
| "loss": 2.0673, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.4591439688715953, | |
| "grad_norm": 11.006880760192871, | |
| "learning_rate": 2.088e-05, | |
| "loss": 2.1873, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.4604409857328145, | |
| "eval_cer": 0.3467763812279591, | |
| "eval_loss": 2.878007173538208, | |
| "eval_runtime": 239.5277, | |
| "eval_samples_per_second": 11.038, | |
| "eval_steps_per_second": 1.382, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.46173800259403375, | |
| "grad_norm": 20.289621353149414, | |
| "learning_rate": 2.1e-05, | |
| "loss": 2.8391, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.46433203631647213, | |
| "grad_norm": 12.577980995178223, | |
| "learning_rate": 2.1119999999999998e-05, | |
| "loss": 2.2868, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.4669260700389105, | |
| "grad_norm": 12.996379852294922, | |
| "learning_rate": 2.124e-05, | |
| "loss": 2.8355, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.4669260700389105, | |
| "eval_cer": 0.32480332760647435, | |
| "eval_loss": 2.820495843887329, | |
| "eval_runtime": 183.724, | |
| "eval_samples_per_second": 14.391, | |
| "eval_steps_per_second": 1.802, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.4695201037613489, | |
| "grad_norm": 20.477155685424805, | |
| "learning_rate": 2.136e-05, | |
| "loss": 2.8462, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.4721141374837873, | |
| "grad_norm": 32.31524658203125, | |
| "learning_rate": 2.148e-05, | |
| "loss": 2.0954, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.47341115434500647, | |
| "eval_cer": 0.3083160020496127, | |
| "eval_loss": 2.8296797275543213, | |
| "eval_runtime": 276.3938, | |
| "eval_samples_per_second": 9.566, | |
| "eval_steps_per_second": 1.198, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.47470817120622566, | |
| "grad_norm": 14.220911026000977, | |
| "learning_rate": 2.16e-05, | |
| "loss": 2.4953, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.4773022049286641, | |
| "grad_norm": 16.390596389770508, | |
| "learning_rate": 2.172e-05, | |
| "loss": 2.624, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.4798962386511025, | |
| "grad_norm": 14.07410717010498, | |
| "learning_rate": 2.184e-05, | |
| "loss": 2.9978, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.4798962386511025, | |
| "eval_cer": 0.3032522530668837, | |
| "eval_loss": 2.793341875076294, | |
| "eval_runtime": 279.1859, | |
| "eval_samples_per_second": 9.47, | |
| "eval_steps_per_second": 1.186, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.48249027237354086, | |
| "grad_norm": 24.049970626831055, | |
| "learning_rate": 2.196e-05, | |
| "loss": 2.9861, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.48508430609597925, | |
| "grad_norm": 10.874021530151367, | |
| "learning_rate": 2.208e-05, | |
| "loss": 2.1597, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.48638132295719844, | |
| "eval_cer": 0.3145251228260542, | |
| "eval_loss": 2.7285666465759277, | |
| "eval_runtime": 309.9236, | |
| "eval_samples_per_second": 8.531, | |
| "eval_steps_per_second": 1.068, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.4876783398184176, | |
| "grad_norm": 12.51282787322998, | |
| "learning_rate": 2.22e-05, | |
| "loss": 2.2976, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.490272373540856, | |
| "grad_norm": 11.6898775100708, | |
| "learning_rate": 2.232e-05, | |
| "loss": 2.703, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.49286640726329445, | |
| "grad_norm": 14.729179382324219, | |
| "learning_rate": 2.2440000000000002e-05, | |
| "loss": 2.0108, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.49286640726329445, | |
| "eval_cer": 0.31328932694336437, | |
| "eval_loss": 2.689061403274536, | |
| "eval_runtime": 188.7169, | |
| "eval_samples_per_second": 14.01, | |
| "eval_steps_per_second": 1.754, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.49546044098573283, | |
| "grad_norm": 11.870194435119629, | |
| "learning_rate": 2.256e-05, | |
| "loss": 2.2467, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.4980544747081712, | |
| "grad_norm": 12.500712394714355, | |
| "learning_rate": 2.268e-05, | |
| "loss": 2.0565, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.4993514915693904, | |
| "eval_cer": 0.3065075202700666, | |
| "eval_loss": 2.683504104614258, | |
| "eval_runtime": 139.7652, | |
| "eval_samples_per_second": 18.917, | |
| "eval_steps_per_second": 2.368, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.5006485084306096, | |
| "grad_norm": 17.805166244506836, | |
| "learning_rate": 2.2800000000000002e-05, | |
| "loss": 2.0304, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.503242542153048, | |
| "grad_norm": 10.874719619750977, | |
| "learning_rate": 2.292e-05, | |
| "loss": 2.2385, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.5058365758754864, | |
| "grad_norm": 19.65207862854004, | |
| "learning_rate": 2.304e-05, | |
| "loss": 2.2806, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.5058365758754864, | |
| "eval_cer": 0.310697169726015, | |
| "eval_loss": 2.64746356010437, | |
| "eval_runtime": 158.3003, | |
| "eval_samples_per_second": 16.702, | |
| "eval_steps_per_second": 2.091, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.5084306095979247, | |
| "grad_norm": 11.749553680419922, | |
| "learning_rate": 2.3160000000000002e-05, | |
| "loss": 2.0118, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.5110246433203631, | |
| "grad_norm": 12.608861923217773, | |
| "learning_rate": 2.328e-05, | |
| "loss": 1.8522, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.5123216601815823, | |
| "eval_cer": 0.32664195074901287, | |
| "eval_loss": 2.6235055923461914, | |
| "eval_runtime": 186.0047, | |
| "eval_samples_per_second": 14.215, | |
| "eval_steps_per_second": 1.78, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.5136186770428015, | |
| "grad_norm": 15.640968322753906, | |
| "learning_rate": 2.3400000000000003e-05, | |
| "loss": 2.5232, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.51621271076524, | |
| "grad_norm": 12.88823127746582, | |
| "learning_rate": 2.3520000000000002e-05, | |
| "loss": 2.196, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.5188067444876784, | |
| "grad_norm": 12.094499588012695, | |
| "learning_rate": 2.364e-05, | |
| "loss": 2.3092, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5188067444876784, | |
| "eval_cer": 0.3185640654670404, | |
| "eval_loss": 2.6280529499053955, | |
| "eval_runtime": 155.4528, | |
| "eval_samples_per_second": 17.008, | |
| "eval_steps_per_second": 2.129, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5214007782101168, | |
| "grad_norm": 27.93305015563965, | |
| "learning_rate": 2.3760000000000003e-05, | |
| "loss": 2.1069, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.5239948119325551, | |
| "grad_norm": 14.44329833984375, | |
| "learning_rate": 2.3880000000000002e-05, | |
| "loss": 2.513, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.5252918287937743, | |
| "eval_cer": 0.2939988546282063, | |
| "eval_loss": 2.567127227783203, | |
| "eval_runtime": 158.9316, | |
| "eval_samples_per_second": 16.636, | |
| "eval_steps_per_second": 2.083, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.5265888456549935, | |
| "grad_norm": 12.40060806274414, | |
| "learning_rate": 2.4e-05, | |
| "loss": 2.412, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.5291828793774319, | |
| "grad_norm": 12.270583152770996, | |
| "learning_rate": 2.4120000000000003e-05, | |
| "loss": 2.0339, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.5317769130998703, | |
| "grad_norm": 17.22001838684082, | |
| "learning_rate": 2.4240000000000002e-05, | |
| "loss": 2.0117, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.5317769130998703, | |
| "eval_cer": 0.2961388914006691, | |
| "eval_loss": 2.5344009399414062, | |
| "eval_runtime": 164.3351, | |
| "eval_samples_per_second": 16.089, | |
| "eval_steps_per_second": 2.014, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.5343709468223087, | |
| "grad_norm": 10.44601058959961, | |
| "learning_rate": 2.4360000000000004e-05, | |
| "loss": 1.6976, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.5369649805447471, | |
| "grad_norm": 16.720975875854492, | |
| "learning_rate": 2.448e-05, | |
| "loss": 3.0921, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.5382619974059663, | |
| "eval_cer": 0.2885131265635832, | |
| "eval_loss": 2.51837158203125, | |
| "eval_runtime": 169.9004, | |
| "eval_samples_per_second": 15.562, | |
| "eval_steps_per_second": 1.948, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.5395590142671854, | |
| "grad_norm": 11.013751983642578, | |
| "learning_rate": 2.4599999999999998e-05, | |
| "loss": 1.8176, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.5421530479896238, | |
| "grad_norm": 13.207280158996582, | |
| "learning_rate": 2.472e-05, | |
| "loss": 2.0911, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.5447470817120622, | |
| "grad_norm": 13.864497184753418, | |
| "learning_rate": 2.484e-05, | |
| "loss": 2.1863, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.5447470817120622, | |
| "eval_cer": 0.3223920185670796, | |
| "eval_loss": 2.489891767501831, | |
| "eval_runtime": 198.7131, | |
| "eval_samples_per_second": 13.306, | |
| "eval_steps_per_second": 1.666, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.5473411154345007, | |
| "grad_norm": 13.306368827819824, | |
| "learning_rate": 2.4959999999999998e-05, | |
| "loss": 2.1374, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.5499351491569391, | |
| "grad_norm": 14.835345268249512, | |
| "learning_rate": 2.508e-05, | |
| "loss": 2.0687, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.5512321660181583, | |
| "eval_cer": 0.3371612864333725, | |
| "eval_loss": 2.540318250656128, | |
| "eval_runtime": 177.6188, | |
| "eval_samples_per_second": 14.886, | |
| "eval_steps_per_second": 1.864, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.5525291828793775, | |
| "grad_norm": 15.374982833862305, | |
| "learning_rate": 2.52e-05, | |
| "loss": 2.0655, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.5551232166018158, | |
| "grad_norm": 14.928581237792969, | |
| "learning_rate": 2.5319999999999998e-05, | |
| "loss": 2.7183, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.5577172503242542, | |
| "grad_norm": 14.489096641540527, | |
| "learning_rate": 2.544e-05, | |
| "loss": 2.0051, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.5577172503242542, | |
| "eval_cer": 0.31229466196461403, | |
| "eval_loss": 2.4285073280334473, | |
| "eval_runtime": 181.8863, | |
| "eval_samples_per_second": 14.537, | |
| "eval_steps_per_second": 1.82, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.5603112840466926, | |
| "grad_norm": 11.531155586242676, | |
| "learning_rate": 2.556e-05, | |
| "loss": 2.2619, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.562905317769131, | |
| "grad_norm": 17.837749481201172, | |
| "learning_rate": 2.568e-05, | |
| "loss": 2.5056, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.5642023346303502, | |
| "eval_cer": 0.3202519817946168, | |
| "eval_loss": 2.4498050212860107, | |
| "eval_runtime": 230.2751, | |
| "eval_samples_per_second": 11.482, | |
| "eval_steps_per_second": 1.437, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.5654993514915694, | |
| "grad_norm": 13.783636093139648, | |
| "learning_rate": 2.58e-05, | |
| "loss": 2.0943, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.5680933852140078, | |
| "grad_norm": 17.753210067749023, | |
| "learning_rate": 2.592e-05, | |
| "loss": 2.12, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.5706874189364461, | |
| "grad_norm": 15.3496732711792, | |
| "learning_rate": 2.604e-05, | |
| "loss": 2.2611, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.5706874189364461, | |
| "eval_cer": 0.33478011875697017, | |
| "eval_loss": 2.4362807273864746, | |
| "eval_runtime": 233.7715, | |
| "eval_samples_per_second": 11.31, | |
| "eval_steps_per_second": 1.416, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.5732814526588845, | |
| "grad_norm": 12.754862785339355, | |
| "learning_rate": 2.616e-05, | |
| "loss": 1.7839, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.5758754863813229, | |
| "grad_norm": 15.386824607849121, | |
| "learning_rate": 2.628e-05, | |
| "loss": 2.4994, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.5771725032425421, | |
| "eval_cer": 0.310908159266962, | |
| "eval_loss": 2.458259105682373, | |
| "eval_runtime": 167.458, | |
| "eval_samples_per_second": 15.789, | |
| "eval_steps_per_second": 1.977, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.5784695201037614, | |
| "grad_norm": 14.832752227783203, | |
| "learning_rate": 2.64e-05, | |
| "loss": 2.0773, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.5810635538261998, | |
| "grad_norm": 15.057633399963379, | |
| "learning_rate": 2.652e-05, | |
| "loss": 2.0135, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.5836575875486382, | |
| "grad_norm": 17.804443359375, | |
| "learning_rate": 2.6640000000000002e-05, | |
| "loss": 2.4173, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.5836575875486382, | |
| "eval_cer": 0.3094312324803328, | |
| "eval_loss": 2.376800060272217, | |
| "eval_runtime": 172.2385, | |
| "eval_samples_per_second": 15.351, | |
| "eval_steps_per_second": 1.922, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.5862516212710766, | |
| "grad_norm": 22.79265022277832, | |
| "learning_rate": 2.676e-05, | |
| "loss": 1.9889, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.5888456549935149, | |
| "grad_norm": 11.24325942993164, | |
| "learning_rate": 2.688e-05, | |
| "loss": 2.9177, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.5901426718547341, | |
| "eval_cer": 0.31289748922446275, | |
| "eval_loss": 2.4268851280212402, | |
| "eval_runtime": 150.7156, | |
| "eval_samples_per_second": 17.543, | |
| "eval_steps_per_second": 2.196, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.5914396887159533, | |
| "grad_norm": 14.807707786560059, | |
| "learning_rate": 2.7000000000000002e-05, | |
| "loss": 1.6092, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.5940337224383917, | |
| "grad_norm": 16.166181564331055, | |
| "learning_rate": 2.712e-05, | |
| "loss": 2.2921, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.5966277561608301, | |
| "grad_norm": 18.733001708984375, | |
| "learning_rate": 2.724e-05, | |
| "loss": 2.4549, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5966277561608301, | |
| "eval_cer": 0.3229647044639359, | |
| "eval_loss": 2.331587076187134, | |
| "eval_runtime": 176.3172, | |
| "eval_samples_per_second": 14.996, | |
| "eval_steps_per_second": 1.877, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5992217898832685, | |
| "grad_norm": 13.820377349853516, | |
| "learning_rate": 2.7360000000000002e-05, | |
| "loss": 2.3386, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.6018158236057068, | |
| "grad_norm": 11.139546394348145, | |
| "learning_rate": 2.748e-05, | |
| "loss": 2.3171, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.603112840466926, | |
| "eval_cer": 0.3494589625342858, | |
| "eval_loss": 2.3250718116760254, | |
| "eval_runtime": 227.8398, | |
| "eval_samples_per_second": 11.605, | |
| "eval_steps_per_second": 1.453, | |
| "step": 465 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 77100, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 100, | |
| "save_steps": 5, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 10, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 10 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.783620910505001e+18, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |