diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,4857 @@ +{ + "best_metric": 0.27972195589645255, + "best_model_checkpoint": "./save/jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-cn/checkpoint-28965", + "epoch": 20.0, + "global_step": 38620, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 4.993785603314345e-05, + "loss": 5.0672, + "step": 50 + }, + { + "epoch": 0.05, + "learning_rate": 4.9873122734334545e-05, + "loss": 2.5588, + "step": 100 + }, + { + "epoch": 0.08, + "learning_rate": 4.980838943552564e-05, + "loss": 2.2204, + "step": 150 + }, + { + "epoch": 0.1, + "learning_rate": 4.974495080269291e-05, + "loss": 1.9017, + "step": 200 + }, + { + "epoch": 0.13, + "learning_rate": 4.9680217503884e-05, + "loss": 1.7674, + "step": 250 + }, + { + "epoch": 0.16, + "learning_rate": 4.961548420507509e-05, + "loss": 1.7578, + "step": 300 + }, + { + "epoch": 0.18, + "learning_rate": 4.955075090626619e-05, + "loss": 1.7046, + "step": 350 + }, + { + "epoch": 0.21, + "learning_rate": 4.948601760745728e-05, + "loss": 1.6142, + "step": 400 + }, + { + "epoch": 0.23, + "learning_rate": 4.942128430864837e-05, + "loss": 1.6178, + "step": 450 + }, + { + "epoch": 0.26, + "learning_rate": 4.935655100983946e-05, + "loss": 1.5983, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.929181771103056e-05, + "loss": 1.3485, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.9227084412221646e-05, + "loss": 1.376, + "step": 600 + }, + { + "epoch": 0.34, + "learning_rate": 4.916235111341274e-05, + "loss": 1.4661, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.909761781460384e-05, + "loss": 1.3063, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.9032884515794925e-05, + "loss": 1.3421, + "step": 750 + }, + { + "epoch": 0.41, + "learning_rate": 4.896815121698602e-05, + "loss": 1.2428, + "step": 800 + }, + { + "epoch": 0.44, + "learning_rate": 4.890341791817711e-05, + "loss": 1.2512, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.883868461936821e-05, + "loss": 1.286, + "step": 900 + }, + { + "epoch": 0.49, + "learning_rate": 4.87739513205593e-05, + "loss": 1.2782, + "step": 950 + }, + { + "epoch": 0.52, + "learning_rate": 4.870921802175039e-05, + "loss": 1.0988, + "step": 1000 + }, + { + "epoch": 0.54, + "learning_rate": 4.864448472294148e-05, + "loss": 1.1542, + "step": 1050 + }, + { + "epoch": 0.57, + "learning_rate": 4.8579751424132576e-05, + "loss": 1.0869, + "step": 1100 + }, + { + "epoch": 0.6, + "learning_rate": 4.851501812532367e-05, + "loss": 1.1135, + "step": 1150 + }, + { + "epoch": 0.62, + "learning_rate": 4.845028482651476e-05, + "loss": 1.1439, + "step": 1200 + }, + { + "epoch": 0.65, + "learning_rate": 4.8385551527705855e-05, + "loss": 1.1334, + "step": 1250 + }, + { + "epoch": 0.67, + "learning_rate": 4.832081822889695e-05, + "loss": 1.1235, + "step": 1300 + }, + { + "epoch": 0.7, + "learning_rate": 4.825608493008804e-05, + "loss": 0.9544, + "step": 1350 + }, + { + "epoch": 0.73, + "learning_rate": 4.819135163127913e-05, + "loss": 1.1476, + "step": 1400 + }, + { + "epoch": 0.75, + "learning_rate": 4.812661833247023e-05, + "loss": 1.052, + "step": 1450 + }, + { + "epoch": 0.78, + "learning_rate": 4.806188503366132e-05, + "loss": 1.1177, + "step": 1500 + }, + { + "epoch": 0.8, + "learning_rate": 4.7997151734852406e-05, + "loss": 1.0574, + "step": 1550 + }, + { + "epoch": 0.83, + "learning_rate": 4.79324184360435e-05, + "loss": 0.936, + "step": 1600 + }, + { + "epoch": 0.85, + "learning_rate": 4.78676851372346e-05, + "loss": 1.075, + "step": 1650 + }, + { + "epoch": 0.88, + "learning_rate": 4.7802951838425685e-05, + "loss": 0.9814, + "step": 1700 + }, + { + "epoch": 0.91, + "learning_rate": 4.773821853961678e-05, + "loss": 1.0105, + "step": 1750 + }, + { + "epoch": 0.93, + "learning_rate": 4.767348524080788e-05, + "loss": 0.9463, + "step": 1800 + }, + { + "epoch": 0.96, + "learning_rate": 4.760875194199897e-05, + "loss": 0.9525, + "step": 1850 + }, + { + "epoch": 0.98, + "learning_rate": 4.754401864319006e-05, + "loss": 1.0543, + "step": 1900 + }, + { + "epoch": 1.0, + "eval_cer": 0.32264885386131004, + "eval_loss": 1.3474498987197876, + "eval_mer": 0.36984659635666345, + "eval_runtime": 96.0449, + "eval_samples_per_second": 11.734, + "eval_steps_per_second": 1.468, + "step": 1931 + }, + { + "epoch": 1.01, + "learning_rate": 4.747928534438115e-05, + "loss": 0.8595, + "step": 1950 + }, + { + "epoch": 1.04, + "learning_rate": 4.741455204557225e-05, + "loss": 0.7768, + "step": 2000 + }, + { + "epoch": 1.06, + "learning_rate": 4.7349818746763336e-05, + "loss": 0.8032, + "step": 2050 + }, + { + "epoch": 1.09, + "learning_rate": 4.728508544795443e-05, + "loss": 0.7222, + "step": 2100 + }, + { + "epoch": 1.11, + "learning_rate": 4.722035214914552e-05, + "loss": 0.738, + "step": 2150 + }, + { + "epoch": 1.14, + "learning_rate": 4.7155618850336615e-05, + "loss": 0.8004, + "step": 2200 + }, + { + "epoch": 1.17, + "learning_rate": 4.709088555152771e-05, + "loss": 0.8262, + "step": 2250 + }, + { + "epoch": 1.19, + "learning_rate": 4.70261522527188e-05, + "loss": 0.8493, + "step": 2300 + }, + { + "epoch": 1.22, + "learning_rate": 4.6961418953909894e-05, + "loss": 0.8406, + "step": 2350 + }, + { + "epoch": 1.24, + "learning_rate": 4.689668565510099e-05, + "loss": 0.7815, + "step": 2400 + }, + { + "epoch": 1.27, + "learning_rate": 4.683195235629208e-05, + "loss": 0.7685, + "step": 2450 + }, + { + "epoch": 1.29, + "learning_rate": 4.6767219057483166e-05, + "loss": 0.7967, + "step": 2500 + }, + { + "epoch": 1.32, + "learning_rate": 4.670507509062662e-05, + "loss": 0.7799, + "step": 2550 + }, + { + "epoch": 1.35, + "learning_rate": 4.6640341791817716e-05, + "loss": 0.7236, + "step": 2600 + }, + { + "epoch": 1.37, + "learning_rate": 4.65756084930088e-05, + "loss": 0.7469, + "step": 2650 + }, + { + "epoch": 1.4, + "learning_rate": 4.65108751941999e-05, + "loss": 0.7104, + "step": 2700 + }, + { + "epoch": 1.42, + "learning_rate": 4.6446141895390995e-05, + "loss": 0.8183, + "step": 2750 + }, + { + "epoch": 1.45, + "learning_rate": 4.638140859658208e-05, + "loss": 0.724, + "step": 2800 + }, + { + "epoch": 1.48, + "learning_rate": 4.6316675297773174e-05, + "loss": 0.6662, + "step": 2850 + }, + { + "epoch": 1.5, + "learning_rate": 4.6251941998964274e-05, + "loss": 0.6851, + "step": 2900 + }, + { + "epoch": 1.53, + "learning_rate": 4.618720870015536e-05, + "loss": 0.6922, + "step": 2950 + }, + { + "epoch": 1.55, + "learning_rate": 4.612247540134645e-05, + "loss": 0.6734, + "step": 3000 + }, + { + "epoch": 1.58, + "learning_rate": 4.6059036768513724e-05, + "loss": 0.6749, + "step": 3050 + }, + { + "epoch": 1.61, + "learning_rate": 4.599430346970482e-05, + "loss": 0.6354, + "step": 3100 + }, + { + "epoch": 1.63, + "learning_rate": 4.592957017089592e-05, + "loss": 0.6655, + "step": 3150 + }, + { + "epoch": 1.66, + "learning_rate": 4.5864836872087e-05, + "loss": 0.7088, + "step": 3200 + }, + { + "epoch": 1.68, + "learning_rate": 4.5800103573278096e-05, + "loss": 0.7648, + "step": 3250 + }, + { + "epoch": 1.71, + "learning_rate": 4.573537027446919e-05, + "loss": 0.7203, + "step": 3300 + }, + { + "epoch": 1.73, + "learning_rate": 4.567063697566028e-05, + "loss": 0.7324, + "step": 3350 + }, + { + "epoch": 1.76, + "learning_rate": 4.5605903676851375e-05, + "loss": 0.6245, + "step": 3400 + }, + { + "epoch": 1.79, + "learning_rate": 4.554117037804247e-05, + "loss": 0.7085, + "step": 3450 + }, + { + "epoch": 1.81, + "learning_rate": 4.547643707923356e-05, + "loss": 0.7435, + "step": 3500 + }, + { + "epoch": 1.84, + "learning_rate": 4.5411703780424654e-05, + "loss": 0.7059, + "step": 3550 + }, + { + "epoch": 1.86, + "learning_rate": 4.534697048161575e-05, + "loss": 0.7296, + "step": 3600 + }, + { + "epoch": 1.89, + "learning_rate": 4.528223718280683e-05, + "loss": 0.7191, + "step": 3650 + }, + { + "epoch": 1.92, + "learning_rate": 4.521750388399793e-05, + "loss": 0.7144, + "step": 3700 + }, + { + "epoch": 1.94, + "learning_rate": 4.5152770585189026e-05, + "loss": 0.6424, + "step": 3750 + }, + { + "epoch": 1.97, + "learning_rate": 4.508803728638011e-05, + "loss": 0.6824, + "step": 3800 + }, + { + "epoch": 1.99, + "learning_rate": 4.5023303987571205e-05, + "loss": 0.8031, + "step": 3850 + }, + { + "epoch": 2.0, + "eval_cer": 0.2872795425056466, + "eval_loss": 1.2660095691680908, + "eval_mer": 0.34675615212527966, + "eval_runtime": 100.6716, + "eval_samples_per_second": 11.195, + "eval_steps_per_second": 1.401, + "step": 3862 + }, + { + "epoch": 2.02, + "learning_rate": 4.4958570688762305e-05, + "loss": 0.6218, + "step": 3900 + }, + { + "epoch": 2.05, + "learning_rate": 4.489383738995339e-05, + "loss": 0.4636, + "step": 3950 + }, + { + "epoch": 2.07, + "learning_rate": 4.4829104091144484e-05, + "loss": 0.5037, + "step": 4000 + }, + { + "epoch": 2.1, + "learning_rate": 4.4764370792335584e-05, + "loss": 0.4665, + "step": 4050 + }, + { + "epoch": 2.12, + "learning_rate": 4.469963749352668e-05, + "loss": 0.4765, + "step": 4100 + }, + { + "epoch": 2.15, + "learning_rate": 4.463490419471776e-05, + "loss": 0.5868, + "step": 4150 + }, + { + "epoch": 2.18, + "learning_rate": 4.4570170895908856e-05, + "loss": 0.5356, + "step": 4200 + }, + { + "epoch": 2.2, + "learning_rate": 4.4505437597099956e-05, + "loss": 0.5202, + "step": 4250 + }, + { + "epoch": 2.23, + "learning_rate": 4.444070429829104e-05, + "loss": 0.5393, + "step": 4300 + }, + { + "epoch": 2.25, + "learning_rate": 4.4375970999482135e-05, + "loss": 0.5147, + "step": 4350 + }, + { + "epoch": 2.28, + "learning_rate": 4.431123770067323e-05, + "loss": 0.4785, + "step": 4400 + }, + { + "epoch": 2.3, + "learning_rate": 4.424650440186432e-05, + "loss": 0.5986, + "step": 4450 + }, + { + "epoch": 2.33, + "learning_rate": 4.4181771103055414e-05, + "loss": 0.5603, + "step": 4500 + }, + { + "epoch": 2.36, + "learning_rate": 4.411703780424651e-05, + "loss": 0.5424, + "step": 4550 + }, + { + "epoch": 2.38, + "learning_rate": 4.40523045054376e-05, + "loss": 0.4871, + "step": 4600 + }, + { + "epoch": 2.41, + "learning_rate": 4.398757120662869e-05, + "loss": 0.478, + "step": 4650 + }, + { + "epoch": 2.43, + "learning_rate": 4.3922837907819786e-05, + "loss": 0.5392, + "step": 4700 + }, + { + "epoch": 2.46, + "learning_rate": 4.385810460901087e-05, + "loss": 0.5235, + "step": 4750 + }, + { + "epoch": 2.49, + "learning_rate": 4.379337131020197e-05, + "loss": 0.5507, + "step": 4800 + }, + { + "epoch": 2.51, + "learning_rate": 4.3728638011393065e-05, + "loss": 0.5279, + "step": 4850 + }, + { + "epoch": 2.54, + "learning_rate": 4.366390471258415e-05, + "loss": 0.5535, + "step": 4900 + }, + { + "epoch": 2.56, + "learning_rate": 4.3599171413775244e-05, + "loss": 0.5516, + "step": 4950 + }, + { + "epoch": 2.59, + "learning_rate": 4.3534438114966344e-05, + "loss": 0.4935, + "step": 5000 + }, + { + "epoch": 2.62, + "learning_rate": 4.346970481615744e-05, + "loss": 0.5609, + "step": 5050 + }, + { + "epoch": 2.64, + "learning_rate": 4.340497151734852e-05, + "loss": 0.5695, + "step": 5100 + }, + { + "epoch": 2.67, + "learning_rate": 4.334023821853962e-05, + "loss": 0.5646, + "step": 5150 + }, + { + "epoch": 2.69, + "learning_rate": 4.3275504919730716e-05, + "loss": 0.633, + "step": 5200 + }, + { + "epoch": 2.72, + "learning_rate": 4.32107716209218e-05, + "loss": 0.5412, + "step": 5250 + }, + { + "epoch": 2.74, + "learning_rate": 4.3146038322112895e-05, + "loss": 0.596, + "step": 5300 + }, + { + "epoch": 2.77, + "learning_rate": 4.3081305023303995e-05, + "loss": 0.5464, + "step": 5350 + }, + { + "epoch": 2.8, + "learning_rate": 4.301657172449508e-05, + "loss": 0.5067, + "step": 5400 + }, + { + "epoch": 2.82, + "learning_rate": 4.2951838425686174e-05, + "loss": 0.5868, + "step": 5450 + }, + { + "epoch": 2.85, + "learning_rate": 4.288710512687727e-05, + "loss": 0.6006, + "step": 5500 + }, + { + "epoch": 2.87, + "learning_rate": 4.282237182806836e-05, + "loss": 0.5681, + "step": 5550 + }, + { + "epoch": 2.9, + "learning_rate": 4.275763852925945e-05, + "loss": 0.464, + "step": 5600 + }, + { + "epoch": 2.93, + "learning_rate": 4.2692905230450546e-05, + "loss": 0.4489, + "step": 5650 + }, + { + "epoch": 2.95, + "learning_rate": 4.262817193164164e-05, + "loss": 0.5192, + "step": 5700 + }, + { + "epoch": 2.98, + "learning_rate": 4.256343863283273e-05, + "loss": 0.5972, + "step": 5750 + }, + { + "epoch": 3.0, + "eval_cer": 0.26983516747561154, + "eval_loss": 1.2912081480026245, + "eval_mer": 0.32246724193032916, + "eval_runtime": 95.7162, + "eval_samples_per_second": 11.774, + "eval_steps_per_second": 1.473, + "step": 5793 + }, + { + "epoch": 3.0, + "learning_rate": 4.2498705334023825e-05, + "loss": 0.5107, + "step": 5800 + }, + { + "epoch": 3.03, + "learning_rate": 4.243397203521491e-05, + "loss": 0.4185, + "step": 5850 + }, + { + "epoch": 3.06, + "learning_rate": 4.236923873640601e-05, + "loss": 0.4359, + "step": 5900 + }, + { + "epoch": 3.08, + "learning_rate": 4.2304505437597104e-05, + "loss": 0.3892, + "step": 5950 + }, + { + "epoch": 3.11, + "learning_rate": 4.22397721387882e-05, + "loss": 0.3933, + "step": 6000 + }, + { + "epoch": 3.13, + "learning_rate": 4.217503883997928e-05, + "loss": 0.3828, + "step": 6050 + }, + { + "epoch": 3.16, + "learning_rate": 4.211030554117038e-05, + "loss": 0.4035, + "step": 6100 + }, + { + "epoch": 3.18, + "learning_rate": 4.2045572242361476e-05, + "loss": 0.4641, + "step": 6150 + }, + { + "epoch": 3.21, + "learning_rate": 4.198083894355256e-05, + "loss": 0.363, + "step": 6200 + }, + { + "epoch": 3.24, + "learning_rate": 4.191610564474366e-05, + "loss": 0.4096, + "step": 6250 + }, + { + "epoch": 3.26, + "learning_rate": 4.1851372345934755e-05, + "loss": 0.445, + "step": 6300 + }, + { + "epoch": 3.29, + "learning_rate": 4.178663904712584e-05, + "loss": 0.4671, + "step": 6350 + }, + { + "epoch": 3.31, + "learning_rate": 4.1721905748316934e-05, + "loss": 0.366, + "step": 6400 + }, + { + "epoch": 3.34, + "learning_rate": 4.1657172449508034e-05, + "loss": 0.4559, + "step": 6450 + }, + { + "epoch": 3.37, + "learning_rate": 4.159243915069912e-05, + "loss": 0.3574, + "step": 6500 + }, + { + "epoch": 3.39, + "learning_rate": 4.152770585189021e-05, + "loss": 0.4152, + "step": 6550 + }, + { + "epoch": 3.42, + "learning_rate": 4.1462972553081306e-05, + "loss": 0.4111, + "step": 6600 + }, + { + "epoch": 3.44, + "learning_rate": 4.13982392542724e-05, + "loss": 0.4044, + "step": 6650 + }, + { + "epoch": 3.47, + "learning_rate": 4.133350595546349e-05, + "loss": 0.4742, + "step": 6700 + }, + { + "epoch": 3.5, + "learning_rate": 4.1268772656654585e-05, + "loss": 0.4402, + "step": 6750 + }, + { + "epoch": 3.52, + "learning_rate": 4.120403935784568e-05, + "loss": 0.4025, + "step": 6800 + }, + { + "epoch": 3.55, + "learning_rate": 4.113930605903677e-05, + "loss": 0.416, + "step": 6850 + }, + { + "epoch": 3.57, + "learning_rate": 4.1074572760227864e-05, + "loss": 0.4158, + "step": 6900 + }, + { + "epoch": 3.6, + "learning_rate": 4.100983946141896e-05, + "loss": 0.4212, + "step": 6950 + }, + { + "epoch": 3.63, + "learning_rate": 4.094510616261005e-05, + "loss": 0.3651, + "step": 7000 + }, + { + "epoch": 3.65, + "learning_rate": 4.088037286380114e-05, + "loss": 0.3613, + "step": 7050 + }, + { + "epoch": 3.68, + "learning_rate": 4.0815639564992236e-05, + "loss": 0.4658, + "step": 7100 + }, + { + "epoch": 3.7, + "learning_rate": 4.075090626618332e-05, + "loss": 0.4176, + "step": 7150 + }, + { + "epoch": 3.73, + "learning_rate": 4.068617296737442e-05, + "loss": 0.4016, + "step": 7200 + }, + { + "epoch": 3.75, + "learning_rate": 4.0621439668565515e-05, + "loss": 0.3975, + "step": 7250 + }, + { + "epoch": 3.78, + "learning_rate": 4.05567063697566e-05, + "loss": 0.4978, + "step": 7300 + }, + { + "epoch": 3.81, + "learning_rate": 4.04919730709477e-05, + "loss": 0.4552, + "step": 7350 + }, + { + "epoch": 3.83, + "learning_rate": 4.0427239772138794e-05, + "loss": 0.4543, + "step": 7400 + }, + { + "epoch": 3.86, + "learning_rate": 4.036250647332988e-05, + "loss": 0.422, + "step": 7450 + }, + { + "epoch": 3.88, + "learning_rate": 4.029777317452097e-05, + "loss": 0.3743, + "step": 7500 + }, + { + "epoch": 3.91, + "learning_rate": 4.023303987571207e-05, + "loss": 0.3999, + "step": 7550 + }, + { + "epoch": 3.94, + "learning_rate": 4.016830657690316e-05, + "loss": 0.3713, + "step": 7600 + }, + { + "epoch": 3.96, + "learning_rate": 4.010357327809425e-05, + "loss": 0.3827, + "step": 7650 + }, + { + "epoch": 3.99, + "learning_rate": 4.0038839979285345e-05, + "loss": 0.3639, + "step": 7700 + }, + { + "epoch": 4.0, + "eval_cer": 0.2572444615310683, + "eval_loss": 1.2356128692626953, + "eval_mer": 0.3155161393416427, + "eval_runtime": 96.3574, + "eval_samples_per_second": 11.696, + "eval_steps_per_second": 1.463, + "step": 7724 + }, + { + "epoch": 4.01, + "learning_rate": 3.9975401346452616e-05, + "loss": 0.3732, + "step": 7750 + }, + { + "epoch": 4.04, + "learning_rate": 3.991066804764371e-05, + "loss": 0.385, + "step": 7800 + }, + { + "epoch": 4.07, + "learning_rate": 3.98459347488348e-05, + "loss": 0.2639, + "step": 7850 + }, + { + "epoch": 4.09, + "learning_rate": 3.9781201450025895e-05, + "loss": 0.3626, + "step": 7900 + }, + { + "epoch": 4.12, + "learning_rate": 3.971646815121699e-05, + "loss": 0.3103, + "step": 7950 + }, + { + "epoch": 4.14, + "learning_rate": 3.965173485240808e-05, + "loss": 0.3483, + "step": 8000 + }, + { + "epoch": 4.17, + "learning_rate": 3.9587001553599174e-05, + "loss": 0.2899, + "step": 8050 + }, + { + "epoch": 4.19, + "learning_rate": 3.952226825479027e-05, + "loss": 0.314, + "step": 8100 + }, + { + "epoch": 4.22, + "learning_rate": 3.945753495598136e-05, + "loss": 0.335, + "step": 8150 + }, + { + "epoch": 4.25, + "learning_rate": 3.939280165717245e-05, + "loss": 0.3566, + "step": 8200 + }, + { + "epoch": 4.27, + "learning_rate": 3.9328068358363546e-05, + "loss": 0.3467, + "step": 8250 + }, + { + "epoch": 4.3, + "learning_rate": 3.926333505955463e-05, + "loss": 0.2699, + "step": 8300 + }, + { + "epoch": 4.32, + "learning_rate": 3.919860176074573e-05, + "loss": 0.2872, + "step": 8350 + }, + { + "epoch": 4.35, + "learning_rate": 3.9133868461936825e-05, + "loss": 0.3076, + "step": 8400 + }, + { + "epoch": 4.38, + "learning_rate": 3.906913516312791e-05, + "loss": 0.3202, + "step": 8450 + }, + { + "epoch": 4.4, + "learning_rate": 3.9004401864319004e-05, + "loss": 0.3056, + "step": 8500 + }, + { + "epoch": 4.43, + "learning_rate": 3.8939668565510104e-05, + "loss": 0.3712, + "step": 8550 + }, + { + "epoch": 4.45, + "learning_rate": 3.88749352667012e-05, + "loss": 0.332, + "step": 8600 + }, + { + "epoch": 4.48, + "learning_rate": 3.881020196789228e-05, + "loss": 0.3718, + "step": 8650 + }, + { + "epoch": 4.51, + "learning_rate": 3.8745468669083376e-05, + "loss": 0.3768, + "step": 8700 + }, + { + "epoch": 4.53, + "learning_rate": 3.8680735370274476e-05, + "loss": 0.3527, + "step": 8750 + }, + { + "epoch": 4.56, + "learning_rate": 3.861600207146556e-05, + "loss": 0.2875, + "step": 8800 + }, + { + "epoch": 4.58, + "learning_rate": 3.8551268772656655e-05, + "loss": 0.3617, + "step": 8850 + }, + { + "epoch": 4.61, + "learning_rate": 3.848653547384775e-05, + "loss": 0.3203, + "step": 8900 + }, + { + "epoch": 4.63, + "learning_rate": 3.842180217503884e-05, + "loss": 0.3488, + "step": 8950 + }, + { + "epoch": 4.66, + "learning_rate": 3.8357068876229934e-05, + "loss": 0.3543, + "step": 9000 + }, + { + "epoch": 4.69, + "learning_rate": 3.829233557742103e-05, + "loss": 0.3555, + "step": 9050 + }, + { + "epoch": 4.71, + "learning_rate": 3.822760227861212e-05, + "loss": 0.3036, + "step": 9100 + }, + { + "epoch": 4.74, + "learning_rate": 3.816286897980321e-05, + "loss": 0.3667, + "step": 9150 + }, + { + "epoch": 4.76, + "learning_rate": 3.8098135680994306e-05, + "loss": 0.3763, + "step": 9200 + }, + { + "epoch": 4.79, + "learning_rate": 3.80334023821854e-05, + "loss": 0.32, + "step": 9250 + }, + { + "epoch": 4.82, + "learning_rate": 3.796866908337649e-05, + "loss": 0.327, + "step": 9300 + }, + { + "epoch": 4.84, + "learning_rate": 3.7903935784567585e-05, + "loss": 0.3192, + "step": 9350 + }, + { + "epoch": 4.87, + "learning_rate": 3.783920248575867e-05, + "loss": 0.31, + "step": 9400 + }, + { + "epoch": 4.89, + "learning_rate": 3.777446918694977e-05, + "loss": 0.2768, + "step": 9450 + }, + { + "epoch": 4.92, + "learning_rate": 3.7709735888140864e-05, + "loss": 0.2873, + "step": 9500 + }, + { + "epoch": 4.95, + "learning_rate": 3.764500258933196e-05, + "loss": 0.3175, + "step": 9550 + }, + { + "epoch": 4.97, + "learning_rate": 3.758026929052304e-05, + "loss": 0.3742, + "step": 9600 + }, + { + "epoch": 5.0, + "learning_rate": 3.751553599171414e-05, + "loss": 0.32, + "step": 9650 + }, + { + "epoch": 5.0, + "eval_cer": 0.24859435821038975, + "eval_loss": 1.2657582759857178, + "eval_mer": 0.30073505912432086, + "eval_runtime": 95.7524, + "eval_samples_per_second": 11.77, + "eval_steps_per_second": 1.473, + "step": 9655 + }, + { + "epoch": 5.02, + "learning_rate": 3.7450802692905236e-05, + "loss": 0.2563, + "step": 9700 + }, + { + "epoch": 5.05, + "learning_rate": 3.738606939409632e-05, + "loss": 0.277, + "step": 9750 + }, + { + "epoch": 5.08, + "learning_rate": 3.7321336095287415e-05, + "loss": 0.2643, + "step": 9800 + }, + { + "epoch": 5.1, + "learning_rate": 3.7256602796478515e-05, + "loss": 0.2865, + "step": 9850 + }, + { + "epoch": 5.13, + "learning_rate": 3.71918694976696e-05, + "loss": 0.2754, + "step": 9900 + }, + { + "epoch": 5.15, + "learning_rate": 3.7127136198860694e-05, + "loss": 0.2354, + "step": 9950 + }, + { + "epoch": 5.18, + "learning_rate": 3.706240290005179e-05, + "loss": 0.2297, + "step": 10000 + }, + { + "epoch": 5.2, + "learning_rate": 3.699766960124288e-05, + "loss": 0.3088, + "step": 10050 + }, + { + "epoch": 5.23, + "learning_rate": 3.693293630243397e-05, + "loss": 0.2676, + "step": 10100 + }, + { + "epoch": 5.26, + "learning_rate": 3.6868203003625066e-05, + "loss": 0.3025, + "step": 10150 + }, + { + "epoch": 5.28, + "learning_rate": 3.680346970481616e-05, + "loss": 0.2237, + "step": 10200 + }, + { + "epoch": 5.31, + "learning_rate": 3.673873640600725e-05, + "loss": 0.2849, + "step": 10250 + }, + { + "epoch": 5.33, + "learning_rate": 3.6674003107198345e-05, + "loss": 0.2638, + "step": 10300 + }, + { + "epoch": 5.36, + "learning_rate": 3.660926980838944e-05, + "loss": 0.2742, + "step": 10350 + }, + { + "epoch": 5.39, + "learning_rate": 3.654453650958053e-05, + "loss": 0.2432, + "step": 10400 + }, + { + "epoch": 5.41, + "learning_rate": 3.6479803210771624e-05, + "loss": 0.2743, + "step": 10450 + }, + { + "epoch": 5.44, + "learning_rate": 3.641506991196272e-05, + "loss": 0.3096, + "step": 10500 + }, + { + "epoch": 5.46, + "learning_rate": 3.635033661315381e-05, + "loss": 0.3206, + "step": 10550 + }, + { + "epoch": 5.49, + "learning_rate": 3.62856033143449e-05, + "loss": 0.2632, + "step": 10600 + }, + { + "epoch": 5.52, + "learning_rate": 3.6220870015535996e-05, + "loss": 0.2221, + "step": 10650 + }, + { + "epoch": 5.54, + "learning_rate": 3.615613671672708e-05, + "loss": 0.2429, + "step": 10700 + }, + { + "epoch": 5.57, + "learning_rate": 3.609140341791818e-05, + "loss": 0.222, + "step": 10750 + }, + { + "epoch": 5.59, + "learning_rate": 3.6026670119109275e-05, + "loss": 0.2219, + "step": 10800 + }, + { + "epoch": 5.62, + "learning_rate": 3.596193682030036e-05, + "loss": 0.2908, + "step": 10850 + }, + { + "epoch": 5.64, + "learning_rate": 3.5897203521491454e-05, + "loss": 0.3083, + "step": 10900 + }, + { + "epoch": 5.67, + "learning_rate": 3.5832470222682554e-05, + "loss": 0.2202, + "step": 10950 + }, + { + "epoch": 5.7, + "learning_rate": 3.576773692387364e-05, + "loss": 0.3106, + "step": 11000 + }, + { + "epoch": 5.72, + "learning_rate": 3.570300362506473e-05, + "loss": 0.2613, + "step": 11050 + }, + { + "epoch": 5.75, + "learning_rate": 3.563827032625583e-05, + "loss": 0.2948, + "step": 11100 + }, + { + "epoch": 5.77, + "learning_rate": 3.557353702744692e-05, + "loss": 0.273, + "step": 11150 + }, + { + "epoch": 5.8, + "learning_rate": 3.550880372863801e-05, + "loss": 0.2231, + "step": 11200 + }, + { + "epoch": 5.83, + "learning_rate": 3.5444070429829105e-05, + "loss": 0.3036, + "step": 11250 + }, + { + "epoch": 5.85, + "learning_rate": 3.53793371310202e-05, + "loss": 0.3122, + "step": 11300 + }, + { + "epoch": 5.88, + "learning_rate": 3.531460383221129e-05, + "loss": 0.3125, + "step": 11350 + }, + { + "epoch": 5.9, + "learning_rate": 3.5249870533402384e-05, + "loss": 0.3096, + "step": 11400 + }, + { + "epoch": 5.93, + "learning_rate": 3.518513723459348e-05, + "loss": 0.2774, + "step": 11450 + }, + { + "epoch": 5.96, + "learning_rate": 3.512040393578457e-05, + "loss": 0.2293, + "step": 11500 + }, + { + "epoch": 5.98, + "learning_rate": 3.505567063697566e-05, + "loss": 0.223, + "step": 11550 + }, + { + "epoch": 6.0, + "eval_cer": 0.25311163438896633, + "eval_loss": 1.2627549171447754, + "eval_mer": 0.3053691275167785, + "eval_runtime": 95.6529, + "eval_samples_per_second": 11.782, + "eval_steps_per_second": 1.474, + "step": 11586 + }, + { + "epoch": 6.01, + "learning_rate": 3.4990937338166756e-05, + "loss": 0.2642, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 3.492620403935785e-05, + "loss": 0.1908, + "step": 11650 + }, + { + "epoch": 6.06, + "learning_rate": 3.486147074054894e-05, + "loss": 0.2397, + "step": 11700 + }, + { + "epoch": 6.08, + "learning_rate": 3.479803210771621e-05, + "loss": 0.1996, + "step": 11750 + }, + { + "epoch": 6.11, + "learning_rate": 3.4733298808907306e-05, + "loss": 0.2727, + "step": 11800 + }, + { + "epoch": 6.14, + "learning_rate": 3.466856551009839e-05, + "loss": 0.1985, + "step": 11850 + }, + { + "epoch": 6.16, + "learning_rate": 3.460383221128949e-05, + "loss": 0.2072, + "step": 11900 + }, + { + "epoch": 6.19, + "learning_rate": 3.4539098912480585e-05, + "loss": 0.2409, + "step": 11950 + }, + { + "epoch": 6.21, + "learning_rate": 3.447436561367167e-05, + "loss": 0.1925, + "step": 12000 + }, + { + "epoch": 6.24, + "learning_rate": 3.4409632314862764e-05, + "loss": 0.3027, + "step": 12050 + }, + { + "epoch": 6.27, + "learning_rate": 3.4344899016053864e-05, + "loss": 0.2315, + "step": 12100 + }, + { + "epoch": 6.29, + "learning_rate": 3.428016571724495e-05, + "loss": 0.2668, + "step": 12150 + }, + { + "epoch": 6.32, + "learning_rate": 3.421543241843604e-05, + "loss": 0.1743, + "step": 12200 + }, + { + "epoch": 6.34, + "learning_rate": 3.4150699119627136e-05, + "loss": 0.2799, + "step": 12250 + }, + { + "epoch": 6.37, + "learning_rate": 3.4085965820818236e-05, + "loss": 0.1927, + "step": 12300 + }, + { + "epoch": 6.4, + "learning_rate": 3.402123252200932e-05, + "loss": 0.2008, + "step": 12350 + }, + { + "epoch": 6.42, + "learning_rate": 3.3956499223200415e-05, + "loss": 0.194, + "step": 12400 + }, + { + "epoch": 6.45, + "learning_rate": 3.389176592439151e-05, + "loss": 0.2082, + "step": 12450 + }, + { + "epoch": 6.47, + "learning_rate": 3.38270326255826e-05, + "loss": 0.2391, + "step": 12500 + }, + { + "epoch": 6.5, + "learning_rate": 3.3762299326773694e-05, + "loss": 0.1784, + "step": 12550 + }, + { + "epoch": 6.53, + "learning_rate": 3.369756602796479e-05, + "loss": 0.2038, + "step": 12600 + }, + { + "epoch": 6.55, + "learning_rate": 3.363283272915588e-05, + "loss": 0.2425, + "step": 12650 + }, + { + "epoch": 6.58, + "learning_rate": 3.356809943034697e-05, + "loss": 0.2533, + "step": 12700 + }, + { + "epoch": 6.6, + "learning_rate": 3.3503366131538066e-05, + "loss": 0.2447, + "step": 12750 + }, + { + "epoch": 6.63, + "learning_rate": 3.343863283272915e-05, + "loss": 0.1886, + "step": 12800 + }, + { + "epoch": 6.65, + "learning_rate": 3.337389953392025e-05, + "loss": 0.2611, + "step": 12850 + }, + { + "epoch": 6.68, + "learning_rate": 3.3309166235111345e-05, + "loss": 0.2229, + "step": 12900 + }, + { + "epoch": 6.71, + "learning_rate": 3.324443293630243e-05, + "loss": 0.1758, + "step": 12950 + }, + { + "epoch": 6.73, + "learning_rate": 3.317969963749353e-05, + "loss": 0.1984, + "step": 13000 + }, + { + "epoch": 6.76, + "learning_rate": 3.3114966338684624e-05, + "loss": 0.218, + "step": 13050 + }, + { + "epoch": 6.78, + "learning_rate": 3.305023303987571e-05, + "loss": 0.2453, + "step": 13100 + }, + { + "epoch": 6.81, + "learning_rate": 3.29854997410668e-05, + "loss": 0.2053, + "step": 13150 + }, + { + "epoch": 6.84, + "learning_rate": 3.29207664422579e-05, + "loss": 0.2025, + "step": 13200 + }, + { + "epoch": 6.86, + "learning_rate": 3.2856033143448996e-05, + "loss": 0.2146, + "step": 13250 + }, + { + "epoch": 6.89, + "learning_rate": 3.279129984464008e-05, + "loss": 0.2196, + "step": 13300 + }, + { + "epoch": 6.91, + "learning_rate": 3.2726566545831175e-05, + "loss": 0.2202, + "step": 13350 + }, + { + "epoch": 6.94, + "learning_rate": 3.2661833247022275e-05, + "loss": 0.2404, + "step": 13400 + }, + { + "epoch": 6.97, + "learning_rate": 3.259709994821336e-05, + "loss": 0.2821, + "step": 13450 + }, + { + "epoch": 6.99, + "learning_rate": 3.2532366649404454e-05, + "loss": 0.2935, + "step": 13500 + }, + { + "epoch": 7.0, + "eval_cer": 0.24652794463933875, + "eval_loss": 1.3700717687606812, + "eval_mer": 0.2988974113135187, + "eval_runtime": 96.1397, + "eval_samples_per_second": 11.723, + "eval_steps_per_second": 1.467, + "step": 13517 + }, + { + "epoch": 7.02, + "learning_rate": 3.246763335059555e-05, + "loss": 0.1851, + "step": 13550 + }, + { + "epoch": 7.04, + "learning_rate": 3.240290005178664e-05, + "loss": 0.1333, + "step": 13600 + }, + { + "epoch": 7.07, + "learning_rate": 3.233816675297773e-05, + "loss": 0.1974, + "step": 13650 + }, + { + "epoch": 7.09, + "learning_rate": 3.2273433454168826e-05, + "loss": 0.232, + "step": 13700 + }, + { + "epoch": 7.12, + "learning_rate": 3.220870015535992e-05, + "loss": 0.17, + "step": 13750 + }, + { + "epoch": 7.15, + "learning_rate": 3.214396685655101e-05, + "loss": 0.1754, + "step": 13800 + }, + { + "epoch": 7.17, + "learning_rate": 3.2079233557742105e-05, + "loss": 0.1573, + "step": 13850 + }, + { + "epoch": 7.2, + "learning_rate": 3.201450025893319e-05, + "loss": 0.2036, + "step": 13900 + }, + { + "epoch": 7.22, + "learning_rate": 3.194976696012429e-05, + "loss": 0.1945, + "step": 13950 + }, + { + "epoch": 7.25, + "learning_rate": 3.1885033661315384e-05, + "loss": 0.1808, + "step": 14000 + }, + { + "epoch": 7.28, + "learning_rate": 3.182030036250647e-05, + "loss": 0.1712, + "step": 14050 + }, + { + "epoch": 7.3, + "learning_rate": 3.175556706369757e-05, + "loss": 0.1705, + "step": 14100 + }, + { + "epoch": 7.33, + "learning_rate": 3.169083376488866e-05, + "loss": 0.1658, + "step": 14150 + }, + { + "epoch": 7.35, + "learning_rate": 3.1626100466079756e-05, + "loss": 0.2061, + "step": 14200 + }, + { + "epoch": 7.38, + "learning_rate": 3.156136716727084e-05, + "loss": 0.1637, + "step": 14250 + }, + { + "epoch": 7.41, + "learning_rate": 3.149663386846194e-05, + "loss": 0.1648, + "step": 14300 + }, + { + "epoch": 7.43, + "learning_rate": 3.1431900569653035e-05, + "loss": 0.219, + "step": 14350 + }, + { + "epoch": 7.46, + "learning_rate": 3.136716727084412e-05, + "loss": 0.1614, + "step": 14400 + }, + { + "epoch": 7.48, + "learning_rate": 3.1302433972035214e-05, + "loss": 0.2173, + "step": 14450 + }, + { + "epoch": 7.51, + "learning_rate": 3.1237700673226314e-05, + "loss": 0.2032, + "step": 14500 + }, + { + "epoch": 7.53, + "learning_rate": 3.11729673744174e-05, + "loss": 0.2341, + "step": 14550 + }, + { + "epoch": 7.56, + "learning_rate": 3.110823407560849e-05, + "loss": 0.1906, + "step": 14600 + }, + { + "epoch": 7.59, + "learning_rate": 3.1043500776799586e-05, + "loss": 0.2092, + "step": 14650 + }, + { + "epoch": 7.61, + "learning_rate": 3.097876747799068e-05, + "loss": 0.1799, + "step": 14700 + }, + { + "epoch": 7.64, + "learning_rate": 3.091532884515795e-05, + "loss": 0.1731, + "step": 14750 + }, + { + "epoch": 7.66, + "learning_rate": 3.085059554634904e-05, + "loss": 0.1996, + "step": 14800 + }, + { + "epoch": 7.69, + "learning_rate": 3.0785862247540136e-05, + "loss": 0.2069, + "step": 14850 + }, + { + "epoch": 7.72, + "learning_rate": 3.072112894873123e-05, + "loss": 0.1657, + "step": 14900 + }, + { + "epoch": 7.74, + "learning_rate": 3.065639564992232e-05, + "loss": 0.1726, + "step": 14950 + }, + { + "epoch": 7.77, + "learning_rate": 3.0591662351113415e-05, + "loss": 0.2388, + "step": 15000 + }, + { + "epoch": 7.79, + "learning_rate": 3.052692905230451e-05, + "loss": 0.2378, + "step": 15050 + }, + { + "epoch": 7.82, + "learning_rate": 3.0462195753495598e-05, + "loss": 0.2064, + "step": 15100 + }, + { + "epoch": 7.85, + "learning_rate": 3.0397462454686694e-05, + "loss": 0.2135, + "step": 15150 + }, + { + "epoch": 7.87, + "learning_rate": 3.0332729155877787e-05, + "loss": 0.172, + "step": 15200 + }, + { + "epoch": 7.9, + "learning_rate": 3.0267995857068877e-05, + "loss": 0.2241, + "step": 15250 + }, + { + "epoch": 7.92, + "learning_rate": 3.020326255825997e-05, + "loss": 0.1541, + "step": 15300 + }, + { + "epoch": 7.95, + "learning_rate": 3.0138529259451066e-05, + "loss": 0.1698, + "step": 15350 + }, + { + "epoch": 7.98, + "learning_rate": 3.0073795960642152e-05, + "loss": 0.1734, + "step": 15400 + }, + { + "epoch": 8.0, + "eval_cer": 0.2401364794079485, + "eval_loss": 1.334208607673645, + "eval_mer": 0.29466283157558326, + "eval_runtime": 95.4506, + "eval_samples_per_second": 11.807, + "eval_steps_per_second": 1.477, + "step": 15448 + }, + { + "epoch": 8.0, + "learning_rate": 3.000906266183325e-05, + "loss": 0.1791, + "step": 15450 + }, + { + "epoch": 8.03, + "learning_rate": 2.994432936302434e-05, + "loss": 0.1773, + "step": 15500 + }, + { + "epoch": 8.05, + "learning_rate": 2.987959606421543e-05, + "loss": 0.1476, + "step": 15550 + }, + { + "epoch": 8.08, + "learning_rate": 2.9814862765406528e-05, + "loss": 0.1641, + "step": 15600 + }, + { + "epoch": 8.1, + "learning_rate": 2.975012946659762e-05, + "loss": 0.1359, + "step": 15650 + }, + { + "epoch": 8.13, + "learning_rate": 2.968539616778871e-05, + "loss": 0.1348, + "step": 15700 + }, + { + "epoch": 8.16, + "learning_rate": 2.9620662868979803e-05, + "loss": 0.1732, + "step": 15750 + }, + { + "epoch": 8.18, + "learning_rate": 2.95559295701709e-05, + "loss": 0.1725, + "step": 15800 + }, + { + "epoch": 8.21, + "learning_rate": 2.9491196271361993e-05, + "loss": 0.1652, + "step": 15850 + }, + { + "epoch": 8.23, + "learning_rate": 2.9426462972553082e-05, + "loss": 0.1664, + "step": 15900 + }, + { + "epoch": 8.26, + "learning_rate": 2.9361729673744175e-05, + "loss": 0.1606, + "step": 15950 + }, + { + "epoch": 8.29, + "learning_rate": 2.929699637493527e-05, + "loss": 0.1751, + "step": 16000 + }, + { + "epoch": 8.31, + "learning_rate": 2.9232263076126358e-05, + "loss": 0.1484, + "step": 16050 + }, + { + "epoch": 8.34, + "learning_rate": 2.9167529777317454e-05, + "loss": 0.1547, + "step": 16100 + }, + { + "epoch": 8.36, + "learning_rate": 2.9102796478508547e-05, + "loss": 0.1412, + "step": 16150 + }, + { + "epoch": 8.39, + "learning_rate": 2.9038063179699637e-05, + "loss": 0.1397, + "step": 16200 + }, + { + "epoch": 8.42, + "learning_rate": 2.8973329880890733e-05, + "loss": 0.1285, + "step": 16250 + }, + { + "epoch": 8.44, + "learning_rate": 2.8908596582081826e-05, + "loss": 0.1408, + "step": 16300 + }, + { + "epoch": 8.47, + "learning_rate": 2.8845157949249097e-05, + "loss": 0.142, + "step": 16350 + }, + { + "epoch": 8.49, + "learning_rate": 2.8780424650440187e-05, + "loss": 0.189, + "step": 16400 + }, + { + "epoch": 8.52, + "learning_rate": 2.871569135163128e-05, + "loss": 0.1398, + "step": 16450 + }, + { + "epoch": 8.54, + "learning_rate": 2.8650958052822373e-05, + "loss": 0.1997, + "step": 16500 + }, + { + "epoch": 8.57, + "learning_rate": 2.858622475401347e-05, + "loss": 0.188, + "step": 16550 + }, + { + "epoch": 8.6, + "learning_rate": 2.852149145520456e-05, + "loss": 0.1416, + "step": 16600 + }, + { + "epoch": 8.62, + "learning_rate": 2.8456758156395652e-05, + "loss": 0.1413, + "step": 16650 + }, + { + "epoch": 8.65, + "learning_rate": 2.8392024857586745e-05, + "loss": 0.1759, + "step": 16700 + }, + { + "epoch": 8.67, + "learning_rate": 2.8327291558777834e-05, + "loss": 0.1901, + "step": 16750 + }, + { + "epoch": 8.7, + "learning_rate": 2.826255825996893e-05, + "loss": 0.1485, + "step": 16800 + }, + { + "epoch": 8.73, + "learning_rate": 2.8197824961160024e-05, + "loss": 0.1846, + "step": 16850 + }, + { + "epoch": 8.75, + "learning_rate": 2.8133091662351113e-05, + "loss": 0.1458, + "step": 16900 + }, + { + "epoch": 8.78, + "learning_rate": 2.8068358363542206e-05, + "loss": 0.1722, + "step": 16950 + }, + { + "epoch": 8.8, + "learning_rate": 2.8003625064733303e-05, + "loss": 0.1798, + "step": 17000 + }, + { + "epoch": 8.83, + "learning_rate": 2.7938891765924392e-05, + "loss": 0.1315, + "step": 17050 + }, + { + "epoch": 8.86, + "learning_rate": 2.7874158467115485e-05, + "loss": 0.1616, + "step": 17100 + }, + { + "epoch": 8.88, + "learning_rate": 2.780942516830658e-05, + "loss": 0.1465, + "step": 17150 + }, + { + "epoch": 8.91, + "learning_rate": 2.7744691869497668e-05, + "loss": 0.1523, + "step": 17200 + }, + { + "epoch": 8.93, + "learning_rate": 2.7679958570688764e-05, + "loss": 0.1692, + "step": 17250 + }, + { + "epoch": 8.96, + "learning_rate": 2.7615225271879857e-05, + "loss": 0.1777, + "step": 17300 + }, + { + "epoch": 8.98, + "learning_rate": 2.7550491973070947e-05, + "loss": 0.1652, + "step": 17350 + }, + { + "epoch": 9.0, + "eval_cer": 0.23869479552116873, + "eval_loss": 1.5562121868133545, + "eval_mer": 0.2893096836049856, + "eval_runtime": 95.7928, + "eval_samples_per_second": 11.765, + "eval_steps_per_second": 1.472, + "step": 17379 + }, + { + "epoch": 9.01, + "learning_rate": 2.748575867426204e-05, + "loss": 0.1672, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 2.7421025375453136e-05, + "loss": 0.1131, + "step": 17450 + }, + { + "epoch": 9.06, + "learning_rate": 2.7356292076644226e-05, + "loss": 0.1373, + "step": 17500 + }, + { + "epoch": 9.09, + "learning_rate": 2.729155877783532e-05, + "loss": 0.1213, + "step": 17550 + }, + { + "epoch": 9.11, + "learning_rate": 2.7226825479026412e-05, + "loss": 0.1548, + "step": 17600 + }, + { + "epoch": 9.14, + "learning_rate": 2.7162092180217508e-05, + "loss": 0.119, + "step": 17650 + }, + { + "epoch": 9.17, + "learning_rate": 2.7097358881408598e-05, + "loss": 0.1396, + "step": 17700 + }, + { + "epoch": 9.19, + "learning_rate": 2.703262558259969e-05, + "loss": 0.1163, + "step": 17750 + }, + { + "epoch": 9.22, + "learning_rate": 2.6967892283790784e-05, + "loss": 0.0957, + "step": 17800 + }, + { + "epoch": 9.24, + "learning_rate": 2.6903158984981873e-05, + "loss": 0.1508, + "step": 17850 + }, + { + "epoch": 9.27, + "learning_rate": 2.683842568617297e-05, + "loss": 0.1323, + "step": 17900 + }, + { + "epoch": 9.3, + "learning_rate": 2.6773692387364063e-05, + "loss": 0.1168, + "step": 17950 + }, + { + "epoch": 9.32, + "learning_rate": 2.6708959088555152e-05, + "loss": 0.1357, + "step": 18000 + }, + { + "epoch": 9.35, + "learning_rate": 2.6644225789746245e-05, + "loss": 0.1783, + "step": 18050 + }, + { + "epoch": 9.37, + "learning_rate": 2.6579492490937342e-05, + "loss": 0.1371, + "step": 18100 + }, + { + "epoch": 9.4, + "learning_rate": 2.651475919212843e-05, + "loss": 0.155, + "step": 18150 + }, + { + "epoch": 9.43, + "learning_rate": 2.6450025893319524e-05, + "loss": 0.1269, + "step": 18200 + }, + { + "epoch": 9.45, + "learning_rate": 2.6385292594510617e-05, + "loss": 0.1481, + "step": 18250 + }, + { + "epoch": 9.48, + "learning_rate": 2.6320559295701707e-05, + "loss": 0.1457, + "step": 18300 + }, + { + "epoch": 9.5, + "learning_rate": 2.6255825996892803e-05, + "loss": 0.127, + "step": 18350 + }, + { + "epoch": 9.53, + "learning_rate": 2.6191092698083896e-05, + "loss": 0.149, + "step": 18400 + }, + { + "epoch": 9.55, + "learning_rate": 2.6126359399274986e-05, + "loss": 0.1282, + "step": 18450 + }, + { + "epoch": 9.58, + "learning_rate": 2.606162610046608e-05, + "loss": 0.17, + "step": 18500 + }, + { + "epoch": 9.61, + "learning_rate": 2.5996892801657175e-05, + "loss": 0.1343, + "step": 18550 + }, + { + "epoch": 9.63, + "learning_rate": 2.5932159502848268e-05, + "loss": 0.1782, + "step": 18600 + }, + { + "epoch": 9.66, + "learning_rate": 2.5867426204039358e-05, + "loss": 0.1549, + "step": 18650 + }, + { + "epoch": 9.68, + "learning_rate": 2.580269290523045e-05, + "loss": 0.1142, + "step": 18700 + }, + { + "epoch": 9.71, + "learning_rate": 2.5737959606421547e-05, + "loss": 0.1277, + "step": 18750 + }, + { + "epoch": 9.74, + "learning_rate": 2.5673226307612637e-05, + "loss": 0.1759, + "step": 18800 + }, + { + "epoch": 9.76, + "learning_rate": 2.560849300880373e-05, + "loss": 0.1288, + "step": 18850 + }, + { + "epoch": 9.79, + "learning_rate": 2.5543759709994823e-05, + "loss": 0.1683, + "step": 18900 + }, + { + "epoch": 9.81, + "learning_rate": 2.5479026411185912e-05, + "loss": 0.1491, + "step": 18950 + }, + { + "epoch": 9.84, + "learning_rate": 2.541429311237701e-05, + "loss": 0.1229, + "step": 19000 + }, + { + "epoch": 9.87, + "learning_rate": 2.5349559813568102e-05, + "loss": 0.138, + "step": 19050 + }, + { + "epoch": 9.89, + "learning_rate": 2.528482651475919e-05, + "loss": 0.1199, + "step": 19100 + }, + { + "epoch": 9.92, + "learning_rate": 2.5220093215950284e-05, + "loss": 0.1313, + "step": 19150 + }, + { + "epoch": 9.94, + "learning_rate": 2.515535991714138e-05, + "loss": 0.1415, + "step": 19200 + }, + { + "epoch": 9.97, + "learning_rate": 2.509062661833247e-05, + "loss": 0.0995, + "step": 19250 + }, + { + "epoch": 9.99, + "learning_rate": 2.5025893319523563e-05, + "loss": 0.1226, + "step": 19300 + }, + { + "epoch": 10.0, + "eval_cer": 0.23912730068720264, + "eval_loss": 1.4614193439483643, + "eval_mer": 0.29338446788111217, + "eval_runtime": 98.7007, + "eval_samples_per_second": 11.418, + "eval_steps_per_second": 1.429, + "step": 19310 + }, + { + "epoch": 10.02, + "learning_rate": 2.4961160020714656e-05, + "loss": 0.0873, + "step": 19350 + }, + { + "epoch": 10.05, + "learning_rate": 2.4897721387881928e-05, + "loss": 0.1397, + "step": 19400 + }, + { + "epoch": 10.07, + "learning_rate": 2.483298808907302e-05, + "loss": 0.1128, + "step": 19450 + }, + { + "epoch": 10.1, + "learning_rate": 2.4768254790264114e-05, + "loss": 0.1149, + "step": 19500 + }, + { + "epoch": 10.12, + "learning_rate": 2.4703521491455207e-05, + "loss": 0.1352, + "step": 19550 + }, + { + "epoch": 10.15, + "learning_rate": 2.4638788192646296e-05, + "loss": 0.0971, + "step": 19600 + }, + { + "epoch": 10.18, + "learning_rate": 2.4574054893837393e-05, + "loss": 0.11, + "step": 19650 + }, + { + "epoch": 10.2, + "learning_rate": 2.4509321595028482e-05, + "loss": 0.1148, + "step": 19700 + }, + { + "epoch": 10.23, + "learning_rate": 2.4444588296219575e-05, + "loss": 0.1147, + "step": 19750 + }, + { + "epoch": 10.25, + "learning_rate": 2.437985499741067e-05, + "loss": 0.1135, + "step": 19800 + }, + { + "epoch": 10.28, + "learning_rate": 2.431512169860176e-05, + "loss": 0.1287, + "step": 19850 + }, + { + "epoch": 10.31, + "learning_rate": 2.4250388399792854e-05, + "loss": 0.1017, + "step": 19900 + }, + { + "epoch": 10.33, + "learning_rate": 2.4185655100983947e-05, + "loss": 0.1131, + "step": 19950 + }, + { + "epoch": 10.36, + "learning_rate": 2.412092180217504e-05, + "loss": 0.158, + "step": 20000 + }, + { + "epoch": 10.38, + "learning_rate": 2.4056188503366133e-05, + "loss": 0.1069, + "step": 20050 + }, + { + "epoch": 10.41, + "learning_rate": 2.3991455204557226e-05, + "loss": 0.1045, + "step": 20100 + }, + { + "epoch": 10.44, + "learning_rate": 2.3926721905748316e-05, + "loss": 0.109, + "step": 20150 + }, + { + "epoch": 10.46, + "learning_rate": 2.3861988606939412e-05, + "loss": 0.18, + "step": 20200 + }, + { + "epoch": 10.49, + "learning_rate": 2.3797255308130505e-05, + "loss": 0.1424, + "step": 20250 + }, + { + "epoch": 10.51, + "learning_rate": 2.3732522009321595e-05, + "loss": 0.124, + "step": 20300 + }, + { + "epoch": 10.54, + "learning_rate": 2.366778871051269e-05, + "loss": 0.1223, + "step": 20350 + }, + { + "epoch": 10.56, + "learning_rate": 2.360305541170378e-05, + "loss": 0.0921, + "step": 20400 + }, + { + "epoch": 10.59, + "learning_rate": 2.3538322112894877e-05, + "loss": 0.1322, + "step": 20450 + }, + { + "epoch": 10.62, + "learning_rate": 2.3473588814085967e-05, + "loss": 0.126, + "step": 20500 + }, + { + "epoch": 10.64, + "learning_rate": 2.340885551527706e-05, + "loss": 0.1362, + "step": 20550 + }, + { + "epoch": 10.67, + "learning_rate": 2.3344122216468153e-05, + "loss": 0.1094, + "step": 20600 + }, + { + "epoch": 10.69, + "learning_rate": 2.3279388917659246e-05, + "loss": 0.1194, + "step": 20650 + }, + { + "epoch": 10.72, + "learning_rate": 2.3214655618850335e-05, + "loss": 0.1166, + "step": 20700 + }, + { + "epoch": 10.75, + "learning_rate": 2.314992232004143e-05, + "loss": 0.1019, + "step": 20750 + }, + { + "epoch": 10.77, + "learning_rate": 2.3085189021232524e-05, + "loss": 0.1064, + "step": 20800 + }, + { + "epoch": 10.8, + "learning_rate": 2.3020455722423614e-05, + "loss": 0.1367, + "step": 20850 + }, + { + "epoch": 10.82, + "learning_rate": 2.295572242361471e-05, + "loss": 0.1358, + "step": 20900 + }, + { + "epoch": 10.85, + "learning_rate": 2.28909891248058e-05, + "loss": 0.1673, + "step": 20950 + }, + { + "epoch": 10.88, + "learning_rate": 2.2826255825996896e-05, + "loss": 0.1174, + "step": 21000 + }, + { + "epoch": 10.9, + "learning_rate": 2.2761522527187986e-05, + "loss": 0.1104, + "step": 21050 + }, + { + "epoch": 10.93, + "learning_rate": 2.269678922837908e-05, + "loss": 0.1012, + "step": 21100 + }, + { + "epoch": 10.95, + "learning_rate": 2.2632055929570172e-05, + "loss": 0.1009, + "step": 21150 + }, + { + "epoch": 10.98, + "learning_rate": 2.2567322630761265e-05, + "loss": 0.1172, + "step": 21200 + }, + { + "epoch": 11.0, + "eval_cer": 0.24114565812869432, + "eval_loss": 1.648364782333374, + "eval_mer": 0.29194630872483224, + "eval_runtime": 102.6637, + "eval_samples_per_second": 10.978, + "eval_steps_per_second": 1.373, + "step": 21241 + }, + { + "epoch": 11.0, + "learning_rate": 2.2502589331952355e-05, + "loss": 0.1047, + "step": 21250 + }, + { + "epoch": 11.03, + "learning_rate": 2.243785603314345e-05, + "loss": 0.0934, + "step": 21300 + }, + { + "epoch": 11.06, + "learning_rate": 2.2373122734334544e-05, + "loss": 0.1212, + "step": 21350 + }, + { + "epoch": 11.08, + "learning_rate": 2.2308389435525637e-05, + "loss": 0.1217, + "step": 21400 + }, + { + "epoch": 11.11, + "learning_rate": 2.224365613671673e-05, + "loss": 0.1223, + "step": 21450 + }, + { + "epoch": 11.13, + "learning_rate": 2.217892283790782e-05, + "loss": 0.1034, + "step": 21500 + }, + { + "epoch": 11.16, + "learning_rate": 2.2114189539098916e-05, + "loss": 0.1182, + "step": 21550 + }, + { + "epoch": 11.19, + "learning_rate": 2.2049456240290005e-05, + "loss": 0.0701, + "step": 21600 + }, + { + "epoch": 11.21, + "learning_rate": 2.19847229414811e-05, + "loss": 0.1111, + "step": 21650 + }, + { + "epoch": 11.24, + "learning_rate": 2.191998964267219e-05, + "loss": 0.0831, + "step": 21700 + }, + { + "epoch": 11.26, + "learning_rate": 2.1855256343863284e-05, + "loss": 0.1463, + "step": 21750 + }, + { + "epoch": 11.29, + "learning_rate": 2.1790523045054374e-05, + "loss": 0.1218, + "step": 21800 + }, + { + "epoch": 11.32, + "learning_rate": 2.172578974624547e-05, + "loss": 0.0985, + "step": 21850 + }, + { + "epoch": 11.34, + "learning_rate": 2.1661056447436563e-05, + "loss": 0.0864, + "step": 21900 + }, + { + "epoch": 11.37, + "learning_rate": 2.1596323148627656e-05, + "loss": 0.0829, + "step": 21950 + }, + { + "epoch": 11.39, + "learning_rate": 2.153158984981875e-05, + "loss": 0.1142, + "step": 22000 + }, + { + "epoch": 11.42, + "learning_rate": 2.146685655100984e-05, + "loss": 0.1085, + "step": 22050 + }, + { + "epoch": 11.44, + "learning_rate": 2.1402123252200935e-05, + "loss": 0.1193, + "step": 22100 + }, + { + "epoch": 11.47, + "learning_rate": 2.1337389953392025e-05, + "loss": 0.1069, + "step": 22150 + }, + { + "epoch": 11.5, + "learning_rate": 2.1272656654583118e-05, + "loss": 0.1013, + "step": 22200 + }, + { + "epoch": 11.52, + "learning_rate": 2.120792335577421e-05, + "loss": 0.0731, + "step": 22250 + }, + { + "epoch": 11.55, + "learning_rate": 2.1143190056965304e-05, + "loss": 0.0985, + "step": 22300 + }, + { + "epoch": 11.57, + "learning_rate": 2.1078456758156397e-05, + "loss": 0.1231, + "step": 22350 + }, + { + "epoch": 11.6, + "learning_rate": 2.101372345934749e-05, + "loss": 0.1052, + "step": 22400 + }, + { + "epoch": 11.63, + "learning_rate": 2.0948990160538583e-05, + "loss": 0.0934, + "step": 22450 + }, + { + "epoch": 11.65, + "learning_rate": 2.0884256861729676e-05, + "loss": 0.1105, + "step": 22500 + }, + { + "epoch": 11.68, + "learning_rate": 2.081952356292077e-05, + "loss": 0.0994, + "step": 22550 + }, + { + "epoch": 11.7, + "learning_rate": 2.0756084930088037e-05, + "loss": 0.082, + "step": 22600 + }, + { + "epoch": 11.73, + "learning_rate": 2.0691351631279133e-05, + "loss": 0.1214, + "step": 22650 + }, + { + "epoch": 11.76, + "learning_rate": 2.0626618332470223e-05, + "loss": 0.0848, + "step": 22700 + }, + { + "epoch": 11.78, + "learning_rate": 2.0561885033661316e-05, + "loss": 0.1289, + "step": 22750 + }, + { + "epoch": 11.81, + "learning_rate": 2.049715173485241e-05, + "loss": 0.0847, + "step": 22800 + }, + { + "epoch": 11.83, + "learning_rate": 2.0432418436043502e-05, + "loss": 0.0949, + "step": 22850 + }, + { + "epoch": 11.86, + "learning_rate": 2.0367685137234595e-05, + "loss": 0.0817, + "step": 22900 + }, + { + "epoch": 11.89, + "learning_rate": 2.0302951838425688e-05, + "loss": 0.0834, + "step": 22950 + }, + { + "epoch": 11.91, + "learning_rate": 2.023821853961678e-05, + "loss": 0.0978, + "step": 23000 + }, + { + "epoch": 11.94, + "learning_rate": 2.017348524080787e-05, + "loss": 0.1189, + "step": 23050 + }, + { + "epoch": 11.96, + "learning_rate": 2.0108751941998967e-05, + "loss": 0.0922, + "step": 23100 + }, + { + "epoch": 11.99, + "learning_rate": 2.0044018643190056e-05, + "loss": 0.1008, + "step": 23150 + }, + { + "epoch": 12.0, + "eval_cer": 0.23537892258157528, + "eval_loss": 1.6310499906539917, + "eval_mer": 0.28803131991051456, + "eval_runtime": 96.226, + "eval_samples_per_second": 11.712, + "eval_steps_per_second": 1.465, + "step": 23172 + }, + { + "epoch": 12.01, + "learning_rate": 1.9979285344381153e-05, + "loss": 0.1545, + "step": 23200 + }, + { + "epoch": 12.04, + "learning_rate": 1.9914552045572242e-05, + "loss": 0.0689, + "step": 23250 + }, + { + "epoch": 12.07, + "learning_rate": 1.9849818746763335e-05, + "loss": 0.105, + "step": 23300 + }, + { + "epoch": 12.09, + "learning_rate": 1.9785085447954428e-05, + "loss": 0.0841, + "step": 23350 + }, + { + "epoch": 12.12, + "learning_rate": 1.972035214914552e-05, + "loss": 0.0926, + "step": 23400 + }, + { + "epoch": 12.14, + "learning_rate": 1.9655618850336614e-05, + "loss": 0.0901, + "step": 23450 + }, + { + "epoch": 12.17, + "learning_rate": 1.9590885551527707e-05, + "loss": 0.096, + "step": 23500 + }, + { + "epoch": 12.2, + "learning_rate": 1.95261522527188e-05, + "loss": 0.0761, + "step": 23550 + }, + { + "epoch": 12.22, + "learning_rate": 1.9461418953909893e-05, + "loss": 0.0932, + "step": 23600 + }, + { + "epoch": 12.25, + "learning_rate": 1.9396685655100986e-05, + "loss": 0.1041, + "step": 23650 + }, + { + "epoch": 12.27, + "learning_rate": 1.9331952356292076e-05, + "loss": 0.0788, + "step": 23700 + }, + { + "epoch": 12.3, + "learning_rate": 1.9267219057483172e-05, + "loss": 0.0894, + "step": 23750 + }, + { + "epoch": 12.33, + "learning_rate": 1.920248575867426e-05, + "loss": 0.1125, + "step": 23800 + }, + { + "epoch": 12.35, + "learning_rate": 1.9137752459865355e-05, + "loss": 0.0807, + "step": 23850 + }, + { + "epoch": 12.38, + "learning_rate": 1.9073019161056448e-05, + "loss": 0.0863, + "step": 23900 + }, + { + "epoch": 12.4, + "learning_rate": 1.900828586224754e-05, + "loss": 0.0881, + "step": 23950 + }, + { + "epoch": 12.43, + "learning_rate": 1.8943552563438634e-05, + "loss": 0.0838, + "step": 24000 + }, + { + "epoch": 12.45, + "learning_rate": 1.8878819264629727e-05, + "loss": 0.1266, + "step": 24050 + }, + { + "epoch": 12.48, + "learning_rate": 1.881408596582082e-05, + "loss": 0.1028, + "step": 24100 + }, + { + "epoch": 12.51, + "learning_rate": 1.8749352667011913e-05, + "loss": 0.0897, + "step": 24150 + }, + { + "epoch": 12.53, + "learning_rate": 1.8684619368203006e-05, + "loss": 0.0841, + "step": 24200 + }, + { + "epoch": 12.56, + "learning_rate": 1.8619886069394095e-05, + "loss": 0.1013, + "step": 24250 + }, + { + "epoch": 12.58, + "learning_rate": 1.855515277058519e-05, + "loss": 0.1076, + "step": 24300 + }, + { + "epoch": 12.61, + "learning_rate": 1.849041947177628e-05, + "loss": 0.1021, + "step": 24350 + }, + { + "epoch": 12.64, + "learning_rate": 1.8425686172967374e-05, + "loss": 0.0659, + "step": 24400 + }, + { + "epoch": 12.66, + "learning_rate": 1.8360952874158467e-05, + "loss": 0.0998, + "step": 24450 + }, + { + "epoch": 12.69, + "learning_rate": 1.829621957534956e-05, + "loss": 0.1125, + "step": 24500 + }, + { + "epoch": 12.71, + "learning_rate": 1.8231486276540653e-05, + "loss": 0.0985, + "step": 24550 + }, + { + "epoch": 12.74, + "learning_rate": 1.8166752977731746e-05, + "loss": 0.0941, + "step": 24600 + }, + { + "epoch": 12.77, + "learning_rate": 1.810201967892284e-05, + "loss": 0.0866, + "step": 24650 + }, + { + "epoch": 12.79, + "learning_rate": 1.8037286380113932e-05, + "loss": 0.1083, + "step": 24700 + }, + { + "epoch": 12.82, + "learning_rate": 1.7972553081305025e-05, + "loss": 0.0763, + "step": 24750 + }, + { + "epoch": 12.84, + "learning_rate": 1.7907819782496115e-05, + "loss": 0.0954, + "step": 24800 + }, + { + "epoch": 12.87, + "learning_rate": 1.784308648368721e-05, + "loss": 0.0953, + "step": 24850 + }, + { + "epoch": 12.89, + "learning_rate": 1.77783531848783e-05, + "loss": 0.0848, + "step": 24900 + }, + { + "epoch": 12.92, + "learning_rate": 1.7713619886069394e-05, + "loss": 0.1109, + "step": 24950 + }, + { + "epoch": 12.95, + "learning_rate": 1.7648886587260487e-05, + "loss": 0.1465, + "step": 25000 + }, + { + "epoch": 12.97, + "learning_rate": 1.758415328845158e-05, + "loss": 0.0985, + "step": 25050 + }, + { + "epoch": 13.0, + "learning_rate": 1.7519419989642673e-05, + "loss": 0.1246, + "step": 25100 + }, + { + "epoch": 13.0, + "eval_cer": 0.23114998318035465, + "eval_loss": 1.5862568616867065, + "eval_mer": 0.28203899009268135, + "eval_runtime": 96.4935, + "eval_samples_per_second": 11.68, + "eval_steps_per_second": 1.461, + "step": 25103 + }, + { + "epoch": 13.02, + "learning_rate": 1.7454686690833766e-05, + "loss": 0.0577, + "step": 25150 + }, + { + "epoch": 13.05, + "learning_rate": 1.738995339202486e-05, + "loss": 0.0826, + "step": 25200 + }, + { + "epoch": 13.08, + "learning_rate": 1.732651475919213e-05, + "loss": 0.0626, + "step": 25250 + }, + { + "epoch": 13.1, + "learning_rate": 1.7261781460383223e-05, + "loss": 0.0665, + "step": 25300 + }, + { + "epoch": 13.13, + "learning_rate": 1.7197048161574312e-05, + "loss": 0.0685, + "step": 25350 + }, + { + "epoch": 13.15, + "learning_rate": 1.713231486276541e-05, + "loss": 0.0748, + "step": 25400 + }, + { + "epoch": 13.18, + "learning_rate": 1.70675815639565e-05, + "loss": 0.0822, + "step": 25450 + }, + { + "epoch": 13.21, + "learning_rate": 1.700284826514759e-05, + "loss": 0.0858, + "step": 25500 + }, + { + "epoch": 13.23, + "learning_rate": 1.6938114966338688e-05, + "loss": 0.0709, + "step": 25550 + }, + { + "epoch": 13.26, + "learning_rate": 1.6873381667529777e-05, + "loss": 0.0684, + "step": 25600 + }, + { + "epoch": 13.28, + "learning_rate": 1.680864836872087e-05, + "loss": 0.0726, + "step": 25650 + }, + { + "epoch": 13.31, + "learning_rate": 1.6743915069911963e-05, + "loss": 0.0873, + "step": 25700 + }, + { + "epoch": 13.34, + "learning_rate": 1.6679181771103056e-05, + "loss": 0.0945, + "step": 25750 + }, + { + "epoch": 13.36, + "learning_rate": 1.661444847229415e-05, + "loss": 0.0811, + "step": 25800 + }, + { + "epoch": 13.39, + "learning_rate": 1.6549715173485242e-05, + "loss": 0.0901, + "step": 25850 + }, + { + "epoch": 13.41, + "learning_rate": 1.6484981874676332e-05, + "loss": 0.0747, + "step": 25900 + }, + { + "epoch": 13.44, + "learning_rate": 1.6420248575867428e-05, + "loss": 0.0661, + "step": 25950 + }, + { + "epoch": 13.46, + "learning_rate": 1.635551527705852e-05, + "loss": 0.0655, + "step": 26000 + }, + { + "epoch": 13.49, + "learning_rate": 1.629078197824961e-05, + "loss": 0.0822, + "step": 26050 + }, + { + "epoch": 13.52, + "learning_rate": 1.6226048679440707e-05, + "loss": 0.1383, + "step": 26100 + }, + { + "epoch": 13.54, + "learning_rate": 1.6161315380631797e-05, + "loss": 0.104, + "step": 26150 + }, + { + "epoch": 13.57, + "learning_rate": 1.6096582081822893e-05, + "loss": 0.0973, + "step": 26200 + }, + { + "epoch": 13.59, + "learning_rate": 1.6031848783013983e-05, + "loss": 0.1006, + "step": 26250 + }, + { + "epoch": 13.62, + "learning_rate": 1.5967115484205076e-05, + "loss": 0.084, + "step": 26300 + }, + { + "epoch": 13.65, + "learning_rate": 1.590238218539617e-05, + "loss": 0.0732, + "step": 26350 + }, + { + "epoch": 13.67, + "learning_rate": 1.5837648886587262e-05, + "loss": 0.0667, + "step": 26400 + }, + { + "epoch": 13.7, + "learning_rate": 1.577291558777835e-05, + "loss": 0.0775, + "step": 26450 + }, + { + "epoch": 13.72, + "learning_rate": 1.5708182288969448e-05, + "loss": 0.0927, + "step": 26500 + }, + { + "epoch": 13.75, + "learning_rate": 1.564344899016054e-05, + "loss": 0.0653, + "step": 26550 + }, + { + "epoch": 13.78, + "learning_rate": 1.557871569135163e-05, + "loss": 0.098, + "step": 26600 + }, + { + "epoch": 13.8, + "learning_rate": 1.5513982392542727e-05, + "loss": 0.0581, + "step": 26650 + }, + { + "epoch": 13.83, + "learning_rate": 1.5449249093733816e-05, + "loss": 0.0803, + "step": 26700 + }, + { + "epoch": 13.85, + "learning_rate": 1.5384515794924913e-05, + "loss": 0.0861, + "step": 26750 + }, + { + "epoch": 13.88, + "learning_rate": 1.5319782496116002e-05, + "loss": 0.0788, + "step": 26800 + }, + { + "epoch": 13.9, + "learning_rate": 1.5255049197307095e-05, + "loss": 0.099, + "step": 26850 + }, + { + "epoch": 13.93, + "learning_rate": 1.519031589849819e-05, + "loss": 0.0685, + "step": 26900 + }, + { + "epoch": 13.96, + "learning_rate": 1.5125582599689281e-05, + "loss": 0.0801, + "step": 26950 + }, + { + "epoch": 13.98, + "learning_rate": 1.5060849300880373e-05, + "loss": 0.0569, + "step": 27000 + }, + { + "epoch": 14.0, + "eval_cer": 0.23763756067086356, + "eval_loss": 1.8162798881530762, + "eval_mer": 0.2862735698306168, + "eval_runtime": 96.7832, + "eval_samples_per_second": 11.645, + "eval_steps_per_second": 1.457, + "step": 27034 + }, + { + "epoch": 14.01, + "learning_rate": 1.4996116002071467e-05, + "loss": 0.0843, + "step": 27050 + }, + { + "epoch": 14.03, + "learning_rate": 1.4931382703262559e-05, + "loss": 0.0703, + "step": 27100 + }, + { + "epoch": 14.06, + "learning_rate": 1.486664940445365e-05, + "loss": 0.0578, + "step": 27150 + }, + { + "epoch": 14.09, + "learning_rate": 1.4801916105644745e-05, + "loss": 0.0964, + "step": 27200 + }, + { + "epoch": 14.11, + "learning_rate": 1.4737182806835836e-05, + "loss": 0.0566, + "step": 27250 + }, + { + "epoch": 14.14, + "learning_rate": 1.467244950802693e-05, + "loss": 0.0736, + "step": 27300 + }, + { + "epoch": 14.16, + "learning_rate": 1.4607716209218022e-05, + "loss": 0.0629, + "step": 27350 + }, + { + "epoch": 14.19, + "learning_rate": 1.4542982910409115e-05, + "loss": 0.0761, + "step": 27400 + }, + { + "epoch": 14.22, + "learning_rate": 1.447824961160021e-05, + "loss": 0.079, + "step": 27450 + }, + { + "epoch": 14.24, + "learning_rate": 1.44135163127913e-05, + "loss": 0.0768, + "step": 27500 + }, + { + "epoch": 14.27, + "learning_rate": 1.4348783013982392e-05, + "loss": 0.0595, + "step": 27550 + }, + { + "epoch": 14.29, + "learning_rate": 1.4284049715173487e-05, + "loss": 0.0785, + "step": 27600 + }, + { + "epoch": 14.32, + "learning_rate": 1.4219316416364578e-05, + "loss": 0.0731, + "step": 27650 + }, + { + "epoch": 14.34, + "learning_rate": 1.4154583117555673e-05, + "loss": 0.0534, + "step": 27700 + }, + { + "epoch": 14.37, + "learning_rate": 1.4089849818746764e-05, + "loss": 0.0695, + "step": 27750 + }, + { + "epoch": 14.4, + "learning_rate": 1.4025116519937855e-05, + "loss": 0.0557, + "step": 27800 + }, + { + "epoch": 14.42, + "learning_rate": 1.396038322112895e-05, + "loss": 0.0768, + "step": 27850 + }, + { + "epoch": 14.45, + "learning_rate": 1.3895649922320041e-05, + "loss": 0.0526, + "step": 27900 + }, + { + "epoch": 14.47, + "learning_rate": 1.3830916623511134e-05, + "loss": 0.0676, + "step": 27950 + }, + { + "epoch": 14.5, + "learning_rate": 1.3766183324702229e-05, + "loss": 0.0728, + "step": 28000 + }, + { + "epoch": 14.53, + "learning_rate": 1.370145002589332e-05, + "loss": 0.0854, + "step": 28050 + }, + { + "epoch": 14.55, + "learning_rate": 1.3636716727084412e-05, + "loss": 0.0766, + "step": 28100 + }, + { + "epoch": 14.58, + "learning_rate": 1.3571983428275506e-05, + "loss": 0.0628, + "step": 28150 + }, + { + "epoch": 14.6, + "learning_rate": 1.3507250129466598e-05, + "loss": 0.1061, + "step": 28200 + }, + { + "epoch": 14.63, + "learning_rate": 1.3442516830657692e-05, + "loss": 0.0766, + "step": 28250 + }, + { + "epoch": 14.66, + "learning_rate": 1.3377783531848783e-05, + "loss": 0.072, + "step": 28300 + }, + { + "epoch": 14.68, + "learning_rate": 1.3313050233039875e-05, + "loss": 0.077, + "step": 28350 + }, + { + "epoch": 14.71, + "learning_rate": 1.324831693423097e-05, + "loss": 0.0743, + "step": 28400 + }, + { + "epoch": 14.73, + "learning_rate": 1.318358363542206e-05, + "loss": 0.0843, + "step": 28450 + }, + { + "epoch": 14.76, + "learning_rate": 1.3118850336613154e-05, + "loss": 0.0619, + "step": 28500 + }, + { + "epoch": 14.79, + "learning_rate": 1.3054117037804248e-05, + "loss": 0.0634, + "step": 28550 + }, + { + "epoch": 14.81, + "learning_rate": 1.298938373899534e-05, + "loss": 0.0838, + "step": 28600 + }, + { + "epoch": 14.84, + "learning_rate": 1.2924650440186434e-05, + "loss": 0.0714, + "step": 28650 + }, + { + "epoch": 14.86, + "learning_rate": 1.2859917141377526e-05, + "loss": 0.0839, + "step": 28700 + }, + { + "epoch": 14.89, + "learning_rate": 1.2795183842568617e-05, + "loss": 0.0672, + "step": 28750 + }, + { + "epoch": 14.91, + "learning_rate": 1.2730450543759712e-05, + "loss": 0.07, + "step": 28800 + }, + { + "epoch": 14.94, + "learning_rate": 1.2665717244950803e-05, + "loss": 0.0713, + "step": 28850 + }, + { + "epoch": 14.97, + "learning_rate": 1.2600983946141894e-05, + "loss": 0.0495, + "step": 28900 + }, + { + "epoch": 14.99, + "learning_rate": 1.2536250647332989e-05, + "loss": 0.0629, + "step": 28950 + }, + { + "epoch": 15.0, + "eval_cer": 0.23283194771493104, + "eval_loss": 1.7688497304916382, + "eval_mer": 0.27972195589645255, + "eval_runtime": 130.9285, + "eval_samples_per_second": 8.608, + "eval_steps_per_second": 1.077, + "step": 28965 + }, + { + "epoch": 15.02, + "learning_rate": 1.247151734852408e-05, + "loss": 0.0751, + "step": 29000 + }, + { + "epoch": 15.04, + "learning_rate": 1.2406784049715175e-05, + "loss": 0.0748, + "step": 29050 + }, + { + "epoch": 15.07, + "learning_rate": 1.2342050750906268e-05, + "loss": 0.0828, + "step": 29100 + }, + { + "epoch": 15.1, + "learning_rate": 1.227731745209736e-05, + "loss": 0.0519, + "step": 29150 + }, + { + "epoch": 15.12, + "learning_rate": 1.2212584153288452e-05, + "loss": 0.0869, + "step": 29200 + }, + { + "epoch": 15.15, + "learning_rate": 1.2147850854479545e-05, + "loss": 0.0665, + "step": 29250 + }, + { + "epoch": 15.17, + "learning_rate": 1.2084412221646815e-05, + "loss": 0.0667, + "step": 29300 + }, + { + "epoch": 15.2, + "learning_rate": 1.2019678922837908e-05, + "loss": 0.0669, + "step": 29350 + }, + { + "epoch": 15.23, + "learning_rate": 1.1954945624029e-05, + "loss": 0.0644, + "step": 29400 + }, + { + "epoch": 15.25, + "learning_rate": 1.1890212325220094e-05, + "loss": 0.059, + "step": 29450 + }, + { + "epoch": 15.28, + "learning_rate": 1.1825479026411187e-05, + "loss": 0.0827, + "step": 29500 + }, + { + "epoch": 15.3, + "learning_rate": 1.176074572760228e-05, + "loss": 0.0479, + "step": 29550 + }, + { + "epoch": 15.33, + "learning_rate": 1.1696012428793373e-05, + "loss": 0.0785, + "step": 29600 + }, + { + "epoch": 15.35, + "learning_rate": 1.1631279129984464e-05, + "loss": 0.0576, + "step": 29650 + }, + { + "epoch": 15.38, + "learning_rate": 1.1566545831175557e-05, + "loss": 0.0678, + "step": 29700 + }, + { + "epoch": 15.41, + "learning_rate": 1.150181253236665e-05, + "loss": 0.0703, + "step": 29750 + }, + { + "epoch": 15.43, + "learning_rate": 1.1437079233557743e-05, + "loss": 0.0621, + "step": 29800 + }, + { + "epoch": 15.46, + "learning_rate": 1.1372345934748834e-05, + "loss": 0.069, + "step": 29850 + }, + { + "epoch": 15.48, + "learning_rate": 1.1307612635939927e-05, + "loss": 0.0616, + "step": 29900 + }, + { + "epoch": 15.51, + "learning_rate": 1.124287933713102e-05, + "loss": 0.0995, + "step": 29950 + }, + { + "epoch": 15.54, + "learning_rate": 1.1178146038322113e-05, + "loss": 0.0579, + "step": 30000 + }, + { + "epoch": 15.56, + "learning_rate": 1.1113412739513206e-05, + "loss": 0.0492, + "step": 30050 + }, + { + "epoch": 15.59, + "learning_rate": 1.10486794407043e-05, + "loss": 0.0841, + "step": 30100 + }, + { + "epoch": 15.61, + "learning_rate": 1.0983946141895392e-05, + "loss": 0.08, + "step": 30150 + }, + { + "epoch": 15.64, + "learning_rate": 1.0919212843086485e-05, + "loss": 0.049, + "step": 30200 + }, + { + "epoch": 15.67, + "learning_rate": 1.0854479544277576e-05, + "loss": 0.0645, + "step": 30250 + }, + { + "epoch": 15.69, + "learning_rate": 1.078974624546867e-05, + "loss": 0.0733, + "step": 30300 + }, + { + "epoch": 15.72, + "learning_rate": 1.0725012946659762e-05, + "loss": 0.0538, + "step": 30350 + }, + { + "epoch": 15.74, + "learning_rate": 1.0660279647850854e-05, + "loss": 0.059, + "step": 30400 + }, + { + "epoch": 15.77, + "learning_rate": 1.0595546349041947e-05, + "loss": 0.0501, + "step": 30450 + }, + { + "epoch": 15.79, + "learning_rate": 1.053081305023304e-05, + "loss": 0.0549, + "step": 30500 + }, + { + "epoch": 15.82, + "learning_rate": 1.0466079751424133e-05, + "loss": 0.0575, + "step": 30550 + }, + { + "epoch": 15.85, + "learning_rate": 1.0401346452615226e-05, + "loss": 0.0548, + "step": 30600 + }, + { + "epoch": 15.87, + "learning_rate": 1.0336613153806319e-05, + "loss": 0.0717, + "step": 30650 + }, + { + "epoch": 15.9, + "learning_rate": 1.0271879854997412e-05, + "loss": 0.0797, + "step": 30700 + }, + { + "epoch": 15.92, + "learning_rate": 1.0207146556188505e-05, + "loss": 0.0476, + "step": 30750 + }, + { + "epoch": 15.95, + "learning_rate": 1.0142413257379596e-05, + "loss": 0.0608, + "step": 30800 + }, + { + "epoch": 15.98, + "learning_rate": 1.0077679958570689e-05, + "loss": 0.0592, + "step": 30850 + }, + { + "epoch": 16.0, + "eval_cer": 0.2342736316017108, + "eval_loss": 1.707387089729309, + "eval_mer": 0.2841962288271013, + "eval_runtime": 98.3179, + "eval_samples_per_second": 11.463, + "eval_steps_per_second": 1.434, + "step": 30896 + }, + { + "epoch": 16.0, + "learning_rate": 1.0012946659761782e-05, + "loss": 0.0732, + "step": 30900 + }, + { + "epoch": 16.03, + "learning_rate": 9.948213360952875e-06, + "loss": 0.0495, + "step": 30950 + }, + { + "epoch": 16.05, + "learning_rate": 9.883480062143966e-06, + "loss": 0.0625, + "step": 31000 + }, + { + "epoch": 16.08, + "learning_rate": 9.818746763335059e-06, + "loss": 0.0517, + "step": 31050 + }, + { + "epoch": 16.11, + "learning_rate": 9.754013464526154e-06, + "loss": 0.0447, + "step": 31100 + }, + { + "epoch": 16.13, + "learning_rate": 9.689280165717247e-06, + "loss": 0.1293, + "step": 31150 + }, + { + "epoch": 16.16, + "learning_rate": 9.624546866908338e-06, + "loss": 0.0454, + "step": 31200 + }, + { + "epoch": 16.18, + "learning_rate": 9.559813568099431e-06, + "loss": 0.0423, + "step": 31250 + }, + { + "epoch": 16.21, + "learning_rate": 9.495080269290524e-06, + "loss": 0.0443, + "step": 31300 + }, + { + "epoch": 16.24, + "learning_rate": 9.430346970481615e-06, + "loss": 0.0452, + "step": 31350 + }, + { + "epoch": 16.26, + "learning_rate": 9.365613671672708e-06, + "loss": 0.0542, + "step": 31400 + }, + { + "epoch": 16.29, + "learning_rate": 9.300880372863801e-06, + "loss": 0.055, + "step": 31450 + }, + { + "epoch": 16.31, + "learning_rate": 9.236147074054894e-06, + "loss": 0.0615, + "step": 31500 + }, + { + "epoch": 16.34, + "learning_rate": 9.171413775245986e-06, + "loss": 0.0679, + "step": 31550 + }, + { + "epoch": 16.36, + "learning_rate": 9.106680476437079e-06, + "loss": 0.1088, + "step": 31600 + }, + { + "epoch": 16.39, + "learning_rate": 9.041947177628173e-06, + "loss": 0.0625, + "step": 31650 + }, + { + "epoch": 16.42, + "learning_rate": 8.977213878819266e-06, + "loss": 0.0788, + "step": 31700 + }, + { + "epoch": 16.44, + "learning_rate": 8.912480580010358e-06, + "loss": 0.0485, + "step": 31750 + }, + { + "epoch": 16.47, + "learning_rate": 8.84774728120145e-06, + "loss": 0.0699, + "step": 31800 + }, + { + "epoch": 16.49, + "learning_rate": 8.783013982392544e-06, + "loss": 0.0488, + "step": 31850 + }, + { + "epoch": 16.52, + "learning_rate": 8.718280683583637e-06, + "loss": 0.0483, + "step": 31900 + }, + { + "epoch": 16.55, + "learning_rate": 8.653547384774728e-06, + "loss": 0.0627, + "step": 31950 + }, + { + "epoch": 16.57, + "learning_rate": 8.58881408596582e-06, + "loss": 0.0463, + "step": 32000 + }, + { + "epoch": 16.6, + "learning_rate": 8.524080787156914e-06, + "loss": 0.0402, + "step": 32050 + }, + { + "epoch": 16.62, + "learning_rate": 8.459347488348007e-06, + "loss": 0.0736, + "step": 32100 + }, + { + "epoch": 16.65, + "learning_rate": 8.394614189539098e-06, + "loss": 0.0539, + "step": 32150 + }, + { + "epoch": 16.68, + "learning_rate": 8.329880890730193e-06, + "loss": 0.0649, + "step": 32200 + }, + { + "epoch": 16.7, + "learning_rate": 8.265147591921286e-06, + "loss": 0.0452, + "step": 32250 + }, + { + "epoch": 16.73, + "learning_rate": 8.200414293112377e-06, + "loss": 0.1346, + "step": 32300 + }, + { + "epoch": 16.75, + "learning_rate": 8.13568099430347e-06, + "loss": 0.0685, + "step": 32350 + }, + { + "epoch": 16.78, + "learning_rate": 8.070947695494563e-06, + "loss": 0.0553, + "step": 32400 + }, + { + "epoch": 16.8, + "learning_rate": 8.006214396685656e-06, + "loss": 0.0381, + "step": 32450 + }, + { + "epoch": 16.83, + "learning_rate": 7.941481097876747e-06, + "loss": 0.0606, + "step": 32500 + }, + { + "epoch": 16.86, + "learning_rate": 7.87674779906784e-06, + "loss": 0.0854, + "step": 32550 + }, + { + "epoch": 16.88, + "learning_rate": 7.812014500258933e-06, + "loss": 0.0536, + "step": 32600 + }, + { + "epoch": 16.91, + "learning_rate": 7.747281201450026e-06, + "loss": 0.0678, + "step": 32650 + }, + { + "epoch": 16.93, + "learning_rate": 7.68254790264112e-06, + "loss": 0.0529, + "step": 32700 + }, + { + "epoch": 16.96, + "learning_rate": 7.6178146038322114e-06, + "loss": 0.053, + "step": 32750 + }, + { + "epoch": 16.99, + "learning_rate": 7.553081305023304e-06, + "loss": 0.0663, + "step": 32800 + }, + { + "epoch": 17.0, + "eval_cer": 0.23643615743188043, + "eval_loss": 1.7551674842834473, + "eval_mer": 0.2871524448705657, + "eval_runtime": 99.3594, + "eval_samples_per_second": 11.343, + "eval_steps_per_second": 1.419, + "step": 32827 + }, + { + "epoch": 17.01, + "learning_rate": 7.489642672190575e-06, + "loss": 0.0838, + "step": 32850 + }, + { + "epoch": 17.04, + "learning_rate": 7.424909373381668e-06, + "loss": 0.0597, + "step": 32900 + }, + { + "epoch": 17.06, + "learning_rate": 7.360176074572761e-06, + "loss": 0.0525, + "step": 32950 + }, + { + "epoch": 17.09, + "learning_rate": 7.295442775763853e-06, + "loss": 0.0444, + "step": 33000 + }, + { + "epoch": 17.12, + "learning_rate": 7.230709476954946e-06, + "loss": 0.0436, + "step": 33050 + }, + { + "epoch": 17.14, + "learning_rate": 7.165976178146039e-06, + "loss": 0.0725, + "step": 33100 + }, + { + "epoch": 17.17, + "learning_rate": 7.101242879337132e-06, + "loss": 0.0627, + "step": 33150 + }, + { + "epoch": 17.19, + "learning_rate": 7.037804246504402e-06, + "loss": 0.0713, + "step": 33200 + }, + { + "epoch": 17.22, + "learning_rate": 6.973070947695495e-06, + "loss": 0.0475, + "step": 33250 + }, + { + "epoch": 17.24, + "learning_rate": 6.908337648886587e-06, + "loss": 0.073, + "step": 33300 + }, + { + "epoch": 17.27, + "learning_rate": 6.8436043500776804e-06, + "loss": 0.0631, + "step": 33350 + }, + { + "epoch": 17.3, + "learning_rate": 6.7788710512687734e-06, + "loss": 0.0607, + "step": 33400 + }, + { + "epoch": 17.32, + "learning_rate": 6.714137752459866e-06, + "loss": 0.0441, + "step": 33450 + }, + { + "epoch": 17.35, + "learning_rate": 6.649404453650958e-06, + "loss": 0.0891, + "step": 33500 + }, + { + "epoch": 17.37, + "learning_rate": 6.584671154842051e-06, + "loss": 0.042, + "step": 33550 + }, + { + "epoch": 17.4, + "learning_rate": 6.519937856033144e-06, + "loss": 0.0391, + "step": 33600 + }, + { + "epoch": 17.43, + "learning_rate": 6.455204557224237e-06, + "loss": 0.0423, + "step": 33650 + }, + { + "epoch": 17.45, + "learning_rate": 6.390471258415329e-06, + "loss": 0.0505, + "step": 33700 + }, + { + "epoch": 17.48, + "learning_rate": 6.325737959606422e-06, + "loss": 0.0719, + "step": 33750 + }, + { + "epoch": 17.5, + "learning_rate": 6.261004660797515e-06, + "loss": 0.0501, + "step": 33800 + }, + { + "epoch": 17.53, + "learning_rate": 6.196271361988607e-06, + "loss": 0.0444, + "step": 33850 + }, + { + "epoch": 17.56, + "learning_rate": 6.1315380631797e-06, + "loss": 0.0535, + "step": 33900 + }, + { + "epoch": 17.58, + "learning_rate": 6.066804764370793e-06, + "loss": 0.063, + "step": 33950 + }, + { + "epoch": 17.61, + "learning_rate": 6.002071465561886e-06, + "loss": 0.0342, + "step": 34000 + }, + { + "epoch": 17.63, + "learning_rate": 5.937338166752978e-06, + "loss": 0.0587, + "step": 34050 + }, + { + "epoch": 17.66, + "learning_rate": 5.87260486794407e-06, + "loss": 0.0348, + "step": 34100 + }, + { + "epoch": 17.69, + "learning_rate": 5.807871569135163e-06, + "loss": 0.0528, + "step": 34150 + }, + { + "epoch": 17.71, + "learning_rate": 5.743138270326256e-06, + "loss": 0.0567, + "step": 34200 + }, + { + "epoch": 17.74, + "learning_rate": 5.678404971517349e-06, + "loss": 0.0507, + "step": 34250 + }, + { + "epoch": 17.76, + "learning_rate": 5.613671672708441e-06, + "loss": 0.0623, + "step": 34300 + }, + { + "epoch": 17.79, + "learning_rate": 5.548938373899534e-06, + "loss": 0.043, + "step": 34350 + }, + { + "epoch": 17.81, + "learning_rate": 5.484205075090626e-06, + "loss": 0.048, + "step": 34400 + }, + { + "epoch": 17.84, + "learning_rate": 5.419471776281719e-06, + "loss": 0.0552, + "step": 34450 + }, + { + "epoch": 17.87, + "learning_rate": 5.354738477472812e-06, + "loss": 0.0568, + "step": 34500 + }, + { + "epoch": 17.89, + "learning_rate": 5.290005178663905e-06, + "loss": 0.0359, + "step": 34550 + }, + { + "epoch": 17.92, + "learning_rate": 5.2252718798549975e-06, + "loss": 0.0341, + "step": 34600 + }, + { + "epoch": 17.94, + "learning_rate": 5.1605385810460905e-06, + "loss": 0.0423, + "step": 34650 + }, + { + "epoch": 17.97, + "learning_rate": 5.095805282237183e-06, + "loss": 0.0527, + "step": 34700 + }, + { + "epoch": 18.0, + "learning_rate": 5.031071983428276e-06, + "loss": 0.0708, + "step": 34750 + }, + { + "epoch": 18.0, + "eval_cer": 0.2352347541928973, + "eval_loss": 1.800703763961792, + "eval_mer": 0.28523489932885904, + "eval_runtime": 119.8795, + "eval_samples_per_second": 9.401, + "eval_steps_per_second": 1.176, + "step": 34758 + }, + { + "epoch": 18.02, + "learning_rate": 4.966338684619369e-06, + "loss": 0.0418, + "step": 34800 + }, + { + "epoch": 18.05, + "learning_rate": 4.901605385810462e-06, + "loss": 0.0639, + "step": 34850 + }, + { + "epoch": 18.07, + "learning_rate": 4.836872087001554e-06, + "loss": 0.0388, + "step": 34900 + }, + { + "epoch": 18.1, + "learning_rate": 4.772138788192647e-06, + "loss": 0.053, + "step": 34950 + }, + { + "epoch": 18.13, + "learning_rate": 4.707405489383739e-06, + "loss": 0.0498, + "step": 35000 + }, + { + "epoch": 18.15, + "learning_rate": 4.642672190574832e-06, + "loss": 0.0654, + "step": 35050 + }, + { + "epoch": 18.18, + "learning_rate": 4.577938891765925e-06, + "loss": 0.0588, + "step": 35100 + }, + { + "epoch": 18.2, + "learning_rate": 4.513205592957017e-06, + "loss": 0.0392, + "step": 35150 + }, + { + "epoch": 18.23, + "learning_rate": 4.44847229414811e-06, + "loss": 0.0529, + "step": 35200 + }, + { + "epoch": 18.25, + "learning_rate": 4.383738995339202e-06, + "loss": 0.0478, + "step": 35250 + }, + { + "epoch": 18.28, + "learning_rate": 4.319005696530295e-06, + "loss": 0.0654, + "step": 35300 + }, + { + "epoch": 18.31, + "learning_rate": 4.254272397721388e-06, + "loss": 0.0462, + "step": 35350 + }, + { + "epoch": 18.33, + "learning_rate": 4.189539098912481e-06, + "loss": 0.0426, + "step": 35400 + }, + { + "epoch": 18.36, + "learning_rate": 4.124805800103573e-06, + "loss": 0.0578, + "step": 35450 + }, + { + "epoch": 18.38, + "learning_rate": 4.060072501294666e-06, + "loss": 0.0552, + "step": 35500 + }, + { + "epoch": 18.41, + "learning_rate": 3.995339202485758e-06, + "loss": 0.0485, + "step": 35550 + }, + { + "epoch": 18.44, + "learning_rate": 3.930605903676852e-06, + "loss": 0.0593, + "step": 35600 + }, + { + "epoch": 18.46, + "learning_rate": 3.865872604867944e-06, + "loss": 0.0779, + "step": 35650 + }, + { + "epoch": 18.49, + "learning_rate": 3.8011393060590373e-06, + "loss": 0.0399, + "step": 35700 + }, + { + "epoch": 18.51, + "learning_rate": 3.7364060072501295e-06, + "loss": 0.0452, + "step": 35750 + }, + { + "epoch": 18.54, + "learning_rate": 3.6716727084412225e-06, + "loss": 0.049, + "step": 35800 + }, + { + "epoch": 18.57, + "learning_rate": 3.606939409632315e-06, + "loss": 0.0637, + "step": 35850 + }, + { + "epoch": 18.59, + "learning_rate": 3.5422061108234076e-06, + "loss": 0.0565, + "step": 35900 + }, + { + "epoch": 18.62, + "learning_rate": 3.4774728120145006e-06, + "loss": 0.0482, + "step": 35950 + }, + { + "epoch": 18.64, + "learning_rate": 3.4127395132055927e-06, + "loss": 0.0461, + "step": 36000 + }, + { + "epoch": 18.67, + "learning_rate": 3.3480062143966857e-06, + "loss": 0.0489, + "step": 36050 + }, + { + "epoch": 18.69, + "learning_rate": 3.2832729155877783e-06, + "loss": 0.0379, + "step": 36100 + }, + { + "epoch": 18.72, + "learning_rate": 3.2185396167788713e-06, + "loss": 0.0406, + "step": 36150 + }, + { + "epoch": 18.75, + "learning_rate": 3.153806317969964e-06, + "loss": 0.051, + "step": 36200 + }, + { + "epoch": 18.77, + "learning_rate": 3.089073019161057e-06, + "loss": 0.037, + "step": 36250 + }, + { + "epoch": 18.8, + "learning_rate": 3.0243397203521494e-06, + "loss": 0.0562, + "step": 36300 + }, + { + "epoch": 18.82, + "learning_rate": 2.959606421543242e-06, + "loss": 0.064, + "step": 36350 + }, + { + "epoch": 18.85, + "learning_rate": 2.8961677887105128e-06, + "loss": 0.0468, + "step": 36400 + }, + { + "epoch": 18.88, + "learning_rate": 2.8314344899016058e-06, + "loss": 0.0478, + "step": 36450 + }, + { + "epoch": 18.9, + "learning_rate": 2.7667011910926983e-06, + "loss": 0.055, + "step": 36500 + }, + { + "epoch": 18.93, + "learning_rate": 2.701967892283791e-06, + "loss": 0.0339, + "step": 36550 + }, + { + "epoch": 18.95, + "learning_rate": 2.637234593474884e-06, + "loss": 0.0627, + "step": 36600 + }, + { + "epoch": 18.98, + "learning_rate": 2.5725012946659764e-06, + "loss": 0.0406, + "step": 36650 + }, + { + "epoch": 19.0, + "eval_cer": 0.2302849728482868, + "eval_loss": 1.8154484033584595, + "eval_mer": 0.28044103547459254, + "eval_runtime": 98.8158, + "eval_samples_per_second": 11.405, + "eval_steps_per_second": 1.427, + "step": 36689 + }, + { + "epoch": 19.01, + "learning_rate": 2.507767995857069e-06, + "loss": 0.0603, + "step": 36700 + }, + { + "epoch": 19.03, + "learning_rate": 2.443034697048162e-06, + "loss": 0.0538, + "step": 36750 + }, + { + "epoch": 19.06, + "learning_rate": 2.378301398239254e-06, + "loss": 0.0482, + "step": 36800 + }, + { + "epoch": 19.08, + "learning_rate": 2.313568099430347e-06, + "loss": 0.0505, + "step": 36850 + }, + { + "epoch": 19.11, + "learning_rate": 2.2488348006214397e-06, + "loss": 0.0712, + "step": 36900 + }, + { + "epoch": 19.14, + "learning_rate": 2.1841015018125323e-06, + "loss": 0.0746, + "step": 36950 + }, + { + "epoch": 19.16, + "learning_rate": 2.1193682030036252e-06, + "loss": 0.0573, + "step": 37000 + }, + { + "epoch": 19.19, + "learning_rate": 2.054634904194718e-06, + "loss": 0.0583, + "step": 37050 + }, + { + "epoch": 19.21, + "learning_rate": 1.9899016053858104e-06, + "loss": 0.0409, + "step": 37100 + }, + { + "epoch": 19.24, + "learning_rate": 1.9251683065769034e-06, + "loss": 0.0594, + "step": 37150 + }, + { + "epoch": 19.26, + "learning_rate": 1.860435007767996e-06, + "loss": 0.0355, + "step": 37200 + }, + { + "epoch": 19.29, + "learning_rate": 1.7957017089590887e-06, + "loss": 0.0407, + "step": 37250 + }, + { + "epoch": 19.32, + "learning_rate": 1.7309684101501813e-06, + "loss": 0.0578, + "step": 37300 + }, + { + "epoch": 19.34, + "learning_rate": 1.666235111341274e-06, + "loss": 0.0357, + "step": 37350 + }, + { + "epoch": 19.37, + "learning_rate": 1.6015018125323668e-06, + "loss": 0.0425, + "step": 37400 + }, + { + "epoch": 19.39, + "learning_rate": 1.5367685137234596e-06, + "loss": 0.0585, + "step": 37450 + }, + { + "epoch": 19.42, + "learning_rate": 1.472035214914552e-06, + "loss": 0.0527, + "step": 37500 + }, + { + "epoch": 19.45, + "learning_rate": 1.4073019161056447e-06, + "loss": 0.0463, + "step": 37550 + }, + { + "epoch": 19.47, + "learning_rate": 1.3425686172967375e-06, + "loss": 0.0412, + "step": 37600 + }, + { + "epoch": 19.5, + "learning_rate": 1.2778353184878303e-06, + "loss": 0.0343, + "step": 37650 + }, + { + "epoch": 19.52, + "learning_rate": 1.2131020196789228e-06, + "loss": 0.0478, + "step": 37700 + }, + { + "epoch": 19.55, + "learning_rate": 1.1483687208700156e-06, + "loss": 0.0433, + "step": 37750 + }, + { + "epoch": 19.58, + "learning_rate": 1.0836354220611084e-06, + "loss": 0.042, + "step": 37800 + }, + { + "epoch": 19.6, + "learning_rate": 1.018902123252201e-06, + "loss": 0.0462, + "step": 37850 + }, + { + "epoch": 19.63, + "learning_rate": 9.541688244432937e-07, + "loss": 0.0595, + "step": 37900 + }, + { + "epoch": 19.65, + "learning_rate": 8.894355256343863e-07, + "loss": 0.0434, + "step": 37950 + }, + { + "epoch": 19.68, + "learning_rate": 8.24702226825479e-07, + "loss": 0.0368, + "step": 38000 + }, + { + "epoch": 19.7, + "learning_rate": 7.599689280165717e-07, + "loss": 0.0522, + "step": 38050 + }, + { + "epoch": 19.73, + "learning_rate": 6.952356292076644e-07, + "loss": 0.0385, + "step": 38100 + }, + { + "epoch": 19.76, + "learning_rate": 6.305023303987571e-07, + "loss": 0.0522, + "step": 38150 + }, + { + "epoch": 19.78, + "learning_rate": 5.657690315898499e-07, + "loss": 0.052, + "step": 38200 + }, + { + "epoch": 19.81, + "learning_rate": 5.010357327809425e-07, + "loss": 0.0455, + "step": 38250 + }, + { + "epoch": 19.83, + "learning_rate": 4.363024339720352e-07, + "loss": 0.05, + "step": 38300 + }, + { + "epoch": 19.86, + "learning_rate": 3.7156913516312793e-07, + "loss": 0.054, + "step": 38350 + }, + { + "epoch": 19.89, + "learning_rate": 3.068358363542206e-07, + "loss": 0.0402, + "step": 38400 + }, + { + "epoch": 19.91, + "learning_rate": 2.421025375453133e-07, + "loss": 0.0437, + "step": 38450 + }, + { + "epoch": 19.94, + "learning_rate": 1.7736923873640602e-07, + "loss": 0.0387, + "step": 38500 + }, + { + "epoch": 19.96, + "learning_rate": 1.1263593992749872e-07, + "loss": 0.0398, + "step": 38550 + }, + { + "epoch": 19.99, + "learning_rate": 4.7902641118591406e-08, + "loss": 0.0491, + "step": 38600 + }, + { + "epoch": 20.0, + "eval_cer": 0.23235138641933778, + "eval_loss": 1.8284550905227661, + "eval_mer": 0.2810802173218281, + "eval_runtime": 95.8286, + "eval_samples_per_second": 11.761, + "eval_steps_per_second": 1.471, + "step": 38620 + }, + { + "epoch": 20.0, + "step": 38620, + "total_flos": 5.893073305278712e+19, + "train_loss": 0.2576987418716032, + "train_runtime": 14101.2638, + "train_samples_per_second": 21.91, + "train_steps_per_second": 2.739 + } + ], + "max_steps": 38620, + "num_train_epochs": 20, + "total_flos": 5.893073305278712e+19, + "trial_name": null, + "trial_params": null +}