End of training
Browse files- all_results.json +4 -1
- training_log.jsonl +265 -57
all_results.json
CHANGED
|
@@ -3,7 +3,10 @@
|
|
| 3 |
"eval_cer": 2.950426138652145,
|
| 4 |
"eval_loss": 0.10621041804552078,
|
| 5 |
"eval_runtime": 5892.3475,
|
|
|
|
| 6 |
"eval_samples_per_second": 4.612,
|
| 7 |
"eval_steps_per_second": 0.577,
|
| 8 |
-
"eval_wer": 2.6982781026640676
|
|
|
|
|
|
|
| 9 |
}
|
|
|
|
| 3 |
"eval_cer": 2.950426138652145,
|
| 4 |
"eval_loss": 0.10621041804552078,
|
| 5 |
"eval_runtime": 5892.3475,
|
| 6 |
+
"eval_samples": 27174,
|
| 7 |
"eval_samples_per_second": 4.612,
|
| 8 |
"eval_steps_per_second": 0.577,
|
| 9 |
+
"eval_wer": 2.6982781026640676,
|
| 10 |
+
"test_samples": 12987,
|
| 11 |
+
"train_samples": 207181
|
| 12 |
}
|
training_log.jsonl
CHANGED
|
@@ -1,57 +1,265 @@
|
|
| 1 |
-
{"loss":
|
| 2 |
-
{"loss": 1.
|
| 3 |
-
{"loss": 0.
|
| 4 |
-
{"loss": 0.
|
| 5 |
-
{"loss": 0.
|
| 6 |
-
{"loss": 0.
|
| 7 |
-
{"loss": 0.
|
| 8 |
-
{"loss": 0.
|
| 9 |
-
{"loss": 0.
|
| 10 |
-
{"loss": 0.
|
| 11 |
-
{"loss": 0.
|
| 12 |
-
{"loss": 0.
|
| 13 |
-
{"
|
| 14 |
-
{"loss": 0.
|
| 15 |
-
{"loss": 0.
|
| 16 |
-
{"loss": 0.
|
| 17 |
-
{"loss": 0.
|
| 18 |
-
{"loss": 0.
|
| 19 |
-
{"loss": 0.
|
| 20 |
-
{"loss": 0.
|
| 21 |
-
{"loss": 0.
|
| 22 |
-
{"loss": 0.
|
| 23 |
-
{"loss": 0.
|
| 24 |
-
{"loss": 0.
|
| 25 |
-
{"loss": 0.
|
| 26 |
-
{"loss": 0.
|
| 27 |
-
{"
|
| 28 |
-
{"loss": 0.
|
| 29 |
-
{"loss": 0.
|
| 30 |
-
{"loss": 0.
|
| 31 |
-
{"loss": 0.
|
| 32 |
-
{"loss": 0.
|
| 33 |
-
{"loss": 0.
|
| 34 |
-
{"loss": 0.
|
| 35 |
-
{"loss": 0.
|
| 36 |
-
{"loss": 0.
|
| 37 |
-
{"loss": 0.
|
| 38 |
-
{"loss": 0.
|
| 39 |
-
{"loss": 0.
|
| 40 |
-
{"loss": 0.
|
| 41 |
-
{"
|
| 42 |
-
{"loss": 0.
|
| 43 |
-
{"loss": 0.
|
| 44 |
-
{"loss": 0.
|
| 45 |
-
{"loss": 0.
|
| 46 |
-
{"loss": 0.
|
| 47 |
-
{"loss": 0.
|
| 48 |
-
{"loss": 0.
|
| 49 |
-
{"loss": 0.
|
| 50 |
-
{"loss": 0.
|
| 51 |
-
{"loss": 0.
|
| 52 |
-
{"loss": 0.
|
| 53 |
-
{"loss": 0.
|
| 54 |
-
{"loss": 0.
|
| 55 |
-
{"
|
| 56 |
-
{"
|
| 57 |
-
{"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"loss": 2.3246, "grad_norm": 34.954978942871094, "learning_rate": 1.2375e-06, "epoch": 0.015445208124179472, "step": 100}
|
| 2 |
+
{"loss": 1.1027, "grad_norm": 22.29987144470215, "learning_rate": 2.4875000000000003e-06, "epoch": 0.030890416248358945, "step": 200}
|
| 3 |
+
{"loss": 0.7925, "grad_norm": 21.926483154296875, "learning_rate": 3.7375000000000006e-06, "epoch": 0.04633562437253842, "step": 300}
|
| 4 |
+
{"loss": 0.4957, "grad_norm": 14.501327514648438, "learning_rate": 4.987500000000001e-06, "epoch": 0.06178083249671789, "step": 400}
|
| 5 |
+
{"loss": 0.3257, "grad_norm": 10.838948249816895, "learning_rate": 6.237500000000001e-06, "epoch": 0.07722604062089737, "step": 500}
|
| 6 |
+
{"loss": 0.3014, "grad_norm": 10.551115036010742, "learning_rate": 7.487500000000001e-06, "epoch": 0.09267124874507685, "step": 600}
|
| 7 |
+
{"loss": 0.2735, "grad_norm": 9.680395126342773, "learning_rate": 8.7375e-06, "epoch": 0.10811645686925632, "step": 700}
|
| 8 |
+
{"loss": 0.2593, "grad_norm": 7.195150852203369, "learning_rate": 9.9875e-06, "epoch": 0.12356166499343578, "step": 800}
|
| 9 |
+
{"loss": 0.2491, "grad_norm": 9.643333435058594, "learning_rate": 1.12375e-05, "epoch": 0.13900687311761525, "step": 900}
|
| 10 |
+
{"loss": 0.2399, "grad_norm": 6.902649879455566, "learning_rate": 1.2487500000000002e-05, "epoch": 0.15445208124179474, "step": 1000}
|
| 11 |
+
{"loss": 0.2248, "grad_norm": 8.377968788146973, "learning_rate": 1.3737500000000002e-05, "epoch": 0.1698972893659742, "step": 1100}
|
| 12 |
+
{"loss": 0.2149, "grad_norm": 8.831914901733398, "learning_rate": 1.4987500000000002e-05, "epoch": 0.1853424974901537, "step": 1200}
|
| 13 |
+
{"loss": 0.2096, "grad_norm": 5.751692295074463, "learning_rate": 1.62375e-05, "epoch": 0.20078770561433315, "step": 1300}
|
| 14 |
+
{"loss": 0.2007, "grad_norm": 4.953655242919922, "learning_rate": 1.74875e-05, "epoch": 0.21623291373851264, "step": 1400}
|
| 15 |
+
{"loss": 0.198, "grad_norm": 7.7289605140686035, "learning_rate": 1.87375e-05, "epoch": 0.2316781218626921, "step": 1500}
|
| 16 |
+
{"loss": 0.1961, "grad_norm": 7.267148017883301, "learning_rate": 1.99875e-05, "epoch": 0.24712332998687156, "step": 1600}
|
| 17 |
+
{"loss": 0.1937, "grad_norm": 8.58221435546875, "learning_rate": 1.9946845637583895e-05, "epoch": 0.262568538111051, "step": 1700}
|
| 18 |
+
{"loss": 0.1791, "grad_norm": 6.28731107711792, "learning_rate": 1.989315436241611e-05, "epoch": 0.2780137462352305, "step": 1800}
|
| 19 |
+
{"loss": 0.1787, "grad_norm": 6.678342342376709, "learning_rate": 1.983946308724832e-05, "epoch": 0.29345895435941, "step": 1900}
|
| 20 |
+
{"loss": 0.1672, "grad_norm": 7.137376308441162, "learning_rate": 1.978577181208054e-05, "epoch": 0.3089041624835895, "step": 2000}
|
| 21 |
+
{"loss": 0.1679, "grad_norm": 4.73854398727417, "learning_rate": 1.9732080536912754e-05, "epoch": 0.3243493706077689, "step": 2100}
|
| 22 |
+
{"loss": 0.1643, "grad_norm": 7.315737247467041, "learning_rate": 1.967838926174497e-05, "epoch": 0.3397945787319484, "step": 2200}
|
| 23 |
+
{"loss": 0.1566, "grad_norm": 5.333303928375244, "learning_rate": 1.9624697986577184e-05, "epoch": 0.3552397868561279, "step": 2300}
|
| 24 |
+
{"loss": 0.1566, "grad_norm": 7.0448150634765625, "learning_rate": 1.95710067114094e-05, "epoch": 0.3706849949803074, "step": 2400}
|
| 25 |
+
{"loss": 0.1486, "grad_norm": 6.843142032623291, "learning_rate": 1.9517315436241614e-05, "epoch": 0.3861302031044868, "step": 2500}
|
| 26 |
+
{"loss": 0.1515, "grad_norm": 8.338215827941895, "learning_rate": 1.9463624161073825e-05, "epoch": 0.4015754112286663, "step": 2600}
|
| 27 |
+
{"loss": 0.1392, "grad_norm": 6.6149444580078125, "learning_rate": 1.9409932885906043e-05, "epoch": 0.4170206193528458, "step": 2700}
|
| 28 |
+
{"loss": 0.1388, "grad_norm": 6.691015243530273, "learning_rate": 1.9356241610738255e-05, "epoch": 0.4324658274770253, "step": 2800}
|
| 29 |
+
{"loss": 0.1411, "grad_norm": 7.022567272186279, "learning_rate": 1.9302550335570473e-05, "epoch": 0.4479110356012047, "step": 2900}
|
| 30 |
+
{"loss": 0.138, "grad_norm": 6.332781791687012, "learning_rate": 1.9248859060402688e-05, "epoch": 0.4633562437253842, "step": 3000}
|
| 31 |
+
{"loss": 0.1346, "grad_norm": 5.324976444244385, "learning_rate": 1.91951677852349e-05, "epoch": 0.4788014518495637, "step": 3100}
|
| 32 |
+
{"loss": 0.1348, "grad_norm": 6.0165114402771, "learning_rate": 1.9141476510067117e-05, "epoch": 0.4942466599737431, "step": 3200}
|
| 33 |
+
{"loss": 0.1329, "grad_norm": 4.956530570983887, "learning_rate": 1.908778523489933e-05, "epoch": 0.5096918680979227, "step": 3300}
|
| 34 |
+
{"loss": 0.1268, "grad_norm": 4.433960437774658, "learning_rate": 1.9034093959731547e-05, "epoch": 0.525137076222102, "step": 3400}
|
| 35 |
+
{"loss": 0.1275, "grad_norm": 4.740108489990234, "learning_rate": 1.898040268456376e-05, "epoch": 0.5405822843462815, "step": 3500}
|
| 36 |
+
{"loss": 0.1239, "grad_norm": 5.591785907745361, "learning_rate": 1.8926711409395973e-05, "epoch": 0.556027492470461, "step": 3600}
|
| 37 |
+
{"loss": 0.1269, "grad_norm": 4.656253814697266, "learning_rate": 1.887302013422819e-05, "epoch": 0.5714727005946405, "step": 3700}
|
| 38 |
+
{"loss": 0.1214, "grad_norm": 4.906614303588867, "learning_rate": 1.8819328859060403e-05, "epoch": 0.58691790871882, "step": 3800}
|
| 39 |
+
{"loss": 0.1316, "grad_norm": 5.207762718200684, "learning_rate": 1.876563758389262e-05, "epoch": 0.6023631168429995, "step": 3900}
|
| 40 |
+
{"loss": 0.1151, "grad_norm": 6.727694034576416, "learning_rate": 1.8711946308724833e-05, "epoch": 0.617808324967179, "step": 4000}
|
| 41 |
+
{"loss": 0.1187, "grad_norm": 6.817234992980957, "learning_rate": 1.8658255033557047e-05, "epoch": 0.6332535330913585, "step": 4100}
|
| 42 |
+
{"loss": 0.1201, "grad_norm": 5.209933280944824, "learning_rate": 1.8604563758389262e-05, "epoch": 0.6486987412155378, "step": 4200}
|
| 43 |
+
{"loss": 0.1074, "grad_norm": 4.433166980743408, "learning_rate": 1.8550872483221477e-05, "epoch": 0.6641439493397173, "step": 4300}
|
| 44 |
+
{"loss": 0.1182, "grad_norm": 5.06485652923584, "learning_rate": 1.8497181208053695e-05, "epoch": 0.6795891574638968, "step": 4400}
|
| 45 |
+
{"loss": 0.1062, "grad_norm": 6.160850524902344, "learning_rate": 1.8443489932885907e-05, "epoch": 0.6950343655880763, "step": 4500}
|
| 46 |
+
{"loss": 0.1137, "grad_norm": 5.975137710571289, "learning_rate": 1.838979865771812e-05, "epoch": 0.7104795737122558, "step": 4600}
|
| 47 |
+
{"loss": 0.1047, "grad_norm": 5.255064964294434, "learning_rate": 1.8336107382550336e-05, "epoch": 0.7259247818364353, "step": 4700}
|
| 48 |
+
{"loss": 0.1079, "grad_norm": 4.182297229766846, "learning_rate": 1.828241610738255e-05, "epoch": 0.7413699899606148, "step": 4800}
|
| 49 |
+
{"loss": 0.104, "grad_norm": 3.115267515182495, "learning_rate": 1.8228724832214766e-05, "epoch": 0.7568151980847941, "step": 4900}
|
| 50 |
+
{"loss": 0.1025, "grad_norm": 4.368956089019775, "learning_rate": 1.817503355704698e-05, "epoch": 0.7722604062089736, "step": 5000}
|
| 51 |
+
{"loss": 0.098, "grad_norm": 5.158085346221924, "learning_rate": 1.8121342281879196e-05, "epoch": 0.7877056143331531, "step": 5100}
|
| 52 |
+
{"loss": 0.103, "grad_norm": 5.80360746383667, "learning_rate": 1.806765100671141e-05, "epoch": 0.8031508224573326, "step": 5200}
|
| 53 |
+
{"loss": 0.1097, "grad_norm": 6.364624977111816, "learning_rate": 1.8013959731543625e-05, "epoch": 0.8185960305815121, "step": 5300}
|
| 54 |
+
{"loss": 0.1009, "grad_norm": 6.2352495193481445, "learning_rate": 1.796026845637584e-05, "epoch": 0.8340412387056916, "step": 5400}
|
| 55 |
+
{"loss": 0.1087, "grad_norm": 5.61583948135376, "learning_rate": 1.7906577181208055e-05, "epoch": 0.8494864468298711, "step": 5500}
|
| 56 |
+
{"loss": 0.099, "grad_norm": 5.854990482330322, "learning_rate": 1.785288590604027e-05, "epoch": 0.8649316549540506, "step": 5600}
|
| 57 |
+
{"loss": 0.0982, "grad_norm": 5.854545593261719, "learning_rate": 1.7799194630872485e-05, "epoch": 0.8803768630782299, "step": 5700}
|
| 58 |
+
{"loss": 0.094, "grad_norm": 5.2870612144470215, "learning_rate": 1.77455033557047e-05, "epoch": 0.8958220712024094, "step": 5800}
|
| 59 |
+
{"loss": 0.0924, "grad_norm": 4.898568630218506, "learning_rate": 1.7691812080536914e-05, "epoch": 0.9112672793265889, "step": 5900}
|
| 60 |
+
{"loss": 0.1027, "grad_norm": 4.1877923011779785, "learning_rate": 1.763812080536913e-05, "epoch": 0.9267124874507684, "step": 6000}
|
| 61 |
+
{"loss": 0.0939, "grad_norm": 3.115177631378174, "learning_rate": 1.7584429530201344e-05, "epoch": 0.9421576955749479, "step": 6100}
|
| 62 |
+
{"loss": 0.0987, "grad_norm": 5.382924556732178, "learning_rate": 1.753073825503356e-05, "epoch": 0.9576029036991274, "step": 6200}
|
| 63 |
+
{"loss": 0.0937, "grad_norm": 4.143594264984131, "learning_rate": 1.7477046979865774e-05, "epoch": 0.9730481118233069, "step": 6300}
|
| 64 |
+
{"loss": 0.0873, "grad_norm": 5.049436092376709, "learning_rate": 1.742335570469799e-05, "epoch": 0.9884933199474862, "step": 6400}
|
| 65 |
+
{"eval_loss": 0.10621471703052521, "eval_wer": 2.8609889027513296, "eval_cer": 2.990143485705616, "eval_runtime": 6156.5377, "eval_samples_per_second": 4.414, "eval_steps_per_second": 0.552, "epoch": 1.0, "step": 6475}
|
| 66 |
+
{"loss": 0.0893, "grad_norm": 1.4537144899368286, "learning_rate": 1.7369664429530203e-05, "epoch": 1.003861302031045, "step": 6500}
|
| 67 |
+
{"loss": 0.0585, "grad_norm": 1.554871916770935, "learning_rate": 1.7315973154362418e-05, "epoch": 1.0193065101552243, "step": 6600}
|
| 68 |
+
{"loss": 0.0578, "grad_norm": 0.8061501979827881, "learning_rate": 1.7262281879194633e-05, "epoch": 1.0347517182794037, "step": 6700}
|
| 69 |
+
{"loss": 0.0554, "grad_norm": 1.4121989011764526, "learning_rate": 1.7208590604026848e-05, "epoch": 1.0501969264035833, "step": 6800}
|
| 70 |
+
{"loss": 0.0559, "grad_norm": 1.322690725326538, "learning_rate": 1.7154899328859062e-05, "epoch": 1.0656421345277627, "step": 6900}
|
| 71 |
+
{"loss": 0.0569, "grad_norm": 0.8532437086105347, "learning_rate": 1.7101208053691277e-05, "epoch": 1.0810873426519423, "step": 7000}
|
| 72 |
+
{"loss": 0.0532, "grad_norm": 1.3657441139221191, "learning_rate": 1.7047516778523492e-05, "epoch": 1.0965325507761217, "step": 7100}
|
| 73 |
+
{"loss": 0.057, "grad_norm": 1.5799016952514648, "learning_rate": 1.6993825503355707e-05, "epoch": 1.1119777589003013, "step": 7200}
|
| 74 |
+
{"loss": 0.0541, "grad_norm": 1.6521095037460327, "learning_rate": 1.6940134228187922e-05, "epoch": 1.1274229670244806, "step": 7300}
|
| 75 |
+
{"loss": 0.0562, "grad_norm": 1.3745259046554565, "learning_rate": 1.6886442953020133e-05, "epoch": 1.14286817514866, "step": 7400}
|
| 76 |
+
{"loss": 0.0525, "grad_norm": 1.4325778484344482, "learning_rate": 1.683275167785235e-05, "epoch": 1.1583133832728396, "step": 7500}
|
| 77 |
+
{"loss": 0.0525, "grad_norm": 2.0049407482147217, "learning_rate": 1.6779060402684566e-05, "epoch": 1.173758591397019, "step": 7600}
|
| 78 |
+
{"loss": 0.0523, "grad_norm": 1.5848065614700317, "learning_rate": 1.672536912751678e-05, "epoch": 1.1892037995211986, "step": 7700}
|
| 79 |
+
{"loss": 0.0571, "grad_norm": 1.4551335573196411, "learning_rate": 1.6671677852348996e-05, "epoch": 1.204649007645378, "step": 7800}
|
| 80 |
+
{"loss": 0.0572, "grad_norm": 1.6041462421417236, "learning_rate": 1.6617986577181207e-05, "epoch": 1.2200942157695576, "step": 7900}
|
| 81 |
+
{"loss": 0.0574, "grad_norm": 2.061491012573242, "learning_rate": 1.6564295302013426e-05, "epoch": 1.235539423893737, "step": 8000}
|
| 82 |
+
{"loss": 0.0545, "grad_norm": 1.7531427145004272, "learning_rate": 1.6510604026845637e-05, "epoch": 1.2509846320179165, "step": 8100}
|
| 83 |
+
{"loss": 0.0544, "grad_norm": 1.1294121742248535, "learning_rate": 1.6456912751677855e-05, "epoch": 1.266429840142096, "step": 8200}
|
| 84 |
+
{"loss": 0.0588, "grad_norm": 1.2551779747009277, "learning_rate": 1.640322147651007e-05, "epoch": 1.2818750482662753, "step": 8300}
|
| 85 |
+
{"loss": 0.0536, "grad_norm": 1.8939077854156494, "learning_rate": 1.634953020134228e-05, "epoch": 1.297320256390455, "step": 8400}
|
| 86 |
+
{"loss": 0.0629, "grad_norm": 0.9555742144584656, "learning_rate": 1.62958389261745e-05, "epoch": 1.3127654645146343, "step": 8500}
|
| 87 |
+
{"loss": 0.0565, "grad_norm": 1.3192559480667114, "learning_rate": 1.624214765100671e-05, "epoch": 1.3282106726388139, "step": 8600}
|
| 88 |
+
{"loss": 0.052, "grad_norm": 1.640885353088379, "learning_rate": 1.618845637583893e-05, "epoch": 1.3436558807629932, "step": 8700}
|
| 89 |
+
{"loss": 0.0544, "grad_norm": 1.0616875886917114, "learning_rate": 1.613476510067114e-05, "epoch": 1.3591010888871726, "step": 8800}
|
| 90 |
+
{"loss": 0.0502, "grad_norm": 1.1386332511901855, "learning_rate": 1.6081073825503356e-05, "epoch": 1.3745462970113522, "step": 8900}
|
| 91 |
+
{"loss": 0.0495, "grad_norm": 1.1952922344207764, "learning_rate": 1.6027382550335574e-05, "epoch": 1.3899915051355318, "step": 9000}
|
| 92 |
+
{"loss": 0.0546, "grad_norm": 0.9143213629722595, "learning_rate": 1.5973691275167785e-05, "epoch": 1.4054367132597112, "step": 9100}
|
| 93 |
+
{"loss": 0.053, "grad_norm": 1.0954219102859497, "learning_rate": 1.5920000000000003e-05, "epoch": 1.4208819213838906, "step": 9200}
|
| 94 |
+
{"loss": 0.0565, "grad_norm": 1.3947011232376099, "learning_rate": 1.5866308724832215e-05, "epoch": 1.4363271295080702, "step": 9300}
|
| 95 |
+
{"loss": 0.0536, "grad_norm": 1.048740029335022, "learning_rate": 1.581261744966443e-05, "epoch": 1.4517723376322496, "step": 9400}
|
| 96 |
+
{"loss": 0.055, "grad_norm": 1.1906921863555908, "learning_rate": 1.5758926174496645e-05, "epoch": 1.4672175457564292, "step": 9500}
|
| 97 |
+
{"loss": 0.0525, "grad_norm": 1.979690432548523, "learning_rate": 1.570523489932886e-05, "epoch": 1.4826627538806085, "step": 9600}
|
| 98 |
+
{"loss": 0.0522, "grad_norm": 1.6308488845825195, "learning_rate": 1.5651543624161074e-05, "epoch": 1.498107962004788, "step": 9700}
|
| 99 |
+
{"loss": 0.0517, "grad_norm": 1.525758147239685, "learning_rate": 1.559785234899329e-05, "epoch": 1.5135531701289675, "step": 9800}
|
| 100 |
+
{"loss": 0.0551, "grad_norm": 1.4599488973617554, "learning_rate": 1.5544161073825507e-05, "epoch": 1.528998378253147, "step": 9900}
|
| 101 |
+
{"loss": 0.0507, "grad_norm": 1.186352014541626, "learning_rate": 1.549046979865772e-05, "epoch": 1.5444435863773265, "step": 10000}
|
| 102 |
+
{"loss": 0.0525, "grad_norm": 0.6791055798530579, "learning_rate": 1.5436778523489933e-05, "epoch": 1.5598887945015059, "step": 10100}
|
| 103 |
+
{"loss": 0.0567, "grad_norm": 1.0277873277664185, "learning_rate": 1.5383087248322148e-05, "epoch": 1.5753340026256852, "step": 10200}
|
| 104 |
+
{"loss": 0.0543, "grad_norm": 1.0918126106262207, "learning_rate": 1.5329395973154363e-05, "epoch": 1.5907792107498648, "step": 10300}
|
| 105 |
+
{"loss": 0.0486, "grad_norm": 1.1759934425354004, "learning_rate": 1.5275704697986578e-05, "epoch": 1.6062244188740444, "step": 10400}
|
| 106 |
+
{"loss": 0.0543, "grad_norm": 1.0399106740951538, "learning_rate": 1.5222013422818793e-05, "epoch": 1.6216696269982238, "step": 10500}
|
| 107 |
+
{"loss": 0.0515, "grad_norm": 1.9203470945358276, "learning_rate": 1.516832214765101e-05, "epoch": 1.6371148351224032, "step": 10600}
|
| 108 |
+
{"loss": 0.0543, "grad_norm": 2.4237537384033203, "learning_rate": 1.5114630872483222e-05, "epoch": 1.6525600432465828, "step": 10700}
|
| 109 |
+
{"loss": 0.0466, "grad_norm": 1.7403265237808228, "learning_rate": 1.5060939597315439e-05, "epoch": 1.6680052513707624, "step": 10800}
|
| 110 |
+
{"loss": 0.057, "grad_norm": 1.2284775972366333, "learning_rate": 1.5007248322147652e-05, "epoch": 1.6834504594949418, "step": 10900}
|
| 111 |
+
{"loss": 0.0532, "grad_norm": 1.6382396221160889, "learning_rate": 1.4953557046979867e-05, "epoch": 1.6988956676191211, "step": 11000}
|
| 112 |
+
{"loss": 0.0503, "grad_norm": 0.968417227268219, "learning_rate": 1.4899865771812082e-05, "epoch": 1.7143408757433005, "step": 11100}
|
| 113 |
+
{"loss": 0.0522, "grad_norm": 0.697189450263977, "learning_rate": 1.4846174496644297e-05, "epoch": 1.7297860838674801, "step": 11200}
|
| 114 |
+
{"loss": 0.0539, "grad_norm": 2.441664695739746, "learning_rate": 1.479248322147651e-05, "epoch": 1.7452312919916597, "step": 11300}
|
| 115 |
+
{"loss": 0.0535, "grad_norm": 2.1173808574676514, "learning_rate": 1.4738791946308726e-05, "epoch": 1.760676500115839, "step": 11400}
|
| 116 |
+
{"loss": 0.0533, "grad_norm": 1.1768113374710083, "learning_rate": 1.4685100671140941e-05, "epoch": 1.7761217082400185, "step": 11500}
|
| 117 |
+
{"loss": 0.055, "grad_norm": 1.0143743753433228, "learning_rate": 1.4631409395973156e-05, "epoch": 1.7915669163641978, "step": 11600}
|
| 118 |
+
{"loss": 0.0487, "grad_norm": 1.4157536029815674, "learning_rate": 1.457771812080537e-05, "epoch": 1.8070121244883774, "step": 11700}
|
| 119 |
+
{"loss": 0.0463, "grad_norm": 1.8482532501220703, "learning_rate": 1.4524026845637584e-05, "epoch": 1.822457332612557, "step": 11800}
|
| 120 |
+
{"loss": 0.0483, "grad_norm": 1.1774625778198242, "learning_rate": 1.44703355704698e-05, "epoch": 1.8379025407367364, "step": 11900}
|
| 121 |
+
{"loss": 0.0527, "grad_norm": 2.0068328380584717, "learning_rate": 1.4416644295302013e-05, "epoch": 1.8533477488609158, "step": 12000}
|
| 122 |
+
{"loss": 0.0509, "grad_norm": 1.3660190105438232, "learning_rate": 1.436295302013423e-05, "epoch": 1.8687929569850954, "step": 12100}
|
| 123 |
+
{"loss": 0.0486, "grad_norm": 1.4517139196395874, "learning_rate": 1.4309261744966445e-05, "epoch": 1.884238165109275, "step": 12200}
|
| 124 |
+
{"loss": 0.0468, "grad_norm": 1.676147699356079, "learning_rate": 1.4255570469798658e-05, "epoch": 1.8996833732334544, "step": 12300}
|
| 125 |
+
{"loss": 0.0482, "grad_norm": 1.2899450063705444, "learning_rate": 1.4201879194630874e-05, "epoch": 1.9151285813576338, "step": 12400}
|
| 126 |
+
{"loss": 0.0487, "grad_norm": 0.5366058945655823, "learning_rate": 1.4148187919463088e-05, "epoch": 1.9305737894818131, "step": 12500}
|
| 127 |
+
{"loss": 0.0449, "grad_norm": 1.4144234657287598, "learning_rate": 1.4094496644295304e-05, "epoch": 1.9460189976059927, "step": 12600}
|
| 128 |
+
{"loss": 0.0486, "grad_norm": 1.9612187147140503, "learning_rate": 1.4040805369127517e-05, "epoch": 1.9614642057301723, "step": 12700}
|
| 129 |
+
{"loss": 0.0482, "grad_norm": 2.1188743114471436, "learning_rate": 1.3987114093959732e-05, "epoch": 1.9769094138543517, "step": 12800}
|
| 130 |
+
{"loss": 0.0493, "grad_norm": 2.2827799320220947, "learning_rate": 1.3933422818791949e-05, "epoch": 1.992354621978531, "step": 12900}
|
| 131 |
+
{"eval_loss": 0.08518411964178085, "eval_wer": 3.8648850742872005, "eval_cer": 3.7046732368471686, "eval_runtime": 5892.2601, "eval_samples_per_second": 4.612, "eval_steps_per_second": 0.577, "epoch": 2.0, "step": 12950}
|
| 132 |
+
{"loss": 0.0333, "grad_norm": 1.141129493713379, "learning_rate": 1.3879731543624162e-05, "epoch": 2.00772260406209, "step": 13000}
|
| 133 |
+
{"loss": 0.0212, "grad_norm": 1.739894151687622, "learning_rate": 1.3826040268456378e-05, "epoch": 2.0231678121862693, "step": 13100}
|
| 134 |
+
{"loss": 0.0228, "grad_norm": 2.4176976680755615, "learning_rate": 1.3772348993288591e-05, "epoch": 2.0386130203104487, "step": 13200}
|
| 135 |
+
{"loss": 0.0219, "grad_norm": 2.188455581665039, "learning_rate": 1.3718657718120806e-05, "epoch": 2.054058228434628, "step": 13300}
|
| 136 |
+
{"loss": 0.0232, "grad_norm": 1.5634980201721191, "learning_rate": 1.3664966442953021e-05, "epoch": 2.0695034365588074, "step": 13400}
|
| 137 |
+
{"loss": 0.0238, "grad_norm": 0.5609109997749329, "learning_rate": 1.3611275167785236e-05, "epoch": 2.0849486446829872, "step": 13500}
|
| 138 |
+
{"loss": 0.0211, "grad_norm": 2.691328763961792, "learning_rate": 1.3557583892617449e-05, "epoch": 2.1003938528071666, "step": 13600}
|
| 139 |
+
{"loss": 0.0247, "grad_norm": 1.5539088249206543, "learning_rate": 1.3503892617449665e-05, "epoch": 2.115839060931346, "step": 13700}
|
| 140 |
+
{"loss": 0.0261, "grad_norm": 3.458829641342163, "learning_rate": 1.3450201342281882e-05, "epoch": 2.1312842690555254, "step": 13800}
|
| 141 |
+
{"loss": 0.0238, "grad_norm": 2.3176066875457764, "learning_rate": 1.3396510067114095e-05, "epoch": 2.146729477179705, "step": 13900}
|
| 142 |
+
{"loss": 0.0243, "grad_norm": 3.316974401473999, "learning_rate": 1.334281879194631e-05, "epoch": 2.1621746853038846, "step": 14000}
|
| 143 |
+
{"loss": 0.024, "grad_norm": 6.100398063659668, "learning_rate": 1.3289127516778523e-05, "epoch": 2.177619893428064, "step": 14100}
|
| 144 |
+
{"loss": 0.0238, "grad_norm": 2.5312767028808594, "learning_rate": 1.323543624161074e-05, "epoch": 2.1930651015522433, "step": 14200}
|
| 145 |
+
{"loss": 0.0255, "grad_norm": 1.7092468738555908, "learning_rate": 1.3181744966442953e-05, "epoch": 2.2085103096764227, "step": 14300}
|
| 146 |
+
{"loss": 0.0273, "grad_norm": 2.5337600708007812, "learning_rate": 1.312805369127517e-05, "epoch": 2.2239555178006025, "step": 14400}
|
| 147 |
+
{"loss": 0.0222, "grad_norm": 2.240237236022949, "learning_rate": 1.3074362416107384e-05, "epoch": 2.239400725924782, "step": 14500}
|
| 148 |
+
{"loss": 0.0247, "grad_norm": 1.514408826828003, "learning_rate": 1.3020671140939599e-05, "epoch": 2.2548459340489613, "step": 14600}
|
| 149 |
+
{"loss": 0.0247, "grad_norm": 3.8781371116638184, "learning_rate": 1.2966979865771814e-05, "epoch": 2.2702911421731407, "step": 14700}
|
| 150 |
+
{"loss": 0.0235, "grad_norm": 1.4282342195510864, "learning_rate": 1.2913288590604027e-05, "epoch": 2.28573635029732, "step": 14800}
|
| 151 |
+
{"loss": 0.0238, "grad_norm": 3.004528045654297, "learning_rate": 1.2859597315436243e-05, "epoch": 2.3011815584215, "step": 14900}
|
| 152 |
+
{"loss": 0.0235, "grad_norm": 2.147857666015625, "learning_rate": 1.2805906040268456e-05, "epoch": 2.3166267665456792, "step": 15000}
|
| 153 |
+
{"loss": 0.027, "grad_norm": 2.3109426498413086, "learning_rate": 1.2752214765100673e-05, "epoch": 2.3320719746698586, "step": 15100}
|
| 154 |
+
{"loss": 0.022, "grad_norm": 1.1319489479064941, "learning_rate": 1.2698523489932888e-05, "epoch": 2.347517182794038, "step": 15200}
|
| 155 |
+
{"loss": 0.021, "grad_norm": 1.3498146533966064, "learning_rate": 1.2644832214765101e-05, "epoch": 2.362962390918218, "step": 15300}
|
| 156 |
+
{"loss": 0.0228, "grad_norm": 2.1282238960266113, "learning_rate": 1.2591140939597317e-05, "epoch": 2.378407599042397, "step": 15400}
|
| 157 |
+
{"loss": 0.0262, "grad_norm": 2.113837957382202, "learning_rate": 1.253744966442953e-05, "epoch": 2.3938528071665766, "step": 15500}
|
| 158 |
+
{"loss": 0.0227, "grad_norm": 2.8750193119049072, "learning_rate": 1.2483758389261747e-05, "epoch": 2.409298015290756, "step": 15600}
|
| 159 |
+
{"loss": 0.0231, "grad_norm": 1.7577595710754395, "learning_rate": 1.243006711409396e-05, "epoch": 2.4247432234149358, "step": 15700}
|
| 160 |
+
{"loss": 0.0241, "grad_norm": 3.5743813514709473, "learning_rate": 1.2376375838926175e-05, "epoch": 2.440188431539115, "step": 15800}
|
| 161 |
+
{"loss": 0.022, "grad_norm": 1.040603756904602, "learning_rate": 1.232268456375839e-05, "epoch": 2.4556336396632945, "step": 15900}
|
| 162 |
+
{"loss": 0.0231, "grad_norm": 2.63417649269104, "learning_rate": 1.2268993288590605e-05, "epoch": 2.471078847787474, "step": 16000}
|
| 163 |
+
{"loss": 0.0249, "grad_norm": 3.670100450515747, "learning_rate": 1.2215302013422821e-05, "epoch": 2.4865240559116533, "step": 16100}
|
| 164 |
+
{"loss": 0.0259, "grad_norm": 5.80275297164917, "learning_rate": 1.2161610738255034e-05, "epoch": 2.501969264035833, "step": 16200}
|
| 165 |
+
{"loss": 0.0252, "grad_norm": 2.539285659790039, "learning_rate": 1.210791946308725e-05, "epoch": 2.5174144721600125, "step": 16300}
|
| 166 |
+
{"loss": 0.0241, "grad_norm": Infinity, "learning_rate": 1.2054228187919464e-05, "epoch": 2.532859680284192, "step": 16400}
|
| 167 |
+
{"loss": 0.0233, "grad_norm": 2.7852771282196045, "learning_rate": 1.2000536912751679e-05, "epoch": 2.548304888408371, "step": 16500}
|
| 168 |
+
{"loss": 0.0248, "grad_norm": 1.3278162479400635, "learning_rate": 1.1946845637583892e-05, "epoch": 2.5637500965325506, "step": 16600}
|
| 169 |
+
{"loss": 0.0242, "grad_norm": 1.2023398876190186, "learning_rate": 1.1893154362416108e-05, "epoch": 2.5791953046567304, "step": 16700}
|
| 170 |
+
{"loss": 0.0244, "grad_norm": 4.091363430023193, "learning_rate": 1.1839463087248323e-05, "epoch": 2.59464051278091, "step": 16800}
|
| 171 |
+
{"loss": 0.0212, "grad_norm": 2.2210638523101807, "learning_rate": 1.1785771812080538e-05, "epoch": 2.610085720905089, "step": 16900}
|
| 172 |
+
{"loss": 0.0264, "grad_norm": 1.3215293884277344, "learning_rate": 1.1732080536912753e-05, "epoch": 2.6255309290292685, "step": 17000}
|
| 173 |
+
{"loss": 0.0253, "grad_norm": 1.8649264574050903, "learning_rate": 1.1678389261744966e-05, "epoch": 2.640976137153448, "step": 17100}
|
| 174 |
+
{"loss": 0.0255, "grad_norm": 2.791181802749634, "learning_rate": 1.1624697986577183e-05, "epoch": 2.6564213452776277, "step": 17200}
|
| 175 |
+
{"loss": 0.0261, "grad_norm": 3.361290216445923, "learning_rate": 1.1571006711409396e-05, "epoch": 2.671866553401807, "step": 17300}
|
| 176 |
+
{"loss": 0.0234, "grad_norm": 1.8036214113235474, "learning_rate": 1.1517315436241612e-05, "epoch": 2.6873117615259865, "step": 17400}
|
| 177 |
+
{"loss": 0.0238, "grad_norm": 1.8118869066238403, "learning_rate": 1.1463624161073827e-05, "epoch": 2.702756969650166, "step": 17500}
|
| 178 |
+
{"loss": 0.0212, "grad_norm": 3.0242505073547363, "learning_rate": 1.140993288590604e-05, "epoch": 2.7182021777743453, "step": 17600}
|
| 179 |
+
{"loss": 0.0244, "grad_norm": 3.848574161529541, "learning_rate": 1.1356241610738257e-05, "epoch": 2.733647385898525, "step": 17700}
|
| 180 |
+
{"loss": 0.0232, "grad_norm": 2.7855026721954346, "learning_rate": 1.130255033557047e-05, "epoch": 2.7490925940227044, "step": 17800}
|
| 181 |
+
{"loss": 0.025, "grad_norm": 3.0796775817871094, "learning_rate": 1.1248859060402686e-05, "epoch": 2.764537802146884, "step": 17900}
|
| 182 |
+
{"loss": 0.0232, "grad_norm": 0.9707315564155579, "learning_rate": 1.11951677852349e-05, "epoch": 2.7799830102710636, "step": 18000}
|
| 183 |
+
{"loss": 0.0258, "grad_norm": 1.9708441495895386, "learning_rate": 1.1141476510067114e-05, "epoch": 2.795428218395243, "step": 18100}
|
| 184 |
+
{"loss": 0.0231, "grad_norm": 0.9377394318580627, "learning_rate": 1.108778523489933e-05, "epoch": 2.8108734265194224, "step": 18200}
|
| 185 |
+
{"loss": 0.0211, "grad_norm": 3.1204702854156494, "learning_rate": 1.1034093959731544e-05, "epoch": 2.826318634643602, "step": 18300}
|
| 186 |
+
{"loss": 0.0228, "grad_norm": 2.8107941150665283, "learning_rate": 1.098040268456376e-05, "epoch": 2.841763842767781, "step": 18400}
|
| 187 |
+
{"loss": 0.0254, "grad_norm": 2.400878667831421, "learning_rate": 1.0926711409395974e-05, "epoch": 2.857209050891961, "step": 18500}
|
| 188 |
+
{"loss": 0.0242, "grad_norm": 0.6793500781059265, "learning_rate": 1.087302013422819e-05, "epoch": 2.8726542590161404, "step": 18600}
|
| 189 |
+
{"loss": 0.0225, "grad_norm": 2.416689872741699, "learning_rate": 1.0819328859060403e-05, "epoch": 2.8880994671403197, "step": 18700}
|
| 190 |
+
{"loss": 0.0243, "grad_norm": 2.974879026412964, "learning_rate": 1.0765637583892618e-05, "epoch": 2.903544675264499, "step": 18800}
|
| 191 |
+
{"loss": 0.0225, "grad_norm": 2.052161455154419, "learning_rate": 1.0711946308724833e-05, "epoch": 2.9189898833886785, "step": 18900}
|
| 192 |
+
{"loss": 0.0217, "grad_norm": 2.398977518081665, "learning_rate": 1.0658255033557048e-05, "epoch": 2.9344350915128583, "step": 19000}
|
| 193 |
+
{"loss": 0.0231, "grad_norm": 1.0758559703826904, "learning_rate": 1.0604563758389264e-05, "epoch": 2.9498802996370377, "step": 19100}
|
| 194 |
+
{"loss": 0.0211, "grad_norm": 2.3403520584106445, "learning_rate": 1.0550872483221477e-05, "epoch": 2.965325507761217, "step": 19200}
|
| 195 |
+
{"loss": 0.0251, "grad_norm": 6.15524435043335, "learning_rate": 1.0497181208053692e-05, "epoch": 2.9807707158853964, "step": 19300}
|
| 196 |
+
{"loss": 0.023, "grad_norm": 3.1562724113464355, "learning_rate": 1.0443489932885907e-05, "epoch": 2.996215924009576, "step": 19400}
|
| 197 |
+
{"eval_loss": 0.07923509925603867, "eval_wer": 4.4319838736771375, "eval_cer": 4.22036403494204, "eval_runtime": 5893.7538, "eval_samples_per_second": 4.611, "eval_steps_per_second": 0.576, "epoch": 3.0, "step": 19425}
|
| 198 |
+
{"loss": 0.0142, "grad_norm": 0.7176849246025085, "learning_rate": 1.0389798657718122e-05, "epoch": 3.0115839060931346, "step": 19500}
|
| 199 |
+
{"loss": 0.0089, "grad_norm": 1.074440836906433, "learning_rate": 1.0336107382550335e-05, "epoch": 3.027029114217314, "step": 19600}
|
| 200 |
+
{"loss": 0.0088, "grad_norm": 0.862775444984436, "learning_rate": 1.0282416107382552e-05, "epoch": 3.0424743223414934, "step": 19700}
|
| 201 |
+
{"loss": 0.0084, "grad_norm": 0.2428913414478302, "learning_rate": 1.0228724832214766e-05, "epoch": 3.057919530465673, "step": 19800}
|
| 202 |
+
{"loss": 0.0104, "grad_norm": 0.22252975404262543, "learning_rate": 1.0175033557046981e-05, "epoch": 3.0733647385898526, "step": 19900}
|
| 203 |
+
{"loss": 0.0089, "grad_norm": 0.16029539704322815, "learning_rate": 1.0121342281879196e-05, "epoch": 3.088809946714032, "step": 20000}
|
| 204 |
+
{"loss": 0.0106, "grad_norm": 0.16912202537059784, "learning_rate": 1.0067651006711409e-05, "epoch": 3.1042551548382114, "step": 20100}
|
| 205 |
+
{"loss": 0.01, "grad_norm": 0.3454485535621643, "learning_rate": 1.0013959731543626e-05, "epoch": 3.1197003629623907, "step": 20200}
|
| 206 |
+
{"loss": 0.0085, "grad_norm": 0.4891042709350586, "learning_rate": 9.96026845637584e-06, "epoch": 3.1351455710865705, "step": 20300}
|
| 207 |
+
{"loss": 0.0086, "grad_norm": 0.22724980115890503, "learning_rate": 9.906577181208055e-06, "epoch": 3.15059077921075, "step": 20400}
|
| 208 |
+
{"loss": 0.0098, "grad_norm": 0.6482119560241699, "learning_rate": 9.85288590604027e-06, "epoch": 3.1660359873349293, "step": 20500}
|
| 209 |
+
{"loss": 0.0102, "grad_norm": 1.339402437210083, "learning_rate": 9.799194630872483e-06, "epoch": 3.1814811954591087, "step": 20600}
|
| 210 |
+
{"loss": 0.0111, "grad_norm": 0.3536001145839691, "learning_rate": 9.745503355704698e-06, "epoch": 3.196926403583288, "step": 20700}
|
| 211 |
+
{"loss": 0.0091, "grad_norm": 0.5454040169715881, "learning_rate": 9.691812080536913e-06, "epoch": 3.212371611707468, "step": 20800}
|
| 212 |
+
{"loss": 0.011, "grad_norm": 0.5609560012817383, "learning_rate": 9.638120805369128e-06, "epoch": 3.2278168198316473, "step": 20900}
|
| 213 |
+
{"loss": 0.0098, "grad_norm": 0.39234450459480286, "learning_rate": 9.584429530201344e-06, "epoch": 3.2432620279558266, "step": 21000}
|
| 214 |
+
{"loss": 0.0093, "grad_norm": 0.9599013328552246, "learning_rate": 9.530738255033557e-06, "epoch": 3.258707236080006, "step": 21100}
|
| 215 |
+
{"loss": 0.0092, "grad_norm": 0.6345553398132324, "learning_rate": 9.477046979865772e-06, "epoch": 3.274152444204186, "step": 21200}
|
| 216 |
+
{"loss": 0.0091, "grad_norm": 0.7367005348205566, "learning_rate": 9.423355704697987e-06, "epoch": 3.289597652328365, "step": 21300}
|
| 217 |
+
{"loss": 0.0105, "grad_norm": 0.7480056285858154, "learning_rate": 9.369664429530202e-06, "epoch": 3.3050428604525446, "step": 21400}
|
| 218 |
+
{"loss": 0.0101, "grad_norm": 0.8892471790313721, "learning_rate": 9.315973154362417e-06, "epoch": 3.320488068576724, "step": 21500}
|
| 219 |
+
{"loss": 0.0108, "grad_norm": 0.8230651617050171, "learning_rate": 9.262281879194631e-06, "epoch": 3.335933276700904, "step": 21600}
|
| 220 |
+
{"loss": 0.0106, "grad_norm": 0.404323011636734, "learning_rate": 9.208590604026846e-06, "epoch": 3.351378484825083, "step": 21700}
|
| 221 |
+
{"loss": 0.0088, "grad_norm": 0.22209642827510834, "learning_rate": 9.154899328859061e-06, "epoch": 3.3668236929492625, "step": 21800}
|
| 222 |
+
{"loss": 0.0112, "grad_norm": 1.7332512140274048, "learning_rate": 9.101208053691276e-06, "epoch": 3.382268901073442, "step": 21900}
|
| 223 |
+
{"loss": 0.0112, "grad_norm": 0.1417224407196045, "learning_rate": 9.04751677852349e-06, "epoch": 3.3977141091976213, "step": 22000}
|
| 224 |
+
{"loss": 0.011, "grad_norm": 1.5628505945205688, "learning_rate": 8.993825503355706e-06, "epoch": 3.413159317321801, "step": 22100}
|
| 225 |
+
{"loss": 0.0118, "grad_norm": 1.268794059753418, "learning_rate": 8.94013422818792e-06, "epoch": 3.4286045254459805, "step": 22200}
|
| 226 |
+
{"loss": 0.0092, "grad_norm": 0.8178913593292236, "learning_rate": 8.886442953020135e-06, "epoch": 3.44404973357016, "step": 22300}
|
| 227 |
+
{"loss": 0.0098, "grad_norm": 0.20564699172973633, "learning_rate": 8.832751677852348e-06, "epoch": 3.4594949416943392, "step": 22400}
|
| 228 |
+
{"loss": 0.0095, "grad_norm": 1.3289437294006348, "learning_rate": 8.779060402684565e-06, "epoch": 3.4749401498185186, "step": 22500}
|
| 229 |
+
{"loss": 0.011, "grad_norm": 0.6527641415596008, "learning_rate": 8.72536912751678e-06, "epoch": 3.4903853579426984, "step": 22600}
|
| 230 |
+
{"loss": 0.0119, "grad_norm": 0.7414153218269348, "learning_rate": 8.671677852348995e-06, "epoch": 3.505830566066878, "step": 22700}
|
| 231 |
+
{"loss": 0.0104, "grad_norm": 0.38472020626068115, "learning_rate": 8.61798657718121e-06, "epoch": 3.521275774191057, "step": 22800}
|
| 232 |
+
{"loss": 0.0105, "grad_norm": 1.4406596422195435, "learning_rate": 8.564295302013424e-06, "epoch": 3.5367209823152366, "step": 22900}
|
| 233 |
+
{"loss": 0.0097, "grad_norm": 1.0406662225723267, "learning_rate": 8.510604026845637e-06, "epoch": 3.552166190439416, "step": 23000}
|
| 234 |
+
{"loss": 0.0113, "grad_norm": 0.6790725588798523, "learning_rate": 8.456912751677852e-06, "epoch": 3.5676113985635958, "step": 23100}
|
| 235 |
+
{"loss": 0.0096, "grad_norm": 0.7238831520080566, "learning_rate": 8.403221476510067e-06, "epoch": 3.583056606687775, "step": 23200}
|
| 236 |
+
{"loss": 0.0114, "grad_norm": 0.9182873368263245, "learning_rate": 8.349530201342284e-06, "epoch": 3.5985018148119545, "step": 23300}
|
| 237 |
+
{"loss": 0.0108, "grad_norm": 0.3223716616630554, "learning_rate": 8.295838926174498e-06, "epoch": 3.613947022936134, "step": 23400}
|
| 238 |
+
{"loss": 0.01, "grad_norm": 1.3225654363632202, "learning_rate": 8.242147651006711e-06, "epoch": 3.6293922310603133, "step": 23500}
|
| 239 |
+
{"loss": 0.0105, "grad_norm": 0.2384531944990158, "learning_rate": 8.188456375838926e-06, "epoch": 3.644837439184493, "step": 23600}
|
| 240 |
+
{"loss": 0.013, "grad_norm": 0.9148604273796082, "learning_rate": 8.134765100671141e-06, "epoch": 3.6602826473086725, "step": 23700}
|
| 241 |
+
{"loss": 0.0117, "grad_norm": 0.41117745637893677, "learning_rate": 8.081073825503356e-06, "epoch": 3.675727855432852, "step": 23800}
|
| 242 |
+
{"loss": 0.0101, "grad_norm": 1.2300703525543213, "learning_rate": 8.02738255033557e-06, "epoch": 3.6911730635570317, "step": 23900}
|
| 243 |
+
{"loss": 0.0122, "grad_norm": 1.2263121604919434, "learning_rate": 7.973691275167786e-06, "epoch": 3.706618271681211, "step": 24000}
|
| 244 |
+
{"loss": 0.0103, "grad_norm": 0.6322954893112183, "learning_rate": 7.92e-06, "epoch": 3.7220634798053904, "step": 24100}
|
| 245 |
+
{"loss": 0.0102, "grad_norm": 0.44400155544281006, "learning_rate": 7.866308724832215e-06, "epoch": 3.73750868792957, "step": 24200}
|
| 246 |
+
{"loss": 0.0101, "grad_norm": 0.6776632070541382, "learning_rate": 7.81261744966443e-06, "epoch": 3.752953896053749, "step": 24300}
|
| 247 |
+
{"loss": 0.0108, "grad_norm": 0.7691863179206848, "learning_rate": 7.758926174496645e-06, "epoch": 3.768399104177929, "step": 24400}
|
| 248 |
+
{"loss": 0.0104, "grad_norm": 1.2351208925247192, "learning_rate": 7.70523489932886e-06, "epoch": 3.7838443123021084, "step": 24500}
|
| 249 |
+
{"loss": 0.0104, "grad_norm": 0.3325941264629364, "learning_rate": 7.651543624161075e-06, "epoch": 3.7992895204262878, "step": 24600}
|
| 250 |
+
{"loss": 0.0108, "grad_norm": 0.2872284948825836, "learning_rate": 7.5978523489932885e-06, "epoch": 3.814734728550467, "step": 24700}
|
| 251 |
+
{"loss": 0.0098, "grad_norm": 0.6922308206558228, "learning_rate": 7.544161073825504e-06, "epoch": 3.8301799366746465, "step": 24800}
|
| 252 |
+
{"loss": 0.0113, "grad_norm": 0.9661895036697388, "learning_rate": 7.490469798657719e-06, "epoch": 3.8456251447988263, "step": 24900}
|
| 253 |
+
{"loss": 0.0097, "grad_norm": 1.0570372343063354, "learning_rate": 7.436778523489934e-06, "epoch": 3.8610703529230057, "step": 25000}
|
| 254 |
+
{"loss": 0.011, "grad_norm": 0.5973047018051147, "learning_rate": 7.383087248322149e-06, "epoch": 3.876515561047185, "step": 25100}
|
| 255 |
+
{"loss": 0.0087, "grad_norm": 0.9165611863136292, "learning_rate": 7.329395973154363e-06, "epoch": 3.8919607691713645, "step": 25200}
|
| 256 |
+
{"loss": 0.0106, "grad_norm": 1.2968380451202393, "learning_rate": 7.2757046979865774e-06, "epoch": 3.907405977295544, "step": 25300}
|
| 257 |
+
{"loss": 0.0108, "grad_norm": 0.8751053810119629, "learning_rate": 7.222013422818792e-06, "epoch": 3.9228511854197237, "step": 25400}
|
| 258 |
+
{"loss": 0.0093, "grad_norm": 0.5580429434776306, "learning_rate": 7.168322147651007e-06, "epoch": 3.938296393543903, "step": 25500}
|
| 259 |
+
{"loss": 0.0118, "grad_norm": 0.944416880607605, "learning_rate": 7.114630872483223e-06, "epoch": 3.9537416016680824, "step": 25600}
|
| 260 |
+
{"loss": 0.0102, "grad_norm": 0.4454007148742676, "learning_rate": 7.060939597315437e-06, "epoch": 3.969186809792262, "step": 25700}
|
| 261 |
+
{"loss": 0.0103, "grad_norm": 0.2635466754436493, "learning_rate": 7.0072483221476516e-06, "epoch": 3.984632017916441, "step": 25800}
|
| 262 |
+
{"loss": 0.0094, "grad_norm": 1.7829089164733887, "learning_rate": 6.953557046979866e-06, "epoch": 4.0, "step": 25900}
|
| 263 |
+
{"eval_loss": 0.08570433408021927, "eval_wer": 4.363855264870494, "eval_cer": 4.3785517775007525, "eval_runtime": 5893.5199, "eval_samples_per_second": 4.611, "eval_steps_per_second": 0.576, "epoch": 4.0, "step": 25900}
|
| 264 |
+
{"train_runtime": 61536.7575, "train_samples_per_second": 20.201, "train_steps_per_second": 0.631, "total_flos": 2.3915764729479168e+20, "train_loss": 0.07450532003949507, "epoch": 4.0, "step": 25900}
|
| 265 |
+
{"eval_loss": 0.10621041804552078, "eval_wer": 2.6982781026640676, "eval_cer": 2.950426138652145, "eval_runtime": 5892.3475, "eval_samples_per_second": 4.612, "eval_steps_per_second": 0.577, "epoch": 4.0, "step": 25900}
|