IonGrozea commited on
Commit
e43219b
·
verified ·
1 Parent(s): acd9a09

End of training

Browse files
Files changed (2) hide show
  1. all_results.json +4 -1
  2. training_log.jsonl +265 -57
all_results.json CHANGED
@@ -3,7 +3,10 @@
3
  "eval_cer": 2.950426138652145,
4
  "eval_loss": 0.10621041804552078,
5
  "eval_runtime": 5892.3475,
 
6
  "eval_samples_per_second": 4.612,
7
  "eval_steps_per_second": 0.577,
8
- "eval_wer": 2.6982781026640676
 
 
9
  }
 
3
  "eval_cer": 2.950426138652145,
4
  "eval_loss": 0.10621041804552078,
5
  "eval_runtime": 5892.3475,
6
+ "eval_samples": 27174,
7
  "eval_samples_per_second": 4.612,
8
  "eval_steps_per_second": 0.577,
9
+ "eval_wer": 2.6982781026640676,
10
+ "test_samples": 12987,
11
+ "train_samples": 207181
12
  }
training_log.jsonl CHANGED
@@ -1,57 +1,265 @@
1
- {"loss": 3.0903, "grad_norm": 52.09074401855469, "learning_rate": 1.2375e-06, "epoch": 0.07722007722007722, "step": 100}
2
- {"loss": 1.3555, "grad_norm": 15.287393569946289, "learning_rate": 2.4875000000000003e-06, "epoch": 0.15444015444015444, "step": 200}
3
- {"loss": 0.9605, "grad_norm": 9.833135604858398, "learning_rate": 3.7375000000000006e-06, "epoch": 0.23166023166023167, "step": 300}
4
- {"loss": 0.7983, "grad_norm": 9.241231918334961, "learning_rate": 4.987500000000001e-06, "epoch": 0.3088803088803089, "step": 400}
5
- {"loss": 0.6965, "grad_norm": 7.730131149291992, "learning_rate": 6.237500000000001e-06, "epoch": 0.3861003861003861, "step": 500}
6
- {"loss": 0.6305, "grad_norm": 7.762710094451904, "learning_rate": 7.487500000000001e-06, "epoch": 0.46332046332046334, "step": 600}
7
- {"loss": 0.5762, "grad_norm": 8.128190994262695, "learning_rate": 8.7375e-06, "epoch": 0.5405405405405406, "step": 700}
8
- {"loss": 0.5252, "grad_norm": 6.426775932312012, "learning_rate": 9.9875e-06, "epoch": 0.6177606177606177, "step": 800}
9
- {"loss": 0.4872, "grad_norm": 6.644321918487549, "learning_rate": 1.12375e-05, "epoch": 0.694980694980695, "step": 900}
10
- {"loss": 0.4524, "grad_norm": 6.043028831481934, "learning_rate": 1.2487500000000002e-05, "epoch": 0.7722007722007722, "step": 1000}
11
- {"loss": 0.4275, "grad_norm": 6.488497257232666, "learning_rate": 1.3737500000000002e-05, "epoch": 0.8494208494208494, "step": 1100}
12
- {"loss": 0.3971, "grad_norm": 6.440267086029053, "learning_rate": 1.4987500000000002e-05, "epoch": 0.9266409266409267, "step": 1200}
13
- {"eval_loss": 0.36673977971076965, "eval_wer": 1.6646976179832684, "eval_cer": 3.011137930490479, "eval_runtime": 729.5278, "eval_samples_per_second": 37.249, "eval_steps_per_second": 0.932, "epoch": 1.0, "step": 1295}
14
- {"loss": 0.3801, "grad_norm": 2.6387903690338135, "learning_rate": 1.62375e-05, "epoch": 1.0038610038610039, "step": 1300}
15
- {"loss": 0.3335, "grad_norm": 2.6088716983795166, "learning_rate": 1.74875e-05, "epoch": 1.0810810810810811, "step": 1400}
16
- {"loss": 0.3172, "grad_norm": 2.2746968269348145, "learning_rate": 1.87375e-05, "epoch": 1.1583011583011582, "step": 1500}
17
- {"loss": 0.3059, "grad_norm": 2.5048341751098633, "learning_rate": 1.99875e-05, "epoch": 1.2355212355212355, "step": 1600}
18
- {"loss": 0.2914, "grad_norm": 2.2925240993499756, "learning_rate": 1.967909238249595e-05, "epoch": 1.3127413127413128, "step": 1700}
19
- {"loss": 0.2813, "grad_norm": 2.320096492767334, "learning_rate": 1.9354943273905997e-05, "epoch": 1.3899613899613898, "step": 1800}
20
- {"loss": 0.2667, "grad_norm": 2.3951454162597656, "learning_rate": 1.9030794165316048e-05, "epoch": 1.4671814671814671, "step": 1900}
21
- {"loss": 0.258, "grad_norm": 2.2465109825134277, "learning_rate": 1.8706645056726096e-05, "epoch": 1.5444015444015444, "step": 2000}
22
- {"loss": 0.25, "grad_norm": 2.50040864944458, "learning_rate": 1.8382495948136143e-05, "epoch": 1.6216216216216215, "step": 2100}
23
- {"loss": 0.2446, "grad_norm": 2.0514800548553467, "learning_rate": 1.805834683954619e-05, "epoch": 1.698841698841699, "step": 2200}
24
- {"loss": 0.2344, "grad_norm": 2.195955276489258, "learning_rate": 1.7734197730956242e-05, "epoch": 1.776061776061776, "step": 2300}
25
- {"loss": 0.234, "grad_norm": 2.2508327960968018, "learning_rate": 1.741004862236629e-05, "epoch": 1.8532818532818531, "step": 2400}
26
- {"loss": 0.2235, "grad_norm": 1.9678505659103394, "learning_rate": 1.7085899513776337e-05, "epoch": 1.9305019305019306, "step": 2500}
27
- {"eval_loss": 0.23749442398548126, "eval_wer": 2.6866812227074237, "eval_cer": 3.252883800989044, "eval_runtime": 674.0293, "eval_samples_per_second": 40.316, "eval_steps_per_second": 1.009, "epoch": 2.0, "step": 2590}
28
- {"loss": 0.2162, "grad_norm": 2.063410997390747, "learning_rate": 1.676175040518639e-05, "epoch": 2.0077220077220077, "step": 2600}
29
- {"loss": 0.1878, "grad_norm": 2.169840097427368, "learning_rate": 1.6437601296596436e-05, "epoch": 2.0849420849420848, "step": 2700}
30
- {"loss": 0.1802, "grad_norm": 1.973263144493103, "learning_rate": 1.6113452188006484e-05, "epoch": 2.1621621621621623, "step": 2800}
31
- {"loss": 0.1787, "grad_norm": 2.0943984985351562, "learning_rate": 1.5789303079416535e-05, "epoch": 2.2393822393822393, "step": 2900}
32
- {"loss": 0.1773, "grad_norm": 2.225027084350586, "learning_rate": 1.546515397082658e-05, "epoch": 2.3166023166023164, "step": 3000}
33
- {"loss": 0.1697, "grad_norm": 2.071420907974243, "learning_rate": 1.514100486223663e-05, "epoch": 2.393822393822394, "step": 3100}
34
- {"loss": 0.1714, "grad_norm": 2.080707550048828, "learning_rate": 1.481685575364668e-05, "epoch": 2.471042471042471, "step": 3200}
35
- {"loss": 0.1671, "grad_norm": 2.005873441696167, "learning_rate": 1.4492706645056727e-05, "epoch": 2.5482625482625485, "step": 3300}
36
- {"loss": 0.1669, "grad_norm": 1.9529402256011963, "learning_rate": 1.4168557536466776e-05, "epoch": 2.6254826254826256, "step": 3400}
37
- {"loss": 0.1638, "grad_norm": 1.8977361917495728, "learning_rate": 1.3844408427876826e-05, "epoch": 2.7027027027027026, "step": 3500}
38
- {"loss": 0.1629, "grad_norm": 2.063570737838745, "learning_rate": 1.3520259319286872e-05, "epoch": 2.7799227799227797, "step": 3600}
39
- {"loss": 0.1566, "grad_norm": 1.9151242971420288, "learning_rate": 1.3196110210696921e-05, "epoch": 2.857142857142857, "step": 3700}
40
- {"loss": 0.1579, "grad_norm": 2.2624733448028564, "learning_rate": 1.287196110210697e-05, "epoch": 2.9343629343629343, "step": 3800}
41
- {"eval_loss": 0.20184487104415894, "eval_wer": 4.618768714422033, "eval_cer": 4.555486050294007, "eval_runtime": 673.067, "eval_samples_per_second": 40.373, "eval_steps_per_second": 1.01, "epoch": 3.0, "step": 3885}
42
- {"loss": 0.1534, "grad_norm": 1.8281205892562866, "learning_rate": 1.2547811993517018e-05, "epoch": 3.011583011583012, "step": 3900}
43
- {"loss": 0.1289, "grad_norm": 1.9231871366500854, "learning_rate": 1.2223662884927067e-05, "epoch": 3.088803088803089, "step": 4000}
44
- {"loss": 0.1302, "grad_norm": 1.9235191345214844, "learning_rate": 1.1899513776337117e-05, "epoch": 3.166023166023166, "step": 4100}
45
- {"loss": 0.1307, "grad_norm": 1.7773590087890625, "learning_rate": 1.1575364667747164e-05, "epoch": 3.2432432432432434, "step": 4200}
46
- {"loss": 0.1313, "grad_norm": 1.8975107669830322, "learning_rate": 1.1251215559157214e-05, "epoch": 3.3204633204633205, "step": 4300}
47
- {"loss": 0.1275, "grad_norm": 2.0632779598236084, "learning_rate": 1.0927066450567261e-05, "epoch": 3.3976833976833976, "step": 4400}
48
- {"loss": 0.1253, "grad_norm": 1.9733749628067017, "learning_rate": 1.060291734197731e-05, "epoch": 3.474903474903475, "step": 4500}
49
- {"loss": 0.1276, "grad_norm": 1.836798071861267, "learning_rate": 1.027876823338736e-05, "epoch": 3.552123552123552, "step": 4600}
50
- {"loss": 0.1266, "grad_norm": 1.928240180015564, "learning_rate": 9.954619124797408e-06, "epoch": 3.629343629343629, "step": 4700}
51
- {"loss": 0.1259, "grad_norm": 2.1545467376708984, "learning_rate": 9.630470016207455e-06, "epoch": 3.7065637065637067, "step": 4800}
52
- {"loss": 0.1233, "grad_norm": 2.007277011871338, "learning_rate": 9.306320907617505e-06, "epoch": 3.7837837837837838, "step": 4900}
53
- {"loss": 0.125, "grad_norm": 1.8559694290161133, "learning_rate": 8.982171799027554e-06, "epoch": 3.861003861003861, "step": 5000}
54
- {"loss": 0.1235, "grad_norm": 1.9122380018234253, "learning_rate": 8.658022690437602e-06, "epoch": 3.9382239382239383, "step": 5100}
55
- {"eval_loss": 0.18047837913036346, "eval_wer": 3.727931999865079, "eval_cer": 3.977300565676362, "eval_runtime": 673.899, "eval_samples_per_second": 40.324, "eval_steps_per_second": 1.009, "epoch": 4.0, "step": 5180}
56
- {"train_runtime": 13783.8582, "train_samples_per_second": 90.184, "train_steps_per_second": 0.564, "total_flos": 2.040226095218688e+19, "train_loss": 0.3482298602468719, "epoch": 4.0, "step": 5180}
57
- {"eval_loss": 0.36666831374168396, "eval_wer": 1.7215252071936318, "eval_cer": 3.0229437650696855, "eval_runtime": 672.2172, "eval_samples_per_second": 40.424, "eval_steps_per_second": 1.012, "epoch": 4.0, "step": 5180}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"loss": 2.3246, "grad_norm": 34.954978942871094, "learning_rate": 1.2375e-06, "epoch": 0.015445208124179472, "step": 100}
2
+ {"loss": 1.1027, "grad_norm": 22.29987144470215, "learning_rate": 2.4875000000000003e-06, "epoch": 0.030890416248358945, "step": 200}
3
+ {"loss": 0.7925, "grad_norm": 21.926483154296875, "learning_rate": 3.7375000000000006e-06, "epoch": 0.04633562437253842, "step": 300}
4
+ {"loss": 0.4957, "grad_norm": 14.501327514648438, "learning_rate": 4.987500000000001e-06, "epoch": 0.06178083249671789, "step": 400}
5
+ {"loss": 0.3257, "grad_norm": 10.838948249816895, "learning_rate": 6.237500000000001e-06, "epoch": 0.07722604062089737, "step": 500}
6
+ {"loss": 0.3014, "grad_norm": 10.551115036010742, "learning_rate": 7.487500000000001e-06, "epoch": 0.09267124874507685, "step": 600}
7
+ {"loss": 0.2735, "grad_norm": 9.680395126342773, "learning_rate": 8.7375e-06, "epoch": 0.10811645686925632, "step": 700}
8
+ {"loss": 0.2593, "grad_norm": 7.195150852203369, "learning_rate": 9.9875e-06, "epoch": 0.12356166499343578, "step": 800}
9
+ {"loss": 0.2491, "grad_norm": 9.643333435058594, "learning_rate": 1.12375e-05, "epoch": 0.13900687311761525, "step": 900}
10
+ {"loss": 0.2399, "grad_norm": 6.902649879455566, "learning_rate": 1.2487500000000002e-05, "epoch": 0.15445208124179474, "step": 1000}
11
+ {"loss": 0.2248, "grad_norm": 8.377968788146973, "learning_rate": 1.3737500000000002e-05, "epoch": 0.1698972893659742, "step": 1100}
12
+ {"loss": 0.2149, "grad_norm": 8.831914901733398, "learning_rate": 1.4987500000000002e-05, "epoch": 0.1853424974901537, "step": 1200}
13
+ {"loss": 0.2096, "grad_norm": 5.751692295074463, "learning_rate": 1.62375e-05, "epoch": 0.20078770561433315, "step": 1300}
14
+ {"loss": 0.2007, "grad_norm": 4.953655242919922, "learning_rate": 1.74875e-05, "epoch": 0.21623291373851264, "step": 1400}
15
+ {"loss": 0.198, "grad_norm": 7.7289605140686035, "learning_rate": 1.87375e-05, "epoch": 0.2316781218626921, "step": 1500}
16
+ {"loss": 0.1961, "grad_norm": 7.267148017883301, "learning_rate": 1.99875e-05, "epoch": 0.24712332998687156, "step": 1600}
17
+ {"loss": 0.1937, "grad_norm": 8.58221435546875, "learning_rate": 1.9946845637583895e-05, "epoch": 0.262568538111051, "step": 1700}
18
+ {"loss": 0.1791, "grad_norm": 6.28731107711792, "learning_rate": 1.989315436241611e-05, "epoch": 0.2780137462352305, "step": 1800}
19
+ {"loss": 0.1787, "grad_norm": 6.678342342376709, "learning_rate": 1.983946308724832e-05, "epoch": 0.29345895435941, "step": 1900}
20
+ {"loss": 0.1672, "grad_norm": 7.137376308441162, "learning_rate": 1.978577181208054e-05, "epoch": 0.3089041624835895, "step": 2000}
21
+ {"loss": 0.1679, "grad_norm": 4.73854398727417, "learning_rate": 1.9732080536912754e-05, "epoch": 0.3243493706077689, "step": 2100}
22
+ {"loss": 0.1643, "grad_norm": 7.315737247467041, "learning_rate": 1.967838926174497e-05, "epoch": 0.3397945787319484, "step": 2200}
23
+ {"loss": 0.1566, "grad_norm": 5.333303928375244, "learning_rate": 1.9624697986577184e-05, "epoch": 0.3552397868561279, "step": 2300}
24
+ {"loss": 0.1566, "grad_norm": 7.0448150634765625, "learning_rate": 1.95710067114094e-05, "epoch": 0.3706849949803074, "step": 2400}
25
+ {"loss": 0.1486, "grad_norm": 6.843142032623291, "learning_rate": 1.9517315436241614e-05, "epoch": 0.3861302031044868, "step": 2500}
26
+ {"loss": 0.1515, "grad_norm": 8.338215827941895, "learning_rate": 1.9463624161073825e-05, "epoch": 0.4015754112286663, "step": 2600}
27
+ {"loss": 0.1392, "grad_norm": 6.6149444580078125, "learning_rate": 1.9409932885906043e-05, "epoch": 0.4170206193528458, "step": 2700}
28
+ {"loss": 0.1388, "grad_norm": 6.691015243530273, "learning_rate": 1.9356241610738255e-05, "epoch": 0.4324658274770253, "step": 2800}
29
+ {"loss": 0.1411, "grad_norm": 7.022567272186279, "learning_rate": 1.9302550335570473e-05, "epoch": 0.4479110356012047, "step": 2900}
30
+ {"loss": 0.138, "grad_norm": 6.332781791687012, "learning_rate": 1.9248859060402688e-05, "epoch": 0.4633562437253842, "step": 3000}
31
+ {"loss": 0.1346, "grad_norm": 5.324976444244385, "learning_rate": 1.91951677852349e-05, "epoch": 0.4788014518495637, "step": 3100}
32
+ {"loss": 0.1348, "grad_norm": 6.0165114402771, "learning_rate": 1.9141476510067117e-05, "epoch": 0.4942466599737431, "step": 3200}
33
+ {"loss": 0.1329, "grad_norm": 4.956530570983887, "learning_rate": 1.908778523489933e-05, "epoch": 0.5096918680979227, "step": 3300}
34
+ {"loss": 0.1268, "grad_norm": 4.433960437774658, "learning_rate": 1.9034093959731547e-05, "epoch": 0.525137076222102, "step": 3400}
35
+ {"loss": 0.1275, "grad_norm": 4.740108489990234, "learning_rate": 1.898040268456376e-05, "epoch": 0.5405822843462815, "step": 3500}
36
+ {"loss": 0.1239, "grad_norm": 5.591785907745361, "learning_rate": 1.8926711409395973e-05, "epoch": 0.556027492470461, "step": 3600}
37
+ {"loss": 0.1269, "grad_norm": 4.656253814697266, "learning_rate": 1.887302013422819e-05, "epoch": 0.5714727005946405, "step": 3700}
38
+ {"loss": 0.1214, "grad_norm": 4.906614303588867, "learning_rate": 1.8819328859060403e-05, "epoch": 0.58691790871882, "step": 3800}
39
+ {"loss": 0.1316, "grad_norm": 5.207762718200684, "learning_rate": 1.876563758389262e-05, "epoch": 0.6023631168429995, "step": 3900}
40
+ {"loss": 0.1151, "grad_norm": 6.727694034576416, "learning_rate": 1.8711946308724833e-05, "epoch": 0.617808324967179, "step": 4000}
41
+ {"loss": 0.1187, "grad_norm": 6.817234992980957, "learning_rate": 1.8658255033557047e-05, "epoch": 0.6332535330913585, "step": 4100}
42
+ {"loss": 0.1201, "grad_norm": 5.209933280944824, "learning_rate": 1.8604563758389262e-05, "epoch": 0.6486987412155378, "step": 4200}
43
+ {"loss": 0.1074, "grad_norm": 4.433166980743408, "learning_rate": 1.8550872483221477e-05, "epoch": 0.6641439493397173, "step": 4300}
44
+ {"loss": 0.1182, "grad_norm": 5.06485652923584, "learning_rate": 1.8497181208053695e-05, "epoch": 0.6795891574638968, "step": 4400}
45
+ {"loss": 0.1062, "grad_norm": 6.160850524902344, "learning_rate": 1.8443489932885907e-05, "epoch": 0.6950343655880763, "step": 4500}
46
+ {"loss": 0.1137, "grad_norm": 5.975137710571289, "learning_rate": 1.838979865771812e-05, "epoch": 0.7104795737122558, "step": 4600}
47
+ {"loss": 0.1047, "grad_norm": 5.255064964294434, "learning_rate": 1.8336107382550336e-05, "epoch": 0.7259247818364353, "step": 4700}
48
+ {"loss": 0.1079, "grad_norm": 4.182297229766846, "learning_rate": 1.828241610738255e-05, "epoch": 0.7413699899606148, "step": 4800}
49
+ {"loss": 0.104, "grad_norm": 3.115267515182495, "learning_rate": 1.8228724832214766e-05, "epoch": 0.7568151980847941, "step": 4900}
50
+ {"loss": 0.1025, "grad_norm": 4.368956089019775, "learning_rate": 1.817503355704698e-05, "epoch": 0.7722604062089736, "step": 5000}
51
+ {"loss": 0.098, "grad_norm": 5.158085346221924, "learning_rate": 1.8121342281879196e-05, "epoch": 0.7877056143331531, "step": 5100}
52
+ {"loss": 0.103, "grad_norm": 5.80360746383667, "learning_rate": 1.806765100671141e-05, "epoch": 0.8031508224573326, "step": 5200}
53
+ {"loss": 0.1097, "grad_norm": 6.364624977111816, "learning_rate": 1.8013959731543625e-05, "epoch": 0.8185960305815121, "step": 5300}
54
+ {"loss": 0.1009, "grad_norm": 6.2352495193481445, "learning_rate": 1.796026845637584e-05, "epoch": 0.8340412387056916, "step": 5400}
55
+ {"loss": 0.1087, "grad_norm": 5.61583948135376, "learning_rate": 1.7906577181208055e-05, "epoch": 0.8494864468298711, "step": 5500}
56
+ {"loss": 0.099, "grad_norm": 5.854990482330322, "learning_rate": 1.785288590604027e-05, "epoch": 0.8649316549540506, "step": 5600}
57
+ {"loss": 0.0982, "grad_norm": 5.854545593261719, "learning_rate": 1.7799194630872485e-05, "epoch": 0.8803768630782299, "step": 5700}
58
+ {"loss": 0.094, "grad_norm": 5.2870612144470215, "learning_rate": 1.77455033557047e-05, "epoch": 0.8958220712024094, "step": 5800}
59
+ {"loss": 0.0924, "grad_norm": 4.898568630218506, "learning_rate": 1.7691812080536914e-05, "epoch": 0.9112672793265889, "step": 5900}
60
+ {"loss": 0.1027, "grad_norm": 4.1877923011779785, "learning_rate": 1.763812080536913e-05, "epoch": 0.9267124874507684, "step": 6000}
61
+ {"loss": 0.0939, "grad_norm": 3.115177631378174, "learning_rate": 1.7584429530201344e-05, "epoch": 0.9421576955749479, "step": 6100}
62
+ {"loss": 0.0987, "grad_norm": 5.382924556732178, "learning_rate": 1.753073825503356e-05, "epoch": 0.9576029036991274, "step": 6200}
63
+ {"loss": 0.0937, "grad_norm": 4.143594264984131, "learning_rate": 1.7477046979865774e-05, "epoch": 0.9730481118233069, "step": 6300}
64
+ {"loss": 0.0873, "grad_norm": 5.049436092376709, "learning_rate": 1.742335570469799e-05, "epoch": 0.9884933199474862, "step": 6400}
65
+ {"eval_loss": 0.10621471703052521, "eval_wer": 2.8609889027513296, "eval_cer": 2.990143485705616, "eval_runtime": 6156.5377, "eval_samples_per_second": 4.414, "eval_steps_per_second": 0.552, "epoch": 1.0, "step": 6475}
66
+ {"loss": 0.0893, "grad_norm": 1.4537144899368286, "learning_rate": 1.7369664429530203e-05, "epoch": 1.003861302031045, "step": 6500}
67
+ {"loss": 0.0585, "grad_norm": 1.554871916770935, "learning_rate": 1.7315973154362418e-05, "epoch": 1.0193065101552243, "step": 6600}
68
+ {"loss": 0.0578, "grad_norm": 0.8061501979827881, "learning_rate": 1.7262281879194633e-05, "epoch": 1.0347517182794037, "step": 6700}
69
+ {"loss": 0.0554, "grad_norm": 1.4121989011764526, "learning_rate": 1.7208590604026848e-05, "epoch": 1.0501969264035833, "step": 6800}
70
+ {"loss": 0.0559, "grad_norm": 1.322690725326538, "learning_rate": 1.7154899328859062e-05, "epoch": 1.0656421345277627, "step": 6900}
71
+ {"loss": 0.0569, "grad_norm": 0.8532437086105347, "learning_rate": 1.7101208053691277e-05, "epoch": 1.0810873426519423, "step": 7000}
72
+ {"loss": 0.0532, "grad_norm": 1.3657441139221191, "learning_rate": 1.7047516778523492e-05, "epoch": 1.0965325507761217, "step": 7100}
73
+ {"loss": 0.057, "grad_norm": 1.5799016952514648, "learning_rate": 1.6993825503355707e-05, "epoch": 1.1119777589003013, "step": 7200}
74
+ {"loss": 0.0541, "grad_norm": 1.6521095037460327, "learning_rate": 1.6940134228187922e-05, "epoch": 1.1274229670244806, "step": 7300}
75
+ {"loss": 0.0562, "grad_norm": 1.3745259046554565, "learning_rate": 1.6886442953020133e-05, "epoch": 1.14286817514866, "step": 7400}
76
+ {"loss": 0.0525, "grad_norm": 1.4325778484344482, "learning_rate": 1.683275167785235e-05, "epoch": 1.1583133832728396, "step": 7500}
77
+ {"loss": 0.0525, "grad_norm": 2.0049407482147217, "learning_rate": 1.6779060402684566e-05, "epoch": 1.173758591397019, "step": 7600}
78
+ {"loss": 0.0523, "grad_norm": 1.5848065614700317, "learning_rate": 1.672536912751678e-05, "epoch": 1.1892037995211986, "step": 7700}
79
+ {"loss": 0.0571, "grad_norm": 1.4551335573196411, "learning_rate": 1.6671677852348996e-05, "epoch": 1.204649007645378, "step": 7800}
80
+ {"loss": 0.0572, "grad_norm": 1.6041462421417236, "learning_rate": 1.6617986577181207e-05, "epoch": 1.2200942157695576, "step": 7900}
81
+ {"loss": 0.0574, "grad_norm": 2.061491012573242, "learning_rate": 1.6564295302013426e-05, "epoch": 1.235539423893737, "step": 8000}
82
+ {"loss": 0.0545, "grad_norm": 1.7531427145004272, "learning_rate": 1.6510604026845637e-05, "epoch": 1.2509846320179165, "step": 8100}
83
+ {"loss": 0.0544, "grad_norm": 1.1294121742248535, "learning_rate": 1.6456912751677855e-05, "epoch": 1.266429840142096, "step": 8200}
84
+ {"loss": 0.0588, "grad_norm": 1.2551779747009277, "learning_rate": 1.640322147651007e-05, "epoch": 1.2818750482662753, "step": 8300}
85
+ {"loss": 0.0536, "grad_norm": 1.8939077854156494, "learning_rate": 1.634953020134228e-05, "epoch": 1.297320256390455, "step": 8400}
86
+ {"loss": 0.0629, "grad_norm": 0.9555742144584656, "learning_rate": 1.62958389261745e-05, "epoch": 1.3127654645146343, "step": 8500}
87
+ {"loss": 0.0565, "grad_norm": 1.3192559480667114, "learning_rate": 1.624214765100671e-05, "epoch": 1.3282106726388139, "step": 8600}
88
+ {"loss": 0.052, "grad_norm": 1.640885353088379, "learning_rate": 1.618845637583893e-05, "epoch": 1.3436558807629932, "step": 8700}
89
+ {"loss": 0.0544, "grad_norm": 1.0616875886917114, "learning_rate": 1.613476510067114e-05, "epoch": 1.3591010888871726, "step": 8800}
90
+ {"loss": 0.0502, "grad_norm": 1.1386332511901855, "learning_rate": 1.6081073825503356e-05, "epoch": 1.3745462970113522, "step": 8900}
91
+ {"loss": 0.0495, "grad_norm": 1.1952922344207764, "learning_rate": 1.6027382550335574e-05, "epoch": 1.3899915051355318, "step": 9000}
92
+ {"loss": 0.0546, "grad_norm": 0.9143213629722595, "learning_rate": 1.5973691275167785e-05, "epoch": 1.4054367132597112, "step": 9100}
93
+ {"loss": 0.053, "grad_norm": 1.0954219102859497, "learning_rate": 1.5920000000000003e-05, "epoch": 1.4208819213838906, "step": 9200}
94
+ {"loss": 0.0565, "grad_norm": 1.3947011232376099, "learning_rate": 1.5866308724832215e-05, "epoch": 1.4363271295080702, "step": 9300}
95
+ {"loss": 0.0536, "grad_norm": 1.048740029335022, "learning_rate": 1.581261744966443e-05, "epoch": 1.4517723376322496, "step": 9400}
96
+ {"loss": 0.055, "grad_norm": 1.1906921863555908, "learning_rate": 1.5758926174496645e-05, "epoch": 1.4672175457564292, "step": 9500}
97
+ {"loss": 0.0525, "grad_norm": 1.979690432548523, "learning_rate": 1.570523489932886e-05, "epoch": 1.4826627538806085, "step": 9600}
98
+ {"loss": 0.0522, "grad_norm": 1.6308488845825195, "learning_rate": 1.5651543624161074e-05, "epoch": 1.498107962004788, "step": 9700}
99
+ {"loss": 0.0517, "grad_norm": 1.525758147239685, "learning_rate": 1.559785234899329e-05, "epoch": 1.5135531701289675, "step": 9800}
100
+ {"loss": 0.0551, "grad_norm": 1.4599488973617554, "learning_rate": 1.5544161073825507e-05, "epoch": 1.528998378253147, "step": 9900}
101
+ {"loss": 0.0507, "grad_norm": 1.186352014541626, "learning_rate": 1.549046979865772e-05, "epoch": 1.5444435863773265, "step": 10000}
102
+ {"loss": 0.0525, "grad_norm": 0.6791055798530579, "learning_rate": 1.5436778523489933e-05, "epoch": 1.5598887945015059, "step": 10100}
103
+ {"loss": 0.0567, "grad_norm": 1.0277873277664185, "learning_rate": 1.5383087248322148e-05, "epoch": 1.5753340026256852, "step": 10200}
104
+ {"loss": 0.0543, "grad_norm": 1.0918126106262207, "learning_rate": 1.5329395973154363e-05, "epoch": 1.5907792107498648, "step": 10300}
105
+ {"loss": 0.0486, "grad_norm": 1.1759934425354004, "learning_rate": 1.5275704697986578e-05, "epoch": 1.6062244188740444, "step": 10400}
106
+ {"loss": 0.0543, "grad_norm": 1.0399106740951538, "learning_rate": 1.5222013422818793e-05, "epoch": 1.6216696269982238, "step": 10500}
107
+ {"loss": 0.0515, "grad_norm": 1.9203470945358276, "learning_rate": 1.516832214765101e-05, "epoch": 1.6371148351224032, "step": 10600}
108
+ {"loss": 0.0543, "grad_norm": 2.4237537384033203, "learning_rate": 1.5114630872483222e-05, "epoch": 1.6525600432465828, "step": 10700}
109
+ {"loss": 0.0466, "grad_norm": 1.7403265237808228, "learning_rate": 1.5060939597315439e-05, "epoch": 1.6680052513707624, "step": 10800}
110
+ {"loss": 0.057, "grad_norm": 1.2284775972366333, "learning_rate": 1.5007248322147652e-05, "epoch": 1.6834504594949418, "step": 10900}
111
+ {"loss": 0.0532, "grad_norm": 1.6382396221160889, "learning_rate": 1.4953557046979867e-05, "epoch": 1.6988956676191211, "step": 11000}
112
+ {"loss": 0.0503, "grad_norm": 0.968417227268219, "learning_rate": 1.4899865771812082e-05, "epoch": 1.7143408757433005, "step": 11100}
113
+ {"loss": 0.0522, "grad_norm": 0.697189450263977, "learning_rate": 1.4846174496644297e-05, "epoch": 1.7297860838674801, "step": 11200}
114
+ {"loss": 0.0539, "grad_norm": 2.441664695739746, "learning_rate": 1.479248322147651e-05, "epoch": 1.7452312919916597, "step": 11300}
115
+ {"loss": 0.0535, "grad_norm": 2.1173808574676514, "learning_rate": 1.4738791946308726e-05, "epoch": 1.760676500115839, "step": 11400}
116
+ {"loss": 0.0533, "grad_norm": 1.1768113374710083, "learning_rate": 1.4685100671140941e-05, "epoch": 1.7761217082400185, "step": 11500}
117
+ {"loss": 0.055, "grad_norm": 1.0143743753433228, "learning_rate": 1.4631409395973156e-05, "epoch": 1.7915669163641978, "step": 11600}
118
+ {"loss": 0.0487, "grad_norm": 1.4157536029815674, "learning_rate": 1.457771812080537e-05, "epoch": 1.8070121244883774, "step": 11700}
119
+ {"loss": 0.0463, "grad_norm": 1.8482532501220703, "learning_rate": 1.4524026845637584e-05, "epoch": 1.822457332612557, "step": 11800}
120
+ {"loss": 0.0483, "grad_norm": 1.1774625778198242, "learning_rate": 1.44703355704698e-05, "epoch": 1.8379025407367364, "step": 11900}
121
+ {"loss": 0.0527, "grad_norm": 2.0068328380584717, "learning_rate": 1.4416644295302013e-05, "epoch": 1.8533477488609158, "step": 12000}
122
+ {"loss": 0.0509, "grad_norm": 1.3660190105438232, "learning_rate": 1.436295302013423e-05, "epoch": 1.8687929569850954, "step": 12100}
123
+ {"loss": 0.0486, "grad_norm": 1.4517139196395874, "learning_rate": 1.4309261744966445e-05, "epoch": 1.884238165109275, "step": 12200}
124
+ {"loss": 0.0468, "grad_norm": 1.676147699356079, "learning_rate": 1.4255570469798658e-05, "epoch": 1.8996833732334544, "step": 12300}
125
+ {"loss": 0.0482, "grad_norm": 1.2899450063705444, "learning_rate": 1.4201879194630874e-05, "epoch": 1.9151285813576338, "step": 12400}
126
+ {"loss": 0.0487, "grad_norm": 0.5366058945655823, "learning_rate": 1.4148187919463088e-05, "epoch": 1.9305737894818131, "step": 12500}
127
+ {"loss": 0.0449, "grad_norm": 1.4144234657287598, "learning_rate": 1.4094496644295304e-05, "epoch": 1.9460189976059927, "step": 12600}
128
+ {"loss": 0.0486, "grad_norm": 1.9612187147140503, "learning_rate": 1.4040805369127517e-05, "epoch": 1.9614642057301723, "step": 12700}
129
+ {"loss": 0.0482, "grad_norm": 2.1188743114471436, "learning_rate": 1.3987114093959732e-05, "epoch": 1.9769094138543517, "step": 12800}
130
+ {"loss": 0.0493, "grad_norm": 2.2827799320220947, "learning_rate": 1.3933422818791949e-05, "epoch": 1.992354621978531, "step": 12900}
131
+ {"eval_loss": 0.08518411964178085, "eval_wer": 3.8648850742872005, "eval_cer": 3.7046732368471686, "eval_runtime": 5892.2601, "eval_samples_per_second": 4.612, "eval_steps_per_second": 0.577, "epoch": 2.0, "step": 12950}
132
+ {"loss": 0.0333, "grad_norm": 1.141129493713379, "learning_rate": 1.3879731543624162e-05, "epoch": 2.00772260406209, "step": 13000}
133
+ {"loss": 0.0212, "grad_norm": 1.739894151687622, "learning_rate": 1.3826040268456378e-05, "epoch": 2.0231678121862693, "step": 13100}
134
+ {"loss": 0.0228, "grad_norm": 2.4176976680755615, "learning_rate": 1.3772348993288591e-05, "epoch": 2.0386130203104487, "step": 13200}
135
+ {"loss": 0.0219, "grad_norm": 2.188455581665039, "learning_rate": 1.3718657718120806e-05, "epoch": 2.054058228434628, "step": 13300}
136
+ {"loss": 0.0232, "grad_norm": 1.5634980201721191, "learning_rate": 1.3664966442953021e-05, "epoch": 2.0695034365588074, "step": 13400}
137
+ {"loss": 0.0238, "grad_norm": 0.5609109997749329, "learning_rate": 1.3611275167785236e-05, "epoch": 2.0849486446829872, "step": 13500}
138
+ {"loss": 0.0211, "grad_norm": 2.691328763961792, "learning_rate": 1.3557583892617449e-05, "epoch": 2.1003938528071666, "step": 13600}
139
+ {"loss": 0.0247, "grad_norm": 1.5539088249206543, "learning_rate": 1.3503892617449665e-05, "epoch": 2.115839060931346, "step": 13700}
140
+ {"loss": 0.0261, "grad_norm": 3.458829641342163, "learning_rate": 1.3450201342281882e-05, "epoch": 2.1312842690555254, "step": 13800}
141
+ {"loss": 0.0238, "grad_norm": 2.3176066875457764, "learning_rate": 1.3396510067114095e-05, "epoch": 2.146729477179705, "step": 13900}
142
+ {"loss": 0.0243, "grad_norm": 3.316974401473999, "learning_rate": 1.334281879194631e-05, "epoch": 2.1621746853038846, "step": 14000}
143
+ {"loss": 0.024, "grad_norm": 6.100398063659668, "learning_rate": 1.3289127516778523e-05, "epoch": 2.177619893428064, "step": 14100}
144
+ {"loss": 0.0238, "grad_norm": 2.5312767028808594, "learning_rate": 1.323543624161074e-05, "epoch": 2.1930651015522433, "step": 14200}
145
+ {"loss": 0.0255, "grad_norm": 1.7092468738555908, "learning_rate": 1.3181744966442953e-05, "epoch": 2.2085103096764227, "step": 14300}
146
+ {"loss": 0.0273, "grad_norm": 2.5337600708007812, "learning_rate": 1.312805369127517e-05, "epoch": 2.2239555178006025, "step": 14400}
147
+ {"loss": 0.0222, "grad_norm": 2.240237236022949, "learning_rate": 1.3074362416107384e-05, "epoch": 2.239400725924782, "step": 14500}
148
+ {"loss": 0.0247, "grad_norm": 1.514408826828003, "learning_rate": 1.3020671140939599e-05, "epoch": 2.2548459340489613, "step": 14600}
149
+ {"loss": 0.0247, "grad_norm": 3.8781371116638184, "learning_rate": 1.2966979865771814e-05, "epoch": 2.2702911421731407, "step": 14700}
150
+ {"loss": 0.0235, "grad_norm": 1.4282342195510864, "learning_rate": 1.2913288590604027e-05, "epoch": 2.28573635029732, "step": 14800}
151
+ {"loss": 0.0238, "grad_norm": 3.004528045654297, "learning_rate": 1.2859597315436243e-05, "epoch": 2.3011815584215, "step": 14900}
152
+ {"loss": 0.0235, "grad_norm": 2.147857666015625, "learning_rate": 1.2805906040268456e-05, "epoch": 2.3166267665456792, "step": 15000}
153
+ {"loss": 0.027, "grad_norm": 2.3109426498413086, "learning_rate": 1.2752214765100673e-05, "epoch": 2.3320719746698586, "step": 15100}
154
+ {"loss": 0.022, "grad_norm": 1.1319489479064941, "learning_rate": 1.2698523489932888e-05, "epoch": 2.347517182794038, "step": 15200}
155
+ {"loss": 0.021, "grad_norm": 1.3498146533966064, "learning_rate": 1.2644832214765101e-05, "epoch": 2.362962390918218, "step": 15300}
156
+ {"loss": 0.0228, "grad_norm": 2.1282238960266113, "learning_rate": 1.2591140939597317e-05, "epoch": 2.378407599042397, "step": 15400}
157
+ {"loss": 0.0262, "grad_norm": 2.113837957382202, "learning_rate": 1.253744966442953e-05, "epoch": 2.3938528071665766, "step": 15500}
158
+ {"loss": 0.0227, "grad_norm": 2.8750193119049072, "learning_rate": 1.2483758389261747e-05, "epoch": 2.409298015290756, "step": 15600}
159
+ {"loss": 0.0231, "grad_norm": 1.7577595710754395, "learning_rate": 1.243006711409396e-05, "epoch": 2.4247432234149358, "step": 15700}
160
+ {"loss": 0.0241, "grad_norm": 3.5743813514709473, "learning_rate": 1.2376375838926175e-05, "epoch": 2.440188431539115, "step": 15800}
161
+ {"loss": 0.022, "grad_norm": 1.040603756904602, "learning_rate": 1.232268456375839e-05, "epoch": 2.4556336396632945, "step": 15900}
162
+ {"loss": 0.0231, "grad_norm": 2.63417649269104, "learning_rate": 1.2268993288590605e-05, "epoch": 2.471078847787474, "step": 16000}
163
+ {"loss": 0.0249, "grad_norm": 3.670100450515747, "learning_rate": 1.2215302013422821e-05, "epoch": 2.4865240559116533, "step": 16100}
164
+ {"loss": 0.0259, "grad_norm": 5.80275297164917, "learning_rate": 1.2161610738255034e-05, "epoch": 2.501969264035833, "step": 16200}
165
+ {"loss": 0.0252, "grad_norm": 2.539285659790039, "learning_rate": 1.210791946308725e-05, "epoch": 2.5174144721600125, "step": 16300}
166
+ {"loss": 0.0241, "grad_norm": null, "learning_rate": 1.2054228187919464e-05, "epoch": 2.532859680284192, "step": 16400}
167
+ {"loss": 0.0233, "grad_norm": 2.7852771282196045, "learning_rate": 1.2000536912751679e-05, "epoch": 2.548304888408371, "step": 16500}
168
+ {"loss": 0.0248, "grad_norm": 1.3278162479400635, "learning_rate": 1.1946845637583892e-05, "epoch": 2.5637500965325506, "step": 16600}
169
+ {"loss": 0.0242, "grad_norm": 1.2023398876190186, "learning_rate": 1.1893154362416108e-05, "epoch": 2.5791953046567304, "step": 16700}
170
+ {"loss": 0.0244, "grad_norm": 4.091363430023193, "learning_rate": 1.1839463087248323e-05, "epoch": 2.59464051278091, "step": 16800}
171
+ {"loss": 0.0212, "grad_norm": 2.2210638523101807, "learning_rate": 1.1785771812080538e-05, "epoch": 2.610085720905089, "step": 16900}
172
+ {"loss": 0.0264, "grad_norm": 1.3215293884277344, "learning_rate": 1.1732080536912753e-05, "epoch": 2.6255309290292685, "step": 17000}
173
+ {"loss": 0.0253, "grad_norm": 1.8649264574050903, "learning_rate": 1.1678389261744966e-05, "epoch": 2.640976137153448, "step": 17100}
174
+ {"loss": 0.0255, "grad_norm": 2.791181802749634, "learning_rate": 1.1624697986577183e-05, "epoch": 2.6564213452776277, "step": 17200}
175
+ {"loss": 0.0261, "grad_norm": 3.361290216445923, "learning_rate": 1.1571006711409396e-05, "epoch": 2.671866553401807, "step": 17300}
176
+ {"loss": 0.0234, "grad_norm": 1.8036214113235474, "learning_rate": 1.1517315436241612e-05, "epoch": 2.6873117615259865, "step": 17400}
177
+ {"loss": 0.0238, "grad_norm": 1.8118869066238403, "learning_rate": 1.1463624161073827e-05, "epoch": 2.702756969650166, "step": 17500}
178
+ {"loss": 0.0212, "grad_norm": 3.0242505073547363, "learning_rate": 1.140993288590604e-05, "epoch": 2.7182021777743453, "step": 17600}
179
+ {"loss": 0.0244, "grad_norm": 3.848574161529541, "learning_rate": 1.1356241610738257e-05, "epoch": 2.733647385898525, "step": 17700}
180
+ {"loss": 0.0232, "grad_norm": 2.7855026721954346, "learning_rate": 1.130255033557047e-05, "epoch": 2.7490925940227044, "step": 17800}
181
+ {"loss": 0.025, "grad_norm": 3.0796775817871094, "learning_rate": 1.1248859060402686e-05, "epoch": 2.764537802146884, "step": 17900}
182
+ {"loss": 0.0232, "grad_norm": 0.9707315564155579, "learning_rate": 1.11951677852349e-05, "epoch": 2.7799830102710636, "step": 18000}
183
+ {"loss": 0.0258, "grad_norm": 1.9708441495895386, "learning_rate": 1.1141476510067114e-05, "epoch": 2.795428218395243, "step": 18100}
184
+ {"loss": 0.0231, "grad_norm": 0.9377394318580627, "learning_rate": 1.108778523489933e-05, "epoch": 2.8108734265194224, "step": 18200}
185
+ {"loss": 0.0211, "grad_norm": 3.1204702854156494, "learning_rate": 1.1034093959731544e-05, "epoch": 2.826318634643602, "step": 18300}
186
+ {"loss": 0.0228, "grad_norm": 2.8107941150665283, "learning_rate": 1.098040268456376e-05, "epoch": 2.841763842767781, "step": 18400}
187
+ {"loss": 0.0254, "grad_norm": 2.400878667831421, "learning_rate": 1.0926711409395974e-05, "epoch": 2.857209050891961, "step": 18500}
188
+ {"loss": 0.0242, "grad_norm": 0.6793500781059265, "learning_rate": 1.087302013422819e-05, "epoch": 2.8726542590161404, "step": 18600}
189
+ {"loss": 0.0225, "grad_norm": 2.416689872741699, "learning_rate": 1.0819328859060403e-05, "epoch": 2.8880994671403197, "step": 18700}
190
+ {"loss": 0.0243, "grad_norm": 2.974879026412964, "learning_rate": 1.0765637583892618e-05, "epoch": 2.903544675264499, "step": 18800}
191
+ {"loss": 0.0225, "grad_norm": 2.052161455154419, "learning_rate": 1.0711946308724833e-05, "epoch": 2.9189898833886785, "step": 18900}
192
+ {"loss": 0.0217, "grad_norm": 2.398977518081665, "learning_rate": 1.0658255033557048e-05, "epoch": 2.9344350915128583, "step": 19000}
193
+ {"loss": 0.0231, "grad_norm": 1.0758559703826904, "learning_rate": 1.0604563758389264e-05, "epoch": 2.9498802996370377, "step": 19100}
194
+ {"loss": 0.0211, "grad_norm": 2.3403520584106445, "learning_rate": 1.0550872483221477e-05, "epoch": 2.965325507761217, "step": 19200}
195
+ {"loss": 0.0251, "grad_norm": 6.15524435043335, "learning_rate": 1.0497181208053692e-05, "epoch": 2.9807707158853964, "step": 19300}
196
+ {"loss": 0.023, "grad_norm": 3.1562724113464355, "learning_rate": 1.0443489932885907e-05, "epoch": 2.996215924009576, "step": 19400}
197
+ {"eval_loss": 0.07923509925603867, "eval_wer": 4.4319838736771375, "eval_cer": 4.22036403494204, "eval_runtime": 5893.7538, "eval_samples_per_second": 4.611, "eval_steps_per_second": 0.576, "epoch": 3.0, "step": 19425}
198
+ {"loss": 0.0142, "grad_norm": 0.7176849246025085, "learning_rate": 1.0389798657718122e-05, "epoch": 3.0115839060931346, "step": 19500}
199
+ {"loss": 0.0089, "grad_norm": 1.074440836906433, "learning_rate": 1.0336107382550335e-05, "epoch": 3.027029114217314, "step": 19600}
200
+ {"loss": 0.0088, "grad_norm": 0.862775444984436, "learning_rate": 1.0282416107382552e-05, "epoch": 3.0424743223414934, "step": 19700}
201
+ {"loss": 0.0084, "grad_norm": 0.2428913414478302, "learning_rate": 1.0228724832214766e-05, "epoch": 3.057919530465673, "step": 19800}
202
+ {"loss": 0.0104, "grad_norm": 0.22252975404262543, "learning_rate": 1.0175033557046981e-05, "epoch": 3.0733647385898526, "step": 19900}
203
+ {"loss": 0.0089, "grad_norm": 0.16029539704322815, "learning_rate": 1.0121342281879196e-05, "epoch": 3.088809946714032, "step": 20000}
204
+ {"loss": 0.0106, "grad_norm": 0.16912202537059784, "learning_rate": 1.0067651006711409e-05, "epoch": 3.1042551548382114, "step": 20100}
205
+ {"loss": 0.01, "grad_norm": 0.3454485535621643, "learning_rate": 1.0013959731543626e-05, "epoch": 3.1197003629623907, "step": 20200}
206
+ {"loss": 0.0085, "grad_norm": 0.4891042709350586, "learning_rate": 9.96026845637584e-06, "epoch": 3.1351455710865705, "step": 20300}
207
+ {"loss": 0.0086, "grad_norm": 0.22724980115890503, "learning_rate": 9.906577181208055e-06, "epoch": 3.15059077921075, "step": 20400}
208
+ {"loss": 0.0098, "grad_norm": 0.6482119560241699, "learning_rate": 9.85288590604027e-06, "epoch": 3.1660359873349293, "step": 20500}
209
+ {"loss": 0.0102, "grad_norm": 1.339402437210083, "learning_rate": 9.799194630872483e-06, "epoch": 3.1814811954591087, "step": 20600}
210
+ {"loss": 0.0111, "grad_norm": 0.3536001145839691, "learning_rate": 9.745503355704698e-06, "epoch": 3.196926403583288, "step": 20700}
211
+ {"loss": 0.0091, "grad_norm": 0.5454040169715881, "learning_rate": 9.691812080536913e-06, "epoch": 3.212371611707468, "step": 20800}
212
+ {"loss": 0.011, "grad_norm": 0.5609560012817383, "learning_rate": 9.638120805369128e-06, "epoch": 3.2278168198316473, "step": 20900}
213
+ {"loss": 0.0098, "grad_norm": 0.39234450459480286, "learning_rate": 9.584429530201344e-06, "epoch": 3.2432620279558266, "step": 21000}
214
+ {"loss": 0.0093, "grad_norm": 0.9599013328552246, "learning_rate": 9.530738255033557e-06, "epoch": 3.258707236080006, "step": 21100}
215
+ {"loss": 0.0092, "grad_norm": 0.6345553398132324, "learning_rate": 9.477046979865772e-06, "epoch": 3.274152444204186, "step": 21200}
216
+ {"loss": 0.0091, "grad_norm": 0.7367005348205566, "learning_rate": 9.423355704697987e-06, "epoch": 3.289597652328365, "step": 21300}
217
+ {"loss": 0.0105, "grad_norm": 0.7480056285858154, "learning_rate": 9.369664429530202e-06, "epoch": 3.3050428604525446, "step": 21400}
218
+ {"loss": 0.0101, "grad_norm": 0.8892471790313721, "learning_rate": 9.315973154362417e-06, "epoch": 3.320488068576724, "step": 21500}
219
+ {"loss": 0.0108, "grad_norm": 0.8230651617050171, "learning_rate": 9.262281879194631e-06, "epoch": 3.335933276700904, "step": 21600}
220
+ {"loss": 0.0106, "grad_norm": 0.404323011636734, "learning_rate": 9.208590604026846e-06, "epoch": 3.351378484825083, "step": 21700}
221
+ {"loss": 0.0088, "grad_norm": 0.22209642827510834, "learning_rate": 9.154899328859061e-06, "epoch": 3.3668236929492625, "step": 21800}
222
+ {"loss": 0.0112, "grad_norm": 1.7332512140274048, "learning_rate": 9.101208053691276e-06, "epoch": 3.382268901073442, "step": 21900}
223
+ {"loss": 0.0112, "grad_norm": 0.1417224407196045, "learning_rate": 9.04751677852349e-06, "epoch": 3.3977141091976213, "step": 22000}
224
+ {"loss": 0.011, "grad_norm": 1.5628505945205688, "learning_rate": 8.993825503355706e-06, "epoch": 3.413159317321801, "step": 22100}
225
+ {"loss": 0.0118, "grad_norm": 1.268794059753418, "learning_rate": 8.94013422818792e-06, "epoch": 3.4286045254459805, "step": 22200}
226
+ {"loss": 0.0092, "grad_norm": 0.8178913593292236, "learning_rate": 8.886442953020135e-06, "epoch": 3.44404973357016, "step": 22300}
227
+ {"loss": 0.0098, "grad_norm": 0.20564699172973633, "learning_rate": 8.832751677852348e-06, "epoch": 3.4594949416943392, "step": 22400}
228
+ {"loss": 0.0095, "grad_norm": 1.3289437294006348, "learning_rate": 8.779060402684565e-06, "epoch": 3.4749401498185186, "step": 22500}
229
+ {"loss": 0.011, "grad_norm": 0.6527641415596008, "learning_rate": 8.72536912751678e-06, "epoch": 3.4903853579426984, "step": 22600}
230
+ {"loss": 0.0119, "grad_norm": 0.7414153218269348, "learning_rate": 8.671677852348995e-06, "epoch": 3.505830566066878, "step": 22700}
231
+ {"loss": 0.0104, "grad_norm": 0.38472020626068115, "learning_rate": 8.61798657718121e-06, "epoch": 3.521275774191057, "step": 22800}
232
+ {"loss": 0.0105, "grad_norm": 1.4406596422195435, "learning_rate": 8.564295302013424e-06, "epoch": 3.5367209823152366, "step": 22900}
233
+ {"loss": 0.0097, "grad_norm": 1.0406662225723267, "learning_rate": 8.510604026845637e-06, "epoch": 3.552166190439416, "step": 23000}
234
+ {"loss": 0.0113, "grad_norm": 0.6790725588798523, "learning_rate": 8.456912751677852e-06, "epoch": 3.5676113985635958, "step": 23100}
235
+ {"loss": 0.0096, "grad_norm": 0.7238831520080566, "learning_rate": 8.403221476510067e-06, "epoch": 3.583056606687775, "step": 23200}
236
+ {"loss": 0.0114, "grad_norm": 0.9182873368263245, "learning_rate": 8.349530201342284e-06, "epoch": 3.5985018148119545, "step": 23300}
237
+ {"loss": 0.0108, "grad_norm": 0.3223716616630554, "learning_rate": 8.295838926174498e-06, "epoch": 3.613947022936134, "step": 23400}
238
+ {"loss": 0.01, "grad_norm": 1.3225654363632202, "learning_rate": 8.242147651006711e-06, "epoch": 3.6293922310603133, "step": 23500}
239
+ {"loss": 0.0105, "grad_norm": 0.2384531944990158, "learning_rate": 8.188456375838926e-06, "epoch": 3.644837439184493, "step": 23600}
240
+ {"loss": 0.013, "grad_norm": 0.9148604273796082, "learning_rate": 8.134765100671141e-06, "epoch": 3.6602826473086725, "step": 23700}
241
+ {"loss": 0.0117, "grad_norm": 0.41117745637893677, "learning_rate": 8.081073825503356e-06, "epoch": 3.675727855432852, "step": 23800}
242
+ {"loss": 0.0101, "grad_norm": 1.2300703525543213, "learning_rate": 8.02738255033557e-06, "epoch": 3.6911730635570317, "step": 23900}
243
+ {"loss": 0.0122, "grad_norm": 1.2263121604919434, "learning_rate": 7.973691275167786e-06, "epoch": 3.706618271681211, "step": 24000}
244
+ {"loss": 0.0103, "grad_norm": 0.6322954893112183, "learning_rate": 7.92e-06, "epoch": 3.7220634798053904, "step": 24100}
245
+ {"loss": 0.0102, "grad_norm": 0.44400155544281006, "learning_rate": 7.866308724832215e-06, "epoch": 3.73750868792957, "step": 24200}
246
+ {"loss": 0.0101, "grad_norm": 0.6776632070541382, "learning_rate": 7.81261744966443e-06, "epoch": 3.752953896053749, "step": 24300}
247
+ {"loss": 0.0108, "grad_norm": 0.7691863179206848, "learning_rate": 7.758926174496645e-06, "epoch": 3.768399104177929, "step": 24400}
248
+ {"loss": 0.0104, "grad_norm": 1.2351208925247192, "learning_rate": 7.70523489932886e-06, "epoch": 3.7838443123021084, "step": 24500}
249
+ {"loss": 0.0104, "grad_norm": 0.3325941264629364, "learning_rate": 7.651543624161075e-06, "epoch": 3.7992895204262878, "step": 24600}
250
+ {"loss": 0.0108, "grad_norm": 0.2872284948825836, "learning_rate": 7.5978523489932885e-06, "epoch": 3.814734728550467, "step": 24700}
251
+ {"loss": 0.0098, "grad_norm": 0.6922308206558228, "learning_rate": 7.544161073825504e-06, "epoch": 3.8301799366746465, "step": 24800}
252
+ {"loss": 0.0113, "grad_norm": 0.9661895036697388, "learning_rate": 7.490469798657719e-06, "epoch": 3.8456251447988263, "step": 24900}
253
+ {"loss": 0.0097, "grad_norm": 1.0570372343063354, "learning_rate": 7.436778523489934e-06, "epoch": 3.8610703529230057, "step": 25000}
254
+ {"loss": 0.011, "grad_norm": 0.5973047018051147, "learning_rate": 7.383087248322149e-06, "epoch": 3.876515561047185, "step": 25100}
255
+ {"loss": 0.0087, "grad_norm": 0.9165611863136292, "learning_rate": 7.329395973154363e-06, "epoch": 3.8919607691713645, "step": 25200}
256
+ {"loss": 0.0106, "grad_norm": 1.2968380451202393, "learning_rate": 7.2757046979865774e-06, "epoch": 3.907405977295544, "step": 25300}
257
+ {"loss": 0.0108, "grad_norm": 0.8751053810119629, "learning_rate": 7.222013422818792e-06, "epoch": 3.9228511854197237, "step": 25400}
258
+ {"loss": 0.0093, "grad_norm": 0.5580429434776306, "learning_rate": 7.168322147651007e-06, "epoch": 3.938296393543903, "step": 25500}
259
+ {"loss": 0.0118, "grad_norm": 0.944416880607605, "learning_rate": 7.114630872483223e-06, "epoch": 3.9537416016680824, "step": 25600}
260
+ {"loss": 0.0102, "grad_norm": 0.4454007148742676, "learning_rate": 7.060939597315437e-06, "epoch": 3.969186809792262, "step": 25700}
261
+ {"loss": 0.0103, "grad_norm": 0.2635466754436493, "learning_rate": 7.0072483221476516e-06, "epoch": 3.984632017916441, "step": 25800}
262
+ {"loss": 0.0094, "grad_norm": 1.7829089164733887, "learning_rate": 6.953557046979866e-06, "epoch": 4.0, "step": 25900}
263
+ {"eval_loss": 0.08570433408021927, "eval_wer": 4.363855264870494, "eval_cer": 4.3785517775007525, "eval_runtime": 5893.5199, "eval_samples_per_second": 4.611, "eval_steps_per_second": 0.576, "epoch": 4.0, "step": 25900}
264
+ {"train_runtime": 61536.7575, "train_samples_per_second": 20.201, "train_steps_per_second": 0.631, "total_flos": 2.3915764729479168e+20, "train_loss": 0.07450532003949507, "epoch": 4.0, "step": 25900}
265
+ {"eval_loss": 0.10621041804552078, "eval_wer": 2.6982781026640676, "eval_cer": 2.950426138652145, "eval_runtime": 5892.3475, "eval_samples_per_second": 4.612, "eval_steps_per_second": 0.577, "epoch": 4.0, "step": 25900}