diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,6 +1,6 @@ { - "best_metric": Infinity, - "best_model_checkpoint": null, + "best_metric": 95.4926921263555, + "best_model_checkpoint": "./iteboshi_temp/checkpoint-4000", "epoch": 16.50206440957886, "eval_steps": 1000, "global_step": 10000, @@ -10,2902 +10,2912 @@ "log_history": [ { "epoch": 0.04128819157720892, - "grad_norm": 48.47208786010742, + "grad_norm": 48.469512939453125, "learning_rate": 1.0000000000000002e-06, - "loss": 17.3809, + "loss": 17.381, "step": 25 }, { "epoch": 0.08257638315441784, - "grad_norm": 52.7760009765625, + "grad_norm": 52.77425003051758, "learning_rate": 1.9600000000000003e-06, "loss": 15.9672, "step": 50 }, { "epoch": 0.12386457473162675, - "grad_norm": 64.02676391601562, + "grad_norm": 63.97101974487305, "learning_rate": 2.96e-06, - "loss": 12.6576, + "loss": 12.6578, "step": 75 }, { "epoch": 0.16515276630883569, - "grad_norm": 26.37363052368164, + "grad_norm": 26.345178604125977, "learning_rate": 3.96e-06, - "loss": 8.293, + "loss": 8.2932, "step": 100 }, { "epoch": 0.20644095788604458, - "grad_norm": 12.368265151977539, + "grad_norm": 12.366632461547852, "learning_rate": 4.960000000000001e-06, - "loss": 6.2484, + "loss": 6.2485, "step": 125 }, { "epoch": 0.2477291494632535, - "grad_norm": 9.140127182006836, + "grad_norm": 9.131484985351562, "learning_rate": 5.9600000000000005e-06, "loss": 5.0845, "step": 150 }, { "epoch": 0.28901734104046245, - "grad_norm": 8.285837173461914, + "grad_norm": 8.288640975952148, "learning_rate": 6.96e-06, - "loss": 4.5217, + "loss": 4.5216, "step": 175 }, { "epoch": 0.33030553261767137, - "grad_norm": 8.110857009887695, + "grad_norm": 8.107129096984863, "learning_rate": 7.960000000000002e-06, "loss": 4.2152, "step": 200 }, { "epoch": 0.37159372419488024, - "grad_norm": 8.587676048278809, + "grad_norm": 8.564615249633789, "learning_rate": 8.96e-06, "loss": 4.095, "step": 225 }, { "epoch": 0.41288191577208916, - "grad_norm": 8.696348190307617, + "grad_norm": 8.67183780670166, "learning_rate": 9.960000000000001e-06, - "loss": 3.9265, + "loss": 3.9264, "step": 250 }, { "epoch": 0.4541701073492981, - "grad_norm": 8.708649635314941, + "grad_norm": 8.779706954956055, "learning_rate": 1.0960000000000002e-05, - "loss": 3.7976, + "loss": 3.7974, "step": 275 }, { "epoch": 0.495458298926507, - "grad_norm": 8.679261207580566, + "grad_norm": 8.62621021270752, "learning_rate": 1.196e-05, - "loss": 3.7627, + "loss": 3.7625, "step": 300 }, { "epoch": 0.5367464905037159, - "grad_norm": 9.209630012512207, + "grad_norm": 9.157522201538086, "learning_rate": 1.2960000000000001e-05, - "loss": 3.6967, + "loss": 3.6965, "step": 325 }, { "epoch": 0.5780346820809249, - "grad_norm": 8.386543273925781, + "grad_norm": 8.446877479553223, "learning_rate": 1.396e-05, "loss": 3.6216, "step": 350 }, { "epoch": 0.6193228736581338, - "grad_norm": 8.898401260375977, + "grad_norm": 8.873468399047852, "learning_rate": 1.496e-05, - "loss": 3.5456, + "loss": 3.5455, "step": 375 }, { "epoch": 0.6606110652353427, - "grad_norm": 7.772883415222168, + "grad_norm": 7.768847465515137, "learning_rate": 1.5960000000000003e-05, - "loss": 3.4954, + "loss": 3.4952, "step": 400 }, { "epoch": 0.7018992568125516, - "grad_norm": 8.508569717407227, + "grad_norm": 8.365340232849121, "learning_rate": 1.696e-05, - "loss": 3.4144, + "loss": 3.414, "step": 425 }, { "epoch": 0.7431874483897605, - "grad_norm": 7.907943248748779, + "grad_norm": 7.865358829498291, "learning_rate": 1.796e-05, - "loss": 3.3481, + "loss": 3.3472, "step": 450 }, { "epoch": 0.7844756399669695, - "grad_norm": 8.553926467895508, + "grad_norm": 8.56877613067627, "learning_rate": 1.896e-05, - "loss": 3.3021, + "loss": 3.3018, "step": 475 }, { "epoch": 0.8257638315441783, - "grad_norm": 9.033528327941895, + "grad_norm": 9.091480255126953, "learning_rate": 1.9960000000000002e-05, - "loss": 3.2553, + "loss": 3.255, "step": 500 }, { "epoch": 0.8670520231213873, - "grad_norm": 8.92698860168457, + "grad_norm": 8.974713325500488, "learning_rate": 1.9949473684210527e-05, - "loss": 3.1524, + "loss": 3.1519, "step": 525 }, { "epoch": 0.9083402146985962, - "grad_norm": 9.119793891906738, + "grad_norm": 9.134045600891113, "learning_rate": 1.989684210526316e-05, - "loss": 3.034, + "loss": 3.0334, "step": 550 }, { "epoch": 0.9496284062758051, - "grad_norm": 10.161808967590332, + "grad_norm": 10.075201988220215, "learning_rate": 1.984421052631579e-05, - "loss": 2.9658, + "loss": 2.9656, "step": 575 }, { "epoch": 0.990916597853014, - "grad_norm": 9.213955879211426, + "grad_norm": 9.26815128326416, "learning_rate": 1.9791578947368423e-05, - "loss": 2.8887, + "loss": 2.8884, "step": 600 }, { "epoch": 1.0313790255986788, - "grad_norm": 8.941823959350586, + "grad_norm": 8.885565757751465, "learning_rate": 1.9738947368421057e-05, - "loss": 2.719, + "loss": 2.7179, "step": 625 }, { "epoch": 1.0726672171758878, - "grad_norm": 8.994600296020508, + "grad_norm": 8.9259672164917, "learning_rate": 1.9686315789473687e-05, - "loss": 2.6422, + "loss": 2.6417, "step": 650 }, { "epoch": 1.1139554087530965, - "grad_norm": 10.292838096618652, + "grad_norm": 10.3223295211792, "learning_rate": 1.9633684210526316e-05, - "loss": 2.5674, + "loss": 2.5669, "step": 675 }, { "epoch": 1.1552436003303055, - "grad_norm": 10.515816688537598, + "grad_norm": 10.508142471313477, "learning_rate": 1.958105263157895e-05, - "loss": 2.5031, + "loss": 2.5026, "step": 700 }, { "epoch": 1.1965317919075145, - "grad_norm": 9.79793643951416, + "grad_norm": 9.78288745880127, "learning_rate": 1.952842105263158e-05, - "loss": 2.3914, + "loss": 2.3907, "step": 725 }, { "epoch": 1.2378199834847234, - "grad_norm": 9.563103675842285, + "grad_norm": 9.629091262817383, "learning_rate": 1.9475789473684213e-05, - "loss": 2.3986, + "loss": 2.3983, "step": 750 }, { "epoch": 1.2791081750619324, - "grad_norm": 9.516156196594238, + "grad_norm": 9.543179512023926, "learning_rate": 1.9423157894736843e-05, - "loss": 2.2832, + "loss": 2.283, "step": 775 }, { "epoch": 1.3203963666391412, - "grad_norm": 9.650139808654785, + "grad_norm": 9.676267623901367, "learning_rate": 1.9370526315789476e-05, - "loss": 2.252, + "loss": 2.2519, "step": 800 }, { "epoch": 1.3616845582163501, - "grad_norm": 9.165299415588379, + "grad_norm": 9.192870140075684, "learning_rate": 1.9317894736842106e-05, - "loss": 2.1517, + "loss": 2.1516, "step": 825 }, { "epoch": 1.402972749793559, - "grad_norm": 9.827438354492188, + "grad_norm": 9.859146118164062, "learning_rate": 1.926526315789474e-05, "loss": 2.1643, "step": 850 }, { "epoch": 1.4442609413707679, - "grad_norm": 9.822501182556152, + "grad_norm": 9.815247535705566, "learning_rate": 1.921263157894737e-05, - "loss": 2.0519, + "loss": 2.0514, "step": 875 }, { "epoch": 1.4855491329479769, - "grad_norm": 10.055537223815918, + "grad_norm": 10.037995338439941, "learning_rate": 1.916e-05, - "loss": 2.0872, + "loss": 2.0867, "step": 900 }, { "epoch": 1.5268373245251858, - "grad_norm": 9.642505645751953, + "grad_norm": 9.680682182312012, "learning_rate": 1.9107368421052633e-05, "loss": 1.999, "step": 925 }, { "epoch": 1.5681255161023948, - "grad_norm": 9.428027153015137, + "grad_norm": 9.410001754760742, "learning_rate": 1.9054736842105263e-05, "loss": 1.954, "step": 950 }, { "epoch": 1.6094137076796038, - "grad_norm": 8.909976959228516, + "grad_norm": 8.89158821105957, "learning_rate": 1.9002105263157896e-05, - "loss": 1.9201, + "loss": 1.92, "step": 975 }, { "epoch": 1.6507018992568125, - "grad_norm": 9.313642501831055, + "grad_norm": 9.326930046081543, "learning_rate": 1.894947368421053e-05, - "loss": 1.834, + "loss": 1.8339, "step": 1000 }, { "epoch": 1.6507018992568125, - "eval_loss": 1.9117295742034912, - "eval_runtime": 784.2462, - "eval_samples_per_second": 13.492, - "eval_steps_per_second": 1.125, - "eval_wer": Infinity, + "eval_cer": 93.62053351369322, + "eval_loss": 1.911484956741333, + "eval_runtime": 801.8096, + "eval_samples_per_second": 13.196, + "eval_steps_per_second": 1.1, + "eval_wer": 99.67939651107967, "step": 1000 }, { "epoch": 1.6919900908340215, - "grad_norm": 9.633246421813965, + "grad_norm": 9.614924430847168, "learning_rate": 1.889684210526316e-05, - "loss": 1.8354, + "loss": 1.8351, "step": 1025 }, { "epoch": 1.7332782824112303, - "grad_norm": 9.896079063415527, + "grad_norm": 9.912728309631348, "learning_rate": 1.8844210526315793e-05, - "loss": 1.7921, + "loss": 1.7924, "step": 1050 }, { "epoch": 1.7745664739884393, - "grad_norm": 9.53538703918457, + "grad_norm": 9.571353912353516, "learning_rate": 1.8791578947368423e-05, "loss": 1.7655, "step": 1075 }, { "epoch": 1.8158546655656482, - "grad_norm": 9.284989356994629, + "grad_norm": 9.284981727600098, "learning_rate": 1.8738947368421056e-05, - "loss": 1.7743, + "loss": 1.7737, "step": 1100 }, { "epoch": 1.8571428571428572, - "grad_norm": 10.101766586303711, + "grad_norm": 10.073586463928223, "learning_rate": 1.8686315789473686e-05, - "loss": 1.7346, + "loss": 1.7343, "step": 1125 }, { "epoch": 1.8984310487200662, - "grad_norm": 8.92200756072998, + "grad_norm": 8.913012504577637, "learning_rate": 1.8633684210526316e-05, - "loss": 1.6885, + "loss": 1.6881, "step": 1150 }, { "epoch": 1.939719240297275, - "grad_norm": 9.907315254211426, + "grad_norm": 9.885432243347168, "learning_rate": 1.858105263157895e-05, - "loss": 1.6912, + "loss": 1.6909, "step": 1175 }, { "epoch": 1.981007431874484, - "grad_norm": 9.401750564575195, + "grad_norm": 9.40440559387207, "learning_rate": 1.852842105263158e-05, - "loss": 1.6279, + "loss": 1.6276, "step": 1200 }, { "epoch": 2.0214698596201486, - "grad_norm": 10.461494445800781, + "grad_norm": 10.476126670837402, "learning_rate": 1.8475789473684212e-05, - "loss": 1.5268, + "loss": 1.5264, "step": 1225 }, { "epoch": 2.0627580511973576, - "grad_norm": 8.997374534606934, + "grad_norm": 8.929309844970703, "learning_rate": 1.8423157894736842e-05, - "loss": 1.4142, + "loss": 1.4139, "step": 1250 }, { "epoch": 2.1040462427745665, - "grad_norm": 9.4984712600708, + "grad_norm": 9.487678527832031, "learning_rate": 1.8370526315789476e-05, - "loss": 1.4769, + "loss": 1.4765, "step": 1275 }, { "epoch": 2.1453344343517755, - "grad_norm": 9.436307907104492, + "grad_norm": 9.428024291992188, "learning_rate": 1.831789473684211e-05, - "loss": 1.4284, + "loss": 1.4275, "step": 1300 }, { "epoch": 2.1866226259289845, - "grad_norm": 9.324549674987793, + "grad_norm": 9.300884246826172, "learning_rate": 1.826526315789474e-05, - "loss": 1.4117, + "loss": 1.4115, "step": 1325 }, { "epoch": 2.227910817506193, - "grad_norm": 9.608540534973145, + "grad_norm": 9.60734748840332, "learning_rate": 1.821263157894737e-05, - "loss": 1.4093, + "loss": 1.4087, "step": 1350 }, { "epoch": 2.269199009083402, - "grad_norm": 8.661190032958984, + "grad_norm": 8.660606384277344, "learning_rate": 1.8160000000000002e-05, - "loss": 1.3663, + "loss": 1.3658, "step": 1375 }, { "epoch": 2.310487200660611, - "grad_norm": 9.19803524017334, + "grad_norm": 9.169739723205566, "learning_rate": 1.8107368421052632e-05, - "loss": 1.3647, + "loss": 1.3645, "step": 1400 }, { "epoch": 2.35177539223782, - "grad_norm": 9.229018211364746, + "grad_norm": 9.20264720916748, "learning_rate": 1.8054736842105266e-05, - "loss": 1.3282, + "loss": 1.328, "step": 1425 }, { "epoch": 2.393063583815029, - "grad_norm": 9.667059898376465, + "grad_norm": 9.625157356262207, "learning_rate": 1.8002105263157896e-05, - "loss": 1.3098, + "loss": 1.3095, "step": 1450 }, { "epoch": 2.434351775392238, - "grad_norm": 8.727728843688965, + "grad_norm": 8.743439674377441, "learning_rate": 1.794947368421053e-05, - "loss": 1.3246, + "loss": 1.3244, "step": 1475 }, { "epoch": 2.475639966969447, - "grad_norm": 9.386205673217773, + "grad_norm": 9.378725051879883, "learning_rate": 1.789684210526316e-05, - "loss": 1.2703, + "loss": 1.2695, "step": 1500 }, { "epoch": 2.516928158546656, - "grad_norm": 9.435502052307129, + "grad_norm": 9.474600791931152, "learning_rate": 1.7844210526315792e-05, - "loss": 1.3239, + "loss": 1.3238, "step": 1525 }, { "epoch": 2.558216350123865, - "grad_norm": 8.652278900146484, + "grad_norm": 8.615851402282715, "learning_rate": 1.7791578947368422e-05, - "loss": 1.2833, + "loss": 1.2829, "step": 1550 }, { "epoch": 2.5995045417010734, - "grad_norm": 8.806912422180176, + "grad_norm": 8.702414512634277, "learning_rate": 1.7738947368421052e-05, - "loss": 1.2917, + "loss": 1.2914, "step": 1575 }, { "epoch": 2.6407927332782823, - "grad_norm": 9.386215209960938, + "grad_norm": 9.374676704406738, "learning_rate": 1.7686315789473685e-05, - "loss": 1.2775, + "loss": 1.2769, "step": 1600 }, { "epoch": 2.6820809248554913, - "grad_norm": 8.931626319885254, + "grad_norm": 8.985688209533691, "learning_rate": 1.7633684210526315e-05, - "loss": 1.2501, + "loss": 1.25, "step": 1625 }, { "epoch": 2.7233691164327003, - "grad_norm": 8.861407279968262, + "grad_norm": 8.864657402038574, "learning_rate": 1.758105263157895e-05, - "loss": 1.2031, + "loss": 1.2028, "step": 1650 }, { "epoch": 2.7646573080099093, - "grad_norm": 8.485048294067383, + "grad_norm": 8.514275550842285, "learning_rate": 1.7528421052631582e-05, - "loss": 1.2482, + "loss": 1.2477, "step": 1675 }, { "epoch": 2.805945499587118, - "grad_norm": 9.203420639038086, + "grad_norm": 9.147972106933594, "learning_rate": 1.7475789473684212e-05, - "loss": 1.2105, + "loss": 1.2103, "step": 1700 }, { "epoch": 2.847233691164327, - "grad_norm": 9.27082633972168, + "grad_norm": 9.296348571777344, "learning_rate": 1.7423157894736845e-05, - "loss": 1.2441, + "loss": 1.2437, "step": 1725 }, { "epoch": 2.8885218827415358, - "grad_norm": 9.1829833984375, + "grad_norm": 9.178862571716309, "learning_rate": 1.7370526315789475e-05, - "loss": 1.2006, + "loss": 1.1999, "step": 1750 }, { "epoch": 2.9298100743187447, - "grad_norm": 8.873353958129883, + "grad_norm": 8.8689603805542, "learning_rate": 1.731789473684211e-05, "loss": 1.167, "step": 1775 }, { "epoch": 2.9710982658959537, - "grad_norm": 8.414481163024902, + "grad_norm": 8.430618286132812, "learning_rate": 1.726526315789474e-05, - "loss": 1.162, + "loss": 1.1616, "step": 1800 }, { "epoch": 3.0115606936416186, - "grad_norm": 8.181883811950684, + "grad_norm": 8.175687789916992, "learning_rate": 1.721263157894737e-05, - "loss": 1.1539, + "loss": 1.1536, "step": 1825 }, { "epoch": 3.0528488852188276, - "grad_norm": 8.511463165283203, + "grad_norm": 8.549736022949219, "learning_rate": 1.7160000000000002e-05, - "loss": 1.0424, + "loss": 1.0421, "step": 1850 }, { "epoch": 3.094137076796036, - "grad_norm": 8.532715797424316, + "grad_norm": 8.512212753295898, "learning_rate": 1.710736842105263e-05, - "loss": 1.0293, + "loss": 1.0288, "step": 1875 }, { "epoch": 3.135425268373245, - "grad_norm": 7.531360149383545, + "grad_norm": 7.529111385345459, "learning_rate": 1.7054736842105265e-05, - "loss": 0.997, + "loss": 0.9965, "step": 1900 }, { "epoch": 3.176713459950454, - "grad_norm": 8.686123847961426, + "grad_norm": 8.706369400024414, "learning_rate": 1.7002105263157895e-05, - "loss": 1.0264, + "loss": 1.0262, "step": 1925 }, { "epoch": 3.218001651527663, - "grad_norm": 8.670825958251953, + "grad_norm": 8.718097686767578, "learning_rate": 1.6949473684210528e-05, - "loss": 1.0235, + "loss": 1.0232, "step": 1950 }, { "epoch": 3.259289843104872, - "grad_norm": 9.212960243225098, + "grad_norm": 9.124544143676758, "learning_rate": 1.689684210526316e-05, - "loss": 1.0109, + "loss": 1.0104, "step": 1975 }, { "epoch": 3.300578034682081, - "grad_norm": 7.840266704559326, + "grad_norm": 7.853435039520264, "learning_rate": 1.684421052631579e-05, - "loss": 0.9955, + "loss": 0.9948, "step": 2000 }, { "epoch": 3.300578034682081, - "eval_loss": 1.2765947580337524, - "eval_runtime": 710.8083, - "eval_samples_per_second": 14.886, - "eval_steps_per_second": 1.241, - "eval_wer": Infinity, + "eval_cer": 59.421319913335545, + "eval_loss": 1.2762852907180786, + "eval_runtime": 738.0908, + "eval_samples_per_second": 14.336, + "eval_steps_per_second": 1.195, + "eval_wer": 97.35030645921735, "step": 2000 }, { "epoch": 3.34186622625929, - "grad_norm": 7.762492656707764, + "grad_norm": 7.776848793029785, "learning_rate": 1.6791578947368425e-05, - "loss": 0.9837, + "loss": 0.9834, "step": 2025 }, { "epoch": 3.383154417836499, - "grad_norm": 8.323661804199219, + "grad_norm": 8.306714057922363, "learning_rate": 1.6738947368421055e-05, - "loss": 1.0037, + "loss": 1.0036, "step": 2050 }, { "epoch": 3.424442609413708, - "grad_norm": 9.538601875305176, + "grad_norm": 9.474985122680664, "learning_rate": 1.6686315789473685e-05, - "loss": 1.0539, + "loss": 1.0534, "step": 2075 }, { "epoch": 3.4657308009909165, - "grad_norm": 7.49852180480957, + "grad_norm": 7.486320972442627, "learning_rate": 1.6633684210526318e-05, - "loss": 0.9676, + "loss": 0.9673, "step": 2100 }, { "epoch": 3.5070189925681254, - "grad_norm": 8.388888359069824, + "grad_norm": 8.373710632324219, "learning_rate": 1.6581052631578948e-05, - "loss": 1.0012, + "loss": 1.0009, "step": 2125 }, { "epoch": 3.5483071841453344, - "grad_norm": 8.11563491821289, + "grad_norm": 8.073158264160156, "learning_rate": 1.652842105263158e-05, - "loss": 1.0151, + "loss": 1.0142, "step": 2150 }, { "epoch": 3.5895953757225434, - "grad_norm": 8.094681739807129, + "grad_norm": 8.070113182067871, "learning_rate": 1.647578947368421e-05, - "loss": 0.9746, + "loss": 0.9742, "step": 2175 }, { "epoch": 3.6308835672997524, - "grad_norm": 7.915094375610352, + "grad_norm": 7.891678333282471, "learning_rate": 1.6423157894736845e-05, - "loss": 1.0166, + "loss": 1.0163, "step": 2200 }, { "epoch": 3.6721717588769613, - "grad_norm": 7.892699718475342, + "grad_norm": 7.933286666870117, "learning_rate": 1.6370526315789475e-05, - "loss": 0.9443, + "loss": 0.9442, "step": 2225 }, { "epoch": 3.71345995045417, - "grad_norm": 8.693673133850098, + "grad_norm": 8.791912078857422, "learning_rate": 1.6317894736842105e-05, - "loss": 0.9299, + "loss": 0.9295, "step": 2250 }, { "epoch": 3.754748142031379, - "grad_norm": 7.691691875457764, + "grad_norm": 7.6395583152771, "learning_rate": 1.6265263157894738e-05, - "loss": 0.9333, + "loss": 0.9328, "step": 2275 }, { "epoch": 3.796036333608588, - "grad_norm": 8.604223251342773, + "grad_norm": 8.580839157104492, "learning_rate": 1.6212631578947368e-05, - "loss": 0.9782, + "loss": 0.9779, "step": 2300 }, { "epoch": 3.837324525185797, - "grad_norm": 8.439273834228516, + "grad_norm": 8.464973449707031, "learning_rate": 1.616e-05, - "loss": 0.9229, + "loss": 0.923, "step": 2325 }, { "epoch": 3.878612716763006, - "grad_norm": 8.69174575805664, + "grad_norm": 8.684146881103516, "learning_rate": 1.6107368421052634e-05, - "loss": 0.9848, + "loss": 0.9842, "step": 2350 }, { "epoch": 3.9199009083402148, - "grad_norm": 8.314153671264648, + "grad_norm": 8.286194801330566, "learning_rate": 1.6054736842105264e-05, - "loss": 0.9285, + "loss": 0.9283, "step": 2375 }, { "epoch": 3.9611890999174237, - "grad_norm": 8.562870979309082, + "grad_norm": 8.621734619140625, "learning_rate": 1.6002105263157898e-05, - "loss": 0.9432, + "loss": 0.9424, "step": 2400 }, { "epoch": 4.001651527663088, - "grad_norm": 7.700297832489014, + "grad_norm": 7.654041767120361, "learning_rate": 1.5949473684210528e-05, - "loss": 0.9091, + "loss": 0.9083, "step": 2425 }, { "epoch": 4.042939719240297, - "grad_norm": 8.673540115356445, + "grad_norm": 8.66199016571045, "learning_rate": 1.589684210526316e-05, - "loss": 0.8088, + "loss": 0.8082, "step": 2450 }, { "epoch": 4.084227910817506, - "grad_norm": 7.37542724609375, + "grad_norm": 7.401526927947998, "learning_rate": 1.584421052631579e-05, "loss": 0.7986, "step": 2475 }, { "epoch": 4.125516102394715, - "grad_norm": 6.779449462890625, + "grad_norm": 6.765837669372559, "learning_rate": 1.579157894736842e-05, - "loss": 0.8114, + "loss": 0.8111, "step": 2500 }, { "epoch": 4.166804293971924, - "grad_norm": 7.313191890716553, + "grad_norm": 7.28730583190918, "learning_rate": 1.5738947368421054e-05, "loss": 0.8089, "step": 2525 }, { "epoch": 4.208092485549133, - "grad_norm": 6.947272777557373, + "grad_norm": 7.018956661224365, "learning_rate": 1.5686315789473684e-05, - "loss": 0.778, + "loss": 0.7773, "step": 2550 }, { "epoch": 4.249380677126342, - "grad_norm": 8.322178840637207, + "grad_norm": 8.369226455688477, "learning_rate": 1.5633684210526318e-05, - "loss": 0.7897, + "loss": 0.7895, "step": 2575 }, { "epoch": 4.290668868703551, - "grad_norm": 8.319843292236328, + "grad_norm": 8.268858909606934, "learning_rate": 1.5581052631578947e-05, - "loss": 0.8583, + "loss": 0.857, "step": 2600 }, { "epoch": 4.33195706028076, - "grad_norm": 7.782474994659424, + "grad_norm": 7.69369649887085, "learning_rate": 1.552842105263158e-05, - "loss": 0.8077, + "loss": 0.807, "step": 2625 }, { "epoch": 4.373245251857969, - "grad_norm": 8.119930267333984, + "grad_norm": 8.12571907043457, "learning_rate": 1.547578947368421e-05, - "loss": 0.807, + "loss": 0.8066, "step": 2650 }, { "epoch": 4.414533443435178, - "grad_norm": 10.038588523864746, + "grad_norm": 9.947578430175781, "learning_rate": 1.5423157894736844e-05, - "loss": 0.8248, + "loss": 0.8245, "step": 2675 }, { "epoch": 4.455821635012386, - "grad_norm": 7.1363115310668945, + "grad_norm": 7.1731791496276855, "learning_rate": 1.5370526315789477e-05, - "loss": 0.8092, + "loss": 0.809, "step": 2700 }, { "epoch": 4.497109826589595, - "grad_norm": 8.094152450561523, + "grad_norm": 8.121570587158203, "learning_rate": 1.5317894736842107e-05, - "loss": 0.8386, + "loss": 0.8382, "step": 2725 }, { "epoch": 4.538398018166804, - "grad_norm": 8.467181205749512, + "grad_norm": 8.422553062438965, "learning_rate": 1.5265263157894737e-05, - "loss": 0.802, + "loss": 0.8017, "step": 2750 }, { "epoch": 4.579686209744013, - "grad_norm": 9.26181697845459, + "grad_norm": 9.263009071350098, "learning_rate": 1.521263157894737e-05, - "loss": 0.8057, + "loss": 0.805, "step": 2775 }, { "epoch": 4.620974401321222, - "grad_norm": 8.754571914672852, + "grad_norm": 8.761811256408691, "learning_rate": 1.516e-05, - "loss": 0.7905, + "loss": 0.7896, "step": 2800 }, { "epoch": 4.662262592898431, - "grad_norm": 7.060452461242676, + "grad_norm": 7.018781661987305, "learning_rate": 1.5107368421052632e-05, - "loss": 0.7644, + "loss": 0.764, "step": 2825 }, { "epoch": 4.70355078447564, - "grad_norm": 8.613765716552734, + "grad_norm": 8.710026741027832, "learning_rate": 1.5054736842105264e-05, - "loss": 0.7676, + "loss": 0.7673, "step": 2850 }, { "epoch": 4.744838976052849, - "grad_norm": 7.17141056060791, + "grad_norm": 7.182971954345703, "learning_rate": 1.5002105263157895e-05, - "loss": 0.7648, + "loss": 0.7649, "step": 2875 }, { "epoch": 4.786127167630058, - "grad_norm": 8.803303718566895, + "grad_norm": 8.922381401062012, "learning_rate": 1.4949473684210527e-05, - "loss": 0.8048, + "loss": 0.8047, "step": 2900 }, { "epoch": 4.827415359207267, - "grad_norm": 7.4490203857421875, + "grad_norm": 7.456427097320557, "learning_rate": 1.4896842105263159e-05, - "loss": 0.78, + "loss": 0.7793, "step": 2925 }, { "epoch": 4.868703550784476, - "grad_norm": 8.119257926940918, + "grad_norm": 8.16557502746582, "learning_rate": 1.484421052631579e-05, - "loss": 0.7647, + "loss": 0.7642, "step": 2950 }, { "epoch": 4.909991742361685, - "grad_norm": 7.520335674285889, + "grad_norm": 7.600188255310059, "learning_rate": 1.4791578947368422e-05, - "loss": 0.772, + "loss": 0.7719, "step": 2975 }, { "epoch": 4.951279933938894, - "grad_norm": 7.8852219581604, + "grad_norm": 7.850863456726074, "learning_rate": 1.4738947368421055e-05, - "loss": 0.7584, + "loss": 0.7577, "step": 3000 }, { "epoch": 4.951279933938894, - "eval_loss": 1.1080505847930908, - "eval_runtime": 699.2603, - "eval_samples_per_second": 15.132, - "eval_steps_per_second": 1.261, - "eval_wer": Infinity, + "eval_cer": 53.34678390058808, + "eval_loss": 1.1084543466567993, + "eval_runtime": 754.0137, + "eval_samples_per_second": 14.033, + "eval_steps_per_second": 1.17, + "eval_wer": 96.64309288071664, "step": 3000 }, { "epoch": 4.992568125516103, - "grad_norm": 8.253033638000488, + "grad_norm": 7.915883541107178, "learning_rate": 1.4686315789473687e-05, - "loss": 0.7497, + "loss": 0.7496, "step": 3025 }, { "epoch": 5.033030553261767, - "grad_norm": 7.215710163116455, + "grad_norm": 7.240871906280518, "learning_rate": 1.4633684210526317e-05, - "loss": 0.6874, + "loss": 0.687, "step": 3050 }, { "epoch": 5.074318744838976, - "grad_norm": 7.491812229156494, + "grad_norm": 7.480823516845703, "learning_rate": 1.4581052631578949e-05, - "loss": 0.6607, + "loss": 0.6601, "step": 3075 }, { "epoch": 5.115606936416185, - "grad_norm": 7.644138813018799, + "grad_norm": 7.587548732757568, "learning_rate": 1.452842105263158e-05, - "loss": 0.6834, + "loss": 0.6831, "step": 3100 }, { "epoch": 5.156895127993394, - "grad_norm": 7.1401686668396, + "grad_norm": 7.099322319030762, "learning_rate": 1.4475789473684212e-05, - "loss": 0.6667, + "loss": 0.6658, "step": 3125 }, { "epoch": 5.198183319570603, - "grad_norm": 7.159341335296631, + "grad_norm": 7.174043655395508, "learning_rate": 1.4423157894736843e-05, - "loss": 0.6706, + "loss": 0.6704, "step": 3150 }, { "epoch": 5.239471511147812, - "grad_norm": 6.769131660461426, + "grad_norm": 6.899098873138428, "learning_rate": 1.4370526315789475e-05, - "loss": 0.6508, + "loss": 0.65, "step": 3175 }, { "epoch": 5.280759702725021, - "grad_norm": 7.549663543701172, + "grad_norm": 7.519511699676514, "learning_rate": 1.4317894736842107e-05, - "loss": 0.6674, + "loss": 0.6665, "step": 3200 }, { "epoch": 5.32204789430223, - "grad_norm": 7.3631720542907715, + "grad_norm": 7.408304214477539, "learning_rate": 1.4265263157894738e-05, - "loss": 0.6401, + "loss": 0.6398, "step": 3225 }, { "epoch": 5.363336085879438, - "grad_norm": 7.147627353668213, + "grad_norm": 7.133185386657715, "learning_rate": 1.4212631578947368e-05, - "loss": 0.6805, + "loss": 0.6802, "step": 3250 }, { "epoch": 5.404624277456647, - "grad_norm": 7.807270050048828, + "grad_norm": 7.851741313934326, "learning_rate": 1.416e-05, - "loss": 0.6333, + "loss": 0.6327, "step": 3275 }, { "epoch": 5.445912469033856, - "grad_norm": 7.820711612701416, + "grad_norm": 7.773444175720215, "learning_rate": 1.4107368421052632e-05, - "loss": 0.7001, + "loss": 0.6998, "step": 3300 }, { "epoch": 5.487200660611065, - "grad_norm": 8.526030540466309, + "grad_norm": 8.28067398071289, "learning_rate": 1.4054736842105263e-05, - "loss": 0.6728, + "loss": 0.6726, "step": 3325 }, { "epoch": 5.528488852188274, - "grad_norm": 6.857508659362793, + "grad_norm": 6.886124134063721, "learning_rate": 1.4002105263157897e-05, - "loss": 0.6425, + "loss": 0.6418, "step": 3350 }, { "epoch": 5.569777043765483, - "grad_norm": 6.723370552062988, + "grad_norm": 6.617015361785889, "learning_rate": 1.3949473684210528e-05, - "loss": 0.6616, + "loss": 0.6613, "step": 3375 }, { "epoch": 5.611065235342692, - "grad_norm": 7.373733043670654, + "grad_norm": 7.447840690612793, "learning_rate": 1.389684210526316e-05, - "loss": 0.6609, + "loss": 0.6608, "step": 3400 }, { "epoch": 5.652353426919901, - "grad_norm": 7.192294120788574, + "grad_norm": 7.151592254638672, "learning_rate": 1.3844210526315791e-05, - "loss": 0.6413, + "loss": 0.6409, "step": 3425 }, { "epoch": 5.69364161849711, - "grad_norm": 6.876883029937744, + "grad_norm": 7.587296962738037, "learning_rate": 1.3791578947368423e-05, - "loss": 0.648, + "loss": 0.6483, "step": 3450 }, { "epoch": 5.734929810074319, - "grad_norm": 7.807666301727295, + "grad_norm": 7.848781585693359, "learning_rate": 1.3738947368421055e-05, - "loss": 0.6643, + "loss": 0.6638, "step": 3475 }, { "epoch": 5.776218001651528, - "grad_norm": 7.850846767425537, + "grad_norm": 7.8602986335754395, "learning_rate": 1.3686315789473685e-05, - "loss": 0.6469, + "loss": 0.6464, "step": 3500 }, { "epoch": 5.817506193228737, - "grad_norm": 7.765756607055664, + "grad_norm": 7.792623043060303, "learning_rate": 1.3633684210526316e-05, - "loss": 0.6642, + "loss": 0.6641, "step": 3525 }, { "epoch": 5.858794384805946, - "grad_norm": 7.8592915534973145, + "grad_norm": 7.796040058135986, "learning_rate": 1.3581052631578948e-05, - "loss": 0.6531, + "loss": 0.6529, "step": 3550 }, { "epoch": 5.900082576383154, - "grad_norm": 7.569016456604004, + "grad_norm": 7.527121543884277, "learning_rate": 1.352842105263158e-05, - "loss": 0.646, + "loss": 0.6458, "step": 3575 }, { "epoch": 5.941370767960363, - "grad_norm": 6.537998199462891, + "grad_norm": 6.546653747558594, "learning_rate": 1.3475789473684211e-05, - "loss": 0.609, + "loss": 0.6087, "step": 3600 }, { "epoch": 5.982658959537572, - "grad_norm": 7.915237903594971, + "grad_norm": 8.012529373168945, "learning_rate": 1.3423157894736843e-05, - "loss": 0.6304, + "loss": 0.6298, "step": 3625 }, { "epoch": 6.023121387283237, - "grad_norm": 7.4267683029174805, + "grad_norm": 7.391585350036621, "learning_rate": 1.3370526315789475e-05, - "loss": 0.5556, + "loss": 0.5543, "step": 3650 }, { "epoch": 6.064409578860446, - "grad_norm": 6.749971389770508, + "grad_norm": 6.664039134979248, "learning_rate": 1.3317894736842108e-05, - "loss": 0.5266, + "loss": 0.5252, "step": 3675 }, { "epoch": 6.105697770437655, - "grad_norm": 6.487602233886719, + "grad_norm": 6.451517105102539, "learning_rate": 1.326526315789474e-05, - "loss": 0.5717, + "loss": 0.5712, "step": 3700 }, { "epoch": 6.146985962014864, - "grad_norm": 6.367309093475342, + "grad_norm": 6.391648292541504, "learning_rate": 1.321263157894737e-05, - "loss": 0.5661, + "loss": 0.5664, "step": 3725 }, { "epoch": 6.188274153592072, - "grad_norm": 6.503593444824219, + "grad_norm": 6.524181842803955, "learning_rate": 1.3160000000000001e-05, - "loss": 0.552, + "loss": 0.5514, "step": 3750 }, { "epoch": 6.229562345169281, - "grad_norm": 6.83872127532959, + "grad_norm": 6.788370609283447, "learning_rate": 1.3107368421052633e-05, - "loss": 0.57, + "loss": 0.5693, "step": 3775 }, { "epoch": 6.27085053674649, - "grad_norm": 6.761080741882324, + "grad_norm": 6.802549839019775, "learning_rate": 1.3054736842105264e-05, "loss": 0.5332, "step": 3800 }, { "epoch": 6.312138728323699, - "grad_norm": 7.023492336273193, + "grad_norm": 6.864429473876953, "learning_rate": 1.3002105263157896e-05, - "loss": 0.5588, + "loss": 0.5582, "step": 3825 }, { "epoch": 6.353426919900908, - "grad_norm": 7.079690933227539, + "grad_norm": 7.168697357177734, "learning_rate": 1.2949473684210528e-05, "loss": 0.5471, "step": 3850 }, { "epoch": 6.394715111478117, - "grad_norm": 7.1418633460998535, + "grad_norm": 6.500988960266113, "learning_rate": 1.289684210526316e-05, - "loss": 0.5471, + "loss": 0.5472, "step": 3875 }, { "epoch": 6.436003303055326, - "grad_norm": 6.428386211395264, + "grad_norm": 6.378172874450684, "learning_rate": 1.2844210526315791e-05, - "loss": 0.5519, + "loss": 0.5512, "step": 3900 }, { "epoch": 6.477291494632535, - "grad_norm": 6.753655910491943, + "grad_norm": 6.865861415863037, "learning_rate": 1.279157894736842e-05, - "loss": 0.5745, + "loss": 0.5739, "step": 3925 }, { "epoch": 6.518579686209744, - "grad_norm": 6.977758407592773, + "grad_norm": 6.929198741912842, "learning_rate": 1.2738947368421052e-05, - "loss": 0.5657, + "loss": 0.5654, "step": 3950 }, { "epoch": 6.559867877786953, - "grad_norm": 6.839511394500732, + "grad_norm": 6.951797008514404, "learning_rate": 1.2686315789473684e-05, - "loss": 0.5432, + "loss": 0.5422, "step": 3975 }, { "epoch": 6.601156069364162, - "grad_norm": 7.744399547576904, + "grad_norm": 7.957674980163574, "learning_rate": 1.2633684210526316e-05, - "loss": 0.5473, + "loss": 0.5464, "step": 4000 }, { "epoch": 6.601156069364162, - "eval_loss": 1.056926965713501, - "eval_runtime": 696.4132, - "eval_samples_per_second": 15.194, - "eval_steps_per_second": 1.266, - "eval_wer": Infinity, + "eval_cer": 48.25066202010707, + "eval_loss": 1.0574802160263062, + "eval_runtime": 742.515, + "eval_samples_per_second": 14.25, + "eval_steps_per_second": 1.188, + "eval_wer": 95.4926921263555, "step": 4000 }, { "epoch": 6.642444260941371, - "grad_norm": 7.011012077331543, + "grad_norm": 7.094240665435791, "learning_rate": 1.2581052631578949e-05, - "loss": 0.5369, + "loss": 0.5365, "step": 4025 }, { "epoch": 6.68373245251858, - "grad_norm": 8.44078540802002, + "grad_norm": 8.255016326904297, "learning_rate": 1.252842105263158e-05, - "loss": 0.5776, + "loss": 0.5768, "step": 4050 }, { "epoch": 6.725020644095789, - "grad_norm": 7.306187629699707, + "grad_norm": 7.434171676635742, "learning_rate": 1.2475789473684212e-05, - "loss": 0.5903, + "loss": 0.5892, "step": 4075 }, { "epoch": 6.766308835672998, - "grad_norm": 6.952062606811523, + "grad_norm": 6.989074230194092, "learning_rate": 1.2423157894736844e-05, - "loss": 0.55, + "loss": 0.5494, "step": 4100 }, { "epoch": 6.807597027250207, - "grad_norm": 6.906418323516846, + "grad_norm": 6.808770656585693, "learning_rate": 1.2370526315789476e-05, - "loss": 0.528, + "loss": 0.5281, "step": 4125 }, { "epoch": 6.848885218827416, - "grad_norm": 7.448940277099609, + "grad_norm": 7.470989227294922, "learning_rate": 1.2317894736842107e-05, - "loss": 0.5505, + "loss": 0.5499, "step": 4150 }, { "epoch": 6.890173410404624, - "grad_norm": 7.480178356170654, + "grad_norm": 7.557090759277344, "learning_rate": 1.2265263157894737e-05, - "loss": 0.5409, + "loss": 0.5413, "step": 4175 }, { "epoch": 6.931461601981833, - "grad_norm": 7.500922679901123, + "grad_norm": 7.504119396209717, "learning_rate": 1.2212631578947369e-05, - "loss": 0.5136, + "loss": 0.513, "step": 4200 }, { "epoch": 6.972749793559042, - "grad_norm": 6.731492042541504, + "grad_norm": 6.75087308883667, "learning_rate": 1.216e-05, - "loss": 0.534, + "loss": 0.5343, "step": 4225 }, { "epoch": 7.013212221304707, - "grad_norm": 5.806182861328125, + "grad_norm": 5.9934515953063965, "learning_rate": 1.2107368421052632e-05, - "loss": 0.4876, + "loss": 0.4879, "step": 4250 }, { "epoch": 7.054500412881916, - "grad_norm": 6.7710089683532715, + "grad_norm": 6.634411334991455, "learning_rate": 1.2054736842105264e-05, - "loss": 0.4452, + "loss": 0.4454, "step": 4275 }, { "epoch": 7.095788604459124, - "grad_norm": 7.5482563972473145, + "grad_norm": 7.614193916320801, "learning_rate": 1.2002105263157895e-05, - "loss": 0.4388, + "loss": 0.4392, "step": 4300 }, { "epoch": 7.137076796036333, - "grad_norm": 5.3507537841796875, + "grad_norm": 5.324835777282715, "learning_rate": 1.1949473684210527e-05, - "loss": 0.4572, + "loss": 0.456, "step": 4325 }, { "epoch": 7.178364987613542, - "grad_norm": 6.261397838592529, + "grad_norm": 6.1424241065979, "learning_rate": 1.189684210526316e-05, "loss": 0.46, "step": 4350 }, { "epoch": 7.219653179190751, - "grad_norm": 7.379156589508057, + "grad_norm": 7.1868767738342285, "learning_rate": 1.1844210526315792e-05, - "loss": 0.4595, + "loss": 0.4591, "step": 4375 }, { "epoch": 7.26094137076796, - "grad_norm": 6.441304683685303, + "grad_norm": 6.41041374206543, "learning_rate": 1.1791578947368424e-05, - "loss": 0.4623, + "loss": 0.4618, "step": 4400 }, { "epoch": 7.302229562345169, - "grad_norm": 6.100840091705322, + "grad_norm": 6.095521450042725, "learning_rate": 1.1738947368421054e-05, - "loss": 0.4686, + "loss": 0.4692, "step": 4425 }, { "epoch": 7.343517753922378, - "grad_norm": 6.594038486480713, + "grad_norm": 6.523583889007568, "learning_rate": 1.1686315789473685e-05, - "loss": 0.4638, + "loss": 0.4631, "step": 4450 }, { "epoch": 7.384805945499587, - "grad_norm": 6.223761558532715, + "grad_norm": 6.192616939544678, "learning_rate": 1.1633684210526317e-05, - "loss": 0.4687, + "loss": 0.4689, "step": 4475 }, { "epoch": 7.426094137076796, - "grad_norm": 5.7157063484191895, + "grad_norm": 5.833901882171631, "learning_rate": 1.1581052631578948e-05, - "loss": 0.4475, + "loss": 0.4472, "step": 4500 }, { "epoch": 7.467382328654005, - "grad_norm": 7.293723106384277, + "grad_norm": 7.350675582885742, "learning_rate": 1.152842105263158e-05, - "loss": 0.4725, + "loss": 0.472, "step": 4525 }, { "epoch": 7.508670520231214, - "grad_norm": 6.591246604919434, + "grad_norm": 6.825387477874756, "learning_rate": 1.1475789473684212e-05, - "loss": 0.4783, + "loss": 0.4773, "step": 4550 }, { "epoch": 7.549958711808423, - "grad_norm": 7.094688415527344, + "grad_norm": 7.2338762283325195, "learning_rate": 1.1423157894736843e-05, - "loss": 0.4521, + "loss": 0.4515, "step": 4575 }, { "epoch": 7.591246903385632, - "grad_norm": 7.4666595458984375, + "grad_norm": 7.510733604431152, "learning_rate": 1.1370526315789473e-05, - "loss": 0.4679, + "loss": 0.4676, "step": 4600 }, { "epoch": 7.632535094962841, - "grad_norm": 6.891570568084717, + "grad_norm": 6.926485538482666, "learning_rate": 1.1317894736842105e-05, - "loss": 0.469, + "loss": 0.4692, "step": 4625 }, { "epoch": 7.673823286540049, - "grad_norm": 7.736660003662109, + "grad_norm": 7.200405120849609, "learning_rate": 1.1265263157894737e-05, "loss": 0.4874, "step": 4650 }, { "epoch": 7.715111478117258, - "grad_norm": 7.7448248863220215, + "grad_norm": 7.665420055389404, "learning_rate": 1.1212631578947368e-05, - "loss": 0.4516, + "loss": 0.4515, "step": 4675 }, { "epoch": 7.756399669694467, - "grad_norm": 6.311560153961182, + "grad_norm": 6.377760410308838, "learning_rate": 1.1160000000000002e-05, - "loss": 0.4642, + "loss": 0.4639, "step": 4700 }, { "epoch": 7.797687861271676, - "grad_norm": 7.201409816741943, + "grad_norm": 7.134575366973877, "learning_rate": 1.1107368421052633e-05, - "loss": 0.4545, + "loss": 0.4543, "step": 4725 }, { "epoch": 7.838976052848885, - "grad_norm": 6.123779773712158, + "grad_norm": 6.138679027557373, "learning_rate": 1.1054736842105265e-05, - "loss": 0.4287, + "loss": 0.4276, "step": 4750 }, { "epoch": 7.880264244426094, - "grad_norm": 7.237743377685547, + "grad_norm": 6.741207122802734, "learning_rate": 1.1002105263157896e-05, - "loss": 0.4993, + "loss": 0.4975, "step": 4775 }, { "epoch": 7.921552436003303, - "grad_norm": 6.445917129516602, + "grad_norm": 6.514609336853027, "learning_rate": 1.0949473684210528e-05, - "loss": 0.4843, + "loss": 0.4841, "step": 4800 }, { "epoch": 7.962840627580512, - "grad_norm": 6.234178066253662, + "grad_norm": 6.2714009284973145, "learning_rate": 1.089684210526316e-05, - "loss": 0.4657, + "loss": 0.4658, "step": 4825 }, { "epoch": 8.003303055326176, - "grad_norm": 7.247951507568359, + "grad_norm": 7.009129524230957, "learning_rate": 1.084421052631579e-05, - "loss": 0.4923, + "loss": 0.4919, "step": 4850 }, { "epoch": 8.044591246903385, - "grad_norm": 5.9410223960876465, + "grad_norm": 6.334277153015137, "learning_rate": 1.0791578947368421e-05, - "loss": 0.3872, + "loss": 0.3873, "step": 4875 }, { "epoch": 8.085879438480594, - "grad_norm": 6.340057849884033, + "grad_norm": 6.403853416442871, "learning_rate": 1.0738947368421053e-05, - "loss": 0.3966, + "loss": 0.3961, "step": 4900 }, { "epoch": 8.127167630057803, - "grad_norm": 6.049722194671631, + "grad_norm": 6.143258571624756, "learning_rate": 1.0686315789473685e-05, - "loss": 0.3706, + "loss": 0.3699, "step": 4925 }, { "epoch": 8.168455821635012, - "grad_norm": 5.964404582977295, + "grad_norm": 6.0696539878845215, "learning_rate": 1.0633684210526316e-05, - "loss": 0.3854, + "loss": 0.3858, "step": 4950 }, { "epoch": 8.209744013212221, - "grad_norm": 6.412349224090576, + "grad_norm": 6.363874435424805, "learning_rate": 1.0581052631578948e-05, - "loss": 0.3923, + "loss": 0.392, "step": 4975 }, { "epoch": 8.25103220478943, - "grad_norm": 6.039672374725342, + "grad_norm": 5.990923881530762, "learning_rate": 1.052842105263158e-05, - "loss": 0.4191, + "loss": 0.4182, "step": 5000 }, { "epoch": 8.25103220478943, - "eval_loss": 1.0567598342895508, - "eval_runtime": 694.1236, - "eval_samples_per_second": 15.244, - "eval_steps_per_second": 1.271, - "eval_wer": Infinity, + "eval_cer": 47.292882280788234, + "eval_loss": 1.0573757886886597, + "eval_runtime": 732.0504, + "eval_samples_per_second": 14.454, + "eval_steps_per_second": 1.205, + "eval_wer": 96.23762376237623, "step": 5000 }, { "epoch": 8.29232039636664, - "grad_norm": 6.2119011878967285, + "grad_norm": 5.979728698730469, "learning_rate": 1.047578947368421e-05, - "loss": 0.3807, + "loss": 0.38, "step": 5025 }, { "epoch": 8.333608587943848, - "grad_norm": 6.253497123718262, + "grad_norm": 6.235442161560059, "learning_rate": 1.0423157894736844e-05, - "loss": 0.3827, + "loss": 0.3823, "step": 5050 }, { "epoch": 8.374896779521057, - "grad_norm": 6.059364318847656, + "grad_norm": 6.0435471534729, "learning_rate": 1.0370526315789476e-05, - "loss": 0.389, + "loss": 0.3889, "step": 5075 }, { "epoch": 8.416184971098266, - "grad_norm": 5.26777982711792, + "grad_norm": 5.2331624031066895, "learning_rate": 1.0317894736842106e-05, - "loss": 0.3998, + "loss": 0.4002, "step": 5100 }, { "epoch": 8.457473162675475, - "grad_norm": 5.529355525970459, + "grad_norm": 5.645880699157715, "learning_rate": 1.0265263157894738e-05, - "loss": 0.3954, + "loss": 0.395, "step": 5125 }, { "epoch": 8.498761354252684, - "grad_norm": 6.751101016998291, + "grad_norm": 6.611478805541992, "learning_rate": 1.021263157894737e-05, "loss": 0.3964, "step": 5150 }, { "epoch": 8.540049545829893, - "grad_norm": 5.405825138092041, + "grad_norm": 5.404575347900391, "learning_rate": 1.0160000000000001e-05, - "loss": 0.389, + "loss": 0.3886, "step": 5175 }, { "epoch": 8.581337737407102, - "grad_norm": 7.287736415863037, + "grad_norm": 7.283960819244385, "learning_rate": 1.0107368421052633e-05, - "loss": 0.4196, + "loss": 0.4193, "step": 5200 }, { "epoch": 8.622625928984311, - "grad_norm": 6.384584903717041, + "grad_norm": 6.334190368652344, "learning_rate": 1.0054736842105264e-05, - "loss": 0.4133, + "loss": 0.4129, "step": 5225 }, { "epoch": 8.66391412056152, - "grad_norm": 6.451061725616455, + "grad_norm": 6.392534255981445, "learning_rate": 1.0002105263157896e-05, - "loss": 0.3837, + "loss": 0.3832, "step": 5250 }, { "epoch": 8.705202312138729, - "grad_norm": 5.920280933380127, + "grad_norm": 6.049492359161377, "learning_rate": 9.949473684210526e-06, - "loss": 0.4004, + "loss": 0.4008, "step": 5275 }, { "epoch": 8.746490503715938, - "grad_norm": 5.891169548034668, + "grad_norm": 5.881133556365967, "learning_rate": 9.89684210526316e-06, - "loss": 0.3932, + "loss": 0.3927, "step": 5300 }, { "epoch": 8.787778695293147, - "grad_norm": 6.570876598358154, + "grad_norm": 6.470844745635986, "learning_rate": 9.84421052631579e-06, - "loss": 0.4032, + "loss": 0.4036, "step": 5325 }, { "epoch": 8.829066886870356, - "grad_norm": 7.074426174163818, + "grad_norm": 6.188573837280273, "learning_rate": 9.791578947368422e-06, - "loss": 0.3951, + "loss": 0.394, "step": 5350 }, { "epoch": 8.870355078447563, - "grad_norm": 6.7733564376831055, + "grad_norm": 6.691900730133057, "learning_rate": 9.738947368421054e-06, - "loss": 0.3992, + "loss": 0.399, "step": 5375 }, { "epoch": 8.911643270024772, - "grad_norm": 6.8463969230651855, + "grad_norm": 6.903393745422363, "learning_rate": 9.686315789473684e-06, - "loss": 0.4004, + "loss": 0.3999, "step": 5400 }, { "epoch": 8.952931461601981, - "grad_norm": 6.444080352783203, + "grad_norm": 6.473091125488281, "learning_rate": 9.633684210526316e-06, - "loss": 0.3844, + "loss": 0.3833, "step": 5425 }, { "epoch": 8.99421965317919, - "grad_norm": 6.648684024810791, + "grad_norm": 6.615601062774658, "learning_rate": 9.581052631578947e-06, - "loss": 0.3962, + "loss": 0.3961, "step": 5450 }, { "epoch": 9.034682080924856, - "grad_norm": 5.462916851043701, + "grad_norm": 5.409454345703125, "learning_rate": 9.52842105263158e-06, - "loss": 0.3186, + "loss": 0.3184, "step": 5475 }, { "epoch": 9.075970272502065, - "grad_norm": 4.91270112991333, + "grad_norm": 4.625083923339844, "learning_rate": 9.475789473684212e-06, - "loss": 0.3091, + "loss": 0.308, "step": 5500 }, { "epoch": 9.117258464079274, - "grad_norm": 6.3385467529296875, + "grad_norm": 6.347021579742432, "learning_rate": 9.423157894736842e-06, - "loss": 0.3088, + "loss": 0.3087, "step": 5525 }, { "epoch": 9.158546655656481, - "grad_norm": 6.092479228973389, + "grad_norm": 6.036218166351318, "learning_rate": 9.370526315789474e-06, - "loss": 0.3366, + "loss": 0.3361, "step": 5550 }, { "epoch": 9.19983484723369, - "grad_norm": 6.2821364402771, + "grad_norm": 6.974196910858154, "learning_rate": 9.317894736842105e-06, - "loss": 0.328, + "loss": 0.3277, "step": 5575 }, { "epoch": 9.2411230388109, - "grad_norm": 6.607597827911377, + "grad_norm": 6.37134313583374, "learning_rate": 9.265263157894737e-06, - "loss": 0.3237, + "loss": 0.3227, "step": 5600 }, { "epoch": 9.282411230388108, - "grad_norm": 6.044376850128174, + "grad_norm": 5.647594928741455, "learning_rate": 9.21263157894737e-06, - "loss": 0.3405, + "loss": 0.3408, "step": 5625 }, { "epoch": 9.323699421965317, - "grad_norm": 5.687169551849365, + "grad_norm": 5.690819263458252, "learning_rate": 9.16e-06, - "loss": 0.3433, + "loss": 0.3431, "step": 5650 }, { "epoch": 9.364987613542526, - "grad_norm": 6.336760997772217, + "grad_norm": 6.394257545471191, "learning_rate": 9.107368421052632e-06, - "loss": 0.3681, + "loss": 0.3684, "step": 5675 }, { "epoch": 9.406275805119735, - "grad_norm": 5.349738597869873, + "grad_norm": 5.489740371704102, "learning_rate": 9.054736842105264e-06, - "loss": 0.3414, + "loss": 0.3401, "step": 5700 }, { "epoch": 9.447563996696944, - "grad_norm": 5.843466758728027, + "grad_norm": 5.952576637268066, "learning_rate": 9.002105263157895e-06, - "loss": 0.3494, + "loss": 0.3485, "step": 5725 }, { "epoch": 9.488852188274153, - "grad_norm": 5.646897315979004, + "grad_norm": 5.653362274169922, "learning_rate": 8.949473684210527e-06, - "loss": 0.3491, + "loss": 0.349, "step": 5750 }, { "epoch": 9.530140379851362, - "grad_norm": 6.2915472984313965, + "grad_norm": 6.067948341369629, "learning_rate": 8.896842105263159e-06, - "loss": 0.3627, + "loss": 0.3629, "step": 5775 }, { "epoch": 9.571428571428571, - "grad_norm": 5.424402236938477, + "grad_norm": 5.475063800811768, "learning_rate": 8.84421052631579e-06, - "loss": 0.3203, + "loss": 0.3207, "step": 5800 }, { "epoch": 9.61271676300578, - "grad_norm": 6.5985260009765625, + "grad_norm": 6.7783308029174805, "learning_rate": 8.791578947368422e-06, - "loss": 0.3449, + "loss": 0.3454, "step": 5825 }, { "epoch": 9.65400495458299, - "grad_norm": 5.538530349731445, + "grad_norm": 5.636470317840576, "learning_rate": 8.738947368421053e-06, - "loss": 0.3407, + "loss": 0.3408, "step": 5850 }, { "epoch": 9.695293146160198, - "grad_norm": 5.327653408050537, + "grad_norm": 5.290091037750244, "learning_rate": 8.686315789473685e-06, - "loss": 0.3235, + "loss": 0.3229, "step": 5875 }, { "epoch": 9.736581337737407, - "grad_norm": 6.340036392211914, + "grad_norm": 5.795228481292725, "learning_rate": 8.633684210526317e-06, - "loss": 0.3595, + "loss": 0.36, "step": 5900 }, { "epoch": 9.777869529314616, - "grad_norm": 5.395780086517334, + "grad_norm": 5.348466396331787, "learning_rate": 8.581052631578948e-06, - "loss": 0.3368, + "loss": 0.3358, "step": 5925 }, { "epoch": 9.819157720891825, - "grad_norm": 5.803914546966553, + "grad_norm": 5.813995361328125, "learning_rate": 8.528421052631578e-06, - "loss": 0.3497, + "loss": 0.349, "step": 5950 }, { "epoch": 9.860445912469034, - "grad_norm": 6.23885440826416, + "grad_norm": 5.806293964385986, "learning_rate": 8.475789473684212e-06, - "loss": 0.3504, + "loss": 0.3493, "step": 5975 }, { "epoch": 9.901734104046243, - "grad_norm": 6.402750015258789, - "learning_rate": 8.425263157894737e-06, - "loss": 0.3167, + "grad_norm": 6.348217487335205, + "learning_rate": 8.423157894736843e-06, + "loss": 0.3164, "step": 6000 }, { "epoch": 9.901734104046243, - "eval_loss": 1.060942530632019, - "eval_runtime": 695.1498, - "eval_samples_per_second": 15.221, - "eval_steps_per_second": 1.269, - "eval_wer": Infinity, + "eval_cer": 49.44172503525042, + "eval_loss": 1.0615819692611694, + "eval_runtime": 712.2161, + "eval_samples_per_second": 14.856, + "eval_steps_per_second": 1.238, + "eval_wer": 96.38849599245638, "step": 6000 }, { "epoch": 9.943022295623452, - "grad_norm": 5.562705993652344, - "learning_rate": 8.372631578947368e-06, - "loss": 0.3252, + "grad_norm": 5.729813575744629, + "learning_rate": 8.370526315789475e-06, + "loss": 0.3253, "step": 6025 }, { "epoch": 9.984310487200661, - "grad_norm": 7.461434364318848, - "learning_rate": 8.32e-06, - "loss": 0.344, + "grad_norm": 7.191168785095215, + "learning_rate": 8.317894736842107e-06, + "loss": 0.3433, "step": 6050 }, { "epoch": 10.024772914946325, - "grad_norm": 5.992921352386475, - "learning_rate": 8.267368421052632e-06, + "grad_norm": 6.767040729522705, + "learning_rate": 8.265263157894737e-06, "loss": 0.2875, "step": 6075 }, { "epoch": 10.066061106523534, - "grad_norm": 4.986021995544434, - "learning_rate": 8.214736842105265e-06, + "grad_norm": 5.048575401306152, + "learning_rate": 8.212631578947368e-06, "loss": 0.2773, "step": 6100 }, { "epoch": 10.107349298100743, - "grad_norm": 4.8151092529296875, - "learning_rate": 8.162105263157895e-06, - "loss": 0.2886, + "grad_norm": 4.836114883422852, + "learning_rate": 8.16e-06, + "loss": 0.2889, "step": 6125 }, { "epoch": 10.148637489677952, - "grad_norm": 4.7961883544921875, - "learning_rate": 8.109473684210527e-06, - "loss": 0.3002, + "grad_norm": 4.830049514770508, + "learning_rate": 8.107368421052633e-06, + "loss": 0.3004, "step": 6150 }, { "epoch": 10.189925681255161, - "grad_norm": 5.7180399894714355, - "learning_rate": 8.056842105263158e-06, - "loss": 0.2995, + "grad_norm": 5.718193054199219, + "learning_rate": 8.054736842105265e-06, + "loss": 0.2989, "step": 6175 }, { "epoch": 10.23121387283237, - "grad_norm": 5.287256717681885, - "learning_rate": 8.00421052631579e-06, - "loss": 0.2723, + "grad_norm": 7.24569034576416, + "learning_rate": 8.002105263157895e-06, + "loss": 0.2712, "step": 6200 }, { "epoch": 10.27250206440958, - "grad_norm": 6.107348442077637, - "learning_rate": 7.951578947368421e-06, - "loss": 0.2909, + "grad_norm": 6.074246883392334, + "learning_rate": 7.949473684210526e-06, + "loss": 0.2899, "step": 6225 }, { "epoch": 10.313790255986788, - "grad_norm": 5.607096195220947, - "learning_rate": 7.898947368421053e-06, - "loss": 0.3019, + "grad_norm": 5.600971698760986, + "learning_rate": 7.896842105263158e-06, + "loss": 0.3, "step": 6250 }, { "epoch": 10.355078447563997, - "grad_norm": 5.941197872161865, - "learning_rate": 7.846315789473685e-06, - "loss": 0.29, + "grad_norm": 6.002727508544922, + "learning_rate": 7.84421052631579e-06, + "loss": 0.2895, "step": 6275 }, { "epoch": 10.396366639141206, - "grad_norm": 5.736889362335205, + "grad_norm": 5.616031169891357, "learning_rate": 7.793684210526316e-06, - "loss": 0.3138, + "loss": 0.3123, "step": 6300 }, { "epoch": 10.437654830718415, - "grad_norm": 5.2913126945495605, + "grad_norm": 5.227489948272705, "learning_rate": 7.741052631578948e-06, - "loss": 0.28, + "loss": 0.2796, "step": 6325 }, { "epoch": 10.478943022295624, - "grad_norm": 5.760861396789551, + "grad_norm": 5.6762261390686035, "learning_rate": 7.68842105263158e-06, - "loss": 0.3069, + "loss": 0.3061, "step": 6350 }, { "epoch": 10.520231213872833, - "grad_norm": 4.886830806732178, + "grad_norm": 4.9320197105407715, "learning_rate": 7.635789473684211e-06, - "loss": 0.2785, + "loss": 0.2779, "step": 6375 }, { "epoch": 10.561519405450042, - "grad_norm": 5.5294389724731445, + "grad_norm": 5.518511772155762, "learning_rate": 7.583157894736842e-06, - "loss": 0.2862, + "loss": 0.2861, "step": 6400 }, { "epoch": 10.602807597027251, - "grad_norm": 5.370204448699951, + "grad_norm": 5.470175266265869, "learning_rate": 7.5305263157894745e-06, - "loss": 0.2794, + "loss": 0.279, "step": 6425 }, { "epoch": 10.64409578860446, - "grad_norm": 4.194797039031982, + "grad_norm": 4.248904228210449, "learning_rate": 7.477894736842106e-06, - "loss": 0.3064, + "loss": 0.3054, "step": 6450 }, { "epoch": 10.685383980181667, - "grad_norm": 6.214608669281006, + "grad_norm": 6.3203935623168945, "learning_rate": 7.425263157894738e-06, - "loss": 0.3177, + "loss": 0.3168, "step": 6475 }, { "epoch": 10.726672171758876, - "grad_norm": 5.77274227142334, + "grad_norm": 5.901068687438965, "learning_rate": 7.3726315789473694e-06, - "loss": 0.31, + "loss": 0.3103, "step": 6500 }, { "epoch": 10.767960363336085, - "grad_norm": 6.4990105628967285, + "grad_norm": 6.379432678222656, "learning_rate": 7.32e-06, - "loss": 0.2716, + "loss": 0.2715, "step": 6525 }, { "epoch": 10.809248554913294, - "grad_norm": 5.706321716308594, + "grad_norm": 5.802628040313721, "learning_rate": 7.267368421052632e-06, - "loss": 0.2983, + "loss": 0.2981, "step": 6550 }, { "epoch": 10.850536746490503, - "grad_norm": 6.846646308898926, + "grad_norm": 6.295346260070801, "learning_rate": 7.2147368421052635e-06, - "loss": 0.2678, + "loss": 0.2676, "step": 6575 }, { "epoch": 10.891824938067712, - "grad_norm": 5.539628982543945, + "grad_norm": 5.411930561065674, "learning_rate": 7.162105263157896e-06, "loss": 0.2809, "step": 6600 }, { "epoch": 10.933113129644921, - "grad_norm": 5.272496700286865, + "grad_norm": 5.577861309051514, "learning_rate": 7.109473684210528e-06, "loss": 0.2473, "step": 6625 }, { "epoch": 10.97440132122213, - "grad_norm": 5.260607719421387, + "grad_norm": 5.272998332977295, "learning_rate": 7.056842105263158e-06, "loss": 0.2931, "step": 6650 }, { "epoch": 11.014863748967795, - "grad_norm": 5.241586685180664, + "grad_norm": 5.220921993255615, "learning_rate": 7.00421052631579e-06, - "loss": 0.2771, + "loss": 0.2772, "step": 6675 }, { "epoch": 11.056151940545003, - "grad_norm": 4.1120781898498535, + "grad_norm": 4.028062343597412, "learning_rate": 6.951578947368422e-06, "loss": 0.2511, "step": 6700 }, { "epoch": 11.097440132122212, - "grad_norm": 5.060145378112793, + "grad_norm": 4.835925579071045, "learning_rate": 6.8989473684210525e-06, "loss": 0.2502, "step": 6725 }, { "epoch": 11.138728323699421, - "grad_norm": 6.010138988494873, + "grad_norm": 5.920368194580078, "learning_rate": 6.846315789473684e-06, - "loss": 0.2442, + "loss": 0.2438, "step": 6750 }, { "epoch": 11.18001651527663, - "grad_norm": 5.282010078430176, + "grad_norm": 5.114356517791748, "learning_rate": 6.793684210526317e-06, - "loss": 0.2485, + "loss": 0.2478, "step": 6775 }, { "epoch": 11.22130470685384, - "grad_norm": 4.690225601196289, + "grad_norm": 4.871094703674316, "learning_rate": 6.741052631578948e-06, - "loss": 0.2506, + "loss": 0.2504, "step": 6800 }, { "epoch": 11.262592898431048, - "grad_norm": 3.9595465660095215, + "grad_norm": 4.20628547668457, "learning_rate": 6.68842105263158e-06, - "loss": 0.2325, + "loss": 0.2316, "step": 6825 }, { "epoch": 11.303881090008257, - "grad_norm": 5.889693737030029, + "grad_norm": 5.95331335067749, "learning_rate": 6.635789473684211e-06, - "loss": 0.2527, + "loss": 0.2524, "step": 6850 }, { "epoch": 11.345169281585466, - "grad_norm": 5.33756685256958, + "grad_norm": 5.462084770202637, "learning_rate": 6.583157894736842e-06, "loss": 0.2488, "step": 6875 }, { "epoch": 11.386457473162675, - "grad_norm": 5.049898147583008, + "grad_norm": 4.980792999267578, "learning_rate": 6.530526315789474e-06, - "loss": 0.2355, + "loss": 0.2344, "step": 6900 }, { "epoch": 11.427745664739884, - "grad_norm": 5.452274799346924, + "grad_norm": 5.869212627410889, "learning_rate": 6.477894736842106e-06, - "loss": 0.249, + "loss": 0.2484, "step": 6925 }, { "epoch": 11.469033856317093, - "grad_norm": 4.464880466461182, + "grad_norm": 5.4397873878479, "learning_rate": 6.425263157894738e-06, "loss": 0.2523, "step": 6950 }, { "epoch": 11.510322047894302, - "grad_norm": 5.923793792724609, + "grad_norm": 5.890199661254883, "learning_rate": 6.372631578947369e-06, - "loss": 0.2564, + "loss": 0.2565, "step": 6975 }, { "epoch": 11.551610239471511, - "grad_norm": 4.821681976318359, + "grad_norm": 4.83267879486084, "learning_rate": 6.3200000000000005e-06, - "loss": 0.2322, + "loss": 0.2319, "step": 7000 }, { "epoch": 11.551610239471511, - "eval_loss": 1.0933067798614502, - "eval_runtime": 721.5402, - "eval_samples_per_second": 14.664, - "eval_steps_per_second": 1.222, - "eval_wer": Infinity, + "eval_cer": 49.55349466371671, + "eval_loss": 1.0929207801818848, + "eval_runtime": 718.5035, + "eval_samples_per_second": 14.726, + "eval_steps_per_second": 1.228, + "eval_wer": 96.25648279113625, "step": 7000 }, { "epoch": 11.59289843104872, - "grad_norm": 6.9122748374938965, + "grad_norm": 6.01038932800293, "learning_rate": 6.267368421052632e-06, - "loss": 0.2701, + "loss": 0.2703, "step": 7025 }, { "epoch": 11.63418662262593, - "grad_norm": 4.586194038391113, + "grad_norm": 4.515256404876709, "learning_rate": 6.214736842105264e-06, - "loss": 0.2496, + "loss": 0.2497, "step": 7050 }, { "epoch": 11.675474814203138, - "grad_norm": 5.90902853012085, + "grad_norm": 5.914691925048828, "learning_rate": 6.1621052631578945e-06, - "loss": 0.2384, + "loss": 0.2374, "step": 7075 }, { "epoch": 11.716763005780347, - "grad_norm": 5.328728199005127, + "grad_norm": 5.570655822753906, "learning_rate": 6.109473684210527e-06, "loss": 0.253, "step": 7100 }, { "epoch": 11.758051197357556, - "grad_norm": 6.127770900726318, + "grad_norm": 5.9783244132995605, "learning_rate": 6.056842105263159e-06, - "loss": 0.2359, + "loss": 0.2364, "step": 7125 }, { "epoch": 11.799339388934765, - "grad_norm": 5.675428867340088, + "grad_norm": 5.595921516418457, "learning_rate": 6.00421052631579e-06, - "loss": 0.2383, + "loss": 0.2375, "step": 7150 }, { "epoch": 11.840627580511974, - "grad_norm": 4.921765327453613, + "grad_norm": 4.966527938842773, "learning_rate": 5.951578947368422e-06, - "loss": 0.2648, + "loss": 0.2652, "step": 7175 }, { "epoch": 11.881915772089183, - "grad_norm": 4.198760986328125, + "grad_norm": 4.243671894073486, "learning_rate": 5.898947368421053e-06, - "loss": 0.2351, + "loss": 0.2348, "step": 7200 }, { "epoch": 11.923203963666392, - "grad_norm": 4.786856174468994, + "grad_norm": 4.7744669914245605, "learning_rate": 5.846315789473684e-06, - "loss": 0.2468, + "loss": 0.2461, "step": 7225 }, { "epoch": 11.964492155243601, - "grad_norm": 4.955503463745117, + "grad_norm": 4.887286186218262, "learning_rate": 5.793684210526316e-06, - "loss": 0.2471, + "loss": 0.2466, "step": 7250 }, { "epoch": 12.004954582989265, - "grad_norm": 4.753754138946533, + "grad_norm": 4.767704963684082, "learning_rate": 5.7410526315789485e-06, - "loss": 0.2625, + "loss": 0.2628, "step": 7275 }, { "epoch": 12.046242774566474, - "grad_norm": 3.7155563831329346, + "grad_norm": 3.731494426727295, "learning_rate": 5.68842105263158e-06, - "loss": 0.2123, + "loss": 0.2121, "step": 7300 }, { "epoch": 12.087530966143683, - "grad_norm": 4.637264251708984, + "grad_norm": 4.329957962036133, "learning_rate": 5.635789473684211e-06, - "loss": 0.209, + "loss": 0.208, "step": 7325 }, { "epoch": 12.128819157720892, - "grad_norm": 5.065235614776611, + "grad_norm": 4.992996692657471, "learning_rate": 5.5831578947368425e-06, - "loss": 0.2016, + "loss": 0.2007, "step": 7350 }, { "epoch": 12.170107349298101, - "grad_norm": 4.761934280395508, + "grad_norm": 4.756143093109131, "learning_rate": 5.530526315789474e-06, - "loss": 0.2081, + "loss": 0.2084, "step": 7375 }, { "epoch": 12.21139554087531, - "grad_norm": 5.17978048324585, + "grad_norm": 5.0002827644348145, "learning_rate": 5.477894736842105e-06, - "loss": 0.2288, + "loss": 0.2285, "step": 7400 }, { "epoch": 12.25268373245252, - "grad_norm": 3.554265260696411, + "grad_norm": 3.5156126022338867, "learning_rate": 5.425263157894737e-06, - "loss": 0.2143, + "loss": 0.214, "step": 7425 }, { "epoch": 12.293971924029728, - "grad_norm": 5.236491680145264, + "grad_norm": 4.908341884613037, "learning_rate": 5.372631578947369e-06, - "loss": 0.2025, + "loss": 0.2023, "step": 7450 }, { "epoch": 12.335260115606937, - "grad_norm": 5.198639392852783, + "grad_norm": 5.120205879211426, "learning_rate": 5.320000000000001e-06, "loss": 0.2199, "step": 7475 }, { "epoch": 12.376548307184144, - "grad_norm": 5.509646892547607, + "grad_norm": 5.24783992767334, "learning_rate": 5.267368421052632e-06, - "loss": 0.1932, + "loss": 0.1918, "step": 7500 }, { "epoch": 12.417836498761353, - "grad_norm": 4.569394588470459, + "grad_norm": 4.663553714752197, "learning_rate": 5.214736842105263e-06, "loss": 0.2018, "step": 7525 }, { "epoch": 12.459124690338562, - "grad_norm": 4.686092376708984, + "grad_norm": 4.562893867492676, "learning_rate": 5.162105263157895e-06, - "loss": 0.2148, + "loss": 0.2141, "step": 7550 }, { "epoch": 12.500412881915771, - "grad_norm": 5.779174327850342, + "grad_norm": 5.8733062744140625, "learning_rate": 5.1094736842105264e-06, - "loss": 0.2362, + "loss": 0.2359, "step": 7575 }, { "epoch": 12.54170107349298, - "grad_norm": 7.2701873779296875, + "grad_norm": 7.215207576751709, "learning_rate": 5.056842105263158e-06, - "loss": 0.2421, + "loss": 0.2422, "step": 7600 }, { "epoch": 12.58298926507019, - "grad_norm": 4.832854270935059, + "grad_norm": 4.633525371551514, "learning_rate": 5.0042105263157906e-06, - "loss": 0.219, + "loss": 0.2188, "step": 7625 }, { "epoch": 12.624277456647398, - "grad_norm": 5.803878307342529, + "grad_norm": 5.8343305587768555, "learning_rate": 4.951578947368421e-06, - "loss": 0.2172, + "loss": 0.2169, "step": 7650 }, { "epoch": 12.665565648224607, - "grad_norm": 4.021119594573975, + "grad_norm": 3.9599239826202393, "learning_rate": 4.898947368421053e-06, - "loss": 0.227, + "loss": 0.2275, "step": 7675 }, { "epoch": 12.706853839801816, - "grad_norm": 5.169255256652832, + "grad_norm": 5.128805637359619, "learning_rate": 4.846315789473685e-06, - "loss": 0.21, + "loss": 0.2095, "step": 7700 }, { "epoch": 12.748142031379025, - "grad_norm": 4.798264503479004, + "grad_norm": 4.6408162117004395, "learning_rate": 4.793684210526316e-06, - "loss": 0.1984, + "loss": 0.1983, "step": 7725 }, { "epoch": 12.789430222956234, - "grad_norm": 5.631106853485107, + "grad_norm": 4.684074878692627, "learning_rate": 4.741052631578948e-06, - "loss": 0.225, + "loss": 0.2247, "step": 7750 }, { "epoch": 12.830718414533443, - "grad_norm": 4.223946571350098, + "grad_norm": 4.256105422973633, "learning_rate": 4.6884210526315795e-06, - "loss": 0.2163, + "loss": 0.2164, "step": 7775 }, { "epoch": 12.872006606110652, - "grad_norm": 4.958042621612549, + "grad_norm": 4.794992446899414, "learning_rate": 4.63578947368421e-06, - "loss": 0.2131, + "loss": 0.2133, "step": 7800 }, { "epoch": 12.913294797687861, - "grad_norm": 6.063140392303467, + "grad_norm": 6.137476921081543, "learning_rate": 4.583157894736843e-06, - "loss": 0.2443, + "loss": 0.2434, "step": 7825 }, { "epoch": 12.95458298926507, - "grad_norm": 4.037540435791016, + "grad_norm": 4.048770427703857, "learning_rate": 4.5305263157894744e-06, - "loss": 0.2033, + "loss": 0.2031, "step": 7850 }, { "epoch": 12.995871180842279, - "grad_norm": 4.495209217071533, + "grad_norm": 4.465664863586426, "learning_rate": 4.477894736842105e-06, - "loss": 0.2307, + "loss": 0.2296, "step": 7875 }, { "epoch": 13.036333608587944, - "grad_norm": 4.494256496429443, + "grad_norm": 4.5229668617248535, "learning_rate": 4.425263157894737e-06, - "loss": 0.1894, + "loss": 0.189, "step": 7900 }, { "epoch": 13.077621800165153, - "grad_norm": 3.877058744430542, + "grad_norm": 3.7040534019470215, "learning_rate": 4.3726315789473685e-06, - "loss": 0.195, + "loss": 0.1951, "step": 7925 }, { "epoch": 13.118909991742361, - "grad_norm": 4.717246055603027, + "grad_norm": 4.726925373077393, "learning_rate": 4.32e-06, - "loss": 0.1745, + "loss": 0.1743, "step": 7950 }, { "epoch": 13.16019818331957, - "grad_norm": 4.590612411499023, + "grad_norm": 4.432371616363525, "learning_rate": 4.267368421052632e-06, - "loss": 0.195, + "loss": 0.1948, "step": 7975 }, { "epoch": 13.20148637489678, - "grad_norm": 6.558470249176025, + "grad_norm": 6.360472679138184, "learning_rate": 4.214736842105263e-06, - "loss": 0.1913, + "loss": 0.1899, "step": 8000 }, { "epoch": 13.20148637489678, - "eval_loss": 1.1226582527160645, - "eval_runtime": 703.5225, - "eval_samples_per_second": 15.04, - "eval_steps_per_second": 1.254, - "eval_wer": Infinity, + "eval_cer": 48.673667075533345, + "eval_loss": 1.1223334074020386, + "eval_runtime": 698.1183, + "eval_samples_per_second": 15.156, + "eval_steps_per_second": 1.263, + "eval_wer": 97.27487034417727, "step": 8000 }, { "epoch": 13.242774566473988, - "grad_norm": 4.316694736480713, + "grad_norm": 4.355146408081055, "learning_rate": 4.162105263157895e-06, - "loss": 0.2075, + "loss": 0.207, "step": 8025 }, { "epoch": 13.284062758051197, - "grad_norm": 4.108456611633301, + "grad_norm": 4.026797771453857, "learning_rate": 4.109473684210527e-06, - "loss": 0.1654, + "loss": 0.1656, "step": 8050 }, { "epoch": 13.325350949628406, - "grad_norm": 3.923902750015259, + "grad_norm": 4.001533031463623, "learning_rate": 4.056842105263158e-06, - "loss": 0.19, + "loss": 0.1899, "step": 8075 }, { "epoch": 13.366639141205615, - "grad_norm": 3.792008638381958, + "grad_norm": 3.8727056980133057, "learning_rate": 4.00421052631579e-06, - "loss": 0.1924, + "loss": 0.1919, "step": 8100 }, { "epoch": 13.407927332782824, - "grad_norm": 4.64762020111084, + "grad_norm": 4.644744873046875, "learning_rate": 3.951578947368422e-06, - "loss": 0.1988, + "loss": 0.1987, "step": 8125 }, { "epoch": 13.449215524360033, - "grad_norm": 4.5705246925354, + "grad_norm": 4.7693071365356445, "learning_rate": 3.898947368421052e-06, - "loss": 0.1857, + "loss": 0.185, "step": 8150 }, { "epoch": 13.490503715937242, - "grad_norm": 4.8831610679626465, + "grad_norm": 5.065967082977295, "learning_rate": 3.846315789473685e-06, - "loss": 0.1813, + "loss": 0.1811, "step": 8175 }, { "epoch": 13.531791907514451, - "grad_norm": 3.7012646198272705, + "grad_norm": 3.682410955429077, "learning_rate": 3.793684210526316e-06, - "loss": 0.212, + "loss": 0.2118, "step": 8200 }, { "epoch": 13.57308009909166, - "grad_norm": 4.698606967926025, + "grad_norm": 4.680816650390625, "learning_rate": 3.7410526315789473e-06, - "loss": 0.1999, + "loss": 0.1995, "step": 8225 }, { "epoch": 13.61436829066887, - "grad_norm": 5.355463027954102, + "grad_norm": 5.313553333282471, "learning_rate": 3.6884210526315794e-06, - "loss": 0.188, + "loss": 0.1871, "step": 8250 }, { "epoch": 13.655656482246078, - "grad_norm": 4.198258876800537, + "grad_norm": 4.104048728942871, "learning_rate": 3.635789473684211e-06, - "loss": 0.187, + "loss": 0.1861, "step": 8275 }, { "epoch": 13.696944673823287, - "grad_norm": 4.937903881072998, + "grad_norm": 4.6280293464660645, "learning_rate": 3.5831578947368422e-06, - "loss": 0.1942, + "loss": 0.1936, "step": 8300 }, { "epoch": 13.738232865400496, - "grad_norm": 5.5391645431518555, + "grad_norm": 5.779093265533447, "learning_rate": 3.5305263157894743e-06, - "loss": 0.1897, + "loss": 0.1899, "step": 8325 }, { "epoch": 13.779521056977705, - "grad_norm": 3.7583065032958984, + "grad_norm": 3.84360933303833, "learning_rate": 3.4778947368421055e-06, - "loss": 0.1932, + "loss": 0.1937, "step": 8350 }, { "epoch": 13.820809248554914, - "grad_norm": 5.243954181671143, + "grad_norm": 5.995066165924072, "learning_rate": 3.425263157894737e-06, - "loss": 0.198, + "loss": 0.1976, "step": 8375 }, { "epoch": 13.862097440132121, - "grad_norm": 5.0445556640625, + "grad_norm": 5.045938491821289, "learning_rate": 3.3726315789473683e-06, - "loss": 0.1773, + "loss": 0.177, "step": 8400 }, { "epoch": 13.90338563170933, - "grad_norm": 4.348180770874023, + "grad_norm": 4.3831658363342285, "learning_rate": 3.3200000000000004e-06, - "loss": 0.1834, + "loss": 0.1831, "step": 8425 }, { "epoch": 13.94467382328654, - "grad_norm": 5.649644374847412, + "grad_norm": 5.6045002937316895, "learning_rate": 3.267368421052632e-06, - "loss": 0.1772, + "loss": 0.177, "step": 8450 }, { "epoch": 13.985962014863748, - "grad_norm": 5.276495933532715, + "grad_norm": 5.313602447509766, "learning_rate": 3.2147368421052633e-06, - "loss": 0.211, + "loss": 0.2106, "step": 8475 }, { "epoch": 14.026424442609414, - "grad_norm": 4.379940032958984, + "grad_norm": 4.277268409729004, "learning_rate": 3.1621052631578953e-06, "loss": 0.1697, "step": 8500 }, { "epoch": 14.067712634186623, - "grad_norm": 4.398263454437256, + "grad_norm": 4.43281364440918, "learning_rate": 3.1094736842105265e-06, - "loss": 0.1656, + "loss": 0.1655, "step": 8525 }, { "epoch": 14.109000825763832, - "grad_norm": 5.636984348297119, + "grad_norm": 5.5066304206848145, "learning_rate": 3.056842105263158e-06, - "loss": 0.1613, + "loss": 0.1611, "step": 8550 }, { "epoch": 14.15028901734104, - "grad_norm": 5.673081398010254, + "grad_norm": 5.53204345703125, "learning_rate": 3.0042105263157894e-06, - "loss": 0.1814, + "loss": 0.1808, "step": 8575 }, { "epoch": 14.191577208918249, - "grad_norm": 4.340714454650879, + "grad_norm": 4.399189472198486, "learning_rate": 2.9515789473684214e-06, - "loss": 0.1482, + "loss": 0.1487, "step": 8600 }, { "epoch": 14.232865400495458, - "grad_norm": 4.788120269775391, + "grad_norm": 4.710522174835205, "learning_rate": 2.8989473684210526e-06, - "loss": 0.1688, + "loss": 0.1684, "step": 8625 }, { "epoch": 14.274153592072667, - "grad_norm": 4.258752822875977, + "grad_norm": 4.319856643676758, "learning_rate": 2.8463157894736843e-06, - "loss": 0.1623, + "loss": 0.1626, "step": 8650 }, { "epoch": 14.315441783649876, - "grad_norm": 5.6167988777160645, + "grad_norm": 5.691439628601074, "learning_rate": 2.7936842105263163e-06, - "loss": 0.1736, + "loss": 0.1734, "step": 8675 }, { "epoch": 14.356729975227084, - "grad_norm": 4.973904609680176, + "grad_norm": 5.005030632019043, "learning_rate": 2.7410526315789476e-06, - "loss": 0.163, + "loss": 0.1627, "step": 8700 }, { "epoch": 14.398018166804293, - "grad_norm": 5.393108367919922, + "grad_norm": 4.130620002746582, "learning_rate": 2.688421052631579e-06, - "loss": 0.1721, + "loss": 0.1718, "step": 8725 }, { "epoch": 14.439306358381502, - "grad_norm": 3.455310583114624, + "grad_norm": 3.437147617340088, "learning_rate": 2.635789473684211e-06, - "loss": 0.1762, + "loss": 0.1756, "step": 8750 }, { "epoch": 14.480594549958711, - "grad_norm": 4.485291957855225, + "grad_norm": 4.4854044914245605, "learning_rate": 2.5831578947368425e-06, - "loss": 0.167, + "loss": 0.1671, "step": 8775 }, { "epoch": 14.52188274153592, - "grad_norm": 5.023384094238281, + "grad_norm": 5.259097099304199, "learning_rate": 2.5305263157894737e-06, - "loss": 0.1797, + "loss": 0.1782, "step": 8800 }, { "epoch": 14.56317093311313, - "grad_norm": 3.6772608757019043, + "grad_norm": 3.7995481491088867, "learning_rate": 2.4778947368421053e-06, - "loss": 0.1688, + "loss": 0.1684, "step": 8825 }, { "epoch": 14.604459124690338, - "grad_norm": 4.417513847351074, + "grad_norm": 4.331813335418701, "learning_rate": 2.425263157894737e-06, - "loss": 0.176, + "loss": 0.1757, "step": 8850 }, { "epoch": 14.645747316267547, - "grad_norm": 5.2892231941223145, + "grad_norm": 5.367282867431641, "learning_rate": 2.3726315789473686e-06, - "loss": 0.1802, + "loss": 0.1796, "step": 8875 }, { "epoch": 14.687035507844756, - "grad_norm": 5.4643025398254395, + "grad_norm": 5.606546401977539, "learning_rate": 2.3200000000000002e-06, - "loss": 0.1914, + "loss": 0.1911, "step": 8900 }, { "epoch": 14.728323699421965, - "grad_norm": 3.4271011352539062, + "grad_norm": 3.9387214183807373, "learning_rate": 2.267368421052632e-06, - "loss": 0.1668, + "loss": 0.1666, "step": 8925 }, { "epoch": 14.769611890999174, - "grad_norm": 4.431731224060059, + "grad_norm": 4.426852703094482, "learning_rate": 2.2147368421052635e-06, - "loss": 0.1725, + "loss": 0.1717, "step": 8950 }, { "epoch": 14.810900082576383, - "grad_norm": 3.5097200870513916, + "grad_norm": 3.4610788822174072, "learning_rate": 2.1621052631578947e-06, - "loss": 0.169, + "loss": 0.168, "step": 8975 }, { "epoch": 14.852188274153592, - "grad_norm": 4.471765995025635, + "grad_norm": 4.5235676765441895, "learning_rate": 2.1094736842105264e-06, - "loss": 0.1424, + "loss": 0.1425, "step": 9000 }, { "epoch": 14.852188274153592, - "eval_loss": 1.1418445110321045, - "eval_runtime": 697.6196, - "eval_samples_per_second": 15.167, - "eval_steps_per_second": 1.264, - "eval_wer": Infinity, + "eval_cer": 48.44840828585512, + "eval_loss": 1.142202377319336, + "eval_runtime": 701.7243, + "eval_samples_per_second": 15.079, + "eval_steps_per_second": 1.257, + "eval_wer": 96.61480433757662, "step": 9000 }, { "epoch": 14.893476465730801, - "grad_norm": 4.103869915008545, - "learning_rate": 2.056842105263158e-06, - "loss": 0.1855, + "grad_norm": 4.106043815612793, + "learning_rate": 2.058947368421053e-06, + "loss": 0.1853, "step": 9025 }, { "epoch": 14.93476465730801, - "grad_norm": 4.589334487915039, - "learning_rate": 2.0042105263157896e-06, - "loss": 0.1723, + "grad_norm": 4.604458808898926, + "learning_rate": 2.0063157894736843e-06, + "loss": 0.1725, "step": 9050 }, { "epoch": 14.97605284888522, - "grad_norm": 5.06859016418457, - "learning_rate": 1.9515789473684213e-06, - "loss": 0.1568, + "grad_norm": 5.01749324798584, + "learning_rate": 1.953684210526316e-06, + "loss": 0.1566, "step": 9075 }, { "epoch": 15.016515276630884, - "grad_norm": 3.8599202632904053, - "learning_rate": 1.8989473684210527e-06, - "loss": 0.1802, + "grad_norm": 3.69610857963562, + "learning_rate": 1.9010526315789476e-06, + "loss": 0.1791, "step": 9100 }, { "epoch": 15.057803468208093, - "grad_norm": 3.51918363571167, - "learning_rate": 1.8463157894736843e-06, - "loss": 0.163, + "grad_norm": 3.566075563430786, + "learning_rate": 1.848421052631579e-06, + "loss": 0.1626, "step": 9125 }, { "epoch": 15.099091659785302, - "grad_norm": 3.9094550609588623, - "learning_rate": 1.7936842105263158e-06, - "loss": 0.1508, + "grad_norm": 3.8838043212890625, + "learning_rate": 1.7957894736842108e-06, + "loss": 0.1507, "step": 9150 }, { "epoch": 15.14037985136251, - "grad_norm": 4.491118907928467, - "learning_rate": 1.7410526315789474e-06, - "loss": 0.1683, + "grad_norm": 4.462072372436523, + "learning_rate": 1.7431578947368423e-06, + "loss": 0.1682, "step": 9175 }, { "epoch": 15.18166804293972, - "grad_norm": 5.513247966766357, - "learning_rate": 1.6884210526315792e-06, - "loss": 0.1413, + "grad_norm": 5.543285846710205, + "learning_rate": 1.6905263157894739e-06, + "loss": 0.141, "step": 9200 }, { "epoch": 15.222956234516928, - "grad_norm": 4.3536577224731445, - "learning_rate": 1.6357894736842107e-06, - "loss": 0.1749, + "grad_norm": 4.328714847564697, + "learning_rate": 1.6378947368421053e-06, + "loss": 0.1747, "step": 9225 }, { "epoch": 15.264244426094137, - "grad_norm": 3.8934166431427, - "learning_rate": 1.5831578947368423e-06, + "grad_norm": 3.924700975418091, + "learning_rate": 1.585263157894737e-06, "loss": 0.1727, "step": 9250 }, { "epoch": 15.305532617671346, - "grad_norm": 4.610677242279053, - "learning_rate": 1.5305263157894737e-06, - "loss": 0.1518, + "grad_norm": 4.647414207458496, + "learning_rate": 1.5326315789473686e-06, + "loss": 0.1512, "step": 9275 }, { "epoch": 15.346820809248555, - "grad_norm": 3.230677366256714, - "learning_rate": 1.4778947368421054e-06, - "loss": 0.1525, + "grad_norm": 3.2526192665100098, + "learning_rate": 1.48e-06, + "loss": 0.1521, "step": 9300 }, { "epoch": 15.388109000825764, - "grad_norm": 3.854262590408325, - "learning_rate": 1.425263157894737e-06, - "loss": 0.1501, + "grad_norm": 3.697683334350586, + "learning_rate": 1.4273684210526317e-06, + "loss": 0.1498, "step": 9325 }, { "epoch": 15.429397192402973, - "grad_norm": 3.96171498298645, - "learning_rate": 1.3726315789473684e-06, - "loss": 0.1655, + "grad_norm": 3.709627866744995, + "learning_rate": 1.374736842105263e-06, + "loss": 0.1654, "step": 9350 }, { "epoch": 15.470685383980182, - "grad_norm": 3.8165667057037354, - "learning_rate": 1.32e-06, - "loss": 0.1469, + "grad_norm": 3.7767739295959473, + "learning_rate": 1.322105263157895e-06, + "loss": 0.1466, "step": 9375 }, { "epoch": 15.511973575557391, - "grad_norm": 3.6944658756256104, - "learning_rate": 1.2673684210526315e-06, - "loss": 0.15, + "grad_norm": 3.6882431507110596, + "learning_rate": 1.2694736842105266e-06, + "loss": 0.1496, "step": 9400 }, { "epoch": 15.5532617671346, - "grad_norm": 3.9097495079040527, - "learning_rate": 1.2147368421052633e-06, - "loss": 0.1492, + "grad_norm": 3.9242100715637207, + "learning_rate": 1.216842105263158e-06, + "loss": 0.1491, "step": 9425 }, { "epoch": 15.59454995871181, - "grad_norm": 5.010981559753418, - "learning_rate": 1.1621052631578948e-06, - "loss": 0.1551, + "grad_norm": 5.024509429931641, + "learning_rate": 1.1642105263157896e-06, + "loss": 0.1549, "step": 9450 }, { "epoch": 15.635838150289018, - "grad_norm": 4.151124000549316, - "learning_rate": 1.1094736842105264e-06, - "loss": 0.1569, + "grad_norm": 4.048088550567627, + "learning_rate": 1.1115789473684213e-06, + "loss": 0.1564, "step": 9475 }, { "epoch": 15.677126341866225, - "grad_norm": 4.455909729003906, - "learning_rate": 1.0568421052631578e-06, - "loss": 0.1551, + "grad_norm": 4.535757541656494, + "learning_rate": 1.0589473684210527e-06, + "loss": 0.1548, "step": 9500 }, { "epoch": 15.718414533443434, - "grad_norm": 4.5624895095825195, - "learning_rate": 1.0042105263157897e-06, - "loss": 0.1526, + "grad_norm": 4.47163200378418, + "learning_rate": 1.0063157894736843e-06, + "loss": 0.1521, "step": 9525 }, { "epoch": 15.759702725020643, - "grad_norm": 3.476386070251465, - "learning_rate": 9.515789473684212e-07, + "grad_norm": 3.4826717376708984, + "learning_rate": 9.536842105263158e-07, "loss": 0.151, "step": 9550 }, { "epoch": 15.800990916597852, - "grad_norm": 3.7594966888427734, - "learning_rate": 8.989473684210527e-07, - "loss": 0.1614, + "grad_norm": 3.6962897777557373, + "learning_rate": 9.010526315789474e-07, + "loss": 0.1611, "step": 9575 }, { "epoch": 15.842279108175061, - "grad_norm": 3.677940607070923, - "learning_rate": 8.463157894736843e-07, - "loss": 0.1607, + "grad_norm": 3.6767818927764893, + "learning_rate": 8.48421052631579e-07, + "loss": 0.1594, "step": 9600 }, { "epoch": 15.88356729975227, - "grad_norm": 2.814276695251465, - "learning_rate": 7.936842105263158e-07, - "loss": 0.1584, + "grad_norm": 2.950598955154419, + "learning_rate": 7.957894736842107e-07, + "loss": 0.1583, "step": 9625 }, { "epoch": 15.92485549132948, - "grad_norm": 4.551584243774414, - "learning_rate": 7.410526315789475e-07, - "loss": 0.1746, + "grad_norm": 4.535677909851074, + "learning_rate": 7.431578947368422e-07, + "loss": 0.1743, "step": 9650 }, { "epoch": 15.966143682906688, - "grad_norm": 3.54571533203125, - "learning_rate": 6.884210526315791e-07, - "loss": 0.1283, + "grad_norm": 3.578261613845825, + "learning_rate": 6.905263157894737e-07, + "loss": 0.1279, "step": 9675 }, { "epoch": 16.006606110652353, - "grad_norm": 3.034546136856079, - "learning_rate": 6.357894736842106e-07, - "loss": 0.1445, + "grad_norm": 3.0115976333618164, + "learning_rate": 6.378947368421053e-07, + "loss": 0.1442, "step": 9700 }, { "epoch": 16.04789430222956, - "grad_norm": 4.239781379699707, - "learning_rate": 5.831578947368421e-07, - "loss": 0.1526, + "grad_norm": 4.302647590637207, + "learning_rate": 5.852631578947369e-07, + "loss": 0.1524, "step": 9725 }, { "epoch": 16.08918249380677, - "grad_norm": 3.5759620666503906, - "learning_rate": 5.305263157894737e-07, - "loss": 0.1435, + "grad_norm": 3.516667366027832, + "learning_rate": 5.326315789473684e-07, + "loss": 0.1431, "step": 9750 }, { "epoch": 16.13047068538398, - "grad_norm": 3.807652473449707, - "learning_rate": 4.778947368421053e-07, - "loss": 0.1616, + "grad_norm": 3.802489757537842, + "learning_rate": 4.800000000000001e-07, + "loss": 0.1607, "step": 9775 }, { "epoch": 16.17175887696119, - "grad_norm": 2.9478118419647217, - "learning_rate": 4.2526315789473684e-07, - "loss": 0.1344, + "grad_norm": 2.9078941345214844, + "learning_rate": 4.273684210526316e-07, + "loss": 0.134, "step": 9800 }, { "epoch": 16.213047068538398, - "grad_norm": 3.351928472518921, - "learning_rate": 3.726315789473685e-07, - "loss": 0.1616, + "grad_norm": 3.3042197227478027, + "learning_rate": 3.7473684210526323e-07, + "loss": 0.1614, "step": 9825 }, { "epoch": 16.254335260115607, - "grad_norm": 4.71775484085083, - "learning_rate": 3.2e-07, - "loss": 0.138, + "grad_norm": 4.696882724761963, + "learning_rate": 3.2210526315789476e-07, + "loss": 0.1379, "step": 9850 }, { "epoch": 16.295623451692816, - "grad_norm": 4.4986138343811035, - "learning_rate": 2.6736842105263164e-07, - "loss": 0.1592, + "grad_norm": 4.522494316101074, + "learning_rate": 2.6947368421052635e-07, + "loss": 0.1586, "step": 9875 }, { "epoch": 16.336911643270025, - "grad_norm": 4.285621643066406, - "learning_rate": 2.1473684210526317e-07, + "grad_norm": 4.257038593292236, + "learning_rate": 2.168421052631579e-07, "loss": 0.1423, "step": 9900 }, { "epoch": 16.378199834847234, - "grad_norm": 4.092608451843262, - "learning_rate": 1.6210526315789476e-07, - "loss": 0.1619, + "grad_norm": 4.159156799316406, + "learning_rate": 1.642105263157895e-07, + "loss": 0.1617, "step": 9925 }, { "epoch": 16.419488026424442, - "grad_norm": 4.384539604187012, - "learning_rate": 1.0947368421052632e-07, - "loss": 0.1606, + "grad_norm": 4.338253974914551, + "learning_rate": 1.1157894736842106e-07, + "loss": 0.1597, "step": 9950 }, { "epoch": 16.46077621800165, - "grad_norm": 5.559605121612549, - "learning_rate": 5.68421052631579e-08, - "loss": 0.164, + "grad_norm": 5.437859058380127, + "learning_rate": 5.8947368421052637e-08, + "loss": 0.1629, "step": 9975 }, { "epoch": 16.50206440957886, - "grad_norm": 4.197207450866699, - "learning_rate": 4.210526315789474e-09, - "loss": 0.1615, + "grad_norm": 4.178699970245361, + "learning_rate": 6.315789473684211e-09, + "loss": 0.161, "step": 10000 }, { "epoch": 16.50206440957886, - "eval_loss": 1.1515085697174072, - "eval_runtime": 702.9423, - "eval_samples_per_second": 15.052, - "eval_steps_per_second": 1.255, - "eval_wer": Infinity, + "eval_cer": 49.64348354407163, + "eval_loss": 1.1519368886947632, + "eval_runtime": 708.8725, + "eval_samples_per_second": 14.927, + "eval_steps_per_second": 1.244, + "eval_wer": 96.77510608203677, "step": 10000 }, { "epoch": 16.50206440957886, "step": 10000, "total_flos": 1.949150849531904e+19, - "train_loss": 0.8644906110763549, - "train_runtime": 22776.5193, - "train_samples_per_second": 21.074, - "train_steps_per_second": 0.439 + "train_loss": 0.8641470371723176, + "train_runtime": 21435.9254, + "train_samples_per_second": 22.392, + "train_steps_per_second": 0.467 } ], "logging_steps": 25,