{ "best_global_step": 12000, "best_metric": 0.08954626010536282, "best_model_checkpoint": "./output/bm-byt5-text-normalization-10/checkpoint-12000", "epoch": 1.0, "eval_steps": 1500, "global_step": 12451, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008031483414986749, "grad_norm": 2.8291845321655273, "learning_rate": 7.9454253611557e-06, "loss": 1.5212, "step": 100 }, { "epoch": 0.016062966829973498, "grad_norm": 1.04109525680542, "learning_rate": 1.5971107544141253e-05, "loss": 0.8546, "step": 200 }, { "epoch": 0.024094450244960243, "grad_norm": 0.7079290151596069, "learning_rate": 2.3996789727126808e-05, "loss": 0.5929, "step": 300 }, { "epoch": 0.032125933659946995, "grad_norm": 1.0854698419570923, "learning_rate": 3.202247191011236e-05, "loss": 0.5379, "step": 400 }, { "epoch": 0.04015741707493374, "grad_norm": 0.31283432245254517, "learning_rate": 4.0048154093097914e-05, "loss": 0.4776, "step": 500 }, { "epoch": 0.048188900489920486, "grad_norm": 0.37612250447273254, "learning_rate": 4.807383627608347e-05, "loss": 0.4267, "step": 600 }, { "epoch": 0.05622038390490724, "grad_norm": 0.41092032194137573, "learning_rate": 5.6099518459069025e-05, "loss": 0.3954, "step": 700 }, { "epoch": 0.06425186731989399, "grad_norm": 0.393480509519577, "learning_rate": 6.412520064205457e-05, "loss": 0.3465, "step": 800 }, { "epoch": 0.07228335073488074, "grad_norm": 0.31357675790786743, "learning_rate": 7.215088282504013e-05, "loss": 0.3212, "step": 900 }, { "epoch": 0.08031483414986748, "grad_norm": 0.3486596345901489, "learning_rate": 8.017656500802569e-05, "loss": 0.2934, "step": 1000 }, { "epoch": 0.08834631756485423, "grad_norm": 0.22963479161262512, "learning_rate": 8.820224719101124e-05, "loss": 0.2953, "step": 1100 }, { "epoch": 0.09637780097984097, "grad_norm": 0.3239472508430481, "learning_rate": 9.62279293739968e-05, "loss": 0.2663, "step": 1200 }, { "epoch": 0.10440928439482773, "grad_norm": 0.24768926203250885, "learning_rate": 9.952699687639447e-05, "loss": 0.2508, "step": 1300 }, { "epoch": 0.11244076780981448, "grad_norm": 0.32661986351013184, "learning_rate": 9.863453815261045e-05, "loss": 0.234, "step": 1400 }, { "epoch": 0.12047225122480122, "grad_norm": 0.39115390181541443, "learning_rate": 9.774207942882642e-05, "loss": 0.236, "step": 1500 }, { "epoch": 0.12047225122480122, "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.3998931805491491, "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.21429085731506348, "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 273.5873, "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 1.828, "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.23, "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.5533452842190482, "step": 1500 }, { "epoch": 0.12047225122480122, "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 1.2055611636230155, "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.576806366443634, "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 136.9717, "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 3.563, "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 0.445, "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 1.61514319340918, "step": 1500 }, { "epoch": 0.12047225122480122, "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.8101503759398496, "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.5432849526405334, "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 34.8989, "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.321, "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.315, "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.8331527627302275, "step": 1500 }, { "epoch": 0.12850373463978798, "grad_norm": 0.35263243317604065, "learning_rate": 9.68496207050424e-05, "loss": 0.217, "step": 1600 }, { "epoch": 0.13653521805477473, "grad_norm": 0.35822373628616333, "learning_rate": 9.595716198125837e-05, "loss": 0.2156, "step": 1700 }, { "epoch": 0.14456670146976147, "grad_norm": 0.36933159828186035, "learning_rate": 9.506470325747435e-05, "loss": 0.1986, "step": 1800 }, { "epoch": 0.15259818488474822, "grad_norm": 0.31603536009788513, "learning_rate": 9.417224453369033e-05, "loss": 0.2068, "step": 1900 }, { "epoch": 0.16062966829973496, "grad_norm": 0.38132354617118835, "learning_rate": 9.32797858099063e-05, "loss": 0.1999, "step": 2000 }, { "epoch": 0.1686611517147217, "grad_norm": 0.3746110200881958, "learning_rate": 9.238732708612226e-05, "loss": 0.1942, "step": 2100 }, { "epoch": 0.17669263512970845, "grad_norm": 0.2789841890335083, "learning_rate": 9.149486836233825e-05, "loss": 0.1911, "step": 2200 }, { "epoch": 0.1847241185446952, "grad_norm": 0.39334043860435486, "learning_rate": 9.060240963855422e-05, "loss": 0.1831, "step": 2300 }, { "epoch": 0.19275560195968194, "grad_norm": 0.62880539894104, "learning_rate": 8.970995091477019e-05, "loss": 0.1759, "step": 2400 }, { "epoch": 0.2007870853746687, "grad_norm": 0.23944030702114105, "learning_rate": 8.881749219098617e-05, "loss": 0.1845, "step": 2500 }, { "epoch": 0.20881856878965546, "grad_norm": 0.29401180148124695, "learning_rate": 8.792503346720215e-05, "loss": 0.1752, "step": 2600 }, { "epoch": 0.2168500522046422, "grad_norm": 0.2594122886657715, "learning_rate": 8.703257474341812e-05, "loss": 0.1753, "step": 2700 }, { "epoch": 0.22488153561962895, "grad_norm": 0.3151404559612274, "learning_rate": 8.61401160196341e-05, "loss": 0.1787, "step": 2800 }, { "epoch": 0.2329130190346157, "grad_norm": 0.2669820487499237, "learning_rate": 8.524765729585006e-05, "loss": 0.1703, "step": 2900 }, { "epoch": 0.24094450244960244, "grad_norm": 0.2792331278324127, "learning_rate": 8.435519857206605e-05, "loss": 0.1697, "step": 3000 }, { "epoch": 0.24094450244960244, "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.20056565754655142, "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.15873809158802032, "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 272.0505, "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 1.838, "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.232, "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.33574278609842134, "step": 3000 }, { "epoch": 0.24094450244960244, "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.287418555565752, "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.49165913462638855, "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 52.9975, "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 9.208, "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.151, "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.5653197332287171, "step": 3000 }, { "epoch": 0.24094450244960244, "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.706062030075188, "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.5416178703308105, "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 29.6535, "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.732, "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.371, "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.6511375947995667, "step": 3000 }, { "epoch": 0.2489759858645892, "grad_norm": 0.3188706040382385, "learning_rate": 8.346273984828203e-05, "loss": 0.1629, "step": 3100 }, { "epoch": 0.25700746927957596, "grad_norm": 0.43640485405921936, "learning_rate": 8.257028112449799e-05, "loss": 0.1706, "step": 3200 }, { "epoch": 0.2650389526945627, "grad_norm": 0.3020322322845459, "learning_rate": 8.167782240071396e-05, "loss": 0.1683, "step": 3300 }, { "epoch": 0.27307043610954945, "grad_norm": 0.32241836190223694, "learning_rate": 8.078536367692995e-05, "loss": 0.1629, "step": 3400 }, { "epoch": 0.28110191952453617, "grad_norm": 0.23341508209705353, "learning_rate": 7.989290495314592e-05, "loss": 0.1593, "step": 3500 }, { "epoch": 0.28913340293952294, "grad_norm": 0.3228916525840759, "learning_rate": 7.900044622936189e-05, "loss": 0.1542, "step": 3600 }, { "epoch": 0.29716488635450966, "grad_norm": 0.265959233045578, "learning_rate": 7.810798750557787e-05, "loss": 0.1511, "step": 3700 }, { "epoch": 0.30519636976949643, "grad_norm": 0.2453306019306183, "learning_rate": 7.721552878179385e-05, "loss": 0.1531, "step": 3800 }, { "epoch": 0.31322785318448315, "grad_norm": 0.30733558535575867, "learning_rate": 7.632307005800982e-05, "loss": 0.1495, "step": 3900 }, { "epoch": 0.3212593365994699, "grad_norm": 0.2568661570549011, "learning_rate": 7.54306113342258e-05, "loss": 0.1534, "step": 4000 }, { "epoch": 0.3292908200144567, "grad_norm": 0.3408128619194031, "learning_rate": 7.453815261044176e-05, "loss": 0.1492, "step": 4100 }, { "epoch": 0.3373223034294434, "grad_norm": 0.2851060628890991, "learning_rate": 7.364569388665774e-05, "loss": 0.1449, "step": 4200 }, { "epoch": 0.3453537868444302, "grad_norm": 0.36409246921539307, "learning_rate": 7.275323516287372e-05, "loss": 0.1365, "step": 4300 }, { "epoch": 0.3533852702594169, "grad_norm": 0.6164109110832214, "learning_rate": 7.186077643908969e-05, "loss": 0.1345, "step": 4400 }, { "epoch": 0.3614167536744037, "grad_norm": 0.26890653371810913, "learning_rate": 7.096831771530566e-05, "loss": 0.1333, "step": 4500 }, { "epoch": 0.3614167536744037, "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.15153795732077396, "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.12628014385700226, "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 240.3276, "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.08, "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.262, "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.2586017463713641, "step": 4500 }, { "epoch": 0.3614167536744037, "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.11984949986234744, "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.3522701859474182, "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 43.0411, "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 11.338, "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.417, "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.2879560612004708, "step": 4500 }, { "epoch": 0.3614167536744037, "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.5928101503759399, "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.47585469484329224, "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 30.2706, "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.676, "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.363, "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.5536294691224268, "step": 4500 }, { "epoch": 0.3694482370893904, "grad_norm": 0.23563382029533386, "learning_rate": 7.007585899152165e-05, "loss": 0.1338, "step": 4600 }, { "epoch": 0.37747972050437717, "grad_norm": 0.2873114049434662, "learning_rate": 6.918340026773762e-05, "loss": 0.1328, "step": 4700 }, { "epoch": 0.3855112039193639, "grad_norm": 0.2561749517917633, "learning_rate": 6.829094154395359e-05, "loss": 0.1287, "step": 4800 }, { "epoch": 0.39354268733435066, "grad_norm": 0.30756044387817383, "learning_rate": 6.739848282016957e-05, "loss": 0.1244, "step": 4900 }, { "epoch": 0.4015741707493374, "grad_norm": 0.21384097635746002, "learning_rate": 6.650602409638555e-05, "loss": 0.1296, "step": 5000 }, { "epoch": 0.40960565416432415, "grad_norm": 0.28480997681617737, "learning_rate": 6.561356537260152e-05, "loss": 0.1306, "step": 5100 }, { "epoch": 0.4176371375793109, "grad_norm": 0.3029521405696869, "learning_rate": 6.47211066488175e-05, "loss": 0.1285, "step": 5200 }, { "epoch": 0.42566862099429764, "grad_norm": 0.3370857238769531, "learning_rate": 6.382864792503346e-05, "loss": 0.1284, "step": 5300 }, { "epoch": 0.4337001044092844, "grad_norm": 0.206114262342453, "learning_rate": 6.293618920124944e-05, "loss": 0.1255, "step": 5400 }, { "epoch": 0.44173158782427113, "grad_norm": 0.26664304733276367, "learning_rate": 6.204373047746542e-05, "loss": 0.12, "step": 5500 }, { "epoch": 0.4497630712392579, "grad_norm": 0.23688237369060516, "learning_rate": 6.115127175368139e-05, "loss": 0.1209, "step": 5600 }, { "epoch": 0.4577945546542446, "grad_norm": 0.2480483055114746, "learning_rate": 6.0258813029897365e-05, "loss": 0.1232, "step": 5700 }, { "epoch": 0.4658260380692314, "grad_norm": 0.23789241909980774, "learning_rate": 5.9366354306113345e-05, "loss": 0.1229, "step": 5800 }, { "epoch": 0.4738575214842181, "grad_norm": 0.19691696763038635, "learning_rate": 5.847389558232932e-05, "loss": 0.1156, "step": 5900 }, { "epoch": 0.4818890048992049, "grad_norm": 0.2652628421783447, "learning_rate": 5.758143685854529e-05, "loss": 0.1234, "step": 6000 }, { "epoch": 0.4818890048992049, "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.1208759194969775, "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.11701546609401703, "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 229.5709, "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.178, "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.274, "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.22506216388133926, "step": 6000 }, { "epoch": 0.4818890048992049, "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.10544186473341287, "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.3353877365589142, "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 40.1571, "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 12.152, "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.519, "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.2706943899568458, "step": 6000 }, { "epoch": 0.4818890048992049, "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.636983082706767, "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.4661330282688141, "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 28.8323, "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.809, "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.382, "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.5492957746478874, "step": 6000 }, { "epoch": 0.4899204883141916, "grad_norm": 0.46512874960899353, "learning_rate": 5.6688978134761274e-05, "loss": 0.1171, "step": 6100 }, { "epoch": 0.4979519717291784, "grad_norm": 0.4254817068576813, "learning_rate": 5.579651941097724e-05, "loss": 0.1189, "step": 6200 }, { "epoch": 0.5059834551441651, "grad_norm": 0.21140217781066895, "learning_rate": 5.4904060687193214e-05, "loss": 0.1244, "step": 6300 }, { "epoch": 0.5140149385591519, "grad_norm": 0.24077512323856354, "learning_rate": 5.4011601963409195e-05, "loss": 0.116, "step": 6400 }, { "epoch": 0.5220464219741386, "grad_norm": 0.3227494955062866, "learning_rate": 5.311914323962517e-05, "loss": 0.1186, "step": 6500 }, { "epoch": 0.5300779053891254, "grad_norm": 0.23471038043498993, "learning_rate": 5.222668451584114e-05, "loss": 0.1137, "step": 6600 }, { "epoch": 0.5381093888041121, "grad_norm": 0.33298271894454956, "learning_rate": 5.133422579205712e-05, "loss": 0.117, "step": 6700 }, { "epoch": 0.5461408722190989, "grad_norm": 0.2374378889799118, "learning_rate": 5.04417670682731e-05, "loss": 0.1183, "step": 6800 }, { "epoch": 0.5541723556340856, "grad_norm": 0.2295396625995636, "learning_rate": 4.954930834448907e-05, "loss": 0.1151, "step": 6900 }, { "epoch": 0.5622038390490723, "grad_norm": 0.2642989158630371, "learning_rate": 4.8656849620705045e-05, "loss": 0.1141, "step": 7000 }, { "epoch": 0.5702353224640591, "grad_norm": 0.2136625200510025, "learning_rate": 4.776439089692102e-05, "loss": 0.1155, "step": 7100 }, { "epoch": 0.5782668058790459, "grad_norm": 0.19569291174411774, "learning_rate": 4.687193217313699e-05, "loss": 0.1162, "step": 7200 }, { "epoch": 0.5862982892940326, "grad_norm": 0.2163517326116562, "learning_rate": 4.597947344935297e-05, "loss": 0.1115, "step": 7300 }, { "epoch": 0.5943297727090193, "grad_norm": 0.2426002025604248, "learning_rate": 4.5087014725568947e-05, "loss": 0.1165, "step": 7400 }, { "epoch": 0.6023612561240062, "grad_norm": 0.2794135510921478, "learning_rate": 4.419455600178492e-05, "loss": 0.1106, "step": 7500 }, { "epoch": 0.6023612561240062, "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.10490155616518171, "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.1121324971318245, "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 229.2718, "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.181, "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.275, "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.20499624125368646, "step": 7500 }, { "epoch": 0.6023612561240062, "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.11140680921354501, "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.3176734149456024, "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 39.7862, "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 12.266, "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.533, "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.27697136131816397, "step": 7500 }, { "epoch": 0.6023612561240062, "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.518562030075188, "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.4668663740158081, "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 26.2532, "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 3.085, "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.419, "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.4723726977248104, "step": 7500 }, { "epoch": 0.6103927395389929, "grad_norm": 0.2562696039676666, "learning_rate": 4.3302097278000894e-05, "loss": 0.1111, "step": 7600 }, { "epoch": 0.6184242229539796, "grad_norm": 0.28034263849258423, "learning_rate": 4.240963855421687e-05, "loss": 0.1107, "step": 7700 }, { "epoch": 0.6264557063689663, "grad_norm": 0.23074501752853394, "learning_rate": 4.151717983043285e-05, "loss": 0.1098, "step": 7800 }, { "epoch": 0.6344871897839531, "grad_norm": 0.26322418451309204, "learning_rate": 4.062472110664882e-05, "loss": 0.1148, "step": 7900 }, { "epoch": 0.6425186731989398, "grad_norm": 0.3186470568180084, "learning_rate": 3.9732262382864796e-05, "loss": 0.1073, "step": 8000 }, { "epoch": 0.6505501566139266, "grad_norm": 0.3686552047729492, "learning_rate": 3.883980365908077e-05, "loss": 0.1091, "step": 8100 }, { "epoch": 0.6585816400289134, "grad_norm": 0.3054899275302887, "learning_rate": 3.7947344935296744e-05, "loss": 0.1118, "step": 8200 }, { "epoch": 0.6666131234439001, "grad_norm": 0.22546516358852386, "learning_rate": 3.705488621151272e-05, "loss": 0.11, "step": 8300 }, { "epoch": 0.6746446068588868, "grad_norm": 0.28520339727401733, "learning_rate": 3.61624274877287e-05, "loss": 0.1088, "step": 8400 }, { "epoch": 0.6826760902738735, "grad_norm": 0.21663372218608856, "learning_rate": 3.526996876394467e-05, "loss": 0.1079, "step": 8500 }, { "epoch": 0.6907075736888604, "grad_norm": 0.2450413554906845, "learning_rate": 3.4377510040160646e-05, "loss": 0.108, "step": 8600 }, { "epoch": 0.6987390571038471, "grad_norm": 0.20690684020519257, "learning_rate": 3.348505131637662e-05, "loss": 0.1142, "step": 8700 }, { "epoch": 0.7067705405188338, "grad_norm": 0.1910647600889206, "learning_rate": 3.25925925925926e-05, "loss": 0.1057, "step": 8800 }, { "epoch": 0.7148020239338205, "grad_norm": 0.28397220373153687, "learning_rate": 3.170013386880857e-05, "loss": 0.1092, "step": 8900 }, { "epoch": 0.7228335073488074, "grad_norm": 0.29897409677505493, "learning_rate": 3.080767514502455e-05, "loss": 0.1103, "step": 9000 }, { "epoch": 0.7228335073488074, "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.10295938433152874, "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.10704782605171204, "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 247.051, "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.024, "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.255, "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.19799919042387093, "step": 9000 }, { "epoch": 0.7228335073488074, "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.11085619895384051, "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.3005123436450958, "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 43.5576, "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 11.204, "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.4, "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.2793252255786583, "step": 9000 }, { "epoch": 0.7228335073488074, "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.6355733082706767, "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.46724840998649597, "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 34.5607, "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.344, "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.318, "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.5612134344528711, "step": 9000 }, { "epoch": 0.7308649907637941, "grad_norm": 0.2587852478027344, "learning_rate": 2.991521642124052e-05, "loss": 0.1091, "step": 9100 }, { "epoch": 0.7388964741787808, "grad_norm": 0.27610886096954346, "learning_rate": 2.9022757697456492e-05, "loss": 0.1109, "step": 9200 }, { "epoch": 0.7469279575937676, "grad_norm": 0.23138980567455292, "learning_rate": 2.813029897367247e-05, "loss": 0.1027, "step": 9300 }, { "epoch": 0.7549594410087543, "grad_norm": 0.305155873298645, "learning_rate": 2.7237840249888446e-05, "loss": 0.1096, "step": 9400 }, { "epoch": 0.7629909244237411, "grad_norm": 0.30439290404319763, "learning_rate": 2.6345381526104417e-05, "loss": 0.1055, "step": 9500 }, { "epoch": 0.7710224078387278, "grad_norm": 0.2712409198284149, "learning_rate": 2.5452922802320394e-05, "loss": 0.1068, "step": 9600 }, { "epoch": 0.7790538912537146, "grad_norm": 0.16476544737815857, "learning_rate": 2.4560464078536368e-05, "loss": 0.1052, "step": 9700 }, { "epoch": 0.7870853746687013, "grad_norm": 0.24273213744163513, "learning_rate": 2.366800535475234e-05, "loss": 0.1119, "step": 9800 }, { "epoch": 0.795116858083688, "grad_norm": 0.2552710771560669, "learning_rate": 2.277554663096832e-05, "loss": 0.1067, "step": 9900 }, { "epoch": 0.8031483414986748, "grad_norm": 0.19780471920967102, "learning_rate": 2.1883087907184292e-05, "loss": 0.1056, "step": 10000 }, { "epoch": 0.8111798249136616, "grad_norm": 0.2539425492286682, "learning_rate": 2.0990629183400266e-05, "loss": 0.1049, "step": 10100 }, { "epoch": 0.8192113083286483, "grad_norm": 0.15332534909248352, "learning_rate": 2.0098170459616243e-05, "loss": 0.1053, "step": 10200 }, { "epoch": 0.827242791743635, "grad_norm": 0.22054997086524963, "learning_rate": 1.9205711735832217e-05, "loss": 0.105, "step": 10300 }, { "epoch": 0.8352742751586218, "grad_norm": 0.2107006013393402, "learning_rate": 1.8313253012048194e-05, "loss": 0.1085, "step": 10400 }, { "epoch": 0.8433057585736086, "grad_norm": 0.31692540645599365, "learning_rate": 1.7420794288264168e-05, "loss": 0.1028, "step": 10500 }, { "epoch": 0.8433057585736086, "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.09179189628802409, "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.10394905507564545, "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 246.725, "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.027, "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.255, "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.18556641415601688, "step": 10500 }, { "epoch": 0.8433057585736086, "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.10278058181150776, "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.2935584783554077, "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 44.0728, "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 11.073, "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.384, "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.26441741859552764, "step": 10500 }, { "epoch": 0.8433057585736086, "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.5568609022556391, "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.46201658248901367, "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 33.0653, "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.45, "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.333, "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.5362946912242686, "step": 10500 }, { "epoch": 0.8513372419885953, "grad_norm": 0.35895422101020813, "learning_rate": 1.6528335564480142e-05, "loss": 0.1049, "step": 10600 }, { "epoch": 0.859368725403582, "grad_norm": 0.16946537792682648, "learning_rate": 1.563587684069612e-05, "loss": 0.1104, "step": 10700 }, { "epoch": 0.8674002088185688, "grad_norm": 0.2614282965660095, "learning_rate": 1.4743418116912095e-05, "loss": 0.1029, "step": 10800 }, { "epoch": 0.8754316922335555, "grad_norm": 0.27617985010147095, "learning_rate": 1.3850959393128068e-05, "loss": 0.1096, "step": 10900 }, { "epoch": 0.8834631756485423, "grad_norm": 0.20801787078380585, "learning_rate": 1.2958500669344042e-05, "loss": 0.1087, "step": 11000 }, { "epoch": 0.891494659063529, "grad_norm": 0.2360270470380783, "learning_rate": 1.2066041945560018e-05, "loss": 0.1049, "step": 11100 }, { "epoch": 0.8995261424785158, "grad_norm": 0.24903564155101776, "learning_rate": 1.1173583221775993e-05, "loss": 0.1083, "step": 11200 }, { "epoch": 0.9075576258935025, "grad_norm": 0.22896708548069, "learning_rate": 1.0281124497991969e-05, "loss": 0.1038, "step": 11300 }, { "epoch": 0.9155891093084892, "grad_norm": 0.278579443693161, "learning_rate": 9.388665774207942e-06, "loss": 0.0991, "step": 11400 }, { "epoch": 0.9236205927234761, "grad_norm": 0.2865758538246155, "learning_rate": 8.496207050423918e-06, "loss": 0.1049, "step": 11500 }, { "epoch": 0.9316520761384628, "grad_norm": 0.21692918241024017, "learning_rate": 7.6037483266398935e-06, "loss": 0.1029, "step": 11600 }, { "epoch": 0.9396835595534495, "grad_norm": 0.20344237983226776, "learning_rate": 6.711289602855868e-06, "loss": 0.103, "step": 11700 }, { "epoch": 0.9477150429684362, "grad_norm": 0.27135396003723145, "learning_rate": 5.818830879071844e-06, "loss": 0.1079, "step": 11800 }, { "epoch": 0.9557465263834231, "grad_norm": 0.28661566972732544, "learning_rate": 4.926372155287818e-06, "loss": 0.1044, "step": 11900 }, { "epoch": 0.9637780097984098, "grad_norm": 0.20027625560760498, "learning_rate": 4.033913431503794e-06, "loss": 0.1062, "step": 12000 }, { "epoch": 0.9637780097984098, "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.08954626010536282, "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.1025717481970787, "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 245.0753, "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.04, "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.257, "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.1836003006997051, "step": 12000 }, { "epoch": 0.9637780097984098, "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.10204643479856841, "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.29060474038124084, "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 44.4274, "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 10.984, "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.373, "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.2608866222047862, "step": 12000 }, { "epoch": 0.9637780097984098, "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.6207706766917294, "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.46583321690559387, "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 34.1057, "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.375, "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.323, "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.581798483206934, "step": 12000 }, { "epoch": 0.9718094932133965, "grad_norm": 0.1759420484304428, "learning_rate": 3.141454707719768e-06, "loss": 0.1025, "step": 12100 }, { "epoch": 0.9798409766283832, "grad_norm": 0.21223457157611847, "learning_rate": 2.248995983935743e-06, "loss": 0.1083, "step": 12200 }, { "epoch": 0.98787246004337, "grad_norm": 0.2107488363981247, "learning_rate": 1.356537260151718e-06, "loss": 0.1008, "step": 12300 }, { "epoch": 0.9959039434583568, "grad_norm": 0.22825685143470764, "learning_rate": 4.64078536367693e-07, "loss": 0.1026, "step": 12400 }, { "epoch": 1.0, "step": 12451, "total_flos": 1.056850030203863e+17, "train_loss": 0.1694467846107008, "train_runtime": 5343.3049, "train_samples_per_second": 18.641, "train_steps_per_second": 2.33 } ], "logging_steps": 100, "max_steps": 12451, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.056850030203863e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }