bm-byt5-text-normalization-10 / trainer_state.json
oza75's picture
End of training
b088e0e verified
{
"best_global_step": 12000,
"best_metric": 0.08954626010536282,
"best_model_checkpoint": "./output/bm-byt5-text-normalization-10/checkpoint-12000",
"epoch": 1.0,
"eval_steps": 1500,
"global_step": 12451,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.008031483414986749,
"grad_norm": 2.8291845321655273,
"learning_rate": 7.9454253611557e-06,
"loss": 1.5212,
"step": 100
},
{
"epoch": 0.016062966829973498,
"grad_norm": 1.04109525680542,
"learning_rate": 1.5971107544141253e-05,
"loss": 0.8546,
"step": 200
},
{
"epoch": 0.024094450244960243,
"grad_norm": 0.7079290151596069,
"learning_rate": 2.3996789727126808e-05,
"loss": 0.5929,
"step": 300
},
{
"epoch": 0.032125933659946995,
"grad_norm": 1.0854698419570923,
"learning_rate": 3.202247191011236e-05,
"loss": 0.5379,
"step": 400
},
{
"epoch": 0.04015741707493374,
"grad_norm": 0.31283432245254517,
"learning_rate": 4.0048154093097914e-05,
"loss": 0.4776,
"step": 500
},
{
"epoch": 0.048188900489920486,
"grad_norm": 0.37612250447273254,
"learning_rate": 4.807383627608347e-05,
"loss": 0.4267,
"step": 600
},
{
"epoch": 0.05622038390490724,
"grad_norm": 0.41092032194137573,
"learning_rate": 5.6099518459069025e-05,
"loss": 0.3954,
"step": 700
},
{
"epoch": 0.06425186731989399,
"grad_norm": 0.393480509519577,
"learning_rate": 6.412520064205457e-05,
"loss": 0.3465,
"step": 800
},
{
"epoch": 0.07228335073488074,
"grad_norm": 0.31357675790786743,
"learning_rate": 7.215088282504013e-05,
"loss": 0.3212,
"step": 900
},
{
"epoch": 0.08031483414986748,
"grad_norm": 0.3486596345901489,
"learning_rate": 8.017656500802569e-05,
"loss": 0.2934,
"step": 1000
},
{
"epoch": 0.08834631756485423,
"grad_norm": 0.22963479161262512,
"learning_rate": 8.820224719101124e-05,
"loss": 0.2953,
"step": 1100
},
{
"epoch": 0.09637780097984097,
"grad_norm": 0.3239472508430481,
"learning_rate": 9.62279293739968e-05,
"loss": 0.2663,
"step": 1200
},
{
"epoch": 0.10440928439482773,
"grad_norm": 0.24768926203250885,
"learning_rate": 9.952699687639447e-05,
"loss": 0.2508,
"step": 1300
},
{
"epoch": 0.11244076780981448,
"grad_norm": 0.32661986351013184,
"learning_rate": 9.863453815261045e-05,
"loss": 0.234,
"step": 1400
},
{
"epoch": 0.12047225122480122,
"grad_norm": 0.39115390181541443,
"learning_rate": 9.774207942882642e-05,
"loss": 0.236,
"step": 1500
},
{
"epoch": 0.12047225122480122,
"eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.3998931805491491,
"eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.21429085731506348,
"eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 273.5873,
"eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 1.828,
"eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.23,
"eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.5533452842190482,
"step": 1500
},
{
"epoch": 0.12047225122480122,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 1.2055611636230155,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.576806366443634,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 136.9717,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 3.563,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 0.445,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 1.61514319340918,
"step": 1500
},
{
"epoch": 0.12047225122480122,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.8101503759398496,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.5432849526405334,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 34.8989,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.321,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.315,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.8331527627302275,
"step": 1500
},
{
"epoch": 0.12850373463978798,
"grad_norm": 0.35263243317604065,
"learning_rate": 9.68496207050424e-05,
"loss": 0.217,
"step": 1600
},
{
"epoch": 0.13653521805477473,
"grad_norm": 0.35822373628616333,
"learning_rate": 9.595716198125837e-05,
"loss": 0.2156,
"step": 1700
},
{
"epoch": 0.14456670146976147,
"grad_norm": 0.36933159828186035,
"learning_rate": 9.506470325747435e-05,
"loss": 0.1986,
"step": 1800
},
{
"epoch": 0.15259818488474822,
"grad_norm": 0.31603536009788513,
"learning_rate": 9.417224453369033e-05,
"loss": 0.2068,
"step": 1900
},
{
"epoch": 0.16062966829973496,
"grad_norm": 0.38132354617118835,
"learning_rate": 9.32797858099063e-05,
"loss": 0.1999,
"step": 2000
},
{
"epoch": 0.1686611517147217,
"grad_norm": 0.3746110200881958,
"learning_rate": 9.238732708612226e-05,
"loss": 0.1942,
"step": 2100
},
{
"epoch": 0.17669263512970845,
"grad_norm": 0.2789841890335083,
"learning_rate": 9.149486836233825e-05,
"loss": 0.1911,
"step": 2200
},
{
"epoch": 0.1847241185446952,
"grad_norm": 0.39334043860435486,
"learning_rate": 9.060240963855422e-05,
"loss": 0.1831,
"step": 2300
},
{
"epoch": 0.19275560195968194,
"grad_norm": 0.62880539894104,
"learning_rate": 8.970995091477019e-05,
"loss": 0.1759,
"step": 2400
},
{
"epoch": 0.2007870853746687,
"grad_norm": 0.23944030702114105,
"learning_rate": 8.881749219098617e-05,
"loss": 0.1845,
"step": 2500
},
{
"epoch": 0.20881856878965546,
"grad_norm": 0.29401180148124695,
"learning_rate": 8.792503346720215e-05,
"loss": 0.1752,
"step": 2600
},
{
"epoch": 0.2168500522046422,
"grad_norm": 0.2594122886657715,
"learning_rate": 8.703257474341812e-05,
"loss": 0.1753,
"step": 2700
},
{
"epoch": 0.22488153561962895,
"grad_norm": 0.3151404559612274,
"learning_rate": 8.61401160196341e-05,
"loss": 0.1787,
"step": 2800
},
{
"epoch": 0.2329130190346157,
"grad_norm": 0.2669820487499237,
"learning_rate": 8.524765729585006e-05,
"loss": 0.1703,
"step": 2900
},
{
"epoch": 0.24094450244960244,
"grad_norm": 0.2792331278324127,
"learning_rate": 8.435519857206605e-05,
"loss": 0.1697,
"step": 3000
},
{
"epoch": 0.24094450244960244,
"eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.20056565754655142,
"eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.15873809158802032,
"eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 272.0505,
"eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 1.838,
"eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.232,
"eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.33574278609842134,
"step": 3000
},
{
"epoch": 0.24094450244960244,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.287418555565752,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.49165913462638855,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 52.9975,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 9.208,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.151,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.5653197332287171,
"step": 3000
},
{
"epoch": 0.24094450244960244,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.706062030075188,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.5416178703308105,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 29.6535,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.732,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.371,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.6511375947995667,
"step": 3000
},
{
"epoch": 0.2489759858645892,
"grad_norm": 0.3188706040382385,
"learning_rate": 8.346273984828203e-05,
"loss": 0.1629,
"step": 3100
},
{
"epoch": 0.25700746927957596,
"grad_norm": 0.43640485405921936,
"learning_rate": 8.257028112449799e-05,
"loss": 0.1706,
"step": 3200
},
{
"epoch": 0.2650389526945627,
"grad_norm": 0.3020322322845459,
"learning_rate": 8.167782240071396e-05,
"loss": 0.1683,
"step": 3300
},
{
"epoch": 0.27307043610954945,
"grad_norm": 0.32241836190223694,
"learning_rate": 8.078536367692995e-05,
"loss": 0.1629,
"step": 3400
},
{
"epoch": 0.28110191952453617,
"grad_norm": 0.23341508209705353,
"learning_rate": 7.989290495314592e-05,
"loss": 0.1593,
"step": 3500
},
{
"epoch": 0.28913340293952294,
"grad_norm": 0.3228916525840759,
"learning_rate": 7.900044622936189e-05,
"loss": 0.1542,
"step": 3600
},
{
"epoch": 0.29716488635450966,
"grad_norm": 0.265959233045578,
"learning_rate": 7.810798750557787e-05,
"loss": 0.1511,
"step": 3700
},
{
"epoch": 0.30519636976949643,
"grad_norm": 0.2453306019306183,
"learning_rate": 7.721552878179385e-05,
"loss": 0.1531,
"step": 3800
},
{
"epoch": 0.31322785318448315,
"grad_norm": 0.30733558535575867,
"learning_rate": 7.632307005800982e-05,
"loss": 0.1495,
"step": 3900
},
{
"epoch": 0.3212593365994699,
"grad_norm": 0.2568661570549011,
"learning_rate": 7.54306113342258e-05,
"loss": 0.1534,
"step": 4000
},
{
"epoch": 0.3292908200144567,
"grad_norm": 0.3408128619194031,
"learning_rate": 7.453815261044176e-05,
"loss": 0.1492,
"step": 4100
},
{
"epoch": 0.3373223034294434,
"grad_norm": 0.2851060628890991,
"learning_rate": 7.364569388665774e-05,
"loss": 0.1449,
"step": 4200
},
{
"epoch": 0.3453537868444302,
"grad_norm": 0.36409246921539307,
"learning_rate": 7.275323516287372e-05,
"loss": 0.1365,
"step": 4300
},
{
"epoch": 0.3533852702594169,
"grad_norm": 0.6164109110832214,
"learning_rate": 7.186077643908969e-05,
"loss": 0.1345,
"step": 4400
},
{
"epoch": 0.3614167536744037,
"grad_norm": 0.26890653371810913,
"learning_rate": 7.096831771530566e-05,
"loss": 0.1333,
"step": 4500
},
{
"epoch": 0.3614167536744037,
"eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.15153795732077396,
"eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.12628014385700226,
"eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 240.3276,
"eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.08,
"eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.262,
"eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.2586017463713641,
"step": 4500
},
{
"epoch": 0.3614167536744037,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.11984949986234744,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.3522701859474182,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 43.0411,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 11.338,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.417,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.2879560612004708,
"step": 4500
},
{
"epoch": 0.3614167536744037,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.5928101503759399,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.47585469484329224,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 30.2706,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.676,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.363,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.5536294691224268,
"step": 4500
},
{
"epoch": 0.3694482370893904,
"grad_norm": 0.23563382029533386,
"learning_rate": 7.007585899152165e-05,
"loss": 0.1338,
"step": 4600
},
{
"epoch": 0.37747972050437717,
"grad_norm": 0.2873114049434662,
"learning_rate": 6.918340026773762e-05,
"loss": 0.1328,
"step": 4700
},
{
"epoch": 0.3855112039193639,
"grad_norm": 0.2561749517917633,
"learning_rate": 6.829094154395359e-05,
"loss": 0.1287,
"step": 4800
},
{
"epoch": 0.39354268733435066,
"grad_norm": 0.30756044387817383,
"learning_rate": 6.739848282016957e-05,
"loss": 0.1244,
"step": 4900
},
{
"epoch": 0.4015741707493374,
"grad_norm": 0.21384097635746002,
"learning_rate": 6.650602409638555e-05,
"loss": 0.1296,
"step": 5000
},
{
"epoch": 0.40960565416432415,
"grad_norm": 0.28480997681617737,
"learning_rate": 6.561356537260152e-05,
"loss": 0.1306,
"step": 5100
},
{
"epoch": 0.4176371375793109,
"grad_norm": 0.3029521405696869,
"learning_rate": 6.47211066488175e-05,
"loss": 0.1285,
"step": 5200
},
{
"epoch": 0.42566862099429764,
"grad_norm": 0.3370857238769531,
"learning_rate": 6.382864792503346e-05,
"loss": 0.1284,
"step": 5300
},
{
"epoch": 0.4337001044092844,
"grad_norm": 0.206114262342453,
"learning_rate": 6.293618920124944e-05,
"loss": 0.1255,
"step": 5400
},
{
"epoch": 0.44173158782427113,
"grad_norm": 0.26664304733276367,
"learning_rate": 6.204373047746542e-05,
"loss": 0.12,
"step": 5500
},
{
"epoch": 0.4497630712392579,
"grad_norm": 0.23688237369060516,
"learning_rate": 6.115127175368139e-05,
"loss": 0.1209,
"step": 5600
},
{
"epoch": 0.4577945546542446,
"grad_norm": 0.2480483055114746,
"learning_rate": 6.0258813029897365e-05,
"loss": 0.1232,
"step": 5700
},
{
"epoch": 0.4658260380692314,
"grad_norm": 0.23789241909980774,
"learning_rate": 5.9366354306113345e-05,
"loss": 0.1229,
"step": 5800
},
{
"epoch": 0.4738575214842181,
"grad_norm": 0.19691696763038635,
"learning_rate": 5.847389558232932e-05,
"loss": 0.1156,
"step": 5900
},
{
"epoch": 0.4818890048992049,
"grad_norm": 0.2652628421783447,
"learning_rate": 5.758143685854529e-05,
"loss": 0.1234,
"step": 6000
},
{
"epoch": 0.4818890048992049,
"eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.1208759194969775,
"eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.11701546609401703,
"eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 229.5709,
"eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.178,
"eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.274,
"eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.22506216388133926,
"step": 6000
},
{
"epoch": 0.4818890048992049,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.10544186473341287,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.3353877365589142,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 40.1571,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 12.152,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.519,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.2706943899568458,
"step": 6000
},
{
"epoch": 0.4818890048992049,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.636983082706767,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.4661330282688141,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 28.8323,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.809,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.382,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.5492957746478874,
"step": 6000
},
{
"epoch": 0.4899204883141916,
"grad_norm": 0.46512874960899353,
"learning_rate": 5.6688978134761274e-05,
"loss": 0.1171,
"step": 6100
},
{
"epoch": 0.4979519717291784,
"grad_norm": 0.4254817068576813,
"learning_rate": 5.579651941097724e-05,
"loss": 0.1189,
"step": 6200
},
{
"epoch": 0.5059834551441651,
"grad_norm": 0.21140217781066895,
"learning_rate": 5.4904060687193214e-05,
"loss": 0.1244,
"step": 6300
},
{
"epoch": 0.5140149385591519,
"grad_norm": 0.24077512323856354,
"learning_rate": 5.4011601963409195e-05,
"loss": 0.116,
"step": 6400
},
{
"epoch": 0.5220464219741386,
"grad_norm": 0.3227494955062866,
"learning_rate": 5.311914323962517e-05,
"loss": 0.1186,
"step": 6500
},
{
"epoch": 0.5300779053891254,
"grad_norm": 0.23471038043498993,
"learning_rate": 5.222668451584114e-05,
"loss": 0.1137,
"step": 6600
},
{
"epoch": 0.5381093888041121,
"grad_norm": 0.33298271894454956,
"learning_rate": 5.133422579205712e-05,
"loss": 0.117,
"step": 6700
},
{
"epoch": 0.5461408722190989,
"grad_norm": 0.2374378889799118,
"learning_rate": 5.04417670682731e-05,
"loss": 0.1183,
"step": 6800
},
{
"epoch": 0.5541723556340856,
"grad_norm": 0.2295396625995636,
"learning_rate": 4.954930834448907e-05,
"loss": 0.1151,
"step": 6900
},
{
"epoch": 0.5622038390490723,
"grad_norm": 0.2642989158630371,
"learning_rate": 4.8656849620705045e-05,
"loss": 0.1141,
"step": 7000
},
{
"epoch": 0.5702353224640591,
"grad_norm": 0.2136625200510025,
"learning_rate": 4.776439089692102e-05,
"loss": 0.1155,
"step": 7100
},
{
"epoch": 0.5782668058790459,
"grad_norm": 0.19569291174411774,
"learning_rate": 4.687193217313699e-05,
"loss": 0.1162,
"step": 7200
},
{
"epoch": 0.5862982892940326,
"grad_norm": 0.2163517326116562,
"learning_rate": 4.597947344935297e-05,
"loss": 0.1115,
"step": 7300
},
{
"epoch": 0.5943297727090193,
"grad_norm": 0.2426002025604248,
"learning_rate": 4.5087014725568947e-05,
"loss": 0.1165,
"step": 7400
},
{
"epoch": 0.6023612561240062,
"grad_norm": 0.2794135510921478,
"learning_rate": 4.419455600178492e-05,
"loss": 0.1106,
"step": 7500
},
{
"epoch": 0.6023612561240062,
"eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.10490155616518171,
"eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.1121324971318245,
"eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 229.2718,
"eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.181,
"eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.275,
"eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.20499624125368646,
"step": 7500
},
{
"epoch": 0.6023612561240062,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.11140680921354501,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.3176734149456024,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 39.7862,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 12.266,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.533,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.27697136131816397,
"step": 7500
},
{
"epoch": 0.6023612561240062,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.518562030075188,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.4668663740158081,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 26.2532,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 3.085,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.419,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.4723726977248104,
"step": 7500
},
{
"epoch": 0.6103927395389929,
"grad_norm": 0.2562696039676666,
"learning_rate": 4.3302097278000894e-05,
"loss": 0.1111,
"step": 7600
},
{
"epoch": 0.6184242229539796,
"grad_norm": 0.28034263849258423,
"learning_rate": 4.240963855421687e-05,
"loss": 0.1107,
"step": 7700
},
{
"epoch": 0.6264557063689663,
"grad_norm": 0.23074501752853394,
"learning_rate": 4.151717983043285e-05,
"loss": 0.1098,
"step": 7800
},
{
"epoch": 0.6344871897839531,
"grad_norm": 0.26322418451309204,
"learning_rate": 4.062472110664882e-05,
"loss": 0.1148,
"step": 7900
},
{
"epoch": 0.6425186731989398,
"grad_norm": 0.3186470568180084,
"learning_rate": 3.9732262382864796e-05,
"loss": 0.1073,
"step": 8000
},
{
"epoch": 0.6505501566139266,
"grad_norm": 0.3686552047729492,
"learning_rate": 3.883980365908077e-05,
"loss": 0.1091,
"step": 8100
},
{
"epoch": 0.6585816400289134,
"grad_norm": 0.3054899275302887,
"learning_rate": 3.7947344935296744e-05,
"loss": 0.1118,
"step": 8200
},
{
"epoch": 0.6666131234439001,
"grad_norm": 0.22546516358852386,
"learning_rate": 3.705488621151272e-05,
"loss": 0.11,
"step": 8300
},
{
"epoch": 0.6746446068588868,
"grad_norm": 0.28520339727401733,
"learning_rate": 3.61624274877287e-05,
"loss": 0.1088,
"step": 8400
},
{
"epoch": 0.6826760902738735,
"grad_norm": 0.21663372218608856,
"learning_rate": 3.526996876394467e-05,
"loss": 0.1079,
"step": 8500
},
{
"epoch": 0.6907075736888604,
"grad_norm": 0.2450413554906845,
"learning_rate": 3.4377510040160646e-05,
"loss": 0.108,
"step": 8600
},
{
"epoch": 0.6987390571038471,
"grad_norm": 0.20690684020519257,
"learning_rate": 3.348505131637662e-05,
"loss": 0.1142,
"step": 8700
},
{
"epoch": 0.7067705405188338,
"grad_norm": 0.1910647600889206,
"learning_rate": 3.25925925925926e-05,
"loss": 0.1057,
"step": 8800
},
{
"epoch": 0.7148020239338205,
"grad_norm": 0.28397220373153687,
"learning_rate": 3.170013386880857e-05,
"loss": 0.1092,
"step": 8900
},
{
"epoch": 0.7228335073488074,
"grad_norm": 0.29897409677505493,
"learning_rate": 3.080767514502455e-05,
"loss": 0.1103,
"step": 9000
},
{
"epoch": 0.7228335073488074,
"eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.10295938433152874,
"eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.10704782605171204,
"eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 247.051,
"eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.024,
"eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.255,
"eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.19799919042387093,
"step": 9000
},
{
"epoch": 0.7228335073488074,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.11085619895384051,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.3005123436450958,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 43.5576,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 11.204,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.4,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.2793252255786583,
"step": 9000
},
{
"epoch": 0.7228335073488074,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.6355733082706767,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.46724840998649597,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 34.5607,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.344,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.318,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.5612134344528711,
"step": 9000
},
{
"epoch": 0.7308649907637941,
"grad_norm": 0.2587852478027344,
"learning_rate": 2.991521642124052e-05,
"loss": 0.1091,
"step": 9100
},
{
"epoch": 0.7388964741787808,
"grad_norm": 0.27610886096954346,
"learning_rate": 2.9022757697456492e-05,
"loss": 0.1109,
"step": 9200
},
{
"epoch": 0.7469279575937676,
"grad_norm": 0.23138980567455292,
"learning_rate": 2.813029897367247e-05,
"loss": 0.1027,
"step": 9300
},
{
"epoch": 0.7549594410087543,
"grad_norm": 0.305155873298645,
"learning_rate": 2.7237840249888446e-05,
"loss": 0.1096,
"step": 9400
},
{
"epoch": 0.7629909244237411,
"grad_norm": 0.30439290404319763,
"learning_rate": 2.6345381526104417e-05,
"loss": 0.1055,
"step": 9500
},
{
"epoch": 0.7710224078387278,
"grad_norm": 0.2712409198284149,
"learning_rate": 2.5452922802320394e-05,
"loss": 0.1068,
"step": 9600
},
{
"epoch": 0.7790538912537146,
"grad_norm": 0.16476544737815857,
"learning_rate": 2.4560464078536368e-05,
"loss": 0.1052,
"step": 9700
},
{
"epoch": 0.7870853746687013,
"grad_norm": 0.24273213744163513,
"learning_rate": 2.366800535475234e-05,
"loss": 0.1119,
"step": 9800
},
{
"epoch": 0.795116858083688,
"grad_norm": 0.2552710771560669,
"learning_rate": 2.277554663096832e-05,
"loss": 0.1067,
"step": 9900
},
{
"epoch": 0.8031483414986748,
"grad_norm": 0.19780471920967102,
"learning_rate": 2.1883087907184292e-05,
"loss": 0.1056,
"step": 10000
},
{
"epoch": 0.8111798249136616,
"grad_norm": 0.2539425492286682,
"learning_rate": 2.0990629183400266e-05,
"loss": 0.1049,
"step": 10100
},
{
"epoch": 0.8192113083286483,
"grad_norm": 0.15332534909248352,
"learning_rate": 2.0098170459616243e-05,
"loss": 0.1053,
"step": 10200
},
{
"epoch": 0.827242791743635,
"grad_norm": 0.22054997086524963,
"learning_rate": 1.9205711735832217e-05,
"loss": 0.105,
"step": 10300
},
{
"epoch": 0.8352742751586218,
"grad_norm": 0.2107006013393402,
"learning_rate": 1.8313253012048194e-05,
"loss": 0.1085,
"step": 10400
},
{
"epoch": 0.8433057585736086,
"grad_norm": 0.31692540645599365,
"learning_rate": 1.7420794288264168e-05,
"loss": 0.1028,
"step": 10500
},
{
"epoch": 0.8433057585736086,
"eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.09179189628802409,
"eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.10394905507564545,
"eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 246.725,
"eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.027,
"eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.255,
"eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.18556641415601688,
"step": 10500
},
{
"epoch": 0.8433057585736086,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.10278058181150776,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.2935584783554077,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 44.0728,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 11.073,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.384,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.26441741859552764,
"step": 10500
},
{
"epoch": 0.8433057585736086,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.5568609022556391,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.46201658248901367,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 33.0653,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.45,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.333,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.5362946912242686,
"step": 10500
},
{
"epoch": 0.8513372419885953,
"grad_norm": 0.35895422101020813,
"learning_rate": 1.6528335564480142e-05,
"loss": 0.1049,
"step": 10600
},
{
"epoch": 0.859368725403582,
"grad_norm": 0.16946537792682648,
"learning_rate": 1.563587684069612e-05,
"loss": 0.1104,
"step": 10700
},
{
"epoch": 0.8674002088185688,
"grad_norm": 0.2614282965660095,
"learning_rate": 1.4743418116912095e-05,
"loss": 0.1029,
"step": 10800
},
{
"epoch": 0.8754316922335555,
"grad_norm": 0.27617985010147095,
"learning_rate": 1.3850959393128068e-05,
"loss": 0.1096,
"step": 10900
},
{
"epoch": 0.8834631756485423,
"grad_norm": 0.20801787078380585,
"learning_rate": 1.2958500669344042e-05,
"loss": 0.1087,
"step": 11000
},
{
"epoch": 0.891494659063529,
"grad_norm": 0.2360270470380783,
"learning_rate": 1.2066041945560018e-05,
"loss": 0.1049,
"step": 11100
},
{
"epoch": 0.8995261424785158,
"grad_norm": 0.24903564155101776,
"learning_rate": 1.1173583221775993e-05,
"loss": 0.1083,
"step": 11200
},
{
"epoch": 0.9075576258935025,
"grad_norm": 0.22896708548069,
"learning_rate": 1.0281124497991969e-05,
"loss": 0.1038,
"step": 11300
},
{
"epoch": 0.9155891093084892,
"grad_norm": 0.278579443693161,
"learning_rate": 9.388665774207942e-06,
"loss": 0.0991,
"step": 11400
},
{
"epoch": 0.9236205927234761,
"grad_norm": 0.2865758538246155,
"learning_rate": 8.496207050423918e-06,
"loss": 0.1049,
"step": 11500
},
{
"epoch": 0.9316520761384628,
"grad_norm": 0.21692918241024017,
"learning_rate": 7.6037483266398935e-06,
"loss": 0.1029,
"step": 11600
},
{
"epoch": 0.9396835595534495,
"grad_norm": 0.20344237983226776,
"learning_rate": 6.711289602855868e-06,
"loss": 0.103,
"step": 11700
},
{
"epoch": 0.9477150429684362,
"grad_norm": 0.27135396003723145,
"learning_rate": 5.818830879071844e-06,
"loss": 0.1079,
"step": 11800
},
{
"epoch": 0.9557465263834231,
"grad_norm": 0.28661566972732544,
"learning_rate": 4.926372155287818e-06,
"loss": 0.1044,
"step": 11900
},
{
"epoch": 0.9637780097984098,
"grad_norm": 0.20027625560760498,
"learning_rate": 4.033913431503794e-06,
"loss": 0.1062,
"step": 12000
},
{
"epoch": 0.9637780097984098,
"eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.08954626010536282,
"eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.1025717481970787,
"eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 245.0753,
"eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.04,
"eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.257,
"eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.1836003006997051,
"step": 12000
},
{
"epoch": 0.9637780097984098,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.10204643479856841,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.29060474038124084,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 44.4274,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 10.984,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.373,
"eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.2608866222047862,
"step": 12000
},
{
"epoch": 0.9637780097984098,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.6207706766917294,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.46583321690559387,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 34.1057,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.375,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.323,
"eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.581798483206934,
"step": 12000
},
{
"epoch": 0.9718094932133965,
"grad_norm": 0.1759420484304428,
"learning_rate": 3.141454707719768e-06,
"loss": 0.1025,
"step": 12100
},
{
"epoch": 0.9798409766283832,
"grad_norm": 0.21223457157611847,
"learning_rate": 2.248995983935743e-06,
"loss": 0.1083,
"step": 12200
},
{
"epoch": 0.98787246004337,
"grad_norm": 0.2107488363981247,
"learning_rate": 1.356537260151718e-06,
"loss": 0.1008,
"step": 12300
},
{
"epoch": 0.9959039434583568,
"grad_norm": 0.22825685143470764,
"learning_rate": 4.64078536367693e-07,
"loss": 0.1026,
"step": 12400
},
{
"epoch": 1.0,
"step": 12451,
"total_flos": 1.056850030203863e+17,
"train_loss": 0.1694467846107008,
"train_runtime": 5343.3049,
"train_samples_per_second": 18.641,
"train_steps_per_second": 2.33
}
],
"logging_steps": 100,
"max_steps": 12451,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.056850030203863e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}