{
  "best_global_step": 12000,
  "best_metric": 0.08954626010536282,
  "best_model_checkpoint": "./output/bm-byt5-text-normalization-10/checkpoint-12000",
  "epoch": 1.0,
  "eval_steps": 1500,
  "global_step": 12451,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008031483414986749,
      "grad_norm": 2.8291845321655273,
      "learning_rate": 7.9454253611557e-06,
      "loss": 1.5212,
      "step": 100
    },
    {
      "epoch": 0.016062966829973498,
      "grad_norm": 1.04109525680542,
      "learning_rate": 1.5971107544141253e-05,
      "loss": 0.8546,
      "step": 200
    },
    {
      "epoch": 0.024094450244960243,
      "grad_norm": 0.7079290151596069,
      "learning_rate": 2.3996789727126808e-05,
      "loss": 0.5929,
      "step": 300
    },
    {
      "epoch": 0.032125933659946995,
      "grad_norm": 1.0854698419570923,
      "learning_rate": 3.202247191011236e-05,
      "loss": 0.5379,
      "step": 400
    },
    {
      "epoch": 0.04015741707493374,
      "grad_norm": 0.31283432245254517,
      "learning_rate": 4.0048154093097914e-05,
      "loss": 0.4776,
      "step": 500
    },
    {
      "epoch": 0.048188900489920486,
      "grad_norm": 0.37612250447273254,
      "learning_rate": 4.807383627608347e-05,
      "loss": 0.4267,
      "step": 600
    },
    {
      "epoch": 0.05622038390490724,
      "grad_norm": 0.41092032194137573,
      "learning_rate": 5.6099518459069025e-05,
      "loss": 0.3954,
      "step": 700
    },
    {
      "epoch": 0.06425186731989399,
      "grad_norm": 0.393480509519577,
      "learning_rate": 6.412520064205457e-05,
      "loss": 0.3465,
      "step": 800
    },
    {
      "epoch": 0.07228335073488074,
      "grad_norm": 0.31357675790786743,
      "learning_rate": 7.215088282504013e-05,
      "loss": 0.3212,
      "step": 900
    },
    {
      "epoch": 0.08031483414986748,
      "grad_norm": 0.3486596345901489,
      "learning_rate": 8.017656500802569e-05,
      "loss": 0.2934,
      "step": 1000
    },
    {
      "epoch": 0.08834631756485423,
      "grad_norm": 0.22963479161262512,
      "learning_rate": 8.820224719101124e-05,
      "loss": 0.2953,
      "step": 1100
    },
    {
      "epoch": 0.09637780097984097,
      "grad_norm": 0.3239472508430481,
      "learning_rate": 9.62279293739968e-05,
      "loss": 0.2663,
      "step": 1200
    },
    {
      "epoch": 0.10440928439482773,
      "grad_norm": 0.24768926203250885,
      "learning_rate": 9.952699687639447e-05,
      "loss": 0.2508,
      "step": 1300
    },
    {
      "epoch": 0.11244076780981448,
      "grad_norm": 0.32661986351013184,
      "learning_rate": 9.863453815261045e-05,
      "loss": 0.234,
      "step": 1400
    },
    {
      "epoch": 0.12047225122480122,
      "grad_norm": 0.39115390181541443,
      "learning_rate": 9.774207942882642e-05,
      "loss": 0.236,
      "step": 1500
    },
    {
      "epoch": 0.12047225122480122,
      "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.3998931805491491,
      "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.21429085731506348,
      "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 273.5873,
      "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 1.828,
      "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.23,
      "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.5533452842190482,
      "step": 1500
    },
    {
      "epoch": 0.12047225122480122,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 1.2055611636230155,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.576806366443634,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 136.9717,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 3.563,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 0.445,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 1.61514319340918,
      "step": 1500
    },
    {
      "epoch": 0.12047225122480122,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.8101503759398496,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.5432849526405334,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 34.8989,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.321,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.315,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.8331527627302275,
      "step": 1500
    },
    {
      "epoch": 0.12850373463978798,
      "grad_norm": 0.35263243317604065,
      "learning_rate": 9.68496207050424e-05,
      "loss": 0.217,
      "step": 1600
    },
    {
      "epoch": 0.13653521805477473,
      "grad_norm": 0.35822373628616333,
      "learning_rate": 9.595716198125837e-05,
      "loss": 0.2156,
      "step": 1700
    },
    {
      "epoch": 0.14456670146976147,
      "grad_norm": 0.36933159828186035,
      "learning_rate": 9.506470325747435e-05,
      "loss": 0.1986,
      "step": 1800
    },
    {
      "epoch": 0.15259818488474822,
      "grad_norm": 0.31603536009788513,
      "learning_rate": 9.417224453369033e-05,
      "loss": 0.2068,
      "step": 1900
    },
    {
      "epoch": 0.16062966829973496,
      "grad_norm": 0.38132354617118835,
      "learning_rate": 9.32797858099063e-05,
      "loss": 0.1999,
      "step": 2000
    },
    {
      "epoch": 0.1686611517147217,
      "grad_norm": 0.3746110200881958,
      "learning_rate": 9.238732708612226e-05,
      "loss": 0.1942,
      "step": 2100
    },
    {
      "epoch": 0.17669263512970845,
      "grad_norm": 0.2789841890335083,
      "learning_rate": 9.149486836233825e-05,
      "loss": 0.1911,
      "step": 2200
    },
    {
      "epoch": 0.1847241185446952,
      "grad_norm": 0.39334043860435486,
      "learning_rate": 9.060240963855422e-05,
      "loss": 0.1831,
      "step": 2300
    },
    {
      "epoch": 0.19275560195968194,
      "grad_norm": 0.62880539894104,
      "learning_rate": 8.970995091477019e-05,
      "loss": 0.1759,
      "step": 2400
    },
    {
      "epoch": 0.2007870853746687,
      "grad_norm": 0.23944030702114105,
      "learning_rate": 8.881749219098617e-05,
      "loss": 0.1845,
      "step": 2500
    },
    {
      "epoch": 0.20881856878965546,
      "grad_norm": 0.29401180148124695,
      "learning_rate": 8.792503346720215e-05,
      "loss": 0.1752,
      "step": 2600
    },
    {
      "epoch": 0.2168500522046422,
      "grad_norm": 0.2594122886657715,
      "learning_rate": 8.703257474341812e-05,
      "loss": 0.1753,
      "step": 2700
    },
    {
      "epoch": 0.22488153561962895,
      "grad_norm": 0.3151404559612274,
      "learning_rate": 8.61401160196341e-05,
      "loss": 0.1787,
      "step": 2800
    },
    {
      "epoch": 0.2329130190346157,
      "grad_norm": 0.2669820487499237,
      "learning_rate": 8.524765729585006e-05,
      "loss": 0.1703,
      "step": 2900
    },
    {
      "epoch": 0.24094450244960244,
      "grad_norm": 0.2792331278324127,
      "learning_rate": 8.435519857206605e-05,
      "loss": 0.1697,
      "step": 3000
    },
    {
      "epoch": 0.24094450244960244,
      "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.20056565754655142,
      "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.15873809158802032,
      "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 272.0505,
      "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 1.838,
      "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.232,
      "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.33574278609842134,
      "step": 3000
    },
    {
      "epoch": 0.24094450244960244,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.287418555565752,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.49165913462638855,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 52.9975,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 9.208,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.151,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.5653197332287171,
      "step": 3000
    },
    {
      "epoch": 0.24094450244960244,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.706062030075188,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.5416178703308105,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 29.6535,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.732,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.371,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.6511375947995667,
      "step": 3000
    },
    {
      "epoch": 0.2489759858645892,
      "grad_norm": 0.3188706040382385,
      "learning_rate": 8.346273984828203e-05,
      "loss": 0.1629,
      "step": 3100
    },
    {
      "epoch": 0.25700746927957596,
      "grad_norm": 0.43640485405921936,
      "learning_rate": 8.257028112449799e-05,
      "loss": 0.1706,
      "step": 3200
    },
    {
      "epoch": 0.2650389526945627,
      "grad_norm": 0.3020322322845459,
      "learning_rate": 8.167782240071396e-05,
      "loss": 0.1683,
      "step": 3300
    },
    {
      "epoch": 0.27307043610954945,
      "grad_norm": 0.32241836190223694,
      "learning_rate": 8.078536367692995e-05,
      "loss": 0.1629,
      "step": 3400
    },
    {
      "epoch": 0.28110191952453617,
      "grad_norm": 0.23341508209705353,
      "learning_rate": 7.989290495314592e-05,
      "loss": 0.1593,
      "step": 3500
    },
    {
      "epoch": 0.28913340293952294,
      "grad_norm": 0.3228916525840759,
      "learning_rate": 7.900044622936189e-05,
      "loss": 0.1542,
      "step": 3600
    },
    {
      "epoch": 0.29716488635450966,
      "grad_norm": 0.265959233045578,
      "learning_rate": 7.810798750557787e-05,
      "loss": 0.1511,
      "step": 3700
    },
    {
      "epoch": 0.30519636976949643,
      "grad_norm": 0.2453306019306183,
      "learning_rate": 7.721552878179385e-05,
      "loss": 0.1531,
      "step": 3800
    },
    {
      "epoch": 0.31322785318448315,
      "grad_norm": 0.30733558535575867,
      "learning_rate": 7.632307005800982e-05,
      "loss": 0.1495,
      "step": 3900
    },
    {
      "epoch": 0.3212593365994699,
      "grad_norm": 0.2568661570549011,
      "learning_rate": 7.54306113342258e-05,
      "loss": 0.1534,
      "step": 4000
    },
    {
      "epoch": 0.3292908200144567,
      "grad_norm": 0.3408128619194031,
      "learning_rate": 7.453815261044176e-05,
      "loss": 0.1492,
      "step": 4100
    },
    {
      "epoch": 0.3373223034294434,
      "grad_norm": 0.2851060628890991,
      "learning_rate": 7.364569388665774e-05,
      "loss": 0.1449,
      "step": 4200
    },
    {
      "epoch": 0.3453537868444302,
      "grad_norm": 0.36409246921539307,
      "learning_rate": 7.275323516287372e-05,
      "loss": 0.1365,
      "step": 4300
    },
    {
      "epoch": 0.3533852702594169,
      "grad_norm": 0.6164109110832214,
      "learning_rate": 7.186077643908969e-05,
      "loss": 0.1345,
      "step": 4400
    },
    {
      "epoch": 0.3614167536744037,
      "grad_norm": 0.26890653371810913,
      "learning_rate": 7.096831771530566e-05,
      "loss": 0.1333,
      "step": 4500
    },
    {
      "epoch": 0.3614167536744037,
      "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.15153795732077396,
      "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.12628014385700226,
      "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 240.3276,
      "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.08,
      "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.262,
      "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.2586017463713641,
      "step": 4500
    },
    {
      "epoch": 0.3614167536744037,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.11984949986234744,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.3522701859474182,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 43.0411,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 11.338,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.417,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.2879560612004708,
      "step": 4500
    },
    {
      "epoch": 0.3614167536744037,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.5928101503759399,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.47585469484329224,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 30.2706,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.676,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.363,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.5536294691224268,
      "step": 4500
    },
    {
      "epoch": 0.3694482370893904,
      "grad_norm": 0.23563382029533386,
      "learning_rate": 7.007585899152165e-05,
      "loss": 0.1338,
      "step": 4600
    },
    {
      "epoch": 0.37747972050437717,
      "grad_norm": 0.2873114049434662,
      "learning_rate": 6.918340026773762e-05,
      "loss": 0.1328,
      "step": 4700
    },
    {
      "epoch": 0.3855112039193639,
      "grad_norm": 0.2561749517917633,
      "learning_rate": 6.829094154395359e-05,
      "loss": 0.1287,
      "step": 4800
    },
    {
      "epoch": 0.39354268733435066,
      "grad_norm": 0.30756044387817383,
      "learning_rate": 6.739848282016957e-05,
      "loss": 0.1244,
      "step": 4900
    },
    {
      "epoch": 0.4015741707493374,
      "grad_norm": 0.21384097635746002,
      "learning_rate": 6.650602409638555e-05,
      "loss": 0.1296,
      "step": 5000
    },
    {
      "epoch": 0.40960565416432415,
      "grad_norm": 0.28480997681617737,
      "learning_rate": 6.561356537260152e-05,
      "loss": 0.1306,
      "step": 5100
    },
    {
      "epoch": 0.4176371375793109,
      "grad_norm": 0.3029521405696869,
      "learning_rate": 6.47211066488175e-05,
      "loss": 0.1285,
      "step": 5200
    },
    {
      "epoch": 0.42566862099429764,
      "grad_norm": 0.3370857238769531,
      "learning_rate": 6.382864792503346e-05,
      "loss": 0.1284,
      "step": 5300
    },
    {
      "epoch": 0.4337001044092844,
      "grad_norm": 0.206114262342453,
      "learning_rate": 6.293618920124944e-05,
      "loss": 0.1255,
      "step": 5400
    },
    {
      "epoch": 0.44173158782427113,
      "grad_norm": 0.26664304733276367,
      "learning_rate": 6.204373047746542e-05,
      "loss": 0.12,
      "step": 5500
    },
    {
      "epoch": 0.4497630712392579,
      "grad_norm": 0.23688237369060516,
      "learning_rate": 6.115127175368139e-05,
      "loss": 0.1209,
      "step": 5600
    },
    {
      "epoch": 0.4577945546542446,
      "grad_norm": 0.2480483055114746,
      "learning_rate": 6.0258813029897365e-05,
      "loss": 0.1232,
      "step": 5700
    },
    {
      "epoch": 0.4658260380692314,
      "grad_norm": 0.23789241909980774,
      "learning_rate": 5.9366354306113345e-05,
      "loss": 0.1229,
      "step": 5800
    },
    {
      "epoch": 0.4738575214842181,
      "grad_norm": 0.19691696763038635,
      "learning_rate": 5.847389558232932e-05,
      "loss": 0.1156,
      "step": 5900
    },
    {
      "epoch": 0.4818890048992049,
      "grad_norm": 0.2652628421783447,
      "learning_rate": 5.758143685854529e-05,
      "loss": 0.1234,
      "step": 6000
    },
    {
      "epoch": 0.4818890048992049,
      "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.1208759194969775,
      "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.11701546609401703,
      "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 229.5709,
      "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.178,
      "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.274,
      "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.22506216388133926,
      "step": 6000
    },
    {
      "epoch": 0.4818890048992049,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.10544186473341287,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.3353877365589142,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 40.1571,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 12.152,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.519,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.2706943899568458,
      "step": 6000
    },
    {
      "epoch": 0.4818890048992049,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.636983082706767,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.4661330282688141,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 28.8323,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.809,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.382,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.5492957746478874,
      "step": 6000
    },
    {
      "epoch": 0.4899204883141916,
      "grad_norm": 0.46512874960899353,
      "learning_rate": 5.6688978134761274e-05,
      "loss": 0.1171,
      "step": 6100
    },
    {
      "epoch": 0.4979519717291784,
      "grad_norm": 0.4254817068576813,
      "learning_rate": 5.579651941097724e-05,
      "loss": 0.1189,
      "step": 6200
    },
    {
      "epoch": 0.5059834551441651,
      "grad_norm": 0.21140217781066895,
      "learning_rate": 5.4904060687193214e-05,
      "loss": 0.1244,
      "step": 6300
    },
    {
      "epoch": 0.5140149385591519,
      "grad_norm": 0.24077512323856354,
      "learning_rate": 5.4011601963409195e-05,
      "loss": 0.116,
      "step": 6400
    },
    {
      "epoch": 0.5220464219741386,
      "grad_norm": 0.3227494955062866,
      "learning_rate": 5.311914323962517e-05,
      "loss": 0.1186,
      "step": 6500
    },
    {
      "epoch": 0.5300779053891254,
      "grad_norm": 0.23471038043498993,
      "learning_rate": 5.222668451584114e-05,
      "loss": 0.1137,
      "step": 6600
    },
    {
      "epoch": 0.5381093888041121,
      "grad_norm": 0.33298271894454956,
      "learning_rate": 5.133422579205712e-05,
      "loss": 0.117,
      "step": 6700
    },
    {
      "epoch": 0.5461408722190989,
      "grad_norm": 0.2374378889799118,
      "learning_rate": 5.04417670682731e-05,
      "loss": 0.1183,
      "step": 6800
    },
    {
      "epoch": 0.5541723556340856,
      "grad_norm": 0.2295396625995636,
      "learning_rate": 4.954930834448907e-05,
      "loss": 0.1151,
      "step": 6900
    },
    {
      "epoch": 0.5622038390490723,
      "grad_norm": 0.2642989158630371,
      "learning_rate": 4.8656849620705045e-05,
      "loss": 0.1141,
      "step": 7000
    },
    {
      "epoch": 0.5702353224640591,
      "grad_norm": 0.2136625200510025,
      "learning_rate": 4.776439089692102e-05,
      "loss": 0.1155,
      "step": 7100
    },
    {
      "epoch": 0.5782668058790459,
      "grad_norm": 0.19569291174411774,
      "learning_rate": 4.687193217313699e-05,
      "loss": 0.1162,
      "step": 7200
    },
    {
      "epoch": 0.5862982892940326,
      "grad_norm": 0.2163517326116562,
      "learning_rate": 4.597947344935297e-05,
      "loss": 0.1115,
      "step": 7300
    },
    {
      "epoch": 0.5943297727090193,
      "grad_norm": 0.2426002025604248,
      "learning_rate": 4.5087014725568947e-05,
      "loss": 0.1165,
      "step": 7400
    },
    {
      "epoch": 0.6023612561240062,
      "grad_norm": 0.2794135510921478,
      "learning_rate": 4.419455600178492e-05,
      "loss": 0.1106,
      "step": 7500
    },
    {
      "epoch": 0.6023612561240062,
      "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.10490155616518171,
      "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.1121324971318245,
      "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 229.2718,
      "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.181,
      "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.275,
      "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.20499624125368646,
      "step": 7500
    },
    {
      "epoch": 0.6023612561240062,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.11140680921354501,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.3176734149456024,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 39.7862,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 12.266,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.533,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.27697136131816397,
      "step": 7500
    },
    {
      "epoch": 0.6023612561240062,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.518562030075188,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.4668663740158081,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 26.2532,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 3.085,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.419,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.4723726977248104,
      "step": 7500
    },
    {
      "epoch": 0.6103927395389929,
      "grad_norm": 0.2562696039676666,
      "learning_rate": 4.3302097278000894e-05,
      "loss": 0.1111,
      "step": 7600
    },
    {
      "epoch": 0.6184242229539796,
      "grad_norm": 0.28034263849258423,
      "learning_rate": 4.240963855421687e-05,
      "loss": 0.1107,
      "step": 7700
    },
    {
      "epoch": 0.6264557063689663,
      "grad_norm": 0.23074501752853394,
      "learning_rate": 4.151717983043285e-05,
      "loss": 0.1098,
      "step": 7800
    },
    {
      "epoch": 0.6344871897839531,
      "grad_norm": 0.26322418451309204,
      "learning_rate": 4.062472110664882e-05,
      "loss": 0.1148,
      "step": 7900
    },
    {
      "epoch": 0.6425186731989398,
      "grad_norm": 0.3186470568180084,
      "learning_rate": 3.9732262382864796e-05,
      "loss": 0.1073,
      "step": 8000
    },
    {
      "epoch": 0.6505501566139266,
      "grad_norm": 0.3686552047729492,
      "learning_rate": 3.883980365908077e-05,
      "loss": 0.1091,
      "step": 8100
    },
    {
      "epoch": 0.6585816400289134,
      "grad_norm": 0.3054899275302887,
      "learning_rate": 3.7947344935296744e-05,
      "loss": 0.1118,
      "step": 8200
    },
    {
      "epoch": 0.6666131234439001,
      "grad_norm": 0.22546516358852386,
      "learning_rate": 3.705488621151272e-05,
      "loss": 0.11,
      "step": 8300
    },
    {
      "epoch": 0.6746446068588868,
      "grad_norm": 0.28520339727401733,
      "learning_rate": 3.61624274877287e-05,
      "loss": 0.1088,
      "step": 8400
    },
    {
      "epoch": 0.6826760902738735,
      "grad_norm": 0.21663372218608856,
      "learning_rate": 3.526996876394467e-05,
      "loss": 0.1079,
      "step": 8500
    },
    {
      "epoch": 0.6907075736888604,
      "grad_norm": 0.2450413554906845,
      "learning_rate": 3.4377510040160646e-05,
      "loss": 0.108,
      "step": 8600
    },
    {
      "epoch": 0.6987390571038471,
      "grad_norm": 0.20690684020519257,
      "learning_rate": 3.348505131637662e-05,
      "loss": 0.1142,
      "step": 8700
    },
    {
      "epoch": 0.7067705405188338,
      "grad_norm": 0.1910647600889206,
      "learning_rate": 3.25925925925926e-05,
      "loss": 0.1057,
      "step": 8800
    },
    {
      "epoch": 0.7148020239338205,
      "grad_norm": 0.28397220373153687,
      "learning_rate": 3.170013386880857e-05,
      "loss": 0.1092,
      "step": 8900
    },
    {
      "epoch": 0.7228335073488074,
      "grad_norm": 0.29897409677505493,
      "learning_rate": 3.080767514502455e-05,
      "loss": 0.1103,
      "step": 9000
    },
    {
      "epoch": 0.7228335073488074,
      "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.10295938433152874,
      "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.10704782605171204,
      "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 247.051,
      "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.024,
      "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.255,
      "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.19799919042387093,
      "step": 9000
    },
    {
      "epoch": 0.7228335073488074,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.11085619895384051,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.3005123436450958,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 43.5576,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 11.204,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.4,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.2793252255786583,
      "step": 9000
    },
    {
      "epoch": 0.7228335073488074,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.6355733082706767,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.46724840998649597,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 34.5607,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.344,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.318,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.5612134344528711,
      "step": 9000
    },
    {
      "epoch": 0.7308649907637941,
      "grad_norm": 0.2587852478027344,
      "learning_rate": 2.991521642124052e-05,
      "loss": 0.1091,
      "step": 9100
    },
    {
      "epoch": 0.7388964741787808,
      "grad_norm": 0.27610886096954346,
      "learning_rate": 2.9022757697456492e-05,
      "loss": 0.1109,
      "step": 9200
    },
    {
      "epoch": 0.7469279575937676,
      "grad_norm": 0.23138980567455292,
      "learning_rate": 2.813029897367247e-05,
      "loss": 0.1027,
      "step": 9300
    },
    {
      "epoch": 0.7549594410087543,
      "grad_norm": 0.305155873298645,
      "learning_rate": 2.7237840249888446e-05,
      "loss": 0.1096,
      "step": 9400
    },
    {
      "epoch": 0.7629909244237411,
      "grad_norm": 0.30439290404319763,
      "learning_rate": 2.6345381526104417e-05,
      "loss": 0.1055,
      "step": 9500
    },
    {
      "epoch": 0.7710224078387278,
      "grad_norm": 0.2712409198284149,
      "learning_rate": 2.5452922802320394e-05,
      "loss": 0.1068,
      "step": 9600
    },
    {
      "epoch": 0.7790538912537146,
      "grad_norm": 0.16476544737815857,
      "learning_rate": 2.4560464078536368e-05,
      "loss": 0.1052,
      "step": 9700
    },
    {
      "epoch": 0.7870853746687013,
      "grad_norm": 0.24273213744163513,
      "learning_rate": 2.366800535475234e-05,
      "loss": 0.1119,
      "step": 9800
    },
    {
      "epoch": 0.795116858083688,
      "grad_norm": 0.2552710771560669,
      "learning_rate": 2.277554663096832e-05,
      "loss": 0.1067,
      "step": 9900
    },
    {
      "epoch": 0.8031483414986748,
      "grad_norm": 0.19780471920967102,
      "learning_rate": 2.1883087907184292e-05,
      "loss": 0.1056,
      "step": 10000
    },
    {
      "epoch": 0.8111798249136616,
      "grad_norm": 0.2539425492286682,
      "learning_rate": 2.0990629183400266e-05,
      "loss": 0.1049,
      "step": 10100
    },
    {
      "epoch": 0.8192113083286483,
      "grad_norm": 0.15332534909248352,
      "learning_rate": 2.0098170459616243e-05,
      "loss": 0.1053,
      "step": 10200
    },
    {
      "epoch": 0.827242791743635,
      "grad_norm": 0.22054997086524963,
      "learning_rate": 1.9205711735832217e-05,
      "loss": 0.105,
      "step": 10300
    },
    {
      "epoch": 0.8352742751586218,
      "grad_norm": 0.2107006013393402,
      "learning_rate": 1.8313253012048194e-05,
      "loss": 0.1085,
      "step": 10400
    },
    {
      "epoch": 0.8433057585736086,
      "grad_norm": 0.31692540645599365,
      "learning_rate": 1.7420794288264168e-05,
      "loss": 0.1028,
      "step": 10500
    },
    {
      "epoch": 0.8433057585736086,
      "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.09179189628802409,
      "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.10394905507564545,
      "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 246.725,
      "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.027,
      "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.255,
      "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.18556641415601688,
      "step": 10500
    },
    {
      "epoch": 0.8433057585736086,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.10278058181150776,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.2935584783554077,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 44.0728,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 11.073,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.384,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.26441741859552764,
      "step": 10500
    },
    {
      "epoch": 0.8433057585736086,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.5568609022556391,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.46201658248901367,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 33.0653,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.45,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.333,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.5362946912242686,
      "step": 10500
    },
    {
      "epoch": 0.8513372419885953,
      "grad_norm": 0.35895422101020813,
      "learning_rate": 1.6528335564480142e-05,
      "loss": 0.1049,
      "step": 10600
    },
    {
      "epoch": 0.859368725403582,
      "grad_norm": 0.16946537792682648,
      "learning_rate": 1.563587684069612e-05,
      "loss": 0.1104,
      "step": 10700
    },
    {
      "epoch": 0.8674002088185688,
      "grad_norm": 0.2614282965660095,
      "learning_rate": 1.4743418116912095e-05,
      "loss": 0.1029,
      "step": 10800
    },
    {
      "epoch": 0.8754316922335555,
      "grad_norm": 0.27617985010147095,
      "learning_rate": 1.3850959393128068e-05,
      "loss": 0.1096,
      "step": 10900
    },
    {
      "epoch": 0.8834631756485423,
      "grad_norm": 0.20801787078380585,
      "learning_rate": 1.2958500669344042e-05,
      "loss": 0.1087,
      "step": 11000
    },
    {
      "epoch": 0.891494659063529,
      "grad_norm": 0.2360270470380783,
      "learning_rate": 1.2066041945560018e-05,
      "loss": 0.1049,
      "step": 11100
    },
    {
      "epoch": 0.8995261424785158,
      "grad_norm": 0.24903564155101776,
      "learning_rate": 1.1173583221775993e-05,
      "loss": 0.1083,
      "step": 11200
    },
    {
      "epoch": 0.9075576258935025,
      "grad_norm": 0.22896708548069,
      "learning_rate": 1.0281124497991969e-05,
      "loss": 0.1038,
      "step": 11300
    },
    {
      "epoch": 0.9155891093084892,
      "grad_norm": 0.278579443693161,
      "learning_rate": 9.388665774207942e-06,
      "loss": 0.0991,
      "step": 11400
    },
    {
      "epoch": 0.9236205927234761,
      "grad_norm": 0.2865758538246155,
      "learning_rate": 8.496207050423918e-06,
      "loss": 0.1049,
      "step": 11500
    },
    {
      "epoch": 0.9316520761384628,
      "grad_norm": 0.21692918241024017,
      "learning_rate": 7.6037483266398935e-06,
      "loss": 0.1029,
      "step": 11600
    },
    {
      "epoch": 0.9396835595534495,
      "grad_norm": 0.20344237983226776,
      "learning_rate": 6.711289602855868e-06,
      "loss": 0.103,
      "step": 11700
    },
    {
      "epoch": 0.9477150429684362,
      "grad_norm": 0.27135396003723145,
      "learning_rate": 5.818830879071844e-06,
      "loss": 0.1079,
      "step": 11800
    },
    {
      "epoch": 0.9557465263834231,
      "grad_norm": 0.28661566972732544,
      "learning_rate": 4.926372155287818e-06,
      "loss": 0.1044,
      "step": 11900
    },
    {
      "epoch": 0.9637780097984098,
      "grad_norm": 0.20027625560760498,
      "learning_rate": 4.033913431503794e-06,
      "loss": 0.1062,
      "step": 12000
    },
    {
      "epoch": 0.9637780097984098,
      "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.08954626010536282,
      "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.1025717481970787,
      "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 245.0753,
      "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.04,
      "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.257,
      "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.1836003006997051,
      "step": 12000
    },
    {
      "epoch": 0.9637780097984098,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.10204643479856841,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.29060474038124084,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 44.4274,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 10.984,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.373,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.2608866222047862,
      "step": 12000
    },
    {
      "epoch": 0.9637780097984098,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.6207706766917294,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.46583321690559387,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 34.1057,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.375,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.323,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.581798483206934,
      "step": 12000
    },
    {
      "epoch": 0.9718094932133965,
      "grad_norm": 0.1759420484304428,
      "learning_rate": 3.141454707719768e-06,
      "loss": 0.1025,
      "step": 12100
    },
    {
      "epoch": 0.9798409766283832,
      "grad_norm": 0.21223457157611847,
      "learning_rate": 2.248995983935743e-06,
      "loss": 0.1083,
      "step": 12200
    },
    {
      "epoch": 0.98787246004337,
      "grad_norm": 0.2107488363981247,
      "learning_rate": 1.356537260151718e-06,
      "loss": 0.1008,
      "step": 12300
    },
    {
      "epoch": 0.9959039434583568,
      "grad_norm": 0.22825685143470764,
      "learning_rate": 4.64078536367693e-07,
      "loss": 0.1026,
      "step": 12400
    },
    {
      "epoch": 1.0,
      "step": 12451,
      "total_flos": 1.056850030203863e+17,
      "train_loss": 0.1694467846107008,
      "train_runtime": 5343.3049,
      "train_samples_per_second": 18.641,
      "train_steps_per_second": 2.33
    }
  ],
  "logging_steps": 100,
  "max_steps": 12451,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.056850030203863e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}