{ "best_global_step": 12000, "best_metric": 0.07097326367155066, "best_model_checkpoint": "./output/bm-byt5-text-normalization-9/checkpoint-12000", "epoch": 1.0, "eval_steps": 1500, "global_step": 12486, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008008970046452027, "grad_norm": 3.8596339225769043, "learning_rate": 7.926341072858287e-06, "loss": 1.568, "step": 100 }, { "epoch": 0.016017940092904054, "grad_norm": 1.0137945413589478, "learning_rate": 1.593274619695757e-05, "loss": 0.8709, "step": 200 }, { "epoch": 0.024026910139356077, "grad_norm": 0.7859073877334595, "learning_rate": 2.3939151321056846e-05, "loss": 0.6016, "step": 300 }, { "epoch": 0.03203588018580811, "grad_norm": 0.6444743871688843, "learning_rate": 3.194555644515613e-05, "loss": 0.5168, "step": 400 }, { "epoch": 0.040044850232260135, "grad_norm": 0.4445948302745819, "learning_rate": 3.995196156925541e-05, "loss": 0.4461, "step": 500 }, { "epoch": 0.048053820278712155, "grad_norm": 0.41510170698165894, "learning_rate": 4.795836669335468e-05, "loss": 0.3826, "step": 600 }, { "epoch": 0.05606279032516418, "grad_norm": 0.3964841365814209, "learning_rate": 5.5964771817453964e-05, "loss": 0.3543, "step": 700 }, { "epoch": 0.06407176037161622, "grad_norm": 0.3848731815814972, "learning_rate": 6.397117694155325e-05, "loss": 0.3161, "step": 800 }, { "epoch": 0.07208073041806824, "grad_norm": 0.3667680025100708, "learning_rate": 7.197758206565253e-05, "loss": 0.2937, "step": 900 }, { "epoch": 0.08008970046452027, "grad_norm": 0.4185062050819397, "learning_rate": 7.99839871897518e-05, "loss": 0.2748, "step": 1000 }, { "epoch": 0.08809867051097228, "grad_norm": 0.565210223197937, "learning_rate": 8.799039231385109e-05, "loss": 0.2425, "step": 1100 }, { "epoch": 0.09610764055742431, "grad_norm": 0.3510247468948364, "learning_rate": 9.599679743795037e-05, "loss": 0.2429, "step": 1200 }, { "epoch": 0.10411661060387634, "grad_norm": 0.33350569009780884, "learning_rate": 9.955504138115156e-05, "loss": 0.2319, "step": 1300 }, { "epoch": 0.11212558065032836, "grad_norm": 0.2997024357318878, "learning_rate": 9.866512414345466e-05, "loss": 0.219, "step": 1400 }, { "epoch": 0.12013455069678039, "grad_norm": 0.336683988571167, "learning_rate": 9.777520690575776e-05, "loss": 0.2154, "step": 1500 }, { "epoch": 0.12013455069678039, "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.4623481471072902, "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.18778733909130096, "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 233.0229, "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.146, "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.27, "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.6122854561878952, "step": 1500 }, { "epoch": 0.12013455069678039, "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.5852987060658897, "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.6063626408576965, "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 69.2146, "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 7.051, "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 0.881, "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.8124754805806198, "step": 1500 }, { "epoch": 0.12013455069678039, "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.5726033834586466, "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.5617944598197937, "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 28.2591, "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.866, "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.389, "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.8147345612134345, "step": 1500 }, { "epoch": 0.12814352074323243, "grad_norm": 0.3779921233654022, "learning_rate": 9.688528966806088e-05, "loss": 0.1948, "step": 1600 }, { "epoch": 0.13615249078968444, "grad_norm": 0.32918840646743774, "learning_rate": 9.599537243036398e-05, "loss": 0.1989, "step": 1700 }, { "epoch": 0.14416146083613648, "grad_norm": 0.26091116666793823, "learning_rate": 9.510545519266708e-05, "loss": 0.185, "step": 1800 }, { "epoch": 0.1521704308825885, "grad_norm": 0.3381774425506592, "learning_rate": 9.42155379549702e-05, "loss": 0.1835, "step": 1900 }, { "epoch": 0.16017940092904054, "grad_norm": 0.4004645347595215, "learning_rate": 9.33256207172733e-05, "loss": 0.1685, "step": 2000 }, { "epoch": 0.16818837097549255, "grad_norm": 0.22172071039676666, "learning_rate": 9.243570347957641e-05, "loss": 0.1749, "step": 2100 }, { "epoch": 0.17619734102194456, "grad_norm": 0.35416314005851746, "learning_rate": 9.154578624187951e-05, "loss": 0.1642, "step": 2200 }, { "epoch": 0.1842063110683966, "grad_norm": 0.2757333219051361, "learning_rate": 9.065586900418261e-05, "loss": 0.1714, "step": 2300 }, { "epoch": 0.19221528111484862, "grad_norm": 0.27378392219543457, "learning_rate": 8.976595176648572e-05, "loss": 0.1663, "step": 2400 }, { "epoch": 0.20022425116130066, "grad_norm": 0.34761300683021545, "learning_rate": 8.887603452878883e-05, "loss": 0.1544, "step": 2500 }, { "epoch": 0.20823322120775267, "grad_norm": 0.3532973825931549, "learning_rate": 8.798611729109193e-05, "loss": 0.1522, "step": 2600 }, { "epoch": 0.2162421912542047, "grad_norm": 0.3141867220401764, "learning_rate": 8.709620005339504e-05, "loss": 0.1594, "step": 2700 }, { "epoch": 0.22425116130065673, "grad_norm": 0.3187774419784546, "learning_rate": 8.620628281569814e-05, "loss": 0.1534, "step": 2800 }, { "epoch": 0.23226013134710877, "grad_norm": 0.32024797797203064, "learning_rate": 8.531636557800124e-05, "loss": 0.157, "step": 2900 }, { "epoch": 0.24026910139356078, "grad_norm": 0.3410184979438782, "learning_rate": 8.442644834030436e-05, "loss": 0.1497, "step": 3000 }, { "epoch": 0.24026910139356078, "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.21550017245123462, "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.13786430656909943, "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 237.885, "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.102, "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.265, "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.33604336043360433, "step": 3000 }, { "epoch": 0.24026910139356078, "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.296411856474259, "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.5071771740913391, "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 48.4541, "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 10.071, "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.259, "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.5237347979599843, "step": 3000 }, { "epoch": 0.24026910139356078, "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.7525845864661654, "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.5753647089004517, "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 32.5425, "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.489, "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.338, "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.6338028169014085, "step": 3000 }, { "epoch": 0.24827807144001282, "grad_norm": 0.2752779722213745, "learning_rate": 8.353653110260746e-05, "loss": 0.141, "step": 3100 }, { "epoch": 0.25628704148646486, "grad_norm": 0.33353230357170105, "learning_rate": 8.264661386491057e-05, "loss": 0.1505, "step": 3200 }, { "epoch": 0.2642960115329169, "grad_norm": 0.21438618004322052, "learning_rate": 8.175669662721367e-05, "loss": 0.1399, "step": 3300 }, { "epoch": 0.2723049815793689, "grad_norm": 0.38063517212867737, "learning_rate": 8.086677938951677e-05, "loss": 0.1471, "step": 3400 }, { "epoch": 0.2803139516258209, "grad_norm": 0.27488982677459717, "learning_rate": 7.997686215181989e-05, "loss": 0.1319, "step": 3500 }, { "epoch": 0.28832292167227297, "grad_norm": 0.33845141530036926, "learning_rate": 7.9086944914123e-05, "loss": 0.1322, "step": 3600 }, { "epoch": 0.296331891718725, "grad_norm": 0.8517484068870544, "learning_rate": 7.819702767642609e-05, "loss": 0.1353, "step": 3700 }, { "epoch": 0.304340861765177, "grad_norm": 0.20209847390651703, "learning_rate": 7.73071104387292e-05, "loss": 0.124, "step": 3800 }, { "epoch": 0.312349831811629, "grad_norm": 0.42707929015159607, "learning_rate": 7.64171932010323e-05, "loss": 0.1266, "step": 3900 }, { "epoch": 0.3203588018580811, "grad_norm": 0.34921887516975403, "learning_rate": 7.55272759633354e-05, "loss": 0.1226, "step": 4000 }, { "epoch": 0.3283677719045331, "grad_norm": 0.33792367577552795, "learning_rate": 7.463735872563852e-05, "loss": 0.1188, "step": 4100 }, { "epoch": 0.3363767419509851, "grad_norm": 0.28311803936958313, "learning_rate": 7.374744148794162e-05, "loss": 0.1167, "step": 4200 }, { "epoch": 0.3443857119974371, "grad_norm": 0.24853067100048065, "learning_rate": 7.285752425024473e-05, "loss": 0.1193, "step": 4300 }, { "epoch": 0.35239468204388913, "grad_norm": 0.2954888641834259, "learning_rate": 7.196760701254784e-05, "loss": 0.1164, "step": 4400 }, { "epoch": 0.3604036520903412, "grad_norm": 0.3384742736816406, "learning_rate": 7.107768977485094e-05, "loss": 0.1107, "step": 4500 }, { "epoch": 0.3604036520903412, "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.13066693918219793, "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.10385449975728989, "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 239.7727, "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.085, "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.263, "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.23932550436615477, "step": 4500 }, { "epoch": 0.3604036520903412, "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.11562815453794623, "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.3626447021961212, "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 40.1376, "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 12.158, "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.52, "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.29619458611220084, "step": 4500 }, { "epoch": 0.3604036520903412, "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.5462875939849624, "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.5127649903297424, "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 28.2264, "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.87, "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.39, "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.47670639219934996, "step": 4500 }, { "epoch": 0.3684126221367932, "grad_norm": 0.23822121322155, "learning_rate": 7.018777253715405e-05, "loss": 0.1111, "step": 4600 }, { "epoch": 0.3764215921832452, "grad_norm": 0.26078513264656067, "learning_rate": 6.929785529945715e-05, "loss": 0.1107, "step": 4700 }, { "epoch": 0.38443056222969724, "grad_norm": 0.23475381731987, "learning_rate": 6.840793806176025e-05, "loss": 0.1103, "step": 4800 }, { "epoch": 0.3924395322761493, "grad_norm": 0.2875431478023529, "learning_rate": 6.751802082406337e-05, "loss": 0.1125, "step": 4900 }, { "epoch": 0.4004485023226013, "grad_norm": 0.21920998394489288, "learning_rate": 6.662810358636648e-05, "loss": 0.1051, "step": 5000 }, { "epoch": 0.40845747236905333, "grad_norm": 0.3680271804332733, "learning_rate": 6.573818634866957e-05, "loss": 0.1125, "step": 5100 }, { "epoch": 0.41646644241550534, "grad_norm": 0.2292424738407135, "learning_rate": 6.484826911097268e-05, "loss": 0.0995, "step": 5200 }, { "epoch": 0.4244754124619574, "grad_norm": 0.23008547723293304, "learning_rate": 6.39583518732758e-05, "loss": 0.1026, "step": 5300 }, { "epoch": 0.4324843825084094, "grad_norm": 0.34461820125579834, "learning_rate": 6.30684346355789e-05, "loss": 0.1062, "step": 5400 }, { "epoch": 0.44049335255486144, "grad_norm": 0.287975937128067, "learning_rate": 6.2178517397882e-05, "loss": 0.1009, "step": 5500 }, { "epoch": 0.44850232260131345, "grad_norm": 0.31931379437446594, "learning_rate": 6.12886001601851e-05, "loss": 0.1021, "step": 5600 }, { "epoch": 0.4565112926477655, "grad_norm": 0.2849687933921814, "learning_rate": 6.039868292248821e-05, "loss": 0.1075, "step": 5700 }, { "epoch": 0.46452026269421753, "grad_norm": 0.22519993782043457, "learning_rate": 5.9508765684791314e-05, "loss": 0.1115, "step": 5800 }, { "epoch": 0.47252923274066955, "grad_norm": 0.16592390835285187, "learning_rate": 5.861884844709442e-05, "loss": 0.0957, "step": 5900 }, { "epoch": 0.48053820278712156, "grad_norm": 0.20374052226543427, "learning_rate": 5.772893120939753e-05, "loss": 0.1073, "step": 6000 }, { "epoch": 0.48053820278712156, "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.11223381832581787, "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.09635704010725021, "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 237.7988, "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.103, "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.265, "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.21547726588376995, "step": 6000 }, { "epoch": 0.48053820278712156, "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.10736900064237864, "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.3359106481075287, "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 40.6894, "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 11.993, "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.499, "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.28167908983915263, "step": 6000 }, { "epoch": 0.48053820278712156, "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.5159774436090225, "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.506378173828125, "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 28.9504, "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.798, "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.38, "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.42903575297941493, "step": 6000 }, { "epoch": 0.48854717283357363, "grad_norm": 0.2751060128211975, "learning_rate": 5.683901397170064e-05, "loss": 0.1023, "step": 6100 }, { "epoch": 0.49655614288002564, "grad_norm": 0.21174342930316925, "learning_rate": 5.594909673400374e-05, "loss": 0.1056, "step": 6200 }, { "epoch": 0.5045651129264777, "grad_norm": 0.13474728167057037, "learning_rate": 5.5059179496306845e-05, "loss": 0.0935, "step": 6300 }, { "epoch": 0.5125740829729297, "grad_norm": 0.3412100076675415, "learning_rate": 5.416926225860995e-05, "loss": 0.102, "step": 6400 }, { "epoch": 0.5205830530193817, "grad_norm": 0.1809050291776657, "learning_rate": 5.327934502091306e-05, "loss": 0.0931, "step": 6500 }, { "epoch": 0.5285920230658337, "grad_norm": 0.31216275691986084, "learning_rate": 5.238942778321616e-05, "loss": 0.0994, "step": 6600 }, { "epoch": 0.5366009931122857, "grad_norm": 0.2171262800693512, "learning_rate": 5.149951054551927e-05, "loss": 0.0985, "step": 6700 }, { "epoch": 0.5446099631587378, "grad_norm": 0.1729833483695984, "learning_rate": 5.0609593307822376e-05, "loss": 0.1006, "step": 6800 }, { "epoch": 0.5526189332051898, "grad_norm": 0.3678678870201111, "learning_rate": 4.9719676070125484e-05, "loss": 0.0916, "step": 6900 }, { "epoch": 0.5606279032516418, "grad_norm": 0.21691511571407318, "learning_rate": 4.8829758832428584e-05, "loss": 0.0909, "step": 7000 }, { "epoch": 0.5686368732980939, "grad_norm": 0.11386135965585709, "learning_rate": 4.793984159473169e-05, "loss": 0.1012, "step": 7100 }, { "epoch": 0.5766458433445459, "grad_norm": 0.33011430501937866, "learning_rate": 4.70499243570348e-05, "loss": 0.0995, "step": 7200 }, { "epoch": 0.5846548133909979, "grad_norm": 0.436538428068161, "learning_rate": 4.616000711933791e-05, "loss": 0.0934, "step": 7300 }, { "epoch": 0.59266378343745, "grad_norm": 0.1885899156332016, "learning_rate": 4.527008988164101e-05, "loss": 0.0986, "step": 7400 }, { "epoch": 0.6006727534839019, "grad_norm": 0.24165351688861847, "learning_rate": 4.4380172643944115e-05, "loss": 0.0925, "step": 7500 }, { "epoch": 0.6006727534839019, "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.09868042870099511, "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.0899689719080925, "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 237.6964, "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.104, "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.265, "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.19337548931044865, "step": 7500 }, { "epoch": 0.6006727534839019, "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.10773607414884831, "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.31528761982917786, "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 39.2229, "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 12.442, "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.555, "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.2793252255786583, "step": 7500 }, { "epoch": 0.6006727534839019, "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.5488721804511278, "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.5185889005661011, "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 28.6015, "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.832, "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.385, "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.4528710725893825, "step": 7500 }, { "epoch": 0.608681723530354, "grad_norm": 0.4197905957698822, "learning_rate": 4.349025540624722e-05, "loss": 0.0941, "step": 7600 }, { "epoch": 0.6166906935768061, "grad_norm": 0.16225901246070862, "learning_rate": 4.2600338168550324e-05, "loss": 0.0921, "step": 7700 }, { "epoch": 0.624699663623258, "grad_norm": 0.2778748869895935, "learning_rate": 4.171042093085343e-05, "loss": 0.0868, "step": 7800 }, { "epoch": 0.6327086336697101, "grad_norm": 0.25382688641548157, "learning_rate": 4.082050369315654e-05, "loss": 0.0934, "step": 7900 }, { "epoch": 0.6407176037161622, "grad_norm": 0.3628831207752228, "learning_rate": 3.9930586455459647e-05, "loss": 0.0968, "step": 8000 }, { "epoch": 0.6487265737626141, "grad_norm": 0.24050642549991608, "learning_rate": 3.904066921776275e-05, "loss": 0.0911, "step": 8100 }, { "epoch": 0.6567355438090662, "grad_norm": 0.22333455085754395, "learning_rate": 3.8150751980065855e-05, "loss": 0.0942, "step": 8200 }, { "epoch": 0.6647445138555181, "grad_norm": 0.36757221817970276, "learning_rate": 3.726083474236896e-05, "loss": 0.0935, "step": 8300 }, { "epoch": 0.6727534839019702, "grad_norm": 0.4592411518096924, "learning_rate": 3.637091750467207e-05, "loss": 0.0903, "step": 8400 }, { "epoch": 0.6807624539484223, "grad_norm": 0.2041786164045334, "learning_rate": 3.548100026697517e-05, "loss": 0.0984, "step": 8500 }, { "epoch": 0.6887714239948742, "grad_norm": 0.2337992787361145, "learning_rate": 3.459108302927828e-05, "loss": 0.0938, "step": 8600 }, { "epoch": 0.6967803940413263, "grad_norm": 0.34608423709869385, "learning_rate": 3.3701165791581386e-05, "loss": 0.09, "step": 8700 }, { "epoch": 0.7047893640877783, "grad_norm": 0.24163080751895905, "learning_rate": 3.281124855388449e-05, "loss": 0.0866, "step": 8800 }, { "epoch": 0.7127983341342303, "grad_norm": 0.23344071209430695, "learning_rate": 3.1921331316187594e-05, "loss": 0.0876, "step": 8900 }, { "epoch": 0.7208073041806824, "grad_norm": 0.33907049894332886, "learning_rate": 3.10314140784907e-05, "loss": 0.0904, "step": 9000 }, { "epoch": 0.7208073041806824, "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.08734974387798117, "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.0864705815911293, "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 239.9022, "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.084, "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.263, "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.18066847335140018, "step": 9000 }, { "epoch": 0.7208073041806824, "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.10360649720106452, "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.30589863657951355, "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 40.4318, "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 12.07, "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.509, "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.2734405649274225, "step": 9000 }, { "epoch": 0.7208073041806824, "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.5204417293233082, "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.5159913301467896, "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 28.5651, "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.836, "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.385, "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.4431202600216685, "step": 9000 }, { "epoch": 0.7288162742271344, "grad_norm": 0.3358537554740906, "learning_rate": 3.014149684079381e-05, "loss": 0.0975, "step": 9100 }, { "epoch": 0.7368252442735864, "grad_norm": 0.30242183804512024, "learning_rate": 2.9251579603096914e-05, "loss": 0.0932, "step": 9200 }, { "epoch": 0.7448342143200385, "grad_norm": 0.2576250731945038, "learning_rate": 2.836166236540002e-05, "loss": 0.0884, "step": 9300 }, { "epoch": 0.7528431843664904, "grad_norm": 0.1822267770767212, "learning_rate": 2.7471745127703125e-05, "loss": 0.0887, "step": 9400 }, { "epoch": 0.7608521544129425, "grad_norm": 0.28615352511405945, "learning_rate": 2.6581827890006233e-05, "loss": 0.096, "step": 9500 }, { "epoch": 0.7688611244593945, "grad_norm": 0.25158432126045227, "learning_rate": 2.5691910652309337e-05, "loss": 0.0885, "step": 9600 }, { "epoch": 0.7768700945058465, "grad_norm": 0.3026082515716553, "learning_rate": 2.480199341461244e-05, "loss": 0.0877, "step": 9700 }, { "epoch": 0.7848790645522986, "grad_norm": 0.27292346954345703, "learning_rate": 2.391207617691555e-05, "loss": 0.0941, "step": 9800 }, { "epoch": 0.7928880345987506, "grad_norm": 0.24669557809829712, "learning_rate": 2.3022158939218656e-05, "loss": 0.0925, "step": 9900 }, { "epoch": 0.8008970046452026, "grad_norm": 0.2944881021976471, "learning_rate": 2.213224170152176e-05, "loss": 0.0945, "step": 10000 }, { "epoch": 0.8089059746916547, "grad_norm": 0.09000946581363678, "learning_rate": 2.1242324463824865e-05, "loss": 0.0857, "step": 10100 }, { "epoch": 0.8169149447381067, "grad_norm": 0.27225881814956665, "learning_rate": 2.035240722612797e-05, "loss": 0.0854, "step": 10200 }, { "epoch": 0.8249239147845587, "grad_norm": 0.19183236360549927, "learning_rate": 1.9462489988431076e-05, "loss": 0.0876, "step": 10300 }, { "epoch": 0.8329328848310107, "grad_norm": 0.4199092984199524, "learning_rate": 1.8572572750734184e-05, "loss": 0.0896, "step": 10400 }, { "epoch": 0.8409418548774628, "grad_norm": 0.16657961905002594, "learning_rate": 1.7682655513037288e-05, "loss": 0.0832, "step": 10500 }, { "epoch": 0.8409418548774628, "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.07417957921898752, "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.08394235372543335, "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 237.0276, "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.109, "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.266, "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.1648900933453779, "step": 10500 }, { "epoch": 0.8409418548774628, "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.10516655960356061, "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.28152379393577576, "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 39.2346, "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 12.438, "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.555, "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.2734405649274225, "step": 10500 }, { "epoch": 0.8409418548774628, "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.5044642857142857, "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.5193919539451599, "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 26.7813, "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 3.024, "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.411, "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.41278439869989164, "step": 10500 }, { "epoch": 0.8489508249239148, "grad_norm": 0.25866174697875977, "learning_rate": 1.6792738275340396e-05, "loss": 0.0853, "step": 10600 }, { "epoch": 0.8569597949703668, "grad_norm": 0.28780218958854675, "learning_rate": 1.59028210376435e-05, "loss": 0.0899, "step": 10700 }, { "epoch": 0.8649687650168189, "grad_norm": 0.3092251121997833, "learning_rate": 1.5012903799946607e-05, "loss": 0.0949, "step": 10800 }, { "epoch": 0.8729777350632708, "grad_norm": 0.1439545601606369, "learning_rate": 1.4122986562249713e-05, "loss": 0.0846, "step": 10900 }, { "epoch": 0.8809867051097229, "grad_norm": 0.49992987513542175, "learning_rate": 1.3233069324552816e-05, "loss": 0.0878, "step": 11000 }, { "epoch": 0.888995675156175, "grad_norm": 0.5189112424850464, "learning_rate": 1.2343152086855923e-05, "loss": 0.088, "step": 11100 }, { "epoch": 0.8970046452026269, "grad_norm": 0.22200240194797516, "learning_rate": 1.1453234849159029e-05, "loss": 0.0793, "step": 11200 }, { "epoch": 0.905013615249079, "grad_norm": 0.8681122660636902, "learning_rate": 1.0563317611462133e-05, "loss": 0.0897, "step": 11300 }, { "epoch": 0.913022585295531, "grad_norm": 0.1663997769355774, "learning_rate": 9.673400373765241e-06, "loss": 0.0865, "step": 11400 }, { "epoch": 0.921031555341983, "grad_norm": 0.3327358067035675, "learning_rate": 8.783483136068347e-06, "loss": 0.0887, "step": 11500 }, { "epoch": 0.9290405253884351, "grad_norm": 0.23733209073543549, "learning_rate": 7.893565898371453e-06, "loss": 0.0868, "step": 11600 }, { "epoch": 0.937049495434887, "grad_norm": 0.21292167901992798, "learning_rate": 7.0036486606745585e-06, "loss": 0.0896, "step": 11700 }, { "epoch": 0.9450584654813391, "grad_norm": 0.21337737143039703, "learning_rate": 6.1137314229776635e-06, "loss": 0.0847, "step": 11800 }, { "epoch": 0.9530674355277912, "grad_norm": 0.23139464855194092, "learning_rate": 5.223814185280769e-06, "loss": 0.082, "step": 11900 }, { "epoch": 0.9610764055742431, "grad_norm": 0.4367704391479492, "learning_rate": 4.333896947583875e-06, "loss": 0.0874, "step": 12000 }, { "epoch": 0.9610764055742431, "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.07097326367155066, "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.08283345401287079, "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 236.9213, "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.11, "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.266, "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.16193917494730503, "step": 12000 }, { "epoch": 0.9610764055742431, "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.1025052766816555, "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.27890169620513916, "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 41.0822, "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 11.879, "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.485, "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.2742251863475873, "step": 12000 }, { "epoch": 0.9610764055742431, "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.5032894736842105, "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.5151007771492004, "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 27.495, "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.946, "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.4, "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.4106175514626219, "step": 12000 }, { "epoch": 0.9690853756206952, "grad_norm": 0.18608327209949493, "learning_rate": 3.4439797098869802e-06, "loss": 0.0888, "step": 12100 }, { "epoch": 0.9770943456671473, "grad_norm": 0.3838459253311157, "learning_rate": 2.5540624721900865e-06, "loss": 0.0834, "step": 12200 }, { "epoch": 0.9851033157135992, "grad_norm": 0.32492393255233765, "learning_rate": 1.6641452344931922e-06, "loss": 0.0862, "step": 12300 }, { "epoch": 0.9931122857600513, "grad_norm": 0.3034497797489166, "learning_rate": 7.74227996796298e-07, "loss": 0.0879, "step": 12400 }, { "epoch": 1.0, "step": 12486, "total_flos": 1.1016496820713882e+17, "train_loss": 0.1502396887776639, "train_runtime": 5207.863, "train_samples_per_second": 19.179, "train_steps_per_second": 2.398 } ], "logging_steps": 100, "max_steps": 12486, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1016496820713882e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }