{
  "best_global_step": 12000,
  "best_metric": 0.07097326367155066,
  "best_model_checkpoint": "./output/bm-byt5-text-normalization-9/checkpoint-12000",
  "epoch": 1.0,
  "eval_steps": 1500,
  "global_step": 12486,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008008970046452027,
      "grad_norm": 3.8596339225769043,
      "learning_rate": 7.926341072858287e-06,
      "loss": 1.568,
      "step": 100
    },
    {
      "epoch": 0.016017940092904054,
      "grad_norm": 1.0137945413589478,
      "learning_rate": 1.593274619695757e-05,
      "loss": 0.8709,
      "step": 200
    },
    {
      "epoch": 0.024026910139356077,
      "grad_norm": 0.7859073877334595,
      "learning_rate": 2.3939151321056846e-05,
      "loss": 0.6016,
      "step": 300
    },
    {
      "epoch": 0.03203588018580811,
      "grad_norm": 0.6444743871688843,
      "learning_rate": 3.194555644515613e-05,
      "loss": 0.5168,
      "step": 400
    },
    {
      "epoch": 0.040044850232260135,
      "grad_norm": 0.4445948302745819,
      "learning_rate": 3.995196156925541e-05,
      "loss": 0.4461,
      "step": 500
    },
    {
      "epoch": 0.048053820278712155,
      "grad_norm": 0.41510170698165894,
      "learning_rate": 4.795836669335468e-05,
      "loss": 0.3826,
      "step": 600
    },
    {
      "epoch": 0.05606279032516418,
      "grad_norm": 0.3964841365814209,
      "learning_rate": 5.5964771817453964e-05,
      "loss": 0.3543,
      "step": 700
    },
    {
      "epoch": 0.06407176037161622,
      "grad_norm": 0.3848731815814972,
      "learning_rate": 6.397117694155325e-05,
      "loss": 0.3161,
      "step": 800
    },
    {
      "epoch": 0.07208073041806824,
      "grad_norm": 0.3667680025100708,
      "learning_rate": 7.197758206565253e-05,
      "loss": 0.2937,
      "step": 900
    },
    {
      "epoch": 0.08008970046452027,
      "grad_norm": 0.4185062050819397,
      "learning_rate": 7.99839871897518e-05,
      "loss": 0.2748,
      "step": 1000
    },
    {
      "epoch": 0.08809867051097228,
      "grad_norm": 0.565210223197937,
      "learning_rate": 8.799039231385109e-05,
      "loss": 0.2425,
      "step": 1100
    },
    {
      "epoch": 0.09610764055742431,
      "grad_norm": 0.3510247468948364,
      "learning_rate": 9.599679743795037e-05,
      "loss": 0.2429,
      "step": 1200
    },
    {
      "epoch": 0.10411661060387634,
      "grad_norm": 0.33350569009780884,
      "learning_rate": 9.955504138115156e-05,
      "loss": 0.2319,
      "step": 1300
    },
    {
      "epoch": 0.11212558065032836,
      "grad_norm": 0.2997024357318878,
      "learning_rate": 9.866512414345466e-05,
      "loss": 0.219,
      "step": 1400
    },
    {
      "epoch": 0.12013455069678039,
      "grad_norm": 0.336683988571167,
      "learning_rate": 9.777520690575776e-05,
      "loss": 0.2154,
      "step": 1500
    },
    {
      "epoch": 0.12013455069678039,
      "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.4623481471072902,
      "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.18778733909130096,
      "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 233.0229,
      "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.146,
      "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.27,
      "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.6122854561878952,
      "step": 1500
    },
    {
      "epoch": 0.12013455069678039,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.5852987060658897,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.6063626408576965,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 69.2146,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 7.051,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 0.881,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.8124754805806198,
      "step": 1500
    },
    {
      "epoch": 0.12013455069678039,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.5726033834586466,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.5617944598197937,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 28.2591,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.866,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.389,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.8147345612134345,
      "step": 1500
    },
    {
      "epoch": 0.12814352074323243,
      "grad_norm": 0.3779921233654022,
      "learning_rate": 9.688528966806088e-05,
      "loss": 0.1948,
      "step": 1600
    },
    {
      "epoch": 0.13615249078968444,
      "grad_norm": 0.32918840646743774,
      "learning_rate": 9.599537243036398e-05,
      "loss": 0.1989,
      "step": 1700
    },
    {
      "epoch": 0.14416146083613648,
      "grad_norm": 0.26091116666793823,
      "learning_rate": 9.510545519266708e-05,
      "loss": 0.185,
      "step": 1800
    },
    {
      "epoch": 0.1521704308825885,
      "grad_norm": 0.3381774425506592,
      "learning_rate": 9.42155379549702e-05,
      "loss": 0.1835,
      "step": 1900
    },
    {
      "epoch": 0.16017940092904054,
      "grad_norm": 0.4004645347595215,
      "learning_rate": 9.33256207172733e-05,
      "loss": 0.1685,
      "step": 2000
    },
    {
      "epoch": 0.16818837097549255,
      "grad_norm": 0.22172071039676666,
      "learning_rate": 9.243570347957641e-05,
      "loss": 0.1749,
      "step": 2100
    },
    {
      "epoch": 0.17619734102194456,
      "grad_norm": 0.35416314005851746,
      "learning_rate": 9.154578624187951e-05,
      "loss": 0.1642,
      "step": 2200
    },
    {
      "epoch": 0.1842063110683966,
      "grad_norm": 0.2757333219051361,
      "learning_rate": 9.065586900418261e-05,
      "loss": 0.1714,
      "step": 2300
    },
    {
      "epoch": 0.19221528111484862,
      "grad_norm": 0.27378392219543457,
      "learning_rate": 8.976595176648572e-05,
      "loss": 0.1663,
      "step": 2400
    },
    {
      "epoch": 0.20022425116130066,
      "grad_norm": 0.34761300683021545,
      "learning_rate": 8.887603452878883e-05,
      "loss": 0.1544,
      "step": 2500
    },
    {
      "epoch": 0.20823322120775267,
      "grad_norm": 0.3532973825931549,
      "learning_rate": 8.798611729109193e-05,
      "loss": 0.1522,
      "step": 2600
    },
    {
      "epoch": 0.2162421912542047,
      "grad_norm": 0.3141867220401764,
      "learning_rate": 8.709620005339504e-05,
      "loss": 0.1594,
      "step": 2700
    },
    {
      "epoch": 0.22425116130065673,
      "grad_norm": 0.3187774419784546,
      "learning_rate": 8.620628281569814e-05,
      "loss": 0.1534,
      "step": 2800
    },
    {
      "epoch": 0.23226013134710877,
      "grad_norm": 0.32024797797203064,
      "learning_rate": 8.531636557800124e-05,
      "loss": 0.157,
      "step": 2900
    },
    {
      "epoch": 0.24026910139356078,
      "grad_norm": 0.3410184979438782,
      "learning_rate": 8.442644834030436e-05,
      "loss": 0.1497,
      "step": 3000
    },
    {
      "epoch": 0.24026910139356078,
      "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.21550017245123462,
      "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.13786430656909943,
      "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 237.885,
      "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.102,
      "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.265,
      "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.33604336043360433,
      "step": 3000
    },
    {
      "epoch": 0.24026910139356078,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.296411856474259,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.5071771740913391,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 48.4541,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 10.071,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.259,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.5237347979599843,
      "step": 3000
    },
    {
      "epoch": 0.24026910139356078,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.7525845864661654,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.5753647089004517,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 32.5425,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.489,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.338,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.6338028169014085,
      "step": 3000
    },
    {
      "epoch": 0.24827807144001282,
      "grad_norm": 0.2752779722213745,
      "learning_rate": 8.353653110260746e-05,
      "loss": 0.141,
      "step": 3100
    },
    {
      "epoch": 0.25628704148646486,
      "grad_norm": 0.33353230357170105,
      "learning_rate": 8.264661386491057e-05,
      "loss": 0.1505,
      "step": 3200
    },
    {
      "epoch": 0.2642960115329169,
      "grad_norm": 0.21438618004322052,
      "learning_rate": 8.175669662721367e-05,
      "loss": 0.1399,
      "step": 3300
    },
    {
      "epoch": 0.2723049815793689,
      "grad_norm": 0.38063517212867737,
      "learning_rate": 8.086677938951677e-05,
      "loss": 0.1471,
      "step": 3400
    },
    {
      "epoch": 0.2803139516258209,
      "grad_norm": 0.27488982677459717,
      "learning_rate": 7.997686215181989e-05,
      "loss": 0.1319,
      "step": 3500
    },
    {
      "epoch": 0.28832292167227297,
      "grad_norm": 0.33845141530036926,
      "learning_rate": 7.9086944914123e-05,
      "loss": 0.1322,
      "step": 3600
    },
    {
      "epoch": 0.296331891718725,
      "grad_norm": 0.8517484068870544,
      "learning_rate": 7.819702767642609e-05,
      "loss": 0.1353,
      "step": 3700
    },
    {
      "epoch": 0.304340861765177,
      "grad_norm": 0.20209847390651703,
      "learning_rate": 7.73071104387292e-05,
      "loss": 0.124,
      "step": 3800
    },
    {
      "epoch": 0.312349831811629,
      "grad_norm": 0.42707929015159607,
      "learning_rate": 7.64171932010323e-05,
      "loss": 0.1266,
      "step": 3900
    },
    {
      "epoch": 0.3203588018580811,
      "grad_norm": 0.34921887516975403,
      "learning_rate": 7.55272759633354e-05,
      "loss": 0.1226,
      "step": 4000
    },
    {
      "epoch": 0.3283677719045331,
      "grad_norm": 0.33792367577552795,
      "learning_rate": 7.463735872563852e-05,
      "loss": 0.1188,
      "step": 4100
    },
    {
      "epoch": 0.3363767419509851,
      "grad_norm": 0.28311803936958313,
      "learning_rate": 7.374744148794162e-05,
      "loss": 0.1167,
      "step": 4200
    },
    {
      "epoch": 0.3443857119974371,
      "grad_norm": 0.24853067100048065,
      "learning_rate": 7.285752425024473e-05,
      "loss": 0.1193,
      "step": 4300
    },
    {
      "epoch": 0.35239468204388913,
      "grad_norm": 0.2954888641834259,
      "learning_rate": 7.196760701254784e-05,
      "loss": 0.1164,
      "step": 4400
    },
    {
      "epoch": 0.3604036520903412,
      "grad_norm": 0.3384742736816406,
      "learning_rate": 7.107768977485094e-05,
      "loss": 0.1107,
      "step": 4500
    },
    {
      "epoch": 0.3604036520903412,
      "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.13066693918219793,
      "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.10385449975728989,
      "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 239.7727,
      "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.085,
      "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.263,
      "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.23932550436615477,
      "step": 4500
    },
    {
      "epoch": 0.3604036520903412,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.11562815453794623,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.3626447021961212,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 40.1376,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 12.158,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.52,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.29619458611220084,
      "step": 4500
    },
    {
      "epoch": 0.3604036520903412,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.5462875939849624,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.5127649903297424,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 28.2264,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.87,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.39,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.47670639219934996,
      "step": 4500
    },
    {
      "epoch": 0.3684126221367932,
      "grad_norm": 0.23822121322155,
      "learning_rate": 7.018777253715405e-05,
      "loss": 0.1111,
      "step": 4600
    },
    {
      "epoch": 0.3764215921832452,
      "grad_norm": 0.26078513264656067,
      "learning_rate": 6.929785529945715e-05,
      "loss": 0.1107,
      "step": 4700
    },
    {
      "epoch": 0.38443056222969724,
      "grad_norm": 0.23475381731987,
      "learning_rate": 6.840793806176025e-05,
      "loss": 0.1103,
      "step": 4800
    },
    {
      "epoch": 0.3924395322761493,
      "grad_norm": 0.2875431478023529,
      "learning_rate": 6.751802082406337e-05,
      "loss": 0.1125,
      "step": 4900
    },
    {
      "epoch": 0.4004485023226013,
      "grad_norm": 0.21920998394489288,
      "learning_rate": 6.662810358636648e-05,
      "loss": 0.1051,
      "step": 5000
    },
    {
      "epoch": 0.40845747236905333,
      "grad_norm": 0.3680271804332733,
      "learning_rate": 6.573818634866957e-05,
      "loss": 0.1125,
      "step": 5100
    },
    {
      "epoch": 0.41646644241550534,
      "grad_norm": 0.2292424738407135,
      "learning_rate": 6.484826911097268e-05,
      "loss": 0.0995,
      "step": 5200
    },
    {
      "epoch": 0.4244754124619574,
      "grad_norm": 0.23008547723293304,
      "learning_rate": 6.39583518732758e-05,
      "loss": 0.1026,
      "step": 5300
    },
    {
      "epoch": 0.4324843825084094,
      "grad_norm": 0.34461820125579834,
      "learning_rate": 6.30684346355789e-05,
      "loss": 0.1062,
      "step": 5400
    },
    {
      "epoch": 0.44049335255486144,
      "grad_norm": 0.287975937128067,
      "learning_rate": 6.2178517397882e-05,
      "loss": 0.1009,
      "step": 5500
    },
    {
      "epoch": 0.44850232260131345,
      "grad_norm": 0.31931379437446594,
      "learning_rate": 6.12886001601851e-05,
      "loss": 0.1021,
      "step": 5600
    },
    {
      "epoch": 0.4565112926477655,
      "grad_norm": 0.2849687933921814,
      "learning_rate": 6.039868292248821e-05,
      "loss": 0.1075,
      "step": 5700
    },
    {
      "epoch": 0.46452026269421753,
      "grad_norm": 0.22519993782043457,
      "learning_rate": 5.9508765684791314e-05,
      "loss": 0.1115,
      "step": 5800
    },
    {
      "epoch": 0.47252923274066955,
      "grad_norm": 0.16592390835285187,
      "learning_rate": 5.861884844709442e-05,
      "loss": 0.0957,
      "step": 5900
    },
    {
      "epoch": 0.48053820278712156,
      "grad_norm": 0.20374052226543427,
      "learning_rate": 5.772893120939753e-05,
      "loss": 0.1073,
      "step": 6000
    },
    {
      "epoch": 0.48053820278712156,
      "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.11223381832581787,
      "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.09635704010725021,
      "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 237.7988,
      "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.103,
      "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.265,
      "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.21547726588376995,
      "step": 6000
    },
    {
      "epoch": 0.48053820278712156,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.10736900064237864,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.3359106481075287,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 40.6894,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 11.993,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.499,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.28167908983915263,
      "step": 6000
    },
    {
      "epoch": 0.48053820278712156,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.5159774436090225,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.506378173828125,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 28.9504,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.798,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.38,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.42903575297941493,
      "step": 6000
    },
    {
      "epoch": 0.48854717283357363,
      "grad_norm": 0.2751060128211975,
      "learning_rate": 5.683901397170064e-05,
      "loss": 0.1023,
      "step": 6100
    },
    {
      "epoch": 0.49655614288002564,
      "grad_norm": 0.21174342930316925,
      "learning_rate": 5.594909673400374e-05,
      "loss": 0.1056,
      "step": 6200
    },
    {
      "epoch": 0.5045651129264777,
      "grad_norm": 0.13474728167057037,
      "learning_rate": 5.5059179496306845e-05,
      "loss": 0.0935,
      "step": 6300
    },
    {
      "epoch": 0.5125740829729297,
      "grad_norm": 0.3412100076675415,
      "learning_rate": 5.416926225860995e-05,
      "loss": 0.102,
      "step": 6400
    },
    {
      "epoch": 0.5205830530193817,
      "grad_norm": 0.1809050291776657,
      "learning_rate": 5.327934502091306e-05,
      "loss": 0.0931,
      "step": 6500
    },
    {
      "epoch": 0.5285920230658337,
      "grad_norm": 0.31216275691986084,
      "learning_rate": 5.238942778321616e-05,
      "loss": 0.0994,
      "step": 6600
    },
    {
      "epoch": 0.5366009931122857,
      "grad_norm": 0.2171262800693512,
      "learning_rate": 5.149951054551927e-05,
      "loss": 0.0985,
      "step": 6700
    },
    {
      "epoch": 0.5446099631587378,
      "grad_norm": 0.1729833483695984,
      "learning_rate": 5.0609593307822376e-05,
      "loss": 0.1006,
      "step": 6800
    },
    {
      "epoch": 0.5526189332051898,
      "grad_norm": 0.3678678870201111,
      "learning_rate": 4.9719676070125484e-05,
      "loss": 0.0916,
      "step": 6900
    },
    {
      "epoch": 0.5606279032516418,
      "grad_norm": 0.21691511571407318,
      "learning_rate": 4.8829758832428584e-05,
      "loss": 0.0909,
      "step": 7000
    },
    {
      "epoch": 0.5686368732980939,
      "grad_norm": 0.11386135965585709,
      "learning_rate": 4.793984159473169e-05,
      "loss": 0.1012,
      "step": 7100
    },
    {
      "epoch": 0.5766458433445459,
      "grad_norm": 0.33011430501937866,
      "learning_rate": 4.70499243570348e-05,
      "loss": 0.0995,
      "step": 7200
    },
    {
      "epoch": 0.5846548133909979,
      "grad_norm": 0.436538428068161,
      "learning_rate": 4.616000711933791e-05,
      "loss": 0.0934,
      "step": 7300
    },
    {
      "epoch": 0.59266378343745,
      "grad_norm": 0.1885899156332016,
      "learning_rate": 4.527008988164101e-05,
      "loss": 0.0986,
      "step": 7400
    },
    {
      "epoch": 0.6006727534839019,
      "grad_norm": 0.24165351688861847,
      "learning_rate": 4.4380172643944115e-05,
      "loss": 0.0925,
      "step": 7500
    },
    {
      "epoch": 0.6006727534839019,
      "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.09868042870099511,
      "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.0899689719080925,
      "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 237.6964,
      "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.104,
      "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.265,
      "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.19337548931044865,
      "step": 7500
    },
    {
      "epoch": 0.6006727534839019,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.10773607414884831,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.31528761982917786,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 39.2229,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 12.442,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.555,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.2793252255786583,
      "step": 7500
    },
    {
      "epoch": 0.6006727534839019,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.5488721804511278,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.5185889005661011,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 28.6015,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.832,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.385,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.4528710725893825,
      "step": 7500
    },
    {
      "epoch": 0.608681723530354,
      "grad_norm": 0.4197905957698822,
      "learning_rate": 4.349025540624722e-05,
      "loss": 0.0941,
      "step": 7600
    },
    {
      "epoch": 0.6166906935768061,
      "grad_norm": 0.16225901246070862,
      "learning_rate": 4.2600338168550324e-05,
      "loss": 0.0921,
      "step": 7700
    },
    {
      "epoch": 0.624699663623258,
      "grad_norm": 0.2778748869895935,
      "learning_rate": 4.171042093085343e-05,
      "loss": 0.0868,
      "step": 7800
    },
    {
      "epoch": 0.6327086336697101,
      "grad_norm": 0.25382688641548157,
      "learning_rate": 4.082050369315654e-05,
      "loss": 0.0934,
      "step": 7900
    },
    {
      "epoch": 0.6407176037161622,
      "grad_norm": 0.3628831207752228,
      "learning_rate": 3.9930586455459647e-05,
      "loss": 0.0968,
      "step": 8000
    },
    {
      "epoch": 0.6487265737626141,
      "grad_norm": 0.24050642549991608,
      "learning_rate": 3.904066921776275e-05,
      "loss": 0.0911,
      "step": 8100
    },
    {
      "epoch": 0.6567355438090662,
      "grad_norm": 0.22333455085754395,
      "learning_rate": 3.8150751980065855e-05,
      "loss": 0.0942,
      "step": 8200
    },
    {
      "epoch": 0.6647445138555181,
      "grad_norm": 0.36757221817970276,
      "learning_rate": 3.726083474236896e-05,
      "loss": 0.0935,
      "step": 8300
    },
    {
      "epoch": 0.6727534839019702,
      "grad_norm": 0.4592411518096924,
      "learning_rate": 3.637091750467207e-05,
      "loss": 0.0903,
      "step": 8400
    },
    {
      "epoch": 0.6807624539484223,
      "grad_norm": 0.2041786164045334,
      "learning_rate": 3.548100026697517e-05,
      "loss": 0.0984,
      "step": 8500
    },
    {
      "epoch": 0.6887714239948742,
      "grad_norm": 0.2337992787361145,
      "learning_rate": 3.459108302927828e-05,
      "loss": 0.0938,
      "step": 8600
    },
    {
      "epoch": 0.6967803940413263,
      "grad_norm": 0.34608423709869385,
      "learning_rate": 3.3701165791581386e-05,
      "loss": 0.09,
      "step": 8700
    },
    {
      "epoch": 0.7047893640877783,
      "grad_norm": 0.24163080751895905,
      "learning_rate": 3.281124855388449e-05,
      "loss": 0.0866,
      "step": 8800
    },
    {
      "epoch": 0.7127983341342303,
      "grad_norm": 0.23344071209430695,
      "learning_rate": 3.1921331316187594e-05,
      "loss": 0.0876,
      "step": 8900
    },
    {
      "epoch": 0.7208073041806824,
      "grad_norm": 0.33907049894332886,
      "learning_rate": 3.10314140784907e-05,
      "loss": 0.0904,
      "step": 9000
    },
    {
      "epoch": 0.7208073041806824,
      "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.08734974387798117,
      "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.0864705815911293,
      "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 239.9022,
      "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.084,
      "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.263,
      "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.18066847335140018,
      "step": 9000
    },
    {
      "epoch": 0.7208073041806824,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.10360649720106452,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.30589863657951355,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 40.4318,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 12.07,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.509,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.2734405649274225,
      "step": 9000
    },
    {
      "epoch": 0.7208073041806824,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.5204417293233082,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.5159913301467896,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 28.5651,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.836,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.385,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.4431202600216685,
      "step": 9000
    },
    {
      "epoch": 0.7288162742271344,
      "grad_norm": 0.3358537554740906,
      "learning_rate": 3.014149684079381e-05,
      "loss": 0.0975,
      "step": 9100
    },
    {
      "epoch": 0.7368252442735864,
      "grad_norm": 0.30242183804512024,
      "learning_rate": 2.9251579603096914e-05,
      "loss": 0.0932,
      "step": 9200
    },
    {
      "epoch": 0.7448342143200385,
      "grad_norm": 0.2576250731945038,
      "learning_rate": 2.836166236540002e-05,
      "loss": 0.0884,
      "step": 9300
    },
    {
      "epoch": 0.7528431843664904,
      "grad_norm": 0.1822267770767212,
      "learning_rate": 2.7471745127703125e-05,
      "loss": 0.0887,
      "step": 9400
    },
    {
      "epoch": 0.7608521544129425,
      "grad_norm": 0.28615352511405945,
      "learning_rate": 2.6581827890006233e-05,
      "loss": 0.096,
      "step": 9500
    },
    {
      "epoch": 0.7688611244593945,
      "grad_norm": 0.25158432126045227,
      "learning_rate": 2.5691910652309337e-05,
      "loss": 0.0885,
      "step": 9600
    },
    {
      "epoch": 0.7768700945058465,
      "grad_norm": 0.3026082515716553,
      "learning_rate": 2.480199341461244e-05,
      "loss": 0.0877,
      "step": 9700
    },
    {
      "epoch": 0.7848790645522986,
      "grad_norm": 0.27292346954345703,
      "learning_rate": 2.391207617691555e-05,
      "loss": 0.0941,
      "step": 9800
    },
    {
      "epoch": 0.7928880345987506,
      "grad_norm": 0.24669557809829712,
      "learning_rate": 2.3022158939218656e-05,
      "loss": 0.0925,
      "step": 9900
    },
    {
      "epoch": 0.8008970046452026,
      "grad_norm": 0.2944881021976471,
      "learning_rate": 2.213224170152176e-05,
      "loss": 0.0945,
      "step": 10000
    },
    {
      "epoch": 0.8089059746916547,
      "grad_norm": 0.09000946581363678,
      "learning_rate": 2.1242324463824865e-05,
      "loss": 0.0857,
      "step": 10100
    },
    {
      "epoch": 0.8169149447381067,
      "grad_norm": 0.27225881814956665,
      "learning_rate": 2.035240722612797e-05,
      "loss": 0.0854,
      "step": 10200
    },
    {
      "epoch": 0.8249239147845587,
      "grad_norm": 0.19183236360549927,
      "learning_rate": 1.9462489988431076e-05,
      "loss": 0.0876,
      "step": 10300
    },
    {
      "epoch": 0.8329328848310107,
      "grad_norm": 0.4199092984199524,
      "learning_rate": 1.8572572750734184e-05,
      "loss": 0.0896,
      "step": 10400
    },
    {
      "epoch": 0.8409418548774628,
      "grad_norm": 0.16657961905002594,
      "learning_rate": 1.7682655513037288e-05,
      "loss": 0.0832,
      "step": 10500
    },
    {
      "epoch": 0.8409418548774628,
      "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.07417957921898752,
      "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.08394235372543335,
      "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 237.0276,
      "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.109,
      "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.266,
      "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.1648900933453779,
      "step": 10500
    },
    {
      "epoch": 0.8409418548774628,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.10516655960356061,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.28152379393577576,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 39.2346,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 12.438,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.555,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.2734405649274225,
      "step": 10500
    },
    {
      "epoch": 0.8409418548774628,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.5044642857142857,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.5193919539451599,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 26.7813,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 3.024,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.411,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.41278439869989164,
      "step": 10500
    },
    {
      "epoch": 0.8489508249239148,
      "grad_norm": 0.25866174697875977,
      "learning_rate": 1.6792738275340396e-05,
      "loss": 0.0853,
      "step": 10600
    },
    {
      "epoch": 0.8569597949703668,
      "grad_norm": 0.28780218958854675,
      "learning_rate": 1.59028210376435e-05,
      "loss": 0.0899,
      "step": 10700
    },
    {
      "epoch": 0.8649687650168189,
      "grad_norm": 0.3092251121997833,
      "learning_rate": 1.5012903799946607e-05,
      "loss": 0.0949,
      "step": 10800
    },
    {
      "epoch": 0.8729777350632708,
      "grad_norm": 0.1439545601606369,
      "learning_rate": 1.4122986562249713e-05,
      "loss": 0.0846,
      "step": 10900
    },
    {
      "epoch": 0.8809867051097229,
      "grad_norm": 0.49992987513542175,
      "learning_rate": 1.3233069324552816e-05,
      "loss": 0.0878,
      "step": 11000
    },
    {
      "epoch": 0.888995675156175,
      "grad_norm": 0.5189112424850464,
      "learning_rate": 1.2343152086855923e-05,
      "loss": 0.088,
      "step": 11100
    },
    {
      "epoch": 0.8970046452026269,
      "grad_norm": 0.22200240194797516,
      "learning_rate": 1.1453234849159029e-05,
      "loss": 0.0793,
      "step": 11200
    },
    {
      "epoch": 0.905013615249079,
      "grad_norm": 0.8681122660636902,
      "learning_rate": 1.0563317611462133e-05,
      "loss": 0.0897,
      "step": 11300
    },
    {
      "epoch": 0.913022585295531,
      "grad_norm": 0.1663997769355774,
      "learning_rate": 9.673400373765241e-06,
      "loss": 0.0865,
      "step": 11400
    },
    {
      "epoch": 0.921031555341983,
      "grad_norm": 0.3327358067035675,
      "learning_rate": 8.783483136068347e-06,
      "loss": 0.0887,
      "step": 11500
    },
    {
      "epoch": 0.9290405253884351,
      "grad_norm": 0.23733209073543549,
      "learning_rate": 7.893565898371453e-06,
      "loss": 0.0868,
      "step": 11600
    },
    {
      "epoch": 0.937049495434887,
      "grad_norm": 0.21292167901992798,
      "learning_rate": 7.0036486606745585e-06,
      "loss": 0.0896,
      "step": 11700
    },
    {
      "epoch": 0.9450584654813391,
      "grad_norm": 0.21337737143039703,
      "learning_rate": 6.1137314229776635e-06,
      "loss": 0.0847,
      "step": 11800
    },
    {
      "epoch": 0.9530674355277912,
      "grad_norm": 0.23139464855194092,
      "learning_rate": 5.223814185280769e-06,
      "loss": 0.082,
      "step": 11900
    },
    {
      "epoch": 0.9610764055742431,
      "grad_norm": 0.4367704391479492,
      "learning_rate": 4.333896947583875e-06,
      "loss": 0.0874,
      "step": 12000
    },
    {
      "epoch": 0.9610764055742431,
      "eval_oza75_bm-text-normalization__cbr__dev__500__cer": 0.07097326367155066,
      "eval_oza75_bm-text-normalization__cbr__dev__500__loss": 0.08283345401287079,
      "eval_oza75_bm-text-normalization__cbr__dev__500__runtime": 236.9213,
      "eval_oza75_bm-text-normalization__cbr__dev__500__samples_per_second": 2.11,
      "eval_oza75_bm-text-normalization__cbr__dev__500__steps_per_second": 0.266,
      "eval_oza75_bm-text-normalization__cbr__dev__500__wer": 0.16193917494730503,
      "step": 12000
    },
    {
      "epoch": 0.9610764055742431,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__cer": 0.1025052766816555,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__loss": 0.27890169620513916,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__runtime": 41.0822,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__samples_per_second": 11.879,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__steps_per_second": 1.485,
      "eval_djelia_bm-text-normalization__bamadaba__dev__500__wer": 0.2742251863475873,
      "step": 12000
    },
    {
      "epoch": 0.9610764055742431,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_cer": 0.5032894736842105,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_loss": 0.5151007771492004,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_runtime": 27.495,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_samples_per_second": 2.946,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_steps_per_second": 0.4,
      "eval_djelia_bm-text-normalization-benchmark__level-1__test_wer": 0.4106175514626219,
      "step": 12000
    },
    {
      "epoch": 0.9690853756206952,
      "grad_norm": 0.18608327209949493,
      "learning_rate": 3.4439797098869802e-06,
      "loss": 0.0888,
      "step": 12100
    },
    {
      "epoch": 0.9770943456671473,
      "grad_norm": 0.3838459253311157,
      "learning_rate": 2.5540624721900865e-06,
      "loss": 0.0834,
      "step": 12200
    },
    {
      "epoch": 0.9851033157135992,
      "grad_norm": 0.32492393255233765,
      "learning_rate": 1.6641452344931922e-06,
      "loss": 0.0862,
      "step": 12300
    },
    {
      "epoch": 0.9931122857600513,
      "grad_norm": 0.3034497797489166,
      "learning_rate": 7.74227996796298e-07,
      "loss": 0.0879,
      "step": 12400
    },
    {
      "epoch": 1.0,
      "step": 12486,
      "total_flos": 1.1016496820713882e+17,
      "train_loss": 0.1502396887776639,
      "train_runtime": 5207.863,
      "train_samples_per_second": 19.179,
      "train_steps_per_second": 2.398
    }
  ],
  "logging_steps": 100,
  "max_steps": 12486,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1016496820713882e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}