{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 500, "global_step": 656, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12195121951219512, "grad_norm": 42.956241607666016, "learning_rate": 1.5e-06, "loss": 3.946, "step": 20 }, { "epoch": 0.24390243902439024, "grad_norm": 18.780467987060547, "learning_rate": 3.5e-06, "loss": 2.801, "step": 40 }, { "epoch": 0.36585365853658536, "grad_norm": 10.132372856140137, "learning_rate": 5.500000000000001e-06, "loss": 1.829, "step": 60 }, { "epoch": 0.4878048780487805, "grad_norm": 6.34807825088501, "learning_rate": 7.500000000000001e-06, "loss": 1.1664, "step": 80 }, { "epoch": 0.6097560975609756, "grad_norm": 3.893784523010254, "learning_rate": 9.5e-06, "loss": 0.8395, "step": 100 }, { "epoch": 0.7317073170731707, "grad_norm": 3.6245577335357666, "learning_rate": 9.73021582733813e-06, "loss": 0.6985, "step": 120 }, { "epoch": 0.8536585365853658, "grad_norm": 3.2853732109069824, "learning_rate": 9.370503597122303e-06, "loss": 0.6198, "step": 140 }, { "epoch": 0.975609756097561, "grad_norm": 3.1876819133758545, "learning_rate": 9.010791366906476e-06, "loss": 0.5933, "step": 160 }, { "epoch": 1.0, "eval_avg_cer": 0.35838623267570763, "eval_avg_wer": 0.8217081943985028, "eval_gilaki_cer": 0.38532785111447737, "eval_gilaki_wer": 0.9969254419677172, "eval_hawrami_cer": 0.11622886617633012, "eval_hawrami_wer": 0.5257112184648417, "eval_laki_kurdish_cer": 0.24613632666341304, "eval_laki_kurdish_wer": 0.7856850715746422, "eval_loss": 0.7336705923080444, "eval_mazanderani_cer": 0.3138505136379738, "eval_mazanderani_wer": 0.8884360672440142, "eval_runtime": 457.0137, "eval_samples_per_second": 7.129, "eval_southern_kurdish_cer": 0.21365623482062313, "eval_southern_kurdish_wer": 0.6900034831069314, "eval_steps_per_second": 0.057, "eval_talysh_cer": 0.5, "eval_talysh_wer": 0.9166666666666666, "eval_zazaki_cer": 0.7335038363171356, "eval_zazaki_wer": 0.9485294117647058, "step": 164 }, { "epoch": 1.0975609756097562, "grad_norm": 3.3254923820495605, "learning_rate": 8.651079136690648e-06, "loss": 0.5337, "step": 180 }, { "epoch": 1.2195121951219512, "grad_norm": 3.416600227355957, "learning_rate": 8.29136690647482e-06, "loss": 0.5054, "step": 200 }, { "epoch": 1.3414634146341464, "grad_norm": 3.544034481048584, "learning_rate": 7.931654676258992e-06, "loss": 0.4915, "step": 220 }, { "epoch": 1.4634146341463414, "grad_norm": 2.7927117347717285, "learning_rate": 7.571942446043166e-06, "loss": 0.47, "step": 240 }, { "epoch": 1.5853658536585367, "grad_norm": 3.406677007675171, "learning_rate": 7.212230215827338e-06, "loss": 0.4681, "step": 260 }, { "epoch": 1.7073170731707317, "grad_norm": 2.7277846336364746, "learning_rate": 6.852517985611511e-06, "loss": 0.4653, "step": 280 }, { "epoch": 1.8292682926829267, "grad_norm": 2.8852195739746094, "learning_rate": 6.4928057553956835e-06, "loss": 0.4281, "step": 300 }, { "epoch": 1.951219512195122, "grad_norm": 3.2276437282562256, "learning_rate": 6.133093525179856e-06, "loss": 0.4436, "step": 320 }, { "epoch": 2.0, "eval_avg_cer": 0.2926722864574391, "eval_avg_wer": 0.7377513408917141, "eval_gilaki_cer": 0.38182211642501623, "eval_gilaki_wer": 0.9723289777094543, "eval_hawrami_cer": 0.09332314452192186, "eval_hawrami_wer": 0.44186795491143316, "eval_laki_kurdish_cer": 0.1845615747519115, "eval_laki_kurdish_wer": 0.6486707566462168, "eval_loss": 0.5930325984954834, "eval_mazanderani_cer": 0.29389538316212066, "eval_mazanderani_wer": 0.7514009169638308, "eval_runtime": 456.2009, "eval_samples_per_second": 7.142, "eval_southern_kurdish_cer": 0.17975851779890362, "eval_southern_kurdish_wer": 0.5950888192267503, "eval_steps_per_second": 0.057, "eval_talysh_cer": 0.5, "eval_talysh_wer": 0.9166666666666666, "eval_zazaki_cer": 0.4153452685421995, "eval_zazaki_wer": 0.8382352941176471, "step": 328 }, { "epoch": 2.073170731707317, "grad_norm": 3.5604612827301025, "learning_rate": 5.773381294964029e-06, "loss": 0.4201, "step": 340 }, { "epoch": 2.1951219512195124, "grad_norm": 2.8034543991088867, "learning_rate": 5.413669064748202e-06, "loss": 0.407, "step": 360 }, { "epoch": 2.317073170731707, "grad_norm": 3.1773579120635986, "learning_rate": 5.053956834532374e-06, "loss": 0.3865, "step": 380 }, { "epoch": 2.4390243902439024, "grad_norm": 2.6149208545684814, "learning_rate": 4.6942446043165475e-06, "loss": 0.4109, "step": 400 }, { "epoch": 2.5609756097560976, "grad_norm": 2.5704352855682373, "learning_rate": 4.33453237410072e-06, "loss": 0.3879, "step": 420 }, { "epoch": 2.682926829268293, "grad_norm": 2.887918472290039, "learning_rate": 3.974820143884892e-06, "loss": 0.3831, "step": 440 }, { "epoch": 2.8048780487804876, "grad_norm": 2.9402122497558594, "learning_rate": 3.6151079136690647e-06, "loss": 0.3946, "step": 460 }, { "epoch": 2.926829268292683, "grad_norm": 2.4520623683929443, "learning_rate": 3.2553956834532375e-06, "loss": 0.387, "step": 480 }, { "epoch": 3.0, "eval_avg_cer": 0.2707400949444132, "eval_avg_wer": 0.7061799250572226, "eval_gilaki_cer": 0.3735122267907379, "eval_gilaki_wer": 0.9523443504996156, "eval_hawrami_cer": 0.08913936383608749, "eval_hawrami_wer": 0.42297369833601717, "eval_laki_kurdish_cer": 0.1678054335448186, "eval_laki_kurdish_wer": 0.6094069529652352, "eval_loss": 0.5509017109870911, "eval_mazanderani_cer": 0.24359428503955602, "eval_mazanderani_wer": 0.6821192052980133, "eval_runtime": 459.5728, "eval_samples_per_second": 7.089, "eval_southern_kurdish_cer": 0.1871140101311498, "eval_southern_kurdish_wer": 0.5705329153605015, "eval_steps_per_second": 0.057, "eval_talysh_cer": 0.5, "eval_talysh_wer": 0.9166666666666666, "eval_zazaki_cer": 0.3340153452685422, "eval_zazaki_wer": 0.7892156862745098, "step": 492 }, { "epoch": 3.048780487804878, "grad_norm": 2.502338409423828, "learning_rate": 2.89568345323741e-06, "loss": 0.3788, "step": 500 }, { "epoch": 3.1707317073170733, "grad_norm": 2.474639654159546, "learning_rate": 2.5359712230215827e-06, "loss": 0.3829, "step": 520 }, { "epoch": 3.292682926829268, "grad_norm": 2.5425171852111816, "learning_rate": 2.1762589928057555e-06, "loss": 0.3557, "step": 540 }, { "epoch": 3.4146341463414633, "grad_norm": 2.498109817504883, "learning_rate": 1.8165467625899283e-06, "loss": 0.3607, "step": 560 }, { "epoch": 3.5365853658536586, "grad_norm": 2.560028076171875, "learning_rate": 1.4568345323741009e-06, "loss": 0.3544, "step": 580 }, { "epoch": 3.658536585365854, "grad_norm": 2.3217689990997314, "learning_rate": 1.0971223021582735e-06, "loss": 0.3614, "step": 600 }, { "epoch": 3.7804878048780486, "grad_norm": 2.47906231880188, "learning_rate": 7.37410071942446e-07, "loss": 0.3608, "step": 620 }, { "epoch": 3.902439024390244, "grad_norm": 2.4336979389190674, "learning_rate": 3.7769784172661875e-07, "loss": 0.3575, "step": 640 }, { "epoch": 4.0, "eval_avg_cer": 0.2678389158869433, "eval_avg_wer": 0.7003291969579674, "eval_gilaki_cer": 0.38182211642501623, "eval_gilaki_wer": 0.9606072252113759, "eval_hawrami_cer": 0.08648390486197345, "eval_hawrami_wer": 0.4144927536231884, "eval_laki_kurdish_cer": 0.16528387831462502, "eval_laki_kurdish_wer": 0.603680981595092, "eval_loss": 0.5420735478401184, "eval_mazanderani_cer": 0.2510331798323297, "eval_mazanderani_wer": 0.6836474783494652, "eval_runtime": 457.2923, "eval_samples_per_second": 7.125, "eval_southern_kurdish_cer": 0.17209076399972242, "eval_southern_kurdish_wer": 0.5609543712991989, "eval_steps_per_second": 0.057, "eval_talysh_cer": 0.5, "eval_talysh_wer": 0.9166666666666666, "eval_zazaki_cer": 0.31815856777493606, "eval_zazaki_wer": 0.7622549019607843, "step": 656 }, { "epoch": 4.0, "step": 656, "total_flos": 5.41529079939072e+18, "train_loss": 0.6912094768954486, "train_runtime": 12274.8821, "train_samples_per_second": 6.802, "train_steps_per_second": 0.053 } ], "logging_steps": 20, "max_steps": 656, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.41529079939072e+18, "train_batch_size": 128, "trial_name": null, "trial_params": null }