| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 656, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.12195121951219512, | |
| "grad_norm": 42.956241607666016, | |
| "learning_rate": 1.5e-06, | |
| "loss": 3.946, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.24390243902439024, | |
| "grad_norm": 18.780467987060547, | |
| "learning_rate": 3.5e-06, | |
| "loss": 2.801, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.36585365853658536, | |
| "grad_norm": 10.132372856140137, | |
| "learning_rate": 5.500000000000001e-06, | |
| "loss": 1.829, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.4878048780487805, | |
| "grad_norm": 6.34807825088501, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 1.1664, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.6097560975609756, | |
| "grad_norm": 3.893784523010254, | |
| "learning_rate": 9.5e-06, | |
| "loss": 0.8395, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.7317073170731707, | |
| "grad_norm": 3.6245577335357666, | |
| "learning_rate": 9.73021582733813e-06, | |
| "loss": 0.6985, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.8536585365853658, | |
| "grad_norm": 3.2853732109069824, | |
| "learning_rate": 9.370503597122303e-06, | |
| "loss": 0.6198, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.975609756097561, | |
| "grad_norm": 3.1876819133758545, | |
| "learning_rate": 9.010791366906476e-06, | |
| "loss": 0.5933, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_avg_cer": 0.35838623267570763, | |
| "eval_avg_wer": 0.8217081943985028, | |
| "eval_gilaki_cer": 0.38532785111447737, | |
| "eval_gilaki_wer": 0.9969254419677172, | |
| "eval_hawrami_cer": 0.11622886617633012, | |
| "eval_hawrami_wer": 0.5257112184648417, | |
| "eval_laki_kurdish_cer": 0.24613632666341304, | |
| "eval_laki_kurdish_wer": 0.7856850715746422, | |
| "eval_loss": 0.7336705923080444, | |
| "eval_mazanderani_cer": 0.3138505136379738, | |
| "eval_mazanderani_wer": 0.8884360672440142, | |
| "eval_runtime": 457.0137, | |
| "eval_samples_per_second": 7.129, | |
| "eval_southern_kurdish_cer": 0.21365623482062313, | |
| "eval_southern_kurdish_wer": 0.6900034831069314, | |
| "eval_steps_per_second": 0.057, | |
| "eval_talysh_cer": 0.5, | |
| "eval_talysh_wer": 0.9166666666666666, | |
| "eval_zazaki_cer": 0.7335038363171356, | |
| "eval_zazaki_wer": 0.9485294117647058, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 1.0975609756097562, | |
| "grad_norm": 3.3254923820495605, | |
| "learning_rate": 8.651079136690648e-06, | |
| "loss": 0.5337, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.2195121951219512, | |
| "grad_norm": 3.416600227355957, | |
| "learning_rate": 8.29136690647482e-06, | |
| "loss": 0.5054, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.3414634146341464, | |
| "grad_norm": 3.544034481048584, | |
| "learning_rate": 7.931654676258992e-06, | |
| "loss": 0.4915, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.4634146341463414, | |
| "grad_norm": 2.7927117347717285, | |
| "learning_rate": 7.571942446043166e-06, | |
| "loss": 0.47, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.5853658536585367, | |
| "grad_norm": 3.406677007675171, | |
| "learning_rate": 7.212230215827338e-06, | |
| "loss": 0.4681, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.7073170731707317, | |
| "grad_norm": 2.7277846336364746, | |
| "learning_rate": 6.852517985611511e-06, | |
| "loss": 0.4653, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.8292682926829267, | |
| "grad_norm": 2.8852195739746094, | |
| "learning_rate": 6.4928057553956835e-06, | |
| "loss": 0.4281, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.951219512195122, | |
| "grad_norm": 3.2276437282562256, | |
| "learning_rate": 6.133093525179856e-06, | |
| "loss": 0.4436, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_avg_cer": 0.2926722864574391, | |
| "eval_avg_wer": 0.7377513408917141, | |
| "eval_gilaki_cer": 0.38182211642501623, | |
| "eval_gilaki_wer": 0.9723289777094543, | |
| "eval_hawrami_cer": 0.09332314452192186, | |
| "eval_hawrami_wer": 0.44186795491143316, | |
| "eval_laki_kurdish_cer": 0.1845615747519115, | |
| "eval_laki_kurdish_wer": 0.6486707566462168, | |
| "eval_loss": 0.5930325984954834, | |
| "eval_mazanderani_cer": 0.29389538316212066, | |
| "eval_mazanderani_wer": 0.7514009169638308, | |
| "eval_runtime": 456.2009, | |
| "eval_samples_per_second": 7.142, | |
| "eval_southern_kurdish_cer": 0.17975851779890362, | |
| "eval_southern_kurdish_wer": 0.5950888192267503, | |
| "eval_steps_per_second": 0.057, | |
| "eval_talysh_cer": 0.5, | |
| "eval_talysh_wer": 0.9166666666666666, | |
| "eval_zazaki_cer": 0.4153452685421995, | |
| "eval_zazaki_wer": 0.8382352941176471, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 2.073170731707317, | |
| "grad_norm": 3.5604612827301025, | |
| "learning_rate": 5.773381294964029e-06, | |
| "loss": 0.4201, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.1951219512195124, | |
| "grad_norm": 2.8034543991088867, | |
| "learning_rate": 5.413669064748202e-06, | |
| "loss": 0.407, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.317073170731707, | |
| "grad_norm": 3.1773579120635986, | |
| "learning_rate": 5.053956834532374e-06, | |
| "loss": 0.3865, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.4390243902439024, | |
| "grad_norm": 2.6149208545684814, | |
| "learning_rate": 4.6942446043165475e-06, | |
| "loss": 0.4109, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.5609756097560976, | |
| "grad_norm": 2.5704352855682373, | |
| "learning_rate": 4.33453237410072e-06, | |
| "loss": 0.3879, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.682926829268293, | |
| "grad_norm": 2.887918472290039, | |
| "learning_rate": 3.974820143884892e-06, | |
| "loss": 0.3831, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.8048780487804876, | |
| "grad_norm": 2.9402122497558594, | |
| "learning_rate": 3.6151079136690647e-06, | |
| "loss": 0.3946, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.926829268292683, | |
| "grad_norm": 2.4520623683929443, | |
| "learning_rate": 3.2553956834532375e-06, | |
| "loss": 0.387, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_avg_cer": 0.2707400949444132, | |
| "eval_avg_wer": 0.7061799250572226, | |
| "eval_gilaki_cer": 0.3735122267907379, | |
| "eval_gilaki_wer": 0.9523443504996156, | |
| "eval_hawrami_cer": 0.08913936383608749, | |
| "eval_hawrami_wer": 0.42297369833601717, | |
| "eval_laki_kurdish_cer": 0.1678054335448186, | |
| "eval_laki_kurdish_wer": 0.6094069529652352, | |
| "eval_loss": 0.5509017109870911, | |
| "eval_mazanderani_cer": 0.24359428503955602, | |
| "eval_mazanderani_wer": 0.6821192052980133, | |
| "eval_runtime": 459.5728, | |
| "eval_samples_per_second": 7.089, | |
| "eval_southern_kurdish_cer": 0.1871140101311498, | |
| "eval_southern_kurdish_wer": 0.5705329153605015, | |
| "eval_steps_per_second": 0.057, | |
| "eval_talysh_cer": 0.5, | |
| "eval_talysh_wer": 0.9166666666666666, | |
| "eval_zazaki_cer": 0.3340153452685422, | |
| "eval_zazaki_wer": 0.7892156862745098, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 3.048780487804878, | |
| "grad_norm": 2.502338409423828, | |
| "learning_rate": 2.89568345323741e-06, | |
| "loss": 0.3788, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.1707317073170733, | |
| "grad_norm": 2.474639654159546, | |
| "learning_rate": 2.5359712230215827e-06, | |
| "loss": 0.3829, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 3.292682926829268, | |
| "grad_norm": 2.5425171852111816, | |
| "learning_rate": 2.1762589928057555e-06, | |
| "loss": 0.3557, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 3.4146341463414633, | |
| "grad_norm": 2.498109817504883, | |
| "learning_rate": 1.8165467625899283e-06, | |
| "loss": 0.3607, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 3.5365853658536586, | |
| "grad_norm": 2.560028076171875, | |
| "learning_rate": 1.4568345323741009e-06, | |
| "loss": 0.3544, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 3.658536585365854, | |
| "grad_norm": 2.3217689990997314, | |
| "learning_rate": 1.0971223021582735e-06, | |
| "loss": 0.3614, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 3.7804878048780486, | |
| "grad_norm": 2.47906231880188, | |
| "learning_rate": 7.37410071942446e-07, | |
| "loss": 0.3608, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 3.902439024390244, | |
| "grad_norm": 2.4336979389190674, | |
| "learning_rate": 3.7769784172661875e-07, | |
| "loss": 0.3575, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_avg_cer": 0.2678389158869433, | |
| "eval_avg_wer": 0.7003291969579674, | |
| "eval_gilaki_cer": 0.38182211642501623, | |
| "eval_gilaki_wer": 0.9606072252113759, | |
| "eval_hawrami_cer": 0.08648390486197345, | |
| "eval_hawrami_wer": 0.4144927536231884, | |
| "eval_laki_kurdish_cer": 0.16528387831462502, | |
| "eval_laki_kurdish_wer": 0.603680981595092, | |
| "eval_loss": 0.5420735478401184, | |
| "eval_mazanderani_cer": 0.2510331798323297, | |
| "eval_mazanderani_wer": 0.6836474783494652, | |
| "eval_runtime": 457.2923, | |
| "eval_samples_per_second": 7.125, | |
| "eval_southern_kurdish_cer": 0.17209076399972242, | |
| "eval_southern_kurdish_wer": 0.5609543712991989, | |
| "eval_steps_per_second": 0.057, | |
| "eval_talysh_cer": 0.5, | |
| "eval_talysh_wer": 0.9166666666666666, | |
| "eval_zazaki_cer": 0.31815856777493606, | |
| "eval_zazaki_wer": 0.7622549019607843, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 656, | |
| "total_flos": 5.41529079939072e+18, | |
| "train_loss": 0.6912094768954486, | |
| "train_runtime": 12274.8821, | |
| "train_samples_per_second": 6.802, | |
| "train_steps_per_second": 0.053 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 656, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.41529079939072e+18, | |
| "train_batch_size": 128, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |