| { | |
| "best_metric": 0.4528582034149963, | |
| "best_model_checkpoint": "d:\\DataTicon\\Whisper-Khmer-Small\\whisper-khmer\\outputs\\whisper-base-khmer\\checkpoint-4400", | |
| "epoch": 2.962962962962963, | |
| "eval_steps": 400, | |
| "global_step": 4400, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.006734006734006734, | |
| "grad_norm": 22.712024688720703, | |
| "learning_rate": 7.000000000000001e-06, | |
| "loss": 2.8858, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.013468013468013467, | |
| "grad_norm": 17.574142456054688, | |
| "learning_rate": 1.7000000000000003e-05, | |
| "loss": 2.2953, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.020202020202020204, | |
| "grad_norm": 16.47330093383789, | |
| "learning_rate": 2.7000000000000002e-05, | |
| "loss": 1.8692, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.026936026936026935, | |
| "grad_norm": 10.106539726257324, | |
| "learning_rate": 3.7e-05, | |
| "loss": 1.5765, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.03367003367003367, | |
| "grad_norm": 15.11670970916748, | |
| "learning_rate": 4.7e-05, | |
| "loss": 1.4941, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.04040404040404041, | |
| "grad_norm": 15.619561195373535, | |
| "learning_rate": 4.992054483541431e-05, | |
| "loss": 1.4231, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.04713804713804714, | |
| "grad_norm": 11.616868019104004, | |
| "learning_rate": 4.9807037457434736e-05, | |
| "loss": 1.384, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.05387205387205387, | |
| "grad_norm": 20.449440002441406, | |
| "learning_rate": 4.969353007945517e-05, | |
| "loss": 1.3234, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.06060606060606061, | |
| "grad_norm": 12.725702285766602, | |
| "learning_rate": 4.95800227014756e-05, | |
| "loss": 1.328, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.06734006734006734, | |
| "grad_norm": 15.14296817779541, | |
| "learning_rate": 4.946651532349603e-05, | |
| "loss": 1.3084, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.07407407407407407, | |
| "grad_norm": 11.94970417022705, | |
| "learning_rate": 4.935300794551646e-05, | |
| "loss": 1.2537, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.08080808080808081, | |
| "grad_norm": 8.479811668395996, | |
| "learning_rate": 4.92395005675369e-05, | |
| "loss": 1.2109, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.08754208754208755, | |
| "grad_norm": 14.986007690429688, | |
| "learning_rate": 4.9125993189557325e-05, | |
| "loss": 1.1808, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.09427609427609428, | |
| "grad_norm": 12.524420738220215, | |
| "learning_rate": 4.901248581157776e-05, | |
| "loss": 1.1299, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.10101010101010101, | |
| "grad_norm": 12.007112503051758, | |
| "learning_rate": 4.8898978433598185e-05, | |
| "loss": 1.151, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.10774410774410774, | |
| "grad_norm": 11.51995849609375, | |
| "learning_rate": 4.878547105561862e-05, | |
| "loss": 1.0585, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.11447811447811448, | |
| "grad_norm": 9.454591751098633, | |
| "learning_rate": 4.8671963677639046e-05, | |
| "loss": 0.9909, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.12121212121212122, | |
| "grad_norm": 9.037362098693848, | |
| "learning_rate": 4.855845629965948e-05, | |
| "loss": 0.9491, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.12794612794612795, | |
| "grad_norm": 11.153435707092285, | |
| "learning_rate": 4.844494892167991e-05, | |
| "loss": 0.9348, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.13468013468013468, | |
| "grad_norm": 10.488895416259766, | |
| "learning_rate": 4.833144154370035e-05, | |
| "loss": 0.8737, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.1414141414141414, | |
| "grad_norm": 7.803547382354736, | |
| "learning_rate": 4.8217934165720774e-05, | |
| "loss": 0.7626, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.14814814814814814, | |
| "grad_norm": 8.341965675354004, | |
| "learning_rate": 4.810442678774121e-05, | |
| "loss": 0.7547, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.15488215488215487, | |
| "grad_norm": 8.866105079650879, | |
| "learning_rate": 4.7990919409761635e-05, | |
| "loss": 0.6779, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.16161616161616163, | |
| "grad_norm": 11.127110481262207, | |
| "learning_rate": 4.787741203178207e-05, | |
| "loss": 0.7238, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.16835016835016836, | |
| "grad_norm": 10.66511344909668, | |
| "learning_rate": 4.77639046538025e-05, | |
| "loss": 0.6632, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.1750841750841751, | |
| "grad_norm": 9.223587989807129, | |
| "learning_rate": 4.7650397275822936e-05, | |
| "loss": 0.6478, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.18181818181818182, | |
| "grad_norm": 8.034420013427734, | |
| "learning_rate": 4.753688989784336e-05, | |
| "loss": 0.6227, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.18855218855218855, | |
| "grad_norm": 8.181520462036133, | |
| "learning_rate": 4.7423382519863796e-05, | |
| "loss": 0.6241, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.19528619528619529, | |
| "grad_norm": 8.540548324584961, | |
| "learning_rate": 4.730987514188422e-05, | |
| "loss": 0.5726, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.20202020202020202, | |
| "grad_norm": 10.086724281311035, | |
| "learning_rate": 4.719636776390466e-05, | |
| "loss": 0.5918, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.20875420875420875, | |
| "grad_norm": 6.169092178344727, | |
| "learning_rate": 4.708286038592509e-05, | |
| "loss": 0.5442, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.21548821548821548, | |
| "grad_norm": 8.274078369140625, | |
| "learning_rate": 4.6969353007945524e-05, | |
| "loss": 0.5636, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": 7.014498710632324, | |
| "learning_rate": 4.685584562996595e-05, | |
| "loss": 0.5154, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.22895622895622897, | |
| "grad_norm": 7.540900707244873, | |
| "learning_rate": 4.6742338251986385e-05, | |
| "loss": 0.5323, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.2356902356902357, | |
| "grad_norm": 9.698654174804688, | |
| "learning_rate": 4.662883087400681e-05, | |
| "loss": 0.519, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.24242424242424243, | |
| "grad_norm": 5.288636207580566, | |
| "learning_rate": 4.6515323496027245e-05, | |
| "loss": 0.4927, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.24915824915824916, | |
| "grad_norm": 6.129817485809326, | |
| "learning_rate": 4.640181611804767e-05, | |
| "loss": 0.5281, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.2558922558922559, | |
| "grad_norm": 8.628268241882324, | |
| "learning_rate": 4.6288308740068106e-05, | |
| "loss": 0.4962, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.26262626262626265, | |
| "grad_norm": 7.04541015625, | |
| "learning_rate": 4.617480136208854e-05, | |
| "loss": 0.4874, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.26936026936026936, | |
| "grad_norm": 6.490813732147217, | |
| "learning_rate": 4.606129398410897e-05, | |
| "loss": 0.4962, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.26936026936026936, | |
| "eval_loss": 0.5467123985290527, | |
| "eval_runtime": 806.5112, | |
| "eval_samples_per_second": 1.758, | |
| "eval_steps_per_second": 0.11, | |
| "eval_wer": 0.5883444691907943, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.2760942760942761, | |
| "grad_norm": 6.944798946380615, | |
| "learning_rate": 4.59477866061294e-05, | |
| "loss": 0.473, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.2828282828282828, | |
| "grad_norm": 6.29971981048584, | |
| "learning_rate": 4.5834279228149834e-05, | |
| "loss": 0.4672, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.2895622895622896, | |
| "grad_norm": 6.314589500427246, | |
| "learning_rate": 4.572077185017026e-05, | |
| "loss": 0.4657, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.2962962962962963, | |
| "grad_norm": 5.14242696762085, | |
| "learning_rate": 4.5607264472190694e-05, | |
| "loss": 0.4613, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.30303030303030304, | |
| "grad_norm": 6.764094352722168, | |
| "learning_rate": 4.549375709421113e-05, | |
| "loss": 0.4798, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.30976430976430974, | |
| "grad_norm": 7.024701118469238, | |
| "learning_rate": 4.538024971623156e-05, | |
| "loss": 0.4509, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.3164983164983165, | |
| "grad_norm": 4.914060592651367, | |
| "learning_rate": 4.526674233825199e-05, | |
| "loss": 0.4442, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.32323232323232326, | |
| "grad_norm": 5.153116703033447, | |
| "learning_rate": 4.515323496027242e-05, | |
| "loss": 0.4368, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.32996632996632996, | |
| "grad_norm": 6.269533157348633, | |
| "learning_rate": 4.503972758229285e-05, | |
| "loss": 0.4467, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.3367003367003367, | |
| "grad_norm": 6.478705406188965, | |
| "learning_rate": 4.492622020431328e-05, | |
| "loss": 0.4453, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.3434343434343434, | |
| "grad_norm": 5.625921249389648, | |
| "learning_rate": 4.481271282633372e-05, | |
| "loss": 0.3846, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.3501683501683502, | |
| "grad_norm": 5.453153133392334, | |
| "learning_rate": 4.469920544835415e-05, | |
| "loss": 0.3811, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.3569023569023569, | |
| "grad_norm": 6.992231369018555, | |
| "learning_rate": 4.458569807037458e-05, | |
| "loss": 0.4318, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.36363636363636365, | |
| "grad_norm": 5.616722583770752, | |
| "learning_rate": 4.447219069239501e-05, | |
| "loss": 0.3774, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.37037037037037035, | |
| "grad_norm": 4.499491214752197, | |
| "learning_rate": 4.435868331441544e-05, | |
| "loss": 0.4302, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.3771043771043771, | |
| "grad_norm": 5.019254207611084, | |
| "learning_rate": 4.424517593643587e-05, | |
| "loss": 0.405, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.3838383838383838, | |
| "grad_norm": 4.1655144691467285, | |
| "learning_rate": 4.41316685584563e-05, | |
| "loss": 0.3785, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.39057239057239057, | |
| "grad_norm": 4.204577922821045, | |
| "learning_rate": 4.401816118047674e-05, | |
| "loss": 0.3917, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.39730639730639733, | |
| "grad_norm": 5.208505153656006, | |
| "learning_rate": 4.3904653802497166e-05, | |
| "loss": 0.4067, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.40404040404040403, | |
| "grad_norm": 5.3686418533325195, | |
| "learning_rate": 4.37911464245176e-05, | |
| "loss": 0.3595, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.4107744107744108, | |
| "grad_norm": 4.19749641418457, | |
| "learning_rate": 4.3677639046538026e-05, | |
| "loss": 0.4, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.4175084175084175, | |
| "grad_norm": 7.22583532333374, | |
| "learning_rate": 4.356413166855846e-05, | |
| "loss": 0.3517, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.42424242424242425, | |
| "grad_norm": 5.825573921203613, | |
| "learning_rate": 4.345062429057889e-05, | |
| "loss": 0.4059, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.43097643097643096, | |
| "grad_norm": 5.724638938903809, | |
| "learning_rate": 4.333711691259932e-05, | |
| "loss": 0.3847, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.4377104377104377, | |
| "grad_norm": 7.2732954025268555, | |
| "learning_rate": 4.3223609534619754e-05, | |
| "loss": 0.3559, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 5.488597393035889, | |
| "learning_rate": 4.311010215664019e-05, | |
| "loss": 0.3833, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.4511784511784512, | |
| "grad_norm": 6.655267715454102, | |
| "learning_rate": 4.2996594778660615e-05, | |
| "loss": 0.3468, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.45791245791245794, | |
| "grad_norm": 6.509310245513916, | |
| "learning_rate": 4.288308740068105e-05, | |
| "loss": 0.3865, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.46464646464646464, | |
| "grad_norm": 4.374180793762207, | |
| "learning_rate": 4.2769580022701476e-05, | |
| "loss": 0.3617, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.4713804713804714, | |
| "grad_norm": 3.9861130714416504, | |
| "learning_rate": 4.265607264472191e-05, | |
| "loss": 0.3495, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.4781144781144781, | |
| "grad_norm": 4.877681732177734, | |
| "learning_rate": 4.254256526674234e-05, | |
| "loss": 0.3662, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.48484848484848486, | |
| "grad_norm": 4.442035675048828, | |
| "learning_rate": 4.242905788876278e-05, | |
| "loss": 0.3842, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.49158249158249157, | |
| "grad_norm": 4.463146209716797, | |
| "learning_rate": 4.2315550510783204e-05, | |
| "loss": 0.3786, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.4983164983164983, | |
| "grad_norm": 5.221556186676025, | |
| "learning_rate": 4.220204313280364e-05, | |
| "loss": 0.3684, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.5050505050505051, | |
| "grad_norm": 5.097838878631592, | |
| "learning_rate": 4.2088535754824064e-05, | |
| "loss": 0.3175, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.5117845117845118, | |
| "grad_norm": 4.413094997406006, | |
| "learning_rate": 4.19750283768445e-05, | |
| "loss": 0.3759, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.5185185185185185, | |
| "grad_norm": 4.710460662841797, | |
| "learning_rate": 4.186152099886493e-05, | |
| "loss": 0.3746, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.5252525252525253, | |
| "grad_norm": 4.644078254699707, | |
| "learning_rate": 4.1748013620885365e-05, | |
| "loss": 0.3613, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.531986531986532, | |
| "grad_norm": 3.328462839126587, | |
| "learning_rate": 4.163450624290579e-05, | |
| "loss": 0.3326, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.5387205387205387, | |
| "grad_norm": 4.52667236328125, | |
| "learning_rate": 4.1520998864926226e-05, | |
| "loss": 0.3349, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.5387205387205387, | |
| "eval_loss": 0.49282562732696533, | |
| "eval_runtime": 1123.4733, | |
| "eval_samples_per_second": 1.262, | |
| "eval_steps_per_second": 0.079, | |
| "eval_wer": 0.5612186625549654, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.5454545454545454, | |
| "grad_norm": 4.9449262619018555, | |
| "learning_rate": 4.140749148694665e-05, | |
| "loss": 0.3366, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.5521885521885522, | |
| "grad_norm": 5.562499523162842, | |
| "learning_rate": 4.1293984108967086e-05, | |
| "loss": 0.3781, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.5589225589225589, | |
| "grad_norm": 4.4809250831604, | |
| "learning_rate": 4.118047673098751e-05, | |
| "loss": 0.3607, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.5656565656565656, | |
| "grad_norm": 4.371147155761719, | |
| "learning_rate": 4.106696935300795e-05, | |
| "loss": 0.3082, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.5723905723905723, | |
| "grad_norm": 5.5584893226623535, | |
| "learning_rate": 4.095346197502838e-05, | |
| "loss": 0.3384, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.5791245791245792, | |
| "grad_norm": 4.966277599334717, | |
| "learning_rate": 4.0839954597048814e-05, | |
| "loss": 0.3328, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.5858585858585859, | |
| "grad_norm": 3.8009321689605713, | |
| "learning_rate": 4.072644721906924e-05, | |
| "loss": 0.3201, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.5925925925925926, | |
| "grad_norm": 5.3277106285095215, | |
| "learning_rate": 4.0612939841089675e-05, | |
| "loss": 0.3345, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.5993265993265994, | |
| "grad_norm": 4.464631080627441, | |
| "learning_rate": 4.04994324631101e-05, | |
| "loss": 0.3081, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.6060606060606061, | |
| "grad_norm": 4.56332540512085, | |
| "learning_rate": 4.0385925085130536e-05, | |
| "loss": 0.3643, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.6127946127946128, | |
| "grad_norm": 4.806687831878662, | |
| "learning_rate": 4.027241770715097e-05, | |
| "loss": 0.3662, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.6195286195286195, | |
| "grad_norm": 5.562252998352051, | |
| "learning_rate": 4.01589103291714e-05, | |
| "loss": 0.3436, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.6262626262626263, | |
| "grad_norm": 4.167670249938965, | |
| "learning_rate": 4.004540295119183e-05, | |
| "loss": 0.3238, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.632996632996633, | |
| "grad_norm": 4.628272533416748, | |
| "learning_rate": 3.993189557321226e-05, | |
| "loss": 0.3351, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.6397306397306397, | |
| "grad_norm": 6.1379828453063965, | |
| "learning_rate": 3.981838819523269e-05, | |
| "loss": 0.3199, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.6464646464646465, | |
| "grad_norm": 4.196822643280029, | |
| "learning_rate": 3.970488081725312e-05, | |
| "loss": 0.3056, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.6531986531986532, | |
| "grad_norm": 5.613431930541992, | |
| "learning_rate": 3.959137343927356e-05, | |
| "loss": 0.3292, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.6599326599326599, | |
| "grad_norm": 4.543855667114258, | |
| "learning_rate": 3.9477866061293985e-05, | |
| "loss": 0.3001, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 3.637694835662842, | |
| "learning_rate": 3.936435868331442e-05, | |
| "loss": 0.2602, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.6734006734006734, | |
| "grad_norm": 6.640063285827637, | |
| "learning_rate": 3.9250851305334845e-05, | |
| "loss": 0.3195, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.6801346801346801, | |
| "grad_norm": 4.616398334503174, | |
| "learning_rate": 3.913734392735528e-05, | |
| "loss": 0.3157, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.6868686868686869, | |
| "grad_norm": 5.17544412612915, | |
| "learning_rate": 3.9023836549375706e-05, | |
| "loss": 0.3079, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.6936026936026936, | |
| "grad_norm": 5.558164596557617, | |
| "learning_rate": 3.891032917139614e-05, | |
| "loss": 0.3093, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.7003367003367004, | |
| "grad_norm": 4.862564563751221, | |
| "learning_rate": 3.879682179341657e-05, | |
| "loss": 0.3255, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.7070707070707071, | |
| "grad_norm": 3.790825605392456, | |
| "learning_rate": 3.868331441543701e-05, | |
| "loss": 0.3331, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.7138047138047138, | |
| "grad_norm": 4.081621170043945, | |
| "learning_rate": 3.8569807037457434e-05, | |
| "loss": 0.275, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.7205387205387206, | |
| "grad_norm": 3.9744527339935303, | |
| "learning_rate": 3.845629965947787e-05, | |
| "loss": 0.2978, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 4.056180953979492, | |
| "learning_rate": 3.8342792281498294e-05, | |
| "loss": 0.2912, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.734006734006734, | |
| "grad_norm": 5.50215482711792, | |
| "learning_rate": 3.822928490351873e-05, | |
| "loss": 0.2738, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.7407407407407407, | |
| "grad_norm": 6.039124488830566, | |
| "learning_rate": 3.811577752553916e-05, | |
| "loss": 0.2912, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.7474747474747475, | |
| "grad_norm": 4.475265979766846, | |
| "learning_rate": 3.8002270147559595e-05, | |
| "loss": 0.3059, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.7542087542087542, | |
| "grad_norm": 3.8151988983154297, | |
| "learning_rate": 3.788876276958002e-05, | |
| "loss": 0.2865, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.7609427609427609, | |
| "grad_norm": 4.704629898071289, | |
| "learning_rate": 3.7775255391600456e-05, | |
| "loss": 0.2938, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.7676767676767676, | |
| "grad_norm": 4.103381633758545, | |
| "learning_rate": 3.766174801362088e-05, | |
| "loss": 0.277, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.7744107744107744, | |
| "grad_norm": 5.573786735534668, | |
| "learning_rate": 3.754824063564132e-05, | |
| "loss": 0.3232, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.7811447811447811, | |
| "grad_norm": 3.373387575149536, | |
| "learning_rate": 3.743473325766175e-05, | |
| "loss": 0.3285, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.7878787878787878, | |
| "grad_norm": 3.7531933784484863, | |
| "learning_rate": 3.7321225879682184e-05, | |
| "loss": 0.2596, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.7946127946127947, | |
| "grad_norm": 5.006664752960205, | |
| "learning_rate": 3.720771850170261e-05, | |
| "loss": 0.2919, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.8013468013468014, | |
| "grad_norm": 6.7509307861328125, | |
| "learning_rate": 3.7094211123723045e-05, | |
| "loss": 0.2947, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.8080808080808081, | |
| "grad_norm": 3.8846304416656494, | |
| "learning_rate": 3.698070374574347e-05, | |
| "loss": 0.2906, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.8080808080808081, | |
| "eval_loss": 0.4824906289577484, | |
| "eval_runtime": 688.719, | |
| "eval_samples_per_second": 2.059, | |
| "eval_steps_per_second": 0.129, | |
| "eval_wer": 0.5195876877391354, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.8148148148148148, | |
| "grad_norm": 4.751924514770508, | |
| "learning_rate": 3.6867196367763905e-05, | |
| "loss": 0.2785, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.8215488215488216, | |
| "grad_norm": 3.7664754390716553, | |
| "learning_rate": 3.675368898978433e-05, | |
| "loss": 0.3289, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.8282828282828283, | |
| "grad_norm": 5.232487201690674, | |
| "learning_rate": 3.6640181611804766e-05, | |
| "loss": 0.3174, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.835016835016835, | |
| "grad_norm": 2.880322217941284, | |
| "learning_rate": 3.65266742338252e-05, | |
| "loss": 0.2686, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.8417508417508418, | |
| "grad_norm": 4.3191609382629395, | |
| "learning_rate": 3.641316685584563e-05, | |
| "loss": 0.3059, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.8484848484848485, | |
| "grad_norm": 3.35304594039917, | |
| "learning_rate": 3.629965947786606e-05, | |
| "loss": 0.2693, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.8552188552188552, | |
| "grad_norm": 3.989720582962036, | |
| "learning_rate": 3.6186152099886494e-05, | |
| "loss": 0.2748, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.8619528619528619, | |
| "grad_norm": 2.795743942260742, | |
| "learning_rate": 3.607264472190692e-05, | |
| "loss": 0.2608, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.8686868686868687, | |
| "grad_norm": 5.1076226234436035, | |
| "learning_rate": 3.5959137343927354e-05, | |
| "loss": 0.2979, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.8754208754208754, | |
| "grad_norm": 3.131528854370117, | |
| "learning_rate": 3.584562996594779e-05, | |
| "loss": 0.2865, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.8821548821548821, | |
| "grad_norm": 6.658942699432373, | |
| "learning_rate": 3.573212258796822e-05, | |
| "loss": 0.2727, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 4.044928073883057, | |
| "learning_rate": 3.561861520998865e-05, | |
| "loss": 0.2913, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.8956228956228957, | |
| "grad_norm": 4.87237024307251, | |
| "learning_rate": 3.550510783200908e-05, | |
| "loss": 0.2485, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.9023569023569024, | |
| "grad_norm": 3.8377342224121094, | |
| "learning_rate": 3.539160045402951e-05, | |
| "loss": 0.2707, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 3.848212957382202, | |
| "learning_rate": 3.527809307604994e-05, | |
| "loss": 0.2375, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.9158249158249159, | |
| "grad_norm": 3.686363697052002, | |
| "learning_rate": 3.5164585698070377e-05, | |
| "loss": 0.2597, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.9225589225589226, | |
| "grad_norm": 4.444821834564209, | |
| "learning_rate": 3.505107832009081e-05, | |
| "loss": 0.2599, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.9292929292929293, | |
| "grad_norm": 4.101839542388916, | |
| "learning_rate": 3.493757094211124e-05, | |
| "loss": 0.297, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.936026936026936, | |
| "grad_norm": 4.912603855133057, | |
| "learning_rate": 3.482406356413167e-05, | |
| "loss": 0.2557, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.9427609427609428, | |
| "grad_norm": 4.1229248046875, | |
| "learning_rate": 3.47105561861521e-05, | |
| "loss": 0.271, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.9494949494949495, | |
| "grad_norm": 3.668956756591797, | |
| "learning_rate": 3.459704880817253e-05, | |
| "loss": 0.286, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.9562289562289562, | |
| "grad_norm": 5.052644729614258, | |
| "learning_rate": 3.448354143019296e-05, | |
| "loss": 0.2841, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.9629629629629629, | |
| "grad_norm": 3.9958231449127197, | |
| "learning_rate": 3.43700340522134e-05, | |
| "loss": 0.2473, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.9696969696969697, | |
| "grad_norm": 4.527822971343994, | |
| "learning_rate": 3.4256526674233826e-05, | |
| "loss": 0.2867, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.9764309764309764, | |
| "grad_norm": 3.6779627799987793, | |
| "learning_rate": 3.414301929625426e-05, | |
| "loss": 0.2547, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.9831649831649831, | |
| "grad_norm": 4.0302581787109375, | |
| "learning_rate": 3.4029511918274686e-05, | |
| "loss": 0.2697, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.98989898989899, | |
| "grad_norm": 3.447392463684082, | |
| "learning_rate": 3.391600454029512e-05, | |
| "loss": 0.2809, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.9966329966329966, | |
| "grad_norm": 3.6442668437957764, | |
| "learning_rate": 3.380249716231555e-05, | |
| "loss": 0.2705, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.0033670033670035, | |
| "grad_norm": 3.358112335205078, | |
| "learning_rate": 3.368898978433598e-05, | |
| "loss": 0.2206, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.0101010101010102, | |
| "grad_norm": 2.5414748191833496, | |
| "learning_rate": 3.3575482406356414e-05, | |
| "loss": 0.22, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.0168350168350169, | |
| "grad_norm": 3.4823262691497803, | |
| "learning_rate": 3.346197502837685e-05, | |
| "loss": 0.1841, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.0235690235690236, | |
| "grad_norm": 3.468315362930298, | |
| "learning_rate": 3.3348467650397275e-05, | |
| "loss": 0.2136, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.0303030303030303, | |
| "grad_norm": 3.683201551437378, | |
| "learning_rate": 3.323496027241771e-05, | |
| "loss": 0.1949, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.037037037037037, | |
| "grad_norm": 2.5732924938201904, | |
| "learning_rate": 3.3121452894438135e-05, | |
| "loss": 0.2017, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.0437710437710437, | |
| "grad_norm": 4.662359714508057, | |
| "learning_rate": 3.300794551645857e-05, | |
| "loss": 0.224, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.0505050505050506, | |
| "grad_norm": 3.25209379196167, | |
| "learning_rate": 3.2894438138479e-05, | |
| "loss": 0.1729, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.0572390572390573, | |
| "grad_norm": 3.497758388519287, | |
| "learning_rate": 3.2780930760499436e-05, | |
| "loss": 0.2176, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.063973063973064, | |
| "grad_norm": 3.802095890045166, | |
| "learning_rate": 3.2667423382519863e-05, | |
| "loss": 0.1802, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.0707070707070707, | |
| "grad_norm": 3.487844705581665, | |
| "learning_rate": 3.25539160045403e-05, | |
| "loss": 0.2105, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.0774410774410774, | |
| "grad_norm": 2.5473880767822266, | |
| "learning_rate": 3.2440408626560724e-05, | |
| "loss": 0.1848, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.0774410774410774, | |
| "eval_loss": 0.46825486421585083, | |
| "eval_runtime": 776.6351, | |
| "eval_samples_per_second": 1.826, | |
| "eval_steps_per_second": 0.115, | |
| "eval_wer": 0.502912455028268, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.0841750841750841, | |
| "grad_norm": 3.390531063079834, | |
| "learning_rate": 3.232690124858116e-05, | |
| "loss": 0.205, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.0909090909090908, | |
| "grad_norm": 3.4687283039093018, | |
| "learning_rate": 3.221339387060159e-05, | |
| "loss": 0.1893, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.0976430976430978, | |
| "grad_norm": 4.269082069396973, | |
| "learning_rate": 3.2099886492622025e-05, | |
| "loss": 0.1824, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.1043771043771045, | |
| "grad_norm": 4.165797233581543, | |
| "learning_rate": 3.198637911464245e-05, | |
| "loss": 0.1989, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.1111111111111112, | |
| "grad_norm": 3.5516631603240967, | |
| "learning_rate": 3.1872871736662886e-05, | |
| "loss": 0.1872, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.1178451178451179, | |
| "grad_norm": 3.6036102771759033, | |
| "learning_rate": 3.175936435868331e-05, | |
| "loss": 0.2143, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.1245791245791246, | |
| "grad_norm": 3.6270053386688232, | |
| "learning_rate": 3.1645856980703746e-05, | |
| "loss": 0.1806, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.1313131313131313, | |
| "grad_norm": 3.623009204864502, | |
| "learning_rate": 3.153234960272417e-05, | |
| "loss": 0.1964, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.138047138047138, | |
| "grad_norm": 3.477240800857544, | |
| "learning_rate": 3.141884222474461e-05, | |
| "loss": 0.2126, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.144781144781145, | |
| "grad_norm": 4.374250411987305, | |
| "learning_rate": 3.130533484676504e-05, | |
| "loss": 0.2001, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.1515151515151516, | |
| "grad_norm": 3.302889347076416, | |
| "learning_rate": 3.1191827468785474e-05, | |
| "loss": 0.1732, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.1582491582491583, | |
| "grad_norm": 3.2889134883880615, | |
| "learning_rate": 3.10783200908059e-05, | |
| "loss": 0.2031, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.164983164983165, | |
| "grad_norm": 3.4270858764648438, | |
| "learning_rate": 3.0964812712826335e-05, | |
| "loss": 0.1947, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.1717171717171717, | |
| "grad_norm": 2.7580225467681885, | |
| "learning_rate": 3.085130533484676e-05, | |
| "loss": 0.1871, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.1784511784511784, | |
| "grad_norm": 3.1256375312805176, | |
| "learning_rate": 3.0737797956867195e-05, | |
| "loss": 0.2017, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.1851851851851851, | |
| "grad_norm": 2.583787441253662, | |
| "learning_rate": 3.062429057888763e-05, | |
| "loss": 0.2098, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.1919191919191918, | |
| "grad_norm": 3.7047979831695557, | |
| "learning_rate": 3.051078320090806e-05, | |
| "loss": 0.2023, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.1986531986531987, | |
| "grad_norm": 2.448273181915283, | |
| "learning_rate": 3.039727582292849e-05, | |
| "loss": 0.1836, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.2053872053872055, | |
| "grad_norm": 2.4795892238616943, | |
| "learning_rate": 3.0283768444948923e-05, | |
| "loss": 0.1904, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.2121212121212122, | |
| "grad_norm": 4.302123546600342, | |
| "learning_rate": 3.0170261066969354e-05, | |
| "loss": 0.1787, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.2188552188552189, | |
| "grad_norm": 3.4514520168304443, | |
| "learning_rate": 3.0056753688989787e-05, | |
| "loss": 0.1779, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.2255892255892256, | |
| "grad_norm": 4.459456443786621, | |
| "learning_rate": 2.9943246311010214e-05, | |
| "loss": 0.1958, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.2323232323232323, | |
| "grad_norm": 2.8016040325164795, | |
| "learning_rate": 2.9829738933030648e-05, | |
| "loss": 0.2076, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.239057239057239, | |
| "grad_norm": 2.360806941986084, | |
| "learning_rate": 2.9716231555051078e-05, | |
| "loss": 0.1778, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.2457912457912457, | |
| "grad_norm": 3.3838906288146973, | |
| "learning_rate": 2.9602724177071512e-05, | |
| "loss": 0.2019, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.2525252525252526, | |
| "grad_norm": 3.9138290882110596, | |
| "learning_rate": 2.948921679909194e-05, | |
| "loss": 0.1981, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.2592592592592593, | |
| "grad_norm": 3.0798556804656982, | |
| "learning_rate": 2.9375709421112372e-05, | |
| "loss": 0.1718, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.265993265993266, | |
| "grad_norm": 5.293337345123291, | |
| "learning_rate": 2.9262202043132803e-05, | |
| "loss": 0.1801, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.2727272727272727, | |
| "grad_norm": 3.36914324760437, | |
| "learning_rate": 2.9148694665153236e-05, | |
| "loss": 0.2143, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.2794612794612794, | |
| "grad_norm": 3.070244550704956, | |
| "learning_rate": 2.9035187287173667e-05, | |
| "loss": 0.1844, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.2861952861952861, | |
| "grad_norm": 2.8654966354370117, | |
| "learning_rate": 2.89216799091941e-05, | |
| "loss": 0.1774, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.2929292929292928, | |
| "grad_norm": 3.248065710067749, | |
| "learning_rate": 2.8808172531214527e-05, | |
| "loss": 0.1649, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.2996632996632997, | |
| "grad_norm": 3.4403655529022217, | |
| "learning_rate": 2.869466515323496e-05, | |
| "loss": 0.1858, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.3063973063973064, | |
| "grad_norm": 2.928788661956787, | |
| "learning_rate": 2.858115777525539e-05, | |
| "loss": 0.1986, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.3131313131313131, | |
| "grad_norm": 2.984104871749878, | |
| "learning_rate": 2.8467650397275825e-05, | |
| "loss": 0.1874, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.3198653198653199, | |
| "grad_norm": 3.16933536529541, | |
| "learning_rate": 2.8354143019296252e-05, | |
| "loss": 0.1954, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.3265993265993266, | |
| "grad_norm": 3.3342158794403076, | |
| "learning_rate": 2.8240635641316686e-05, | |
| "loss": 0.1654, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 3.3244802951812744, | |
| "learning_rate": 2.8127128263337116e-05, | |
| "loss": 0.1461, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.34006734006734, | |
| "grad_norm": 3.566857099533081, | |
| "learning_rate": 2.801362088535755e-05, | |
| "loss": 0.1732, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.3468013468013469, | |
| "grad_norm": 2.2419066429138184, | |
| "learning_rate": 2.790011350737798e-05, | |
| "loss": 0.1612, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.3468013468013469, | |
| "eval_loss": 0.470431387424469, | |
| "eval_runtime": 778.9928, | |
| "eval_samples_per_second": 1.82, | |
| "eval_steps_per_second": 0.114, | |
| "eval_wer": 0.49309005767803094, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.3535353535353536, | |
| "grad_norm": 3.476229429244995, | |
| "learning_rate": 2.7786606129398414e-05, | |
| "loss": 0.1884, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.3602693602693603, | |
| "grad_norm": 2.509948968887329, | |
| "learning_rate": 2.767309875141884e-05, | |
| "loss": 0.1647, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.367003367003367, | |
| "grad_norm": 3.446333408355713, | |
| "learning_rate": 2.7559591373439274e-05, | |
| "loss": 0.1882, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.3737373737373737, | |
| "grad_norm": 4.690558910369873, | |
| "learning_rate": 2.7446083995459704e-05, | |
| "loss": 0.1772, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.3804713804713804, | |
| "grad_norm": 3.1924571990966797, | |
| "learning_rate": 2.7332576617480138e-05, | |
| "loss": 0.1598, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.387205387205387, | |
| "grad_norm": 3.3819077014923096, | |
| "learning_rate": 2.7219069239500565e-05, | |
| "loss": 0.1778, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.393939393939394, | |
| "grad_norm": 3.4989449977874756, | |
| "learning_rate": 2.7105561861521002e-05, | |
| "loss": 0.1613, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.4006734006734007, | |
| "grad_norm": 3.041142225265503, | |
| "learning_rate": 2.699205448354143e-05, | |
| "loss": 0.1602, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.4074074074074074, | |
| "grad_norm": 2.791797399520874, | |
| "learning_rate": 2.6878547105561863e-05, | |
| "loss": 0.1847, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.4141414141414141, | |
| "grad_norm": 3.252044916152954, | |
| "learning_rate": 2.6765039727582293e-05, | |
| "loss": 0.1687, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.4208754208754208, | |
| "grad_norm": 4.116684436798096, | |
| "learning_rate": 2.6651532349602727e-05, | |
| "loss": 0.1839, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 1.4276094276094276, | |
| "grad_norm": 2.723188638687134, | |
| "learning_rate": 2.6538024971623154e-05, | |
| "loss": 0.1746, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.4343434343434343, | |
| "grad_norm": 2.7226133346557617, | |
| "learning_rate": 2.6424517593643587e-05, | |
| "loss": 0.1621, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 1.4410774410774412, | |
| "grad_norm": 4.077718734741211, | |
| "learning_rate": 2.6311010215664018e-05, | |
| "loss": 0.1765, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.4478114478114479, | |
| "grad_norm": 3.0060672760009766, | |
| "learning_rate": 2.619750283768445e-05, | |
| "loss": 0.1658, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.4545454545454546, | |
| "grad_norm": 3.187156915664673, | |
| "learning_rate": 2.6083995459704878e-05, | |
| "loss": 0.1722, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.4612794612794613, | |
| "grad_norm": 2.6665992736816406, | |
| "learning_rate": 2.5970488081725315e-05, | |
| "loss": 0.1879, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.468013468013468, | |
| "grad_norm": 3.637021541595459, | |
| "learning_rate": 2.5856980703745742e-05, | |
| "loss": 0.1657, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.4747474747474747, | |
| "grad_norm": 3.3992161750793457, | |
| "learning_rate": 2.5743473325766176e-05, | |
| "loss": 0.2039, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 1.4814814814814814, | |
| "grad_norm": 4.711835861206055, | |
| "learning_rate": 2.5629965947786606e-05, | |
| "loss": 0.1943, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.4882154882154883, | |
| "grad_norm": 2.712679624557495, | |
| "learning_rate": 2.551645856980704e-05, | |
| "loss": 0.1671, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 1.494949494949495, | |
| "grad_norm": 3.3363306522369385, | |
| "learning_rate": 2.5402951191827467e-05, | |
| "loss": 0.1669, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.5016835016835017, | |
| "grad_norm": 3.961500406265259, | |
| "learning_rate": 2.52894438138479e-05, | |
| "loss": 0.1478, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.5084175084175084, | |
| "grad_norm": 3.390343189239502, | |
| "learning_rate": 2.517593643586833e-05, | |
| "loss": 0.1744, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.5151515151515151, | |
| "grad_norm": 3.3122527599334717, | |
| "learning_rate": 2.5062429057888764e-05, | |
| "loss": 0.182, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.5218855218855218, | |
| "grad_norm": 3.9705302715301514, | |
| "learning_rate": 2.4948921679909195e-05, | |
| "loss": 0.1545, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.5286195286195285, | |
| "grad_norm": 3.5639703273773193, | |
| "learning_rate": 2.483541430192963e-05, | |
| "loss": 0.1643, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.5353535353535355, | |
| "grad_norm": 3.0987420082092285, | |
| "learning_rate": 2.472190692395006e-05, | |
| "loss": 0.1845, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.542087542087542, | |
| "grad_norm": 3.1441290378570557, | |
| "learning_rate": 2.460839954597049e-05, | |
| "loss": 0.1515, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.5488215488215489, | |
| "grad_norm": 3.7302119731903076, | |
| "learning_rate": 2.4494892167990923e-05, | |
| "loss": 0.1838, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.5555555555555556, | |
| "grad_norm": 2.877547025680542, | |
| "learning_rate": 2.4381384790011353e-05, | |
| "loss": 0.1837, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.5622895622895623, | |
| "grad_norm": 3.0840272903442383, | |
| "learning_rate": 2.4267877412031783e-05, | |
| "loss": 0.17, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.569023569023569, | |
| "grad_norm": 2.3135063648223877, | |
| "learning_rate": 2.4154370034052214e-05, | |
| "loss": 0.1524, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.5757575757575757, | |
| "grad_norm": 5.435102939605713, | |
| "learning_rate": 2.4040862656072647e-05, | |
| "loss": 0.1631, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.5824915824915826, | |
| "grad_norm": 2.6250736713409424, | |
| "learning_rate": 2.3927355278093077e-05, | |
| "loss": 0.1748, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.589225589225589, | |
| "grad_norm": 3.478433132171631, | |
| "learning_rate": 2.3813847900113508e-05, | |
| "loss": 0.1557, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.595959595959596, | |
| "grad_norm": 2.924372673034668, | |
| "learning_rate": 2.370034052213394e-05, | |
| "loss": 0.1647, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.6026936026936027, | |
| "grad_norm": 2.708841562271118, | |
| "learning_rate": 2.3586833144154372e-05, | |
| "loss": 0.2072, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 1.6094276094276094, | |
| "grad_norm": 3.2418808937072754, | |
| "learning_rate": 2.3473325766174802e-05, | |
| "loss": 0.1705, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 1.6161616161616161, | |
| "grad_norm": 2.908341884613037, | |
| "learning_rate": 2.3359818388195236e-05, | |
| "loss": 0.192, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.6161616161616161, | |
| "eval_loss": 0.4698619246482849, | |
| "eval_runtime": 721.5021, | |
| "eval_samples_per_second": 1.965, | |
| "eval_steps_per_second": 0.123, | |
| "eval_wer": 0.49100565358917253, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.6228956228956228, | |
| "grad_norm": 2.884115695953369, | |
| "learning_rate": 2.3246311010215666e-05, | |
| "loss": 0.17, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 1.6296296296296298, | |
| "grad_norm": 3.3428990840911865, | |
| "learning_rate": 2.3132803632236096e-05, | |
| "loss": 0.1576, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.6363636363636362, | |
| "grad_norm": 2.6130261421203613, | |
| "learning_rate": 2.3019296254256527e-05, | |
| "loss": 0.161, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 1.6430976430976432, | |
| "grad_norm": 2.56199049949646, | |
| "learning_rate": 2.290578887627696e-05, | |
| "loss": 0.1741, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.6498316498316499, | |
| "grad_norm": 3.0649795532226562, | |
| "learning_rate": 2.279228149829739e-05, | |
| "loss": 0.1662, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.6565656565656566, | |
| "grad_norm": 2.2965986728668213, | |
| "learning_rate": 2.267877412031782e-05, | |
| "loss": 0.1717, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.6632996632996633, | |
| "grad_norm": 3.121750831604004, | |
| "learning_rate": 2.2565266742338255e-05, | |
| "loss": 0.1577, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 1.67003367003367, | |
| "grad_norm": 2.052502393722534, | |
| "learning_rate": 2.2451759364358685e-05, | |
| "loss": 0.1568, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.676767676767677, | |
| "grad_norm": 2.4969053268432617, | |
| "learning_rate": 2.2338251986379115e-05, | |
| "loss": 0.153, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.6835016835016834, | |
| "grad_norm": 2.811131715774536, | |
| "learning_rate": 2.222474460839955e-05, | |
| "loss": 0.1527, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.6902356902356903, | |
| "grad_norm": 2.959965229034424, | |
| "learning_rate": 2.211123723041998e-05, | |
| "loss": 0.1501, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 1.696969696969697, | |
| "grad_norm": 3.3598415851593018, | |
| "learning_rate": 2.199772985244041e-05, | |
| "loss": 0.1702, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.7037037037037037, | |
| "grad_norm": 3.3592233657836914, | |
| "learning_rate": 2.1884222474460843e-05, | |
| "loss": 0.1584, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 1.7104377104377104, | |
| "grad_norm": 3.0574469566345215, | |
| "learning_rate": 2.1770715096481273e-05, | |
| "loss": 0.1513, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.7171717171717171, | |
| "grad_norm": 2.782938003540039, | |
| "learning_rate": 2.1657207718501704e-05, | |
| "loss": 0.1646, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.723905723905724, | |
| "grad_norm": 2.9138362407684326, | |
| "learning_rate": 2.1543700340522134e-05, | |
| "loss": 0.1513, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.7306397306397305, | |
| "grad_norm": 2.8213393688201904, | |
| "learning_rate": 2.1430192962542568e-05, | |
| "loss": 0.1683, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 1.7373737373737375, | |
| "grad_norm": 3.486140489578247, | |
| "learning_rate": 2.1316685584562998e-05, | |
| "loss": 0.1713, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.7441077441077442, | |
| "grad_norm": 2.8046581745147705, | |
| "learning_rate": 2.1203178206583428e-05, | |
| "loss": 0.1804, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 1.7508417508417509, | |
| "grad_norm": 2.6458210945129395, | |
| "learning_rate": 2.1089670828603862e-05, | |
| "loss": 0.165, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.7575757575757576, | |
| "grad_norm": 2.7271742820739746, | |
| "learning_rate": 2.0976163450624292e-05, | |
| "loss": 0.165, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 1.7643097643097643, | |
| "grad_norm": 3.7697384357452393, | |
| "learning_rate": 2.0862656072644723e-05, | |
| "loss": 0.1944, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.7710437710437712, | |
| "grad_norm": 2.6092400550842285, | |
| "learning_rate": 2.0749148694665156e-05, | |
| "loss": 0.1712, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 1.7777777777777777, | |
| "grad_norm": 3.9321539402008057, | |
| "learning_rate": 2.0635641316685587e-05, | |
| "loss": 0.1718, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.7845117845117846, | |
| "grad_norm": 3.0893261432647705, | |
| "learning_rate": 2.0522133938706017e-05, | |
| "loss": 0.1802, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.791245791245791, | |
| "grad_norm": 3.8314249515533447, | |
| "learning_rate": 2.0408626560726447e-05, | |
| "loss": 0.1746, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.797979797979798, | |
| "grad_norm": 3.2088515758514404, | |
| "learning_rate": 2.029511918274688e-05, | |
| "loss": 0.1693, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.8047138047138047, | |
| "grad_norm": 3.136512517929077, | |
| "learning_rate": 2.018161180476731e-05, | |
| "loss": 0.1773, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.8114478114478114, | |
| "grad_norm": 2.799889326095581, | |
| "learning_rate": 2.006810442678774e-05, | |
| "loss": 0.146, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.8181818181818183, | |
| "grad_norm": 2.3367459774017334, | |
| "learning_rate": 1.9954597048808175e-05, | |
| "loss": 0.1561, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.8249158249158248, | |
| "grad_norm": 3.626417636871338, | |
| "learning_rate": 1.9841089670828605e-05, | |
| "loss": 0.1532, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.8316498316498318, | |
| "grad_norm": 3.480536460876465, | |
| "learning_rate": 1.9727582292849036e-05, | |
| "loss": 0.1493, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.8383838383838382, | |
| "grad_norm": 2.8837146759033203, | |
| "learning_rate": 1.961407491486947e-05, | |
| "loss": 0.1475, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.8451178451178452, | |
| "grad_norm": 2.784156322479248, | |
| "learning_rate": 1.95005675368899e-05, | |
| "loss": 0.1698, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.8518518518518519, | |
| "grad_norm": 2.8038928508758545, | |
| "learning_rate": 1.938706015891033e-05, | |
| "loss": 0.1686, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.8585858585858586, | |
| "grad_norm": 2.904350996017456, | |
| "learning_rate": 1.9273552780930764e-05, | |
| "loss": 0.1554, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.8653198653198653, | |
| "grad_norm": 2.736264705657959, | |
| "learning_rate": 1.9160045402951194e-05, | |
| "loss": 0.1562, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 1.872053872053872, | |
| "grad_norm": 3.001835584640503, | |
| "learning_rate": 1.9046538024971624e-05, | |
| "loss": 0.1567, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.878787878787879, | |
| "grad_norm": 2.6082592010498047, | |
| "learning_rate": 1.8933030646992055e-05, | |
| "loss": 0.1573, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.8855218855218854, | |
| "grad_norm": 3.1785757541656494, | |
| "learning_rate": 1.8819523269012488e-05, | |
| "loss": 0.1528, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.8855218855218854, | |
| "eval_loss": 0.46750280261039734, | |
| "eval_runtime": 783.2295, | |
| "eval_samples_per_second": 1.81, | |
| "eval_steps_per_second": 0.114, | |
| "eval_wer": 0.48695105933413285, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.8922558922558923, | |
| "grad_norm": 3.254110336303711, | |
| "learning_rate": 1.870601589103292e-05, | |
| "loss": 0.1492, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.898989898989899, | |
| "grad_norm": 3.617150068283081, | |
| "learning_rate": 1.859250851305335e-05, | |
| "loss": 0.1524, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.9057239057239057, | |
| "grad_norm": 2.7314984798431396, | |
| "learning_rate": 1.8479001135073783e-05, | |
| "loss": 0.1824, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.9124579124579124, | |
| "grad_norm": 3.677401304244995, | |
| "learning_rate": 1.8365493757094213e-05, | |
| "loss": 0.1675, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.9191919191919191, | |
| "grad_norm": 3.4799599647521973, | |
| "learning_rate": 1.8251986379114643e-05, | |
| "loss": 0.1936, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.925925925925926, | |
| "grad_norm": 2.47420072555542, | |
| "learning_rate": 1.8138479001135077e-05, | |
| "loss": 0.1624, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.9326599326599325, | |
| "grad_norm": 3.2847509384155273, | |
| "learning_rate": 1.8024971623155507e-05, | |
| "loss": 0.1867, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.9393939393939394, | |
| "grad_norm": 2.4963037967681885, | |
| "learning_rate": 1.7911464245175937e-05, | |
| "loss": 0.1498, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.9461279461279462, | |
| "grad_norm": 4.231179714202881, | |
| "learning_rate": 1.7797956867196368e-05, | |
| "loss": 0.1569, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.9528619528619529, | |
| "grad_norm": 3.305777072906494, | |
| "learning_rate": 1.76844494892168e-05, | |
| "loss": 0.1647, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.9595959595959596, | |
| "grad_norm": 2.858846664428711, | |
| "learning_rate": 1.757094211123723e-05, | |
| "loss": 0.1668, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.9663299663299663, | |
| "grad_norm": 2.4449424743652344, | |
| "learning_rate": 1.7457434733257662e-05, | |
| "loss": 0.1506, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.9730639730639732, | |
| "grad_norm": 2.5614805221557617, | |
| "learning_rate": 1.7343927355278096e-05, | |
| "loss": 0.1821, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.9797979797979797, | |
| "grad_norm": 3.1182758808135986, | |
| "learning_rate": 1.7230419977298526e-05, | |
| "loss": 0.1709, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.9865319865319866, | |
| "grad_norm": 3.463992118835449, | |
| "learning_rate": 1.7116912599318956e-05, | |
| "loss": 0.1479, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.9932659932659933, | |
| "grad_norm": 2.1584393978118896, | |
| "learning_rate": 1.700340522133939e-05, | |
| "loss": 0.1431, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 3.5880393981933594, | |
| "learning_rate": 1.688989784335982e-05, | |
| "loss": 0.1478, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 2.006734006734007, | |
| "grad_norm": 1.8367834091186523, | |
| "learning_rate": 1.677639046538025e-05, | |
| "loss": 0.0986, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 2.0134680134680134, | |
| "grad_norm": 2.266422748565674, | |
| "learning_rate": 1.6662883087400684e-05, | |
| "loss": 0.1031, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 2.0202020202020203, | |
| "grad_norm": 2.440058708190918, | |
| "learning_rate": 1.6549375709421114e-05, | |
| "loss": 0.0995, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.026936026936027, | |
| "grad_norm": 1.5215619802474976, | |
| "learning_rate": 1.6435868331441545e-05, | |
| "loss": 0.0937, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 2.0336700336700337, | |
| "grad_norm": 3.0853044986724854, | |
| "learning_rate": 1.6322360953461975e-05, | |
| "loss": 0.1028, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 2.04040404040404, | |
| "grad_norm": 2.2898178100585938, | |
| "learning_rate": 1.620885357548241e-05, | |
| "loss": 0.0971, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 2.047138047138047, | |
| "grad_norm": 2.6617209911346436, | |
| "learning_rate": 1.609534619750284e-05, | |
| "loss": 0.1043, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 2.053872053872054, | |
| "grad_norm": 3.225191593170166, | |
| "learning_rate": 1.598183881952327e-05, | |
| "loss": 0.0998, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 2.0606060606060606, | |
| "grad_norm": 2.3820834159851074, | |
| "learning_rate": 1.5868331441543703e-05, | |
| "loss": 0.083, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 2.0673400673400675, | |
| "grad_norm": 3.0194029808044434, | |
| "learning_rate": 1.5754824063564133e-05, | |
| "loss": 0.0909, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 2.074074074074074, | |
| "grad_norm": 1.5243077278137207, | |
| "learning_rate": 1.5641316685584564e-05, | |
| "loss": 0.0877, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 2.080808080808081, | |
| "grad_norm": 2.7908105850219727, | |
| "learning_rate": 1.5527809307604997e-05, | |
| "loss": 0.1003, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 2.0875420875420874, | |
| "grad_norm": 2.368906259536743, | |
| "learning_rate": 1.5414301929625428e-05, | |
| "loss": 0.1013, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.0942760942760943, | |
| "grad_norm": 1.6835886240005493, | |
| "learning_rate": 1.5300794551645858e-05, | |
| "loss": 0.1012, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 2.101010101010101, | |
| "grad_norm": 2.943992853164673, | |
| "learning_rate": 1.518728717366629e-05, | |
| "loss": 0.0949, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 2.1077441077441077, | |
| "grad_norm": 2.4449052810668945, | |
| "learning_rate": 1.5073779795686722e-05, | |
| "loss": 0.0967, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 2.1144781144781146, | |
| "grad_norm": 2.521737813949585, | |
| "learning_rate": 1.4960272417707152e-05, | |
| "loss": 0.0933, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 2.121212121212121, | |
| "grad_norm": 2.7859129905700684, | |
| "learning_rate": 1.4846765039727584e-05, | |
| "loss": 0.1091, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 2.127946127946128, | |
| "grad_norm": 2.2307798862457275, | |
| "learning_rate": 1.4733257661748014e-05, | |
| "loss": 0.091, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 2.1346801346801345, | |
| "grad_norm": 3.108671188354492, | |
| "learning_rate": 1.4619750283768446e-05, | |
| "loss": 0.1147, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 2.1414141414141414, | |
| "grad_norm": 2.4862091541290283, | |
| "learning_rate": 1.4506242905788878e-05, | |
| "loss": 0.0951, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 2.148148148148148, | |
| "grad_norm": 1.7988865375518799, | |
| "learning_rate": 1.4392735527809309e-05, | |
| "loss": 0.0963, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 2.154882154882155, | |
| "grad_norm": 2.6203229427337646, | |
| "learning_rate": 1.427922814982974e-05, | |
| "loss": 0.0999, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.154882154882155, | |
| "eval_loss": 0.47007495164871216, | |
| "eval_runtime": 773.3558, | |
| "eval_samples_per_second": 1.834, | |
| "eval_steps_per_second": 0.115, | |
| "eval_wer": 0.4653360744674776, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.1616161616161618, | |
| "grad_norm": 3.4655826091766357, | |
| "learning_rate": 1.4165720771850171e-05, | |
| "loss": 0.1302, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 2.1683501683501682, | |
| "grad_norm": 2.2620227336883545, | |
| "learning_rate": 1.4052213393870603e-05, | |
| "loss": 0.0917, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 2.175084175084175, | |
| "grad_norm": 3.4524097442626953, | |
| "learning_rate": 1.3938706015891035e-05, | |
| "loss": 0.0977, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 2.1818181818181817, | |
| "grad_norm": 3.0727145671844482, | |
| "learning_rate": 1.3825198637911465e-05, | |
| "loss": 0.0995, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 2.1885521885521886, | |
| "grad_norm": 2.59820818901062, | |
| "learning_rate": 1.3711691259931897e-05, | |
| "loss": 0.0965, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 2.1952861952861955, | |
| "grad_norm": 1.8692411184310913, | |
| "learning_rate": 1.3598183881952328e-05, | |
| "loss": 0.0973, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 2.202020202020202, | |
| "grad_norm": 1.9435840845108032, | |
| "learning_rate": 1.348467650397276e-05, | |
| "loss": 0.1007, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 2.208754208754209, | |
| "grad_norm": 3.7439959049224854, | |
| "learning_rate": 1.3371169125993192e-05, | |
| "loss": 0.0929, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 2.2154882154882154, | |
| "grad_norm": 3.1171443462371826, | |
| "learning_rate": 1.3257661748013622e-05, | |
| "loss": 0.1129, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 3.671785593032837, | |
| "learning_rate": 1.3144154370034054e-05, | |
| "loss": 0.1068, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.228956228956229, | |
| "grad_norm": 2.3654842376708984, | |
| "learning_rate": 1.3030646992054484e-05, | |
| "loss": 0.0851, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 2.2356902356902357, | |
| "grad_norm": 3.868271589279175, | |
| "learning_rate": 1.2917139614074916e-05, | |
| "loss": 0.1121, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 2.242424242424242, | |
| "grad_norm": 2.7278647422790527, | |
| "learning_rate": 1.2803632236095348e-05, | |
| "loss": 0.1068, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 2.249158249158249, | |
| "grad_norm": 2.541274309158325, | |
| "learning_rate": 1.2690124858115778e-05, | |
| "loss": 0.1005, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 2.255892255892256, | |
| "grad_norm": 2.2592976093292236, | |
| "learning_rate": 1.257661748013621e-05, | |
| "loss": 0.1014, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 2.2626262626262625, | |
| "grad_norm": 1.714357614517212, | |
| "learning_rate": 1.246311010215664e-05, | |
| "loss": 0.0775, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 2.2693602693602695, | |
| "grad_norm": 3.3454010486602783, | |
| "learning_rate": 1.2349602724177071e-05, | |
| "loss": 0.1026, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 2.276094276094276, | |
| "grad_norm": 3.0652363300323486, | |
| "learning_rate": 1.2236095346197503e-05, | |
| "loss": 0.1227, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 2.282828282828283, | |
| "grad_norm": 2.409959077835083, | |
| "learning_rate": 1.2122587968217935e-05, | |
| "loss": 0.1115, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 2.28956228956229, | |
| "grad_norm": 3.0419325828552246, | |
| "learning_rate": 1.2009080590238365e-05, | |
| "loss": 0.0942, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.2962962962962963, | |
| "grad_norm": 2.3572564125061035, | |
| "learning_rate": 1.1895573212258797e-05, | |
| "loss": 0.1034, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 2.303030303030303, | |
| "grad_norm": 2.0597918033599854, | |
| "learning_rate": 1.1782065834279228e-05, | |
| "loss": 0.1169, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 2.3097643097643097, | |
| "grad_norm": 1.6198811531066895, | |
| "learning_rate": 1.166855845629966e-05, | |
| "loss": 0.1073, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 2.3164983164983166, | |
| "grad_norm": 2.385390520095825, | |
| "learning_rate": 1.1555051078320092e-05, | |
| "loss": 0.0913, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 2.323232323232323, | |
| "grad_norm": 1.6714180707931519, | |
| "learning_rate": 1.1441543700340522e-05, | |
| "loss": 0.0964, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.32996632996633, | |
| "grad_norm": 2.2347018718719482, | |
| "learning_rate": 1.1328036322360954e-05, | |
| "loss": 0.0948, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 2.3367003367003365, | |
| "grad_norm": 1.7842698097229004, | |
| "learning_rate": 1.1214528944381384e-05, | |
| "loss": 0.0933, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 2.3434343434343434, | |
| "grad_norm": 2.054187059402466, | |
| "learning_rate": 1.1101021566401816e-05, | |
| "loss": 0.0967, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 2.3501683501683504, | |
| "grad_norm": 2.3955607414245605, | |
| "learning_rate": 1.0987514188422248e-05, | |
| "loss": 0.0789, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 2.356902356902357, | |
| "grad_norm": 2.6920056343078613, | |
| "learning_rate": 1.0874006810442678e-05, | |
| "loss": 0.1126, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.3636363636363638, | |
| "grad_norm": 2.0969793796539307, | |
| "learning_rate": 1.076049943246311e-05, | |
| "loss": 0.0909, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 2.3703703703703702, | |
| "grad_norm": 2.8712689876556396, | |
| "learning_rate": 1.064699205448354e-05, | |
| "loss": 0.0948, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 2.377104377104377, | |
| "grad_norm": 3.084336519241333, | |
| "learning_rate": 1.0533484676503973e-05, | |
| "loss": 0.1052, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 2.3838383838383836, | |
| "grad_norm": 2.8842592239379883, | |
| "learning_rate": 1.0419977298524405e-05, | |
| "loss": 0.1051, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 2.3905723905723906, | |
| "grad_norm": 1.8973740339279175, | |
| "learning_rate": 1.0306469920544835e-05, | |
| "loss": 0.1224, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.3973063973063975, | |
| "grad_norm": 2.898562431335449, | |
| "learning_rate": 1.0192962542565267e-05, | |
| "loss": 0.117, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 2.404040404040404, | |
| "grad_norm": 2.5222558975219727, | |
| "learning_rate": 1.0079455164585697e-05, | |
| "loss": 0.0975, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 2.410774410774411, | |
| "grad_norm": 2.629905939102173, | |
| "learning_rate": 9.96594778660613e-06, | |
| "loss": 0.116, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 2.4175084175084174, | |
| "grad_norm": 2.554290294647217, | |
| "learning_rate": 9.852440408626561e-06, | |
| "loss": 0.112, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 2.4242424242424243, | |
| "grad_norm": 1.7490330934524536, | |
| "learning_rate": 9.738933030646992e-06, | |
| "loss": 0.088, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.4242424242424243, | |
| "eval_loss": 0.4696303904056549, | |
| "eval_runtime": 674.2707, | |
| "eval_samples_per_second": 2.103, | |
| "eval_steps_per_second": 0.132, | |
| "eval_wer": 0.46716349723031236, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.430976430976431, | |
| "grad_norm": 3.1101365089416504, | |
| "learning_rate": 9.625425652667424e-06, | |
| "loss": 0.0916, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 2.4377104377104377, | |
| "grad_norm": 4.431212425231934, | |
| "learning_rate": 9.511918274687854e-06, | |
| "loss": 0.119, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 2.4444444444444446, | |
| "grad_norm": 2.283841848373413, | |
| "learning_rate": 9.398410896708286e-06, | |
| "loss": 0.0921, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 2.451178451178451, | |
| "grad_norm": 2.228675127029419, | |
| "learning_rate": 9.284903518728718e-06, | |
| "loss": 0.0835, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 2.457912457912458, | |
| "grad_norm": 2.3716728687286377, | |
| "learning_rate": 9.171396140749148e-06, | |
| "loss": 0.0908, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 2.4646464646464645, | |
| "grad_norm": 2.604325532913208, | |
| "learning_rate": 9.05788876276958e-06, | |
| "loss": 0.0971, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 2.4713804713804715, | |
| "grad_norm": 2.1539206504821777, | |
| "learning_rate": 8.944381384790012e-06, | |
| "loss": 0.0815, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 2.478114478114478, | |
| "grad_norm": 2.422910213470459, | |
| "learning_rate": 8.830874006810442e-06, | |
| "loss": 0.1085, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 2.484848484848485, | |
| "grad_norm": 2.368211030960083, | |
| "learning_rate": 8.717366628830874e-06, | |
| "loss": 0.0947, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 2.4915824915824913, | |
| "grad_norm": 1.828069806098938, | |
| "learning_rate": 8.603859250851305e-06, | |
| "loss": 0.0867, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.4983164983164983, | |
| "grad_norm": 2.33329176902771, | |
| "learning_rate": 8.490351872871737e-06, | |
| "loss": 0.0941, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 2.505050505050505, | |
| "grad_norm": 2.341047525405884, | |
| "learning_rate": 8.376844494892169e-06, | |
| "loss": 0.0915, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 2.5117845117845117, | |
| "grad_norm": 1.9225627183914185, | |
| "learning_rate": 8.263337116912599e-06, | |
| "loss": 0.1044, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 2.5185185185185186, | |
| "grad_norm": 2.387437105178833, | |
| "learning_rate": 8.149829738933031e-06, | |
| "loss": 0.0987, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 2.525252525252525, | |
| "grad_norm": 2.9379942417144775, | |
| "learning_rate": 8.036322360953461e-06, | |
| "loss": 0.0871, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.531986531986532, | |
| "grad_norm": 3.075242042541504, | |
| "learning_rate": 7.922814982973893e-06, | |
| "loss": 0.0962, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 2.538720538720539, | |
| "grad_norm": 3.6734471321105957, | |
| "learning_rate": 7.809307604994325e-06, | |
| "loss": 0.0844, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 2.5454545454545454, | |
| "grad_norm": 2.5898001194000244, | |
| "learning_rate": 7.695800227014755e-06, | |
| "loss": 0.0996, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 2.5521885521885523, | |
| "grad_norm": 2.4215145111083984, | |
| "learning_rate": 7.5822928490351875e-06, | |
| "loss": 0.0857, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 2.558922558922559, | |
| "grad_norm": 3.2795231342315674, | |
| "learning_rate": 7.468785471055619e-06, | |
| "loss": 0.1002, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.5656565656565657, | |
| "grad_norm": 3.356985092163086, | |
| "learning_rate": 7.35527809307605e-06, | |
| "loss": 0.0956, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 2.5723905723905722, | |
| "grad_norm": 1.5472785234451294, | |
| "learning_rate": 7.241770715096481e-06, | |
| "loss": 0.0937, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 2.579124579124579, | |
| "grad_norm": 3.1027777194976807, | |
| "learning_rate": 7.128263337116913e-06, | |
| "loss": 0.1001, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 2.5858585858585856, | |
| "grad_norm": 2.8028059005737305, | |
| "learning_rate": 7.014755959137344e-06, | |
| "loss": 0.1012, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 2.5925925925925926, | |
| "grad_norm": 3.476177930831909, | |
| "learning_rate": 6.901248581157775e-06, | |
| "loss": 0.0843, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.5993265993265995, | |
| "grad_norm": 1.6291272640228271, | |
| "learning_rate": 6.787741203178206e-06, | |
| "loss": 0.1141, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 2.606060606060606, | |
| "grad_norm": 2.611839532852173, | |
| "learning_rate": 6.6742338251986375e-06, | |
| "loss": 0.0842, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 2.612794612794613, | |
| "grad_norm": 2.290695905685425, | |
| "learning_rate": 6.5607264472190694e-06, | |
| "loss": 0.1037, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 2.6195286195286194, | |
| "grad_norm": 2.7317962646484375, | |
| "learning_rate": 6.447219069239501e-06, | |
| "loss": 0.0967, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 2.6262626262626263, | |
| "grad_norm": 2.8561346530914307, | |
| "learning_rate": 6.333711691259932e-06, | |
| "loss": 0.1026, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.6329966329966332, | |
| "grad_norm": 1.468044638633728, | |
| "learning_rate": 6.220204313280364e-06, | |
| "loss": 0.0962, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 2.6397306397306397, | |
| "grad_norm": 2.2081100940704346, | |
| "learning_rate": 6.106696935300795e-06, | |
| "loss": 0.1087, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 2.6464646464646466, | |
| "grad_norm": 1.8171058893203735, | |
| "learning_rate": 5.993189557321226e-06, | |
| "loss": 0.1031, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 2.653198653198653, | |
| "grad_norm": 2.296617269515991, | |
| "learning_rate": 5.879682179341658e-06, | |
| "loss": 0.1066, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 2.65993265993266, | |
| "grad_norm": 2.02673077583313, | |
| "learning_rate": 5.766174801362089e-06, | |
| "loss": 0.0939, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 2.2023749351501465, | |
| "learning_rate": 5.65266742338252e-06, | |
| "loss": 0.1186, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 2.6734006734006734, | |
| "grad_norm": 2.8352410793304443, | |
| "learning_rate": 5.539160045402951e-06, | |
| "loss": 0.0982, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 2.68013468013468, | |
| "grad_norm": 2.6541831493377686, | |
| "learning_rate": 5.425652667423383e-06, | |
| "loss": 0.1005, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 2.686868686868687, | |
| "grad_norm": 2.7797365188598633, | |
| "learning_rate": 5.3121452894438146e-06, | |
| "loss": 0.0946, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 2.6936026936026938, | |
| "grad_norm": 1.8120551109313965, | |
| "learning_rate": 5.198637911464246e-06, | |
| "loss": 0.1003, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.6936026936026938, | |
| "eval_loss": 0.4680774211883545, | |
| "eval_runtime": 686.7326, | |
| "eval_samples_per_second": 2.065, | |
| "eval_steps_per_second": 0.13, | |
| "eval_wer": 0.46013934098566617, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.7003367003367003, | |
| "grad_norm": 2.1801674365997314, | |
| "learning_rate": 5.085130533484677e-06, | |
| "loss": 0.087, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 2.707070707070707, | |
| "grad_norm": 2.256625175476074, | |
| "learning_rate": 4.971623155505108e-06, | |
| "loss": 0.0806, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 2.7138047138047137, | |
| "grad_norm": 2.6446785926818848, | |
| "learning_rate": 4.85811577752554e-06, | |
| "loss": 0.1, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 2.7205387205387206, | |
| "grad_norm": 1.7377904653549194, | |
| "learning_rate": 4.744608399545971e-06, | |
| "loss": 0.0923, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 2.7272727272727275, | |
| "grad_norm": 1.9209539890289307, | |
| "learning_rate": 4.631101021566402e-06, | |
| "loss": 0.0992, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.734006734006734, | |
| "grad_norm": 2.6267309188842773, | |
| "learning_rate": 4.517593643586833e-06, | |
| "loss": 0.0822, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 2.7407407407407405, | |
| "grad_norm": 1.8967944383621216, | |
| "learning_rate": 4.404086265607265e-06, | |
| "loss": 0.0807, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 2.7474747474747474, | |
| "grad_norm": 2.2259716987609863, | |
| "learning_rate": 4.2905788876276965e-06, | |
| "loss": 0.0794, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 2.7542087542087543, | |
| "grad_norm": 2.3132541179656982, | |
| "learning_rate": 4.177071509648128e-06, | |
| "loss": 0.0835, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 2.760942760942761, | |
| "grad_norm": 2.2206366062164307, | |
| "learning_rate": 4.063564131668559e-06, | |
| "loss": 0.0827, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.7676767676767677, | |
| "grad_norm": 2.6239089965820312, | |
| "learning_rate": 3.95005675368899e-06, | |
| "loss": 0.0946, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 2.774410774410774, | |
| "grad_norm": 2.0979490280151367, | |
| "learning_rate": 3.836549375709422e-06, | |
| "loss": 0.0682, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 2.781144781144781, | |
| "grad_norm": 2.2430787086486816, | |
| "learning_rate": 3.723041997729853e-06, | |
| "loss": 0.089, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 2.787878787878788, | |
| "grad_norm": 2.0071237087249756, | |
| "learning_rate": 3.6095346197502842e-06, | |
| "loss": 0.0892, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 2.7946127946127945, | |
| "grad_norm": 2.0898067951202393, | |
| "learning_rate": 3.4960272417707154e-06, | |
| "loss": 0.0879, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.8013468013468015, | |
| "grad_norm": 3.248400926589966, | |
| "learning_rate": 3.382519863791147e-06, | |
| "loss": 0.0928, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 2.808080808080808, | |
| "grad_norm": 2.3027138710021973, | |
| "learning_rate": 3.269012485811578e-06, | |
| "loss": 0.0876, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 2.814814814814815, | |
| "grad_norm": 2.523341417312622, | |
| "learning_rate": 3.1555051078320097e-06, | |
| "loss": 0.0902, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 2.821548821548822, | |
| "grad_norm": 1.5260744094848633, | |
| "learning_rate": 3.0419977298524404e-06, | |
| "loss": 0.0773, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 2.8282828282828283, | |
| "grad_norm": 1.5680999755859375, | |
| "learning_rate": 2.928490351872872e-06, | |
| "loss": 0.0862, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.8350168350168348, | |
| "grad_norm": 2.547013759613037, | |
| "learning_rate": 2.814982973893303e-06, | |
| "loss": 0.0972, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 2.8417508417508417, | |
| "grad_norm": 2.116196870803833, | |
| "learning_rate": 2.7014755959137347e-06, | |
| "loss": 0.0949, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 2.8484848484848486, | |
| "grad_norm": 2.7395036220550537, | |
| "learning_rate": 2.587968217934166e-06, | |
| "loss": 0.094, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 2.855218855218855, | |
| "grad_norm": 2.0705437660217285, | |
| "learning_rate": 2.4744608399545974e-06, | |
| "loss": 0.0991, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 2.861952861952862, | |
| "grad_norm": 3.2586395740509033, | |
| "learning_rate": 2.3609534619750285e-06, | |
| "loss": 0.0959, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.8686868686868685, | |
| "grad_norm": 2.0380172729492188, | |
| "learning_rate": 2.2474460839954596e-06, | |
| "loss": 0.091, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 2.8754208754208754, | |
| "grad_norm": 1.8960984945297241, | |
| "learning_rate": 2.1339387060158912e-06, | |
| "loss": 0.0867, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 2.8821548821548824, | |
| "grad_norm": 2.761885643005371, | |
| "learning_rate": 2.0204313280363224e-06, | |
| "loss": 0.0972, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 2.888888888888889, | |
| "grad_norm": 2.1381499767303467, | |
| "learning_rate": 1.9069239500567537e-06, | |
| "loss": 0.0929, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 2.8956228956228958, | |
| "grad_norm": 1.5396257638931274, | |
| "learning_rate": 1.793416572077185e-06, | |
| "loss": 0.088, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.9023569023569022, | |
| "grad_norm": 2.4630839824676514, | |
| "learning_rate": 1.6799091940976164e-06, | |
| "loss": 0.1068, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 2.909090909090909, | |
| "grad_norm": 1.8399533033370972, | |
| "learning_rate": 1.5664018161180478e-06, | |
| "loss": 0.0772, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 2.915824915824916, | |
| "grad_norm": 2.0763957500457764, | |
| "learning_rate": 1.4528944381384791e-06, | |
| "loss": 0.0885, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 2.9225589225589226, | |
| "grad_norm": 5.270694255828857, | |
| "learning_rate": 1.3393870601589105e-06, | |
| "loss": 0.0876, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 2.929292929292929, | |
| "grad_norm": 2.339585542678833, | |
| "learning_rate": 1.2258796821793418e-06, | |
| "loss": 0.0829, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.936026936026936, | |
| "grad_norm": 2.7977676391601562, | |
| "learning_rate": 1.112372304199773e-06, | |
| "loss": 0.1098, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 2.942760942760943, | |
| "grad_norm": 2.1591367721557617, | |
| "learning_rate": 9.988649262202043e-07, | |
| "loss": 0.0917, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 2.9494949494949494, | |
| "grad_norm": 2.4336767196655273, | |
| "learning_rate": 8.853575482406357e-07, | |
| "loss": 0.1013, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 2.9562289562289563, | |
| "grad_norm": 1.9739155769348145, | |
| "learning_rate": 7.718501702610669e-07, | |
| "loss": 0.0731, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 2.962962962962963, | |
| "grad_norm": 2.9581665992736816, | |
| "learning_rate": 6.583427922814983e-07, | |
| "loss": 0.1034, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.962962962962963, | |
| "eval_loss": 0.4650237560272217, | |
| "eval_runtime": 680.1256, | |
| "eval_samples_per_second": 2.085, | |
| "eval_steps_per_second": 0.131, | |
| "eval_wer": 0.4528582034149963, | |
| "step": 4400 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 4455, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 400, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.56588428967936e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |