{
"best_global_step": 8500,
"best_metric": 0.3142754137516022,
"best_model_checkpoint": "./Wav2vec2-fula/checkpoint-8500",
"epoch": 7.565045540470871,
"eval_steps": 500,
"global_step": 11000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.017185083347654236,
"grad_norm": 10.79065990447998,
"learning_rate": 1.2000000000000002e-06,
"loss": 14.779,
"step": 25
},
{
"epoch": 0.03437016669530847,
"grad_norm": 11.638880729675293,
"learning_rate": 2.4000000000000003e-06,
"loss": 13.4402,
"step": 50
},
{
"epoch": 0.05155525004296271,
"grad_norm": 12.49268913269043,
"learning_rate": 3.6499999999999998e-06,
"loss": 14.1815,
"step": 75
},
{
"epoch": 0.06874033339061694,
"grad_norm": 13.672736167907715,
"learning_rate": 4.9000000000000005e-06,
"loss": 12.3835,
"step": 100
},
{
"epoch": 0.08592541673827118,
"grad_norm": 13.192852973937988,
"learning_rate": 6.15e-06,
"loss": 11.3532,
"step": 125
},
{
"epoch": 0.10311050008592541,
"grad_norm": 13.134611129760742,
"learning_rate": 7.4e-06,
"loss": 7.4199,
"step": 150
},
{
"epoch": 0.12029558343357966,
"grad_norm": 10.945942878723145,
"learning_rate": 8.65e-06,
"loss": 6.3555,
"step": 175
},
{
"epoch": 0.13748066678123388,
"grad_norm": 10.094327926635742,
"learning_rate": 9.900000000000002e-06,
"loss": 5.2388,
"step": 200
},
{
"epoch": 0.1546657501288881,
"grad_norm": 9.148414611816406,
"learning_rate": 1.115e-05,
"loss": 5.0771,
"step": 225
},
{
"epoch": 0.17185083347654237,
"grad_norm": 7.876718997955322,
"learning_rate": 1.24e-05,
"loss": 4.6181,
"step": 250
},
{
"epoch": 0.1890359168241966,
"grad_norm": 7.483435153961182,
"learning_rate": 1.3650000000000001e-05,
"loss": 4.5182,
"step": 275
},
{
"epoch": 0.20622100017185083,
"grad_norm": 6.882397174835205,
"learning_rate": 1.49e-05,
"loss": 4.1879,
"step": 300
},
{
"epoch": 0.22340608351950508,
"grad_norm": 6.444328308105469,
"learning_rate": 1.6150000000000003e-05,
"loss": 4.1217,
"step": 325
},
{
"epoch": 0.2405911668671593,
"grad_norm": 4.407646179199219,
"learning_rate": 1.74e-05,
"loss": 3.8535,
"step": 350
},
{
"epoch": 0.25777625021481354,
"grad_norm": 4.332294940948486,
"learning_rate": 1.865e-05,
"loss": 3.7725,
"step": 375
},
{
"epoch": 0.27496133356246777,
"grad_norm": 3.5310115814208984,
"learning_rate": 1.9900000000000003e-05,
"loss": 3.5538,
"step": 400
},
{
"epoch": 0.292146416910122,
"grad_norm": 2.884195566177368,
"learning_rate": 2.115e-05,
"loss": 3.4667,
"step": 425
},
{
"epoch": 0.3093315002577762,
"grad_norm": 2.221975564956665,
"learning_rate": 2.2400000000000002e-05,
"loss": 3.3157,
"step": 450
},
{
"epoch": 0.3265165836054305,
"grad_norm": 1.393004059791565,
"learning_rate": 2.365e-05,
"loss": 3.2574,
"step": 475
},
{
"epoch": 0.34370166695308474,
"grad_norm": 1.5566725730895996,
"learning_rate": 2.4900000000000002e-05,
"loss": 3.1344,
"step": 500
},
{
"epoch": 0.34370166695308474,
"eval_loss": 3.093510150909424,
"eval_runtime": 148.9649,
"eval_samples_per_second": 8.223,
"eval_steps_per_second": 1.034,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 0.36088675030073897,
"grad_norm": 1.1272865533828735,
"learning_rate": 2.6150000000000002e-05,
"loss": 3.0829,
"step": 525
},
{
"epoch": 0.3780718336483932,
"grad_norm": 0.8798616528511047,
"learning_rate": 2.7400000000000002e-05,
"loss": 3.0138,
"step": 550
},
{
"epoch": 0.3952569169960474,
"grad_norm": 1.233052134513855,
"learning_rate": 2.865e-05,
"loss": 2.9976,
"step": 575
},
{
"epoch": 0.41244200034370165,
"grad_norm": 0.4763319492340088,
"learning_rate": 2.9900000000000002e-05,
"loss": 2.9609,
"step": 600
},
{
"epoch": 0.4296270836913559,
"grad_norm": 0.35058021545410156,
"learning_rate": 3.115e-05,
"loss": 2.9375,
"step": 625
},
{
"epoch": 0.44681216703901017,
"grad_norm": 0.9661968946456909,
"learning_rate": 3.24e-05,
"loss": 2.8961,
"step": 650
},
{
"epoch": 0.4639972503866644,
"grad_norm": 1.0027278661727905,
"learning_rate": 3.3650000000000005e-05,
"loss": 2.8169,
"step": 675
},
{
"epoch": 0.4811823337343186,
"grad_norm": 0.6780478358268738,
"learning_rate": 3.49e-05,
"loss": 2.674,
"step": 700
},
{
"epoch": 0.49836741708197285,
"grad_norm": 0.7937625646591187,
"learning_rate": 3.615e-05,
"loss": 2.4667,
"step": 725
},
{
"epoch": 0.5155525004296271,
"grad_norm": 1.0229036808013916,
"learning_rate": 3.74e-05,
"loss": 2.2186,
"step": 750
},
{
"epoch": 0.5327375837772813,
"grad_norm": 1.1023578643798828,
"learning_rate": 3.8650000000000004e-05,
"loss": 1.9797,
"step": 775
},
{
"epoch": 0.5499226671249355,
"grad_norm": 1.084370493888855,
"learning_rate": 3.99e-05,
"loss": 1.6958,
"step": 800
},
{
"epoch": 0.5671077504725898,
"grad_norm": 0.8571304082870483,
"learning_rate": 4.115e-05,
"loss": 1.3521,
"step": 825
},
{
"epoch": 0.584292833820244,
"grad_norm": 1.0564861297607422,
"learning_rate": 4.24e-05,
"loss": 1.1168,
"step": 850
},
{
"epoch": 0.6014779171678982,
"grad_norm": 0.8157157301902771,
"learning_rate": 4.3650000000000004e-05,
"loss": 0.9829,
"step": 875
},
{
"epoch": 0.6186630005155525,
"grad_norm": 1.098561406135559,
"learning_rate": 4.49e-05,
"loss": 0.904,
"step": 900
},
{
"epoch": 0.6358480838632067,
"grad_norm": 0.6373503804206848,
"learning_rate": 4.6150000000000004e-05,
"loss": 0.8686,
"step": 925
},
{
"epoch": 0.653033167210861,
"grad_norm": 1.2277697324752808,
"learning_rate": 4.74e-05,
"loss": 0.7969,
"step": 950
},
{
"epoch": 0.6702182505585152,
"grad_norm": 0.5907439589500427,
"learning_rate": 4.8650000000000003e-05,
"loss": 0.7524,
"step": 975
},
{
"epoch": 0.6874033339061695,
"grad_norm": 0.7881184220314026,
"learning_rate": 4.99e-05,
"loss": 0.7323,
"step": 1000
},
{
"epoch": 0.6874033339061695,
"eval_loss": 0.630436360836029,
"eval_runtime": 155.009,
"eval_samples_per_second": 7.903,
"eval_steps_per_second": 0.993,
"eval_wer": 0.711954217888936,
"step": 1000
},
{
"epoch": 0.7045884172538237,
"grad_norm": 0.7166395783424377,
"learning_rate": 4.99866651205937e-05,
"loss": 0.705,
"step": 1025
},
{
"epoch": 0.7217735006014779,
"grad_norm": 0.7382345199584961,
"learning_rate": 4.9972170686456406e-05,
"loss": 0.7207,
"step": 1050
},
{
"epoch": 0.7389585839491322,
"grad_norm": 0.6300435066223145,
"learning_rate": 4.995767625231911e-05,
"loss": 0.7097,
"step": 1075
},
{
"epoch": 0.7561436672967864,
"grad_norm": 0.7419637441635132,
"learning_rate": 4.994318181818182e-05,
"loss": 0.6865,
"step": 1100
},
{
"epoch": 0.7733287506444406,
"grad_norm": 0.5746726989746094,
"learning_rate": 4.9928687384044535e-05,
"loss": 0.6694,
"step": 1125
},
{
"epoch": 0.7905138339920948,
"grad_norm": 0.7999011874198914,
"learning_rate": 4.991419294990724e-05,
"loss": 0.6729,
"step": 1150
},
{
"epoch": 0.8076989173397491,
"grad_norm": 0.5744255781173706,
"learning_rate": 4.989969851576995e-05,
"loss": 0.6253,
"step": 1175
},
{
"epoch": 0.8248840006874033,
"grad_norm": 0.8337430357933044,
"learning_rate": 4.988520408163265e-05,
"loss": 0.6616,
"step": 1200
},
{
"epoch": 0.8420690840350575,
"grad_norm": 0.5431417226791382,
"learning_rate": 4.9870709647495364e-05,
"loss": 0.6129,
"step": 1225
},
{
"epoch": 0.8592541673827118,
"grad_norm": 0.8542420864105225,
"learning_rate": 4.985621521335807e-05,
"loss": 0.6252,
"step": 1250
},
{
"epoch": 0.876439250730366,
"grad_norm": 0.5819442272186279,
"learning_rate": 4.984172077922078e-05,
"loss": 0.5723,
"step": 1275
},
{
"epoch": 0.8936243340780203,
"grad_norm": 0.9629399180412292,
"learning_rate": 4.982722634508349e-05,
"loss": 0.582,
"step": 1300
},
{
"epoch": 0.9108094174256746,
"grad_norm": 0.5698046088218689,
"learning_rate": 4.98127319109462e-05,
"loss": 0.5745,
"step": 1325
},
{
"epoch": 0.9279945007733288,
"grad_norm": 0.893267035484314,
"learning_rate": 4.979823747680891e-05,
"loss": 0.5887,
"step": 1350
},
{
"epoch": 0.945179584120983,
"grad_norm": 0.4911533296108246,
"learning_rate": 4.9783743042671616e-05,
"loss": 0.5657,
"step": 1375
},
{
"epoch": 0.9623646674686372,
"grad_norm": 0.9226717948913574,
"learning_rate": 4.976924860853432e-05,
"loss": 0.5615,
"step": 1400
},
{
"epoch": 0.9795497508162915,
"grad_norm": 0.5409913659095764,
"learning_rate": 4.975475417439703e-05,
"loss": 0.5562,
"step": 1425
},
{
"epoch": 0.9967348341639457,
"grad_norm": 0.7855440974235535,
"learning_rate": 4.9740259740259745e-05,
"loss": 0.5641,
"step": 1450
},
{
"epoch": 1.0144354700120295,
"grad_norm": 0.48342418670654297,
"learning_rate": 4.972576530612245e-05,
"loss": 0.5216,
"step": 1475
},
{
"epoch": 1.0316205533596838,
"grad_norm": 0.7128458023071289,
"learning_rate": 4.971127087198516e-05,
"loss": 0.5416,
"step": 1500
},
{
"epoch": 1.0316205533596838,
"eval_loss": 0.47852450609207153,
"eval_runtime": 156.3389,
"eval_samples_per_second": 7.836,
"eval_steps_per_second": 0.985,
"eval_wer": 0.6490568037303942,
"step": 1500
},
{
"epoch": 1.048805636707338,
"grad_norm": 0.4331744909286499,
"learning_rate": 4.969677643784787e-05,
"loss": 0.5337,
"step": 1525
},
{
"epoch": 1.0659907200549923,
"grad_norm": 0.7347924113273621,
"learning_rate": 4.968228200371058e-05,
"loss": 0.5115,
"step": 1550
},
{
"epoch": 1.0831758034026464,
"grad_norm": 0.47687920928001404,
"learning_rate": 4.966778756957329e-05,
"loss": 0.5525,
"step": 1575
},
{
"epoch": 1.1003608867503007,
"grad_norm": 0.6743185520172119,
"learning_rate": 4.9653293135435996e-05,
"loss": 0.4931,
"step": 1600
},
{
"epoch": 1.117545970097955,
"grad_norm": 0.5449199676513672,
"learning_rate": 4.9638798701298704e-05,
"loss": 0.4773,
"step": 1625
},
{
"epoch": 1.1347310534456092,
"grad_norm": 0.6370054483413696,
"learning_rate": 4.962430426716141e-05,
"loss": 0.4937,
"step": 1650
},
{
"epoch": 1.1519161367932633,
"grad_norm": 0.5630868077278137,
"learning_rate": 4.9609809833024125e-05,
"loss": 0.541,
"step": 1675
},
{
"epoch": 1.1691012201409177,
"grad_norm": 0.6498411297798157,
"learning_rate": 4.959531539888683e-05,
"loss": 0.5057,
"step": 1700
},
{
"epoch": 1.186286303488572,
"grad_norm": 0.468143105506897,
"learning_rate": 4.9580820964749533e-05,
"loss": 0.5155,
"step": 1725
},
{
"epoch": 1.2034713868362261,
"grad_norm": 0.7214887738227844,
"learning_rate": 4.956632653061225e-05,
"loss": 0.5132,
"step": 1750
},
{
"epoch": 1.2206564701838805,
"grad_norm": 0.4510962963104248,
"learning_rate": 4.9551832096474955e-05,
"loss": 0.4643,
"step": 1775
},
{
"epoch": 1.2378415535315346,
"grad_norm": 0.6587559580802917,
"learning_rate": 4.953733766233766e-05,
"loss": 0.4701,
"step": 1800
},
{
"epoch": 1.255026636879189,
"grad_norm": 0.5024493336677551,
"learning_rate": 4.952284322820037e-05,
"loss": 0.5198,
"step": 1825
},
{
"epoch": 1.272211720226843,
"grad_norm": 0.7172912359237671,
"learning_rate": 4.950834879406308e-05,
"loss": 0.4555,
"step": 1850
},
{
"epoch": 1.2893968035744974,
"grad_norm": 0.5447876453399658,
"learning_rate": 4.949385435992579e-05,
"loss": 0.5041,
"step": 1875
},
{
"epoch": 1.3065818869221515,
"grad_norm": 0.6178200244903564,
"learning_rate": 4.94793599257885e-05,
"loss": 0.4686,
"step": 1900
},
{
"epoch": 1.3237669702698058,
"grad_norm": 0.49054816365242004,
"learning_rate": 4.9464865491651207e-05,
"loss": 0.4803,
"step": 1925
},
{
"epoch": 1.34095205361746,
"grad_norm": 0.6802988648414612,
"learning_rate": 4.9450371057513914e-05,
"loss": 0.4644,
"step": 1950
},
{
"epoch": 1.3581371369651143,
"grad_norm": 0.41138285398483276,
"learning_rate": 4.943587662337663e-05,
"loss": 0.513,
"step": 1975
},
{
"epoch": 1.3753222203127686,
"grad_norm": 0.8374213576316833,
"learning_rate": 4.9421382189239336e-05,
"loss": 0.4479,
"step": 2000
},
{
"epoch": 1.3753222203127686,
"eval_loss": 0.42023956775665283,
"eval_runtime": 156.1936,
"eval_samples_per_second": 7.843,
"eval_steps_per_second": 0.986,
"eval_wer": 0.6206549385332768,
"step": 2000
},
{
"epoch": 1.3925073036604227,
"grad_norm": 0.7108762860298157,
"learning_rate": 4.940688775510204e-05,
"loss": 0.465,
"step": 2025
},
{
"epoch": 1.4096923870080769,
"grad_norm": 0.6862374544143677,
"learning_rate": 4.939239332096475e-05,
"loss": 0.4513,
"step": 2050
},
{
"epoch": 1.4268774703557312,
"grad_norm": 0.5275014638900757,
"learning_rate": 4.9377898886827465e-05,
"loss": 0.5135,
"step": 2075
},
{
"epoch": 1.4440625537033855,
"grad_norm": 0.7356075644493103,
"learning_rate": 4.936340445269017e-05,
"loss": 0.4843,
"step": 2100
},
{
"epoch": 1.4612476370510397,
"grad_norm": 0.46985840797424316,
"learning_rate": 4.934891001855288e-05,
"loss": 0.544,
"step": 2125
},
{
"epoch": 1.478432720398694,
"grad_norm": 0.8410568237304688,
"learning_rate": 4.933441558441559e-05,
"loss": 0.453,
"step": 2150
},
{
"epoch": 1.4956178037463481,
"grad_norm": 0.5153818130493164,
"learning_rate": 4.9319921150278294e-05,
"loss": 0.5145,
"step": 2175
},
{
"epoch": 1.5128028870940025,
"grad_norm": 0.8318967223167419,
"learning_rate": 4.930542671614101e-05,
"loss": 0.4583,
"step": 2200
},
{
"epoch": 1.5299879704416566,
"grad_norm": 0.5471023321151733,
"learning_rate": 4.9290932282003716e-05,
"loss": 0.4622,
"step": 2225
},
{
"epoch": 1.547173053789311,
"grad_norm": 0.6953750848770142,
"learning_rate": 4.927643784786642e-05,
"loss": 0.4712,
"step": 2250
},
{
"epoch": 1.5643581371369653,
"grad_norm": 0.5455946326255798,
"learning_rate": 4.926194341372913e-05,
"loss": 0.4702,
"step": 2275
},
{
"epoch": 1.5815432204846194,
"grad_norm": 0.6755653023719788,
"learning_rate": 4.924744897959184e-05,
"loss": 0.4356,
"step": 2300
},
{
"epoch": 1.5987283038322735,
"grad_norm": 0.47134584188461304,
"learning_rate": 4.9232954545454546e-05,
"loss": 0.4333,
"step": 2325
},
{
"epoch": 1.6159133871799278,
"grad_norm": 0.6077346205711365,
"learning_rate": 4.921846011131725e-05,
"loss": 0.4432,
"step": 2350
},
{
"epoch": 1.6330984705275822,
"grad_norm": 0.5316899418830872,
"learning_rate": 4.920396567717996e-05,
"loss": 0.45,
"step": 2375
},
{
"epoch": 1.6502835538752363,
"grad_norm": 0.79489666223526,
"learning_rate": 4.9189471243042675e-05,
"loss": 0.4637,
"step": 2400
},
{
"epoch": 1.6674686372228904,
"grad_norm": 0.5166532397270203,
"learning_rate": 4.917497680890538e-05,
"loss": 0.4375,
"step": 2425
},
{
"epoch": 1.6846537205705447,
"grad_norm": 0.6109660863876343,
"learning_rate": 4.916048237476809e-05,
"loss": 0.418,
"step": 2450
},
{
"epoch": 1.701838803918199,
"grad_norm": 0.6634232401847839,
"learning_rate": 4.91459879406308e-05,
"loss": 0.4315,
"step": 2475
},
{
"epoch": 1.7190238872658532,
"grad_norm": 0.5985594987869263,
"learning_rate": 4.913149350649351e-05,
"loss": 0.4541,
"step": 2500
},
{
"epoch": 1.7190238872658532,
"eval_loss": 0.3850683569908142,
"eval_runtime": 155.7689,
"eval_samples_per_second": 7.864,
"eval_steps_per_second": 0.989,
"eval_wer": 0.6006252649427724,
"step": 2500
},
{
"epoch": 1.7362089706135073,
"grad_norm": 0.5531134605407715,
"learning_rate": 4.911699907235622e-05,
"loss": 0.4558,
"step": 2525
},
{
"epoch": 1.7533940539611617,
"grad_norm": 0.594007670879364,
"learning_rate": 4.9102504638218926e-05,
"loss": 0.4422,
"step": 2550
},
{
"epoch": 1.770579137308816,
"grad_norm": 0.4865548312664032,
"learning_rate": 4.9088010204081634e-05,
"loss": 0.4216,
"step": 2575
},
{
"epoch": 1.7877642206564701,
"grad_norm": 0.5752180814743042,
"learning_rate": 4.907351576994435e-05,
"loss": 0.429,
"step": 2600
},
{
"epoch": 1.8049493040041245,
"grad_norm": 0.4255332350730896,
"learning_rate": 4.9059021335807055e-05,
"loss": 0.4074,
"step": 2625
},
{
"epoch": 1.8221343873517788,
"grad_norm": 0.6160002946853638,
"learning_rate": 4.904452690166976e-05,
"loss": 0.4388,
"step": 2650
},
{
"epoch": 1.839319470699433,
"grad_norm": 0.49583593010902405,
"learning_rate": 4.903003246753247e-05,
"loss": 0.4211,
"step": 2675
},
{
"epoch": 1.856504554047087,
"grad_norm": 0.7490856647491455,
"learning_rate": 4.901553803339518e-05,
"loss": 0.4559,
"step": 2700
},
{
"epoch": 1.8736896373947414,
"grad_norm": 0.4634897708892822,
"learning_rate": 4.900104359925789e-05,
"loss": 0.4323,
"step": 2725
},
{
"epoch": 1.8908747207423957,
"grad_norm": 1.0682307481765747,
"learning_rate": 4.89865491651206e-05,
"loss": 0.4361,
"step": 2750
},
{
"epoch": 1.9080598040900498,
"grad_norm": 0.41067609190940857,
"learning_rate": 4.89720547309833e-05,
"loss": 0.4627,
"step": 2775
},
{
"epoch": 1.925244887437704,
"grad_norm": 0.674846887588501,
"learning_rate": 4.8957560296846014e-05,
"loss": 0.4357,
"step": 2800
},
{
"epoch": 1.9424299707853583,
"grad_norm": 0.4671356678009033,
"learning_rate": 4.894306586270872e-05,
"loss": 0.429,
"step": 2825
},
{
"epoch": 1.9596150541330126,
"grad_norm": 0.6439480781555176,
"learning_rate": 4.892857142857143e-05,
"loss": 0.3965,
"step": 2850
},
{
"epoch": 1.9768001374806667,
"grad_norm": 0.5034067034721375,
"learning_rate": 4.8914076994434137e-05,
"loss": 0.4131,
"step": 2875
},
{
"epoch": 1.9939852208283209,
"grad_norm": 0.67892986536026,
"learning_rate": 4.8899582560296844e-05,
"loss": 0.4306,
"step": 2900
},
{
"epoch": 2.011685856676405,
"grad_norm": 0.4826701879501343,
"learning_rate": 4.888508812615956e-05,
"loss": 0.4437,
"step": 2925
},
{
"epoch": 2.028870940024059,
"grad_norm": 0.5658535361289978,
"learning_rate": 4.8870593692022266e-05,
"loss": 0.3842,
"step": 2950
},
{
"epoch": 2.0460560233717136,
"grad_norm": 0.5490546226501465,
"learning_rate": 4.885609925788497e-05,
"loss": 0.3872,
"step": 2975
},
{
"epoch": 2.0632411067193677,
"grad_norm": 0.59776371717453,
"learning_rate": 4.884160482374768e-05,
"loss": 0.365,
"step": 3000
},
{
"epoch": 2.0632411067193677,
"eval_loss": 0.37011492252349854,
"eval_runtime": 150.8939,
"eval_samples_per_second": 8.118,
"eval_steps_per_second": 1.021,
"eval_wer": 0.588543874523103,
"step": 3000
},
{
"epoch": 2.080426190067022,
"grad_norm": 0.4145926237106323,
"learning_rate": 4.8827110389610395e-05,
"loss": 0.4335,
"step": 3025
},
{
"epoch": 2.097611273414676,
"grad_norm": 0.6833218336105347,
"learning_rate": 4.88126159554731e-05,
"loss": 0.3801,
"step": 3050
},
{
"epoch": 2.1147963567623305,
"grad_norm": 0.5245340466499329,
"learning_rate": 4.879812152133581e-05,
"loss": 0.4167,
"step": 3075
},
{
"epoch": 2.1319814401099846,
"grad_norm": 0.5739388465881348,
"learning_rate": 4.878362708719852e-05,
"loss": 0.3793,
"step": 3100
},
{
"epoch": 2.1491665234576387,
"grad_norm": 0.502185046672821,
"learning_rate": 4.876913265306123e-05,
"loss": 0.4444,
"step": 3125
},
{
"epoch": 2.166351606805293,
"grad_norm": 0.635421633720398,
"learning_rate": 4.875463821892394e-05,
"loss": 0.3668,
"step": 3150
},
{
"epoch": 2.1835366901529474,
"grad_norm": 0.4521035850048065,
"learning_rate": 4.8740143784786646e-05,
"loss": 0.3772,
"step": 3175
},
{
"epoch": 2.2007217735006015,
"grad_norm": 0.7126047015190125,
"learning_rate": 4.8725649350649354e-05,
"loss": 0.374,
"step": 3200
},
{
"epoch": 2.2179068568482556,
"grad_norm": 0.44621542096138,
"learning_rate": 4.871115491651206e-05,
"loss": 0.448,
"step": 3225
},
{
"epoch": 2.23509194019591,
"grad_norm": 0.6418918967247009,
"learning_rate": 4.8696660482374775e-05,
"loss": 0.4099,
"step": 3250
},
{
"epoch": 2.2522770235435643,
"grad_norm": 0.6630382537841797,
"learning_rate": 4.8682166048237476e-05,
"loss": 0.4028,
"step": 3275
},
{
"epoch": 2.2694621068912184,
"grad_norm": 0.6084064841270447,
"learning_rate": 4.866767161410018e-05,
"loss": 0.3737,
"step": 3300
},
{
"epoch": 2.2866471902388725,
"grad_norm": 0.49679034948349,
"learning_rate": 4.86531771799629e-05,
"loss": 0.4054,
"step": 3325
},
{
"epoch": 2.3038322735865266,
"grad_norm": 0.5616837739944458,
"learning_rate": 4.8638682745825605e-05,
"loss": 0.3562,
"step": 3350
},
{
"epoch": 2.321017356934181,
"grad_norm": 0.501042366027832,
"learning_rate": 4.862418831168831e-05,
"loss": 0.3613,
"step": 3375
},
{
"epoch": 2.3382024402818353,
"grad_norm": 0.7408941984176636,
"learning_rate": 4.860969387755102e-05,
"loss": 0.3774,
"step": 3400
},
{
"epoch": 2.3553875236294894,
"grad_norm": 0.4748440086841583,
"learning_rate": 4.859519944341373e-05,
"loss": 0.4066,
"step": 3425
},
{
"epoch": 2.372572606977144,
"grad_norm": 0.6289178729057312,
"learning_rate": 4.858070500927644e-05,
"loss": 0.3748,
"step": 3450
},
{
"epoch": 2.389757690324798,
"grad_norm": 0.4348820149898529,
"learning_rate": 4.856621057513915e-05,
"loss": 0.4605,
"step": 3475
},
{
"epoch": 2.4069427736724522,
"grad_norm": 0.5051092505455017,
"learning_rate": 4.8551716141001856e-05,
"loss": 0.3433,
"step": 3500
},
{
"epoch": 2.4069427736724522,
"eval_loss": 0.364750474691391,
"eval_runtime": 151.4529,
"eval_samples_per_second": 8.088,
"eval_steps_per_second": 1.017,
"eval_wer": 0.5796947859262399,
"step": 3500
},
{
"epoch": 2.4241278570201064,
"grad_norm": 0.5361665487289429,
"learning_rate": 4.8537221706864564e-05,
"loss": 0.4373,
"step": 3525
},
{
"epoch": 2.441312940367761,
"grad_norm": 0.5831518769264221,
"learning_rate": 4.852272727272728e-05,
"loss": 0.4052,
"step": 3550
},
{
"epoch": 2.458498023715415,
"grad_norm": 0.42250296473503113,
"learning_rate": 4.8508232838589985e-05,
"loss": 0.3786,
"step": 3575
},
{
"epoch": 2.475683107063069,
"grad_norm": 0.6438080072402954,
"learning_rate": 4.849373840445269e-05,
"loss": 0.3529,
"step": 3600
},
{
"epoch": 2.4928681904107233,
"grad_norm": 0.41823628544807434,
"learning_rate": 4.84792439703154e-05,
"loss": 0.4122,
"step": 3625
},
{
"epoch": 2.510053273758378,
"grad_norm": 0.6400181651115417,
"learning_rate": 4.8464749536178115e-05,
"loss": 0.3602,
"step": 3650
},
{
"epoch": 2.527238357106032,
"grad_norm": 0.40071290731430054,
"learning_rate": 4.845025510204082e-05,
"loss": 0.3968,
"step": 3675
},
{
"epoch": 2.544423440453686,
"grad_norm": 1.0052437782287598,
"learning_rate": 4.843576066790353e-05,
"loss": 0.3927,
"step": 3700
},
{
"epoch": 2.5616085238013406,
"grad_norm": 0.43629390001296997,
"learning_rate": 4.842126623376624e-05,
"loss": 0.3858,
"step": 3725
},
{
"epoch": 2.5787936071489947,
"grad_norm": 0.8912670016288757,
"learning_rate": 4.8406771799628944e-05,
"loss": 0.3688,
"step": 3750
},
{
"epoch": 2.595978690496649,
"grad_norm": 0.4484070837497711,
"learning_rate": 4.839227736549166e-05,
"loss": 0.4143,
"step": 3775
},
{
"epoch": 2.613163773844303,
"grad_norm": 0.6882378458976746,
"learning_rate": 4.837778293135436e-05,
"loss": 0.3575,
"step": 3800
},
{
"epoch": 2.6303488571919575,
"grad_norm": 0.4403606653213501,
"learning_rate": 4.8363288497217067e-05,
"loss": 0.4099,
"step": 3825
},
{
"epoch": 2.6475339405396117,
"grad_norm": 0.7600814700126648,
"learning_rate": 4.834879406307978e-05,
"loss": 0.3732,
"step": 3850
},
{
"epoch": 2.664719023887266,
"grad_norm": 0.3970819115638733,
"learning_rate": 4.833429962894249e-05,
"loss": 0.3994,
"step": 3875
},
{
"epoch": 2.68190410723492,
"grad_norm": 0.8571271300315857,
"learning_rate": 4.8319805194805196e-05,
"loss": 0.3354,
"step": 3900
},
{
"epoch": 2.6990891905825745,
"grad_norm": 0.514602780342102,
"learning_rate": 4.83053107606679e-05,
"loss": 0.4109,
"step": 3925
},
{
"epoch": 2.7162742739302286,
"grad_norm": 0.5113664269447327,
"learning_rate": 4.829081632653061e-05,
"loss": 0.3742,
"step": 3950
},
{
"epoch": 2.7334593572778827,
"grad_norm": 0.44583624601364136,
"learning_rate": 4.8276321892393325e-05,
"loss": 0.3896,
"step": 3975
},
{
"epoch": 2.7506444406255373,
"grad_norm": 0.5685079097747803,
"learning_rate": 4.826182745825603e-05,
"loss": 0.3561,
"step": 4000
},
{
"epoch": 2.7506444406255373,
"eval_loss": 0.3437730669975281,
"eval_runtime": 152.7484,
"eval_samples_per_second": 8.02,
"eval_steps_per_second": 1.008,
"eval_wer": 0.5716405256464604,
"step": 4000
},
{
"epoch": 2.7678295239731914,
"grad_norm": 0.48729953169822693,
"learning_rate": 4.824733302411874e-05,
"loss": 0.3752,
"step": 4025
},
{
"epoch": 2.7850146073208455,
"grad_norm": 0.64439457654953,
"learning_rate": 4.823283858998145e-05,
"loss": 0.3513,
"step": 4050
},
{
"epoch": 2.8021996906684996,
"grad_norm": 0.4153486490249634,
"learning_rate": 4.821834415584416e-05,
"loss": 0.3979,
"step": 4075
},
{
"epoch": 2.8193847740161537,
"grad_norm": 0.5988856554031372,
"learning_rate": 4.820384972170687e-05,
"loss": 0.3825,
"step": 4100
},
{
"epoch": 2.8365698573638083,
"grad_norm": 0.8136705160140991,
"learning_rate": 4.8189355287569576e-05,
"loss": 0.4321,
"step": 4125
},
{
"epoch": 2.8537549407114624,
"grad_norm": 0.6228049993515015,
"learning_rate": 4.8174860853432284e-05,
"loss": 0.3569,
"step": 4150
},
{
"epoch": 2.8709400240591165,
"grad_norm": 0.5307970643043518,
"learning_rate": 4.816036641929499e-05,
"loss": 0.3775,
"step": 4175
},
{
"epoch": 2.888125107406771,
"grad_norm": 0.5696175694465637,
"learning_rate": 4.8145871985157705e-05,
"loss": 0.3306,
"step": 4200
},
{
"epoch": 2.905310190754425,
"grad_norm": 0.46450933814048767,
"learning_rate": 4.813137755102041e-05,
"loss": 0.4307,
"step": 4225
},
{
"epoch": 2.9224952741020793,
"grad_norm": 0.6127625703811646,
"learning_rate": 4.811688311688312e-05,
"loss": 0.3604,
"step": 4250
},
{
"epoch": 2.939680357449734,
"grad_norm": 0.5017271637916565,
"learning_rate": 4.810238868274583e-05,
"loss": 0.3721,
"step": 4275
},
{
"epoch": 2.956865440797388,
"grad_norm": 0.6709030866622925,
"learning_rate": 4.808789424860854e-05,
"loss": 0.3603,
"step": 4300
},
{
"epoch": 2.974050524145042,
"grad_norm": 0.40175503492355347,
"learning_rate": 4.807339981447124e-05,
"loss": 0.376,
"step": 4325
},
{
"epoch": 2.9912356074926962,
"grad_norm": 0.6254987716674805,
"learning_rate": 4.805890538033395e-05,
"loss": 0.3316,
"step": 4350
},
{
"epoch": 3.0089362433407802,
"grad_norm": 0.4055463373661041,
"learning_rate": 4.804441094619666e-05,
"loss": 0.4126,
"step": 4375
},
{
"epoch": 3.0261213266884344,
"grad_norm": 0.5347069501876831,
"learning_rate": 4.802991651205937e-05,
"loss": 0.3477,
"step": 4400
},
{
"epoch": 3.0433064100360885,
"grad_norm": 0.4559672474861145,
"learning_rate": 4.801542207792208e-05,
"loss": 0.3458,
"step": 4425
},
{
"epoch": 3.060491493383743,
"grad_norm": 0.46173930168151855,
"learning_rate": 4.8000927643784786e-05,
"loss": 0.3316,
"step": 4450
},
{
"epoch": 3.077676576731397,
"grad_norm": 0.414719820022583,
"learning_rate": 4.7986433209647494e-05,
"loss": 0.3719,
"step": 4475
},
{
"epoch": 3.0948616600790513,
"grad_norm": 0.7085908055305481,
"learning_rate": 4.797193877551021e-05,
"loss": 0.3237,
"step": 4500
},
{
"epoch": 3.0948616600790513,
"eval_loss": 0.3646816313266754,
"eval_runtime": 153.9661,
"eval_samples_per_second": 7.956,
"eval_steps_per_second": 1.0,
"eval_wer": 0.5677193726155151,
"step": 4500
},
{
"epoch": 3.1120467434267054,
"grad_norm": 0.4840669631958008,
"learning_rate": 4.7957444341372916e-05,
"loss": 0.3515,
"step": 4525
},
{
"epoch": 3.12923182677436,
"grad_norm": 0.6030757427215576,
"learning_rate": 4.794294990723562e-05,
"loss": 0.3263,
"step": 4550
},
{
"epoch": 3.146416910122014,
"grad_norm": 0.5091059803962708,
"learning_rate": 4.792845547309833e-05,
"loss": 0.3315,
"step": 4575
},
{
"epoch": 3.163601993469668,
"grad_norm": 0.7523996829986572,
"learning_rate": 4.7913961038961045e-05,
"loss": 0.3368,
"step": 4600
},
{
"epoch": 3.1807870768173228,
"grad_norm": 1.0035797357559204,
"learning_rate": 4.789946660482375e-05,
"loss": 0.3931,
"step": 4625
},
{
"epoch": 3.197972160164977,
"grad_norm": 0.5936137437820435,
"learning_rate": 4.788497217068646e-05,
"loss": 0.2896,
"step": 4650
},
{
"epoch": 3.215157243512631,
"grad_norm": 0.5628079771995544,
"learning_rate": 4.787047773654917e-05,
"loss": 0.3632,
"step": 4675
},
{
"epoch": 3.232342326860285,
"grad_norm": 0.5175526738166809,
"learning_rate": 4.7855983302411874e-05,
"loss": 0.33,
"step": 4700
},
{
"epoch": 3.2495274102079397,
"grad_norm": 0.36029067635536194,
"learning_rate": 4.784148886827459e-05,
"loss": 0.3487,
"step": 4725
},
{
"epoch": 3.266712493555594,
"grad_norm": 0.6295140981674194,
"learning_rate": 4.7826994434137296e-05,
"loss": 0.3391,
"step": 4750
},
{
"epoch": 3.283897576903248,
"grad_norm": 0.535555362701416,
"learning_rate": 4.7812500000000003e-05,
"loss": 0.3804,
"step": 4775
},
{
"epoch": 3.301082660250902,
"grad_norm": 1.0075314044952393,
"learning_rate": 4.779800556586271e-05,
"loss": 0.3338,
"step": 4800
},
{
"epoch": 3.3182677435985566,
"grad_norm": 0.3681296110153198,
"learning_rate": 4.7783511131725425e-05,
"loss": 0.3727,
"step": 4825
},
{
"epoch": 3.3354528269462107,
"grad_norm": 0.5938307642936707,
"learning_rate": 4.7769016697588126e-05,
"loss": 0.3565,
"step": 4850
},
{
"epoch": 3.352637910293865,
"grad_norm": 0.4364496171474457,
"learning_rate": 4.775452226345083e-05,
"loss": 0.3459,
"step": 4875
},
{
"epoch": 3.3698229936415194,
"grad_norm": 0.5793933272361755,
"learning_rate": 4.774002782931354e-05,
"loss": 0.3653,
"step": 4900
},
{
"epoch": 3.3870080769891735,
"grad_norm": 0.41033703088760376,
"learning_rate": 4.7725533395176255e-05,
"loss": 0.3629,
"step": 4925
},
{
"epoch": 3.4041931603368276,
"grad_norm": 0.6783180236816406,
"learning_rate": 4.771103896103896e-05,
"loss": 0.3258,
"step": 4950
},
{
"epoch": 3.4213782436844817,
"grad_norm": 0.4356047511100769,
"learning_rate": 4.769654452690167e-05,
"loss": 0.3423,
"step": 4975
},
{
"epoch": 3.4385633270321363,
"grad_norm": 0.645196795463562,
"learning_rate": 4.768205009276438e-05,
"loss": 0.322,
"step": 5000
},
{
"epoch": 3.4385633270321363,
"eval_loss": 0.3426838219165802,
"eval_runtime": 154.5652,
"eval_samples_per_second": 7.925,
"eval_steps_per_second": 0.996,
"eval_wer": 0.5637982195845698,
"step": 5000
},
{
"epoch": 3.4557484103797904,
"grad_norm": 0.7585852742195129,
"learning_rate": 4.766755565862709e-05,
"loss": 0.3615,
"step": 5025
},
{
"epoch": 3.4729334937274445,
"grad_norm": 0.6891105771064758,
"learning_rate": 4.76530612244898e-05,
"loss": 0.3016,
"step": 5050
},
{
"epoch": 3.4901185770750986,
"grad_norm": 0.40171509981155396,
"learning_rate": 4.7638566790352506e-05,
"loss": 0.3389,
"step": 5075
},
{
"epoch": 3.507303660422753,
"grad_norm": 0.6688668131828308,
"learning_rate": 4.7624072356215214e-05,
"loss": 0.3196,
"step": 5100
},
{
"epoch": 3.5244887437704073,
"grad_norm": 0.7812600135803223,
"learning_rate": 4.760957792207793e-05,
"loss": 0.3795,
"step": 5125
},
{
"epoch": 3.5416738271180614,
"grad_norm": 0.6127042174339294,
"learning_rate": 4.7595083487940635e-05,
"loss": 0.3422,
"step": 5150
},
{
"epoch": 3.558858910465716,
"grad_norm": 0.4893074333667755,
"learning_rate": 4.758058905380334e-05,
"loss": 0.3759,
"step": 5175
},
{
"epoch": 3.57604399381337,
"grad_norm": 0.7298163175582886,
"learning_rate": 4.756609461966605e-05,
"loss": 0.3388,
"step": 5200
},
{
"epoch": 3.5932290771610242,
"grad_norm": 0.5726421475410461,
"learning_rate": 4.755160018552876e-05,
"loss": 0.4172,
"step": 5225
},
{
"epoch": 3.6104141605086784,
"grad_norm": 0.5771546959877014,
"learning_rate": 4.753710575139147e-05,
"loss": 0.3217,
"step": 5250
},
{
"epoch": 3.6275992438563325,
"grad_norm": 0.48307299613952637,
"learning_rate": 4.752261131725418e-05,
"loss": 0.3864,
"step": 5275
},
{
"epoch": 3.644784327203987,
"grad_norm": 0.5440219044685364,
"learning_rate": 4.750811688311689e-05,
"loss": 0.3288,
"step": 5300
},
{
"epoch": 3.661969410551641,
"grad_norm": 0.4851985275745392,
"learning_rate": 4.7493622448979594e-05,
"loss": 0.4105,
"step": 5325
},
{
"epoch": 3.6791544938992953,
"grad_norm": 0.537399172782898,
"learning_rate": 4.74791280148423e-05,
"loss": 0.3309,
"step": 5350
},
{
"epoch": 3.69633957724695,
"grad_norm": 0.4729978144168854,
"learning_rate": 4.746463358070501e-05,
"loss": 0.3604,
"step": 5375
},
{
"epoch": 3.713524660594604,
"grad_norm": 0.8028717041015625,
"learning_rate": 4.7450139146567716e-05,
"loss": 0.2908,
"step": 5400
},
{
"epoch": 3.730709743942258,
"grad_norm": 0.40672922134399414,
"learning_rate": 4.7435644712430424e-05,
"loss": 0.3871,
"step": 5425
},
{
"epoch": 3.747894827289912,
"grad_norm": 0.6138872504234314,
"learning_rate": 4.742115027829314e-05,
"loss": 0.3166,
"step": 5450
},
{
"epoch": 3.7650799106375668,
"grad_norm": 0.4270385801792145,
"learning_rate": 4.7406655844155846e-05,
"loss": 0.3449,
"step": 5475
},
{
"epoch": 3.782264993985221,
"grad_norm": 0.535121500492096,
"learning_rate": 4.739216141001855e-05,
"loss": 0.2921,
"step": 5500
},
{
"epoch": 3.782264993985221,
"eval_loss": 0.3344533145427704,
"eval_runtime": 150.3102,
"eval_samples_per_second": 8.15,
"eval_steps_per_second": 1.025,
"eval_wer": 0.5604069520983468,
"step": 5500
},
{
"epoch": 3.799450077332875,
"grad_norm": 0.3871770203113556,
"learning_rate": 4.737766697588126e-05,
"loss": 0.349,
"step": 5525
},
{
"epoch": 3.816635160680529,
"grad_norm": 0.503182053565979,
"learning_rate": 4.7363172541743975e-05,
"loss": 0.3707,
"step": 5550
},
{
"epoch": 3.8338202440281837,
"grad_norm": 0.4528012275695801,
"learning_rate": 4.734867810760668e-05,
"loss": 0.359,
"step": 5575
},
{
"epoch": 3.851005327375838,
"grad_norm": 0.630174458026886,
"learning_rate": 4.733418367346939e-05,
"loss": 0.3558,
"step": 5600
},
{
"epoch": 3.868190410723492,
"grad_norm": 0.4319029450416565,
"learning_rate": 4.73196892393321e-05,
"loss": 0.3812,
"step": 5625
},
{
"epoch": 3.8853754940711465,
"grad_norm": 0.5308706760406494,
"learning_rate": 4.730519480519481e-05,
"loss": 0.2885,
"step": 5650
},
{
"epoch": 3.9025605774188006,
"grad_norm": 0.4054734408855438,
"learning_rate": 4.729070037105752e-05,
"loss": 0.363,
"step": 5675
},
{
"epoch": 3.9197456607664547,
"grad_norm": 0.8012121319770813,
"learning_rate": 4.7276205936920226e-05,
"loss": 0.3398,
"step": 5700
},
{
"epoch": 3.936930744114109,
"grad_norm": 0.4499848783016205,
"learning_rate": 4.7261711502782933e-05,
"loss": 0.3485,
"step": 5725
},
{
"epoch": 3.954115827461763,
"grad_norm": 0.5845701098442078,
"learning_rate": 4.724721706864564e-05,
"loss": 0.3188,
"step": 5750
},
{
"epoch": 3.9713009108094175,
"grad_norm": 0.4100358486175537,
"learning_rate": 4.7232722634508355e-05,
"loss": 0.3479,
"step": 5775
},
{
"epoch": 3.9884859941570716,
"grad_norm": 0.42875462770462036,
"learning_rate": 4.721822820037106e-05,
"loss": 0.3043,
"step": 5800
},
{
"epoch": 4.006186630005155,
"grad_norm": 0.4040299355983734,
"learning_rate": 4.720373376623377e-05,
"loss": 0.4045,
"step": 5825
},
{
"epoch": 4.02337171335281,
"grad_norm": 0.4601922035217285,
"learning_rate": 4.718923933209648e-05,
"loss": 0.3044,
"step": 5850
},
{
"epoch": 4.040556796700464,
"grad_norm": 0.5125726461410522,
"learning_rate": 4.7174744897959185e-05,
"loss": 0.342,
"step": 5875
},
{
"epoch": 4.057741880048118,
"grad_norm": 0.5830023884773254,
"learning_rate": 4.716025046382189e-05,
"loss": 0.2916,
"step": 5900
},
{
"epoch": 4.0749269633957725,
"grad_norm": 0.3486079275608063,
"learning_rate": 4.71457560296846e-05,
"loss": 0.3215,
"step": 5925
},
{
"epoch": 4.092112046743427,
"grad_norm": 0.5681314468383789,
"learning_rate": 4.713126159554731e-05,
"loss": 0.3049,
"step": 5950
},
{
"epoch": 4.109297130091081,
"grad_norm": 0.3579752743244171,
"learning_rate": 4.711676716141002e-05,
"loss": 0.3167,
"step": 5975
},
{
"epoch": 4.126482213438735,
"grad_norm": 0.8702667355537415,
"learning_rate": 4.710227272727273e-05,
"loss": 0.3037,
"step": 6000
},
{
"epoch": 4.126482213438735,
"eval_loss": 0.335175484418869,
"eval_runtime": 150.3279,
"eval_samples_per_second": 8.149,
"eval_steps_per_second": 1.024,
"eval_wer": 0.5541013141161509,
"step": 6000
},
{
"epoch": 4.143667296786389,
"grad_norm": 0.6025490760803223,
"learning_rate": 4.7087778293135436e-05,
"loss": 0.3527,
"step": 6025
},
{
"epoch": 4.160852380134044,
"grad_norm": 0.46763402223587036,
"learning_rate": 4.7073283858998144e-05,
"loss": 0.3182,
"step": 6050
},
{
"epoch": 4.178037463481698,
"grad_norm": 0.38680383563041687,
"learning_rate": 4.705878942486086e-05,
"loss": 0.3234,
"step": 6075
},
{
"epoch": 4.195222546829352,
"grad_norm": 0.45606276392936707,
"learning_rate": 4.7044294990723565e-05,
"loss": 0.3418,
"step": 6100
},
{
"epoch": 4.212407630177006,
"grad_norm": 0.5080279111862183,
"learning_rate": 4.702980055658627e-05,
"loss": 0.3402,
"step": 6125
},
{
"epoch": 4.229592713524661,
"grad_norm": 0.5734138488769531,
"learning_rate": 4.701530612244898e-05,
"loss": 0.3026,
"step": 6150
},
{
"epoch": 4.246777796872315,
"grad_norm": 0.34839344024658203,
"learning_rate": 4.7000811688311694e-05,
"loss": 0.3422,
"step": 6175
},
{
"epoch": 4.263962880219969,
"grad_norm": 0.5648381114006042,
"learning_rate": 4.69863172541744e-05,
"loss": 0.3075,
"step": 6200
},
{
"epoch": 4.281147963567623,
"grad_norm": 0.5454294681549072,
"learning_rate": 4.697182282003711e-05,
"loss": 0.3528,
"step": 6225
},
{
"epoch": 4.298333046915277,
"grad_norm": 0.5028226375579834,
"learning_rate": 4.695732838589982e-05,
"loss": 0.2952,
"step": 6250
},
{
"epoch": 4.315518130262932,
"grad_norm": 0.45058056712150574,
"learning_rate": 4.6942833951762524e-05,
"loss": 0.3535,
"step": 6275
},
{
"epoch": 4.332703213610586,
"grad_norm": 0.6654832363128662,
"learning_rate": 4.692833951762524e-05,
"loss": 0.3127,
"step": 6300
},
{
"epoch": 4.34988829695824,
"grad_norm": 0.49009886384010315,
"learning_rate": 4.6913845083487946e-05,
"loss": 0.3419,
"step": 6325
},
{
"epoch": 4.367073380305895,
"grad_norm": 0.5751463770866394,
"learning_rate": 4.689935064935065e-05,
"loss": 0.3069,
"step": 6350
},
{
"epoch": 4.384258463653548,
"grad_norm": 0.7767444849014282,
"learning_rate": 4.688485621521336e-05,
"loss": 0.3666,
"step": 6375
},
{
"epoch": 4.401443547001203,
"grad_norm": 0.5131709575653076,
"learning_rate": 4.687036178107607e-05,
"loss": 0.329,
"step": 6400
},
{
"epoch": 4.4186286303488576,
"grad_norm": 0.4997400939464569,
"learning_rate": 4.6855867346938776e-05,
"loss": 0.365,
"step": 6425
},
{
"epoch": 4.435813713696511,
"grad_norm": 0.5275589227676392,
"learning_rate": 4.684137291280148e-05,
"loss": 0.31,
"step": 6450
},
{
"epoch": 4.452998797044166,
"grad_norm": 0.43651729822158813,
"learning_rate": 4.682687847866419e-05,
"loss": 0.3253,
"step": 6475
},
{
"epoch": 4.47018388039182,
"grad_norm": 0.49254560470581055,
"learning_rate": 4.6812384044526905e-05,
"loss": 0.2695,
"step": 6500
},
{
"epoch": 4.47018388039182,
"eval_loss": 0.32015639543533325,
"eval_runtime": 154.5447,
"eval_samples_per_second": 7.927,
"eval_steps_per_second": 0.996,
"eval_wer": 0.5515048749470114,
"step": 6500
},
{
"epoch": 4.487368963739474,
"grad_norm": 0.3705599904060364,
"learning_rate": 4.679788961038961e-05,
"loss": 0.3453,
"step": 6525
},
{
"epoch": 4.504554047087129,
"grad_norm": 0.5254660844802856,
"learning_rate": 4.678339517625232e-05,
"loss": 0.282,
"step": 6550
},
{
"epoch": 4.521739130434782,
"grad_norm": 0.37494751811027527,
"learning_rate": 4.676890074211503e-05,
"loss": 0.3491,
"step": 6575
},
{
"epoch": 4.538924213782437,
"grad_norm": 0.5620461702346802,
"learning_rate": 4.675440630797774e-05,
"loss": 0.3074,
"step": 6600
},
{
"epoch": 4.556109297130091,
"grad_norm": 0.8100690245628357,
"learning_rate": 4.673991187384045e-05,
"loss": 0.3366,
"step": 6625
},
{
"epoch": 4.573294380477745,
"grad_norm": 0.7091922760009766,
"learning_rate": 4.6725417439703156e-05,
"loss": 0.2864,
"step": 6650
},
{
"epoch": 4.5904794638254,
"grad_norm": 0.38283970952033997,
"learning_rate": 4.6710923005565864e-05,
"loss": 0.3512,
"step": 6675
},
{
"epoch": 4.607664547173053,
"grad_norm": 0.5631033182144165,
"learning_rate": 4.669642857142857e-05,
"loss": 0.2884,
"step": 6700
},
{
"epoch": 4.624849630520708,
"grad_norm": 0.3868861794471741,
"learning_rate": 4.6681934137291285e-05,
"loss": 0.3413,
"step": 6725
},
{
"epoch": 4.642034713868362,
"grad_norm": 0.6378294229507446,
"learning_rate": 4.666743970315399e-05,
"loss": 0.2949,
"step": 6750
},
{
"epoch": 4.659219797216016,
"grad_norm": 0.5135634541511536,
"learning_rate": 4.66529452690167e-05,
"loss": 0.3463,
"step": 6775
},
{
"epoch": 4.676404880563671,
"grad_norm": 0.4989064633846283,
"learning_rate": 4.663845083487941e-05,
"loss": 0.3027,
"step": 6800
},
{
"epoch": 4.693589963911325,
"grad_norm": 0.5633465647697449,
"learning_rate": 4.662395640074212e-05,
"loss": 0.3626,
"step": 6825
},
{
"epoch": 4.710775047258979,
"grad_norm": 0.6484938859939575,
"learning_rate": 4.660946196660483e-05,
"loss": 0.2881,
"step": 6850
},
{
"epoch": 4.7279601306066334,
"grad_norm": 0.4283730983734131,
"learning_rate": 4.6594967532467537e-05,
"loss": 0.3452,
"step": 6875
},
{
"epoch": 4.745145213954288,
"grad_norm": 0.5287323594093323,
"learning_rate": 4.6580473098330244e-05,
"loss": 0.3015,
"step": 6900
},
{
"epoch": 4.762330297301942,
"grad_norm": 0.41760608553886414,
"learning_rate": 4.656597866419295e-05,
"loss": 0.3032,
"step": 6925
},
{
"epoch": 4.779515380649596,
"grad_norm": 0.6855202317237854,
"learning_rate": 4.655148423005566e-05,
"loss": 0.3006,
"step": 6950
},
{
"epoch": 4.796700463997251,
"grad_norm": 0.45387232303619385,
"learning_rate": 4.6536989795918366e-05,
"loss": 0.3428,
"step": 6975
},
{
"epoch": 4.8138855473449045,
"grad_norm": 0.8281689286231995,
"learning_rate": 4.6522495361781074e-05,
"loss": 0.2804,
"step": 7000
},
{
"epoch": 4.8138855473449045,
"eval_loss": 0.3352712392807007,
"eval_runtime": 156.292,
"eval_samples_per_second": 7.838,
"eval_steps_per_second": 0.985,
"eval_wer": 0.5525116574819839,
"step": 7000
},
{
"epoch": 4.831070630692559,
"grad_norm": 0.4252523183822632,
"learning_rate": 4.650800092764379e-05,
"loss": 0.3456,
"step": 7025
},
{
"epoch": 4.848255714040213,
"grad_norm": 0.536359429359436,
"learning_rate": 4.6493506493506495e-05,
"loss": 0.3016,
"step": 7050
},
{
"epoch": 4.865440797387867,
"grad_norm": 0.48082077503204346,
"learning_rate": 4.64790120593692e-05,
"loss": 0.3323,
"step": 7075
},
{
"epoch": 4.882625880735522,
"grad_norm": 0.7152004837989807,
"learning_rate": 4.646451762523191e-05,
"loss": 0.3102,
"step": 7100
},
{
"epoch": 4.8998109640831755,
"grad_norm": 0.43397244811058044,
"learning_rate": 4.6450023191094625e-05,
"loss": 0.3671,
"step": 7125
},
{
"epoch": 4.91699604743083,
"grad_norm": 1.093762755393982,
"learning_rate": 4.643552875695733e-05,
"loss": 0.299,
"step": 7150
},
{
"epoch": 4.934181130778485,
"grad_norm": 0.3550453782081604,
"learning_rate": 4.642103432282004e-05,
"loss": 0.3351,
"step": 7175
},
{
"epoch": 4.951366214126138,
"grad_norm": 0.6337935924530029,
"learning_rate": 4.640653988868275e-05,
"loss": 0.2952,
"step": 7200
},
{
"epoch": 4.968551297473793,
"grad_norm": 0.5126771926879883,
"learning_rate": 4.6392045454545454e-05,
"loss": 0.3431,
"step": 7225
},
{
"epoch": 4.9857363808214465,
"grad_norm": 0.4208792448043823,
"learning_rate": 4.637755102040817e-05,
"loss": 0.3025,
"step": 7250
},
{
"epoch": 5.003437016669531,
"grad_norm": 0.5154265761375427,
"learning_rate": 4.6363056586270876e-05,
"loss": 0.3215,
"step": 7275
},
{
"epoch": 5.020622100017185,
"grad_norm": 0.4937199652194977,
"learning_rate": 4.634856215213358e-05,
"loss": 0.2898,
"step": 7300
},
{
"epoch": 5.03780718336484,
"grad_norm": 0.4737917482852936,
"learning_rate": 4.633406771799629e-05,
"loss": 0.3186,
"step": 7325
},
{
"epoch": 5.054992266712493,
"grad_norm": 0.39069080352783203,
"learning_rate": 4.6319573283859005e-05,
"loss": 0.299,
"step": 7350
},
{
"epoch": 5.072177350060148,
"grad_norm": 0.41207846999168396,
"learning_rate": 4.630507884972171e-05,
"loss": 0.3261,
"step": 7375
},
{
"epoch": 5.089362433407802,
"grad_norm": 0.5971049070358276,
"learning_rate": 4.629058441558442e-05,
"loss": 0.279,
"step": 7400
},
{
"epoch": 5.106547516755456,
"grad_norm": 0.41475459933280945,
"learning_rate": 4.627608998144712e-05,
"loss": 0.3126,
"step": 7425
},
{
"epoch": 5.123732600103111,
"grad_norm": 0.5062717795372009,
"learning_rate": 4.6261595547309835e-05,
"loss": 0.2741,
"step": 7450
},
{
"epoch": 5.140917683450764,
"grad_norm": 0.5244805812835693,
"learning_rate": 4.624710111317254e-05,
"loss": 0.2913,
"step": 7475
},
{
"epoch": 5.158102766798419,
"grad_norm": 0.7847909927368164,
"learning_rate": 4.623260667903525e-05,
"loss": 0.2908,
"step": 7500
},
{
"epoch": 5.158102766798419,
"eval_loss": 0.3383817970752716,
"eval_runtime": 159.5151,
"eval_samples_per_second": 7.68,
"eval_steps_per_second": 0.965,
"eval_wer": 0.5484845273420941,
"step": 7500
},
{
"epoch": 5.1752878501460735,
"grad_norm": 0.44492971897125244,
"learning_rate": 4.621811224489796e-05,
"loss": 0.3359,
"step": 7525
},
{
"epoch": 5.192472933493727,
"grad_norm": 0.8884369730949402,
"learning_rate": 4.620361781076067e-05,
"loss": 0.286,
"step": 7550
},
{
"epoch": 5.209658016841382,
"grad_norm": 0.4650115966796875,
"learning_rate": 4.618912337662338e-05,
"loss": 0.3538,
"step": 7575
},
{
"epoch": 5.226843100189036,
"grad_norm": 0.46531254053115845,
"learning_rate": 4.6174628942486086e-05,
"loss": 0.2828,
"step": 7600
},
{
"epoch": 5.24402818353669,
"grad_norm": 0.5248584747314453,
"learning_rate": 4.6160134508348794e-05,
"loss": 0.3166,
"step": 7625
},
{
"epoch": 5.2612132668843445,
"grad_norm": 0.676996648311615,
"learning_rate": 4.614564007421151e-05,
"loss": 0.3141,
"step": 7650
},
{
"epoch": 5.278398350231998,
"grad_norm": 0.40085482597351074,
"learning_rate": 4.6131145640074215e-05,
"loss": 0.3099,
"step": 7675
},
{
"epoch": 5.295583433579653,
"grad_norm": 0.5248492956161499,
"learning_rate": 4.611665120593692e-05,
"loss": 0.2905,
"step": 7700
},
{
"epoch": 5.312768516927307,
"grad_norm": 0.3710981607437134,
"learning_rate": 4.610215677179963e-05,
"loss": 0.3114,
"step": 7725
},
{
"epoch": 5.329953600274961,
"grad_norm": 0.607997477054596,
"learning_rate": 4.608766233766234e-05,
"loss": 0.2814,
"step": 7750
},
{
"epoch": 5.347138683622616,
"grad_norm": 0.41846323013305664,
"learning_rate": 4.607316790352505e-05,
"loss": 0.3442,
"step": 7775
},
{
"epoch": 5.36432376697027,
"grad_norm": 0.7187564373016357,
"learning_rate": 4.605867346938776e-05,
"loss": 0.3089,
"step": 7800
},
{
"epoch": 5.381508850317924,
"grad_norm": 0.6370894312858582,
"learning_rate": 4.604417903525047e-05,
"loss": 0.2802,
"step": 7825
},
{
"epoch": 5.398693933665578,
"grad_norm": 0.4946443736553192,
"learning_rate": 4.6029684601113174e-05,
"loss": 0.2752,
"step": 7850
},
{
"epoch": 5.415879017013232,
"grad_norm": 0.5713298916816711,
"learning_rate": 4.601519016697589e-05,
"loss": 0.3335,
"step": 7875
},
{
"epoch": 5.433064100360887,
"grad_norm": 0.5185278058052063,
"learning_rate": 4.6000695732838596e-05,
"loss": 0.2779,
"step": 7900
},
{
"epoch": 5.450249183708541,
"grad_norm": 0.6777128577232361,
"learning_rate": 4.59862012987013e-05,
"loss": 0.2907,
"step": 7925
},
{
"epoch": 5.467434267056195,
"grad_norm": 0.4836239814758301,
"learning_rate": 4.5971706864564004e-05,
"loss": 0.2726,
"step": 7950
},
{
"epoch": 5.484619350403849,
"grad_norm": 0.4827396273612976,
"learning_rate": 4.595721243042672e-05,
"loss": 0.2956,
"step": 7975
},
{
"epoch": 5.501804433751504,
"grad_norm": 0.4124370813369751,
"learning_rate": 4.5942717996289425e-05,
"loss": 0.2646,
"step": 8000
},
{
"epoch": 5.501804433751504,
"eval_loss": 0.31637853384017944,
"eval_runtime": 154.147,
"eval_samples_per_second": 7.947,
"eval_steps_per_second": 0.999,
"eval_wer": 0.5461530309453159,
"step": 8000
},
{
"epoch": 5.518989517099158,
"grad_norm": 0.4147077202796936,
"learning_rate": 4.592822356215213e-05,
"loss": 0.3084,
"step": 8025
},
{
"epoch": 5.536174600446812,
"grad_norm": 0.5999482274055481,
"learning_rate": 4.591372912801484e-05,
"loss": 0.2824,
"step": 8050
},
{
"epoch": 5.553359683794467,
"grad_norm": 0.4082586169242859,
"learning_rate": 4.5899234693877555e-05,
"loss": 0.3095,
"step": 8075
},
{
"epoch": 5.57054476714212,
"grad_norm": 0.45266950130462646,
"learning_rate": 4.588474025974026e-05,
"loss": 0.2672,
"step": 8100
},
{
"epoch": 5.587729850489775,
"grad_norm": 0.4024001955986023,
"learning_rate": 4.587024582560297e-05,
"loss": 0.3147,
"step": 8125
},
{
"epoch": 5.60491493383743,
"grad_norm": 0.61323481798172,
"learning_rate": 4.585575139146568e-05,
"loss": 0.2461,
"step": 8150
},
{
"epoch": 5.622100017185083,
"grad_norm": 0.7370169758796692,
"learning_rate": 4.584125695732839e-05,
"loss": 0.3364,
"step": 8175
},
{
"epoch": 5.639285100532738,
"grad_norm": 0.5105010867118835,
"learning_rate": 4.58267625231911e-05,
"loss": 0.2749,
"step": 8200
},
{
"epoch": 5.6564701838803915,
"grad_norm": 0.4862951636314392,
"learning_rate": 4.5812268089053806e-05,
"loss": 0.318,
"step": 8225
},
{
"epoch": 5.673655267228046,
"grad_norm": 0.5929895639419556,
"learning_rate": 4.5797773654916513e-05,
"loss": 0.3215,
"step": 8250
},
{
"epoch": 5.690840350575701,
"grad_norm": 0.5355464220046997,
"learning_rate": 4.578327922077922e-05,
"loss": 0.343,
"step": 8275
},
{
"epoch": 5.708025433923354,
"grad_norm": 0.6044451594352722,
"learning_rate": 4.5768784786641935e-05,
"loss": 0.2761,
"step": 8300
},
{
"epoch": 5.725210517271009,
"grad_norm": 0.5010135769844055,
"learning_rate": 4.575429035250464e-05,
"loss": 0.3525,
"step": 8325
},
{
"epoch": 5.742395600618663,
"grad_norm": 0.592808723449707,
"learning_rate": 4.573979591836735e-05,
"loss": 0.2749,
"step": 8350
},
{
"epoch": 5.759580683966317,
"grad_norm": 0.5672963261604309,
"learning_rate": 4.572530148423006e-05,
"loss": 0.3711,
"step": 8375
},
{
"epoch": 5.776765767313972,
"grad_norm": 0.8346742391586304,
"learning_rate": 4.571080705009277e-05,
"loss": 0.272,
"step": 8400
},
{
"epoch": 5.793950850661625,
"grad_norm": 0.5474342107772827,
"learning_rate": 4.569631261595548e-05,
"loss": 0.3022,
"step": 8425
},
{
"epoch": 5.81113593400928,
"grad_norm": 0.5067320466041565,
"learning_rate": 4.5681818181818186e-05,
"loss": 0.2632,
"step": 8450
},
{
"epoch": 5.828321017356934,
"grad_norm": 0.4363684356212616,
"learning_rate": 4.566732374768089e-05,
"loss": 0.2961,
"step": 8475
},
{
"epoch": 5.845506100704588,
"grad_norm": 0.6637271642684937,
"learning_rate": 4.56528293135436e-05,
"loss": 0.2982,
"step": 8500
},
{
"epoch": 5.845506100704588,
"eval_loss": 0.3142754137516022,
"eval_runtime": 153.2422,
"eval_samples_per_second": 7.994,
"eval_steps_per_second": 1.005,
"eval_wer": 0.5454641797371768,
"step": 8500
},
{
"epoch": 5.862691184052243,
"grad_norm": 0.5597206950187683,
"learning_rate": 4.563833487940631e-05,
"loss": 0.3282,
"step": 8525
},
{
"epoch": 5.879876267399897,
"grad_norm": 0.5069429278373718,
"learning_rate": 4.5623840445269016e-05,
"loss": 0.2819,
"step": 8550
},
{
"epoch": 5.897061350747551,
"grad_norm": 0.4272073805332184,
"learning_rate": 4.5609346011131724e-05,
"loss": 0.3032,
"step": 8575
},
{
"epoch": 5.9142464340952055,
"grad_norm": 0.46952158212661743,
"learning_rate": 4.559485157699444e-05,
"loss": 0.2979,
"step": 8600
},
{
"epoch": 5.93143151744286,
"grad_norm": 0.46362563967704773,
"learning_rate": 4.5580357142857145e-05,
"loss": 0.3042,
"step": 8625
},
{
"epoch": 5.948616600790514,
"grad_norm": 0.6578242778778076,
"learning_rate": 4.556586270871985e-05,
"loss": 0.2558,
"step": 8650
},
{
"epoch": 5.965801684138168,
"grad_norm": 0.5568517446517944,
"learning_rate": 4.555136827458256e-05,
"loss": 0.3127,
"step": 8675
},
{
"epoch": 5.982986767485822,
"grad_norm": 0.5202658772468567,
"learning_rate": 4.5536873840445274e-05,
"loss": 0.2956,
"step": 8700
},
{
"epoch": 6.000687403333906,
"grad_norm": 0.5851805210113525,
"learning_rate": 4.552237940630798e-05,
"loss": 0.3197,
"step": 8725
},
{
"epoch": 6.0178724866815605,
"grad_norm": 0.4245930314064026,
"learning_rate": 4.550788497217069e-05,
"loss": 0.2537,
"step": 8750
},
{
"epoch": 6.035057570029215,
"grad_norm": 0.3843390643596649,
"learning_rate": 4.54933905380334e-05,
"loss": 0.2552,
"step": 8775
},
{
"epoch": 6.052242653376869,
"grad_norm": 0.4880934953689575,
"learning_rate": 4.5478896103896104e-05,
"loss": 0.2669,
"step": 8800
},
{
"epoch": 6.069427736724523,
"grad_norm": 0.44671300053596497,
"learning_rate": 4.546440166975882e-05,
"loss": 0.2894,
"step": 8825
},
{
"epoch": 6.086612820072177,
"grad_norm": 0.6307169795036316,
"learning_rate": 4.5449907235621526e-05,
"loss": 0.3032,
"step": 8850
},
{
"epoch": 6.1037979034198315,
"grad_norm": 0.4117954969406128,
"learning_rate": 4.543541280148423e-05,
"loss": 0.2863,
"step": 8875
},
{
"epoch": 6.120982986767486,
"grad_norm": 0.41599756479263306,
"learning_rate": 4.542091836734694e-05,
"loss": 0.2804,
"step": 8900
},
{
"epoch": 6.13816807011514,
"grad_norm": 0.5033993124961853,
"learning_rate": 4.5406423933209655e-05,
"loss": 0.2971,
"step": 8925
},
{
"epoch": 6.155353153462794,
"grad_norm": 0.39483895897865295,
"learning_rate": 4.539192949907236e-05,
"loss": 0.2485,
"step": 8950
},
{
"epoch": 6.172538236810449,
"grad_norm": 0.48659563064575195,
"learning_rate": 4.537743506493506e-05,
"loss": 0.2847,
"step": 8975
},
{
"epoch": 6.189723320158103,
"grad_norm": 0.7809969186782837,
"learning_rate": 4.536294063079777e-05,
"loss": 0.2978,
"step": 9000
},
{
"epoch": 6.189723320158103,
"eval_loss": 0.3218280076980591,
"eval_runtime": 151.9921,
"eval_samples_per_second": 8.06,
"eval_steps_per_second": 1.013,
"eval_wer": 0.5423908435777872,
"step": 9000
},
{
"epoch": 6.206908403505757,
"grad_norm": 0.6053128242492676,
"learning_rate": 4.5348446196660485e-05,
"loss": 0.275,
"step": 9025
},
{
"epoch": 6.224093486853411,
"grad_norm": 0.4554101526737213,
"learning_rate": 4.533395176252319e-05,
"loss": 0.2926,
"step": 9050
},
{
"epoch": 6.241278570201065,
"grad_norm": 0.4731072187423706,
"learning_rate": 4.53194573283859e-05,
"loss": 0.2921,
"step": 9075
},
{
"epoch": 6.25846365354872,
"grad_norm": 0.4384573996067047,
"learning_rate": 4.530496289424861e-05,
"loss": 0.2827,
"step": 9100
},
{
"epoch": 6.275648736896374,
"grad_norm": 0.5838501453399658,
"learning_rate": 4.529046846011132e-05,
"loss": 0.2924,
"step": 9125
},
{
"epoch": 6.292833820244028,
"grad_norm": 0.5555030703544617,
"learning_rate": 4.527597402597403e-05,
"loss": 0.2839,
"step": 9150
},
{
"epoch": 6.310018903591683,
"grad_norm": 0.7978671193122864,
"learning_rate": 4.5261479591836736e-05,
"loss": 0.2808,
"step": 9175
},
{
"epoch": 6.327203986939336,
"grad_norm": 0.4151977002620697,
"learning_rate": 4.5246985157699443e-05,
"loss": 0.2767,
"step": 9200
},
{
"epoch": 6.344389070286991,
"grad_norm": 0.5371110439300537,
"learning_rate": 4.523249072356215e-05,
"loss": 0.3124,
"step": 9225
},
{
"epoch": 6.3615741536346455,
"grad_norm": 0.4500742554664612,
"learning_rate": 4.5217996289424865e-05,
"loss": 0.2731,
"step": 9250
},
{
"epoch": 6.378759236982299,
"grad_norm": 0.5459848642349243,
"learning_rate": 4.520350185528757e-05,
"loss": 0.3118,
"step": 9275
},
{
"epoch": 6.395944320329954,
"grad_norm": 0.7165120244026184,
"learning_rate": 4.518900742115028e-05,
"loss": 0.2781,
"step": 9300
},
{
"epoch": 6.413129403677607,
"grad_norm": 0.4492073655128479,
"learning_rate": 4.517451298701299e-05,
"loss": 0.2652,
"step": 9325
},
{
"epoch": 6.430314487025262,
"grad_norm": 0.8615822196006775,
"learning_rate": 4.51600185528757e-05,
"loss": 0.265,
"step": 9350
},
{
"epoch": 6.4474995703729165,
"grad_norm": 0.37190139293670654,
"learning_rate": 4.514552411873841e-05,
"loss": 0.2839,
"step": 9375
},
{
"epoch": 6.46468465372057,
"grad_norm": 0.5044118762016296,
"learning_rate": 4.5131029684601116e-05,
"loss": 0.2737,
"step": 9400
},
{
"epoch": 6.481869737068225,
"grad_norm": 0.8805606961250305,
"learning_rate": 4.5116535250463824e-05,
"loss": 0.2658,
"step": 9425
},
{
"epoch": 6.499054820415879,
"grad_norm": 0.52882981300354,
"learning_rate": 4.510204081632654e-05,
"loss": 0.2875,
"step": 9450
},
{
"epoch": 6.516239903763533,
"grad_norm": 0.8483859896659851,
"learning_rate": 4.5087546382189246e-05,
"loss": 0.3182,
"step": 9475
},
{
"epoch": 6.533424987111188,
"grad_norm": 0.5920891165733337,
"learning_rate": 4.5073051948051946e-05,
"loss": 0.288,
"step": 9500
},
{
"epoch": 6.533424987111188,
"eval_loss": 0.3151616156101227,
"eval_runtime": 152.9497,
"eval_samples_per_second": 8.009,
"eval_steps_per_second": 1.007,
"eval_wer": 0.5417549809241204,
"step": 9500
},
{
"epoch": 6.550610070458841,
"grad_norm": 0.5297147035598755,
"learning_rate": 4.5059137291280155e-05,
"loss": 0.2972,
"step": 9525
},
{
"epoch": 6.567795153806496,
"grad_norm": 0.43849292397499084,
"learning_rate": 4.504464285714286e-05,
"loss": 0.2753,
"step": 9550
},
{
"epoch": 6.58498023715415,
"grad_norm": 0.5231007933616638,
"learning_rate": 4.503014842300557e-05,
"loss": 0.2916,
"step": 9575
},
{
"epoch": 6.602165320501804,
"grad_norm": 0.3840227425098419,
"learning_rate": 4.501565398886828e-05,
"loss": 0.2569,
"step": 9600
},
{
"epoch": 6.619350403849459,
"grad_norm": 0.4950826168060303,
"learning_rate": 4.5001159554730984e-05,
"loss": 0.3198,
"step": 9625
},
{
"epoch": 6.636535487197113,
"grad_norm": 0.4441792368888855,
"learning_rate": 4.49866651205937e-05,
"loss": 0.2796,
"step": 9650
},
{
"epoch": 6.653720570544767,
"grad_norm": 0.4779716730117798,
"learning_rate": 4.4972170686456406e-05,
"loss": 0.3165,
"step": 9675
},
{
"epoch": 6.670905653892421,
"grad_norm": 0.6962916254997253,
"learning_rate": 4.4957676252319113e-05,
"loss": 0.3176,
"step": 9700
},
{
"epoch": 6.688090737240076,
"grad_norm": 0.43603190779685974,
"learning_rate": 4.494318181818182e-05,
"loss": 0.3095,
"step": 9725
},
{
"epoch": 6.70527582058773,
"grad_norm": 0.8133454918861389,
"learning_rate": 4.492868738404453e-05,
"loss": 0.2793,
"step": 9750
},
{
"epoch": 6.722460903935384,
"grad_norm": 0.4596538245677948,
"learning_rate": 4.4914192949907236e-05,
"loss": 0.2626,
"step": 9775
},
{
"epoch": 6.739645987283039,
"grad_norm": 0.6948014497756958,
"learning_rate": 4.489969851576994e-05,
"loss": 0.2546,
"step": 9800
},
{
"epoch": 6.756831070630692,
"grad_norm": 0.40873172879219055,
"learning_rate": 4.488520408163265e-05,
"loss": 0.2867,
"step": 9825
},
{
"epoch": 6.774016153978347,
"grad_norm": 0.36870279908180237,
"learning_rate": 4.4870709647495365e-05,
"loss": 0.3258,
"step": 9850
},
{
"epoch": 6.791201237326001,
"grad_norm": 0.4158894121646881,
"learning_rate": 4.485621521335807e-05,
"loss": 0.3015,
"step": 9875
},
{
"epoch": 6.808386320673655,
"grad_norm": 0.4422719180583954,
"learning_rate": 4.484172077922078e-05,
"loss": 0.285,
"step": 9900
},
{
"epoch": 6.82557140402131,
"grad_norm": 0.7379534840583801,
"learning_rate": 4.482780612244898e-05,
"loss": 0.3093,
"step": 9925
},
{
"epoch": 6.8427564873689635,
"grad_norm": 0.5119884014129639,
"learning_rate": 4.481331168831169e-05,
"loss": 0.2794,
"step": 9950
},
{
"epoch": 6.859941570716618,
"grad_norm": 0.46535733342170715,
"learning_rate": 4.4798817254174396e-05,
"loss": 0.2784,
"step": 9975
},
{
"epoch": 6.877126654064273,
"grad_norm": 0.49434012174606323,
"learning_rate": 4.4784322820037104e-05,
"loss": 0.2706,
"step": 10000
},
{
"epoch": 6.877126654064273,
"eval_loss": 0.321118026971817,
"eval_runtime": 152.9572,
"eval_samples_per_second": 8.009,
"eval_steps_per_second": 1.007,
"eval_wer": 0.53984739296312,
"step": 10000
},
{
"epoch": 6.894311737411926,
"grad_norm": 0.4334565997123718,
"learning_rate": 4.476982838589982e-05,
"loss": 0.2762,
"step": 10025
},
{
"epoch": 6.911496820759581,
"grad_norm": 0.564243733882904,
"learning_rate": 4.4755333951762525e-05,
"loss": 0.2429,
"step": 10050
},
{
"epoch": 6.9286819041072345,
"grad_norm": 0.4657536447048187,
"learning_rate": 4.474083951762523e-05,
"loss": 0.2528,
"step": 10075
},
{
"epoch": 6.945866987454889,
"grad_norm": 0.5497089624404907,
"learning_rate": 4.472634508348794e-05,
"loss": 0.2982,
"step": 10100
},
{
"epoch": 6.963052070802544,
"grad_norm": 0.7017095685005188,
"learning_rate": 4.471185064935065e-05,
"loss": 0.2762,
"step": 10125
},
{
"epoch": 6.980237154150197,
"grad_norm": 0.4462623596191406,
"learning_rate": 4.469735621521336e-05,
"loss": 0.2814,
"step": 10150
},
{
"epoch": 6.997422237497852,
"grad_norm": 0.42071837186813354,
"learning_rate": 4.468286178107607e-05,
"loss": 0.2461,
"step": 10175
},
{
"epoch": 7.015122873345936,
"grad_norm": 0.5645279288291931,
"learning_rate": 4.466836734693878e-05,
"loss": 0.2862,
"step": 10200
},
{
"epoch": 7.0323079566935895,
"grad_norm": 1.0430643558502197,
"learning_rate": 4.4653872912801484e-05,
"loss": 0.2689,
"step": 10225
},
{
"epoch": 7.049493040041244,
"grad_norm": 0.5359970331192017,
"learning_rate": 4.46393784786642e-05,
"loss": 0.2685,
"step": 10250
},
{
"epoch": 7.066678123388899,
"grad_norm": 0.7322932481765747,
"learning_rate": 4.4624884044526906e-05,
"loss": 0.2806,
"step": 10275
},
{
"epoch": 7.083863206736552,
"grad_norm": 0.4568728804588318,
"learning_rate": 4.461038961038961e-05,
"loss": 0.2726,
"step": 10300
},
{
"epoch": 7.101048290084207,
"grad_norm": 1.341957926750183,
"learning_rate": 4.459589517625232e-05,
"loss": 0.2647,
"step": 10325
},
{
"epoch": 7.1182333734318615,
"grad_norm": 0.48318567872047424,
"learning_rate": 4.4581400742115035e-05,
"loss": 0.28,
"step": 10350
},
{
"epoch": 7.135418456779515,
"grad_norm": 0.7370210289955139,
"learning_rate": 4.456690630797774e-05,
"loss": 0.2886,
"step": 10375
},
{
"epoch": 7.15260354012717,
"grad_norm": 0.703504741191864,
"learning_rate": 4.455241187384045e-05,
"loss": 0.276,
"step": 10400
},
{
"epoch": 7.169788623474824,
"grad_norm": 0.5735402703285217,
"learning_rate": 4.453791743970316e-05,
"loss": 0.2786,
"step": 10425
},
{
"epoch": 7.186973706822478,
"grad_norm": 0.36307334899902344,
"learning_rate": 4.4523423005565865e-05,
"loss": 0.2714,
"step": 10450
},
{
"epoch": 7.2041587901701325,
"grad_norm": 0.8428148031234741,
"learning_rate": 4.450892857142857e-05,
"loss": 0.2997,
"step": 10475
},
{
"epoch": 7.221343873517786,
"grad_norm": 0.8433165550231934,
"learning_rate": 4.449443413729128e-05,
"loss": 0.3008,
"step": 10500
},
{
"epoch": 7.221343873517786,
"eval_loss": 0.3265960216522217,
"eval_runtime": 151.3692,
"eval_samples_per_second": 8.093,
"eval_steps_per_second": 1.017,
"eval_wer": 0.53984739296312,
"step": 10500
},
{
"epoch": 7.238528956865441,
"grad_norm": 0.7506297826766968,
"learning_rate": 4.447993970315399e-05,
"loss": 0.2789,
"step": 10525
},
{
"epoch": 7.255714040213095,
"grad_norm": 0.4726732075214386,
"learning_rate": 4.44654452690167e-05,
"loss": 0.2734,
"step": 10550
},
{
"epoch": 7.272899123560749,
"grad_norm": 0.5341284275054932,
"learning_rate": 4.445095083487941e-05,
"loss": 0.2281,
"step": 10575
},
{
"epoch": 7.2900842069084035,
"grad_norm": 1.0162677764892578,
"learning_rate": 4.4436456400742116e-05,
"loss": 0.2581,
"step": 10600
},
{
"epoch": 7.307269290256058,
"grad_norm": 0.6543082594871521,
"learning_rate": 4.4421961966604823e-05,
"loss": 0.245,
"step": 10625
},
{
"epoch": 7.324454373603712,
"grad_norm": 0.46228596568107605,
"learning_rate": 4.440746753246753e-05,
"loss": 0.2835,
"step": 10650
},
{
"epoch": 7.341639456951366,
"grad_norm": 1.2381787300109863,
"learning_rate": 4.4392973098330245e-05,
"loss": 0.241,
"step": 10675
},
{
"epoch": 7.35882454029902,
"grad_norm": 0.6333453059196472,
"learning_rate": 4.437847866419295e-05,
"loss": 0.2691,
"step": 10700
},
{
"epoch": 7.376009623646675,
"grad_norm": 0.5812168121337891,
"learning_rate": 4.436398423005566e-05,
"loss": 0.2679,
"step": 10725
},
{
"epoch": 7.393194706994329,
"grad_norm": 0.4156525433063507,
"learning_rate": 4.434948979591837e-05,
"loss": 0.2355,
"step": 10750
},
{
"epoch": 7.410379790341983,
"grad_norm": 0.7613847255706787,
"learning_rate": 4.433499536178108e-05,
"loss": 0.2578,
"step": 10775
},
{
"epoch": 7.427564873689637,
"grad_norm": 0.4807249903678894,
"learning_rate": 4.432050092764379e-05,
"loss": 0.2873,
"step": 10800
},
{
"epoch": 7.444749957037292,
"grad_norm": 0.6777732372283936,
"learning_rate": 4.4306006493506496e-05,
"loss": 0.2319,
"step": 10825
},
{
"epoch": 7.461935040384946,
"grad_norm": 0.5690301060676575,
"learning_rate": 4.4291512059369204e-05,
"loss": 0.2933,
"step": 10850
},
{
"epoch": 7.4791201237326,
"grad_norm": 0.6938736438751221,
"learning_rate": 4.427701762523191e-05,
"loss": 0.25,
"step": 10875
},
{
"epoch": 7.496305207080255,
"grad_norm": 1.2933138608932495,
"learning_rate": 4.4262523191094626e-05,
"loss": 0.2932,
"step": 10900
},
{
"epoch": 7.513490290427908,
"grad_norm": 0.6283312439918518,
"learning_rate": 4.424802875695733e-05,
"loss": 0.2703,
"step": 10925
},
{
"epoch": 7.530675373775563,
"grad_norm": 0.6064092516899109,
"learning_rate": 4.423353432282004e-05,
"loss": 0.2762,
"step": 10950
},
{
"epoch": 7.5478604571232175,
"grad_norm": 0.6407192349433899,
"learning_rate": 4.421903988868275e-05,
"loss": 0.2477,
"step": 10975
},
{
"epoch": 7.565045540470871,
"grad_norm": 0.40185797214508057,
"learning_rate": 4.4204545454545455e-05,
"loss": 0.2674,
"step": 11000
},
{
"epoch": 7.565045540470871,
"eval_loss": 0.318492591381073,
"eval_runtime": 150.7169,
"eval_samples_per_second": 8.128,
"eval_steps_per_second": 1.022,
"eval_wer": 0.5379398050021196,
"step": 11000
},
{
"epoch": 7.565045540470871,
"step": 11000,
"total_flos": 8.071230701734987e+19,
"train_loss": 0.7137202631343494,
"train_runtime": 97593.296,
"train_samples_per_second": 28.619,
"train_steps_per_second": 0.894
}
],
"logging_steps": 25,
"max_steps": 87240,
"num_input_tokens_seen": 0,
"num_train_epochs": 60,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 5
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.071230701734987e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}