Selest's picture
Upload folder using huggingface_hub
0329af1 verified
Invalid JSON:Unexpected token 'N', ..."ad_norm": NaN, "... is not valid JSON
{
"best_global_step": 22000,
"best_metric": 0.09801159451698542,
"best_model_checkpoint": "w2v-bert-urmi-out-v3/checkpoint-22000",
"epoch": 19.113814074717638,
"eval_steps": 500,
"global_step": 22000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.043440486533449174,
"grad_norm": NaN,
"learning_rate": 2.9400000000000002e-06,
"loss": 17.6344189453125,
"step": 50
},
{
"epoch": 0.08688097306689835,
"grad_norm": 41.381343841552734,
"learning_rate": 5.940000000000001e-06,
"loss": 8.653125,
"step": 100
},
{
"epoch": 0.13032145960034752,
"grad_norm": 25.690353393554688,
"learning_rate": 8.939999999999999e-06,
"loss": 6.376434936523437,
"step": 150
},
{
"epoch": 0.1737619461337967,
"grad_norm": 150.42007446289062,
"learning_rate": 1.1940000000000001e-05,
"loss": 5.640718994140625,
"step": 200
},
{
"epoch": 0.21720243266724587,
"grad_norm": 31.25239372253418,
"learning_rate": 1.4940000000000001e-05,
"loss": 4.098101806640625,
"step": 250
},
{
"epoch": 0.26064291920069504,
"grad_norm": 36.728145599365234,
"learning_rate": 1.794e-05,
"loss": 2.84884033203125,
"step": 300
},
{
"epoch": 0.3040834057341442,
"grad_norm": 19.722448348999023,
"learning_rate": 2.094e-05,
"loss": 2.7515597534179688,
"step": 350
},
{
"epoch": 0.3475238922675934,
"grad_norm": 72.23578643798828,
"learning_rate": 2.394e-05,
"loss": 2.2729856872558596,
"step": 400
},
{
"epoch": 0.39096437880104257,
"grad_norm": 29.115379333496094,
"learning_rate": 2.6940000000000003e-05,
"loss": 2.120067443847656,
"step": 450
},
{
"epoch": 0.43440486533449174,
"grad_norm": 41.9510612487793,
"learning_rate": 2.994e-05,
"loss": 2.0917138671875,
"step": 500
},
{
"epoch": 0.43440486533449174,
"eval_cer": 0.2925177439453866,
"eval_loss": 1.1169049739837646,
"eval_runtime": 41.0307,
"eval_samples_per_second": 24.323,
"eval_steps_per_second": 12.162,
"eval_wer": 0.860224586288416,
"step": 500
},
{
"epoch": 0.4778453518679409,
"grad_norm": 56.66565704345703,
"learning_rate": 2.995680282104026e-05,
"loss": 2.0159649658203125,
"step": 550
},
{
"epoch": 0.5212858384013901,
"grad_norm": 93.8177261352539,
"learning_rate": 2.9912724066999708e-05,
"loss": 2.4505059814453123,
"step": 600
},
{
"epoch": 0.5647263249348393,
"grad_norm": 21.54208755493164,
"learning_rate": 2.9868645312959155e-05,
"loss": 1.9825759887695313,
"step": 650
},
{
"epoch": 0.6081668114682884,
"grad_norm": 67.06742095947266,
"learning_rate": 2.9824566558918603e-05,
"loss": 1.764338836669922,
"step": 700
},
{
"epoch": 0.6516072980017377,
"grad_norm": 52.83509063720703,
"learning_rate": 2.978048780487805e-05,
"loss": 1.8609919738769531,
"step": 750
},
{
"epoch": 0.6950477845351868,
"grad_norm": 19.600238800048828,
"learning_rate": 2.9736409050837498e-05,
"loss": 1.800018310546875,
"step": 800
},
{
"epoch": 0.738488271068636,
"grad_norm": 131.72647094726562,
"learning_rate": 2.9692330296796945e-05,
"loss": 1.884906005859375,
"step": 850
},
{
"epoch": 0.7819287576020851,
"grad_norm": 90.5487060546875,
"learning_rate": 2.9648251542756393e-05,
"loss": 1.8686543273925782,
"step": 900
},
{
"epoch": 0.8253692441355344,
"grad_norm": 109.63233184814453,
"learning_rate": 2.960417278871584e-05,
"loss": 1.5970869445800782,
"step": 950
},
{
"epoch": 0.8688097306689835,
"grad_norm": 33.57727813720703,
"learning_rate": 2.9560094034675285e-05,
"loss": 1.7014007568359375,
"step": 1000
},
{
"epoch": 0.8688097306689835,
"eval_cer": 0.2367123584547868,
"eval_loss": 0.8549327850341797,
"eval_runtime": 36.6894,
"eval_samples_per_second": 27.201,
"eval_steps_per_second": 13.601,
"eval_wer": 0.7121749408983451,
"step": 1000
},
{
"epoch": 0.9122502172024327,
"grad_norm": 43.89970016479492,
"learning_rate": 2.9516015280634735e-05,
"loss": 1.5748690795898437,
"step": 1050
},
{
"epoch": 0.9556907037358818,
"grad_norm": 26.835451126098633,
"learning_rate": 2.947193652659418e-05,
"loss": 1.4466169738769532,
"step": 1100
},
{
"epoch": 0.9991311902693311,
"grad_norm": 56.00555419921875,
"learning_rate": 2.942785777255363e-05,
"loss": 1.6500205993652344,
"step": 1150
},
{
"epoch": 1.0425716768027802,
"grad_norm": 14.68392562866211,
"learning_rate": 2.9383779018513075e-05,
"loss": 1.589685821533203,
"step": 1200
},
{
"epoch": 1.0860121633362294,
"grad_norm": 43.6255989074707,
"learning_rate": 2.9339700264472526e-05,
"loss": 1.181229705810547,
"step": 1250
},
{
"epoch": 1.1294526498696786,
"grad_norm": 73.56681060791016,
"learning_rate": 2.929562151043197e-05,
"loss": 1.37734619140625,
"step": 1300
},
{
"epoch": 1.1728931364031276,
"grad_norm": 12.448647499084473,
"learning_rate": 2.925154275639142e-05,
"loss": 1.4884031677246095,
"step": 1350
},
{
"epoch": 1.2163336229365769,
"grad_norm": 68.00922393798828,
"learning_rate": 2.9207464002350868e-05,
"loss": 1.596350860595703,
"step": 1400
},
{
"epoch": 1.259774109470026,
"grad_norm": 39.86298370361328,
"learning_rate": 2.9163385248310316e-05,
"loss": 1.3065278625488281,
"step": 1450
},
{
"epoch": 1.3032145960034751,
"grad_norm": 15.449691772460938,
"learning_rate": 2.9119306494269763e-05,
"loss": 1.1519753265380859,
"step": 1500
},
{
"epoch": 1.3032145960034751,
"eval_cer": 0.2101099853714038,
"eval_loss": 0.790473997592926,
"eval_runtime": 35.6641,
"eval_samples_per_second": 27.983,
"eval_steps_per_second": 13.992,
"eval_wer": 0.6616430260047281,
"step": 1500
},
{
"epoch": 1.3466550825369243,
"grad_norm": 24.626739501953125,
"learning_rate": 2.907522774022921e-05,
"loss": 1.427688751220703,
"step": 1550
},
{
"epoch": 1.3900955690703736,
"grad_norm": 17.682024002075195,
"learning_rate": 2.9031148986188658e-05,
"loss": 1.4333070373535157,
"step": 1600
},
{
"epoch": 1.4335360556038228,
"grad_norm": 23.89002227783203,
"learning_rate": 2.8987070232148106e-05,
"loss": 1.2861351013183593,
"step": 1650
},
{
"epoch": 1.476976542137272,
"grad_norm": 30.092050552368164,
"learning_rate": 2.8942991478107553e-05,
"loss": 1.256303176879883,
"step": 1700
},
{
"epoch": 1.520417028670721,
"grad_norm": 18.956981658935547,
"learning_rate": 2.8898912724067e-05,
"loss": 1.2424105834960937,
"step": 1750
},
{
"epoch": 1.5638575152041703,
"grad_norm": 56.31697082519531,
"learning_rate": 2.8854833970026448e-05,
"loss": 1.1252889251708984,
"step": 1800
},
{
"epoch": 1.6072980017376195,
"grad_norm": 42.870338439941406,
"learning_rate": 2.8810755215985896e-05,
"loss": 1.0607293701171876,
"step": 1850
},
{
"epoch": 1.6507384882710685,
"grad_norm": 61.81471633911133,
"learning_rate": 2.8766676461945343e-05,
"loss": 1.077663116455078,
"step": 1900
},
{
"epoch": 1.694178974804518,
"grad_norm": 14.434207916259766,
"learning_rate": 2.872259770790479e-05,
"loss": 1.0206593322753905,
"step": 1950
},
{
"epoch": 1.737619461337967,
"grad_norm": 42.81059265136719,
"learning_rate": 2.8678518953864238e-05,
"loss": 1.1406269836425782,
"step": 2000
},
{
"epoch": 1.737619461337967,
"eval_cer": 0.18090697296418703,
"eval_loss": 0.7465401887893677,
"eval_runtime": 35.7694,
"eval_samples_per_second": 27.901,
"eval_steps_per_second": 13.95,
"eval_wer": 0.5706264775413712,
"step": 2000
},
{
"epoch": 1.7810599478714162,
"grad_norm": 10.328418731689453,
"learning_rate": 2.8634440199823686e-05,
"loss": 1.0407346343994142,
"step": 2050
},
{
"epoch": 1.8245004344048654,
"grad_norm": 57.201133728027344,
"learning_rate": 2.8590361445783133e-05,
"loss": 1.36724365234375,
"step": 2100
},
{
"epoch": 1.8679409209383144,
"grad_norm": 63.23184585571289,
"learning_rate": 2.854628269174258e-05,
"loss": 1.1011062622070313,
"step": 2150
},
{
"epoch": 1.9113814074717637,
"grad_norm": 12.780070304870605,
"learning_rate": 2.850220393770203e-05,
"loss": 1.0486875915527343,
"step": 2200
},
{
"epoch": 1.954821894005213,
"grad_norm": 15.590168952941895,
"learning_rate": 2.8458125183661476e-05,
"loss": 1.4535511779785155,
"step": 2250
},
{
"epoch": 1.998262380538662,
"grad_norm": 23.453882217407227,
"learning_rate": 2.8414046429620923e-05,
"loss": 1.2782644653320312,
"step": 2300
},
{
"epoch": 2.0417028670721113,
"grad_norm": 96.71955108642578,
"learning_rate": 2.836996767558037e-05,
"loss": 0.843255615234375,
"step": 2350
},
{
"epoch": 2.0851433536055604,
"grad_norm": 17.284881591796875,
"learning_rate": 2.832588892153982e-05,
"loss": 0.7496602630615234,
"step": 2400
},
{
"epoch": 2.1285838401390094,
"grad_norm": 19.61467742919922,
"learning_rate": 2.8281810167499266e-05,
"loss": 0.8246018218994141,
"step": 2450
},
{
"epoch": 2.172024326672459,
"grad_norm": 20.361276626586914,
"learning_rate": 2.8237731413458713e-05,
"loss": 1.0796304321289063,
"step": 2500
},
{
"epoch": 2.172024326672459,
"eval_cer": 0.16687435661266728,
"eval_loss": 0.6873559951782227,
"eval_runtime": 35.5627,
"eval_samples_per_second": 28.063,
"eval_steps_per_second": 14.032,
"eval_wer": 0.5325059101654847,
"step": 2500
},
{
"epoch": 2.215464813205908,
"grad_norm": 41.729736328125,
"learning_rate": 2.819365265941816e-05,
"loss": 0.8797608947753907,
"step": 2550
},
{
"epoch": 2.2589052997393573,
"grad_norm": 11.56946086883545,
"learning_rate": 2.814957390537761e-05,
"loss": 0.9233754730224609,
"step": 2600
},
{
"epoch": 2.3023457862728063,
"grad_norm": 13.053935050964355,
"learning_rate": 2.8105495151337056e-05,
"loss": 1.0405730438232421,
"step": 2650
},
{
"epoch": 2.3457862728062553,
"grad_norm": 27.34178352355957,
"learning_rate": 2.8061416397296503e-05,
"loss": 0.9149618530273438,
"step": 2700
},
{
"epoch": 2.3892267593397047,
"grad_norm": 32.077274322509766,
"learning_rate": 2.801733764325595e-05,
"loss": 0.9278230285644531,
"step": 2750
},
{
"epoch": 2.4326672458731537,
"grad_norm": 14.97318172454834,
"learning_rate": 2.79732588892154e-05,
"loss": 1.0948815155029297,
"step": 2800
},
{
"epoch": 2.4761077324066028,
"grad_norm": 0.24952514469623566,
"learning_rate": 2.7929180135174846e-05,
"loss": 0.7527609252929688,
"step": 2850
},
{
"epoch": 2.519548218940052,
"grad_norm": 42.21710205078125,
"learning_rate": 2.7885101381134294e-05,
"loss": 0.8132963562011719,
"step": 2900
},
{
"epoch": 2.562988705473501,
"grad_norm": 10.806293487548828,
"learning_rate": 2.784102262709374e-05,
"loss": 1.0475637817382812,
"step": 2950
},
{
"epoch": 2.6064291920069502,
"grad_norm": 24.606548309326172,
"learning_rate": 2.779694387305319e-05,
"loss": 0.8934781646728516,
"step": 3000
},
{
"epoch": 2.6064291920069502,
"eval_cer": 0.20637156634339274,
"eval_loss": 0.6298205256462097,
"eval_runtime": 35.3882,
"eval_samples_per_second": 28.201,
"eval_steps_per_second": 14.101,
"eval_wer": 0.567080378250591,
"step": 3000
},
{
"epoch": 2.6498696785403997,
"grad_norm": 34.39348220825195,
"learning_rate": 2.7752865119012636e-05,
"loss": 0.920788803100586,
"step": 3050
},
{
"epoch": 2.6933101650738487,
"grad_norm": 67.26911163330078,
"learning_rate": 2.7708786364972084e-05,
"loss": 0.8773422241210938,
"step": 3100
},
{
"epoch": 2.736750651607298,
"grad_norm": 62.36620330810547,
"learning_rate": 2.766470761093153e-05,
"loss": 1.0072268676757812,
"step": 3150
},
{
"epoch": 2.780191138140747,
"grad_norm": 28.642549514770508,
"learning_rate": 2.7620628856890982e-05,
"loss": 1.3133396911621094,
"step": 3200
},
{
"epoch": 2.8236316246741966,
"grad_norm": 20.24125862121582,
"learning_rate": 2.7576550102850426e-05,
"loss": 0.9312178802490234,
"step": 3250
},
{
"epoch": 2.8670721112076456,
"grad_norm": 87.64716339111328,
"learning_rate": 2.7532471348809877e-05,
"loss": 0.9268650817871094,
"step": 3300
},
{
"epoch": 2.9105125977410946,
"grad_norm": 10.313736915588379,
"learning_rate": 2.748839259476932e-05,
"loss": 0.8256442260742187,
"step": 3350
},
{
"epoch": 2.953953084274544,
"grad_norm": 31.871875762939453,
"learning_rate": 2.7444313840728772e-05,
"loss": 1.0591201782226562,
"step": 3400
},
{
"epoch": 2.997393570807993,
"grad_norm": 51.40370559692383,
"learning_rate": 2.7400235086688216e-05,
"loss": 1.0520962524414061,
"step": 3450
},
{
"epoch": 3.040834057341442,
"grad_norm": 7.437458515167236,
"learning_rate": 2.7356156332647667e-05,
"loss": 0.7271649169921875,
"step": 3500
},
{
"epoch": 3.040834057341442,
"eval_cer": 0.15457549981037005,
"eval_loss": 0.6895098090171814,
"eval_runtime": 35.2334,
"eval_samples_per_second": 28.325,
"eval_steps_per_second": 14.163,
"eval_wer": 0.5041371158392435,
"step": 3500
},
{
"epoch": 3.0842745438748915,
"grad_norm": 2.256615161895752,
"learning_rate": 2.731207757860711e-05,
"loss": 0.6253271865844726,
"step": 3550
},
{
"epoch": 3.1277150304083405,
"grad_norm": 24.19891357421875,
"learning_rate": 2.7267998824566562e-05,
"loss": 0.8554808807373047,
"step": 3600
},
{
"epoch": 3.1711555169417895,
"grad_norm": 25.919506072998047,
"learning_rate": 2.7223920070526006e-05,
"loss": 0.7264094543457031,
"step": 3650
},
{
"epoch": 3.214596003475239,
"grad_norm": 0.6518918871879578,
"learning_rate": 2.7179841316485454e-05,
"loss": 0.633333511352539,
"step": 3700
},
{
"epoch": 3.258036490008688,
"grad_norm": 36.61137390136719,
"learning_rate": 2.71357625624449e-05,
"loss": 0.8081251525878906,
"step": 3750
},
{
"epoch": 3.3014769765421375,
"grad_norm": 8.081766128540039,
"learning_rate": 2.709168380840435e-05,
"loss": 0.8773213195800781,
"step": 3800
},
{
"epoch": 3.3449174630755865,
"grad_norm": 24.704824447631836,
"learning_rate": 2.7047605054363796e-05,
"loss": 0.8086146545410157,
"step": 3850
},
{
"epoch": 3.3883579496090355,
"grad_norm": 0.8145921230316162,
"learning_rate": 2.7003526300323244e-05,
"loss": 0.6982787322998046,
"step": 3900
},
{
"epoch": 3.431798436142485,
"grad_norm": 37.42679214477539,
"learning_rate": 2.695944754628269e-05,
"loss": 0.7531932067871093,
"step": 3950
},
{
"epoch": 3.475238922675934,
"grad_norm": 32.73085403442383,
"learning_rate": 2.691536879224214e-05,
"loss": 0.6939554595947266,
"step": 4000
},
{
"epoch": 3.475238922675934,
"eval_cer": 0.1360459446280544,
"eval_loss": 0.6202276349067688,
"eval_runtime": 34.9968,
"eval_samples_per_second": 28.517,
"eval_steps_per_second": 14.258,
"eval_wer": 0.42671394799054374,
"step": 4000
},
{
"epoch": 3.5186794092093834,
"grad_norm": 0.5364285111427307,
"learning_rate": 2.6871290038201586e-05,
"loss": 0.7825308227539063,
"step": 4050
},
{
"epoch": 3.5621198957428324,
"grad_norm": 26.980627059936523,
"learning_rate": 2.6827211284161034e-05,
"loss": 0.7656624603271485,
"step": 4100
},
{
"epoch": 3.6055603822762814,
"grad_norm": 10.756477355957031,
"learning_rate": 2.6783132530120485e-05,
"loss": 0.7668492889404297,
"step": 4150
},
{
"epoch": 3.649000868809731,
"grad_norm": 13.8463773727417,
"learning_rate": 2.673905377607993e-05,
"loss": 0.7058528900146485,
"step": 4200
},
{
"epoch": 3.69244135534318,
"grad_norm": 18.059154510498047,
"learning_rate": 2.669497502203938e-05,
"loss": 0.7425822448730469,
"step": 4250
},
{
"epoch": 3.735881841876629,
"grad_norm": 14.087454795837402,
"learning_rate": 2.6650896267998824e-05,
"loss": 0.7796754455566406,
"step": 4300
},
{
"epoch": 3.7793223284100783,
"grad_norm": 4.631764888763428,
"learning_rate": 2.6606817513958275e-05,
"loss": 0.7211798095703125,
"step": 4350
},
{
"epoch": 3.8227628149435273,
"grad_norm": 0.7707765698432922,
"learning_rate": 2.656273875991772e-05,
"loss": 0.6928179168701172,
"step": 4400
},
{
"epoch": 3.8662033014769763,
"grad_norm": 0.21713215112686157,
"learning_rate": 2.651866000587717e-05,
"loss": 0.7452503204345703,
"step": 4450
},
{
"epoch": 3.909643788010426,
"grad_norm": 19.084728240966797,
"learning_rate": 2.6474581251836614e-05,
"loss": 0.6911500549316406,
"step": 4500
},
{
"epoch": 3.909643788010426,
"eval_cer": 0.14384786259955573,
"eval_loss": 0.6342427730560303,
"eval_runtime": 35.3212,
"eval_samples_per_second": 28.255,
"eval_steps_per_second": 14.128,
"eval_wer": 0.4435579196217494,
"step": 4500
},
{
"epoch": 3.953084274543875,
"grad_norm": 0.5245521068572998,
"learning_rate": 2.6430502497796065e-05,
"loss": 0.6018388748168946,
"step": 4550
},
{
"epoch": 3.996524761077324,
"grad_norm": 22.038259506225586,
"learning_rate": 2.638642374375551e-05,
"loss": 0.5375812149047852,
"step": 4600
},
{
"epoch": 4.039965247610773,
"grad_norm": 11.167423248291016,
"learning_rate": 2.634234498971496e-05,
"loss": 0.5369546508789063,
"step": 4650
},
{
"epoch": 4.083405734144223,
"grad_norm": 0.10995540767908096,
"learning_rate": 2.6298266235674404e-05,
"loss": 0.6637758636474609,
"step": 4700
},
{
"epoch": 4.126846220677671,
"grad_norm": 596.5354614257812,
"learning_rate": 2.6254187481633855e-05,
"loss": 0.5059263610839844,
"step": 4750
},
{
"epoch": 4.170286707211121,
"grad_norm": 49.69171905517578,
"learning_rate": 2.62101087275933e-05,
"loss": 0.5901547622680664,
"step": 4800
},
{
"epoch": 4.21372719374457,
"grad_norm": 0.014338035136461258,
"learning_rate": 2.616602997355275e-05,
"loss": 0.6108988571166992,
"step": 4850
},
{
"epoch": 4.257167680278019,
"grad_norm": 0.33344972133636475,
"learning_rate": 2.6121951219512194e-05,
"loss": 0.5445775985717773,
"step": 4900
},
{
"epoch": 4.300608166811468,
"grad_norm": 0.7429609298706055,
"learning_rate": 2.6077872465471645e-05,
"loss": 0.4996451187133789,
"step": 4950
},
{
"epoch": 4.344048653344918,
"grad_norm": 0.05325142666697502,
"learning_rate": 2.603379371143109e-05,
"loss": 0.5604157257080078,
"step": 5000
},
{
"epoch": 4.344048653344918,
"eval_cer": 0.1346914449802243,
"eval_loss": 0.5602818727493286,
"eval_runtime": 35.3794,
"eval_samples_per_second": 28.209,
"eval_steps_per_second": 14.104,
"eval_wer": 0.4231678486997636,
"step": 5000
},
{
"epoch": 4.387489139878367,
"grad_norm": 0.4052943289279938,
"learning_rate": 2.5989714957390537e-05,
"loss": 0.571678810119629,
"step": 5050
},
{
"epoch": 4.430929626411816,
"grad_norm": 7.114663600921631,
"learning_rate": 2.5945636203349988e-05,
"loss": 0.5940496826171875,
"step": 5100
},
{
"epoch": 4.474370112945265,
"grad_norm": 3.226045846939087,
"learning_rate": 2.5901557449309432e-05,
"loss": 0.48196929931640625,
"step": 5150
},
{
"epoch": 4.5178105994787146,
"grad_norm": 14.2632474899292,
"learning_rate": 2.5857478695268883e-05,
"loss": 0.4521299362182617,
"step": 5200
},
{
"epoch": 4.561251086012163,
"grad_norm": 11.746747016906738,
"learning_rate": 2.5813399941228327e-05,
"loss": 0.6751963806152343,
"step": 5250
},
{
"epoch": 4.604691572545613,
"grad_norm": 16.07468605041504,
"learning_rate": 2.5769321187187778e-05,
"loss": 0.45375862121582033,
"step": 5300
},
{
"epoch": 4.648132059079062,
"grad_norm": 11.938125610351562,
"learning_rate": 2.5725242433147222e-05,
"loss": 0.5193147277832031,
"step": 5350
},
{
"epoch": 4.691572545612511,
"grad_norm": 14.44975757598877,
"learning_rate": 2.5681163679106673e-05,
"loss": 0.5906137084960937,
"step": 5400
},
{
"epoch": 4.73501303214596,
"grad_norm": 0.14901815354824066,
"learning_rate": 2.5637084925066117e-05,
"loss": 0.5064856338500977,
"step": 5450
},
{
"epoch": 4.7784535186794095,
"grad_norm": 0.9449958801269531,
"learning_rate": 2.5593006171025568e-05,
"loss": 0.6325591278076171,
"step": 5500
},
{
"epoch": 4.7784535186794095,
"eval_cer": 0.1300319661916888,
"eval_loss": 0.5885463356971741,
"eval_runtime": 35.2148,
"eval_samples_per_second": 28.34,
"eval_steps_per_second": 14.17,
"eval_wer": 0.4078014184397163,
"step": 5500
},
{
"epoch": 4.821894005212858,
"grad_norm": 0.016951393336057663,
"learning_rate": 2.5548927416985012e-05,
"loss": 0.5301705551147461,
"step": 5550
},
{
"epoch": 4.8653344917463075,
"grad_norm": 31.289724349975586,
"learning_rate": 2.5504848662944463e-05,
"loss": 0.4865913009643555,
"step": 5600
},
{
"epoch": 4.908774978279757,
"grad_norm": 0.2750867009162903,
"learning_rate": 2.5460769908903907e-05,
"loss": 0.6053089523315429,
"step": 5650
},
{
"epoch": 4.9522154648132055,
"grad_norm": 0.15572036802768707,
"learning_rate": 2.5416691154863358e-05,
"loss": 0.44040061950683596,
"step": 5700
},
{
"epoch": 4.995655951346655,
"grad_norm": 1.536003828048706,
"learning_rate": 2.5372612400822802e-05,
"loss": 0.6176298141479493,
"step": 5750
},
{
"epoch": 5.039096437880104,
"grad_norm": 3.888091564178467,
"learning_rate": 2.5328533646782253e-05,
"loss": 0.6050854873657227,
"step": 5800
},
{
"epoch": 5.082536924413553,
"grad_norm": 1.9056124687194824,
"learning_rate": 2.5284454892741697e-05,
"loss": 0.5753683090209961,
"step": 5850
},
{
"epoch": 5.125977410947002,
"grad_norm": 0.10637835413217545,
"learning_rate": 2.5240376138701148e-05,
"loss": 0.47484302520751953,
"step": 5900
},
{
"epoch": 5.169417897480452,
"grad_norm": 4.4535441398620605,
"learning_rate": 2.5196297384660595e-05,
"loss": 0.34999225616455076,
"step": 5950
},
{
"epoch": 5.212858384013901,
"grad_norm": 0.6373205780982971,
"learning_rate": 2.5152218630620043e-05,
"loss": 0.38846492767333984,
"step": 6000
},
{
"epoch": 5.212858384013901,
"eval_cer": 0.1255350273608929,
"eval_loss": 0.6293100118637085,
"eval_runtime": 35.1024,
"eval_samples_per_second": 28.431,
"eval_steps_per_second": 14.216,
"eval_wer": 0.3983451536643026,
"step": 6000
},
{
"epoch": 5.25629887054735,
"grad_norm": 0.3430880010128021,
"learning_rate": 2.510813987657949e-05,
"loss": 0.3233113479614258,
"step": 6050
},
{
"epoch": 5.299739357080799,
"grad_norm": 0.023547176271677017,
"learning_rate": 2.5064061122538938e-05,
"loss": 0.5810712432861328,
"step": 6100
},
{
"epoch": 5.343179843614249,
"grad_norm": 0.45001161098480225,
"learning_rate": 2.5019982368498385e-05,
"loss": 0.31497194290161135,
"step": 6150
},
{
"epoch": 5.386620330147697,
"grad_norm": 0.07451729476451874,
"learning_rate": 2.4975903614457833e-05,
"loss": 0.3424281311035156,
"step": 6200
},
{
"epoch": 5.430060816681147,
"grad_norm": 102.05135345458984,
"learning_rate": 2.493182486041728e-05,
"loss": 0.42556037902832033,
"step": 6250
},
{
"epoch": 5.473501303214596,
"grad_norm": 1.4394115209579468,
"learning_rate": 2.4887746106376728e-05,
"loss": 0.39192684173583986,
"step": 6300
},
{
"epoch": 5.516941789748045,
"grad_norm": 2.0899856090545654,
"learning_rate": 2.4843667352336176e-05,
"loss": 0.4351010513305664,
"step": 6350
},
{
"epoch": 5.560382276281494,
"grad_norm": 0.12065482884645462,
"learning_rate": 2.479958859829562e-05,
"loss": 0.508093376159668,
"step": 6400
},
{
"epoch": 5.603822762814944,
"grad_norm": 0.041007447987794876,
"learning_rate": 2.475550984425507e-05,
"loss": 0.4111709213256836,
"step": 6450
},
{
"epoch": 5.647263249348393,
"grad_norm": 14.792854309082031,
"learning_rate": 2.4711431090214515e-05,
"loss": 0.34780517578125,
"step": 6500
},
{
"epoch": 5.647263249348393,
"eval_cer": 0.1270520669664626,
"eval_loss": 0.6646775007247925,
"eval_runtime": 35.5266,
"eval_samples_per_second": 28.092,
"eval_steps_per_second": 14.046,
"eval_wer": 0.3980496453900709,
"step": 6500
},
{
"epoch": 5.690703735881842,
"grad_norm": 28.922000885009766,
"learning_rate": 2.4667352336173966e-05,
"loss": 0.4107795715332031,
"step": 6550
},
{
"epoch": 5.734144222415291,
"grad_norm": 0.07848715782165527,
"learning_rate": 2.462327358213341e-05,
"loss": 0.5832571029663086,
"step": 6600
},
{
"epoch": 5.777584708948741,
"grad_norm": 19.316383361816406,
"learning_rate": 2.457919482809286e-05,
"loss": 0.41104129791259764,
"step": 6650
},
{
"epoch": 5.821025195482189,
"grad_norm": 0.20225679874420166,
"learning_rate": 2.4535116074052305e-05,
"loss": 0.4999349975585938,
"step": 6700
},
{
"epoch": 5.864465682015639,
"grad_norm": 0.04317609593272209,
"learning_rate": 2.4491037320011756e-05,
"loss": 0.5584917449951172,
"step": 6750
},
{
"epoch": 5.907906168549088,
"grad_norm": 0.3524606227874756,
"learning_rate": 2.44469585659712e-05,
"loss": 0.4921522521972656,
"step": 6800
},
{
"epoch": 5.951346655082537,
"grad_norm": 29.436384201049805,
"learning_rate": 2.440287981193065e-05,
"loss": 0.5514765548706054,
"step": 6850
},
{
"epoch": 5.994787141615986,
"grad_norm": 0.23278824985027313,
"learning_rate": 2.4358801057890098e-05,
"loss": 0.3556842422485352,
"step": 6900
},
{
"epoch": 6.038227628149436,
"grad_norm": 0.08552414178848267,
"learning_rate": 2.4314722303849546e-05,
"loss": 0.33310401916503907,
"step": 6950
},
{
"epoch": 6.081668114682884,
"grad_norm": 11.057211875915527,
"learning_rate": 2.4270643549808993e-05,
"loss": 0.36625064849853517,
"step": 7000
},
{
"epoch": 6.081668114682884,
"eval_cer": 0.12483068754402124,
"eval_loss": 0.6519187688827515,
"eval_runtime": 35.4455,
"eval_samples_per_second": 28.156,
"eval_steps_per_second": 14.078,
"eval_wer": 0.3945035460992908,
"step": 7000
},
{
"epoch": 6.125108601216334,
"grad_norm": 0.06223779171705246,
"learning_rate": 2.422656479576844e-05,
"loss": 0.4004500198364258,
"step": 7050
},
{
"epoch": 6.168549087749783,
"grad_norm": 0.009129839017987251,
"learning_rate": 2.4182486041727888e-05,
"loss": 0.26837165832519533,
"step": 7100
},
{
"epoch": 6.211989574283232,
"grad_norm": 1.0068172216415405,
"learning_rate": 2.4138407287687336e-05,
"loss": 0.3842990112304687,
"step": 7150
},
{
"epoch": 6.255430060816681,
"grad_norm": 0.03263875097036362,
"learning_rate": 2.4094328533646783e-05,
"loss": 0.46779460906982423,
"step": 7200
},
{
"epoch": 6.2988705473501305,
"grad_norm": 0.025848915800452232,
"learning_rate": 2.405024977960623e-05,
"loss": 0.46671478271484373,
"step": 7250
},
{
"epoch": 6.342311033883579,
"grad_norm": 0.032335590571165085,
"learning_rate": 2.4006171025565678e-05,
"loss": 0.2948387336730957,
"step": 7300
},
{
"epoch": 6.3857515204170285,
"grad_norm": 0.07902107387781143,
"learning_rate": 2.3962092271525126e-05,
"loss": 0.2986873435974121,
"step": 7350
},
{
"epoch": 6.429192006950478,
"grad_norm": 1.8951733112335205,
"learning_rate": 2.3918013517484573e-05,
"loss": 0.48029232025146484,
"step": 7400
},
{
"epoch": 6.4726324934839266,
"grad_norm": 0.1298227608203888,
"learning_rate": 2.387393476344402e-05,
"loss": 0.45991172790527346,
"step": 7450
},
{
"epoch": 6.516072980017376,
"grad_norm": 8.462530136108398,
"learning_rate": 2.382985600940347e-05,
"loss": 0.3584669876098633,
"step": 7500
},
{
"epoch": 6.516072980017376,
"eval_cer": 0.12033374871322533,
"eval_loss": 0.6832783818244934,
"eval_runtime": 35.4555,
"eval_samples_per_second": 28.148,
"eval_steps_per_second": 14.074,
"eval_wer": 0.3844562647754137,
"step": 7500
},
{
"epoch": 6.5595134665508255,
"grad_norm": 0.03215891495347023,
"learning_rate": 2.3785777255362916e-05,
"loss": 0.3052578163146973,
"step": 7550
},
{
"epoch": 6.602953953084275,
"grad_norm": 3.956105947494507,
"learning_rate": 2.3741698501322363e-05,
"loss": 0.3200105667114258,
"step": 7600
},
{
"epoch": 6.6463944396177235,
"grad_norm": 10.573678016662598,
"learning_rate": 2.369761974728181e-05,
"loss": 0.3665552520751953,
"step": 7650
},
{
"epoch": 6.689834926151173,
"grad_norm": 2.567551374435425,
"learning_rate": 2.365354099324126e-05,
"loss": 0.3944419479370117,
"step": 7700
},
{
"epoch": 6.733275412684622,
"grad_norm": 2.1139237880706787,
"learning_rate": 2.3609462239200703e-05,
"loss": 0.36841018676757814,
"step": 7750
},
{
"epoch": 6.776715899218071,
"grad_norm": 0.01942128874361515,
"learning_rate": 2.3565383485160153e-05,
"loss": 0.3560383987426758,
"step": 7800
},
{
"epoch": 6.82015638575152,
"grad_norm": 1.474857211112976,
"learning_rate": 2.35213047311196e-05,
"loss": 0.46931259155273436,
"step": 7850
},
{
"epoch": 6.86359687228497,
"grad_norm": 24.063940048217773,
"learning_rate": 2.347722597707905e-05,
"loss": 0.3131961250305176,
"step": 7900
},
{
"epoch": 6.907037358818418,
"grad_norm": 0.024980274960398674,
"learning_rate": 2.3433147223038496e-05,
"loss": 0.3744655609130859,
"step": 7950
},
{
"epoch": 6.950477845351868,
"grad_norm": 19.342248916625977,
"learning_rate": 2.3389068468997944e-05,
"loss": 0.38378406524658204,
"step": 8000
},
{
"epoch": 6.950477845351868,
"eval_cer": 0.14720702172617436,
"eval_loss": 0.6338760852813721,
"eval_runtime": 35.924,
"eval_samples_per_second": 27.781,
"eval_steps_per_second": 13.89,
"eval_wer": 0.40573286052009455,
"step": 8000
},
{
"epoch": 6.993918331885317,
"grad_norm": 3.440767765045166,
"learning_rate": 2.334498971495739e-05,
"loss": 0.44374298095703124,
"step": 8050
},
{
"epoch": 7.037358818418766,
"grad_norm": 78.23323822021484,
"learning_rate": 2.330091096091684e-05,
"loss": 0.4143082809448242,
"step": 8100
},
{
"epoch": 7.080799304952215,
"grad_norm": 20.11145782470703,
"learning_rate": 2.3256832206876286e-05,
"loss": 0.2681180191040039,
"step": 8150
},
{
"epoch": 7.124239791485665,
"grad_norm": 4.221235275268555,
"learning_rate": 2.3212753452835734e-05,
"loss": 0.33470783233642576,
"step": 8200
},
{
"epoch": 7.167680278019114,
"grad_norm": 0.00418456643819809,
"learning_rate": 2.316867469879518e-05,
"loss": 0.29219053268432615,
"step": 8250
},
{
"epoch": 7.211120764552563,
"grad_norm": 48.96384048461914,
"learning_rate": 2.312459594475463e-05,
"loss": 0.2650064277648926,
"step": 8300
},
{
"epoch": 7.254561251086012,
"grad_norm": 0.4012812077999115,
"learning_rate": 2.3080517190714076e-05,
"loss": 0.2377411651611328,
"step": 8350
},
{
"epoch": 7.298001737619462,
"grad_norm": 0.04035955294966698,
"learning_rate": 2.3036438436673524e-05,
"loss": 0.39625030517578125,
"step": 8400
},
{
"epoch": 7.34144222415291,
"grad_norm": 0.015255268663167953,
"learning_rate": 2.299235968263297e-05,
"loss": 0.29354951858520506,
"step": 8450
},
{
"epoch": 7.38488271068636,
"grad_norm": 0.1737648993730545,
"learning_rate": 2.294828092859242e-05,
"loss": 0.43962146759033205,
"step": 8500
},
{
"epoch": 7.38488271068636,
"eval_cer": 0.11811236929078399,
"eval_loss": 0.6835731863975525,
"eval_runtime": 35.5953,
"eval_samples_per_second": 28.037,
"eval_steps_per_second": 14.019,
"eval_wer": 0.37056737588652483,
"step": 8500
},
{
"epoch": 7.428323197219809,
"grad_norm": 26.738134384155273,
"learning_rate": 2.2904202174551866e-05,
"loss": 0.32676326751708984,
"step": 8550
},
{
"epoch": 7.471763683753258,
"grad_norm": 91.86631774902344,
"learning_rate": 2.2860123420511314e-05,
"loss": 0.27230093002319333,
"step": 8600
},
{
"epoch": 7.515204170286707,
"grad_norm": 0.2025415152311325,
"learning_rate": 2.281604466647076e-05,
"loss": 0.34510005950927736,
"step": 8650
},
{
"epoch": 7.558644656820157,
"grad_norm": 0.06521395593881607,
"learning_rate": 2.2771965912430212e-05,
"loss": 0.31739959716796873,
"step": 8700
},
{
"epoch": 7.602085143353605,
"grad_norm": 0.023135656490921974,
"learning_rate": 2.2727887158389656e-05,
"loss": 0.4019832992553711,
"step": 8750
},
{
"epoch": 7.645525629887055,
"grad_norm": 0.0029301783069968224,
"learning_rate": 2.2683808404349107e-05,
"loss": 0.3610734558105469,
"step": 8800
},
{
"epoch": 7.688966116420504,
"grad_norm": 155.0016326904297,
"learning_rate": 2.263972965030855e-05,
"loss": 0.4037496566772461,
"step": 8850
},
{
"epoch": 7.732406602953953,
"grad_norm": 25.407201766967773,
"learning_rate": 2.2595650896268002e-05,
"loss": 0.22004886627197265,
"step": 8900
},
{
"epoch": 7.775847089487402,
"grad_norm": 0.04883955046534538,
"learning_rate": 2.2551572142227446e-05,
"loss": 0.4110527420043945,
"step": 8950
},
{
"epoch": 7.819287576020852,
"grad_norm": 6.237477779388428,
"learning_rate": 2.2507493388186897e-05,
"loss": 0.21914356231689452,
"step": 9000
},
{
"epoch": 7.819287576020852,
"eval_cer": 0.11708294955843311,
"eval_loss": 0.6818587183952332,
"eval_runtime": 35.3665,
"eval_samples_per_second": 28.219,
"eval_steps_per_second": 14.109,
"eval_wer": 0.37706855791962174,
"step": 9000
},
{
"epoch": 7.8627280625543,
"grad_norm": 2.864680528640747,
"learning_rate": 2.246341463414634e-05,
"loss": 0.3091525459289551,
"step": 9050
},
{
"epoch": 7.90616854908775,
"grad_norm": 0.009744558483362198,
"learning_rate": 2.241933588010579e-05,
"loss": 0.18931781768798828,
"step": 9100
},
{
"epoch": 7.949609035621199,
"grad_norm": 0.018469370901584625,
"learning_rate": 2.2375257126065236e-05,
"loss": 0.25914777755737306,
"step": 9150
},
{
"epoch": 7.9930495221546485,
"grad_norm": 0.11463995277881622,
"learning_rate": 2.2331178372024684e-05,
"loss": 0.3924109649658203,
"step": 9200
},
{
"epoch": 8.036490008688098,
"grad_norm": 0.18527474999427795,
"learning_rate": 2.228709961798413e-05,
"loss": 0.4139134979248047,
"step": 9250
},
{
"epoch": 8.079930495221546,
"grad_norm": 0.012078936211764812,
"learning_rate": 2.224302086394358e-05,
"loss": 0.20489992141723634,
"step": 9300
},
{
"epoch": 8.123370981754995,
"grad_norm": 0.026449766010046005,
"learning_rate": 2.2198942109903026e-05,
"loss": 0.23680988311767578,
"step": 9350
},
{
"epoch": 8.166811468288445,
"grad_norm": 1.4742465019226074,
"learning_rate": 2.2154863355862474e-05,
"loss": 0.2400914192199707,
"step": 9400
},
{
"epoch": 8.210251954821894,
"grad_norm": 0.0015448889462277293,
"learning_rate": 2.211078460182192e-05,
"loss": 0.31873985290527346,
"step": 9450
},
{
"epoch": 8.253692441355343,
"grad_norm": 0.6546465158462524,
"learning_rate": 2.206670584778137e-05,
"loss": 0.16370586395263673,
"step": 9500
},
{
"epoch": 8.253692441355343,
"eval_cer": 0.11431977027685973,
"eval_loss": 0.722854495048523,
"eval_runtime": 34.9616,
"eval_samples_per_second": 28.546,
"eval_steps_per_second": 14.273,
"eval_wer": 0.3602245862884161,
"step": 9500
},
{
"epoch": 8.297132927888793,
"grad_norm": 0.0220937579870224,
"learning_rate": 2.2022627093740816e-05,
"loss": 0.16179698944091797,
"step": 9550
},
{
"epoch": 8.340573414422241,
"grad_norm": 0.8495884537696838,
"learning_rate": 2.1978548339700264e-05,
"loss": 0.2629365348815918,
"step": 9600
},
{
"epoch": 8.38401390095569,
"grad_norm": 0.9506490230560303,
"learning_rate": 2.1934469585659715e-05,
"loss": 0.2445651626586914,
"step": 9650
},
{
"epoch": 8.42745438748914,
"grad_norm": 0.26221564412117004,
"learning_rate": 2.189039083161916e-05,
"loss": 0.20401872634887697,
"step": 9700
},
{
"epoch": 8.470894874022589,
"grad_norm": 0.00027192034758627415,
"learning_rate": 2.184631207757861e-05,
"loss": 0.2544666290283203,
"step": 9750
},
{
"epoch": 8.514335360556037,
"grad_norm": 0.04336933791637421,
"learning_rate": 2.1802233323538054e-05,
"loss": 0.3723867797851563,
"step": 9800
},
{
"epoch": 8.557775847089488,
"grad_norm": 0.14333416521549225,
"learning_rate": 2.1758154569497505e-05,
"loss": 0.25252397537231447,
"step": 9850
},
{
"epoch": 8.601216333622936,
"grad_norm": 0.31019526720046997,
"learning_rate": 2.171407581545695e-05,
"loss": 0.23379629135131835,
"step": 9900
},
{
"epoch": 8.644656820156385,
"grad_norm": 0.9922002553939819,
"learning_rate": 2.16699970614164e-05,
"loss": 0.3892123031616211,
"step": 9950
},
{
"epoch": 8.688097306689835,
"grad_norm": 0.00887572392821312,
"learning_rate": 2.1625918307375844e-05,
"loss": 0.2666620254516602,
"step": 10000
},
{
"epoch": 8.688097306689835,
"eval_cer": 0.11231511079807119,
"eval_loss": 0.7393125891685486,
"eval_runtime": 35.3264,
"eval_samples_per_second": 28.251,
"eval_steps_per_second": 14.125,
"eval_wer": 0.35726950354609927,
"step": 10000
},
{
"epoch": 8.731537793223284,
"grad_norm": 0.017117468640208244,
"learning_rate": 2.1581839553335295e-05,
"loss": 0.18703149795532226,
"step": 10050
},
{
"epoch": 8.774978279756734,
"grad_norm": 0.20450972020626068,
"learning_rate": 2.153776079929474e-05,
"loss": 0.16164979934692383,
"step": 10100
},
{
"epoch": 8.818418766290183,
"grad_norm": 0.00887273158878088,
"learning_rate": 2.149368204525419e-05,
"loss": 0.2993427085876465,
"step": 10150
},
{
"epoch": 8.861859252823631,
"grad_norm": 0.00210910034365952,
"learning_rate": 2.1449603291213634e-05,
"loss": 0.2953006172180176,
"step": 10200
},
{
"epoch": 8.90529973935708,
"grad_norm": 0.0051006837747991085,
"learning_rate": 2.1405524537173085e-05,
"loss": 0.24485448837280274,
"step": 10250
},
{
"epoch": 8.94874022589053,
"grad_norm": 0.5796188712120056,
"learning_rate": 2.136144578313253e-05,
"loss": 0.3098959159851074,
"step": 10300
},
{
"epoch": 8.992180712423979,
"grad_norm": 0.01159872580319643,
"learning_rate": 2.131736702909198e-05,
"loss": 0.27299707412719726,
"step": 10350
},
{
"epoch": 9.035621198957429,
"grad_norm": 2.516123056411743,
"learning_rate": 2.1273288275051424e-05,
"loss": 0.25595357894897464,
"step": 10400
},
{
"epoch": 9.079061685490878,
"grad_norm": 0.0016837273724377155,
"learning_rate": 2.1229209521010872e-05,
"loss": 0.2204635238647461,
"step": 10450
},
{
"epoch": 9.122502172024326,
"grad_norm": 0.004055003169924021,
"learning_rate": 2.118513076697032e-05,
"loss": 0.29069057464599607,
"step": 10500
},
{
"epoch": 9.122502172024326,
"eval_cer": 0.1140488703472937,
"eval_loss": 0.7343300580978394,
"eval_runtime": 35.4446,
"eval_samples_per_second": 28.157,
"eval_steps_per_second": 14.078,
"eval_wer": 0.3472222222222222,
"step": 10500
},
{
"epoch": 9.165942658557777,
"grad_norm": 10.814416885375977,
"learning_rate": 2.1141052012929767e-05,
"loss": 0.22457393646240234,
"step": 10550
},
{
"epoch": 9.209383145091225,
"grad_norm": 0.531550407409668,
"learning_rate": 2.1096973258889218e-05,
"loss": 0.19709733963012696,
"step": 10600
},
{
"epoch": 9.252823631624674,
"grad_norm": 0.02372005581855774,
"learning_rate": 2.1052894504848662e-05,
"loss": 0.21724346160888672,
"step": 10650
},
{
"epoch": 9.296264118158124,
"grad_norm": 0.003351462772116065,
"learning_rate": 2.1008815750808113e-05,
"loss": 0.2724002838134766,
"step": 10700
},
{
"epoch": 9.339704604691573,
"grad_norm": 0.2525140047073364,
"learning_rate": 2.0964736996767557e-05,
"loss": 0.23882347106933594,
"step": 10750
},
{
"epoch": 9.383145091225021,
"grad_norm": 0.14738580584526062,
"learning_rate": 2.0920658242727008e-05,
"loss": 0.1564232349395752,
"step": 10800
},
{
"epoch": 9.426585577758472,
"grad_norm": 0.10283453017473221,
"learning_rate": 2.0876579488686452e-05,
"loss": 0.14069479942321778,
"step": 10850
},
{
"epoch": 9.47002606429192,
"grad_norm": 0.07120943069458008,
"learning_rate": 2.0832500734645903e-05,
"loss": 0.20460891723632812,
"step": 10900
},
{
"epoch": 9.513466550825369,
"grad_norm": 0.229303777217865,
"learning_rate": 2.0788421980605347e-05,
"loss": 0.29092355728149416,
"step": 10950
},
{
"epoch": 9.556907037358819,
"grad_norm": 0.011797781102359295,
"learning_rate": 2.0744343226564798e-05,
"loss": 0.14928483963012695,
"step": 11000
},
{
"epoch": 9.556907037358819,
"eval_cer": 0.11589098986834263,
"eval_loss": 0.6468539237976074,
"eval_runtime": 35.2293,
"eval_samples_per_second": 28.329,
"eval_steps_per_second": 14.164,
"eval_wer": 0.366725768321513,
"step": 11000
},
{
"epoch": 9.600347523892268,
"grad_norm": 0.0010864798678085208,
"learning_rate": 2.0700264472524242e-05,
"loss": 0.22057802200317383,
"step": 11050
},
{
"epoch": 9.643788010425716,
"grad_norm": 0.00047053879825398326,
"learning_rate": 2.0656185718483693e-05,
"loss": 0.1952187156677246,
"step": 11100
},
{
"epoch": 9.687228496959166,
"grad_norm": 0.004543100483715534,
"learning_rate": 2.0612106964443137e-05,
"loss": 0.3174121856689453,
"step": 11150
},
{
"epoch": 9.730668983492615,
"grad_norm": 0.0010513780871406198,
"learning_rate": 2.0568028210402588e-05,
"loss": 0.16007177352905275,
"step": 11200
},
{
"epoch": 9.774109470026064,
"grad_norm": 0.0026681029703468084,
"learning_rate": 2.0523949456362032e-05,
"loss": 0.2065435218811035,
"step": 11250
},
{
"epoch": 9.817549956559514,
"grad_norm": 0.2069607824087143,
"learning_rate": 2.0479870702321483e-05,
"loss": 0.22219644546508788,
"step": 11300
},
{
"epoch": 9.860990443092962,
"grad_norm": 0.012031909078359604,
"learning_rate": 2.0435791948280927e-05,
"loss": 0.1956252098083496,
"step": 11350
},
{
"epoch": 9.904430929626411,
"grad_norm": 0.0008321640198118985,
"learning_rate": 2.0391713194240378e-05,
"loss": 0.3007790565490723,
"step": 11400
},
{
"epoch": 9.947871416159861,
"grad_norm": 0.00023682558094151318,
"learning_rate": 2.0347634440199825e-05,
"loss": 0.25140411376953126,
"step": 11450
},
{
"epoch": 9.99131190269331,
"grad_norm": 0.18799935281276703,
"learning_rate": 2.0303555686159273e-05,
"loss": 0.20868509292602538,
"step": 11500
},
{
"epoch": 9.99131190269331,
"eval_cer": 0.11296527062902964,
"eval_loss": 0.871296226978302,
"eval_runtime": 35.7614,
"eval_samples_per_second": 27.907,
"eval_steps_per_second": 13.954,
"eval_wer": 0.35786052009456265,
"step": 11500
},
{
"epoch": 10.034752389226758,
"grad_norm": 0.001729931216686964,
"learning_rate": 2.025947693211872e-05,
"loss": 0.2267488098144531,
"step": 11550
},
{
"epoch": 10.078192875760209,
"grad_norm": 0.0033665213268250227,
"learning_rate": 2.0215398178078168e-05,
"loss": 0.11680364608764648,
"step": 11600
},
{
"epoch": 10.121633362293657,
"grad_norm": 0.000843276153318584,
"learning_rate": 2.0171319424037616e-05,
"loss": 0.20855466842651368,
"step": 11650
},
{
"epoch": 10.165073848827106,
"grad_norm": 0.0007557457429356873,
"learning_rate": 2.0127240669997063e-05,
"loss": 0.17802534103393555,
"step": 11700
},
{
"epoch": 10.208514335360556,
"grad_norm": 0.08655949681997299,
"learning_rate": 2.008316191595651e-05,
"loss": 0.14240021705627443,
"step": 11750
},
{
"epoch": 10.251954821894005,
"grad_norm": 55.311119079589844,
"learning_rate": 2.0039083161915955e-05,
"loss": 0.3166378211975098,
"step": 11800
},
{
"epoch": 10.295395308427455,
"grad_norm": 0.04812853783369064,
"learning_rate": 1.9995004407875406e-05,
"loss": 0.1832990837097168,
"step": 11850
},
{
"epoch": 10.338835794960904,
"grad_norm": 4.462372303009033,
"learning_rate": 1.995092565383485e-05,
"loss": 0.1998225212097168,
"step": 11900
},
{
"epoch": 10.382276281494352,
"grad_norm": 0.030581099912524223,
"learning_rate": 1.99068468997943e-05,
"loss": 0.1649586296081543,
"step": 11950
},
{
"epoch": 10.425716768027803,
"grad_norm": 0.0061181094497442245,
"learning_rate": 1.9862768145753745e-05,
"loss": 0.21640779495239257,
"step": 12000
},
{
"epoch": 10.425716768027803,
"eval_cer": 0.11507829007964458,
"eval_loss": 0.7006326913833618,
"eval_runtime": 35.3681,
"eval_samples_per_second": 28.218,
"eval_steps_per_second": 14.109,
"eval_wer": 0.3489952718676123,
"step": 12000
},
{
"epoch": 10.469157254561251,
"grad_norm": 9.876059532165527,
"learning_rate": 1.9818689391713196e-05,
"loss": 0.24267179489135743,
"step": 12050
},
{
"epoch": 10.5125977410947,
"grad_norm": 0.017044102773070335,
"learning_rate": 1.977461063767264e-05,
"loss": 0.17035614013671874,
"step": 12100
},
{
"epoch": 10.55603822762815,
"grad_norm": 0.0013389646774157882,
"learning_rate": 1.973053188363209e-05,
"loss": 0.154972562789917,
"step": 12150
},
{
"epoch": 10.599478714161599,
"grad_norm": 0.0071999249048531055,
"learning_rate": 1.9686453129591535e-05,
"loss": 0.08084283828735352,
"step": 12200
},
{
"epoch": 10.642919200695047,
"grad_norm": 0.29191315174102783,
"learning_rate": 1.9642374375550986e-05,
"loss": 0.2428382682800293,
"step": 12250
},
{
"epoch": 10.686359687228498,
"grad_norm": 2.9929769039154053,
"learning_rate": 1.959829562151043e-05,
"loss": 0.165596923828125,
"step": 12300
},
{
"epoch": 10.729800173761946,
"grad_norm": 0.6568811535835266,
"learning_rate": 1.955421686746988e-05,
"loss": 0.24114521026611327,
"step": 12350
},
{
"epoch": 10.773240660295395,
"grad_norm": 0.00521878432482481,
"learning_rate": 1.9510138113429328e-05,
"loss": 0.13222161293029785,
"step": 12400
},
{
"epoch": 10.816681146828845,
"grad_norm": 0.004102786537259817,
"learning_rate": 1.9466059359388776e-05,
"loss": 0.2611697006225586,
"step": 12450
},
{
"epoch": 10.860121633362294,
"grad_norm": 0.009258633479475975,
"learning_rate": 1.9421980605348223e-05,
"loss": 0.17743043899536132,
"step": 12500
},
{
"epoch": 10.860121633362294,
"eval_cer": 0.11280273067129003,
"eval_loss": 0.8380096554756165,
"eval_runtime": 35.6349,
"eval_samples_per_second": 28.006,
"eval_steps_per_second": 14.003,
"eval_wer": 0.3549054373522459,
"step": 12500
},
{
"epoch": 10.903562119895742,
"grad_norm": 0.010119021870195866,
"learning_rate": 1.937790185130767e-05,
"loss": 0.2513529586791992,
"step": 12550
},
{
"epoch": 10.947002606429193,
"grad_norm": 0.06954587996006012,
"learning_rate": 1.9333823097267118e-05,
"loss": 0.18938690185546875,
"step": 12600
},
{
"epoch": 10.990443092962641,
"grad_norm": 0.012158134952187538,
"learning_rate": 1.9289744343226566e-05,
"loss": 0.1424751377105713,
"step": 12650
},
{
"epoch": 11.03388357949609,
"grad_norm": 0.35711684823036194,
"learning_rate": 1.9245665589186013e-05,
"loss": 0.22175674438476561,
"step": 12700
},
{
"epoch": 11.07732406602954,
"grad_norm": 0.029316997155547142,
"learning_rate": 1.920158683514546e-05,
"loss": 0.09745993614196777,
"step": 12750
},
{
"epoch": 11.120764552562989,
"grad_norm": 0.0045172832906246185,
"learning_rate": 1.915750808110491e-05,
"loss": 0.14967589378356932,
"step": 12800
},
{
"epoch": 11.164205039096437,
"grad_norm": 0.1485351026058197,
"learning_rate": 1.9113429327064356e-05,
"loss": 0.15214619636535645,
"step": 12850
},
{
"epoch": 11.207645525629887,
"grad_norm": 0.013465415686368942,
"learning_rate": 1.9069350573023803e-05,
"loss": 0.20777603149414062,
"step": 12900
},
{
"epoch": 11.251086012163336,
"grad_norm": 0.003324932884424925,
"learning_rate": 1.902527181898325e-05,
"loss": 0.14591985702514648,
"step": 12950
},
{
"epoch": 11.294526498696785,
"grad_norm": 18.002288818359375,
"learning_rate": 1.89811930649427e-05,
"loss": 0.0729653549194336,
"step": 13000
},
{
"epoch": 11.294526498696785,
"eval_cer": 0.10716801213631684,
"eval_loss": 0.8233883380889893,
"eval_runtime": 35.8606,
"eval_samples_per_second": 27.83,
"eval_steps_per_second": 13.915,
"eval_wer": 0.3354018912529551,
"step": 13000
},
{
"epoch": 11.337966985230235,
"grad_norm": 0.008703617379069328,
"learning_rate": 1.8937114310902146e-05,
"loss": 0.18068933486938477,
"step": 13050
},
{
"epoch": 11.381407471763684,
"grad_norm": 0.016712911427021027,
"learning_rate": 1.8893035556861593e-05,
"loss": 0.18568845748901366,
"step": 13100
},
{
"epoch": 11.424847958297132,
"grad_norm": 0.0025050437543541193,
"learning_rate": 1.884895680282104e-05,
"loss": 0.12276277542114258,
"step": 13150
},
{
"epoch": 11.468288444830582,
"grad_norm": 0.0009163509821519256,
"learning_rate": 1.880487804878049e-05,
"loss": 0.14427170753479004,
"step": 13200
},
{
"epoch": 11.511728931364031,
"grad_norm": 0.0007597589865326881,
"learning_rate": 1.8760799294739933e-05,
"loss": 0.21098020553588867,
"step": 13250
},
{
"epoch": 11.555169417897481,
"grad_norm": 0.00016254196816589683,
"learning_rate": 1.8716720540699384e-05,
"loss": 0.18421314239501954,
"step": 13300
},
{
"epoch": 11.59860990443093,
"grad_norm": 0.006345795933157206,
"learning_rate": 1.867264178665883e-05,
"loss": 0.20616317749023438,
"step": 13350
},
{
"epoch": 11.642050390964378,
"grad_norm": 0.0005729036638513207,
"learning_rate": 1.862856303261828e-05,
"loss": 0.10284842491149902,
"step": 13400
},
{
"epoch": 11.685490877497829,
"grad_norm": 0.014439227990806103,
"learning_rate": 1.8584484278577726e-05,
"loss": 0.17948501586914062,
"step": 13450
},
{
"epoch": 11.728931364031277,
"grad_norm": 1.6784127950668335,
"learning_rate": 1.8540405524537174e-05,
"loss": 0.15696640014648439,
"step": 13500
},
{
"epoch": 11.728931364031277,
"eval_cer": 0.1050549926857019,
"eval_loss": 0.827880322933197,
"eval_runtime": 35.4053,
"eval_samples_per_second": 28.188,
"eval_steps_per_second": 14.094,
"eval_wer": 0.3271276595744681,
"step": 13500
},
{
"epoch": 11.772371850564726,
"grad_norm": 0.0005249602254480124,
"learning_rate": 1.849632677049662e-05,
"loss": 0.1481422519683838,
"step": 13550
},
{
"epoch": 11.815812337098176,
"grad_norm": 0.04822874069213867,
"learning_rate": 1.845224801645607e-05,
"loss": 0.17740755081176757,
"step": 13600
},
{
"epoch": 11.859252823631625,
"grad_norm": 0.0025418957229703665,
"learning_rate": 1.8408169262415516e-05,
"loss": 0.12424736022949219,
"step": 13650
},
{
"epoch": 11.902693310165073,
"grad_norm": 0.004390745423734188,
"learning_rate": 1.8364090508374964e-05,
"loss": 0.13344883918762207,
"step": 13700
},
{
"epoch": 11.946133796698524,
"grad_norm": 32.29993438720703,
"learning_rate": 1.832001175433441e-05,
"loss": 0.08959797859191894,
"step": 13750
},
{
"epoch": 11.989574283231972,
"grad_norm": 0.01902751810848713,
"learning_rate": 1.827593300029386e-05,
"loss": 0.15901991844177246,
"step": 13800
},
{
"epoch": 12.03301476976542,
"grad_norm": 0.0059561156667768955,
"learning_rate": 1.8231854246253306e-05,
"loss": 0.17461122512817384,
"step": 13850
},
{
"epoch": 12.076455256298871,
"grad_norm": 0.018380964174866676,
"learning_rate": 1.8187775492212754e-05,
"loss": 0.07262963771820069,
"step": 13900
},
{
"epoch": 12.11989574283232,
"grad_norm": 0.0007720252615399659,
"learning_rate": 1.81436967381722e-05,
"loss": 0.12178866386413574,
"step": 13950
},
{
"epoch": 12.163336229365768,
"grad_norm": 0.005173459183424711,
"learning_rate": 1.809961798413165e-05,
"loss": 0.18882158279418945,
"step": 14000
},
{
"epoch": 12.163336229365768,
"eval_cer": 0.109335211572845,
"eval_loss": 0.7686098217964172,
"eval_runtime": 35.2553,
"eval_samples_per_second": 28.308,
"eval_steps_per_second": 14.154,
"eval_wer": 0.3380614657210402,
"step": 14000
},
{
"epoch": 12.206776715899219,
"grad_norm": 0.005660334601998329,
"learning_rate": 1.8055539230091096e-05,
"loss": 0.10836532592773437,
"step": 14050
},
{
"epoch": 12.250217202432667,
"grad_norm": 0.24879610538482666,
"learning_rate": 1.8011460476050544e-05,
"loss": 0.06245335102081299,
"step": 14100
},
{
"epoch": 12.293657688966116,
"grad_norm": 0.0002563217713031918,
"learning_rate": 1.796738172200999e-05,
"loss": 0.09659749031066894,
"step": 14150
},
{
"epoch": 12.337098175499566,
"grad_norm": 0.0001977673382498324,
"learning_rate": 1.792330296796944e-05,
"loss": 0.08525155067443847,
"step": 14200
},
{
"epoch": 12.380538662033015,
"grad_norm": 0.00024911269429139793,
"learning_rate": 1.7879224213928886e-05,
"loss": 0.1183913516998291,
"step": 14250
},
{
"epoch": 12.423979148566463,
"grad_norm": 0.001824671751819551,
"learning_rate": 1.7835145459888337e-05,
"loss": 0.08873219490051269,
"step": 14300
},
{
"epoch": 12.467419635099914,
"grad_norm": 0.004962866194546223,
"learning_rate": 1.779106670584778e-05,
"loss": 0.11354425430297851,
"step": 14350
},
{
"epoch": 12.510860121633362,
"grad_norm": 0.0757075771689415,
"learning_rate": 1.7746987951807232e-05,
"loss": 0.11156253814697266,
"step": 14400
},
{
"epoch": 12.55430060816681,
"grad_norm": 0.02478897199034691,
"learning_rate": 1.7702909197766676e-05,
"loss": 0.12282137870788574,
"step": 14450
},
{
"epoch": 12.597741094700261,
"grad_norm": 2.5461020469665527,
"learning_rate": 1.7658830443726127e-05,
"loss": 0.11555877685546875,
"step": 14500
},
{
"epoch": 12.597741094700261,
"eval_cer": 0.10830579184049412,
"eval_loss": 0.92600417137146,
"eval_runtime": 35.2867,
"eval_samples_per_second": 28.283,
"eval_steps_per_second": 14.141,
"eval_wer": 0.3309692671394799,
"step": 14500
},
{
"epoch": 12.64118158123371,
"grad_norm": 0.5628868341445923,
"learning_rate": 1.761475168968557e-05,
"loss": 0.2381545639038086,
"step": 14550
},
{
"epoch": 12.684622067767158,
"grad_norm": 0.01276449766010046,
"learning_rate": 1.757067293564502e-05,
"loss": 0.10359532356262208,
"step": 14600
},
{
"epoch": 12.728062554300609,
"grad_norm": 0.009611076675355434,
"learning_rate": 1.7526594181604466e-05,
"loss": 0.10290337562561035,
"step": 14650
},
{
"epoch": 12.771503040834057,
"grad_norm": 0.00047707941848784685,
"learning_rate": 1.7482515427563914e-05,
"loss": 0.20995697021484375,
"step": 14700
},
{
"epoch": 12.814943527367507,
"grad_norm": 10.169084548950195,
"learning_rate": 1.743843667352336e-05,
"loss": 0.15165854454040528,
"step": 14750
},
{
"epoch": 12.858384013900956,
"grad_norm": 0.0020368106197565794,
"learning_rate": 1.739435791948281e-05,
"loss": 0.22781238555908204,
"step": 14800
},
{
"epoch": 12.901824500434405,
"grad_norm": 0.04858289286494255,
"learning_rate": 1.7350279165442256e-05,
"loss": 0.13032222747802735,
"step": 14850
},
{
"epoch": 12.945264986967853,
"grad_norm": 0.0008267110679298639,
"learning_rate": 1.7306200411401704e-05,
"loss": 0.06278028964996338,
"step": 14900
},
{
"epoch": 12.988705473501303,
"grad_norm": 0.14715807139873505,
"learning_rate": 1.726212165736115e-05,
"loss": 0.16469184875488282,
"step": 14950
},
{
"epoch": 13.032145960034752,
"grad_norm": 0.18887297809123993,
"learning_rate": 1.72180429033206e-05,
"loss": 0.248513126373291,
"step": 15000
},
{
"epoch": 13.032145960034752,
"eval_cer": 0.11063553123476189,
"eval_loss": 0.8484429717063904,
"eval_runtime": 35.4635,
"eval_samples_per_second": 28.142,
"eval_steps_per_second": 14.071,
"eval_wer": 0.33747044917257685,
"step": 15000
},
{
"epoch": 13.075586446568202,
"grad_norm": 0.0044303713366389275,
"learning_rate": 1.7173964149280047e-05,
"loss": 0.13554862022399902,
"step": 15050
},
{
"epoch": 13.119026933101651,
"grad_norm": 0.006357671692967415,
"learning_rate": 1.7129885395239494e-05,
"loss": 0.1657179069519043,
"step": 15100
},
{
"epoch": 13.1624674196351,
"grad_norm": 0.004660587292164564,
"learning_rate": 1.7085806641198945e-05,
"loss": 0.07184979438781738,
"step": 15150
},
{
"epoch": 13.20590790616855,
"grad_norm": 0.001002687495201826,
"learning_rate": 1.704172788715839e-05,
"loss": 0.11178950309753417,
"step": 15200
},
{
"epoch": 13.249348392701998,
"grad_norm": 0.0017005419358611107,
"learning_rate": 1.699764913311784e-05,
"loss": 0.14817577362060547,
"step": 15250
},
{
"epoch": 13.292788879235447,
"grad_norm": 30.164806365966797,
"learning_rate": 1.6953570379077284e-05,
"loss": 0.11133524894714356,
"step": 15300
},
{
"epoch": 13.336229365768897,
"grad_norm": 0.20776331424713135,
"learning_rate": 1.6909491625036735e-05,
"loss": 0.08040478706359863,
"step": 15350
},
{
"epoch": 13.379669852302346,
"grad_norm": 0.0001020112176775001,
"learning_rate": 1.686541287099618e-05,
"loss": 0.15835739135742188,
"step": 15400
},
{
"epoch": 13.423110338835794,
"grad_norm": 0.020164845511317253,
"learning_rate": 1.682133411695563e-05,
"loss": 0.14341225624084472,
"step": 15450
},
{
"epoch": 13.466550825369245,
"grad_norm": 0.0017340014455839992,
"learning_rate": 1.6777255362915074e-05,
"loss": 0.1316046142578125,
"step": 15500
},
{
"epoch": 13.466550825369245,
"eval_cer": 0.10104567372812483,
"eval_loss": 0.9770230650901794,
"eval_runtime": 35.2978,
"eval_samples_per_second": 28.274,
"eval_steps_per_second": 14.137,
"eval_wer": 0.32062647754137114,
"step": 15500
},
{
"epoch": 13.509991311902693,
"grad_norm": 0.10325725376605988,
"learning_rate": 1.6733176608874525e-05,
"loss": 0.14392637252807616,
"step": 15550
},
{
"epoch": 13.553431798436142,
"grad_norm": 7.639220714569092,
"learning_rate": 1.668909785483397e-05,
"loss": 0.11816396713256835,
"step": 15600
},
{
"epoch": 13.596872284969592,
"grad_norm": 0.011842885985970497,
"learning_rate": 1.664501910079342e-05,
"loss": 0.06573171615600586,
"step": 15650
},
{
"epoch": 13.64031277150304,
"grad_norm": 0.36505550146102905,
"learning_rate": 1.6600940346752864e-05,
"loss": 0.12598639488220215,
"step": 15700
},
{
"epoch": 13.68375325803649,
"grad_norm": 0.01986199989914894,
"learning_rate": 1.6556861592712315e-05,
"loss": 0.08807419776916504,
"step": 15750
},
{
"epoch": 13.72719374456994,
"grad_norm": 0.0006646508118137717,
"learning_rate": 1.651278283867176e-05,
"loss": 0.07460322380065917,
"step": 15800
},
{
"epoch": 13.770634231103388,
"grad_norm": 0.017491919919848442,
"learning_rate": 1.646870408463121e-05,
"loss": 0.08792648315429688,
"step": 15850
},
{
"epoch": 13.814074717636837,
"grad_norm": 64.46247863769531,
"learning_rate": 1.6424625330590654e-05,
"loss": 0.19781913757324218,
"step": 15900
},
{
"epoch": 13.857515204170287,
"grad_norm": 0.004558779299259186,
"learning_rate": 1.6380546576550102e-05,
"loss": 0.10111617088317872,
"step": 15950
},
{
"epoch": 13.900955690703736,
"grad_norm": 0.00020643201423808932,
"learning_rate": 1.633646782250955e-05,
"loss": 0.08666461944580078,
"step": 16000
},
{
"epoch": 13.900955690703736,
"eval_cer": 0.10776399198136208,
"eval_loss": 0.8977736234664917,
"eval_runtime": 35.6835,
"eval_samples_per_second": 27.968,
"eval_steps_per_second": 13.984,
"eval_wer": 0.3271276595744681,
"step": 16000
},
{
"epoch": 13.944396177237184,
"grad_norm": 0.031363021582365036,
"learning_rate": 1.6292389068468997e-05,
"loss": 0.09195023536682129,
"step": 16050
},
{
"epoch": 13.987836663770635,
"grad_norm": 0.024453002959489822,
"learning_rate": 1.6248310314428448e-05,
"loss": 0.05720340728759766,
"step": 16100
},
{
"epoch": 14.031277150304083,
"grad_norm": 0.020940568298101425,
"learning_rate": 1.6204231560387892e-05,
"loss": 0.11965296745300293,
"step": 16150
},
{
"epoch": 14.074717636837532,
"grad_norm": 0.020178375765681267,
"learning_rate": 1.6160152806347343e-05,
"loss": 0.11014815330505372,
"step": 16200
},
{
"epoch": 14.118158123370982,
"grad_norm": 1.0401362180709839,
"learning_rate": 1.6116074052306787e-05,
"loss": 0.06974054336547851,
"step": 16250
},
{
"epoch": 14.16159860990443,
"grad_norm": 0.007594361901283264,
"learning_rate": 1.6071995298266238e-05,
"loss": 0.041026763916015625,
"step": 16300
},
{
"epoch": 14.20503909643788,
"grad_norm": 0.0018089961959049106,
"learning_rate": 1.6027916544225682e-05,
"loss": 0.05419292449951172,
"step": 16350
},
{
"epoch": 14.24847958297133,
"grad_norm": 8.15002727508545,
"learning_rate": 1.5983837790185133e-05,
"loss": 0.0943959903717041,
"step": 16400
},
{
"epoch": 14.291920069504778,
"grad_norm": 0.0004822172923013568,
"learning_rate": 1.5939759036144577e-05,
"loss": 0.09241563796997071,
"step": 16450
},
{
"epoch": 14.335360556038228,
"grad_norm": 0.0005213666008785367,
"learning_rate": 1.5895680282104028e-05,
"loss": 0.08321575164794921,
"step": 16500
},
{
"epoch": 14.335360556038228,
"eval_cer": 0.10608441241805278,
"eval_loss": 0.936793327331543,
"eval_runtime": 35.3702,
"eval_samples_per_second": 28.216,
"eval_steps_per_second": 14.108,
"eval_wer": 0.32476359338061467,
"step": 16500
},
{
"epoch": 14.378801042571677,
"grad_norm": 7.210012699943036e-05,
"learning_rate": 1.5851601528063472e-05,
"loss": 0.09619697570800781,
"step": 16550
},
{
"epoch": 14.422241529105126,
"grad_norm": 4.460615158081055,
"learning_rate": 1.5807522774022923e-05,
"loss": 0.06220272541046143,
"step": 16600
},
{
"epoch": 14.465682015638576,
"grad_norm": 3.5526578426361084,
"learning_rate": 1.5763444019982367e-05,
"loss": 0.07247277259826661,
"step": 16650
},
{
"epoch": 14.509122502172024,
"grad_norm": 0.0009735809871926904,
"learning_rate": 1.5719365265941818e-05,
"loss": 0.14418716430664064,
"step": 16700
},
{
"epoch": 14.552562988705473,
"grad_norm": 0.002880257787182927,
"learning_rate": 1.5675286511901262e-05,
"loss": 0.05156928539276123,
"step": 16750
},
{
"epoch": 14.596003475238923,
"grad_norm": 0.009934864938259125,
"learning_rate": 1.5631207757860713e-05,
"loss": 0.1062159538269043,
"step": 16800
},
{
"epoch": 14.639443961772372,
"grad_norm": 0.13457264006137848,
"learning_rate": 1.5587129003820157e-05,
"loss": 0.05868762016296387,
"step": 16850
},
{
"epoch": 14.68288444830582,
"grad_norm": 0.00943897757679224,
"learning_rate": 1.5543050249779608e-05,
"loss": 0.07642593383789062,
"step": 16900
},
{
"epoch": 14.72632493483927,
"grad_norm": 0.026743775233626366,
"learning_rate": 1.5498971495739052e-05,
"loss": 0.10913041114807129,
"step": 16950
},
{
"epoch": 14.76976542137272,
"grad_norm": 0.003263711929321289,
"learning_rate": 1.5454892741698503e-05,
"loss": 0.12181022644042969,
"step": 17000
},
{
"epoch": 14.76976542137272,
"eval_cer": 0.10294197323508696,
"eval_loss": 0.8898913264274597,
"eval_runtime": 35.138,
"eval_samples_per_second": 28.402,
"eval_steps_per_second": 14.201,
"eval_wer": 0.3188534278959811,
"step": 17000
},
{
"epoch": 14.813205907906168,
"grad_norm": 0.020024575293064117,
"learning_rate": 1.541081398765795e-05,
"loss": 0.06474356651306153,
"step": 17050
},
{
"epoch": 14.856646394439618,
"grad_norm": 0.02727115899324417,
"learning_rate": 1.5366735233617398e-05,
"loss": 0.064862699508667,
"step": 17100
},
{
"epoch": 14.900086880973067,
"grad_norm": 0.06588542461395264,
"learning_rate": 1.5322656479576846e-05,
"loss": 0.11551046371459961,
"step": 17150
},
{
"epoch": 14.943527367506515,
"grad_norm": 1.188116431236267,
"learning_rate": 1.5278577725536293e-05,
"loss": 0.0937428092956543,
"step": 17200
},
{
"epoch": 14.986967854039966,
"grad_norm": 0.003894130466505885,
"learning_rate": 1.5234498971495739e-05,
"loss": 0.11846747398376464,
"step": 17250
},
{
"epoch": 15.030408340573414,
"grad_norm": 0.0014571856008842587,
"learning_rate": 1.5190420217455185e-05,
"loss": 0.05842185020446777,
"step": 17300
},
{
"epoch": 15.073848827106863,
"grad_norm": 0.0016659823013469577,
"learning_rate": 1.5146341463414634e-05,
"loss": 0.055425772666931154,
"step": 17350
},
{
"epoch": 15.117289313640313,
"grad_norm": 0.0008578883716836572,
"learning_rate": 1.510226270937408e-05,
"loss": 0.10561844825744629,
"step": 17400
},
{
"epoch": 15.160729800173762,
"grad_norm": 0.028916161507368088,
"learning_rate": 1.505818395533353e-05,
"loss": 0.15631651878356934,
"step": 17450
},
{
"epoch": 15.20417028670721,
"grad_norm": 0.01692270301282406,
"learning_rate": 1.5014105201292976e-05,
"loss": 0.04396585464477539,
"step": 17500
},
{
"epoch": 15.20417028670721,
"eval_cer": 0.10250853334778133,
"eval_loss": 0.9507099390029907,
"eval_runtime": 35.4616,
"eval_samples_per_second": 28.143,
"eval_steps_per_second": 14.072,
"eval_wer": 0.3200354609929078,
"step": 17500
},
{
"epoch": 15.24761077324066,
"grad_norm": 0.00015645832172594965,
"learning_rate": 1.4970026447252426e-05,
"loss": 0.10505289077758789,
"step": 17550
},
{
"epoch": 15.29105125977411,
"grad_norm": 6.809161277487874e-05,
"learning_rate": 1.4925947693211873e-05,
"loss": 0.06061763286590576,
"step": 17600
},
{
"epoch": 15.334491746307558,
"grad_norm": 0.002175210742279887,
"learning_rate": 1.488186893917132e-05,
"loss": 0.08643261909484863,
"step": 17650
},
{
"epoch": 15.377932232841008,
"grad_norm": 0.00033852062188088894,
"learning_rate": 1.4837790185130768e-05,
"loss": 0.058766045570373536,
"step": 17700
},
{
"epoch": 15.421372719374457,
"grad_norm": 0.032032400369644165,
"learning_rate": 1.4793711431090216e-05,
"loss": 0.06575697422027588,
"step": 17750
},
{
"epoch": 15.464813205907905,
"grad_norm": 0.007524843327701092,
"learning_rate": 1.4749632677049663e-05,
"loss": 0.039991099834442136,
"step": 17800
},
{
"epoch": 15.508253692441356,
"grad_norm": 0.38591468334198,
"learning_rate": 1.470555392300911e-05,
"loss": 0.06621292591094971,
"step": 17850
},
{
"epoch": 15.551694178974804,
"grad_norm": 0.005559583194553852,
"learning_rate": 1.4661475168968558e-05,
"loss": 0.05986703395843506,
"step": 17900
},
{
"epoch": 15.595134665508255,
"grad_norm": 0.0015642516082152724,
"learning_rate": 1.4617396414928004e-05,
"loss": 0.05914860725402832,
"step": 17950
},
{
"epoch": 15.638575152041703,
"grad_norm": 0.025494471192359924,
"learning_rate": 1.4573317660887452e-05,
"loss": 0.049571285247802736,
"step": 18000
},
{
"epoch": 15.638575152041703,
"eval_cer": 0.10175001354499648,
"eval_loss": 0.9704659581184387,
"eval_runtime": 35.2476,
"eval_samples_per_second": 28.314,
"eval_steps_per_second": 14.157,
"eval_wer": 0.31501182033096925,
"step": 18000
},
{
"epoch": 15.682015638575152,
"grad_norm": 0.0005379091016948223,
"learning_rate": 1.45292389068469e-05,
"loss": 0.04489382266998291,
"step": 18050
},
{
"epoch": 15.725456125108602,
"grad_norm": 0.0002697557501960546,
"learning_rate": 1.4485160152806347e-05,
"loss": 0.03827667951583862,
"step": 18100
},
{
"epoch": 15.76889661164205,
"grad_norm": 0.04231059551239014,
"learning_rate": 1.4441081398765794e-05,
"loss": 0.06413057327270508,
"step": 18150
},
{
"epoch": 15.8123370981755,
"grad_norm": 0.0001920364738907665,
"learning_rate": 1.4397002644725242e-05,
"loss": 0.06431771278381347,
"step": 18200
},
{
"epoch": 15.85577758470895,
"grad_norm": 0.0009730961173772812,
"learning_rate": 1.435292389068469e-05,
"loss": 0.14641772270202635,
"step": 18250
},
{
"epoch": 15.899218071242398,
"grad_norm": 0.04817694053053856,
"learning_rate": 1.4308845136644137e-05,
"loss": 0.09290631294250488,
"step": 18300
},
{
"epoch": 15.942658557775847,
"grad_norm": 0.0002339025668334216,
"learning_rate": 1.4264766382603586e-05,
"loss": 0.04536252975463867,
"step": 18350
},
{
"epoch": 15.986099044309297,
"grad_norm": 5.864691734313965,
"learning_rate": 1.4220687628563033e-05,
"loss": 0.07572299003601074,
"step": 18400
},
{
"epoch": 16.029539530842744,
"grad_norm": 0.00020901852985844016,
"learning_rate": 1.4176608874522481e-05,
"loss": 0.04608057975769043,
"step": 18450
},
{
"epoch": 16.072980017376196,
"grad_norm": 0.07316890358924866,
"learning_rate": 1.4132530120481928e-05,
"loss": 0.11023859977722168,
"step": 18500
},
{
"epoch": 16.072980017376196,
"eval_cer": 0.11616188979790865,
"eval_loss": 1.0090523958206177,
"eval_runtime": 35.2425,
"eval_samples_per_second": 28.318,
"eval_steps_per_second": 14.159,
"eval_wer": 0.33037825059101655,
"step": 18500
},
{
"epoch": 16.116420503909644,
"grad_norm": 0.0020939745008945465,
"learning_rate": 1.4088451366441376e-05,
"loss": 0.05038735389709473,
"step": 18550
},
{
"epoch": 16.159860990443093,
"grad_norm": 0.0004580508393701166,
"learning_rate": 1.4044372612400824e-05,
"loss": 0.0617540168762207,
"step": 18600
},
{
"epoch": 16.20330147697654,
"grad_norm": 0.0010127995628863573,
"learning_rate": 1.4000293858360271e-05,
"loss": 0.0612303876876831,
"step": 18650
},
{
"epoch": 16.24674196350999,
"grad_norm": 0.015361390076577663,
"learning_rate": 1.3956215104319719e-05,
"loss": 0.04825174331665039,
"step": 18700
},
{
"epoch": 16.290182450043442,
"grad_norm": 0.0008976504323072731,
"learning_rate": 1.3912136350279166e-05,
"loss": 0.05854806423187256,
"step": 18750
},
{
"epoch": 16.33362293657689,
"grad_norm": 0.013188125565648079,
"learning_rate": 1.3868057596238614e-05,
"loss": 0.04929457664489746,
"step": 18800
},
{
"epoch": 16.37706342311034,
"grad_norm": 0.013670213520526886,
"learning_rate": 1.3823978842198061e-05,
"loss": 0.038565528392791745,
"step": 18850
},
{
"epoch": 16.420503909643788,
"grad_norm": 0.08130084723234177,
"learning_rate": 1.3779900088157509e-05,
"loss": 0.04009881019592285,
"step": 18900
},
{
"epoch": 16.463944396177236,
"grad_norm": 2.4593734741210938,
"learning_rate": 1.3735821334116956e-05,
"loss": 0.043494491577148436,
"step": 18950
},
{
"epoch": 16.507384882710685,
"grad_norm": 0.0002031345502473414,
"learning_rate": 1.3691742580076404e-05,
"loss": 0.027528271675109864,
"step": 19000
},
{
"epoch": 16.507384882710685,
"eval_cer": 0.10440483285474346,
"eval_loss": 0.9773461818695068,
"eval_runtime": 35.5819,
"eval_samples_per_second": 28.048,
"eval_steps_per_second": 14.024,
"eval_wer": 0.3212174940898345,
"step": 19000
},
{
"epoch": 16.550825369244137,
"grad_norm": 0.00017495028441771865,
"learning_rate": 1.3647663826035851e-05,
"loss": 0.06181173324584961,
"step": 19050
},
{
"epoch": 16.594265855777586,
"grad_norm": 0.00031455489806830883,
"learning_rate": 1.3603585071995299e-05,
"loss": 0.046858110427856446,
"step": 19100
},
{
"epoch": 16.637706342311034,
"grad_norm": 0.03978965803980827,
"learning_rate": 1.3559506317954746e-05,
"loss": 0.05867977142333984,
"step": 19150
},
{
"epoch": 16.681146828844483,
"grad_norm": 0.00650749821215868,
"learning_rate": 1.3515427563914194e-05,
"loss": 0.06390885829925537,
"step": 19200
},
{
"epoch": 16.72458731537793,
"grad_norm": 0.002027066657319665,
"learning_rate": 1.3471348809873641e-05,
"loss": 0.0747562313079834,
"step": 19250
},
{
"epoch": 16.76802780191138,
"grad_norm": 0.00012768770102411509,
"learning_rate": 1.3427270055833089e-05,
"loss": 0.0417702579498291,
"step": 19300
},
{
"epoch": 16.811468288444832,
"grad_norm": 4.3758605897892267e-05,
"learning_rate": 1.3383191301792536e-05,
"loss": 0.043452243804931644,
"step": 19350
},
{
"epoch": 16.85490877497828,
"grad_norm": 0.009404808282852173,
"learning_rate": 1.3339112547751984e-05,
"loss": 0.07918959617614746,
"step": 19400
},
{
"epoch": 16.89834926151173,
"grad_norm": 0.003255483927205205,
"learning_rate": 1.3295033793711431e-05,
"loss": 0.031140968799591065,
"step": 19450
},
{
"epoch": 16.941789748045178,
"grad_norm": 0.046869829297065735,
"learning_rate": 1.3250955039670879e-05,
"loss": 0.053838644027709964,
"step": 19500
},
{
"epoch": 16.941789748045178,
"eval_cer": 0.10142493362951725,
"eval_loss": 1.045753836631775,
"eval_runtime": 35.7261,
"eval_samples_per_second": 27.935,
"eval_steps_per_second": 13.967,
"eval_wer": 0.3141252955082742,
"step": 19500
},
{
"epoch": 16.985230234578626,
"grad_norm": 0.0014443215914070606,
"learning_rate": 1.3206876285630326e-05,
"loss": 0.055178966522216794,
"step": 19550
},
{
"epoch": 17.028670721112075,
"grad_norm": 0.0004687681212089956,
"learning_rate": 1.3162797531589774e-05,
"loss": 0.09418526649475098,
"step": 19600
},
{
"epoch": 17.072111207645527,
"grad_norm": 0.0004573004553094506,
"learning_rate": 1.3118718777549221e-05,
"loss": 0.026365480422973632,
"step": 19650
},
{
"epoch": 17.115551694178976,
"grad_norm": 0.0036469711922109127,
"learning_rate": 1.3074640023508669e-05,
"loss": 0.058814377784729005,
"step": 19700
},
{
"epoch": 17.158992180712424,
"grad_norm": 0.00524592399597168,
"learning_rate": 1.3030561269468116e-05,
"loss": 0.04088939189910889,
"step": 19750
},
{
"epoch": 17.202432667245873,
"grad_norm": 0.00013877540186513215,
"learning_rate": 1.2986482515427564e-05,
"loss": 0.06733872890472412,
"step": 19800
},
{
"epoch": 17.24587315377932,
"grad_norm": 0.04638398066163063,
"learning_rate": 1.2942403761387011e-05,
"loss": 0.02715529441833496,
"step": 19850
},
{
"epoch": 17.28931364031277,
"grad_norm": 0.0002255926956422627,
"learning_rate": 1.2898325007346459e-05,
"loss": 0.024372515678405763,
"step": 19900
},
{
"epoch": 17.332754126846222,
"grad_norm": 0.0001305036712437868,
"learning_rate": 1.2854246253305906e-05,
"loss": 0.05264826774597168,
"step": 19950
},
{
"epoch": 17.37619461337967,
"grad_norm": 0.012165222316980362,
"learning_rate": 1.2810167499265354e-05,
"loss": 0.022559099197387696,
"step": 20000
},
{
"epoch": 17.37619461337967,
"eval_cer": 0.10581351248848675,
"eval_loss": 0.9943767786026001,
"eval_runtime": 35.1598,
"eval_samples_per_second": 28.385,
"eval_steps_per_second": 14.192,
"eval_wer": 0.3271276595744681,
"step": 20000
},
{
"epoch": 17.41963509991312,
"grad_norm": 0.2545449733734131,
"learning_rate": 1.2766088745224801e-05,
"loss": 0.02598097801208496,
"step": 20050
},
{
"epoch": 17.463075586446568,
"grad_norm": 0.5349053144454956,
"learning_rate": 1.2722009991184249e-05,
"loss": 0.02631650447845459,
"step": 20100
},
{
"epoch": 17.506516072980016,
"grad_norm": 0.001936123939231038,
"learning_rate": 1.2677931237143697e-05,
"loss": 0.021945018768310547,
"step": 20150
},
{
"epoch": 17.54995655951347,
"grad_norm": 0.000843520334456116,
"learning_rate": 1.2633852483103146e-05,
"loss": 0.09685382843017579,
"step": 20200
},
{
"epoch": 17.593397046046917,
"grad_norm": 0.0006347526214085519,
"learning_rate": 1.2589773729062593e-05,
"loss": 0.07540733814239502,
"step": 20250
},
{
"epoch": 17.636837532580365,
"grad_norm": 0.00029396990430541337,
"learning_rate": 1.254569497502204e-05,
"loss": 0.04331284999847412,
"step": 20300
},
{
"epoch": 17.680278019113814,
"grad_norm": 0.0012669226853176951,
"learning_rate": 1.2501616220981488e-05,
"loss": 0.05464168548583984,
"step": 20350
},
{
"epoch": 17.723718505647263,
"grad_norm": 8.315537706948817e-05,
"learning_rate": 1.2457537466940936e-05,
"loss": 0.043418560028076175,
"step": 20400
},
{
"epoch": 17.76715899218071,
"grad_norm": 0.014166179113090038,
"learning_rate": 1.2413458712900383e-05,
"loss": 0.057585406303405764,
"step": 20450
},
{
"epoch": 17.810599478714163,
"grad_norm": 0.0003503711777739227,
"learning_rate": 1.236937995885983e-05,
"loss": 0.06322105884552003,
"step": 20500
},
{
"epoch": 17.810599478714163,
"eval_cer": 0.09947445413664192,
"eval_loss": 0.9832805395126343,
"eval_runtime": 35.3191,
"eval_samples_per_second": 28.257,
"eval_steps_per_second": 14.128,
"eval_wer": 0.3076241134751773,
"step": 20500
},
{
"epoch": 17.854039965247612,
"grad_norm": 0.00030440345290116966,
"learning_rate": 1.2325301204819278e-05,
"loss": 0.060886926651000976,
"step": 20550
},
{
"epoch": 17.89748045178106,
"grad_norm": 0.007375710643827915,
"learning_rate": 1.2281222450778726e-05,
"loss": 0.04395482540130615,
"step": 20600
},
{
"epoch": 17.94092093831451,
"grad_norm": 0.0019175054039806128,
"learning_rate": 1.2237143696738172e-05,
"loss": 0.023046765327453613,
"step": 20650
},
{
"epoch": 17.984361424847958,
"grad_norm": 0.0014469270827248693,
"learning_rate": 1.219306494269762e-05,
"loss": 0.041912388801574704,
"step": 20700
},
{
"epoch": 18.027801911381406,
"grad_norm": 0.008292295038700104,
"learning_rate": 1.2148986188657067e-05,
"loss": 0.05653272151947022,
"step": 20750
},
{
"epoch": 18.071242397914858,
"grad_norm": 0.0011951219057664275,
"learning_rate": 1.2104907434616514e-05,
"loss": 0.03046605587005615,
"step": 20800
},
{
"epoch": 18.114682884448307,
"grad_norm": 0.004597791470587254,
"learning_rate": 1.2060828680575962e-05,
"loss": 0.034540703296661375,
"step": 20850
},
{
"epoch": 18.158123370981755,
"grad_norm": 0.0003544765349943191,
"learning_rate": 1.201674992653541e-05,
"loss": 0.015487746000289918,
"step": 20900
},
{
"epoch": 18.201563857515204,
"grad_norm": 0.001666396390646696,
"learning_rate": 1.1972671172494857e-05,
"loss": 0.028139712810516356,
"step": 20950
},
{
"epoch": 18.245004344048652,
"grad_norm": 0.4877508282661438,
"learning_rate": 1.1928592418454304e-05,
"loss": 0.045163874626159665,
"step": 21000
},
{
"epoch": 18.245004344048652,
"eval_cer": 0.10554261255892074,
"eval_loss": 1.0116287469863892,
"eval_runtime": 35.5341,
"eval_samples_per_second": 28.086,
"eval_steps_per_second": 14.043,
"eval_wer": 0.3182624113475177,
"step": 21000
},
{
"epoch": 18.2884448305821,
"grad_norm": 4.470763451536186e-05,
"learning_rate": 1.1884513664413752e-05,
"loss": 0.023325955867767333,
"step": 21050
},
{
"epoch": 18.331885317115553,
"grad_norm": 0.000781964510679245,
"learning_rate": 1.18404349103732e-05,
"loss": 0.025803213119506837,
"step": 21100
},
{
"epoch": 18.375325803649,
"grad_norm": 19.389554977416992,
"learning_rate": 1.1796356156332649e-05,
"loss": 0.030954115390777588,
"step": 21150
},
{
"epoch": 18.41876629018245,
"grad_norm": 7.068664126563817e-05,
"learning_rate": 1.1752277402292096e-05,
"loss": 0.04330010414123535,
"step": 21200
},
{
"epoch": 18.4622067767159,
"grad_norm": 0.00017082026170101017,
"learning_rate": 1.1708198648251544e-05,
"loss": 0.04214978694915771,
"step": 21250
},
{
"epoch": 18.505647263249347,
"grad_norm": 0.0019248217577114701,
"learning_rate": 1.1664119894210991e-05,
"loss": 0.021218812465667723,
"step": 21300
},
{
"epoch": 18.549087749782796,
"grad_norm": 0.2257125824689865,
"learning_rate": 1.1620041140170439e-05,
"loss": 0.04343417644500733,
"step": 21350
},
{
"epoch": 18.592528236316248,
"grad_norm": 0.004617325030267239,
"learning_rate": 1.1575962386129886e-05,
"loss": 0.031105964183807372,
"step": 21400
},
{
"epoch": 18.635968722849697,
"grad_norm": 6.712381582474336e-05,
"learning_rate": 1.1531883632089334e-05,
"loss": 0.022620809078216553,
"step": 21450
},
{
"epoch": 18.679409209383145,
"grad_norm": 11.555673599243164,
"learning_rate": 1.1487804878048781e-05,
"loss": 0.051412558555603026,
"step": 21500
},
{
"epoch": 18.679409209383145,
"eval_cer": 0.09855339437611746,
"eval_loss": 1.0771058797836304,
"eval_runtime": 35.2368,
"eval_samples_per_second": 28.323,
"eval_steps_per_second": 14.161,
"eval_wer": 0.3108747044917258,
"step": 21500
},
{
"epoch": 18.722849695916594,
"grad_norm": 0.001295646419748664,
"learning_rate": 1.1443726124008229e-05,
"loss": 0.0214068603515625,
"step": 21550
},
{
"epoch": 18.766290182450042,
"grad_norm": 0.022851450368762016,
"learning_rate": 1.1399647369967676e-05,
"loss": 0.03087963581085205,
"step": 21600
},
{
"epoch": 18.80973066898349,
"grad_norm": 0.0012702015228569508,
"learning_rate": 1.1355568615927124e-05,
"loss": 0.046400198936462404,
"step": 21650
},
{
"epoch": 18.853171155516943,
"grad_norm": 0.0037327518220990896,
"learning_rate": 1.1311489861886571e-05,
"loss": 0.024634184837341307,
"step": 21700
},
{
"epoch": 18.89661164205039,
"grad_norm": 6.548186502186581e-05,
"learning_rate": 1.1267411107846019e-05,
"loss": 0.03668407678604126,
"step": 21750
},
{
"epoch": 18.94005212858384,
"grad_norm": 3.877016544342041,
"learning_rate": 1.1223332353805466e-05,
"loss": 0.02262542963027954,
"step": 21800
},
{
"epoch": 18.98349261511729,
"grad_norm": 0.0001716541883070022,
"learning_rate": 1.1179253599764914e-05,
"loss": 0.05151228427886963,
"step": 21850
},
{
"epoch": 19.026933101650737,
"grad_norm": 0.0001898752962006256,
"learning_rate": 1.1135174845724361e-05,
"loss": 0.0242765212059021,
"step": 21900
},
{
"epoch": 19.07037358818419,
"grad_norm": 0.10014080256223679,
"learning_rate": 1.1091096091683809e-05,
"loss": 0.037711410522460936,
"step": 21950
},
{
"epoch": 19.113814074717638,
"grad_norm": 0.00022042197815608233,
"learning_rate": 1.1047017337643256e-05,
"loss": 0.025053555965423583,
"step": 22000
},
{
"epoch": 19.113814074717638,
"eval_cer": 0.09801159451698542,
"eval_loss": 1.0469719171524048,
"eval_runtime": 35.8341,
"eval_samples_per_second": 27.851,
"eval_steps_per_second": 13.925,
"eval_wer": 0.30112293144208035,
"step": 22000
}
],
"logging_steps": 50,
"max_steps": 34530,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8.810244226353391e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}