Wav2vec2-afr / trainer_state.json
Leonel-Maia's picture
End of training
89768b5 verified
{
"best_global_step": 7500,
"best_metric": 0.1822061687707901,
"best_model_checkpoint": "./Wav2vec2-afr/checkpoint-7500",
"epoch": 6.031374915151972,
"eval_steps": 500,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.015084093823063579,
"grad_norm": 27.733379364013672,
"learning_rate": 1.4999999999999999e-05,
"loss": 20.2292,
"step": 25
},
{
"epoch": 0.030168187646127158,
"grad_norm": 19.019153594970703,
"learning_rate": 2.9999999999999997e-05,
"loss": 11.6267,
"step": 50
},
{
"epoch": 0.04525228146919074,
"grad_norm": 5.631908416748047,
"learning_rate": 4.4999999999999996e-05,
"loss": 4.8714,
"step": 75
},
{
"epoch": 0.060336375292254316,
"grad_norm": 2.689542293548584,
"learning_rate": 5.9999999999999995e-05,
"loss": 3.5888,
"step": 100
},
{
"epoch": 0.0754204691153179,
"grad_norm": 1.0974892377853394,
"learning_rate": 7.5e-05,
"loss": 3.2364,
"step": 125
},
{
"epoch": 0.09050456293838148,
"grad_norm": 0.6232272386550903,
"learning_rate": 8.999999999999999e-05,
"loss": 3.0531,
"step": 150
},
{
"epoch": 0.10558865676144506,
"grad_norm": 0.3990340828895569,
"learning_rate": 0.00010499999999999999,
"loss": 3.0197,
"step": 175
},
{
"epoch": 0.12067275058450863,
"grad_norm": 0.6495853662490845,
"learning_rate": 0.00011999999999999999,
"loss": 2.9908,
"step": 200
},
{
"epoch": 0.1357568444075722,
"grad_norm": 1.156081199645996,
"learning_rate": 0.000135,
"loss": 2.9629,
"step": 225
},
{
"epoch": 0.1508409382306358,
"grad_norm": 1.0735247135162354,
"learning_rate": 0.00015,
"loss": 2.5531,
"step": 250
},
{
"epoch": 0.16592503205369938,
"grad_norm": 0.6276996731758118,
"learning_rate": 0.0001644,
"loss": 1.7912,
"step": 275
},
{
"epoch": 0.18100912587676296,
"grad_norm": 1.1443787813186646,
"learning_rate": 0.00017939999999999997,
"loss": 0.7469,
"step": 300
},
{
"epoch": 0.19609321969982654,
"grad_norm": 0.47428029775619507,
"learning_rate": 0.00019439999999999998,
"loss": 0.7277,
"step": 325
},
{
"epoch": 0.21117731352289013,
"grad_norm": 1.0143519639968872,
"learning_rate": 0.00020939999999999997,
"loss": 0.4411,
"step": 350
},
{
"epoch": 0.22626140734595368,
"grad_norm": 0.36140990257263184,
"learning_rate": 0.00022439999999999998,
"loss": 0.4558,
"step": 375
},
{
"epoch": 0.24134550116901726,
"grad_norm": 0.6368806958198547,
"learning_rate": 0.0002394,
"loss": 0.2993,
"step": 400
},
{
"epoch": 0.25642959499208084,
"grad_norm": 0.5180590748786926,
"learning_rate": 0.00025439999999999995,
"loss": 0.4165,
"step": 425
},
{
"epoch": 0.2715136888151444,
"grad_norm": 5.711215972900391,
"learning_rate": 0.0002694,
"loss": 0.2686,
"step": 450
},
{
"epoch": 0.286597782638208,
"grad_norm": 1.2874069213867188,
"learning_rate": 0.0002844,
"loss": 0.4259,
"step": 475
},
{
"epoch": 0.3016818764612716,
"grad_norm": 0.6762956380844116,
"learning_rate": 0.00029939999999999996,
"loss": 0.2638,
"step": 500
},
{
"epoch": 0.3016818764612716,
"eval_loss": 0.32083866000175476,
"eval_runtime": 754.2459,
"eval_samples_per_second": 4.395,
"eval_steps_per_second": 1.099,
"eval_wer": 0.3275495330130109,
"step": 500
},
{
"epoch": 0.3167659702843352,
"grad_norm": 0.3699875771999359,
"learning_rate": 0.0002999272139102305,
"loss": 0.4233,
"step": 525
},
{
"epoch": 0.33185006410739876,
"grad_norm": 0.9410744905471802,
"learning_rate": 0.00029985139506672056,
"loss": 0.2435,
"step": 550
},
{
"epoch": 0.34693415793046234,
"grad_norm": 0.7363893389701843,
"learning_rate": 0.00029977557622321065,
"loss": 0.4594,
"step": 575
},
{
"epoch": 0.3620182517535259,
"grad_norm": 0.9007164239883423,
"learning_rate": 0.00029969975737970074,
"loss": 0.2262,
"step": 600
},
{
"epoch": 0.3771023455765895,
"grad_norm": 0.4770212173461914,
"learning_rate": 0.0002996239385361908,
"loss": 0.4473,
"step": 625
},
{
"epoch": 0.3921864393996531,
"grad_norm": 0.9380179643630981,
"learning_rate": 0.00029954811969268096,
"loss": 0.227,
"step": 650
},
{
"epoch": 0.40727053322271667,
"grad_norm": 0.5483216643333435,
"learning_rate": 0.000299472300849171,
"loss": 0.3786,
"step": 675
},
{
"epoch": 0.42235462704578025,
"grad_norm": 0.44440972805023193,
"learning_rate": 0.0002993964820056611,
"loss": 0.1966,
"step": 700
},
{
"epoch": 0.4374387208688438,
"grad_norm": 0.6046245098114014,
"learning_rate": 0.0002993206631621512,
"loss": 0.3704,
"step": 725
},
{
"epoch": 0.45252281469190736,
"grad_norm": 0.6455674767494202,
"learning_rate": 0.0002992448443186413,
"loss": 0.1906,
"step": 750
},
{
"epoch": 0.46760690851497094,
"grad_norm": 0.35442134737968445,
"learning_rate": 0.0002991690254751314,
"loss": 0.3416,
"step": 775
},
{
"epoch": 0.4826910023380345,
"grad_norm": 0.9801518321037292,
"learning_rate": 0.0002990932066316215,
"loss": 0.1892,
"step": 800
},
{
"epoch": 0.4977750961610981,
"grad_norm": 0.4081840217113495,
"learning_rate": 0.00029901738778811157,
"loss": 0.3756,
"step": 825
},
{
"epoch": 0.5128591899841617,
"grad_norm": 0.6348875164985657,
"learning_rate": 0.00029894156894460166,
"loss": 0.1788,
"step": 850
},
{
"epoch": 0.5279432838072253,
"grad_norm": 0.2996980547904968,
"learning_rate": 0.00029886575010109175,
"loss": 0.3618,
"step": 875
},
{
"epoch": 0.5430273776302889,
"grad_norm": 0.5273446440696716,
"learning_rate": 0.00029878993125758183,
"loss": 0.1717,
"step": 900
},
{
"epoch": 0.5581114714533525,
"grad_norm": 0.7878848910331726,
"learning_rate": 0.000298714112414072,
"loss": 0.3516,
"step": 925
},
{
"epoch": 0.573195565276416,
"grad_norm": 0.57522052526474,
"learning_rate": 0.00029863829357056206,
"loss": 0.1619,
"step": 950
},
{
"epoch": 0.5882796590994795,
"grad_norm": 0.2759730815887451,
"learning_rate": 0.00029856247472705215,
"loss": 0.3349,
"step": 975
},
{
"epoch": 0.6033637529225432,
"grad_norm": 0.8881000876426697,
"learning_rate": 0.00029848665588354224,
"loss": 0.1603,
"step": 1000
},
{
"epoch": 0.6033637529225432,
"eval_loss": 0.24969196319580078,
"eval_runtime": 757.505,
"eval_samples_per_second": 4.376,
"eval_steps_per_second": 1.094,
"eval_wer": 0.2649138908384171,
"step": 1000
},
{
"epoch": 0.6184478467456067,
"grad_norm": 0.6784160137176514,
"learning_rate": 0.0002984108370400323,
"loss": 0.3343,
"step": 1025
},
{
"epoch": 0.6335319405686703,
"grad_norm": 1.6744931936264038,
"learning_rate": 0.0002983350181965224,
"loss": 0.1664,
"step": 1050
},
{
"epoch": 0.6486160343917339,
"grad_norm": 0.35144105553627014,
"learning_rate": 0.0002982591993530125,
"loss": 0.3518,
"step": 1075
},
{
"epoch": 0.6637001282147975,
"grad_norm": 0.7025427222251892,
"learning_rate": 0.0002981833805095026,
"loss": 0.1482,
"step": 1100
},
{
"epoch": 0.678784222037861,
"grad_norm": 0.4477069675922394,
"learning_rate": 0.0002981075616659927,
"loss": 0.3463,
"step": 1125
},
{
"epoch": 0.6938683158609247,
"grad_norm": 0.603905975818634,
"learning_rate": 0.0002980317428224828,
"loss": 0.1722,
"step": 1150
},
{
"epoch": 0.7089524096839882,
"grad_norm": 0.2929205298423767,
"learning_rate": 0.00029795592397897284,
"loss": 0.3269,
"step": 1175
},
{
"epoch": 0.7240365035070518,
"grad_norm": 0.6123032569885254,
"learning_rate": 0.000297880105135463,
"loss": 0.1573,
"step": 1200
},
{
"epoch": 0.7391205973301154,
"grad_norm": 0.2819807529449463,
"learning_rate": 0.00029780428629195307,
"loss": 0.3203,
"step": 1225
},
{
"epoch": 0.754204691153179,
"grad_norm": 0.5671049356460571,
"learning_rate": 0.00029772846744844316,
"loss": 0.1489,
"step": 1250
},
{
"epoch": 0.7692887849762425,
"grad_norm": 0.7186427116394043,
"learning_rate": 0.00029765264860493325,
"loss": 0.3486,
"step": 1275
},
{
"epoch": 0.7843728787993062,
"grad_norm": 0.5397343635559082,
"learning_rate": 0.00029757682976142333,
"loss": 0.1384,
"step": 1300
},
{
"epoch": 0.7994569726223697,
"grad_norm": 0.3157232701778412,
"learning_rate": 0.0002975010109179134,
"loss": 0.3505,
"step": 1325
},
{
"epoch": 0.8145410664454333,
"grad_norm": 0.714611828327179,
"learning_rate": 0.00029742519207440356,
"loss": 0.1502,
"step": 1350
},
{
"epoch": 0.8296251602684969,
"grad_norm": 0.3562470078468323,
"learning_rate": 0.0002973493732308936,
"loss": 0.3845,
"step": 1375
},
{
"epoch": 0.8447092540915605,
"grad_norm": 0.5777546167373657,
"learning_rate": 0.00029727355438738374,
"loss": 0.1541,
"step": 1400
},
{
"epoch": 0.859793347914624,
"grad_norm": 0.49272066354751587,
"learning_rate": 0.0002971977355438738,
"loss": 0.3538,
"step": 1425
},
{
"epoch": 0.8748774417376876,
"grad_norm": 0.8076097369194031,
"learning_rate": 0.0002971219167003639,
"loss": 0.1343,
"step": 1450
},
{
"epoch": 0.8899615355607512,
"grad_norm": 0.43889355659484863,
"learning_rate": 0.000297046097856854,
"loss": 0.3096,
"step": 1475
},
{
"epoch": 0.9050456293838147,
"grad_norm": 0.8419676423072815,
"learning_rate": 0.0002969702790133441,
"loss": 0.142,
"step": 1500
},
{
"epoch": 0.9050456293838147,
"eval_loss": 0.21447643637657166,
"eval_runtime": 715.0284,
"eval_samples_per_second": 4.636,
"eval_steps_per_second": 1.159,
"eval_wer": 0.24875020245100685,
"step": 1500
},
{
"epoch": 0.9201297232068784,
"grad_norm": 0.4135502278804779,
"learning_rate": 0.00029689446016983417,
"loss": 0.359,
"step": 1525
},
{
"epoch": 0.9352138170299419,
"grad_norm": 0.5666655898094177,
"learning_rate": 0.0002968186413263243,
"loss": 0.1478,
"step": 1550
},
{
"epoch": 0.9502979108530055,
"grad_norm": 0.30614542961120605,
"learning_rate": 0.00029674282248281434,
"loss": 0.3263,
"step": 1575
},
{
"epoch": 0.965382004676069,
"grad_norm": 0.5047621130943298,
"learning_rate": 0.0002966670036393045,
"loss": 0.1369,
"step": 1600
},
{
"epoch": 0.9804660984991327,
"grad_norm": 0.4826274514198303,
"learning_rate": 0.00029659118479579457,
"loss": 0.3709,
"step": 1625
},
{
"epoch": 0.9955501923221962,
"grad_norm": 0.7504212260246277,
"learning_rate": 0.00029651536595228466,
"loss": 0.1389,
"step": 1650
},
{
"epoch": 1.0102571837996832,
"grad_norm": 0.40745294094085693,
"learning_rate": 0.00029643954710877475,
"loss": 0.2744,
"step": 1675
},
{
"epoch": 1.0253412776227468,
"grad_norm": 0.27026304602622986,
"learning_rate": 0.00029636372826526483,
"loss": 0.177,
"step": 1700
},
{
"epoch": 1.0404253714458105,
"grad_norm": 0.4111769199371338,
"learning_rate": 0.0002962879094217549,
"loss": 0.2356,
"step": 1725
},
{
"epoch": 1.055509465268874,
"grad_norm": 0.30931055545806885,
"learning_rate": 0.000296212090578245,
"loss": 0.1747,
"step": 1750
},
{
"epoch": 1.0705935590919375,
"grad_norm": 0.3719274699687958,
"learning_rate": 0.0002961362717347351,
"loss": 0.245,
"step": 1775
},
{
"epoch": 1.0856776529150012,
"grad_norm": 0.28636762499809265,
"learning_rate": 0.0002960604528912252,
"loss": 0.1781,
"step": 1800
},
{
"epoch": 1.1007617467380646,
"grad_norm": 0.27731388807296753,
"learning_rate": 0.0002959846340477153,
"loss": 0.2435,
"step": 1825
},
{
"epoch": 1.1158458405611282,
"grad_norm": 0.315758615732193,
"learning_rate": 0.0002959088152042054,
"loss": 0.1608,
"step": 1850
},
{
"epoch": 1.1309299343841919,
"grad_norm": 0.7471879720687866,
"learning_rate": 0.0002958329963606955,
"loss": 0.2436,
"step": 1875
},
{
"epoch": 1.1460140282072555,
"grad_norm": 0.3493207097053528,
"learning_rate": 0.0002957571775171856,
"loss": 0.165,
"step": 1900
},
{
"epoch": 1.1610981220303191,
"grad_norm": 0.3202393651008606,
"learning_rate": 0.00029568135867367567,
"loss": 0.2448,
"step": 1925
},
{
"epoch": 1.1761822158533826,
"grad_norm": 0.2840123176574707,
"learning_rate": 0.00029560553983016576,
"loss": 0.1583,
"step": 1950
},
{
"epoch": 1.1912663096764462,
"grad_norm": 0.34918013215065,
"learning_rate": 0.00029552972098665584,
"loss": 0.246,
"step": 1975
},
{
"epoch": 1.2063504034995098,
"grad_norm": 0.2470645010471344,
"learning_rate": 0.00029545390214314593,
"loss": 0.1645,
"step": 2000
},
{
"epoch": 1.2063504034995098,
"eval_loss": 0.20230437815189362,
"eval_runtime": 763.4911,
"eval_samples_per_second": 4.342,
"eval_steps_per_second": 1.086,
"eval_wer": 0.22521189872050965,
"step": 2000
},
{
"epoch": 1.2214344973225733,
"grad_norm": 0.5461482405662537,
"learning_rate": 0.00029537808329963607,
"loss": 0.2304,
"step": 2025
},
{
"epoch": 1.236518591145637,
"grad_norm": 0.44666436314582825,
"learning_rate": 0.00029530226445612616,
"loss": 0.1632,
"step": 2050
},
{
"epoch": 1.2516026849687005,
"grad_norm": 0.44556596875190735,
"learning_rate": 0.0002952264456126162,
"loss": 0.252,
"step": 2075
},
{
"epoch": 1.2666867787917642,
"grad_norm": 0.44631072878837585,
"learning_rate": 0.00029515062676910633,
"loss": 0.1795,
"step": 2100
},
{
"epoch": 1.2817708726148276,
"grad_norm": 0.2911534607410431,
"learning_rate": 0.0002950748079255964,
"loss": 0.2458,
"step": 2125
},
{
"epoch": 1.2968549664378912,
"grad_norm": 0.34504878520965576,
"learning_rate": 0.0002949989890820865,
"loss": 0.2024,
"step": 2150
},
{
"epoch": 1.3119390602609549,
"grad_norm": 0.295748770236969,
"learning_rate": 0.000294926202992317,
"loss": 0.6418,
"step": 2175
},
{
"epoch": 1.3270231540840185,
"grad_norm": 0.2935297191143036,
"learning_rate": 0.0002948503841488071,
"loss": 0.1568,
"step": 2200
},
{
"epoch": 1.342107247907082,
"grad_norm": 0.45815935730934143,
"learning_rate": 0.0002947745653052972,
"loss": 0.2491,
"step": 2225
},
{
"epoch": 1.3571913417301456,
"grad_norm": 0.3507382273674011,
"learning_rate": 0.00029469874646178727,
"loss": 0.1683,
"step": 2250
},
{
"epoch": 1.3722754355532092,
"grad_norm": 0.28953784704208374,
"learning_rate": 0.00029462292761827736,
"loss": 0.2649,
"step": 2275
},
{
"epoch": 1.3873595293762726,
"grad_norm": 0.26776325702667236,
"learning_rate": 0.00029454710877476744,
"loss": 0.1511,
"step": 2300
},
{
"epoch": 1.4024436231993362,
"grad_norm": 0.6518787741661072,
"learning_rate": 0.0002944712899312576,
"loss": 0.2438,
"step": 2325
},
{
"epoch": 1.4175277170223999,
"grad_norm": 0.3653862476348877,
"learning_rate": 0.0002943954710877476,
"loss": 0.1677,
"step": 2350
},
{
"epoch": 1.4326118108454635,
"grad_norm": 0.4537375569343567,
"learning_rate": 0.00029431965224423776,
"loss": 0.2619,
"step": 2375
},
{
"epoch": 1.4476959046685272,
"grad_norm": 0.3677208721637726,
"learning_rate": 0.00029424383340072785,
"loss": 0.1564,
"step": 2400
},
{
"epoch": 1.4627799984915906,
"grad_norm": 0.33267661929130554,
"learning_rate": 0.00029416801455721793,
"loss": 0.232,
"step": 2425
},
{
"epoch": 1.4778640923146542,
"grad_norm": 0.32956621050834656,
"learning_rate": 0.000294092195713708,
"loss": 0.1841,
"step": 2450
},
{
"epoch": 1.4929481861377178,
"grad_norm": 0.41168802976608276,
"learning_rate": 0.0002940163768701981,
"loss": 0.2432,
"step": 2475
},
{
"epoch": 1.5080322799607813,
"grad_norm": 0.333852082490921,
"learning_rate": 0.0002939405580266882,
"loss": 0.1818,
"step": 2500
},
{
"epoch": 1.5080322799607813,
"eval_loss": 0.19838476181030273,
"eval_runtime": 747.6476,
"eval_samples_per_second": 4.434,
"eval_steps_per_second": 1.109,
"eval_wer": 0.21765372779787293,
"step": 2500
},
{
"epoch": 1.523116373783845,
"grad_norm": 0.35393306612968445,
"learning_rate": 0.00029386473918317833,
"loss": 0.2496,
"step": 2525
},
{
"epoch": 1.5382004676069085,
"grad_norm": 0.23671747744083405,
"learning_rate": 0.00029378892033966837,
"loss": 0.1641,
"step": 2550
},
{
"epoch": 1.553284561429972,
"grad_norm": 0.49785369634628296,
"learning_rate": 0.0002937131014961585,
"loss": 0.2304,
"step": 2575
},
{
"epoch": 1.5683686552530358,
"grad_norm": 0.7474918365478516,
"learning_rate": 0.0002936372826526486,
"loss": 0.1914,
"step": 2600
},
{
"epoch": 1.5834527490760992,
"grad_norm": 0.3910874128341675,
"learning_rate": 0.0002935614638091387,
"loss": 0.2516,
"step": 2625
},
{
"epoch": 1.5985368428991629,
"grad_norm": 0.33777278661727905,
"learning_rate": 0.00029348564496562877,
"loss": 0.1778,
"step": 2650
},
{
"epoch": 1.6136209367222265,
"grad_norm": 0.39339521527290344,
"learning_rate": 0.00029340982612211886,
"loss": 0.2651,
"step": 2675
},
{
"epoch": 1.62870503054529,
"grad_norm": 0.33642980456352234,
"learning_rate": 0.00029333400727860894,
"loss": 0.1987,
"step": 2700
},
{
"epoch": 1.6437891243683536,
"grad_norm": 0.4382663071155548,
"learning_rate": 0.00029325818843509903,
"loss": 0.3213,
"step": 2725
},
{
"epoch": 1.6588732181914172,
"grad_norm": 0.29682841897010803,
"learning_rate": 0.0002931823695915891,
"loss": 0.1744,
"step": 2750
},
{
"epoch": 1.6739573120144806,
"grad_norm": 0.5017745494842529,
"learning_rate": 0.0002931065507480792,
"loss": 0.2454,
"step": 2775
},
{
"epoch": 1.6890414058375445,
"grad_norm": 0.31300872564315796,
"learning_rate": 0.00029303073190456935,
"loss": 0.1642,
"step": 2800
},
{
"epoch": 1.704125499660608,
"grad_norm": 0.2737913429737091,
"learning_rate": 0.0002929549130610594,
"loss": 0.2304,
"step": 2825
},
{
"epoch": 1.7192095934836713,
"grad_norm": 0.5299367904663086,
"learning_rate": 0.0002928790942175495,
"loss": 0.1446,
"step": 2850
},
{
"epoch": 1.7342936873067352,
"grad_norm": 0.4113737642765045,
"learning_rate": 0.0002928032753740396,
"loss": 0.2751,
"step": 2875
},
{
"epoch": 1.7493777811297986,
"grad_norm": 0.33324816823005676,
"learning_rate": 0.0002927274565305297,
"loss": 0.1635,
"step": 2900
},
{
"epoch": 1.7644618749528622,
"grad_norm": 0.33196279406547546,
"learning_rate": 0.0002926516376870198,
"loss": 0.2426,
"step": 2925
},
{
"epoch": 1.7795459687759259,
"grad_norm": 0.2307615876197815,
"learning_rate": 0.00029257581884350987,
"loss": 0.1548,
"step": 2950
},
{
"epoch": 1.7946300625989893,
"grad_norm": 0.259033739566803,
"learning_rate": 0.00029249999999999995,
"loss": 0.2481,
"step": 2975
},
{
"epoch": 1.809714156422053,
"grad_norm": 0.3014342784881592,
"learning_rate": 0.0002924241811564901,
"loss": 0.1827,
"step": 3000
},
{
"epoch": 1.809714156422053,
"eval_loss": 0.1995258629322052,
"eval_runtime": 720.0351,
"eval_samples_per_second": 4.604,
"eval_steps_per_second": 1.151,
"eval_wer": 0.2203962641040868,
"step": 3000
},
{
"epoch": 1.8247982502451165,
"grad_norm": 0.348734050989151,
"learning_rate": 0.0002923483623129802,
"loss": 0.2435,
"step": 3025
},
{
"epoch": 1.83988234406818,
"grad_norm": 0.38749223947525024,
"learning_rate": 0.00029227254346947027,
"loss": 0.1839,
"step": 3050
},
{
"epoch": 1.8549664378912438,
"grad_norm": 0.3979107141494751,
"learning_rate": 0.00029219672462596036,
"loss": 0.2518,
"step": 3075
},
{
"epoch": 1.8700505317143072,
"grad_norm": 0.2685915231704712,
"learning_rate": 0.00029212090578245044,
"loss": 0.1711,
"step": 3100
},
{
"epoch": 1.8851346255373709,
"grad_norm": 0.2596099078655243,
"learning_rate": 0.00029204508693894053,
"loss": 0.2576,
"step": 3125
},
{
"epoch": 1.9002187193604345,
"grad_norm": 0.26872625946998596,
"learning_rate": 0.0002919692680954306,
"loss": 0.1484,
"step": 3150
},
{
"epoch": 1.915302813183498,
"grad_norm": 0.32843342423439026,
"learning_rate": 0.0002918934492519207,
"loss": 0.2559,
"step": 3175
},
{
"epoch": 1.9303869070065616,
"grad_norm": 0.26869305968284607,
"learning_rate": 0.0002918176304084108,
"loss": 0.1751,
"step": 3200
},
{
"epoch": 1.9454710008296252,
"grad_norm": 0.2810923159122467,
"learning_rate": 0.00029174181156490093,
"loss": 0.2244,
"step": 3225
},
{
"epoch": 1.9605550946526886,
"grad_norm": 0.3540550470352173,
"learning_rate": 0.00029166599272139096,
"loss": 0.1661,
"step": 3250
},
{
"epoch": 1.9756391884757525,
"grad_norm": 0.23880811035633087,
"learning_rate": 0.0002915901738778811,
"loss": 0.2503,
"step": 3275
},
{
"epoch": 1.990723282298816,
"grad_norm": 0.24448829889297485,
"learning_rate": 0.0002915143550343712,
"loss": 0.1501,
"step": 3300
},
{
"epoch": 2.005430273776303,
"grad_norm": 0.931875467300415,
"learning_rate": 0.0002914385361908613,
"loss": 0.228,
"step": 3325
},
{
"epoch": 2.0205143675993664,
"grad_norm": 0.22488653659820557,
"learning_rate": 0.00029136271734735137,
"loss": 0.2308,
"step": 3350
},
{
"epoch": 2.03559846142243,
"grad_norm": 0.33369916677474976,
"learning_rate": 0.00029128689850384145,
"loss": 0.1449,
"step": 3375
},
{
"epoch": 2.0506825552454937,
"grad_norm": 0.28679683804512024,
"learning_rate": 0.00029121107966033154,
"loss": 0.221,
"step": 3400
},
{
"epoch": 2.065766649068557,
"grad_norm": 0.2800401449203491,
"learning_rate": 0.0002911352608168217,
"loss": 0.1486,
"step": 3425
},
{
"epoch": 2.080850742891621,
"grad_norm": 0.36049047112464905,
"learning_rate": 0.0002910594419733117,
"loss": 0.2204,
"step": 3450
},
{
"epoch": 2.0959348367146844,
"grad_norm": 0.39010685682296753,
"learning_rate": 0.00029098362312980186,
"loss": 0.1683,
"step": 3475
},
{
"epoch": 2.111018930537748,
"grad_norm": 0.1714339703321457,
"learning_rate": 0.00029090780428629194,
"loss": 0.1859,
"step": 3500
},
{
"epoch": 2.111018930537748,
"eval_loss": 0.19938839972019196,
"eval_runtime": 719.6016,
"eval_samples_per_second": 4.607,
"eval_steps_per_second": 1.152,
"eval_wer": 0.22404578092101712,
"step": 3500
},
{
"epoch": 2.1261030243608117,
"grad_norm": 0.3100337088108063,
"learning_rate": 0.00029083198544278203,
"loss": 0.159,
"step": 3525
},
{
"epoch": 2.141187118183875,
"grad_norm": 0.26142579317092896,
"learning_rate": 0.0002907561665992721,
"loss": 0.2408,
"step": 3550
},
{
"epoch": 2.1562712120069385,
"grad_norm": 0.4712742269039154,
"learning_rate": 0.0002906803477557622,
"loss": 0.154,
"step": 3575
},
{
"epoch": 2.1713553058300024,
"grad_norm": 0.2582917809486389,
"learning_rate": 0.0002906045289122523,
"loss": 0.2283,
"step": 3600
},
{
"epoch": 2.1864393996530658,
"grad_norm": 0.2900999188423157,
"learning_rate": 0.0002905287100687424,
"loss": 0.1722,
"step": 3625
},
{
"epoch": 2.201523493476129,
"grad_norm": 0.20985634624958038,
"learning_rate": 0.00029045289122523246,
"loss": 0.5346,
"step": 3650
},
{
"epoch": 2.216607587299193,
"grad_norm": 0.32814761996269226,
"learning_rate": 0.00029037707238172255,
"loss": 0.1752,
"step": 3675
},
{
"epoch": 2.2316916811222565,
"grad_norm": 0.22810375690460205,
"learning_rate": 0.0002903012535382127,
"loss": 0.2093,
"step": 3700
},
{
"epoch": 2.2467757749453203,
"grad_norm": 0.4804486334323883,
"learning_rate": 0.0002902254346947027,
"loss": 0.1641,
"step": 3725
},
{
"epoch": 2.2618598687683837,
"grad_norm": 0.2502540051937103,
"learning_rate": 0.00029014961585119287,
"loss": 0.2281,
"step": 3750
},
{
"epoch": 2.276943962591447,
"grad_norm": 0.34445151686668396,
"learning_rate": 0.00029007379700768295,
"loss": 0.1683,
"step": 3775
},
{
"epoch": 2.292028056414511,
"grad_norm": 0.16622678935527802,
"learning_rate": 0.00028999797816417304,
"loss": 0.237,
"step": 3800
},
{
"epoch": 2.3071121502375744,
"grad_norm": 0.45780929923057556,
"learning_rate": 0.00028992215932066313,
"loss": 0.1715,
"step": 3825
},
{
"epoch": 2.3221962440606383,
"grad_norm": 0.3053385615348816,
"learning_rate": 0.0002898463404771532,
"loss": 0.2236,
"step": 3850
},
{
"epoch": 2.3372803378837017,
"grad_norm": 0.5077650547027588,
"learning_rate": 0.0002897705216336433,
"loss": 0.168,
"step": 3875
},
{
"epoch": 2.352364431706765,
"grad_norm": 0.23254072666168213,
"learning_rate": 0.00028969470279013344,
"loss": 0.2139,
"step": 3900
},
{
"epoch": 2.367448525529829,
"grad_norm": 0.5966396927833557,
"learning_rate": 0.00028961888394662353,
"loss": 0.1661,
"step": 3925
},
{
"epoch": 2.3825326193528924,
"grad_norm": 0.2979792058467865,
"learning_rate": 0.0002895430651031136,
"loss": 0.2305,
"step": 3950
},
{
"epoch": 2.397616713175956,
"grad_norm": 0.33389779925346375,
"learning_rate": 0.0002894672462596037,
"loss": 0.1607,
"step": 3975
},
{
"epoch": 2.4127008069990197,
"grad_norm": 0.24503561854362488,
"learning_rate": 0.0002893914274160938,
"loss": 0.2488,
"step": 4000
},
{
"epoch": 2.4127008069990197,
"eval_loss": 0.20397360622882843,
"eval_runtime": 723.2916,
"eval_samples_per_second": 4.583,
"eval_steps_per_second": 1.146,
"eval_wer": 0.2249527614317335,
"step": 4000
},
{
"epoch": 2.427784900822083,
"grad_norm": 0.368486613035202,
"learning_rate": 0.0002893156085725839,
"loss": 0.1815,
"step": 4025
},
{
"epoch": 2.4428689946451465,
"grad_norm": 0.4782474637031555,
"learning_rate": 0.00028923978972907397,
"loss": 0.2391,
"step": 4050
},
{
"epoch": 2.4579530884682104,
"grad_norm": 0.3658357858657837,
"learning_rate": 0.00028916397088556405,
"loss": 0.1652,
"step": 4075
},
{
"epoch": 2.473037182291274,
"grad_norm": 0.27112266421318054,
"learning_rate": 0.00028908815204205414,
"loss": 0.2343,
"step": 4100
},
{
"epoch": 2.488121276114337,
"grad_norm": 0.3050464391708374,
"learning_rate": 0.0002890123331985443,
"loss": 0.1487,
"step": 4125
},
{
"epoch": 2.503205369937401,
"grad_norm": 0.25800853967666626,
"learning_rate": 0.0002889365143550343,
"loss": 0.1978,
"step": 4150
},
{
"epoch": 2.5182894637604645,
"grad_norm": 0.36631202697753906,
"learning_rate": 0.00028886069551152445,
"loss": 0.1668,
"step": 4175
},
{
"epoch": 2.5333735575835283,
"grad_norm": 0.20563054084777832,
"learning_rate": 0.00028878487666801454,
"loss": 0.2048,
"step": 4200
},
{
"epoch": 2.5484576514065918,
"grad_norm": 0.4122121334075928,
"learning_rate": 0.00028870905782450463,
"loss": 0.1579,
"step": 4225
},
{
"epoch": 2.563541745229655,
"grad_norm": 0.27097514271736145,
"learning_rate": 0.0002886332389809947,
"loss": 0.2392,
"step": 4250
},
{
"epoch": 2.578625839052719,
"grad_norm": 0.4393375813961029,
"learning_rate": 0.0002885574201374848,
"loss": 0.1461,
"step": 4275
},
{
"epoch": 2.5937099328757824,
"grad_norm": 0.30553507804870605,
"learning_rate": 0.0002884816012939749,
"loss": 0.2319,
"step": 4300
},
{
"epoch": 2.6087940266988463,
"grad_norm": 0.450008749961853,
"learning_rate": 0.00028840578245046503,
"loss": 0.1482,
"step": 4325
},
{
"epoch": 2.6238781205219097,
"grad_norm": 0.2398987114429474,
"learning_rate": 0.00028832996360695506,
"loss": 0.2193,
"step": 4350
},
{
"epoch": 2.638962214344973,
"grad_norm": 0.2716082036495209,
"learning_rate": 0.0002882541447634452,
"loss": 0.1716,
"step": 4375
},
{
"epoch": 2.654046308168037,
"grad_norm": 0.18016381561756134,
"learning_rate": 0.0002881783259199353,
"loss": 0.2191,
"step": 4400
},
{
"epoch": 2.6691304019911004,
"grad_norm": 0.24146826565265656,
"learning_rate": 0.0002881025070764254,
"loss": 0.1576,
"step": 4425
},
{
"epoch": 2.684214495814164,
"grad_norm": 0.20057305693626404,
"learning_rate": 0.00028802668823291547,
"loss": 0.2222,
"step": 4450
},
{
"epoch": 2.6992985896372277,
"grad_norm": 0.36306390166282654,
"learning_rate": 0.00028795086938940555,
"loss": 0.1662,
"step": 4475
},
{
"epoch": 2.714382683460291,
"grad_norm": 0.2843382954597473,
"learning_rate": 0.00028787505054589564,
"loss": 0.2292,
"step": 4500
},
{
"epoch": 2.714382683460291,
"eval_loss": 0.19914484024047852,
"eval_runtime": 763.4779,
"eval_samples_per_second": 4.342,
"eval_steps_per_second": 1.086,
"eval_wer": 0.22595691842574098,
"step": 4500
},
{
"epoch": 2.7294667772833545,
"grad_norm": 0.5379898548126221,
"learning_rate": 0.0002877992317023858,
"loss": 0.1578,
"step": 4525
},
{
"epoch": 2.7445508711064184,
"grad_norm": 0.18573738634586334,
"learning_rate": 0.0002877234128588758,
"loss": 0.2237,
"step": 4550
},
{
"epoch": 2.759634964929482,
"grad_norm": 0.2831384241580963,
"learning_rate": 0.0002876475940153659,
"loss": 0.1606,
"step": 4575
},
{
"epoch": 2.774719058752545,
"grad_norm": 0.5031735301017761,
"learning_rate": 0.00028757177517185604,
"loss": 0.1805,
"step": 4600
},
{
"epoch": 2.789803152575609,
"grad_norm": 0.768892765045166,
"learning_rate": 0.0002874959563283461,
"loss": 0.157,
"step": 4625
},
{
"epoch": 2.8048872463986725,
"grad_norm": 0.22896036505699158,
"learning_rate": 0.0002874201374848362,
"loss": 0.2106,
"step": 4650
},
{
"epoch": 2.8199713402217363,
"grad_norm": 0.2187754064798355,
"learning_rate": 0.0002873443186413263,
"loss": 0.1605,
"step": 4675
},
{
"epoch": 2.8350554340447998,
"grad_norm": 0.1761629283428192,
"learning_rate": 0.0002872684997978164,
"loss": 0.2186,
"step": 4700
},
{
"epoch": 2.850139527867863,
"grad_norm": 0.3175676167011261,
"learning_rate": 0.0002871926809543065,
"loss": 0.1537,
"step": 4725
},
{
"epoch": 2.865223621690927,
"grad_norm": 0.21728022396564484,
"learning_rate": 0.00028711686211079656,
"loss": 0.2119,
"step": 4750
},
{
"epoch": 2.8803077155139905,
"grad_norm": 0.29456526041030884,
"learning_rate": 0.00028704104326728665,
"loss": 0.1556,
"step": 4775
},
{
"epoch": 2.8953918093370543,
"grad_norm": 0.2309761792421341,
"learning_rate": 0.0002869652244237768,
"loss": 0.1805,
"step": 4800
},
{
"epoch": 2.9104759031601177,
"grad_norm": 0.30479326844215393,
"learning_rate": 0.0002868894055802669,
"loss": 0.1529,
"step": 4825
},
{
"epoch": 2.925559996983181,
"grad_norm": 0.1994062215089798,
"learning_rate": 0.00028681358673675697,
"loss": 0.192,
"step": 4850
},
{
"epoch": 2.940644090806245,
"grad_norm": 0.3960685133934021,
"learning_rate": 0.00028673776789324705,
"loss": 0.1505,
"step": 4875
},
{
"epoch": 2.9557281846293084,
"grad_norm": 0.18159142136573792,
"learning_rate": 0.00028666194904973714,
"loss": 0.1899,
"step": 4900
},
{
"epoch": 2.970812278452372,
"grad_norm": 0.5303758978843689,
"learning_rate": 0.0002865861302062272,
"loss": 0.1899,
"step": 4925
},
{
"epoch": 2.9858963722754357,
"grad_norm": 0.18483863770961761,
"learning_rate": 0.0002865103113627173,
"loss": 0.2267,
"step": 4950
},
{
"epoch": 3.0006033637529224,
"grad_norm": 0.403163343667984,
"learning_rate": 0.0002864344925192074,
"loss": 0.1248,
"step": 4975
},
{
"epoch": 3.015687457575986,
"grad_norm": 0.2946682274341583,
"learning_rate": 0.0002863586736756975,
"loss": 0.2489,
"step": 5000
},
{
"epoch": 3.015687457575986,
"eval_loss": 0.1845189481973648,
"eval_runtime": 767.7712,
"eval_samples_per_second": 4.318,
"eval_steps_per_second": 1.08,
"eval_wer": 0.21365869459590778,
"step": 5000
},
{
"epoch": 3.0307715513990496,
"grad_norm": 0.36335453391075134,
"learning_rate": 0.00028628285483218763,
"loss": 0.0881,
"step": 5025
},
{
"epoch": 3.0458556452221135,
"grad_norm": 0.3755040764808655,
"learning_rate": 0.00028620703598867766,
"loss": 0.2487,
"step": 5050
},
{
"epoch": 3.060939739045177,
"grad_norm": 0.38198986649513245,
"learning_rate": 0.0002861312171451678,
"loss": 0.0799,
"step": 5075
},
{
"epoch": 3.0760238328682403,
"grad_norm": 0.17410704493522644,
"learning_rate": 0.0002860553983016579,
"loss": 0.2627,
"step": 5100
},
{
"epoch": 3.091107926691304,
"grad_norm": 0.6871060132980347,
"learning_rate": 0.000285979579458148,
"loss": 0.0967,
"step": 5125
},
{
"epoch": 3.1061920205143676,
"grad_norm": 0.40587660670280457,
"learning_rate": 0.00028590376061463806,
"loss": 0.2739,
"step": 5150
},
{
"epoch": 3.121276114337431,
"grad_norm": 0.5106558799743652,
"learning_rate": 0.00028582794177112815,
"loss": 0.0817,
"step": 5175
},
{
"epoch": 3.136360208160495,
"grad_norm": 0.5959634184837341,
"learning_rate": 0.00028575212292761824,
"loss": 0.2631,
"step": 5200
},
{
"epoch": 3.1514443019835583,
"grad_norm": 0.32283464074134827,
"learning_rate": 0.0002856763040841084,
"loss": 0.0805,
"step": 5225
},
{
"epoch": 3.1665283958066217,
"grad_norm": 0.4039979577064514,
"learning_rate": 0.0002856004852405984,
"loss": 0.2601,
"step": 5250
},
{
"epoch": 3.1816124896296856,
"grad_norm": 0.9042678475379944,
"learning_rate": 0.00028552466639708855,
"loss": 0.0845,
"step": 5275
},
{
"epoch": 3.196696583452749,
"grad_norm": 0.28807753324508667,
"learning_rate": 0.00028544884755357864,
"loss": 0.2692,
"step": 5300
},
{
"epoch": 3.211780677275813,
"grad_norm": 0.2866358458995819,
"learning_rate": 0.0002853730287100687,
"loss": 0.081,
"step": 5325
},
{
"epoch": 3.2268647710988763,
"grad_norm": 0.2690901756286621,
"learning_rate": 0.0002852972098665588,
"loss": 0.2499,
"step": 5350
},
{
"epoch": 3.2419488649219397,
"grad_norm": 0.21377098560333252,
"learning_rate": 0.0002852213910230489,
"loss": 0.0802,
"step": 5375
},
{
"epoch": 3.2570329587450035,
"grad_norm": 0.3079901933670044,
"learning_rate": 0.000285145572179539,
"loss": 0.2314,
"step": 5400
},
{
"epoch": 3.272117052568067,
"grad_norm": 0.47138121724128723,
"learning_rate": 0.00028506975333602913,
"loss": 0.0883,
"step": 5425
},
{
"epoch": 3.2872011463911304,
"grad_norm": 0.3404664695262909,
"learning_rate": 0.00028499393449251916,
"loss": 0.225,
"step": 5450
},
{
"epoch": 3.3022852402141942,
"grad_norm": 0.3051985800266266,
"learning_rate": 0.00028491811564900925,
"loss": 0.0757,
"step": 5475
},
{
"epoch": 3.3173693340372576,
"grad_norm": 0.27657219767570496,
"learning_rate": 0.0002848453295592398,
"loss": 0.4731,
"step": 5500
},
{
"epoch": 3.3173693340372576,
"eval_loss": 0.185445174574852,
"eval_runtime": 756.69,
"eval_samples_per_second": 4.381,
"eval_steps_per_second": 1.096,
"eval_wer": 0.21210387086325108,
"step": 5500
},
{
"epoch": 3.3324534278603215,
"grad_norm": 0.305276095867157,
"learning_rate": 0.00028476951071572984,
"loss": 0.0804,
"step": 5525
},
{
"epoch": 3.347537521683385,
"grad_norm": 0.23126491904258728,
"learning_rate": 0.00028469369187222,
"loss": 0.2612,
"step": 5550
},
{
"epoch": 3.3626216155064483,
"grad_norm": 0.29387226700782776,
"learning_rate": 0.00028461787302871006,
"loss": 0.085,
"step": 5575
},
{
"epoch": 3.377705709329512,
"grad_norm": 0.7246792912483215,
"learning_rate": 0.00028454205418520015,
"loss": 0.2898,
"step": 5600
},
{
"epoch": 3.3927898031525756,
"grad_norm": 0.42687901854515076,
"learning_rate": 0.00028446623534169024,
"loss": 0.101,
"step": 5625
},
{
"epoch": 3.407873896975639,
"grad_norm": 0.2322624772787094,
"learning_rate": 0.0002843904164981803,
"loss": 0.2543,
"step": 5650
},
{
"epoch": 3.422957990798703,
"grad_norm": 0.49676916003227234,
"learning_rate": 0.0002843145976546704,
"loss": 0.0832,
"step": 5675
},
{
"epoch": 3.4380420846217663,
"grad_norm": 0.42384791374206543,
"learning_rate": 0.0002842387788111605,
"loss": 0.2517,
"step": 5700
},
{
"epoch": 3.4531261784448297,
"grad_norm": 0.48723557591438293,
"learning_rate": 0.0002841629599676506,
"loss": 0.0974,
"step": 5725
},
{
"epoch": 3.4682102722678936,
"grad_norm": 1.3422971963882446,
"learning_rate": 0.00028408714112414067,
"loss": 0.2576,
"step": 5750
},
{
"epoch": 3.483294366090957,
"grad_norm": 0.5135483145713806,
"learning_rate": 0.0002840113222806308,
"loss": 0.0904,
"step": 5775
},
{
"epoch": 3.498378459914021,
"grad_norm": 0.1537868082523346,
"learning_rate": 0.00028393550343712085,
"loss": 0.2522,
"step": 5800
},
{
"epoch": 3.5134625537370843,
"grad_norm": 0.6072225570678711,
"learning_rate": 0.000283859684593611,
"loss": 0.1021,
"step": 5825
},
{
"epoch": 3.5285466475601477,
"grad_norm": 0.2713959813117981,
"learning_rate": 0.0002837838657501011,
"loss": 0.2608,
"step": 5850
},
{
"epoch": 3.5436307413832115,
"grad_norm": 0.4154447019100189,
"learning_rate": 0.00028370804690659116,
"loss": 0.0876,
"step": 5875
},
{
"epoch": 3.558714835206275,
"grad_norm": 0.197547048330307,
"learning_rate": 0.00028363222806308125,
"loss": 0.2415,
"step": 5900
},
{
"epoch": 3.573798929029339,
"grad_norm": 0.27300530672073364,
"learning_rate": 0.00028355640921957134,
"loss": 0.085,
"step": 5925
},
{
"epoch": 3.5888830228524022,
"grad_norm": 0.22969838976860046,
"learning_rate": 0.0002834805903760614,
"loss": 0.2526,
"step": 5950
},
{
"epoch": 3.6039671166754657,
"grad_norm": 0.22905798256397247,
"learning_rate": 0.00028340477153255156,
"loss": 0.0958,
"step": 5975
},
{
"epoch": 3.6190512104985295,
"grad_norm": 0.13335692882537842,
"learning_rate": 0.0002833289526890416,
"loss": 0.2497,
"step": 6000
},
{
"epoch": 3.6190512104985295,
"eval_loss": 0.18878242373466492,
"eval_runtime": 739.3999,
"eval_samples_per_second": 4.483,
"eval_steps_per_second": 1.121,
"eval_wer": 0.21019273335852723,
"step": 6000
},
{
"epoch": 3.634135304321593,
"grad_norm": 1.2492620944976807,
"learning_rate": 0.00028325313384553174,
"loss": 0.0951,
"step": 6025
},
{
"epoch": 3.6492193981446563,
"grad_norm": 0.19799137115478516,
"learning_rate": 0.0002831773150020218,
"loss": 0.2914,
"step": 6050
},
{
"epoch": 3.66430349196772,
"grad_norm": 0.33943161368370056,
"learning_rate": 0.0002831014961585119,
"loss": 0.0908,
"step": 6075
},
{
"epoch": 3.6793875857907836,
"grad_norm": 0.6077541708946228,
"learning_rate": 0.000283025677315002,
"loss": 0.2635,
"step": 6100
},
{
"epoch": 3.694471679613847,
"grad_norm": 0.4308757781982422,
"learning_rate": 0.0002829498584714921,
"loss": 0.1054,
"step": 6125
},
{
"epoch": 3.709555773436911,
"grad_norm": 0.27450037002563477,
"learning_rate": 0.00028287403962798217,
"loss": 0.2521,
"step": 6150
},
{
"epoch": 3.7246398672599743,
"grad_norm": 0.41612792015075684,
"learning_rate": 0.00028279822078447226,
"loss": 0.0969,
"step": 6175
},
{
"epoch": 3.7397239610830377,
"grad_norm": 0.818908154964447,
"learning_rate": 0.00028272240194096235,
"loss": 0.2574,
"step": 6200
},
{
"epoch": 3.7548080549061016,
"grad_norm": 0.3212096393108368,
"learning_rate": 0.00028264658309745243,
"loss": 0.0874,
"step": 6225
},
{
"epoch": 3.769892148729165,
"grad_norm": 0.4317522644996643,
"learning_rate": 0.0002825707642539426,
"loss": 0.2812,
"step": 6250
},
{
"epoch": 3.7849762425522284,
"grad_norm": 0.3905152976512909,
"learning_rate": 0.00028249494541043266,
"loss": 0.0903,
"step": 6275
},
{
"epoch": 3.8000603363752923,
"grad_norm": 0.22848589718341827,
"learning_rate": 0.00028241912656692275,
"loss": 0.292,
"step": 6300
},
{
"epoch": 3.8151444301983557,
"grad_norm": 0.645900547504425,
"learning_rate": 0.00028234330772341284,
"loss": 0.0863,
"step": 6325
},
{
"epoch": 3.8302285240214196,
"grad_norm": 0.3289954960346222,
"learning_rate": 0.0002822674888799029,
"loss": 0.2798,
"step": 6350
},
{
"epoch": 3.845312617844483,
"grad_norm": 0.3951515853404999,
"learning_rate": 0.000282191670036393,
"loss": 0.0894,
"step": 6375
},
{
"epoch": 3.860396711667547,
"grad_norm": 0.23933938145637512,
"learning_rate": 0.00028211585119288315,
"loss": 0.2836,
"step": 6400
},
{
"epoch": 3.8754808054906102,
"grad_norm": 0.3123800456523895,
"learning_rate": 0.0002820400323493732,
"loss": 0.0841,
"step": 6425
},
{
"epoch": 3.8905648993136737,
"grad_norm": 0.2620724141597748,
"learning_rate": 0.0002819642135058633,
"loss": 0.2695,
"step": 6450
},
{
"epoch": 3.9056489931367375,
"grad_norm": 0.3551441431045532,
"learning_rate": 0.0002818883946623534,
"loss": 0.0887,
"step": 6475
},
{
"epoch": 3.920733086959801,
"grad_norm": 0.2354227900505066,
"learning_rate": 0.0002818125758188435,
"loss": 0.2872,
"step": 6500
},
{
"epoch": 3.920733086959801,
"eval_loss": 0.191518634557724,
"eval_runtime": 727.3001,
"eval_samples_per_second": 4.558,
"eval_steps_per_second": 1.14,
"eval_wer": 0.21967283917292016,
"step": 6500
},
{
"epoch": 3.9358171807828644,
"grad_norm": 0.4430249333381653,
"learning_rate": 0.0002817367569753336,
"loss": 0.0916,
"step": 6525
},
{
"epoch": 3.950901274605928,
"grad_norm": 0.3285467326641083,
"learning_rate": 0.00028166093813182367,
"loss": 0.2646,
"step": 6550
},
{
"epoch": 3.9659853684289916,
"grad_norm": 0.27407094836235046,
"learning_rate": 0.00028158511928831376,
"loss": 0.0852,
"step": 6575
},
{
"epoch": 3.981069462252055,
"grad_norm": 0.25445982813835144,
"learning_rate": 0.00028150930044480385,
"loss": 0.2673,
"step": 6600
},
{
"epoch": 3.996153556075119,
"grad_norm": 0.4180232882499695,
"learning_rate": 0.00028143348160129393,
"loss": 0.0804,
"step": 6625
},
{
"epoch": 4.010860547552606,
"grad_norm": 0.42869091033935547,
"learning_rate": 0.000281357662757784,
"loss": 0.2138,
"step": 6650
},
{
"epoch": 4.025944641375669,
"grad_norm": 0.1404862254858017,
"learning_rate": 0.00028128184391427416,
"loss": 0.1138,
"step": 6675
},
{
"epoch": 4.041028735198733,
"grad_norm": 0.3522794246673584,
"learning_rate": 0.0002812060250707642,
"loss": 0.4489,
"step": 6700
},
{
"epoch": 4.056112829021797,
"grad_norm": 0.24887488782405853,
"learning_rate": 0.00028113020622725434,
"loss": 0.1131,
"step": 6725
},
{
"epoch": 4.07119692284486,
"grad_norm": 0.45187073945999146,
"learning_rate": 0.0002810543873837444,
"loss": 0.1959,
"step": 6750
},
{
"epoch": 4.0862810166679235,
"grad_norm": 0.30940550565719604,
"learning_rate": 0.0002809785685402345,
"loss": 0.1017,
"step": 6775
},
{
"epoch": 4.101365110490987,
"grad_norm": 0.43294191360473633,
"learning_rate": 0.0002809027496967246,
"loss": 0.197,
"step": 6800
},
{
"epoch": 4.116449204314051,
"grad_norm": 0.32438719272613525,
"learning_rate": 0.0002808269308532147,
"loss": 0.1136,
"step": 6825
},
{
"epoch": 4.131533298137114,
"grad_norm": 0.38587555289268494,
"learning_rate": 0.00028075111200970477,
"loss": 0.1977,
"step": 6850
},
{
"epoch": 4.146617391960178,
"grad_norm": 0.2455216646194458,
"learning_rate": 0.0002806752931661949,
"loss": 0.1072,
"step": 6875
},
{
"epoch": 4.161701485783242,
"grad_norm": 0.356931209564209,
"learning_rate": 0.00028059947432268494,
"loss": 0.2118,
"step": 6900
},
{
"epoch": 4.176785579606305,
"grad_norm": 0.8069139719009399,
"learning_rate": 0.0002805236554791751,
"loss": 0.1282,
"step": 6925
},
{
"epoch": 4.191869673429369,
"grad_norm": 0.27101409435272217,
"learning_rate": 0.00028044783663566517,
"loss": 0.226,
"step": 6950
},
{
"epoch": 4.206953767252433,
"grad_norm": 0.2601521909236908,
"learning_rate": 0.00028037201779215526,
"loss": 0.1372,
"step": 6975
},
{
"epoch": 4.222037861075496,
"grad_norm": 0.5733149647712708,
"learning_rate": 0.00028029619894864535,
"loss": 0.248,
"step": 7000
},
{
"epoch": 4.222037861075496,
"eval_loss": 0.19203022122383118,
"eval_runtime": 763.6876,
"eval_samples_per_second": 4.341,
"eval_steps_per_second": 1.086,
"eval_wer": 0.21993197646169627,
"step": 7000
},
{
"epoch": 4.2371219548985595,
"grad_norm": 0.34575146436691284,
"learning_rate": 0.00028022038010513543,
"loss": 0.1382,
"step": 7025
},
{
"epoch": 4.252206048721623,
"grad_norm": 0.40568241477012634,
"learning_rate": 0.0002801445612616255,
"loss": 0.2228,
"step": 7050
},
{
"epoch": 4.267290142544686,
"grad_norm": 0.2740955054759979,
"learning_rate": 0.0002800687424181156,
"loss": 0.1264,
"step": 7075
},
{
"epoch": 4.28237423636775,
"grad_norm": 0.33927688002586365,
"learning_rate": 0.0002799929235746057,
"loss": 0.2267,
"step": 7100
},
{
"epoch": 4.297458330190814,
"grad_norm": 0.27805888652801514,
"learning_rate": 0.0002799171047310958,
"loss": 0.1333,
"step": 7125
},
{
"epoch": 4.312542424013877,
"grad_norm": 0.36450818181037903,
"learning_rate": 0.0002798412858875859,
"loss": 0.1916,
"step": 7150
},
{
"epoch": 4.327626517836941,
"grad_norm": 0.1994234174489975,
"learning_rate": 0.000279765467044076,
"loss": 0.1451,
"step": 7175
},
{
"epoch": 4.342710611660005,
"grad_norm": 0.3419537842273712,
"learning_rate": 0.0002796896482005661,
"loss": 0.2135,
"step": 7200
},
{
"epoch": 4.357794705483068,
"grad_norm": 0.252347856760025,
"learning_rate": 0.0002796138293570562,
"loss": 0.1289,
"step": 7225
},
{
"epoch": 4.3728787993061315,
"grad_norm": 0.3467054069042206,
"learning_rate": 0.00027953801051354627,
"loss": 0.2341,
"step": 7250
},
{
"epoch": 4.387962893129195,
"grad_norm": 0.24314385652542114,
"learning_rate": 0.00027946219167003636,
"loss": 0.1246,
"step": 7275
},
{
"epoch": 4.403046986952258,
"grad_norm": 0.35150381922721863,
"learning_rate": 0.0002793863728265265,
"loss": 0.2159,
"step": 7300
},
{
"epoch": 4.418131080775322,
"grad_norm": 0.44021642208099365,
"learning_rate": 0.00027931055398301653,
"loss": 0.1256,
"step": 7325
},
{
"epoch": 4.433215174598386,
"grad_norm": 0.33157458901405334,
"learning_rate": 0.00027923473513950667,
"loss": 0.2206,
"step": 7350
},
{
"epoch": 4.44829926842145,
"grad_norm": 0.292894184589386,
"learning_rate": 0.00027915891629599676,
"loss": 0.1199,
"step": 7375
},
{
"epoch": 4.463383362244513,
"grad_norm": 0.5214021801948547,
"learning_rate": 0.00027908309745248685,
"loss": 0.1855,
"step": 7400
},
{
"epoch": 4.478467456067577,
"grad_norm": 0.22543422877788544,
"learning_rate": 0.00027900727860897693,
"loss": 0.1206,
"step": 7425
},
{
"epoch": 4.493551549890641,
"grad_norm": 0.33008188009262085,
"learning_rate": 0.000278931459765467,
"loss": 0.2273,
"step": 7450
},
{
"epoch": 4.508635643713704,
"grad_norm": 0.25787121057510376,
"learning_rate": 0.0002788556409219571,
"loss": 0.1044,
"step": 7475
},
{
"epoch": 4.5237197375367675,
"grad_norm": 0.26990142464637756,
"learning_rate": 0.0002787798220784472,
"loss": 0.2048,
"step": 7500
},
{
"epoch": 4.5237197375367675,
"eval_loss": 0.1822061687707901,
"eval_runtime": 772.5305,
"eval_samples_per_second": 4.291,
"eval_steps_per_second": 1.073,
"eval_wer": 0.20550666738649248,
"step": 7500
},
{
"epoch": 4.538803831359831,
"grad_norm": 0.2780537009239197,
"learning_rate": 0.0002787040032349373,
"loss": 0.1367,
"step": 7525
},
{
"epoch": 4.553887925182894,
"grad_norm": 0.2901013493537903,
"learning_rate": 0.00027862818439142737,
"loss": 0.2171,
"step": 7550
},
{
"epoch": 4.568972019005958,
"grad_norm": 0.7013656497001648,
"learning_rate": 0.0002785523655479175,
"loss": 0.1269,
"step": 7575
},
{
"epoch": 4.584056112829022,
"grad_norm": 0.4125453531742096,
"learning_rate": 0.00027847654670440754,
"loss": 0.2193,
"step": 7600
},
{
"epoch": 4.599140206652085,
"grad_norm": 0.17244857549667358,
"learning_rate": 0.0002784007278608977,
"loss": 0.1334,
"step": 7625
},
{
"epoch": 4.614224300475149,
"grad_norm": 0.23291832208633423,
"learning_rate": 0.00027832490901738777,
"loss": 0.212,
"step": 7650
},
{
"epoch": 4.629308394298213,
"grad_norm": 0.20680192112922668,
"learning_rate": 0.00027824909017387786,
"loss": 0.1127,
"step": 7675
},
{
"epoch": 4.644392488121277,
"grad_norm": 0.3413441777229309,
"learning_rate": 0.00027817327133036794,
"loss": 0.1931,
"step": 7700
},
{
"epoch": 4.65947658194434,
"grad_norm": 0.24499134719371796,
"learning_rate": 0.00027809745248685803,
"loss": 0.1148,
"step": 7725
},
{
"epoch": 4.674560675767403,
"grad_norm": 0.4222862720489502,
"learning_rate": 0.0002780216336433481,
"loss": 0.2316,
"step": 7750
},
{
"epoch": 4.689644769590467,
"grad_norm": 0.190853551030159,
"learning_rate": 0.00027794581479983826,
"loss": 0.1408,
"step": 7775
},
{
"epoch": 4.70472886341353,
"grad_norm": 0.58743816614151,
"learning_rate": 0.0002778699959563283,
"loss": 0.1865,
"step": 7800
},
{
"epoch": 4.719812957236594,
"grad_norm": 0.19071631133556366,
"learning_rate": 0.00027779417711281843,
"loss": 0.1304,
"step": 7825
},
{
"epoch": 4.734897051059658,
"grad_norm": 0.7743087410926819,
"learning_rate": 0.0002777183582693085,
"loss": 0.203,
"step": 7850
},
{
"epoch": 4.749981144882721,
"grad_norm": 0.21871432662010193,
"learning_rate": 0.0002776425394257986,
"loss": 0.099,
"step": 7875
},
{
"epoch": 4.765065238705785,
"grad_norm": 0.3248477280139923,
"learning_rate": 0.0002775667205822887,
"loss": 0.2121,
"step": 7900
},
{
"epoch": 4.780149332528849,
"grad_norm": 0.21200844645500183,
"learning_rate": 0.0002774909017387788,
"loss": 0.126,
"step": 7925
},
{
"epoch": 4.795233426351912,
"grad_norm": 0.6078742742538452,
"learning_rate": 0.00027741508289526887,
"loss": 0.2062,
"step": 7950
},
{
"epoch": 4.8103175201749755,
"grad_norm": 0.260425865650177,
"learning_rate": 0.00027733926405175896,
"loss": 0.1122,
"step": 7975
},
{
"epoch": 4.825401613998039,
"grad_norm": 0.4660604000091553,
"learning_rate": 0.00027726344520824904,
"loss": 0.1977,
"step": 8000
},
{
"epoch": 4.825401613998039,
"eval_loss": 0.1850125789642334,
"eval_runtime": 772.6249,
"eval_samples_per_second": 4.291,
"eval_steps_per_second": 1.073,
"eval_wer": 0.2094477136532959,
"step": 8000
},
{
"epoch": 4.840485707821102,
"grad_norm": 0.2847197949886322,
"learning_rate": 0.00027718762636473913,
"loss": 0.1246,
"step": 8025
},
{
"epoch": 4.855569801644166,
"grad_norm": 0.39191052317619324,
"learning_rate": 0.00027711180752122927,
"loss": 0.1994,
"step": 8050
},
{
"epoch": 4.87065389546723,
"grad_norm": 0.19209399819374084,
"learning_rate": 0.00027703598867771936,
"loss": 0.1233,
"step": 8075
},
{
"epoch": 4.885737989290293,
"grad_norm": 0.4371297359466553,
"learning_rate": 0.00027696016983420944,
"loss": 0.234,
"step": 8100
},
{
"epoch": 4.900822083113357,
"grad_norm": 0.25683364272117615,
"learning_rate": 0.00027688435099069953,
"loss": 0.1182,
"step": 8125
},
{
"epoch": 4.915906176936421,
"grad_norm": 0.42062830924987793,
"learning_rate": 0.0002768085321471896,
"loss": 0.2268,
"step": 8150
},
{
"epoch": 4.930990270759484,
"grad_norm": 0.28874415159225464,
"learning_rate": 0.0002767327133036797,
"loss": 0.1134,
"step": 8175
},
{
"epoch": 4.946074364582548,
"grad_norm": 0.28938642144203186,
"learning_rate": 0.00027665689446016985,
"loss": 0.2109,
"step": 8200
},
{
"epoch": 4.961158458405611,
"grad_norm": 0.16918736696243286,
"learning_rate": 0.0002765810756166599,
"loss": 0.1132,
"step": 8225
},
{
"epoch": 4.976242552228674,
"grad_norm": 0.3270578682422638,
"learning_rate": 0.00027650525677315,
"loss": 0.2191,
"step": 8250
},
{
"epoch": 4.991326646051738,
"grad_norm": 0.8475662469863892,
"learning_rate": 0.0002764294379296401,
"loss": 0.146,
"step": 8275
},
{
"epoch": 5.006033637529225,
"grad_norm": 0.3081296980381012,
"learning_rate": 0.0002763536190861302,
"loss": 0.2,
"step": 8300
},
{
"epoch": 5.021117731352289,
"grad_norm": 0.17431728541851044,
"learning_rate": 0.0002762778002426203,
"loss": 0.17,
"step": 8325
},
{
"epoch": 5.036201825175352,
"grad_norm": 0.40710902214050293,
"learning_rate": 0.00027620198139911037,
"loss": 0.1504,
"step": 8350
},
{
"epoch": 5.051285918998416,
"grad_norm": 0.19659915566444397,
"learning_rate": 0.00027612616255560046,
"loss": 0.1745,
"step": 8375
},
{
"epoch": 5.06637001282148,
"grad_norm": 0.3791191577911377,
"learning_rate": 0.00027605034371209054,
"loss": 0.1248,
"step": 8400
},
{
"epoch": 5.081454106644543,
"grad_norm": 0.1905103623867035,
"learning_rate": 0.00027597452486858063,
"loss": 0.1933,
"step": 8425
},
{
"epoch": 5.096538200467607,
"grad_norm": 0.28806644678115845,
"learning_rate": 0.0002758987060250707,
"loss": 0.1295,
"step": 8450
},
{
"epoch": 5.111622294290671,
"grad_norm": 0.34333837032318115,
"learning_rate": 0.00027582288718156086,
"loss": 0.1778,
"step": 8475
},
{
"epoch": 5.1267063881137345,
"grad_norm": 0.5322638154029846,
"learning_rate": 0.0002757470683380509,
"loss": 0.1459,
"step": 8500
},
{
"epoch": 5.1267063881137345,
"eval_loss": 0.1905168741941452,
"eval_runtime": 772.0187,
"eval_samples_per_second": 4.294,
"eval_steps_per_second": 1.074,
"eval_wer": 0.21526750526372618,
"step": 8500
},
{
"epoch": 5.141790481936797,
"grad_norm": 0.39731988310813904,
"learning_rate": 0.00027567124949454103,
"loss": 0.1731,
"step": 8525
},
{
"epoch": 5.156874575759861,
"grad_norm": 0.760844349861145,
"learning_rate": 0.0002755954306510311,
"loss": 0.1466,
"step": 8550
},
{
"epoch": 5.171958669582925,
"grad_norm": 0.8529795408248901,
"learning_rate": 0.0002755196118075212,
"loss": 0.1737,
"step": 8575
},
{
"epoch": 5.187042763405988,
"grad_norm": 0.45475590229034424,
"learning_rate": 0.0002754437929640113,
"loss": 0.1452,
"step": 8600
},
{
"epoch": 5.202126857229052,
"grad_norm": 0.26215818524360657,
"learning_rate": 0.0002753710068742418,
"loss": 0.4218,
"step": 8625
},
{
"epoch": 5.217210951052116,
"grad_norm": 0.38315796852111816,
"learning_rate": 0.0002752951880307319,
"loss": 0.1261,
"step": 8650
},
{
"epoch": 5.232295044875179,
"grad_norm": 0.41056641936302185,
"learning_rate": 0.00027521936918722197,
"loss": 0.162,
"step": 8675
},
{
"epoch": 5.247379138698243,
"grad_norm": 0.22341406345367432,
"learning_rate": 0.00027514355034371205,
"loss": 0.1305,
"step": 8700
},
{
"epoch": 5.2624632325213065,
"grad_norm": 0.2595282793045044,
"learning_rate": 0.00027506773150020214,
"loss": 0.1655,
"step": 8725
},
{
"epoch": 5.2775473263443695,
"grad_norm": 0.2871881127357483,
"learning_rate": 0.0002749919126566923,
"loss": 0.1347,
"step": 8750
},
{
"epoch": 5.292631420167433,
"grad_norm": 0.17933253943920135,
"learning_rate": 0.0002749160938131823,
"loss": 0.1715,
"step": 8775
},
{
"epoch": 5.307715513990497,
"grad_norm": 0.5562332272529602,
"learning_rate": 0.00027484027496967246,
"loss": 0.1298,
"step": 8800
},
{
"epoch": 5.32279960781356,
"grad_norm": 0.2567090392112732,
"learning_rate": 0.00027476445612616254,
"loss": 0.1673,
"step": 8825
},
{
"epoch": 5.337883701636624,
"grad_norm": 0.3126806914806366,
"learning_rate": 0.00027468863728265263,
"loss": 0.126,
"step": 8850
},
{
"epoch": 5.352967795459688,
"grad_norm": 0.16551165282726288,
"learning_rate": 0.0002746128184391427,
"loss": 0.1633,
"step": 8875
},
{
"epoch": 5.368051889282752,
"grad_norm": 0.337634414434433,
"learning_rate": 0.0002745369995956328,
"loss": 0.1204,
"step": 8900
},
{
"epoch": 5.383135983105815,
"grad_norm": 0.36297717690467834,
"learning_rate": 0.0002744611807521229,
"loss": 0.1927,
"step": 8925
},
{
"epoch": 5.398220076928879,
"grad_norm": 0.3347633481025696,
"learning_rate": 0.00027438536190861303,
"loss": 0.1434,
"step": 8950
},
{
"epoch": 5.4133041707519425,
"grad_norm": 0.2264309823513031,
"learning_rate": 0.00027430954306510307,
"loss": 0.2008,
"step": 8975
},
{
"epoch": 5.4283882645750055,
"grad_norm": 0.263372004032135,
"learning_rate": 0.0002742337242215932,
"loss": 0.1471,
"step": 9000
},
{
"epoch": 5.4283882645750055,
"eval_loss": 0.1863842010498047,
"eval_runtime": 767.0229,
"eval_samples_per_second": 4.322,
"eval_steps_per_second": 1.081,
"eval_wer": 0.20231064082492037,
"step": 9000
},
{
"epoch": 5.443472358398069,
"grad_norm": 0.20563261210918427,
"learning_rate": 0.0002741579053780833,
"loss": 0.1721,
"step": 9025
},
{
"epoch": 5.458556452221133,
"grad_norm": 0.26344984769821167,
"learning_rate": 0.0002740820865345733,
"loss": 0.1332,
"step": 9050
},
{
"epoch": 5.473640546044196,
"grad_norm": 0.2505332827568054,
"learning_rate": 0.00027400626769106347,
"loss": 0.1638,
"step": 9075
},
{
"epoch": 5.48872463986726,
"grad_norm": 0.5164260864257812,
"learning_rate": 0.00027393044884755355,
"loss": 0.1311,
"step": 9100
},
{
"epoch": 5.503808733690324,
"grad_norm": 0.15487757325172424,
"learning_rate": 0.00027385463000404364,
"loss": 0.1715,
"step": 9125
},
{
"epoch": 5.518892827513387,
"grad_norm": 0.46957388520240784,
"learning_rate": 0.00027377881116053373,
"loss": 0.1392,
"step": 9150
},
{
"epoch": 5.533976921336451,
"grad_norm": 0.20295724272727966,
"learning_rate": 0.0002737029923170238,
"loss": 0.2009,
"step": 9175
},
{
"epoch": 5.5490610151595146,
"grad_norm": 0.27826988697052,
"learning_rate": 0.0002736271734735139,
"loss": 0.1404,
"step": 9200
},
{
"epoch": 5.5641451089825775,
"grad_norm": 0.2126350849866867,
"learning_rate": 0.00027355135463000404,
"loss": 0.1784,
"step": 9225
},
{
"epoch": 5.579229202805641,
"grad_norm": 0.4118373692035675,
"learning_rate": 0.00027347553578649413,
"loss": 0.1309,
"step": 9250
},
{
"epoch": 5.594313296628705,
"grad_norm": 0.16608783602714539,
"learning_rate": 0.0002733997169429842,
"loss": 0.1532,
"step": 9275
},
{
"epoch": 5.609397390451768,
"grad_norm": 0.3244372308254242,
"learning_rate": 0.0002733238980994743,
"loss": 0.124,
"step": 9300
},
{
"epoch": 5.624481484274832,
"grad_norm": 0.2650703489780426,
"learning_rate": 0.0002732480792559644,
"loss": 0.1924,
"step": 9325
},
{
"epoch": 5.639565578097896,
"grad_norm": 0.4252435564994812,
"learning_rate": 0.0002731722604124545,
"loss": 0.1336,
"step": 9350
},
{
"epoch": 5.654649671920959,
"grad_norm": 0.18568560481071472,
"learning_rate": 0.00027309644156894457,
"loss": 0.167,
"step": 9375
},
{
"epoch": 5.669733765744023,
"grad_norm": 0.30477166175842285,
"learning_rate": 0.00027302062272543465,
"loss": 0.128,
"step": 9400
},
{
"epoch": 5.684817859567087,
"grad_norm": 0.19481921195983887,
"learning_rate": 0.0002729448038819248,
"loss": 0.1736,
"step": 9425
},
{
"epoch": 5.69990195339015,
"grad_norm": 0.5762272477149963,
"learning_rate": 0.0002728689850384149,
"loss": 0.1353,
"step": 9450
},
{
"epoch": 5.7149860472132135,
"grad_norm": 0.9085726141929626,
"learning_rate": 0.00027279316619490497,
"loss": 0.1679,
"step": 9475
},
{
"epoch": 5.730070141036277,
"grad_norm": 0.5637058615684509,
"learning_rate": 0.00027271734735139505,
"loss": 0.1528,
"step": 9500
},
{
"epoch": 5.730070141036277,
"eval_loss": 0.1828906089067459,
"eval_runtime": 770.1353,
"eval_samples_per_second": 4.304,
"eval_steps_per_second": 1.076,
"eval_wer": 0.208940236462776,
"step": 9500
},
{
"epoch": 5.745154234859341,
"grad_norm": 0.1850712150335312,
"learning_rate": 0.00027264152850788514,
"loss": 0.1782,
"step": 9525
},
{
"epoch": 5.760238328682404,
"grad_norm": 0.4733022153377533,
"learning_rate": 0.00027256570966437523,
"loss": 0.136,
"step": 9550
},
{
"epoch": 5.775322422505468,
"grad_norm": 0.2352697253227234,
"learning_rate": 0.0002724898908208653,
"loss": 0.1636,
"step": 9575
},
{
"epoch": 5.790406516328532,
"grad_norm": 0.297294557094574,
"learning_rate": 0.0002724140719773554,
"loss": 0.1399,
"step": 9600
},
{
"epoch": 5.805490610151595,
"grad_norm": 0.22856192290782928,
"learning_rate": 0.0002723382531338455,
"loss": 0.1672,
"step": 9625
},
{
"epoch": 5.820574703974659,
"grad_norm": 0.19410184025764465,
"learning_rate": 0.00027226243429033563,
"loss": 0.1342,
"step": 9650
},
{
"epoch": 5.835658797797723,
"grad_norm": 0.15539394319057465,
"learning_rate": 0.00027218661544682566,
"loss": 0.178,
"step": 9675
},
{
"epoch": 5.8507428916207855,
"grad_norm": 0.27372846007347107,
"learning_rate": 0.0002721107966033158,
"loss": 0.1359,
"step": 9700
},
{
"epoch": 5.865826985443849,
"grad_norm": 0.1481178253889084,
"learning_rate": 0.0002720349777598059,
"loss": 0.1484,
"step": 9725
},
{
"epoch": 5.880911079266913,
"grad_norm": 0.22347645461559296,
"learning_rate": 0.000271959158916296,
"loss": 0.1372,
"step": 9750
},
{
"epoch": 5.895995173089977,
"grad_norm": 0.13955964148044586,
"learning_rate": 0.00027188334007278607,
"loss": 0.1506,
"step": 9775
},
{
"epoch": 5.91107926691304,
"grad_norm": 0.44418036937713623,
"learning_rate": 0.00027180752122927615,
"loss": 0.129,
"step": 9800
},
{
"epoch": 5.926163360736104,
"grad_norm": 0.18400181829929352,
"learning_rate": 0.00027173170238576624,
"loss": 0.1812,
"step": 9825
},
{
"epoch": 5.941247454559168,
"grad_norm": 0.3210029602050781,
"learning_rate": 0.0002716558835422564,
"loss": 0.1279,
"step": 9850
},
{
"epoch": 5.956331548382231,
"grad_norm": 0.4363233745098114,
"learning_rate": 0.0002715800646987464,
"loss": 0.1578,
"step": 9875
},
{
"epoch": 5.971415642205295,
"grad_norm": 0.9395078420639038,
"learning_rate": 0.00027150424585523655,
"loss": 0.1398,
"step": 9900
},
{
"epoch": 5.9864997360283585,
"grad_norm": 0.16781263053417206,
"learning_rate": 0.00027142842701172664,
"loss": 0.1687,
"step": 9925
},
{
"epoch": 6.001206727505845,
"grad_norm": 0.5314217209815979,
"learning_rate": 0.0002713526081682167,
"loss": 0.1023,
"step": 9950
},
{
"epoch": 6.016290821328909,
"grad_norm": 0.503394365310669,
"learning_rate": 0.0002712767893247068,
"loss": 0.2136,
"step": 9975
},
{
"epoch": 6.031374915151972,
"grad_norm": 0.38936060667037964,
"learning_rate": 0.0002712009704811969,
"loss": 0.0668,
"step": 10000
},
{
"epoch": 6.031374915151972,
"eval_loss": 0.19079850614070892,
"eval_runtime": 765.219,
"eval_samples_per_second": 4.332,
"eval_steps_per_second": 1.083,
"eval_wer": 0.20498839280894024,
"step": 10000
},
{
"epoch": 6.031374915151972,
"step": 10000,
"total_flos": 7.129096476605158e+19,
"train_loss": 0.34351529302597045,
"train_runtime": 124569.2108,
"train_samples_per_second": 12.772,
"train_steps_per_second": 0.798
}
],
"logging_steps": 25,
"max_steps": 99420,
"num_input_tokens_seen": 0,
"num_train_epochs": 60,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 5
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.129096476605158e+19,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}