{
"best_global_step": 2000,
"best_metric": 15.578133881334441,
"best_model_checkpoint": "./SALAMA_NEWMED1/checkpoint-2000",
"epoch": 0.798961350244682,
"eval_steps": 2000,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00399480675122341,
"grad_norm": 29.325483322143555,
"learning_rate": 1.8e-07,
"loss": 3.9137,
"step": 10
},
{
"epoch": 0.00798961350244682,
"grad_norm": 53.530357360839844,
"learning_rate": 3.8e-07,
"loss": 3.8712,
"step": 20
},
{
"epoch": 0.011984420253670228,
"grad_norm": 36.358612060546875,
"learning_rate": 5.800000000000001e-07,
"loss": 3.0878,
"step": 30
},
{
"epoch": 0.01597922700489364,
"grad_norm": 16.489139556884766,
"learning_rate": 7.8e-07,
"loss": 2.7678,
"step": 40
},
{
"epoch": 0.019974033756117046,
"grad_norm": 12.16690444946289,
"learning_rate": 9.800000000000001e-07,
"loss": 2.3664,
"step": 50
},
{
"epoch": 0.023968840507340456,
"grad_norm": 12.258421897888184,
"learning_rate": 1.1800000000000001e-06,
"loss": 2.1087,
"step": 60
},
{
"epoch": 0.027963647258563866,
"grad_norm": 10.25462818145752,
"learning_rate": 1.3800000000000001e-06,
"loss": 1.8865,
"step": 70
},
{
"epoch": 0.03195845400978728,
"grad_norm": 10.944440841674805,
"learning_rate": 1.5800000000000001e-06,
"loss": 1.7902,
"step": 80
},
{
"epoch": 0.035953260761010686,
"grad_norm": 10.586995124816895,
"learning_rate": 1.7800000000000001e-06,
"loss": 1.6421,
"step": 90
},
{
"epoch": 0.03994806751223409,
"grad_norm": 10.034771919250488,
"learning_rate": 1.98e-06,
"loss": 1.5449,
"step": 100
},
{
"epoch": 0.043942874263457506,
"grad_norm": 10.443981170654297,
"learning_rate": 2.1800000000000003e-06,
"loss": 1.4428,
"step": 110
},
{
"epoch": 0.04793768101468091,
"grad_norm": 9.427363395690918,
"learning_rate": 2.38e-06,
"loss": 1.2572,
"step": 120
},
{
"epoch": 0.051932487765904325,
"grad_norm": 10.59061050415039,
"learning_rate": 2.5800000000000003e-06,
"loss": 1.072,
"step": 130
},
{
"epoch": 0.05592729451712773,
"grad_norm": 7.301783084869385,
"learning_rate": 2.7800000000000005e-06,
"loss": 0.7508,
"step": 140
},
{
"epoch": 0.059922101268351145,
"grad_norm": 8.623008728027344,
"learning_rate": 2.9800000000000003e-06,
"loss": 0.7196,
"step": 150
},
{
"epoch": 0.06391690801957456,
"grad_norm": 8.511634826660156,
"learning_rate": 3.1800000000000005e-06,
"loss": 0.6404,
"step": 160
},
{
"epoch": 0.06791171477079797,
"grad_norm": 8.052703857421875,
"learning_rate": 3.3800000000000007e-06,
"loss": 0.6112,
"step": 170
},
{
"epoch": 0.07190652152202137,
"grad_norm": 7.108290672302246,
"learning_rate": 3.58e-06,
"loss": 0.6507,
"step": 180
},
{
"epoch": 0.07590132827324478,
"grad_norm": 7.996196269989014,
"learning_rate": 3.7800000000000002e-06,
"loss": 0.6163,
"step": 190
},
{
"epoch": 0.07989613502446818,
"grad_norm": 6.643161773681641,
"learning_rate": 3.980000000000001e-06,
"loss": 0.5806,
"step": 200
},
{
"epoch": 0.0838909417756916,
"grad_norm": 7.640316009521484,
"learning_rate": 4.18e-06,
"loss": 0.5503,
"step": 210
},
{
"epoch": 0.08788574852691501,
"grad_norm": 7.485447883605957,
"learning_rate": 4.38e-06,
"loss": 0.5672,
"step": 220
},
{
"epoch": 0.09188055527813842,
"grad_norm": 7.693063735961914,
"learning_rate": 4.58e-06,
"loss": 0.5593,
"step": 230
},
{
"epoch": 0.09587536202936182,
"grad_norm": 7.907495498657227,
"learning_rate": 4.78e-06,
"loss": 0.5122,
"step": 240
},
{
"epoch": 0.09987016878058524,
"grad_norm": 6.673120021820068,
"learning_rate": 4.980000000000001e-06,
"loss": 0.5111,
"step": 250
},
{
"epoch": 0.10386497553180865,
"grad_norm": 6.849791049957275,
"learning_rate": 5.18e-06,
"loss": 0.5122,
"step": 260
},
{
"epoch": 0.10785978228303206,
"grad_norm": 6.444810390472412,
"learning_rate": 5.380000000000001e-06,
"loss": 0.4601,
"step": 270
},
{
"epoch": 0.11185458903425546,
"grad_norm": 5.884876728057861,
"learning_rate": 5.580000000000001e-06,
"loss": 0.4452,
"step": 280
},
{
"epoch": 0.11584939578547888,
"grad_norm": 5.974656105041504,
"learning_rate": 5.78e-06,
"loss": 0.495,
"step": 290
},
{
"epoch": 0.11984420253670229,
"grad_norm": 5.884718418121338,
"learning_rate": 5.98e-06,
"loss": 0.4276,
"step": 300
},
{
"epoch": 0.1238390092879257,
"grad_norm": 6.197109699249268,
"learning_rate": 6.18e-06,
"loss": 0.4378,
"step": 310
},
{
"epoch": 0.12783381603914912,
"grad_norm": 6.656605243682861,
"learning_rate": 6.380000000000001e-06,
"loss": 0.4352,
"step": 320
},
{
"epoch": 0.13182862279037252,
"grad_norm": 6.549432277679443,
"learning_rate": 6.5800000000000005e-06,
"loss": 0.4335,
"step": 330
},
{
"epoch": 0.13582342954159593,
"grad_norm": 5.669299125671387,
"learning_rate": 6.780000000000001e-06,
"loss": 0.4776,
"step": 340
},
{
"epoch": 0.13981823629281934,
"grad_norm": 5.265035152435303,
"learning_rate": 6.98e-06,
"loss": 0.4493,
"step": 350
},
{
"epoch": 0.14381304304404274,
"grad_norm": 6.127915382385254,
"learning_rate": 7.180000000000001e-06,
"loss": 0.4505,
"step": 360
},
{
"epoch": 0.14780784979526615,
"grad_norm": 6.895223617553711,
"learning_rate": 7.3800000000000005e-06,
"loss": 0.4369,
"step": 370
},
{
"epoch": 0.15180265654648956,
"grad_norm": 5.936121940612793,
"learning_rate": 7.58e-06,
"loss": 0.4212,
"step": 380
},
{
"epoch": 0.15579746329771296,
"grad_norm": 5.121963977813721,
"learning_rate": 7.78e-06,
"loss": 0.3993,
"step": 390
},
{
"epoch": 0.15979227004893637,
"grad_norm": 6.143499851226807,
"learning_rate": 7.980000000000002e-06,
"loss": 0.4361,
"step": 400
},
{
"epoch": 0.1637870768001598,
"grad_norm": 5.8908491134643555,
"learning_rate": 8.18e-06,
"loss": 0.4003,
"step": 410
},
{
"epoch": 0.1677818835513832,
"grad_norm": 5.114202499389648,
"learning_rate": 8.380000000000001e-06,
"loss": 0.3558,
"step": 420
},
{
"epoch": 0.17177669030260662,
"grad_norm": 5.836564540863037,
"learning_rate": 8.580000000000001e-06,
"loss": 0.3742,
"step": 430
},
{
"epoch": 0.17577149705383002,
"grad_norm": 5.583286285400391,
"learning_rate": 8.78e-06,
"loss": 0.4112,
"step": 440
},
{
"epoch": 0.17976630380505343,
"grad_norm": 6.04997444152832,
"learning_rate": 8.98e-06,
"loss": 0.3803,
"step": 450
},
{
"epoch": 0.18376111055627684,
"grad_norm": 5.244260787963867,
"learning_rate": 9.180000000000002e-06,
"loss": 0.4266,
"step": 460
},
{
"epoch": 0.18775591730750024,
"grad_norm": 5.963865756988525,
"learning_rate": 9.38e-06,
"loss": 0.3907,
"step": 470
},
{
"epoch": 0.19175072405872365,
"grad_norm": 5.7191057205200195,
"learning_rate": 9.58e-06,
"loss": 0.3676,
"step": 480
},
{
"epoch": 0.19574553080994708,
"grad_norm": 5.471884727478027,
"learning_rate": 9.780000000000001e-06,
"loss": 0.3764,
"step": 490
},
{
"epoch": 0.1997403375611705,
"grad_norm": 6.182490825653076,
"learning_rate": 9.980000000000001e-06,
"loss": 0.3758,
"step": 500
},
{
"epoch": 0.2037351443123939,
"grad_norm": 4.735085487365723,
"learning_rate": 9.98716486023959e-06,
"loss": 0.3518,
"step": 510
},
{
"epoch": 0.2077299510636173,
"grad_norm": 5.506002426147461,
"learning_rate": 9.972903593839133e-06,
"loss": 0.3887,
"step": 520
},
{
"epoch": 0.2117247578148407,
"grad_norm": 5.19524621963501,
"learning_rate": 9.958642327438678e-06,
"loss": 0.3979,
"step": 530
},
{
"epoch": 0.21571956456606411,
"grad_norm": 6.315662860870361,
"learning_rate": 9.944381061038221e-06,
"loss": 0.3847,
"step": 540
},
{
"epoch": 0.21971437131728752,
"grad_norm": 5.95819616317749,
"learning_rate": 9.930119794637765e-06,
"loss": 0.3516,
"step": 550
},
{
"epoch": 0.22370917806851093,
"grad_norm": 5.994788646697998,
"learning_rate": 9.91585852823731e-06,
"loss": 0.346,
"step": 560
},
{
"epoch": 0.22770398481973433,
"grad_norm": 4.4159369468688965,
"learning_rate": 9.901597261836851e-06,
"loss": 0.3584,
"step": 570
},
{
"epoch": 0.23169879157095777,
"grad_norm": 4.532053470611572,
"learning_rate": 9.887335995436396e-06,
"loss": 0.4037,
"step": 580
},
{
"epoch": 0.23569359832218117,
"grad_norm": 5.412591457366943,
"learning_rate": 9.87307472903594e-06,
"loss": 0.3547,
"step": 590
},
{
"epoch": 0.23968840507340458,
"grad_norm": 5.319347381591797,
"learning_rate": 9.858813462635483e-06,
"loss": 0.3619,
"step": 600
},
{
"epoch": 0.243683211824628,
"grad_norm": 4.947774887084961,
"learning_rate": 9.844552196235026e-06,
"loss": 0.3764,
"step": 610
},
{
"epoch": 0.2476780185758514,
"grad_norm": 4.8416666984558105,
"learning_rate": 9.83029092983457e-06,
"loss": 0.3073,
"step": 620
},
{
"epoch": 0.2516728253270748,
"grad_norm": 6.026658535003662,
"learning_rate": 9.816029663434114e-06,
"loss": 0.3709,
"step": 630
},
{
"epoch": 0.25566763207829823,
"grad_norm": 6.480794429779053,
"learning_rate": 9.801768397033657e-06,
"loss": 0.3131,
"step": 640
},
{
"epoch": 0.2596624388295216,
"grad_norm": 5.907824516296387,
"learning_rate": 9.787507130633202e-06,
"loss": 0.3447,
"step": 650
},
{
"epoch": 0.26365724558074505,
"grad_norm": 4.916101932525635,
"learning_rate": 9.773245864232744e-06,
"loss": 0.3436,
"step": 660
},
{
"epoch": 0.2676520523319684,
"grad_norm": 5.599504470825195,
"learning_rate": 9.758984597832289e-06,
"loss": 0.3236,
"step": 670
},
{
"epoch": 0.27164685908319186,
"grad_norm": 6.6841654777526855,
"learning_rate": 9.744723331431832e-06,
"loss": 0.3491,
"step": 680
},
{
"epoch": 0.27564166583441524,
"grad_norm": 6.430059909820557,
"learning_rate": 9.730462065031375e-06,
"loss": 0.3343,
"step": 690
},
{
"epoch": 0.2796364725856387,
"grad_norm": 5.829887866973877,
"learning_rate": 9.71620079863092e-06,
"loss": 0.345,
"step": 700
},
{
"epoch": 0.28363127933686205,
"grad_norm": 4.85678243637085,
"learning_rate": 9.701939532230463e-06,
"loss": 0.3756,
"step": 710
},
{
"epoch": 0.2876260860880855,
"grad_norm": 4.557241439819336,
"learning_rate": 9.687678265830007e-06,
"loss": 0.2933,
"step": 720
},
{
"epoch": 0.2916208928393089,
"grad_norm": 5.587825298309326,
"learning_rate": 9.67341699942955e-06,
"loss": 0.3247,
"step": 730
},
{
"epoch": 0.2956156995905323,
"grad_norm": 5.785783290863037,
"learning_rate": 9.659155733029095e-06,
"loss": 0.3489,
"step": 740
},
{
"epoch": 0.29961050634175573,
"grad_norm": 5.360633850097656,
"learning_rate": 9.644894466628636e-06,
"loss": 0.3727,
"step": 750
},
{
"epoch": 0.3036053130929791,
"grad_norm": 5.479579448699951,
"learning_rate": 9.630633200228181e-06,
"loss": 0.3386,
"step": 760
},
{
"epoch": 0.30760011984420255,
"grad_norm": 5.192019462585449,
"learning_rate": 9.616371933827725e-06,
"loss": 0.3268,
"step": 770
},
{
"epoch": 0.3115949265954259,
"grad_norm": 5.177510738372803,
"learning_rate": 9.602110667427268e-06,
"loss": 0.3485,
"step": 780
},
{
"epoch": 0.31558973334664936,
"grad_norm": 4.933345317840576,
"learning_rate": 9.587849401026813e-06,
"loss": 0.3028,
"step": 790
},
{
"epoch": 0.31958454009787274,
"grad_norm": 4.921146869659424,
"learning_rate": 9.573588134626356e-06,
"loss": 0.3348,
"step": 800
},
{
"epoch": 0.32357934684909617,
"grad_norm": 5.784856796264648,
"learning_rate": 9.5593268682259e-06,
"loss": 0.2961,
"step": 810
},
{
"epoch": 0.3275741536003196,
"grad_norm": 5.327091693878174,
"learning_rate": 9.545065601825442e-06,
"loss": 0.2914,
"step": 820
},
{
"epoch": 0.331568960351543,
"grad_norm": 4.885860443115234,
"learning_rate": 9.530804335424987e-06,
"loss": 0.3156,
"step": 830
},
{
"epoch": 0.3355637671027664,
"grad_norm": 5.5808844566345215,
"learning_rate": 9.516543069024529e-06,
"loss": 0.3452,
"step": 840
},
{
"epoch": 0.3395585738539898,
"grad_norm": 5.16964054107666,
"learning_rate": 9.502281802624074e-06,
"loss": 0.2737,
"step": 850
},
{
"epoch": 0.34355338060521323,
"grad_norm": 4.464290142059326,
"learning_rate": 9.488020536223617e-06,
"loss": 0.2717,
"step": 860
},
{
"epoch": 0.3475481873564366,
"grad_norm": 4.1654791831970215,
"learning_rate": 9.47375926982316e-06,
"loss": 0.2875,
"step": 870
},
{
"epoch": 0.35154299410766005,
"grad_norm": 5.758086681365967,
"learning_rate": 9.459498003422705e-06,
"loss": 0.2947,
"step": 880
},
{
"epoch": 0.3555378008588834,
"grad_norm": 4.330506801605225,
"learning_rate": 9.445236737022249e-06,
"loss": 0.2957,
"step": 890
},
{
"epoch": 0.35953260761010686,
"grad_norm": 4.616450786590576,
"learning_rate": 9.430975470621792e-06,
"loss": 0.2752,
"step": 900
},
{
"epoch": 0.3635274143613303,
"grad_norm": 4.364641189575195,
"learning_rate": 9.416714204221335e-06,
"loss": 0.33,
"step": 910
},
{
"epoch": 0.36752222111255367,
"grad_norm": 4.513298034667969,
"learning_rate": 9.40245293782088e-06,
"loss": 0.3098,
"step": 920
},
{
"epoch": 0.3715170278637771,
"grad_norm": 5.608431816101074,
"learning_rate": 9.388191671420423e-06,
"loss": 0.2962,
"step": 930
},
{
"epoch": 0.3755118346150005,
"grad_norm": 3.523578405380249,
"learning_rate": 9.373930405019966e-06,
"loss": 0.2754,
"step": 940
},
{
"epoch": 0.3795066413662239,
"grad_norm": 5.222875595092773,
"learning_rate": 9.35966913861951e-06,
"loss": 0.2875,
"step": 950
},
{
"epoch": 0.3835014481174473,
"grad_norm": 5.467960357666016,
"learning_rate": 9.345407872219053e-06,
"loss": 0.3089,
"step": 960
},
{
"epoch": 0.38749625486867073,
"grad_norm": 4.540034770965576,
"learning_rate": 9.331146605818598e-06,
"loss": 0.2808,
"step": 970
},
{
"epoch": 0.39149106161989417,
"grad_norm": 4.5449628829956055,
"learning_rate": 9.316885339418141e-06,
"loss": 0.3063,
"step": 980
},
{
"epoch": 0.39548586837111754,
"grad_norm": 5.014548301696777,
"learning_rate": 9.302624073017684e-06,
"loss": 0.2795,
"step": 990
},
{
"epoch": 0.399480675122341,
"grad_norm": 4.645490646362305,
"learning_rate": 9.288362806617228e-06,
"loss": 0.2749,
"step": 1000
},
{
"epoch": 0.40347548187356436,
"grad_norm": 4.577834606170654,
"learning_rate": 9.274101540216773e-06,
"loss": 0.2722,
"step": 1010
},
{
"epoch": 0.4074702886247878,
"grad_norm": 3.580354928970337,
"learning_rate": 9.259840273816316e-06,
"loss": 0.2543,
"step": 1020
},
{
"epoch": 0.41146509537601117,
"grad_norm": 4.645276069641113,
"learning_rate": 9.245579007415859e-06,
"loss": 0.2703,
"step": 1030
},
{
"epoch": 0.4154599021272346,
"grad_norm": 4.813560485839844,
"learning_rate": 9.231317741015402e-06,
"loss": 0.2708,
"step": 1040
},
{
"epoch": 0.419454708878458,
"grad_norm": 4.8925371170043945,
"learning_rate": 9.217056474614946e-06,
"loss": 0.2894,
"step": 1050
},
{
"epoch": 0.4234495156296814,
"grad_norm": 4.675732135772705,
"learning_rate": 9.20279520821449e-06,
"loss": 0.2692,
"step": 1060
},
{
"epoch": 0.42744432238090485,
"grad_norm": 4.893064975738525,
"learning_rate": 9.188533941814034e-06,
"loss": 0.3143,
"step": 1070
},
{
"epoch": 0.43143912913212823,
"grad_norm": 4.4499592781066895,
"learning_rate": 9.174272675413579e-06,
"loss": 0.2846,
"step": 1080
},
{
"epoch": 0.43543393588335166,
"grad_norm": 5.22846794128418,
"learning_rate": 9.16001140901312e-06,
"loss": 0.2384,
"step": 1090
},
{
"epoch": 0.43942874263457504,
"grad_norm": 4.598273277282715,
"learning_rate": 9.145750142612665e-06,
"loss": 0.2798,
"step": 1100
},
{
"epoch": 0.4434235493857985,
"grad_norm": 4.458007335662842,
"learning_rate": 9.131488876212208e-06,
"loss": 0.2677,
"step": 1110
},
{
"epoch": 0.44741835613702186,
"grad_norm": 4.530386447906494,
"learning_rate": 9.117227609811752e-06,
"loss": 0.2985,
"step": 1120
},
{
"epoch": 0.4514131628882453,
"grad_norm": 4.605809688568115,
"learning_rate": 9.102966343411297e-06,
"loss": 0.2862,
"step": 1130
},
{
"epoch": 0.45540796963946867,
"grad_norm": 4.465124607086182,
"learning_rate": 9.08870507701084e-06,
"loss": 0.2603,
"step": 1140
},
{
"epoch": 0.4594027763906921,
"grad_norm": 4.143098831176758,
"learning_rate": 9.074443810610383e-06,
"loss": 0.2777,
"step": 1150
},
{
"epoch": 0.46339758314191554,
"grad_norm": 4.249698162078857,
"learning_rate": 9.060182544209926e-06,
"loss": 0.2545,
"step": 1160
},
{
"epoch": 0.4673923898931389,
"grad_norm": 5.094964027404785,
"learning_rate": 9.045921277809471e-06,
"loss": 0.2644,
"step": 1170
},
{
"epoch": 0.47138719664436235,
"grad_norm": 4.91431999206543,
"learning_rate": 9.031660011409013e-06,
"loss": 0.2718,
"step": 1180
},
{
"epoch": 0.47538200339558573,
"grad_norm": 5.91095495223999,
"learning_rate": 9.017398745008558e-06,
"loss": 0.2519,
"step": 1190
},
{
"epoch": 0.47937681014680916,
"grad_norm": 4.603298187255859,
"learning_rate": 9.003137478608101e-06,
"loss": 0.265,
"step": 1200
},
{
"epoch": 0.48337161689803254,
"grad_norm": 4.797214031219482,
"learning_rate": 8.988876212207644e-06,
"loss": 0.2407,
"step": 1210
},
{
"epoch": 0.487366423649256,
"grad_norm": 4.030250072479248,
"learning_rate": 8.97461494580719e-06,
"loss": 0.268,
"step": 1220
},
{
"epoch": 0.49136123040047935,
"grad_norm": 5.035445213317871,
"learning_rate": 8.960353679406733e-06,
"loss": 0.2636,
"step": 1230
},
{
"epoch": 0.4953560371517028,
"grad_norm": 4.499198913574219,
"learning_rate": 8.946092413006276e-06,
"loss": 0.2651,
"step": 1240
},
{
"epoch": 0.4993508439029262,
"grad_norm": 3.754934310913086,
"learning_rate": 8.931831146605819e-06,
"loss": 0.2537,
"step": 1250
},
{
"epoch": 0.5033456506541496,
"grad_norm": 4.640712738037109,
"learning_rate": 8.917569880205364e-06,
"loss": 0.2713,
"step": 1260
},
{
"epoch": 0.507340457405373,
"grad_norm": 4.7820305824279785,
"learning_rate": 8.903308613804906e-06,
"loss": 0.2554,
"step": 1270
},
{
"epoch": 0.5113352641565965,
"grad_norm": 5.130531311035156,
"learning_rate": 8.88904734740445e-06,
"loss": 0.2607,
"step": 1280
},
{
"epoch": 0.5153300709078198,
"grad_norm": 4.392201900482178,
"learning_rate": 8.874786081003994e-06,
"loss": 0.2445,
"step": 1290
},
{
"epoch": 0.5193248776590432,
"grad_norm": 3.3310599327087402,
"learning_rate": 8.860524814603537e-06,
"loss": 0.2355,
"step": 1300
},
{
"epoch": 0.5233196844102667,
"grad_norm": 3.532226800918579,
"learning_rate": 8.846263548203082e-06,
"loss": 0.2255,
"step": 1310
},
{
"epoch": 0.5273144911614901,
"grad_norm": 4.452306270599365,
"learning_rate": 8.832002281802625e-06,
"loss": 0.2671,
"step": 1320
},
{
"epoch": 0.5313092979127134,
"grad_norm": 3.8220486640930176,
"learning_rate": 8.817741015402168e-06,
"loss": 0.2731,
"step": 1330
},
{
"epoch": 0.5353041046639369,
"grad_norm": 4.6910319328308105,
"learning_rate": 8.803479749001712e-06,
"loss": 0.2617,
"step": 1340
},
{
"epoch": 0.5392989114151603,
"grad_norm": 4.17935848236084,
"learning_rate": 8.789218482601257e-06,
"loss": 0.2433,
"step": 1350
},
{
"epoch": 0.5432937181663837,
"grad_norm": 5.38765811920166,
"learning_rate": 8.7749572162008e-06,
"loss": 0.2466,
"step": 1360
},
{
"epoch": 0.5472885249176072,
"grad_norm": 4.427072525024414,
"learning_rate": 8.760695949800343e-06,
"loss": 0.2465,
"step": 1370
},
{
"epoch": 0.5512833316688305,
"grad_norm": 5.366933345794678,
"learning_rate": 8.746434683399886e-06,
"loss": 0.2568,
"step": 1380
},
{
"epoch": 0.5552781384200539,
"grad_norm": 4.636244773864746,
"learning_rate": 8.73217341699943e-06,
"loss": 0.2652,
"step": 1390
},
{
"epoch": 0.5592729451712773,
"grad_norm": 3.3288471698760986,
"learning_rate": 8.717912150598975e-06,
"loss": 0.2183,
"step": 1400
},
{
"epoch": 0.5632677519225008,
"grad_norm": 5.008598327636719,
"learning_rate": 8.703650884198518e-06,
"loss": 0.2838,
"step": 1410
},
{
"epoch": 0.5672625586737241,
"grad_norm": 5.04400110244751,
"learning_rate": 8.689389617798061e-06,
"loss": 0.2521,
"step": 1420
},
{
"epoch": 0.5712573654249475,
"grad_norm": 3.676771640777588,
"learning_rate": 8.675128351397604e-06,
"loss": 0.2235,
"step": 1430
},
{
"epoch": 0.575252172176171,
"grad_norm": 3.988940715789795,
"learning_rate": 8.66086708499715e-06,
"loss": 0.2561,
"step": 1440
},
{
"epoch": 0.5792469789273944,
"grad_norm": 3.753837823867798,
"learning_rate": 8.646605818596692e-06,
"loss": 0.2349,
"step": 1450
},
{
"epoch": 0.5832417856786178,
"grad_norm": 5.020956993103027,
"learning_rate": 8.632344552196236e-06,
"loss": 0.2605,
"step": 1460
},
{
"epoch": 0.5872365924298412,
"grad_norm": 4.19816780090332,
"learning_rate": 8.618083285795779e-06,
"loss": 0.2229,
"step": 1470
},
{
"epoch": 0.5912313991810646,
"grad_norm": 3.8098461627960205,
"learning_rate": 8.603822019395322e-06,
"loss": 0.2286,
"step": 1480
},
{
"epoch": 0.595226205932288,
"grad_norm": 4.122703552246094,
"learning_rate": 8.589560752994867e-06,
"loss": 0.2257,
"step": 1490
},
{
"epoch": 0.5992210126835115,
"grad_norm": 5.137128829956055,
"learning_rate": 8.57529948659441e-06,
"loss": 0.2531,
"step": 1500
},
{
"epoch": 0.6032158194347348,
"grad_norm": 4.318984508514404,
"learning_rate": 8.561038220193954e-06,
"loss": 0.2384,
"step": 1510
},
{
"epoch": 0.6072106261859582,
"grad_norm": 3.5681166648864746,
"learning_rate": 8.546776953793497e-06,
"loss": 0.282,
"step": 1520
},
{
"epoch": 0.6112054329371817,
"grad_norm": 2.9932525157928467,
"learning_rate": 8.532515687393042e-06,
"loss": 0.2095,
"step": 1530
},
{
"epoch": 0.6152002396884051,
"grad_norm": 3.375771999359131,
"learning_rate": 8.518254420992585e-06,
"loss": 0.2333,
"step": 1540
},
{
"epoch": 0.6191950464396285,
"grad_norm": 4.841177940368652,
"learning_rate": 8.503993154592128e-06,
"loss": 0.2295,
"step": 1550
},
{
"epoch": 0.6231898531908519,
"grad_norm": 4.5939717292785645,
"learning_rate": 8.489731888191672e-06,
"loss": 0.2494,
"step": 1560
},
{
"epoch": 0.6271846599420753,
"grad_norm": 3.88786244392395,
"learning_rate": 8.475470621791215e-06,
"loss": 0.2324,
"step": 1570
},
{
"epoch": 0.6311794666932987,
"grad_norm": 3.3802106380462646,
"learning_rate": 8.46120935539076e-06,
"loss": 0.2413,
"step": 1580
},
{
"epoch": 0.6351742734445222,
"grad_norm": 4.619571685791016,
"learning_rate": 8.446948088990303e-06,
"loss": 0.2165,
"step": 1590
},
{
"epoch": 0.6391690801957455,
"grad_norm": 4.9018874168396,
"learning_rate": 8.432686822589846e-06,
"loss": 0.2496,
"step": 1600
},
{
"epoch": 0.6431638869469689,
"grad_norm": 4.832056522369385,
"learning_rate": 8.41842555618939e-06,
"loss": 0.1857,
"step": 1610
},
{
"epoch": 0.6471586936981923,
"grad_norm": 4.44606351852417,
"learning_rate": 8.404164289788934e-06,
"loss": 0.2469,
"step": 1620
},
{
"epoch": 0.6511535004494158,
"grad_norm": 4.6510138511657715,
"learning_rate": 8.389903023388478e-06,
"loss": 0.2419,
"step": 1630
},
{
"epoch": 0.6551483072006392,
"grad_norm": 4.438849925994873,
"learning_rate": 8.375641756988021e-06,
"loss": 0.2488,
"step": 1640
},
{
"epoch": 0.6591431139518625,
"grad_norm": 4.8355889320373535,
"learning_rate": 8.361380490587566e-06,
"loss": 0.2535,
"step": 1650
},
{
"epoch": 0.663137920703086,
"grad_norm": 3.961348533630371,
"learning_rate": 8.347119224187107e-06,
"loss": 0.2338,
"step": 1660
},
{
"epoch": 0.6671327274543094,
"grad_norm": 4.276237964630127,
"learning_rate": 8.332857957786652e-06,
"loss": 0.2417,
"step": 1670
},
{
"epoch": 0.6711275342055328,
"grad_norm": 4.317446231842041,
"learning_rate": 8.318596691386196e-06,
"loss": 0.215,
"step": 1680
},
{
"epoch": 0.6751223409567562,
"grad_norm": 4.8514018058776855,
"learning_rate": 8.304335424985739e-06,
"loss": 0.2462,
"step": 1690
},
{
"epoch": 0.6791171477079796,
"grad_norm": 5.050139427185059,
"learning_rate": 8.290074158585282e-06,
"loss": 0.2228,
"step": 1700
},
{
"epoch": 0.683111954459203,
"grad_norm": 3.6597845554351807,
"learning_rate": 8.275812892184827e-06,
"loss": 0.2337,
"step": 1710
},
{
"epoch": 0.6871067612104265,
"grad_norm": 3.3534600734710693,
"learning_rate": 8.26155162578437e-06,
"loss": 0.2402,
"step": 1720
},
{
"epoch": 0.6911015679616499,
"grad_norm": 3.987107276916504,
"learning_rate": 8.247290359383914e-06,
"loss": 0.2154,
"step": 1730
},
{
"epoch": 0.6950963747128732,
"grad_norm": 3.4701499938964844,
"learning_rate": 8.233029092983458e-06,
"loss": 0.2446,
"step": 1740
},
{
"epoch": 0.6990911814640967,
"grad_norm": 3.479576826095581,
"learning_rate": 8.218767826583002e-06,
"loss": 0.2138,
"step": 1750
},
{
"epoch": 0.7030859882153201,
"grad_norm": 4.3463358879089355,
"learning_rate": 8.204506560182545e-06,
"loss": 0.2202,
"step": 1760
},
{
"epoch": 0.7070807949665435,
"grad_norm": 3.920137882232666,
"learning_rate": 8.190245293782088e-06,
"loss": 0.2184,
"step": 1770
},
{
"epoch": 0.7110756017177668,
"grad_norm": 4.3204121589660645,
"learning_rate": 8.175984027381633e-06,
"loss": 0.2467,
"step": 1780
},
{
"epoch": 0.7150704084689903,
"grad_norm": 4.039252758026123,
"learning_rate": 8.161722760981175e-06,
"loss": 0.2238,
"step": 1790
},
{
"epoch": 0.7190652152202137,
"grad_norm": 4.227252006530762,
"learning_rate": 8.14746149458072e-06,
"loss": 0.2023,
"step": 1800
},
{
"epoch": 0.7230600219714372,
"grad_norm": 2.9367706775665283,
"learning_rate": 8.133200228180263e-06,
"loss": 0.1977,
"step": 1810
},
{
"epoch": 0.7270548287226606,
"grad_norm": 4.3969831466674805,
"learning_rate": 8.118938961779806e-06,
"loss": 0.2494,
"step": 1820
},
{
"epoch": 0.7310496354738839,
"grad_norm": 3.7488110065460205,
"learning_rate": 8.104677695379351e-06,
"loss": 0.2295,
"step": 1830
},
{
"epoch": 0.7350444422251073,
"grad_norm": 5.048527717590332,
"learning_rate": 8.090416428978894e-06,
"loss": 0.2524,
"step": 1840
},
{
"epoch": 0.7390392489763308,
"grad_norm": 4.360014915466309,
"learning_rate": 8.076155162578438e-06,
"loss": 0.21,
"step": 1850
},
{
"epoch": 0.7430340557275542,
"grad_norm": 4.142049789428711,
"learning_rate": 8.06189389617798e-06,
"loss": 0.2198,
"step": 1860
},
{
"epoch": 0.7470288624787776,
"grad_norm": 3.6296193599700928,
"learning_rate": 8.047632629777526e-06,
"loss": 0.2239,
"step": 1870
},
{
"epoch": 0.751023669230001,
"grad_norm": 4.87466287612915,
"learning_rate": 8.033371363377069e-06,
"loss": 0.2253,
"step": 1880
},
{
"epoch": 0.7550184759812244,
"grad_norm": 3.6398117542266846,
"learning_rate": 8.019110096976612e-06,
"loss": 0.2325,
"step": 1890
},
{
"epoch": 0.7590132827324478,
"grad_norm": 3.6101794242858887,
"learning_rate": 8.004848830576156e-06,
"loss": 0.1967,
"step": 1900
},
{
"epoch": 0.7630080894836713,
"grad_norm": 4.282483100891113,
"learning_rate": 7.990587564175699e-06,
"loss": 0.2183,
"step": 1910
},
{
"epoch": 0.7670028962348946,
"grad_norm": 4.618693828582764,
"learning_rate": 7.976326297775244e-06,
"loss": 0.2356,
"step": 1920
},
{
"epoch": 0.770997702986118,
"grad_norm": 3.517585277557373,
"learning_rate": 7.962065031374787e-06,
"loss": 0.2346,
"step": 1930
},
{
"epoch": 0.7749925097373415,
"grad_norm": 3.8423843383789062,
"learning_rate": 7.94780376497433e-06,
"loss": 0.2436,
"step": 1940
},
{
"epoch": 0.7789873164885649,
"grad_norm": 4.099793910980225,
"learning_rate": 7.933542498573873e-06,
"loss": 0.2091,
"step": 1950
},
{
"epoch": 0.7829821232397883,
"grad_norm": 4.841848850250244,
"learning_rate": 7.919281232173418e-06,
"loss": 0.2328,
"step": 1960
},
{
"epoch": 0.7869769299910117,
"grad_norm": 4.761816024780273,
"learning_rate": 7.905019965772962e-06,
"loss": 0.2159,
"step": 1970
},
{
"epoch": 0.7909717367422351,
"grad_norm": 4.0117411613464355,
"learning_rate": 7.890758699372505e-06,
"loss": 0.2287,
"step": 1980
},
{
"epoch": 0.7949665434934585,
"grad_norm": 4.176918029785156,
"learning_rate": 7.876497432972048e-06,
"loss": 0.2036,
"step": 1990
},
{
"epoch": 0.798961350244682,
"grad_norm": 4.187548637390137,
"learning_rate": 7.862236166571591e-06,
"loss": 0.2175,
"step": 2000
},
{
"epoch": 0.798961350244682,
"eval_loss": 0.17748136818408966,
"eval_runtime": 8461.8051,
"eval_samples_per_second": 2.367,
"eval_steps_per_second": 0.296,
"eval_wer": 15.578133881334441,
"step": 2000
}
],
"logging_steps": 10,
"max_steps": 7512,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 6.531871408128e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}