{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.1999980000199998,
"eval_steps": 500,
"global_step": 20000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.000999990000099999,
"grad_norm": 5341.90380859375,
"learning_rate": 2.0000000000000003e-06,
"loss": 594.5605,
"step": 100
},
{
"epoch": 0.001999980000199998,
"grad_norm": 789.9061889648438,
"learning_rate": 4.000000000000001e-06,
"loss": 127.941,
"step": 200
},
{
"epoch": 0.002999970000299997,
"grad_norm": 306.2316589355469,
"learning_rate": 6e-06,
"loss": 119.2364,
"step": 300
},
{
"epoch": 0.003999960000399996,
"grad_norm": 751.376220703125,
"learning_rate": 8.000000000000001e-06,
"loss": 118.0761,
"step": 400
},
{
"epoch": 0.004999950000499995,
"grad_norm": 335.5125732421875,
"learning_rate": 1e-05,
"loss": 109.0291,
"step": 500
},
{
"epoch": 0.005999940000599994,
"grad_norm": 510.80975341796875,
"learning_rate": 1.2e-05,
"loss": 109.4396,
"step": 600
},
{
"epoch": 0.006999930000699993,
"grad_norm": 314.0848083496094,
"learning_rate": 1.4e-05,
"loss": 84.1079,
"step": 700
},
{
"epoch": 0.007999920000799993,
"grad_norm": 935.3131713867188,
"learning_rate": 1.6000000000000003e-05,
"loss": 82.2216,
"step": 800
},
{
"epoch": 0.008999910000899992,
"grad_norm": 1358.540771484375,
"learning_rate": 1.8e-05,
"loss": 80.6757,
"step": 900
},
{
"epoch": 0.00999990000099999,
"grad_norm": 317.9821472167969,
"learning_rate": 2e-05,
"loss": 62.5531,
"step": 1000
},
{
"epoch": 0.010999890001099988,
"grad_norm": 649.0853271484375,
"learning_rate": 2.2000000000000003e-05,
"loss": 56.7378,
"step": 1100
},
{
"epoch": 0.011999880001199987,
"grad_norm": 530.7359008789062,
"learning_rate": 2.4e-05,
"loss": 56.3481,
"step": 1200
},
{
"epoch": 0.012999870001299986,
"grad_norm": 241.44517517089844,
"learning_rate": 2.6000000000000002e-05,
"loss": 53.1624,
"step": 1300
},
{
"epoch": 0.013999860001399985,
"grad_norm": 1705.9263916015625,
"learning_rate": 2.8e-05,
"loss": 57.0686,
"step": 1400
},
{
"epoch": 0.014999850001499985,
"grad_norm": 1379.0430908203125,
"learning_rate": 3.0000000000000004e-05,
"loss": 47.7146,
"step": 1500
},
{
"epoch": 0.015999840001599985,
"grad_norm": 253.24891662597656,
"learning_rate": 3.2000000000000005e-05,
"loss": 42.4126,
"step": 1600
},
{
"epoch": 0.016999830001699984,
"grad_norm": 258.5624694824219,
"learning_rate": 3.4e-05,
"loss": 48.3719,
"step": 1700
},
{
"epoch": 0.017999820001799983,
"grad_norm": 273.0712585449219,
"learning_rate": 3.6e-05,
"loss": 38.7285,
"step": 1800
},
{
"epoch": 0.018999810001899983,
"grad_norm": 820.5573120117188,
"learning_rate": 3.8e-05,
"loss": 44.3776,
"step": 1900
},
{
"epoch": 0.01999980000199998,
"grad_norm": 538.52587890625,
"learning_rate": 4e-05,
"loss": 33.6354,
"step": 2000
},
{
"epoch": 0.02099979000209998,
"grad_norm": 987.8306884765625,
"learning_rate": 3.999997482501191e-05,
"loss": 39.2998,
"step": 2100
},
{
"epoch": 0.021999780002199976,
"grad_norm": 963.3218383789062,
"learning_rate": 3.9999899300111024e-05,
"loss": 30.5145,
"step": 2200
},
{
"epoch": 0.022999770002299975,
"grad_norm": 384.54278564453125,
"learning_rate": 3.999977342548747e-05,
"loss": 26.2319,
"step": 2300
},
{
"epoch": 0.023999760002399975,
"grad_norm": 362.4068908691406,
"learning_rate": 3.9999597201458134e-05,
"loss": 21.7486,
"step": 2400
},
{
"epoch": 0.024999750002499974,
"grad_norm": 246.06344604492188,
"learning_rate": 3.9999370628466666e-05,
"loss": 18.8825,
"step": 2500
},
{
"epoch": 0.025999740002599973,
"grad_norm": 558.910888671875,
"learning_rate": 3.9999093707083455e-05,
"loss": 18.5258,
"step": 2600
},
{
"epoch": 0.026999730002699972,
"grad_norm": 4613.8505859375,
"learning_rate": 3.999876643800567e-05,
"loss": 20.1839,
"step": 2700
},
{
"epoch": 0.02799972000279997,
"grad_norm": 246.72665405273438,
"learning_rate": 3.999838882205719e-05,
"loss": 17.0552,
"step": 2800
},
{
"epoch": 0.02899971000289997,
"grad_norm": 85.92310333251953,
"learning_rate": 3.9997960860188666e-05,
"loss": 16.4099,
"step": 2900
},
{
"epoch": 0.02999970000299997,
"grad_norm": 607.7698364257812,
"learning_rate": 3.9997482553477506e-05,
"loss": 15.0478,
"step": 3000
},
{
"epoch": 0.030999690003099968,
"grad_norm": 348.2640075683594,
"learning_rate": 3.999695390312783e-05,
"loss": 13.6557,
"step": 3100
},
{
"epoch": 0.03199968000319997,
"grad_norm": 553.7791137695312,
"learning_rate": 3.999637491047052e-05,
"loss": 16.3278,
"step": 3200
},
{
"epoch": 0.032999670003299966,
"grad_norm": 851.149169921875,
"learning_rate": 3.999574557696319e-05,
"loss": 15.7218,
"step": 3300
},
{
"epoch": 0.03399966000339997,
"grad_norm": 131.01693725585938,
"learning_rate": 3.9995065904190185e-05,
"loss": 11.3467,
"step": 3400
},
{
"epoch": 0.034999650003499964,
"grad_norm": 41.529632568359375,
"learning_rate": 3.999433589386259e-05,
"loss": 12.6374,
"step": 3500
},
{
"epoch": 0.03599964000359997,
"grad_norm": 205.26910400390625,
"learning_rate": 3.9993555547818186e-05,
"loss": 13.2414,
"step": 3600
},
{
"epoch": 0.03699963000369996,
"grad_norm": 89.92840576171875,
"learning_rate": 3.999272486802151e-05,
"loss": 10.1512,
"step": 3700
},
{
"epoch": 0.037999620003799965,
"grad_norm": 410.4055480957031,
"learning_rate": 3.9991843856563786e-05,
"loss": 9.8745,
"step": 3800
},
{
"epoch": 0.03899961000389996,
"grad_norm": 296.5409240722656,
"learning_rate": 3.999091251566297e-05,
"loss": 12.645,
"step": 3900
},
{
"epoch": 0.03999960000399996,
"grad_norm": 49.79493713378906,
"learning_rate": 3.9989930847663706e-05,
"loss": 12.447,
"step": 4000
},
{
"epoch": 0.04099959000409996,
"grad_norm": 477.4189147949219,
"learning_rate": 3.998889885503734e-05,
"loss": 11.1971,
"step": 4100
},
{
"epoch": 0.04199958000419996,
"grad_norm": 236.76950073242188,
"learning_rate": 3.998781654038192e-05,
"loss": 11.6718,
"step": 4200
},
{
"epoch": 0.04299957000429996,
"grad_norm": 484.04681396484375,
"learning_rate": 3.998668390642216e-05,
"loss": 9.9448,
"step": 4300
},
{
"epoch": 0.04399956000439995,
"grad_norm": 313.50457763671875,
"learning_rate": 3.998550095600948e-05,
"loss": 10.5262,
"step": 4400
},
{
"epoch": 0.044999550004499955,
"grad_norm": 82.21493530273438,
"learning_rate": 3.998426769212194e-05,
"loss": 13.1368,
"step": 4500
},
{
"epoch": 0.04599954000459995,
"grad_norm": 151.22616577148438,
"learning_rate": 3.9982984117864285e-05,
"loss": 9.8138,
"step": 4600
},
{
"epoch": 0.04699953000469995,
"grad_norm": 274.2380676269531,
"learning_rate": 3.9981650236467916e-05,
"loss": 10.2387,
"step": 4700
},
{
"epoch": 0.04799952000479995,
"grad_norm": 77.92356872558594,
"learning_rate": 3.998026605129088e-05,
"loss": 9.5225,
"step": 4800
},
{
"epoch": 0.04899951000489995,
"grad_norm": 202.85816955566406,
"learning_rate": 3.997883156581786e-05,
"loss": 9.8305,
"step": 4900
},
{
"epoch": 0.04999950000499995,
"grad_norm": 121.39033508300781,
"learning_rate": 3.9977346783660165e-05,
"loss": 12.1433,
"step": 5000
},
{
"epoch": 0.05099949000509995,
"grad_norm": 703.3681030273438,
"learning_rate": 3.997581170855573e-05,
"loss": 9.3006,
"step": 5100
},
{
"epoch": 0.051999480005199945,
"grad_norm": 905.860107421875,
"learning_rate": 3.9974226344369124e-05,
"loss": 9.2302,
"step": 5200
},
{
"epoch": 0.05299947000529995,
"grad_norm": 108.33662414550781,
"learning_rate": 3.9972590695091476e-05,
"loss": 9.0692,
"step": 5300
},
{
"epoch": 0.053999460005399944,
"grad_norm": 278.9888916015625,
"learning_rate": 3.9970904764840554e-05,
"loss": 9.0078,
"step": 5400
},
{
"epoch": 0.054999450005499946,
"grad_norm": 339.5447692871094,
"learning_rate": 3.9969168557860665e-05,
"loss": 8.5263,
"step": 5500
},
{
"epoch": 0.05599944000559994,
"grad_norm": 479.4326477050781,
"learning_rate": 3.9967382078522716e-05,
"loss": 8.6827,
"step": 5600
},
{
"epoch": 0.056999430005699944,
"grad_norm": 205.8404083251953,
"learning_rate": 3.9965545331324166e-05,
"loss": 8.0084,
"step": 5700
},
{
"epoch": 0.05799942000579994,
"grad_norm": 318.3268127441406,
"learning_rate": 3.996365832088903e-05,
"loss": 7.8264,
"step": 5800
},
{
"epoch": 0.05899941000589994,
"grad_norm": 545.3519287109375,
"learning_rate": 3.996172105196785e-05,
"loss": 8.5439,
"step": 5900
},
{
"epoch": 0.05999940000599994,
"grad_norm": 536.3674926757812,
"learning_rate": 3.995973352943769e-05,
"loss": 7.2916,
"step": 6000
},
{
"epoch": 0.06099939000609994,
"grad_norm": 29.15781593322754,
"learning_rate": 3.995769575830215e-05,
"loss": 8.3879,
"step": 6100
},
{
"epoch": 0.061999380006199936,
"grad_norm": 573.3207397460938,
"learning_rate": 3.99556077436913e-05,
"loss": 6.6733,
"step": 6200
},
{
"epoch": 0.06299937000629993,
"grad_norm": 206.36526489257812,
"learning_rate": 3.995346949086174e-05,
"loss": 5.7694,
"step": 6300
},
{
"epoch": 0.06399936000639994,
"grad_norm": 16834.9296875,
"learning_rate": 3.9951281005196486e-05,
"loss": 6.1864,
"step": 6400
},
{
"epoch": 0.06499935000649994,
"grad_norm": 401.1425476074219,
"learning_rate": 3.994904229220507e-05,
"loss": 6.1338,
"step": 6500
},
{
"epoch": 0.06599934000659993,
"grad_norm": 277.509765625,
"learning_rate": 3.994675335752345e-05,
"loss": 4.8342,
"step": 6600
},
{
"epoch": 0.06699933000669993,
"grad_norm": 544.186767578125,
"learning_rate": 3.9944414206914e-05,
"loss": 8.32,
"step": 6700
},
{
"epoch": 0.06799932000679994,
"grad_norm": 174.74789428710938,
"learning_rate": 3.994202484626555e-05,
"loss": 10.03,
"step": 6800
},
{
"epoch": 0.06899931000689993,
"grad_norm": 562.16357421875,
"learning_rate": 3.99395852815933e-05,
"loss": 7.7643,
"step": 6900
},
{
"epoch": 0.06999930000699993,
"grad_norm": 219.15576171875,
"learning_rate": 3.993709551903885e-05,
"loss": 8.8717,
"step": 7000
},
{
"epoch": 0.07099929000709992,
"grad_norm": 159.7786102294922,
"learning_rate": 3.993455556487018e-05,
"loss": 7.3802,
"step": 7100
},
{
"epoch": 0.07199928000719993,
"grad_norm": 224.97128295898438,
"learning_rate": 3.993196542548162e-05,
"loss": 9.665,
"step": 7200
},
{
"epoch": 0.07299927000729993,
"grad_norm": 691.8470458984375,
"learning_rate": 3.992932510739383e-05,
"loss": 5.8392,
"step": 7300
},
{
"epoch": 0.07399926000739993,
"grad_norm": 196.7902069091797,
"learning_rate": 3.992663461725383e-05,
"loss": 5.8781,
"step": 7400
},
{
"epoch": 0.07499925000749992,
"grad_norm": 450.8050231933594,
"learning_rate": 3.9923893961834914e-05,
"loss": 8.0027,
"step": 7500
},
{
"epoch": 0.07599924000759993,
"grad_norm": 182.17953491210938,
"learning_rate": 3.992110314803668e-05,
"loss": 4.6279,
"step": 7600
},
{
"epoch": 0.07699923000769993,
"grad_norm": 176.743896484375,
"learning_rate": 3.9918262182884994e-05,
"loss": 8.5188,
"step": 7700
},
{
"epoch": 0.07799922000779992,
"grad_norm": 382.1143798828125,
"learning_rate": 3.9915371073531995e-05,
"loss": 6.4761,
"step": 7800
},
{
"epoch": 0.07899921000789992,
"grad_norm": 513.8743286132812,
"learning_rate": 3.991242982725603e-05,
"loss": 5.1712,
"step": 7900
},
{
"epoch": 0.07999920000799993,
"grad_norm": 197.9111785888672,
"learning_rate": 3.9909438451461695e-05,
"loss": 7.5148,
"step": 8000
},
{
"epoch": 0.08099919000809992,
"grad_norm": 165.2156982421875,
"learning_rate": 3.990639695367977e-05,
"loss": 5.1054,
"step": 8100
},
{
"epoch": 0.08199918000819992,
"grad_norm": 162.3336944580078,
"learning_rate": 3.990330534156723e-05,
"loss": 5.3642,
"step": 8200
},
{
"epoch": 0.08299917000829991,
"grad_norm": 149.72377014160156,
"learning_rate": 3.9900163622907196e-05,
"loss": 7.4674,
"step": 8300
},
{
"epoch": 0.08399916000839992,
"grad_norm": 110.8755874633789,
"learning_rate": 3.9896971805608945e-05,
"loss": 6.2615,
"step": 8400
},
{
"epoch": 0.08499915000849992,
"grad_norm": 97.61107635498047,
"learning_rate": 3.989372989770787e-05,
"loss": 6.0901,
"step": 8500
},
{
"epoch": 0.08599914000859991,
"grad_norm": 47.84280776977539,
"learning_rate": 3.989043790736547e-05,
"loss": 6.6694,
"step": 8600
},
{
"epoch": 0.08699913000869991,
"grad_norm": 418.85052490234375,
"learning_rate": 3.988709584286933e-05,
"loss": 6.1093,
"step": 8700
},
{
"epoch": 0.0879991200087999,
"grad_norm": 402.12933349609375,
"learning_rate": 3.98837037126331e-05,
"loss": 5.6737,
"step": 8800
},
{
"epoch": 0.08899911000889991,
"grad_norm": 136.71066284179688,
"learning_rate": 3.988026152519645e-05,
"loss": 6.0548,
"step": 8900
},
{
"epoch": 0.08999910000899991,
"grad_norm": 319.9411926269531,
"learning_rate": 3.9876769289225084e-05,
"loss": 6.1571,
"step": 9000
},
{
"epoch": 0.0909990900090999,
"grad_norm": 216.7753143310547,
"learning_rate": 3.9873227013510714e-05,
"loss": 4.6247,
"step": 9100
},
{
"epoch": 0.0919990800091999,
"grad_norm": 554.5048828125,
"learning_rate": 3.9869634706971e-05,
"loss": 6.5048,
"step": 9200
},
{
"epoch": 0.09299907000929991,
"grad_norm": 131.7164764404297,
"learning_rate": 3.986599237864959e-05,
"loss": 6.2413,
"step": 9300
},
{
"epoch": 0.0939990600093999,
"grad_norm": 102.79979705810547,
"learning_rate": 3.9862300037716025e-05,
"loss": 5.7489,
"step": 9400
},
{
"epoch": 0.0949990500094999,
"grad_norm": 37.355709075927734,
"learning_rate": 3.9858557693465766e-05,
"loss": 5.7535,
"step": 9500
},
{
"epoch": 0.0959990400095999,
"grad_norm": 137.89903259277344,
"learning_rate": 3.985476535532018e-05,
"loss": 4.4851,
"step": 9600
},
{
"epoch": 0.09699903000969991,
"grad_norm": 174.57391357421875,
"learning_rate": 3.985092303282645e-05,
"loss": 9.4012,
"step": 9700
},
{
"epoch": 0.0979990200097999,
"grad_norm": 99.09408569335938,
"learning_rate": 3.9847030735657624e-05,
"loss": 7.2147,
"step": 9800
},
{
"epoch": 0.0989990100098999,
"grad_norm": 265.9083557128906,
"learning_rate": 3.984308847361257e-05,
"loss": 7.8453,
"step": 9900
},
{
"epoch": 0.0999990000099999,
"grad_norm": 240.05801391601562,
"learning_rate": 3.983909625661591e-05,
"loss": 6.4133,
"step": 10000
},
{
"epoch": 0.1009989900100999,
"grad_norm": 245.1486053466797,
"learning_rate": 3.983505409471806e-05,
"loss": 5.5553,
"step": 10100
},
{
"epoch": 0.1019989800101999,
"grad_norm": 654.8526000976562,
"learning_rate": 3.9830961998095146e-05,
"loss": 4.5454,
"step": 10200
},
{
"epoch": 0.1029989700102999,
"grad_norm": 49.497371673583984,
"learning_rate": 3.982681997704902e-05,
"loss": 4.0421,
"step": 10300
},
{
"epoch": 0.10399896001039989,
"grad_norm": 119.20033264160156,
"learning_rate": 3.982262804200723e-05,
"loss": 4.6029,
"step": 10400
},
{
"epoch": 0.1049989500104999,
"grad_norm": 595.1155395507812,
"learning_rate": 3.981838620352294e-05,
"loss": 3.8456,
"step": 10500
},
{
"epoch": 0.1059989400105999,
"grad_norm": 29.171674728393555,
"learning_rate": 3.9814094472275e-05,
"loss": 4.0763,
"step": 10600
},
{
"epoch": 0.10699893001069989,
"grad_norm": 359.91021728515625,
"learning_rate": 3.9809752859067823e-05,
"loss": 3.6656,
"step": 10700
},
{
"epoch": 0.10799892001079989,
"grad_norm": 137.34120178222656,
"learning_rate": 3.980536137483141e-05,
"loss": 5.2409,
"step": 10800
},
{
"epoch": 0.1089989100108999,
"grad_norm": 359.36151123046875,
"learning_rate": 3.9800920030621334e-05,
"loss": 7.5297,
"step": 10900
},
{
"epoch": 0.10999890001099989,
"grad_norm": 609.0127563476562,
"learning_rate": 3.979642883761866e-05,
"loss": 5.2982,
"step": 11000
},
{
"epoch": 0.11099889001109989,
"grad_norm": 1026.407470703125,
"learning_rate": 3.979188780712996e-05,
"loss": 5.7533,
"step": 11100
},
{
"epoch": 0.11199888001119988,
"grad_norm": 1161.3192138671875,
"learning_rate": 3.978729695058729e-05,
"loss": 3.8523,
"step": 11200
},
{
"epoch": 0.11299887001129989,
"grad_norm": 207.47286987304688,
"learning_rate": 3.9782656279548114e-05,
"loss": 5.7909,
"step": 11300
},
{
"epoch": 0.11399886001139989,
"grad_norm": 180.18321228027344,
"learning_rate": 3.9777965805695315e-05,
"loss": 4.8786,
"step": 11400
},
{
"epoch": 0.11499885001149988,
"grad_norm": 154.166259765625,
"learning_rate": 3.977322554083716e-05,
"loss": 5.9407,
"step": 11500
},
{
"epoch": 0.11599884001159988,
"grad_norm": 175.445556640625,
"learning_rate": 3.976843549690725e-05,
"loss": 3.8969,
"step": 11600
},
{
"epoch": 0.11699883001169989,
"grad_norm": 204.4251251220703,
"learning_rate": 3.976359568596453e-05,
"loss": 4.1672,
"step": 11700
},
{
"epoch": 0.11799882001179988,
"grad_norm": 330.901123046875,
"learning_rate": 3.97587061201932e-05,
"loss": 4.1925,
"step": 11800
},
{
"epoch": 0.11899881001189988,
"grad_norm": 563.4291381835938,
"learning_rate": 3.9753766811902756e-05,
"loss": 3.4035,
"step": 11900
},
{
"epoch": 0.11999880001199988,
"grad_norm": 57.178955078125,
"learning_rate": 3.974877777352789e-05,
"loss": 3.4773,
"step": 12000
},
{
"epoch": 0.12099879001209989,
"grad_norm": 94.94606018066406,
"learning_rate": 3.97437390176285e-05,
"loss": 3.5796,
"step": 12100
},
{
"epoch": 0.12199878001219988,
"grad_norm": 801.7224731445312,
"learning_rate": 3.973865055688965e-05,
"loss": 3.4456,
"step": 12200
},
{
"epoch": 0.12299877001229988,
"grad_norm": 211.559814453125,
"learning_rate": 3.973351240412153e-05,
"loss": 5.9604,
"step": 12300
},
{
"epoch": 0.12399876001239987,
"grad_norm": 47.33822250366211,
"learning_rate": 3.972832457225944e-05,
"loss": 1.6479,
"step": 12400
},
{
"epoch": 0.12499875001249988,
"grad_norm": 517.2850341796875,
"learning_rate": 3.972308707436374e-05,
"loss": 4.1837,
"step": 12500
},
{
"epoch": 0.12599874001259986,
"grad_norm": 168.18865966796875,
"learning_rate": 3.971779992361981e-05,
"loss": 3.7355,
"step": 12600
},
{
"epoch": 0.12699873001269987,
"grad_norm": 582.1716918945312,
"learning_rate": 3.971246313333807e-05,
"loss": 4.9486,
"step": 12700
},
{
"epoch": 0.12799872001279988,
"grad_norm": 337.09344482421875,
"learning_rate": 3.9707076716953866e-05,
"loss": 4.6692,
"step": 12800
},
{
"epoch": 0.12899871001289986,
"grad_norm": 383.8667297363281,
"learning_rate": 3.97016406880275e-05,
"loss": 3.3209,
"step": 12900
},
{
"epoch": 0.12999870001299987,
"grad_norm": 109.9424819946289,
"learning_rate": 3.9696155060244166e-05,
"loss": 3.5282,
"step": 13000
},
{
"epoch": 0.13099869001309986,
"grad_norm": 74.46175384521484,
"learning_rate": 3.969061984741393e-05,
"loss": 3.883,
"step": 13100
},
{
"epoch": 0.13199868001319986,
"grad_norm": 170.72601318359375,
"learning_rate": 3.9685035063471675e-05,
"loss": 6.4028,
"step": 13200
},
{
"epoch": 0.13299867001329987,
"grad_norm": 588.787109375,
"learning_rate": 3.9679400722477096e-05,
"loss": 3.1633,
"step": 13300
},
{
"epoch": 0.13399866001339986,
"grad_norm": 172.7151336669922,
"learning_rate": 3.967371683861465e-05,
"loss": 4.3976,
"step": 13400
},
{
"epoch": 0.13499865001349987,
"grad_norm": 728.732666015625,
"learning_rate": 3.9667983426193485e-05,
"loss": 3.4596,
"step": 13500
},
{
"epoch": 0.13599864001359988,
"grad_norm": 530.2247314453125,
"learning_rate": 3.9662200499647464e-05,
"loss": 7.6628,
"step": 13600
},
{
"epoch": 0.13699863001369986,
"grad_norm": 1040.433837890625,
"learning_rate": 3.965636807353511e-05,
"loss": 5.9463,
"step": 13700
},
{
"epoch": 0.13799862001379987,
"grad_norm": 94.73613739013672,
"learning_rate": 3.9650486162539555e-05,
"loss": 3.4241,
"step": 13800
},
{
"epoch": 0.13899861001389985,
"grad_norm": 87.12159729003906,
"learning_rate": 3.964455478146848e-05,
"loss": 4.2988,
"step": 13900
},
{
"epoch": 0.13999860001399986,
"grad_norm": 422.29547119140625,
"learning_rate": 3.963857394525413e-05,
"loss": 2.7699,
"step": 14000
},
{
"epoch": 0.14099859001409987,
"grad_norm": 677.4222412109375,
"learning_rate": 3.9632543668953284e-05,
"loss": 3.9013,
"step": 14100
},
{
"epoch": 0.14199858001419985,
"grad_norm": 98.60820770263672,
"learning_rate": 3.9626463967747126e-05,
"loss": 2.8094,
"step": 14200
},
{
"epoch": 0.14299857001429986,
"grad_norm": 567.5491333007812,
"learning_rate": 3.9620334856941305e-05,
"loss": 1.7368,
"step": 14300
},
{
"epoch": 0.14399856001439987,
"grad_norm": 19.20868492126465,
"learning_rate": 3.961415635196585e-05,
"loss": 2.8347,
"step": 14400
},
{
"epoch": 0.14499855001449985,
"grad_norm": 131.01400756835938,
"learning_rate": 3.960792846837514e-05,
"loss": 3.9196,
"step": 14500
},
{
"epoch": 0.14599854001459986,
"grad_norm": 113.39109802246094,
"learning_rate": 3.960165122184787e-05,
"loss": 2.9107,
"step": 14600
},
{
"epoch": 0.14699853001469984,
"grad_norm": 701.5942993164062,
"learning_rate": 3.959532462818699e-05,
"loss": 1.98,
"step": 14700
},
{
"epoch": 0.14799852001479985,
"grad_norm": 83.20955657958984,
"learning_rate": 3.958894870331971e-05,
"loss": 2.093,
"step": 14800
},
{
"epoch": 0.14899851001489986,
"grad_norm": 473.39202880859375,
"learning_rate": 3.958252346329739e-05,
"loss": 3.0402,
"step": 14900
},
{
"epoch": 0.14999850001499984,
"grad_norm": 267.7691955566406,
"learning_rate": 3.957604892429558e-05,
"loss": 5.534,
"step": 15000
},
{
"epoch": 0.15099849001509985,
"grad_norm": 278.0105285644531,
"learning_rate": 3.956952510261392e-05,
"loss": 3.1192,
"step": 15100
},
{
"epoch": 0.15199848001519986,
"grad_norm": 76.60382080078125,
"learning_rate": 3.9562952014676116e-05,
"loss": 4.55,
"step": 15200
},
{
"epoch": 0.15299847001529984,
"grad_norm": 62.512725830078125,
"learning_rate": 3.955632967702992e-05,
"loss": 4.7252,
"step": 15300
},
{
"epoch": 0.15399846001539985,
"grad_norm": 52.81327819824219,
"learning_rate": 3.954965810634706e-05,
"loss": 4.3578,
"step": 15400
},
{
"epoch": 0.15499845001549983,
"grad_norm": 388.27850341796875,
"learning_rate": 3.954293731942319e-05,
"loss": 3.9787,
"step": 15500
},
{
"epoch": 0.15599844001559984,
"grad_norm": 83.5515365600586,
"learning_rate": 3.953616733317791e-05,
"loss": 0.5327,
"step": 15600
},
{
"epoch": 0.15699843001569985,
"grad_norm": 104.8029556274414,
"learning_rate": 3.9529348164654625e-05,
"loss": 5.1422,
"step": 15700
},
{
"epoch": 0.15799842001579983,
"grad_norm": 682.9906616210938,
"learning_rate": 3.9522479831020605e-05,
"loss": 0.6372,
"step": 15800
},
{
"epoch": 0.15899841001589984,
"grad_norm": 970.7451171875,
"learning_rate": 3.951556234956686e-05,
"loss": 2.5437,
"step": 15900
},
{
"epoch": 0.15999840001599985,
"grad_norm": 276.81207275390625,
"learning_rate": 3.950859573770815e-05,
"loss": 0.5738,
"step": 16000
},
{
"epoch": 0.16099839001609983,
"grad_norm": 121.99581909179688,
"learning_rate": 3.9501580012982894e-05,
"loss": 2.5115,
"step": 16100
},
{
"epoch": 0.16199838001619984,
"grad_norm": 234.561279296875,
"learning_rate": 3.949451519305319e-05,
"loss": 2.9896,
"step": 16200
},
{
"epoch": 0.16299837001629983,
"grad_norm": 576.3875732421875,
"learning_rate": 3.948740129570471e-05,
"loss": 2.834,
"step": 16300
},
{
"epoch": 0.16399836001639984,
"grad_norm": 256.8683166503906,
"learning_rate": 3.948023833884667e-05,
"loss": 2.7145,
"step": 16400
},
{
"epoch": 0.16499835001649985,
"grad_norm": 782.2625122070312,
"learning_rate": 3.947302634051182e-05,
"loss": 2.0984,
"step": 16500
},
{
"epoch": 0.16599834001659983,
"grad_norm": 237.81005859375,
"learning_rate": 3.946576531885636e-05,
"loss": 1.2758,
"step": 16600
},
{
"epoch": 0.16699833001669984,
"grad_norm": 148.71981811523438,
"learning_rate": 3.9458455292159883e-05,
"loss": 5.3672,
"step": 16700
},
{
"epoch": 0.16799832001679985,
"grad_norm": 339.80877685546875,
"learning_rate": 3.9451096278825386e-05,
"loss": 1.9513,
"step": 16800
},
{
"epoch": 0.16899831001689983,
"grad_norm": 339.36767578125,
"learning_rate": 3.944368829737918e-05,
"loss": 3.6129,
"step": 16900
},
{
"epoch": 0.16999830001699984,
"grad_norm": 45.14118194580078,
"learning_rate": 3.9436231366470836e-05,
"loss": 4.6565,
"step": 17000
},
{
"epoch": 0.17099829001709982,
"grad_norm": 47.00923156738281,
"learning_rate": 3.942872550487318e-05,
"loss": 3.9665,
"step": 17100
},
{
"epoch": 0.17199828001719983,
"grad_norm": 100.63198852539062,
"learning_rate": 3.942117073148221e-05,
"loss": 3.8538,
"step": 17200
},
{
"epoch": 0.17299827001729984,
"grad_norm": 13.572708129882812,
"learning_rate": 3.9413567065317056e-05,
"loss": 2.2025,
"step": 17300
},
{
"epoch": 0.17399826001739982,
"grad_norm": 411.76959228515625,
"learning_rate": 3.940591452551993e-05,
"loss": 1.3909,
"step": 17400
},
{
"epoch": 0.17499825001749983,
"grad_norm": 48.388797760009766,
"learning_rate": 3.93982131313561e-05,
"loss": 2.3617,
"step": 17500
},
{
"epoch": 0.1759982400175998,
"grad_norm": 107.87911224365234,
"learning_rate": 3.939046290221383e-05,
"loss": 2.1434,
"step": 17600
},
{
"epoch": 0.17699823001769982,
"grad_norm": 36.4320182800293,
"learning_rate": 3.938266385760429e-05,
"loss": 1.2506,
"step": 17700
},
{
"epoch": 0.17799822001779983,
"grad_norm": 293.2892150878906,
"learning_rate": 3.937481601716157e-05,
"loss": 3.2974,
"step": 17800
},
{
"epoch": 0.1789982100178998,
"grad_norm": 595.9169921875,
"learning_rate": 3.936691940064261e-05,
"loss": 3.2548,
"step": 17900
},
{
"epoch": 0.17999820001799982,
"grad_norm": 402.92236328125,
"learning_rate": 3.935897402792713e-05,
"loss": 1.4305,
"step": 18000
},
{
"epoch": 0.18099819001809983,
"grad_norm": 56.42055892944336,
"learning_rate": 3.935097991901759e-05,
"loss": 1.3576,
"step": 18100
},
{
"epoch": 0.1819981800181998,
"grad_norm": 38.25709533691406,
"learning_rate": 3.934293709403915e-05,
"loss": 2.9067,
"step": 18200
},
{
"epoch": 0.18299817001829982,
"grad_norm": 263.8206787109375,
"learning_rate": 3.933484557323961e-05,
"loss": 3.5546,
"step": 18300
},
{
"epoch": 0.1839981600183998,
"grad_norm": 190.02703857421875,
"learning_rate": 3.932670537698937e-05,
"loss": 3.0082,
"step": 18400
},
{
"epoch": 0.1849981500184998,
"grad_norm": 171.09291076660156,
"learning_rate": 3.931851652578137e-05,
"loss": 2.6617,
"step": 18500
},
{
"epoch": 0.18599814001859982,
"grad_norm": 202.73995971679688,
"learning_rate": 3.931027904023102e-05,
"loss": 2.818,
"step": 18600
},
{
"epoch": 0.1869981300186998,
"grad_norm": 179.5336456298828,
"learning_rate": 3.9301992941076185e-05,
"loss": 3.1686,
"step": 18700
},
{
"epoch": 0.1879981200187998,
"grad_norm": 396.6241455078125,
"learning_rate": 3.929365824917712e-05,
"loss": 1.8101,
"step": 18800
},
{
"epoch": 0.18899811001889982,
"grad_norm": 82.62757873535156,
"learning_rate": 3.928527498551639e-05,
"loss": 1.5857,
"step": 18900
},
{
"epoch": 0.1899981000189998,
"grad_norm": 733.2717895507812,
"learning_rate": 3.9276843171198844e-05,
"loss": 2.6652,
"step": 19000
},
{
"epoch": 0.19099809001909981,
"grad_norm": 960.3976440429688,
"learning_rate": 3.926836282745158e-05,
"loss": 3.8051,
"step": 19100
},
{
"epoch": 0.1919980800191998,
"grad_norm": 534.16748046875,
"learning_rate": 3.925983397562385e-05,
"loss": 2.8182,
"step": 19200
},
{
"epoch": 0.1929980700192998,
"grad_norm": 233.56008911132812,
"learning_rate": 3.925125663718703e-05,
"loss": 2.5726,
"step": 19300
},
{
"epoch": 0.19399806001939982,
"grad_norm": 450.2645263671875,
"learning_rate": 3.924263083373455e-05,
"loss": 1.7716,
"step": 19400
},
{
"epoch": 0.1949980500194998,
"grad_norm": 81.5189208984375,
"learning_rate": 3.923395658698186e-05,
"loss": 4.5006,
"step": 19500
},
{
"epoch": 0.1959980400195998,
"grad_norm": 84.00701141357422,
"learning_rate": 3.922523391876638e-05,
"loss": 0.5761,
"step": 19600
},
{
"epoch": 0.19699803001969982,
"grad_norm": 229.67648315429688,
"learning_rate": 3.9216462851047405e-05,
"loss": 5.6245,
"step": 19700
},
{
"epoch": 0.1979980200197998,
"grad_norm": 603.322265625,
"learning_rate": 3.9207643405906094e-05,
"loss": 3.7712,
"step": 19800
},
{
"epoch": 0.1989980100198998,
"grad_norm": 282.677734375,
"learning_rate": 3.9198775605545385e-05,
"loss": -0.2731,
"step": 19900
},
{
"epoch": 0.1999980000199998,
"grad_norm": 87.2725601196289,
"learning_rate": 3.9189859472289956e-05,
"loss": 2.8561,
"step": 20000
}
],
"logging_steps": 100,
"max_steps": 200000,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 10000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 5,
"trial_name": null,
"trial_params": null
}