{
"best_metric": 0.5136106610298157,
"best_model_checkpoint": "./results/checkpoint-2874",
"epoch": 4.0,
"eval_steps": 500,
"global_step": 5748,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.006958942240779402,
"grad_norm": 4.3180251121521,
"learning_rate": 1.9972164231036883e-05,
"loss": 1.3759,
"step": 10
},
{
"epoch": 0.013917884481558803,
"grad_norm": 2.7253687381744385,
"learning_rate": 1.9944328462073764e-05,
"loss": 1.3343,
"step": 20
},
{
"epoch": 0.020876826722338204,
"grad_norm": 3.703948736190796,
"learning_rate": 1.9916492693110648e-05,
"loss": 1.226,
"step": 30
},
{
"epoch": 0.027835768963117607,
"grad_norm": 6.535583972930908,
"learning_rate": 1.9888656924147533e-05,
"loss": 1.0883,
"step": 40
},
{
"epoch": 0.03479471120389701,
"grad_norm": 13.897150039672852,
"learning_rate": 1.9860821155184414e-05,
"loss": 0.9748,
"step": 50
},
{
"epoch": 0.04175365344467641,
"grad_norm": 7.23193359375,
"learning_rate": 1.9832985386221295e-05,
"loss": 0.8725,
"step": 60
},
{
"epoch": 0.04871259568545581,
"grad_norm": 13.598752975463867,
"learning_rate": 1.980514961725818e-05,
"loss": 0.8301,
"step": 70
},
{
"epoch": 0.055671537926235214,
"grad_norm": 15.322084426879883,
"learning_rate": 1.977731384829506e-05,
"loss": 0.8432,
"step": 80
},
{
"epoch": 0.06263048016701461,
"grad_norm": 6.434969425201416,
"learning_rate": 1.974947807933194e-05,
"loss": 0.7408,
"step": 90
},
{
"epoch": 0.06958942240779402,
"grad_norm": 8.11408805847168,
"learning_rate": 1.9721642310368826e-05,
"loss": 0.7086,
"step": 100
},
{
"epoch": 0.07654836464857341,
"grad_norm": 7.85330057144165,
"learning_rate": 1.969380654140571e-05,
"loss": 0.7318,
"step": 110
},
{
"epoch": 0.08350730688935282,
"grad_norm": 10.534364700317383,
"learning_rate": 1.966597077244259e-05,
"loss": 0.6798,
"step": 120
},
{
"epoch": 0.09046624913013222,
"grad_norm": 13.151297569274902,
"learning_rate": 1.9638135003479472e-05,
"loss": 0.6988,
"step": 130
},
{
"epoch": 0.09742519137091162,
"grad_norm": 9.17402458190918,
"learning_rate": 1.9610299234516353e-05,
"loss": 0.6723,
"step": 140
},
{
"epoch": 0.10438413361169102,
"grad_norm": 19.499664306640625,
"learning_rate": 1.9582463465553238e-05,
"loss": 0.5994,
"step": 150
},
{
"epoch": 0.11134307585247043,
"grad_norm": 11.141926765441895,
"learning_rate": 1.955462769659012e-05,
"loss": 0.6565,
"step": 160
},
{
"epoch": 0.11830201809324982,
"grad_norm": 15.322267532348633,
"learning_rate": 1.9526791927627003e-05,
"loss": 0.7444,
"step": 170
},
{
"epoch": 0.12526096033402923,
"grad_norm": 9.982789993286133,
"learning_rate": 1.9498956158663885e-05,
"loss": 0.6882,
"step": 180
},
{
"epoch": 0.13221990257480862,
"grad_norm": 15.503236770629883,
"learning_rate": 1.9471120389700766e-05,
"loss": 0.6286,
"step": 190
},
{
"epoch": 0.13917884481558804,
"grad_norm": 9.867083549499512,
"learning_rate": 1.944328462073765e-05,
"loss": 0.7333,
"step": 200
},
{
"epoch": 0.14613778705636743,
"grad_norm": 21.876277923583984,
"learning_rate": 1.941544885177453e-05,
"loss": 0.7438,
"step": 210
},
{
"epoch": 0.15309672929714682,
"grad_norm": 17.48896598815918,
"learning_rate": 1.9387613082811416e-05,
"loss": 0.6269,
"step": 220
},
{
"epoch": 0.16005567153792624,
"grad_norm": 6.696071147918701,
"learning_rate": 1.9359777313848297e-05,
"loss": 0.6739,
"step": 230
},
{
"epoch": 0.16701461377870563,
"grad_norm": 11.048694610595703,
"learning_rate": 1.933194154488518e-05,
"loss": 0.7373,
"step": 240
},
{
"epoch": 0.17397355601948503,
"grad_norm": 8.395058631896973,
"learning_rate": 1.9304105775922062e-05,
"loss": 0.6133,
"step": 250
},
{
"epoch": 0.18093249826026445,
"grad_norm": 9.003988265991211,
"learning_rate": 1.9276270006958943e-05,
"loss": 0.5964,
"step": 260
},
{
"epoch": 0.18789144050104384,
"grad_norm": 10.79345703125,
"learning_rate": 1.9248434237995824e-05,
"loss": 0.6293,
"step": 270
},
{
"epoch": 0.19485038274182323,
"grad_norm": 19.241323471069336,
"learning_rate": 1.922059846903271e-05,
"loss": 0.6418,
"step": 280
},
{
"epoch": 0.20180932498260265,
"grad_norm": 20.079133987426758,
"learning_rate": 1.9192762700069593e-05,
"loss": 0.749,
"step": 290
},
{
"epoch": 0.20876826722338204,
"grad_norm": 31.14455223083496,
"learning_rate": 1.9164926931106474e-05,
"loss": 0.6719,
"step": 300
},
{
"epoch": 0.21572720946416143,
"grad_norm": 18.57120132446289,
"learning_rate": 1.9137091162143355e-05,
"loss": 0.674,
"step": 310
},
{
"epoch": 0.22268615170494085,
"grad_norm": 6.905323028564453,
"learning_rate": 1.9109255393180236e-05,
"loss": 0.5172,
"step": 320
},
{
"epoch": 0.22964509394572025,
"grad_norm": 9.24687671661377,
"learning_rate": 1.908141962421712e-05,
"loss": 0.637,
"step": 330
},
{
"epoch": 0.23660403618649964,
"grad_norm": 11.945255279541016,
"learning_rate": 1.9053583855254002e-05,
"loss": 0.6888,
"step": 340
},
{
"epoch": 0.24356297842727906,
"grad_norm": 7.873608112335205,
"learning_rate": 1.9025748086290886e-05,
"loss": 0.625,
"step": 350
},
{
"epoch": 0.25052192066805845,
"grad_norm": 10.357784271240234,
"learning_rate": 1.8997912317327767e-05,
"loss": 0.5676,
"step": 360
},
{
"epoch": 0.25748086290883787,
"grad_norm": 12.345287322998047,
"learning_rate": 1.8970076548364652e-05,
"loss": 0.5983,
"step": 370
},
{
"epoch": 0.26443980514961724,
"grad_norm": 7.470931053161621,
"learning_rate": 1.8942240779401533e-05,
"loss": 0.5978,
"step": 380
},
{
"epoch": 0.27139874739039666,
"grad_norm": 10.457704544067383,
"learning_rate": 1.8914405010438414e-05,
"loss": 0.6074,
"step": 390
},
{
"epoch": 0.2783576896311761,
"grad_norm": 14.570989608764648,
"learning_rate": 1.8886569241475295e-05,
"loss": 0.6218,
"step": 400
},
{
"epoch": 0.28531663187195544,
"grad_norm": 4.791262626647949,
"learning_rate": 1.885873347251218e-05,
"loss": 0.5941,
"step": 410
},
{
"epoch": 0.29227557411273486,
"grad_norm": 7.30219030380249,
"learning_rate": 1.8830897703549064e-05,
"loss": 0.5407,
"step": 420
},
{
"epoch": 0.2992345163535143,
"grad_norm": 7.455644130706787,
"learning_rate": 1.8803061934585945e-05,
"loss": 0.6167,
"step": 430
},
{
"epoch": 0.30619345859429364,
"grad_norm": 14.505638122558594,
"learning_rate": 1.8775226165622826e-05,
"loss": 0.6554,
"step": 440
},
{
"epoch": 0.31315240083507306,
"grad_norm": 9.505303382873535,
"learning_rate": 1.8747390396659707e-05,
"loss": 0.5817,
"step": 450
},
{
"epoch": 0.3201113430758525,
"grad_norm": 11.907074928283691,
"learning_rate": 1.871955462769659e-05,
"loss": 0.5423,
"step": 460
},
{
"epoch": 0.32707028531663185,
"grad_norm": 9.421733856201172,
"learning_rate": 1.8691718858733473e-05,
"loss": 0.5928,
"step": 470
},
{
"epoch": 0.33402922755741127,
"grad_norm": 8.2644624710083,
"learning_rate": 1.8663883089770357e-05,
"loss": 0.6234,
"step": 480
},
{
"epoch": 0.3409881697981907,
"grad_norm": 16.61441421508789,
"learning_rate": 1.8636047320807238e-05,
"loss": 0.5777,
"step": 490
},
{
"epoch": 0.34794711203897005,
"grad_norm": 8.484561920166016,
"learning_rate": 1.8608211551844123e-05,
"loss": 0.5505,
"step": 500
},
{
"epoch": 0.35490605427974947,
"grad_norm": 7.690084457397461,
"learning_rate": 1.8580375782881004e-05,
"loss": 0.5898,
"step": 510
},
{
"epoch": 0.3618649965205289,
"grad_norm": 7.3729963302612305,
"learning_rate": 1.8552540013917885e-05,
"loss": 0.6634,
"step": 520
},
{
"epoch": 0.36882393876130826,
"grad_norm": 8.139713287353516,
"learning_rate": 1.852470424495477e-05,
"loss": 0.6074,
"step": 530
},
{
"epoch": 0.3757828810020877,
"grad_norm": 10.640233039855957,
"learning_rate": 1.849686847599165e-05,
"loss": 0.6352,
"step": 540
},
{
"epoch": 0.3827418232428671,
"grad_norm": 16.793916702270508,
"learning_rate": 1.8469032707028535e-05,
"loss": 0.6306,
"step": 550
},
{
"epoch": 0.38970076548364646,
"grad_norm": 15.125770568847656,
"learning_rate": 1.8441196938065416e-05,
"loss": 0.6629,
"step": 560
},
{
"epoch": 0.3966597077244259,
"grad_norm": 13.026156425476074,
"learning_rate": 1.8413361169102297e-05,
"loss": 0.5743,
"step": 570
},
{
"epoch": 0.4036186499652053,
"grad_norm": 10.893036842346191,
"learning_rate": 1.838552540013918e-05,
"loss": 0.5443,
"step": 580
},
{
"epoch": 0.41057759220598466,
"grad_norm": 21.39899253845215,
"learning_rate": 1.8357689631176062e-05,
"loss": 0.5717,
"step": 590
},
{
"epoch": 0.4175365344467641,
"grad_norm": 29.05453872680664,
"learning_rate": 1.8329853862212947e-05,
"loss": 0.5856,
"step": 600
},
{
"epoch": 0.4244954766875435,
"grad_norm": 9.653709411621094,
"learning_rate": 1.8302018093249828e-05,
"loss": 0.5105,
"step": 610
},
{
"epoch": 0.43145441892832287,
"grad_norm": 14.518112182617188,
"learning_rate": 1.827418232428671e-05,
"loss": 0.6757,
"step": 620
},
{
"epoch": 0.4384133611691023,
"grad_norm": 12.425946235656738,
"learning_rate": 1.8246346555323593e-05,
"loss": 0.5708,
"step": 630
},
{
"epoch": 0.4453723034098817,
"grad_norm": 16.857666015625,
"learning_rate": 1.8218510786360474e-05,
"loss": 0.603,
"step": 640
},
{
"epoch": 0.4523312456506611,
"grad_norm": 19.68130111694336,
"learning_rate": 1.8190675017397356e-05,
"loss": 0.611,
"step": 650
},
{
"epoch": 0.4592901878914405,
"grad_norm": 13.848482131958008,
"learning_rate": 1.816283924843424e-05,
"loss": 0.625,
"step": 660
},
{
"epoch": 0.4662491301322199,
"grad_norm": 12.750751495361328,
"learning_rate": 1.813500347947112e-05,
"loss": 0.616,
"step": 670
},
{
"epoch": 0.4732080723729993,
"grad_norm": 9.34748363494873,
"learning_rate": 1.8107167710508005e-05,
"loss": 0.612,
"step": 680
},
{
"epoch": 0.4801670146137787,
"grad_norm": 14.746898651123047,
"learning_rate": 1.8079331941544887e-05,
"loss": 0.6521,
"step": 690
},
{
"epoch": 0.4871259568545581,
"grad_norm": 16.734874725341797,
"learning_rate": 1.8051496172581768e-05,
"loss": 0.606,
"step": 700
},
{
"epoch": 0.4940848990953375,
"grad_norm": 10.393136024475098,
"learning_rate": 1.8023660403618652e-05,
"loss": 0.6286,
"step": 710
},
{
"epoch": 0.5010438413361169,
"grad_norm": 8.78846263885498,
"learning_rate": 1.7995824634655533e-05,
"loss": 0.4593,
"step": 720
},
{
"epoch": 0.5080027835768963,
"grad_norm": 18.29091453552246,
"learning_rate": 1.7967988865692418e-05,
"loss": 0.6764,
"step": 730
},
{
"epoch": 0.5149617258176757,
"grad_norm": 13.510717391967773,
"learning_rate": 1.79401530967293e-05,
"loss": 0.6387,
"step": 740
},
{
"epoch": 0.5219206680584552,
"grad_norm": 8.811914443969727,
"learning_rate": 1.791231732776618e-05,
"loss": 0.6072,
"step": 750
},
{
"epoch": 0.5288796102992345,
"grad_norm": 12.649739265441895,
"learning_rate": 1.7884481558803064e-05,
"loss": 0.6128,
"step": 760
},
{
"epoch": 0.5358385525400139,
"grad_norm": 9.891730308532715,
"learning_rate": 1.7856645789839945e-05,
"loss": 0.568,
"step": 770
},
{
"epoch": 0.5427974947807933,
"grad_norm": 9.014187812805176,
"learning_rate": 1.7828810020876826e-05,
"loss": 0.5393,
"step": 780
},
{
"epoch": 0.5497564370215727,
"grad_norm": 19.861948013305664,
"learning_rate": 1.780097425191371e-05,
"loss": 0.5345,
"step": 790
},
{
"epoch": 0.5567153792623522,
"grad_norm": 8.01091480255127,
"learning_rate": 1.7773138482950595e-05,
"loss": 0.6457,
"step": 800
},
{
"epoch": 0.5636743215031316,
"grad_norm": 10.99765682220459,
"learning_rate": 1.7745302713987476e-05,
"loss": 0.5605,
"step": 810
},
{
"epoch": 0.5706332637439109,
"grad_norm": 7.248875617980957,
"learning_rate": 1.7717466945024357e-05,
"loss": 0.5975,
"step": 820
},
{
"epoch": 0.5775922059846903,
"grad_norm": 12.024378776550293,
"learning_rate": 1.768963117606124e-05,
"loss": 0.6238,
"step": 830
},
{
"epoch": 0.5845511482254697,
"grad_norm": 7.073344707489014,
"learning_rate": 1.7661795407098123e-05,
"loss": 0.6067,
"step": 840
},
{
"epoch": 0.5915100904662491,
"grad_norm": 11.20361614227295,
"learning_rate": 1.7633959638135004e-05,
"loss": 0.5319,
"step": 850
},
{
"epoch": 0.5984690327070286,
"grad_norm": 15.502198219299316,
"learning_rate": 1.760612386917189e-05,
"loss": 0.6038,
"step": 860
},
{
"epoch": 0.605427974947808,
"grad_norm": 7.949779510498047,
"learning_rate": 1.757828810020877e-05,
"loss": 0.5103,
"step": 870
},
{
"epoch": 0.6123869171885873,
"grad_norm": 21.52058219909668,
"learning_rate": 1.7550452331245654e-05,
"loss": 0.5607,
"step": 880
},
{
"epoch": 0.6193458594293667,
"grad_norm": 11.751809120178223,
"learning_rate": 1.7522616562282535e-05,
"loss": 0.46,
"step": 890
},
{
"epoch": 0.6263048016701461,
"grad_norm": 19.769689559936523,
"learning_rate": 1.7494780793319416e-05,
"loss": 0.5555,
"step": 900
},
{
"epoch": 0.6332637439109255,
"grad_norm": 6.040853977203369,
"learning_rate": 1.74669450243563e-05,
"loss": 0.6577,
"step": 910
},
{
"epoch": 0.640222686151705,
"grad_norm": 10.926910400390625,
"learning_rate": 1.743910925539318e-05,
"loss": 0.5674,
"step": 920
},
{
"epoch": 0.6471816283924844,
"grad_norm": 5.619332790374756,
"learning_rate": 1.7411273486430066e-05,
"loss": 0.5937,
"step": 930
},
{
"epoch": 0.6541405706332637,
"grad_norm": 9.416117668151855,
"learning_rate": 1.7383437717466947e-05,
"loss": 0.5718,
"step": 940
},
{
"epoch": 0.6610995128740431,
"grad_norm": 16.25941276550293,
"learning_rate": 1.7355601948503828e-05,
"loss": 0.5068,
"step": 950
},
{
"epoch": 0.6680584551148225,
"grad_norm": 10.545511245727539,
"learning_rate": 1.732776617954071e-05,
"loss": 0.5642,
"step": 960
},
{
"epoch": 0.675017397355602,
"grad_norm": 7.5949578285217285,
"learning_rate": 1.7299930410577594e-05,
"loss": 0.5243,
"step": 970
},
{
"epoch": 0.6819763395963814,
"grad_norm": 10.766368865966797,
"learning_rate": 1.7272094641614475e-05,
"loss": 0.5285,
"step": 980
},
{
"epoch": 0.6889352818371608,
"grad_norm": 7.256805419921875,
"learning_rate": 1.724425887265136e-05,
"loss": 0.5286,
"step": 990
},
{
"epoch": 0.6958942240779401,
"grad_norm": 10.257540702819824,
"learning_rate": 1.721642310368824e-05,
"loss": 0.6115,
"step": 1000
},
{
"epoch": 0.7028531663187195,
"grad_norm": 14.531510353088379,
"learning_rate": 1.7188587334725125e-05,
"loss": 0.6112,
"step": 1010
},
{
"epoch": 0.7098121085594989,
"grad_norm": 8.326130867004395,
"learning_rate": 1.7160751565762006e-05,
"loss": 0.5534,
"step": 1020
},
{
"epoch": 0.7167710508002784,
"grad_norm": 13.698468208312988,
"learning_rate": 1.7132915796798887e-05,
"loss": 0.5505,
"step": 1030
},
{
"epoch": 0.7237299930410578,
"grad_norm": 12.968040466308594,
"learning_rate": 1.710508002783577e-05,
"loss": 0.596,
"step": 1040
},
{
"epoch": 0.7306889352818372,
"grad_norm": 10.557011604309082,
"learning_rate": 1.7077244258872652e-05,
"loss": 0.5588,
"step": 1050
},
{
"epoch": 0.7376478775226165,
"grad_norm": 7.124124526977539,
"learning_rate": 1.7049408489909537e-05,
"loss": 0.5644,
"step": 1060
},
{
"epoch": 0.7446068197633959,
"grad_norm": 7.210671901702881,
"learning_rate": 1.7021572720946418e-05,
"loss": 0.5828,
"step": 1070
},
{
"epoch": 0.7515657620041754,
"grad_norm": 20.80126190185547,
"learning_rate": 1.69937369519833e-05,
"loss": 0.5491,
"step": 1080
},
{
"epoch": 0.7585247042449548,
"grad_norm": 8.95080852508545,
"learning_rate": 1.696590118302018e-05,
"loss": 0.495,
"step": 1090
},
{
"epoch": 0.7654836464857342,
"grad_norm": 8.503535270690918,
"learning_rate": 1.6938065414057064e-05,
"loss": 0.5253,
"step": 1100
},
{
"epoch": 0.7724425887265136,
"grad_norm": 6.803649425506592,
"learning_rate": 1.691022964509395e-05,
"loss": 0.5859,
"step": 1110
},
{
"epoch": 0.7794015309672929,
"grad_norm": 9.828047752380371,
"learning_rate": 1.688239387613083e-05,
"loss": 0.5418,
"step": 1120
},
{
"epoch": 0.7863604732080723,
"grad_norm": 7.690149307250977,
"learning_rate": 1.685455810716771e-05,
"loss": 0.5738,
"step": 1130
},
{
"epoch": 0.7933194154488518,
"grad_norm": 8.76807975769043,
"learning_rate": 1.6826722338204595e-05,
"loss": 0.458,
"step": 1140
},
{
"epoch": 0.8002783576896312,
"grad_norm": 10.1242036819458,
"learning_rate": 1.6798886569241476e-05,
"loss": 0.5573,
"step": 1150
},
{
"epoch": 0.8072372999304106,
"grad_norm": 8.316211700439453,
"learning_rate": 1.6771050800278358e-05,
"loss": 0.5527,
"step": 1160
},
{
"epoch": 0.81419624217119,
"grad_norm": 15.57465934753418,
"learning_rate": 1.6743215031315242e-05,
"loss": 0.5704,
"step": 1170
},
{
"epoch": 0.8211551844119693,
"grad_norm": 13.459155082702637,
"learning_rate": 1.6715379262352126e-05,
"loss": 0.5659,
"step": 1180
},
{
"epoch": 0.8281141266527487,
"grad_norm": 7.378421783447266,
"learning_rate": 1.6687543493389008e-05,
"loss": 0.5482,
"step": 1190
},
{
"epoch": 0.8350730688935282,
"grad_norm": 10.607648849487305,
"learning_rate": 1.665970772442589e-05,
"loss": 0.5627,
"step": 1200
},
{
"epoch": 0.8420320111343076,
"grad_norm": 13.157455444335938,
"learning_rate": 1.663187195546277e-05,
"loss": 0.5479,
"step": 1210
},
{
"epoch": 0.848990953375087,
"grad_norm": 9.343613624572754,
"learning_rate": 1.6604036186499654e-05,
"loss": 0.5695,
"step": 1220
},
{
"epoch": 0.8559498956158664,
"grad_norm": 24.01211929321289,
"learning_rate": 1.6576200417536535e-05,
"loss": 0.5173,
"step": 1230
},
{
"epoch": 0.8629088378566457,
"grad_norm": 14.033452987670898,
"learning_rate": 1.654836464857342e-05,
"loss": 0.5604,
"step": 1240
},
{
"epoch": 0.8698677800974252,
"grad_norm": 13.518189430236816,
"learning_rate": 1.65205288796103e-05,
"loss": 0.5797,
"step": 1250
},
{
"epoch": 0.8768267223382046,
"grad_norm": 9.779712677001953,
"learning_rate": 1.6492693110647182e-05,
"loss": 0.5761,
"step": 1260
},
{
"epoch": 0.883785664578984,
"grad_norm": 25.537031173706055,
"learning_rate": 1.6464857341684066e-05,
"loss": 0.4546,
"step": 1270
},
{
"epoch": 0.8907446068197634,
"grad_norm": 9.35092544555664,
"learning_rate": 1.6437021572720947e-05,
"loss": 0.5037,
"step": 1280
},
{
"epoch": 0.8977035490605428,
"grad_norm": 11.983678817749023,
"learning_rate": 1.640918580375783e-05,
"loss": 0.4967,
"step": 1290
},
{
"epoch": 0.9046624913013221,
"grad_norm": 8.681296348571777,
"learning_rate": 1.6381350034794713e-05,
"loss": 0.6147,
"step": 1300
},
{
"epoch": 0.9116214335421016,
"grad_norm": 9.482718467712402,
"learning_rate": 1.6353514265831597e-05,
"loss": 0.5906,
"step": 1310
},
{
"epoch": 0.918580375782881,
"grad_norm": 11.09154987335205,
"learning_rate": 1.6325678496868478e-05,
"loss": 0.5307,
"step": 1320
},
{
"epoch": 0.9255393180236604,
"grad_norm": 12.523815155029297,
"learning_rate": 1.629784272790536e-05,
"loss": 0.4931,
"step": 1330
},
{
"epoch": 0.9324982602644398,
"grad_norm": 13.545294761657715,
"learning_rate": 1.627000695894224e-05,
"loss": 0.4874,
"step": 1340
},
{
"epoch": 0.9394572025052192,
"grad_norm": 8.994409561157227,
"learning_rate": 1.6242171189979125e-05,
"loss": 0.5581,
"step": 1350
},
{
"epoch": 0.9464161447459986,
"grad_norm": 9.682478904724121,
"learning_rate": 1.6214335421016006e-05,
"loss": 0.5548,
"step": 1360
},
{
"epoch": 0.953375086986778,
"grad_norm": 7.079063415527344,
"learning_rate": 1.618649965205289e-05,
"loss": 0.4918,
"step": 1370
},
{
"epoch": 0.9603340292275574,
"grad_norm": 10.886133193969727,
"learning_rate": 1.615866388308977e-05,
"loss": 0.594,
"step": 1380
},
{
"epoch": 0.9672929714683368,
"grad_norm": 17.207847595214844,
"learning_rate": 1.6130828114126653e-05,
"loss": 0.5892,
"step": 1390
},
{
"epoch": 0.9742519137091162,
"grad_norm": 9.363895416259766,
"learning_rate": 1.6102992345163537e-05,
"loss": 0.5774,
"step": 1400
},
{
"epoch": 0.9812108559498957,
"grad_norm": 16.450660705566406,
"learning_rate": 1.6075156576200418e-05,
"loss": 0.481,
"step": 1410
},
{
"epoch": 0.988169798190675,
"grad_norm": 12.754993438720703,
"learning_rate": 1.6047320807237302e-05,
"loss": 0.5667,
"step": 1420
},
{
"epoch": 0.9951287404314544,
"grad_norm": 9.848027229309082,
"learning_rate": 1.6019485038274184e-05,
"loss": 0.562,
"step": 1430
},
{
"epoch": 1.0,
"eval_accuracy": 0.8099216710182767,
"eval_f1": 0.8040056577967634,
"eval_loss": 0.518168568611145,
"eval_precision": 0.8071878224437447,
"eval_recall": 0.8099216710182767,
"eval_runtime": 30.1807,
"eval_samples_per_second": 190.353,
"eval_steps_per_second": 5.964,
"step": 1437
},
{
"epoch": 1.0020876826722338,
"grad_norm": 10.002483367919922,
"learning_rate": 1.5991649269311068e-05,
"loss": 0.489,
"step": 1440
},
{
"epoch": 1.0090466249130132,
"grad_norm": 8.09190559387207,
"learning_rate": 1.596381350034795e-05,
"loss": 0.4158,
"step": 1450
},
{
"epoch": 1.0160055671537926,
"grad_norm": 11.66649055480957,
"learning_rate": 1.593597773138483e-05,
"loss": 0.4249,
"step": 1460
},
{
"epoch": 1.022964509394572,
"grad_norm": 11.087824821472168,
"learning_rate": 1.590814196242171e-05,
"loss": 0.4064,
"step": 1470
},
{
"epoch": 1.0299234516353515,
"grad_norm": 14.069585800170898,
"learning_rate": 1.5880306193458596e-05,
"loss": 0.4159,
"step": 1480
},
{
"epoch": 1.036882393876131,
"grad_norm": 17.53207778930664,
"learning_rate": 1.585247042449548e-05,
"loss": 0.3956,
"step": 1490
},
{
"epoch": 1.0438413361169103,
"grad_norm": 5.558675289154053,
"learning_rate": 1.582463465553236e-05,
"loss": 0.3837,
"step": 1500
},
{
"epoch": 1.0508002783576895,
"grad_norm": 14.77510929107666,
"learning_rate": 1.5796798886569242e-05,
"loss": 0.3886,
"step": 1510
},
{
"epoch": 1.057759220598469,
"grad_norm": 11.358789443969727,
"learning_rate": 1.5768963117606123e-05,
"loss": 0.3557,
"step": 1520
},
{
"epoch": 1.0647181628392484,
"grad_norm": 15.428235054016113,
"learning_rate": 1.5741127348643008e-05,
"loss": 0.4789,
"step": 1530
},
{
"epoch": 1.0716771050800278,
"grad_norm": 16.330005645751953,
"learning_rate": 1.571329157967989e-05,
"loss": 0.4957,
"step": 1540
},
{
"epoch": 1.0786360473208072,
"grad_norm": 11.652454376220703,
"learning_rate": 1.5685455810716773e-05,
"loss": 0.4305,
"step": 1550
},
{
"epoch": 1.0855949895615866,
"grad_norm": 11.190437316894531,
"learning_rate": 1.5657620041753654e-05,
"loss": 0.4325,
"step": 1560
},
{
"epoch": 1.092553931802366,
"grad_norm": 17.103654861450195,
"learning_rate": 1.562978427279054e-05,
"loss": 0.4482,
"step": 1570
},
{
"epoch": 1.0995128740431455,
"grad_norm": 13.081258773803711,
"learning_rate": 1.560194850382742e-05,
"loss": 0.3345,
"step": 1580
},
{
"epoch": 1.1064718162839249,
"grad_norm": 10.141121864318848,
"learning_rate": 1.55741127348643e-05,
"loss": 0.453,
"step": 1590
},
{
"epoch": 1.1134307585247043,
"grad_norm": 15.91781997680664,
"learning_rate": 1.5546276965901182e-05,
"loss": 0.4351,
"step": 1600
},
{
"epoch": 1.1203897007654837,
"grad_norm": 11.977279663085938,
"learning_rate": 1.5518441196938066e-05,
"loss": 0.3826,
"step": 1610
},
{
"epoch": 1.1273486430062631,
"grad_norm": 12.163490295410156,
"learning_rate": 1.549060542797495e-05,
"loss": 0.439,
"step": 1620
},
{
"epoch": 1.1343075852470426,
"grad_norm": 12.221410751342773,
"learning_rate": 1.5462769659011832e-05,
"loss": 0.4793,
"step": 1630
},
{
"epoch": 1.1412665274878218,
"grad_norm": 9.962662696838379,
"learning_rate": 1.5434933890048713e-05,
"loss": 0.4524,
"step": 1640
},
{
"epoch": 1.1482254697286012,
"grad_norm": 10.281346321105957,
"learning_rate": 1.5407098121085594e-05,
"loss": 0.4537,
"step": 1650
},
{
"epoch": 1.1551844119693806,
"grad_norm": 8.511795997619629,
"learning_rate": 1.537926235212248e-05,
"loss": 0.3701,
"step": 1660
},
{
"epoch": 1.16214335421016,
"grad_norm": 20.107454299926758,
"learning_rate": 1.535142658315936e-05,
"loss": 0.4807,
"step": 1670
},
{
"epoch": 1.1691022964509394,
"grad_norm": 12.120085716247559,
"learning_rate": 1.5323590814196244e-05,
"loss": 0.4253,
"step": 1680
},
{
"epoch": 1.1760612386917189,
"grad_norm": 13.358500480651855,
"learning_rate": 1.5295755045233125e-05,
"loss": 0.3941,
"step": 1690
},
{
"epoch": 1.1830201809324983,
"grad_norm": 10.09534740447998,
"learning_rate": 1.526791927627001e-05,
"loss": 0.4248,
"step": 1700
},
{
"epoch": 1.1899791231732777,
"grad_norm": 23.822050094604492,
"learning_rate": 1.524008350730689e-05,
"loss": 0.4355,
"step": 1710
},
{
"epoch": 1.1969380654140571,
"grad_norm": 10.628303527832031,
"learning_rate": 1.5212247738343773e-05,
"loss": 0.3619,
"step": 1720
},
{
"epoch": 1.2038970076548365,
"grad_norm": 18.837543487548828,
"learning_rate": 1.5184411969380654e-05,
"loss": 0.4091,
"step": 1730
},
{
"epoch": 1.210855949895616,
"grad_norm": 9.72425365447998,
"learning_rate": 1.5156576200417539e-05,
"loss": 0.4468,
"step": 1740
},
{
"epoch": 1.2178148921363952,
"grad_norm": 21.17300796508789,
"learning_rate": 1.512874043145442e-05,
"loss": 0.4977,
"step": 1750
},
{
"epoch": 1.2247738343771746,
"grad_norm": 13.43977165222168,
"learning_rate": 1.5100904662491303e-05,
"loss": 0.4028,
"step": 1760
},
{
"epoch": 1.231732776617954,
"grad_norm": 11.052497863769531,
"learning_rate": 1.5073068893528184e-05,
"loss": 0.5183,
"step": 1770
},
{
"epoch": 1.2386917188587334,
"grad_norm": 12.521780014038086,
"learning_rate": 1.5045233124565067e-05,
"loss": 0.4714,
"step": 1780
},
{
"epoch": 1.2456506610995128,
"grad_norm": 10.825766563415527,
"learning_rate": 1.5017397355601951e-05,
"loss": 0.459,
"step": 1790
},
{
"epoch": 1.2526096033402923,
"grad_norm": 9.5389404296875,
"learning_rate": 1.4989561586638832e-05,
"loss": 0.4247,
"step": 1800
},
{
"epoch": 1.2595685455810717,
"grad_norm": 10.02591609954834,
"learning_rate": 1.4961725817675715e-05,
"loss": 0.4035,
"step": 1810
},
{
"epoch": 1.266527487821851,
"grad_norm": 11.203591346740723,
"learning_rate": 1.4933890048712596e-05,
"loss": 0.3517,
"step": 1820
},
{
"epoch": 1.2734864300626305,
"grad_norm": 10.96849250793457,
"learning_rate": 1.490605427974948e-05,
"loss": 0.4199,
"step": 1830
},
{
"epoch": 1.28044537230341,
"grad_norm": 11.830713272094727,
"learning_rate": 1.4878218510786361e-05,
"loss": 0.4145,
"step": 1840
},
{
"epoch": 1.2874043145441894,
"grad_norm": 11.980402946472168,
"learning_rate": 1.4850382741823244e-05,
"loss": 0.4801,
"step": 1850
},
{
"epoch": 1.2943632567849686,
"grad_norm": 12.308026313781738,
"learning_rate": 1.4822546972860125e-05,
"loss": 0.4601,
"step": 1860
},
{
"epoch": 1.3013221990257482,
"grad_norm": 16.0020694732666,
"learning_rate": 1.479471120389701e-05,
"loss": 0.4674,
"step": 1870
},
{
"epoch": 1.3082811412665274,
"grad_norm": 6.870344161987305,
"learning_rate": 1.4766875434933892e-05,
"loss": 0.4389,
"step": 1880
},
{
"epoch": 1.3152400835073068,
"grad_norm": 12.440506935119629,
"learning_rate": 1.4739039665970773e-05,
"loss": 0.4242,
"step": 1890
},
{
"epoch": 1.3221990257480862,
"grad_norm": 11.824153900146484,
"learning_rate": 1.4711203897007655e-05,
"loss": 0.4483,
"step": 1900
},
{
"epoch": 1.3291579679888657,
"grad_norm": 6.609494686126709,
"learning_rate": 1.4683368128044539e-05,
"loss": 0.4573,
"step": 1910
},
{
"epoch": 1.336116910229645,
"grad_norm": 12.088859558105469,
"learning_rate": 1.4655532359081422e-05,
"loss": 0.3912,
"step": 1920
},
{
"epoch": 1.3430758524704245,
"grad_norm": 7.114819526672363,
"learning_rate": 1.4627696590118303e-05,
"loss": 0.3867,
"step": 1930
},
{
"epoch": 1.350034794711204,
"grad_norm": 11.105134010314941,
"learning_rate": 1.4599860821155186e-05,
"loss": 0.5326,
"step": 1940
},
{
"epoch": 1.3569937369519833,
"grad_norm": 8.85695743560791,
"learning_rate": 1.4572025052192067e-05,
"loss": 0.445,
"step": 1950
},
{
"epoch": 1.3639526791927628,
"grad_norm": 13.736560821533203,
"learning_rate": 1.4544189283228951e-05,
"loss": 0.4333,
"step": 1960
},
{
"epoch": 1.3709116214335422,
"grad_norm": 9.378311157226562,
"learning_rate": 1.4516353514265832e-05,
"loss": 0.4275,
"step": 1970
},
{
"epoch": 1.3778705636743216,
"grad_norm": 10.04967212677002,
"learning_rate": 1.4488517745302715e-05,
"loss": 0.4151,
"step": 1980
},
{
"epoch": 1.3848295059151008,
"grad_norm": 7.61630392074585,
"learning_rate": 1.4460681976339596e-05,
"loss": 0.4994,
"step": 1990
},
{
"epoch": 1.3917884481558804,
"grad_norm": 14.55225944519043,
"learning_rate": 1.443284620737648e-05,
"loss": 0.4048,
"step": 2000
},
{
"epoch": 1.3987473903966596,
"grad_norm": 11.608763694763184,
"learning_rate": 1.4405010438413363e-05,
"loss": 0.3836,
"step": 2010
},
{
"epoch": 1.405706332637439,
"grad_norm": 11.155454635620117,
"learning_rate": 1.4377174669450244e-05,
"loss": 0.3942,
"step": 2020
},
{
"epoch": 1.4126652748782185,
"grad_norm": 11.708532333374023,
"learning_rate": 1.4349338900487127e-05,
"loss": 0.478,
"step": 2030
},
{
"epoch": 1.4196242171189979,
"grad_norm": 9.829862594604492,
"learning_rate": 1.432150313152401e-05,
"loss": 0.3487,
"step": 2040
},
{
"epoch": 1.4265831593597773,
"grad_norm": 14.25184440612793,
"learning_rate": 1.4293667362560893e-05,
"loss": 0.5225,
"step": 2050
},
{
"epoch": 1.4335421016005567,
"grad_norm": 12.317340850830078,
"learning_rate": 1.4265831593597774e-05,
"loss": 0.37,
"step": 2060
},
{
"epoch": 1.4405010438413361,
"grad_norm": 13.569458961486816,
"learning_rate": 1.4237995824634656e-05,
"loss": 0.3805,
"step": 2070
},
{
"epoch": 1.4474599860821156,
"grad_norm": 16.662263870239258,
"learning_rate": 1.4210160055671537e-05,
"loss": 0.3996,
"step": 2080
},
{
"epoch": 1.454418928322895,
"grad_norm": 12.971599578857422,
"learning_rate": 1.4182324286708422e-05,
"loss": 0.4265,
"step": 2090
},
{
"epoch": 1.4613778705636742,
"grad_norm": 9.266508102416992,
"learning_rate": 1.4154488517745305e-05,
"loss": 0.4199,
"step": 2100
},
{
"epoch": 1.4683368128044538,
"grad_norm": 15.103167533874512,
"learning_rate": 1.4126652748782186e-05,
"loss": 0.472,
"step": 2110
},
{
"epoch": 1.475295755045233,
"grad_norm": 13.94981861114502,
"learning_rate": 1.4098816979819068e-05,
"loss": 0.4681,
"step": 2120
},
{
"epoch": 1.4822546972860124,
"grad_norm": 19.643762588500977,
"learning_rate": 1.4070981210855951e-05,
"loss": 0.3848,
"step": 2130
},
{
"epoch": 1.4892136395267919,
"grad_norm": 11.58189868927002,
"learning_rate": 1.4043145441892834e-05,
"loss": 0.5083,
"step": 2140
},
{
"epoch": 1.4961725817675713,
"grad_norm": 13.264250755310059,
"learning_rate": 1.4015309672929715e-05,
"loss": 0.45,
"step": 2150
},
{
"epoch": 1.5031315240083507,
"grad_norm": 10.432905197143555,
"learning_rate": 1.3987473903966598e-05,
"loss": 0.4583,
"step": 2160
},
{
"epoch": 1.5100904662491301,
"grad_norm": 9.850616455078125,
"learning_rate": 1.3959638135003482e-05,
"loss": 0.349,
"step": 2170
},
{
"epoch": 1.5170494084899095,
"grad_norm": 14.087292671203613,
"learning_rate": 1.3931802366040363e-05,
"loss": 0.448,
"step": 2180
},
{
"epoch": 1.524008350730689,
"grad_norm": 12.514032363891602,
"learning_rate": 1.3903966597077246e-05,
"loss": 0.514,
"step": 2190
},
{
"epoch": 1.5309672929714684,
"grad_norm": 25.41820526123047,
"learning_rate": 1.3876130828114127e-05,
"loss": 0.4356,
"step": 2200
},
{
"epoch": 1.5379262352122476,
"grad_norm": 11.849440574645996,
"learning_rate": 1.3848295059151012e-05,
"loss": 0.3895,
"step": 2210
},
{
"epoch": 1.5448851774530272,
"grad_norm": 8.636540412902832,
"learning_rate": 1.3820459290187893e-05,
"loss": 0.481,
"step": 2220
},
{
"epoch": 1.5518441196938064,
"grad_norm": 11.286504745483398,
"learning_rate": 1.3792623521224775e-05,
"loss": 0.4019,
"step": 2230
},
{
"epoch": 1.558803061934586,
"grad_norm": 11.524672508239746,
"learning_rate": 1.3764787752261656e-05,
"loss": 0.4617,
"step": 2240
},
{
"epoch": 1.5657620041753653,
"grad_norm": 11.370726585388184,
"learning_rate": 1.373695198329854e-05,
"loss": 0.4186,
"step": 2250
},
{
"epoch": 1.572720946416145,
"grad_norm": 23.02247428894043,
"learning_rate": 1.3709116214335422e-05,
"loss": 0.4743,
"step": 2260
},
{
"epoch": 1.579679888656924,
"grad_norm": 11.176335334777832,
"learning_rate": 1.3681280445372305e-05,
"loss": 0.41,
"step": 2270
},
{
"epoch": 1.5866388308977035,
"grad_norm": 11.33989429473877,
"learning_rate": 1.3653444676409186e-05,
"loss": 0.4822,
"step": 2280
},
{
"epoch": 1.593597773138483,
"grad_norm": 18.040159225463867,
"learning_rate": 1.3625608907446069e-05,
"loss": 0.351,
"step": 2290
},
{
"epoch": 1.6005567153792624,
"grad_norm": 5.855461597442627,
"learning_rate": 1.3597773138482953e-05,
"loss": 0.4567,
"step": 2300
},
{
"epoch": 1.6075156576200418,
"grad_norm": 10.47138500213623,
"learning_rate": 1.3569937369519834e-05,
"loss": 0.4875,
"step": 2310
},
{
"epoch": 1.6144745998608212,
"grad_norm": 11.59261417388916,
"learning_rate": 1.3542101600556717e-05,
"loss": 0.4871,
"step": 2320
},
{
"epoch": 1.6214335421016006,
"grad_norm": 7.732606410980225,
"learning_rate": 1.3514265831593598e-05,
"loss": 0.4078,
"step": 2330
},
{
"epoch": 1.6283924843423798,
"grad_norm": 10.10660457611084,
"learning_rate": 1.3486430062630482e-05,
"loss": 0.3767,
"step": 2340
},
{
"epoch": 1.6353514265831595,
"grad_norm": 10.724883079528809,
"learning_rate": 1.3458594293667363e-05,
"loss": 0.4102,
"step": 2350
},
{
"epoch": 1.6423103688239387,
"grad_norm": 11.941119194030762,
"learning_rate": 1.3430758524704246e-05,
"loss": 0.5101,
"step": 2360
},
{
"epoch": 1.6492693110647183,
"grad_norm": 11.30588436126709,
"learning_rate": 1.3402922755741127e-05,
"loss": 0.3157,
"step": 2370
},
{
"epoch": 1.6562282533054975,
"grad_norm": 11.969123840332031,
"learning_rate": 1.337508698677801e-05,
"loss": 0.4568,
"step": 2380
},
{
"epoch": 1.663187195546277,
"grad_norm": 12.086457252502441,
"learning_rate": 1.3347251217814894e-05,
"loss": 0.4307,
"step": 2390
},
{
"epoch": 1.6701461377870563,
"grad_norm": 11.068685531616211,
"learning_rate": 1.3319415448851776e-05,
"loss": 0.4913,
"step": 2400
},
{
"epoch": 1.6771050800278358,
"grad_norm": 7.290180206298828,
"learning_rate": 1.3291579679888658e-05,
"loss": 0.4157,
"step": 2410
},
{
"epoch": 1.6840640222686152,
"grad_norm": 12.097051620483398,
"learning_rate": 1.326374391092554e-05,
"loss": 0.4356,
"step": 2420
},
{
"epoch": 1.6910229645093946,
"grad_norm": 10.983007431030273,
"learning_rate": 1.3235908141962424e-05,
"loss": 0.4594,
"step": 2430
},
{
"epoch": 1.697981906750174,
"grad_norm": 8.463971138000488,
"learning_rate": 1.3208072372999305e-05,
"loss": 0.395,
"step": 2440
},
{
"epoch": 1.7049408489909532,
"grad_norm": 10.346870422363281,
"learning_rate": 1.3180236604036188e-05,
"loss": 0.436,
"step": 2450
},
{
"epoch": 1.7118997912317329,
"grad_norm": 13.56679916381836,
"learning_rate": 1.3152400835073069e-05,
"loss": 0.4011,
"step": 2460
},
{
"epoch": 1.718858733472512,
"grad_norm": 8.750198364257812,
"learning_rate": 1.3124565066109953e-05,
"loss": 0.4479,
"step": 2470
},
{
"epoch": 1.7258176757132917,
"grad_norm": 8.680354118347168,
"learning_rate": 1.3096729297146836e-05,
"loss": 0.357,
"step": 2480
},
{
"epoch": 1.732776617954071,
"grad_norm": 13.647467613220215,
"learning_rate": 1.3068893528183717e-05,
"loss": 0.4286,
"step": 2490
},
{
"epoch": 1.7397355601948505,
"grad_norm": 8.507746696472168,
"learning_rate": 1.30410577592206e-05,
"loss": 0.3357,
"step": 2500
},
{
"epoch": 1.7466945024356297,
"grad_norm": 12.067097663879395,
"learning_rate": 1.3013221990257482e-05,
"loss": 0.434,
"step": 2510
},
{
"epoch": 1.7536534446764092,
"grad_norm": 9.736947059631348,
"learning_rate": 1.2985386221294365e-05,
"loss": 0.4228,
"step": 2520
},
{
"epoch": 1.7606123869171886,
"grad_norm": 14.245895385742188,
"learning_rate": 1.2957550452331246e-05,
"loss": 0.4379,
"step": 2530
},
{
"epoch": 1.767571329157968,
"grad_norm": 12.476272583007812,
"learning_rate": 1.2929714683368129e-05,
"loss": 0.4332,
"step": 2540
},
{
"epoch": 1.7745302713987474,
"grad_norm": 13.964608192443848,
"learning_rate": 1.290187891440501e-05,
"loss": 0.3422,
"step": 2550
},
{
"epoch": 1.7814892136395268,
"grad_norm": 11.833532333374023,
"learning_rate": 1.2874043145441895e-05,
"loss": 0.401,
"step": 2560
},
{
"epoch": 1.7884481558803063,
"grad_norm": 10.771284103393555,
"learning_rate": 1.2846207376478776e-05,
"loss": 0.3829,
"step": 2570
},
{
"epoch": 1.7954070981210855,
"grad_norm": 13.72558307647705,
"learning_rate": 1.2818371607515658e-05,
"loss": 0.3885,
"step": 2580
},
{
"epoch": 1.802366040361865,
"grad_norm": 19.78665542602539,
"learning_rate": 1.279053583855254e-05,
"loss": 0.5615,
"step": 2590
},
{
"epoch": 1.8093249826026443,
"grad_norm": 10.085536003112793,
"learning_rate": 1.2762700069589424e-05,
"loss": 0.3689,
"step": 2600
},
{
"epoch": 1.816283924843424,
"grad_norm": 11.349889755249023,
"learning_rate": 1.2734864300626307e-05,
"loss": 0.3492,
"step": 2610
},
{
"epoch": 1.8232428670842031,
"grad_norm": 15.420230865478516,
"learning_rate": 1.2707028531663188e-05,
"loss": 0.4059,
"step": 2620
},
{
"epoch": 1.8302018093249826,
"grad_norm": 43.528160095214844,
"learning_rate": 1.267919276270007e-05,
"loss": 0.4361,
"step": 2630
},
{
"epoch": 1.837160751565762,
"grad_norm": 8.911616325378418,
"learning_rate": 1.2651356993736953e-05,
"loss": 0.413,
"step": 2640
},
{
"epoch": 1.8441196938065414,
"grad_norm": 15.205978393554688,
"learning_rate": 1.2623521224773836e-05,
"loss": 0.4352,
"step": 2650
},
{
"epoch": 1.8510786360473208,
"grad_norm": 15.270347595214844,
"learning_rate": 1.2595685455810717e-05,
"loss": 0.5509,
"step": 2660
},
{
"epoch": 1.8580375782881002,
"grad_norm": 7.940185546875,
"learning_rate": 1.25678496868476e-05,
"loss": 0.4402,
"step": 2670
},
{
"epoch": 1.8649965205288797,
"grad_norm": 9.823007583618164,
"learning_rate": 1.2540013917884481e-05,
"loss": 0.4116,
"step": 2680
},
{
"epoch": 1.8719554627696589,
"grad_norm": 14.74289321899414,
"learning_rate": 1.2512178148921365e-05,
"loss": 0.4503,
"step": 2690
},
{
"epoch": 1.8789144050104385,
"grad_norm": 13.300530433654785,
"learning_rate": 1.2484342379958248e-05,
"loss": 0.4735,
"step": 2700
},
{
"epoch": 1.8858733472512177,
"grad_norm": 10.028038024902344,
"learning_rate": 1.245650661099513e-05,
"loss": 0.4889,
"step": 2710
},
{
"epoch": 1.8928322894919973,
"grad_norm": 13.30984878540039,
"learning_rate": 1.2428670842032012e-05,
"loss": 0.4714,
"step": 2720
},
{
"epoch": 1.8997912317327765,
"grad_norm": 8.563050270080566,
"learning_rate": 1.2400835073068895e-05,
"loss": 0.4601,
"step": 2730
},
{
"epoch": 1.9067501739735562,
"grad_norm": 9.030021667480469,
"learning_rate": 1.2372999304105777e-05,
"loss": 0.448,
"step": 2740
},
{
"epoch": 1.9137091162143354,
"grad_norm": 11.629081726074219,
"learning_rate": 1.2345163535142659e-05,
"loss": 0.4628,
"step": 2750
},
{
"epoch": 1.9206680584551148,
"grad_norm": 13.654706001281738,
"learning_rate": 1.2317327766179541e-05,
"loss": 0.4105,
"step": 2760
},
{
"epoch": 1.9276270006958942,
"grad_norm": 10.076985359191895,
"learning_rate": 1.2289491997216426e-05,
"loss": 0.4087,
"step": 2770
},
{
"epoch": 1.9345859429366736,
"grad_norm": 10.824203491210938,
"learning_rate": 1.2261656228253307e-05,
"loss": 0.3625,
"step": 2780
},
{
"epoch": 1.941544885177453,
"grad_norm": 19.84947395324707,
"learning_rate": 1.223382045929019e-05,
"loss": 0.3909,
"step": 2790
},
{
"epoch": 1.9485038274182325,
"grad_norm": 11.292709350585938,
"learning_rate": 1.220598469032707e-05,
"loss": 0.5342,
"step": 2800
},
{
"epoch": 1.955462769659012,
"grad_norm": 12.195109367370605,
"learning_rate": 1.2178148921363955e-05,
"loss": 0.434,
"step": 2810
},
{
"epoch": 1.962421711899791,
"grad_norm": 5.671847820281982,
"learning_rate": 1.2150313152400836e-05,
"loss": 0.3821,
"step": 2820
},
{
"epoch": 1.9693806541405707,
"grad_norm": 6.8894267082214355,
"learning_rate": 1.2122477383437719e-05,
"loss": 0.4741,
"step": 2830
},
{
"epoch": 1.97633959638135,
"grad_norm": 11.644760131835938,
"learning_rate": 1.20946416144746e-05,
"loss": 0.4485,
"step": 2840
},
{
"epoch": 1.9832985386221296,
"grad_norm": 13.690812110900879,
"learning_rate": 1.2066805845511483e-05,
"loss": 0.4329,
"step": 2850
},
{
"epoch": 1.9902574808629088,
"grad_norm": 14.693482398986816,
"learning_rate": 1.2038970076548367e-05,
"loss": 0.4345,
"step": 2860
},
{
"epoch": 1.9972164231036882,
"grad_norm": 8.777437210083008,
"learning_rate": 1.2011134307585248e-05,
"loss": 0.4272,
"step": 2870
},
{
"epoch": 2.0,
"eval_accuracy": 0.8186248912097476,
"eval_f1": 0.8142148630689494,
"eval_loss": 0.5136106610298157,
"eval_precision": 0.815336261474068,
"eval_recall": 0.8186248912097476,
"eval_runtime": 30.487,
"eval_samples_per_second": 188.441,
"eval_steps_per_second": 5.904,
"step": 2874
},
{
"epoch": 2.0041753653444676,
"grad_norm": 6.2629876136779785,
"learning_rate": 1.198329853862213e-05,
"loss": 0.3394,
"step": 2880
},
{
"epoch": 2.0111343075852472,
"grad_norm": 13.670483589172363,
"learning_rate": 1.1955462769659012e-05,
"loss": 0.2971,
"step": 2890
},
{
"epoch": 2.0180932498260264,
"grad_norm": 7.209113121032715,
"learning_rate": 1.1927627000695896e-05,
"loss": 0.3459,
"step": 2900
},
{
"epoch": 2.0250521920668056,
"grad_norm": 14.117879867553711,
"learning_rate": 1.1899791231732778e-05,
"loss": 0.2936,
"step": 2910
},
{
"epoch": 2.0320111343075853,
"grad_norm": 8.980249404907227,
"learning_rate": 1.187195546276966e-05,
"loss": 0.3326,
"step": 2920
},
{
"epoch": 2.0389700765483645,
"grad_norm": 16.819644927978516,
"learning_rate": 1.1844119693806541e-05,
"loss": 0.3578,
"step": 2930
},
{
"epoch": 2.045929018789144,
"grad_norm": 14.287947654724121,
"learning_rate": 1.1816283924843426e-05,
"loss": 0.3025,
"step": 2940
},
{
"epoch": 2.0528879610299233,
"grad_norm": 11.339349746704102,
"learning_rate": 1.1788448155880307e-05,
"loss": 0.3576,
"step": 2950
},
{
"epoch": 2.059846903270703,
"grad_norm": 7.132763862609863,
"learning_rate": 1.176061238691719e-05,
"loss": 0.319,
"step": 2960
},
{
"epoch": 2.066805845511482,
"grad_norm": 10.997299194335938,
"learning_rate": 1.173277661795407e-05,
"loss": 0.398,
"step": 2970
},
{
"epoch": 2.073764787752262,
"grad_norm": 27.665699005126953,
"learning_rate": 1.1704940848990953e-05,
"loss": 0.2853,
"step": 2980
},
{
"epoch": 2.080723729993041,
"grad_norm": 25.032983779907227,
"learning_rate": 1.1677105080027838e-05,
"loss": 0.3484,
"step": 2990
},
{
"epoch": 2.0876826722338206,
"grad_norm": 17.8544921875,
"learning_rate": 1.1649269311064719e-05,
"loss": 0.3496,
"step": 3000
},
{
"epoch": 2.0946416144746,
"grad_norm": 19.761899948120117,
"learning_rate": 1.1621433542101602e-05,
"loss": 0.3137,
"step": 3010
},
{
"epoch": 2.101600556715379,
"grad_norm": 21.972309112548828,
"learning_rate": 1.1593597773138483e-05,
"loss": 0.3503,
"step": 3020
},
{
"epoch": 2.1085594989561587,
"grad_norm": 14.141931533813477,
"learning_rate": 1.1565762004175367e-05,
"loss": 0.2801,
"step": 3030
},
{
"epoch": 2.115518441196938,
"grad_norm": 9.858434677124023,
"learning_rate": 1.1537926235212248e-05,
"loss": 0.2912,
"step": 3040
},
{
"epoch": 2.1224773834377175,
"grad_norm": 11.357017517089844,
"learning_rate": 1.1510090466249131e-05,
"loss": 0.3761,
"step": 3050
},
{
"epoch": 2.1294363256784967,
"grad_norm": 14.140629768371582,
"learning_rate": 1.1482254697286012e-05,
"loss": 0.4435,
"step": 3060
},
{
"epoch": 2.1363952679192764,
"grad_norm": 14.739768028259277,
"learning_rate": 1.1454418928322897e-05,
"loss": 0.3424,
"step": 3070
},
{
"epoch": 2.1433542101600556,
"grad_norm": 11.241192817687988,
"learning_rate": 1.142658315935978e-05,
"loss": 0.3119,
"step": 3080
},
{
"epoch": 2.150313152400835,
"grad_norm": 7.299347400665283,
"learning_rate": 1.139874739039666e-05,
"loss": 0.2737,
"step": 3090
},
{
"epoch": 2.1572720946416144,
"grad_norm": 12.915804862976074,
"learning_rate": 1.1370911621433543e-05,
"loss": 0.3366,
"step": 3100
},
{
"epoch": 2.164231036882394,
"grad_norm": 14.415313720703125,
"learning_rate": 1.1343075852470426e-05,
"loss": 0.3461,
"step": 3110
},
{
"epoch": 2.1711899791231732,
"grad_norm": 37.51091384887695,
"learning_rate": 1.1315240083507309e-05,
"loss": 0.2735,
"step": 3120
},
{
"epoch": 2.178148921363953,
"grad_norm": 14.238667488098145,
"learning_rate": 1.128740431454419e-05,
"loss": 0.3137,
"step": 3130
},
{
"epoch": 2.185107863604732,
"grad_norm": 10.343038558959961,
"learning_rate": 1.1259568545581073e-05,
"loss": 0.3305,
"step": 3140
},
{
"epoch": 2.1920668058455113,
"grad_norm": 11.619972229003906,
"learning_rate": 1.1231732776617954e-05,
"loss": 0.3192,
"step": 3150
},
{
"epoch": 2.199025748086291,
"grad_norm": 10.04326343536377,
"learning_rate": 1.1203897007654838e-05,
"loss": 0.3116,
"step": 3160
},
{
"epoch": 2.20598469032707,
"grad_norm": 10.689598083496094,
"learning_rate": 1.117606123869172e-05,
"loss": 0.2896,
"step": 3170
},
{
"epoch": 2.2129436325678498,
"grad_norm": 13.70692253112793,
"learning_rate": 1.1148225469728602e-05,
"loss": 0.3201,
"step": 3180
},
{
"epoch": 2.219902574808629,
"grad_norm": 11.719026565551758,
"learning_rate": 1.1120389700765483e-05,
"loss": 0.2838,
"step": 3190
},
{
"epoch": 2.2268615170494086,
"grad_norm": 14.697103500366211,
"learning_rate": 1.1092553931802367e-05,
"loss": 0.3266,
"step": 3200
},
{
"epoch": 2.233820459290188,
"grad_norm": 9.828338623046875,
"learning_rate": 1.106471816283925e-05,
"loss": 0.2887,
"step": 3210
},
{
"epoch": 2.2407794015309674,
"grad_norm": 17.478595733642578,
"learning_rate": 1.1036882393876131e-05,
"loss": 0.263,
"step": 3220
},
{
"epoch": 2.2477383437717466,
"grad_norm": 12.797255516052246,
"learning_rate": 1.1009046624913014e-05,
"loss": 0.2799,
"step": 3230
},
{
"epoch": 2.2546972860125263,
"grad_norm": 7.045528888702393,
"learning_rate": 1.0981210855949897e-05,
"loss": 0.3794,
"step": 3240
},
{
"epoch": 2.2616562282533055,
"grad_norm": 13.09620189666748,
"learning_rate": 1.095337508698678e-05,
"loss": 0.2539,
"step": 3250
},
{
"epoch": 2.268615170494085,
"grad_norm": 7.552209377288818,
"learning_rate": 1.092553931802366e-05,
"loss": 0.3526,
"step": 3260
},
{
"epoch": 2.2755741127348643,
"grad_norm": 10.503962516784668,
"learning_rate": 1.0897703549060543e-05,
"loss": 0.3042,
"step": 3270
},
{
"epoch": 2.2825330549756435,
"grad_norm": 9.320645332336426,
"learning_rate": 1.0869867780097424e-05,
"loss": 0.2384,
"step": 3280
},
{
"epoch": 2.289491997216423,
"grad_norm": 9.707759857177734,
"learning_rate": 1.0842032011134309e-05,
"loss": 0.2633,
"step": 3290
},
{
"epoch": 2.2964509394572024,
"grad_norm": 10.683955192565918,
"learning_rate": 1.0814196242171192e-05,
"loss": 0.2941,
"step": 3300
},
{
"epoch": 2.303409881697982,
"grad_norm": 11.840535163879395,
"learning_rate": 1.0786360473208073e-05,
"loss": 0.2836,
"step": 3310
},
{
"epoch": 2.310368823938761,
"grad_norm": 17.78310203552246,
"learning_rate": 1.0758524704244955e-05,
"loss": 0.317,
"step": 3320
},
{
"epoch": 2.317327766179541,
"grad_norm": 14.615537643432617,
"learning_rate": 1.0730688935281838e-05,
"loss": 0.2933,
"step": 3330
},
{
"epoch": 2.32428670842032,
"grad_norm": 14.550018310546875,
"learning_rate": 1.0702853166318721e-05,
"loss": 0.3915,
"step": 3340
},
{
"epoch": 2.3312456506610997,
"grad_norm": 11.032766342163086,
"learning_rate": 1.0675017397355602e-05,
"loss": 0.3216,
"step": 3350
},
{
"epoch": 2.338204592901879,
"grad_norm": 11.570281028747559,
"learning_rate": 1.0647181628392485e-05,
"loss": 0.3011,
"step": 3360
},
{
"epoch": 2.3451635351426585,
"grad_norm": 8.726863861083984,
"learning_rate": 1.061934585942937e-05,
"loss": 0.2779,
"step": 3370
},
{
"epoch": 2.3521224773834377,
"grad_norm": 11.70459270477295,
"learning_rate": 1.059151009046625e-05,
"loss": 0.3044,
"step": 3380
},
{
"epoch": 2.359081419624217,
"grad_norm": 11.244171142578125,
"learning_rate": 1.0563674321503133e-05,
"loss": 0.2742,
"step": 3390
},
{
"epoch": 2.3660403618649966,
"grad_norm": 13.98281192779541,
"learning_rate": 1.0535838552540014e-05,
"loss": 0.4095,
"step": 3400
},
{
"epoch": 2.3729993041057758,
"grad_norm": 60.7244758605957,
"learning_rate": 1.0508002783576897e-05,
"loss": 0.2786,
"step": 3410
},
{
"epoch": 2.3799582463465554,
"grad_norm": 17.862695693969727,
"learning_rate": 1.048016701461378e-05,
"loss": 0.4003,
"step": 3420
},
{
"epoch": 2.3869171885873346,
"grad_norm": 11.725099563598633,
"learning_rate": 1.0452331245650662e-05,
"loss": 0.2304,
"step": 3430
},
{
"epoch": 2.3938761308281142,
"grad_norm": 14.38791561126709,
"learning_rate": 1.0424495476687543e-05,
"loss": 0.3011,
"step": 3440
},
{
"epoch": 2.4008350730688934,
"grad_norm": 17.396326065063477,
"learning_rate": 1.0396659707724426e-05,
"loss": 0.2762,
"step": 3450
},
{
"epoch": 2.407794015309673,
"grad_norm": 15.41369915008545,
"learning_rate": 1.036882393876131e-05,
"loss": 0.318,
"step": 3460
},
{
"epoch": 2.4147529575504523,
"grad_norm": 10.988295555114746,
"learning_rate": 1.0340988169798192e-05,
"loss": 0.3218,
"step": 3470
},
{
"epoch": 2.421711899791232,
"grad_norm": 25.048797607421875,
"learning_rate": 1.0313152400835074e-05,
"loss": 0.3305,
"step": 3480
},
{
"epoch": 2.428670842032011,
"grad_norm": 14.599493026733398,
"learning_rate": 1.0285316631871956e-05,
"loss": 0.3633,
"step": 3490
},
{
"epoch": 2.4356297842727903,
"grad_norm": 14.409786224365234,
"learning_rate": 1.025748086290884e-05,
"loss": 0.3577,
"step": 3500
},
{
"epoch": 2.44258872651357,
"grad_norm": 11.649439811706543,
"learning_rate": 1.0229645093945721e-05,
"loss": 0.321,
"step": 3510
},
{
"epoch": 2.449547668754349,
"grad_norm": 20.704423904418945,
"learning_rate": 1.0201809324982604e-05,
"loss": 0.3077,
"step": 3520
},
{
"epoch": 2.456506610995129,
"grad_norm": 9.154399871826172,
"learning_rate": 1.0173973556019485e-05,
"loss": 0.2218,
"step": 3530
},
{
"epoch": 2.463465553235908,
"grad_norm": 10.178906440734863,
"learning_rate": 1.014613778705637e-05,
"loss": 0.3181,
"step": 3540
},
{
"epoch": 2.4704244954766876,
"grad_norm": 12.843514442443848,
"learning_rate": 1.011830201809325e-05,
"loss": 0.2807,
"step": 3550
},
{
"epoch": 2.477383437717467,
"grad_norm": 15.194390296936035,
"learning_rate": 1.0090466249130133e-05,
"loss": 0.3834,
"step": 3560
},
{
"epoch": 2.4843423799582465,
"grad_norm": 10.255640983581543,
"learning_rate": 1.0062630480167014e-05,
"loss": 0.2968,
"step": 3570
},
{
"epoch": 2.4913013221990257,
"grad_norm": 12.686639785766602,
"learning_rate": 1.0034794711203897e-05,
"loss": 0.3559,
"step": 3580
},
{
"epoch": 2.4982602644398053,
"grad_norm": 18.302518844604492,
"learning_rate": 1.0006958942240781e-05,
"loss": 0.2919,
"step": 3590
},
{
"epoch": 2.5052192066805845,
"grad_norm": 13.351080894470215,
"learning_rate": 9.979123173277662e-06,
"loss": 0.374,
"step": 3600
},
{
"epoch": 2.5121781489213637,
"grad_norm": 11.826626777648926,
"learning_rate": 9.951287404314545e-06,
"loss": 0.351,
"step": 3610
},
{
"epoch": 2.5191370911621433,
"grad_norm": 14.403546333312988,
"learning_rate": 9.923451635351428e-06,
"loss": 0.3448,
"step": 3620
},
{
"epoch": 2.526096033402923,
"grad_norm": 8.75331974029541,
"learning_rate": 9.895615866388309e-06,
"loss": 0.3678,
"step": 3630
},
{
"epoch": 2.533054975643702,
"grad_norm": 7.926251411437988,
"learning_rate": 9.867780097425192e-06,
"loss": 0.3124,
"step": 3640
},
{
"epoch": 2.5400139178844814,
"grad_norm": 14.520807266235352,
"learning_rate": 9.839944328462075e-06,
"loss": 0.3685,
"step": 3650
},
{
"epoch": 2.546972860125261,
"grad_norm": 6.630367279052734,
"learning_rate": 9.812108559498957e-06,
"loss": 0.4329,
"step": 3660
},
{
"epoch": 2.5539318023660402,
"grad_norm": 16.33591651916504,
"learning_rate": 9.784272790535838e-06,
"loss": 0.2904,
"step": 3670
},
{
"epoch": 2.56089074460682,
"grad_norm": 12.767754554748535,
"learning_rate": 9.756437021572723e-06,
"loss": 0.3424,
"step": 3680
},
{
"epoch": 2.567849686847599,
"grad_norm": 17.801118850708008,
"learning_rate": 9.728601252609604e-06,
"loss": 0.3373,
"step": 3690
},
{
"epoch": 2.5748086290883787,
"grad_norm": 12.671394348144531,
"learning_rate": 9.700765483646487e-06,
"loss": 0.3632,
"step": 3700
},
{
"epoch": 2.581767571329158,
"grad_norm": 13.465224266052246,
"learning_rate": 9.67292971468337e-06,
"loss": 0.3065,
"step": 3710
},
{
"epoch": 2.588726513569937,
"grad_norm": 7.1438822746276855,
"learning_rate": 9.64509394572025e-06,
"loss": 0.2948,
"step": 3720
},
{
"epoch": 2.5956854558107167,
"grad_norm": 6.285761833190918,
"learning_rate": 9.617258176757133e-06,
"loss": 0.2847,
"step": 3730
},
{
"epoch": 2.6026443980514964,
"grad_norm": 12.588811874389648,
"learning_rate": 9.589422407794016e-06,
"loss": 0.3873,
"step": 3740
},
{
"epoch": 2.6096033402922756,
"grad_norm": 10.82616138458252,
"learning_rate": 9.561586638830899e-06,
"loss": 0.276,
"step": 3750
},
{
"epoch": 2.616562282533055,
"grad_norm": 19.147323608398438,
"learning_rate": 9.53375086986778e-06,
"loss": 0.3486,
"step": 3760
},
{
"epoch": 2.6235212247738344,
"grad_norm": 5.6541266441345215,
"learning_rate": 9.505915100904664e-06,
"loss": 0.3499,
"step": 3770
},
{
"epoch": 2.6304801670146136,
"grad_norm": 11.496247291564941,
"learning_rate": 9.478079331941545e-06,
"loss": 0.3104,
"step": 3780
},
{
"epoch": 2.6374391092553933,
"grad_norm": 13.55700397491455,
"learning_rate": 9.450243562978428e-06,
"loss": 0.3168,
"step": 3790
},
{
"epoch": 2.6443980514961725,
"grad_norm": 13.626465797424316,
"learning_rate": 9.422407794015311e-06,
"loss": 0.3061,
"step": 3800
},
{
"epoch": 2.651356993736952,
"grad_norm": 15.465563774108887,
"learning_rate": 9.394572025052194e-06,
"loss": 0.2827,
"step": 3810
},
{
"epoch": 2.6583159359777313,
"grad_norm": 10.07729721069336,
"learning_rate": 9.366736256089075e-06,
"loss": 0.2696,
"step": 3820
},
{
"epoch": 2.665274878218511,
"grad_norm": 14.677043914794922,
"learning_rate": 9.338900487125957e-06,
"loss": 0.3666,
"step": 3830
},
{
"epoch": 2.67223382045929,
"grad_norm": 9.545304298400879,
"learning_rate": 9.31106471816284e-06,
"loss": 0.3099,
"step": 3840
},
{
"epoch": 2.67919276270007,
"grad_norm": 13.406818389892578,
"learning_rate": 9.283228949199723e-06,
"loss": 0.3011,
"step": 3850
},
{
"epoch": 2.686151704940849,
"grad_norm": 9.16269302368164,
"learning_rate": 9.255393180236604e-06,
"loss": 0.3376,
"step": 3860
},
{
"epoch": 2.693110647181628,
"grad_norm": 13.706355094909668,
"learning_rate": 9.227557411273487e-06,
"loss": 0.3558,
"step": 3870
},
{
"epoch": 2.700069589422408,
"grad_norm": 13.172536849975586,
"learning_rate": 9.19972164231037e-06,
"loss": 0.3418,
"step": 3880
},
{
"epoch": 2.7070285316631875,
"grad_norm": 13.34077262878418,
"learning_rate": 9.171885873347252e-06,
"loss": 0.4049,
"step": 3890
},
{
"epoch": 2.7139874739039667,
"grad_norm": 22.909151077270508,
"learning_rate": 9.144050104384135e-06,
"loss": 0.3101,
"step": 3900
},
{
"epoch": 2.720946416144746,
"grad_norm": 10.906767845153809,
"learning_rate": 9.116214335421016e-06,
"loss": 0.2435,
"step": 3910
},
{
"epoch": 2.7279053583855255,
"grad_norm": 17.20676040649414,
"learning_rate": 9.088378566457899e-06,
"loss": 0.3154,
"step": 3920
},
{
"epoch": 2.7348643006263047,
"grad_norm": 12.238724708557129,
"learning_rate": 9.060542797494782e-06,
"loss": 0.2535,
"step": 3930
},
{
"epoch": 2.7418232428670843,
"grad_norm": 14.290855407714844,
"learning_rate": 9.032707028531664e-06,
"loss": 0.32,
"step": 3940
},
{
"epoch": 2.7487821851078635,
"grad_norm": 7.506951332092285,
"learning_rate": 9.004871259568545e-06,
"loss": 0.3707,
"step": 3950
},
{
"epoch": 2.755741127348643,
"grad_norm": 9.022459030151367,
"learning_rate": 8.97703549060543e-06,
"loss": 0.2429,
"step": 3960
},
{
"epoch": 2.7627000695894224,
"grad_norm": 8.920448303222656,
"learning_rate": 8.949199721642311e-06,
"loss": 0.2934,
"step": 3970
},
{
"epoch": 2.7696590118302016,
"grad_norm": 7.519834995269775,
"learning_rate": 8.921363952679194e-06,
"loss": 0.2833,
"step": 3980
},
{
"epoch": 2.776617954070981,
"grad_norm": 10.720945358276367,
"learning_rate": 8.893528183716076e-06,
"loss": 0.2456,
"step": 3990
},
{
"epoch": 2.783576896311761,
"grad_norm": 11.830615997314453,
"learning_rate": 8.865692414752958e-06,
"loss": 0.2867,
"step": 4000
},
{
"epoch": 2.79053583855254,
"grad_norm": 9.925026893615723,
"learning_rate": 8.83785664578984e-06,
"loss": 0.3503,
"step": 4010
},
{
"epoch": 2.7974947807933193,
"grad_norm": 9.181790351867676,
"learning_rate": 8.810020876826723e-06,
"loss": 0.3396,
"step": 4020
},
{
"epoch": 2.804453723034099,
"grad_norm": 17.847026824951172,
"learning_rate": 8.782185107863606e-06,
"loss": 0.3592,
"step": 4030
},
{
"epoch": 2.811412665274878,
"grad_norm": 14.639543533325195,
"learning_rate": 8.754349338900487e-06,
"loss": 0.2279,
"step": 4040
},
{
"epoch": 2.8183716075156577,
"grad_norm": 14.787379264831543,
"learning_rate": 8.72651356993737e-06,
"loss": 0.298,
"step": 4050
},
{
"epoch": 2.825330549756437,
"grad_norm": 9.879755020141602,
"learning_rate": 8.698677800974252e-06,
"loss": 0.3329,
"step": 4060
},
{
"epoch": 2.8322894919972166,
"grad_norm": 8.337702751159668,
"learning_rate": 8.670842032011135e-06,
"loss": 0.3393,
"step": 4070
},
{
"epoch": 2.8392484342379958,
"grad_norm": 11.59692668914795,
"learning_rate": 8.643006263048018e-06,
"loss": 0.2697,
"step": 4080
},
{
"epoch": 2.846207376478775,
"grad_norm": 22.700538635253906,
"learning_rate": 8.6151704940849e-06,
"loss": 0.3444,
"step": 4090
},
{
"epoch": 2.8531663187195546,
"grad_norm": 13.9461088180542,
"learning_rate": 8.587334725121782e-06,
"loss": 0.2616,
"step": 4100
},
{
"epoch": 2.8601252609603343,
"grad_norm": 9.75053882598877,
"learning_rate": 8.559498956158664e-06,
"loss": 0.3219,
"step": 4110
},
{
"epoch": 2.8670842032011135,
"grad_norm": 11.127705574035645,
"learning_rate": 8.531663187195547e-06,
"loss": 0.3238,
"step": 4120
},
{
"epoch": 2.8740431454418927,
"grad_norm": 13.097844123840332,
"learning_rate": 8.50382741823243e-06,
"loss": 0.3177,
"step": 4130
},
{
"epoch": 2.8810020876826723,
"grad_norm": 11.675921440124512,
"learning_rate": 8.475991649269311e-06,
"loss": 0.2872,
"step": 4140
},
{
"epoch": 2.8879610299234515,
"grad_norm": 9.369670867919922,
"learning_rate": 8.448155880306194e-06,
"loss": 0.2693,
"step": 4150
},
{
"epoch": 2.894919972164231,
"grad_norm": 8.535505294799805,
"learning_rate": 8.420320111343077e-06,
"loss": 0.2828,
"step": 4160
},
{
"epoch": 2.9018789144050103,
"grad_norm": 11.415098190307617,
"learning_rate": 8.392484342379958e-06,
"loss": 0.341,
"step": 4170
},
{
"epoch": 2.90883785664579,
"grad_norm": 19.970497131347656,
"learning_rate": 8.364648573416842e-06,
"loss": 0.3472,
"step": 4180
},
{
"epoch": 2.915796798886569,
"grad_norm": 6.632875919342041,
"learning_rate": 8.336812804453723e-06,
"loss": 0.317,
"step": 4190
},
{
"epoch": 2.9227557411273484,
"grad_norm": 18.783174514770508,
"learning_rate": 8.308977035490606e-06,
"loss": 0.3287,
"step": 4200
},
{
"epoch": 2.929714683368128,
"grad_norm": 10.871125221252441,
"learning_rate": 8.281141266527489e-06,
"loss": 0.2833,
"step": 4210
},
{
"epoch": 2.9366736256089077,
"grad_norm": 16.211822509765625,
"learning_rate": 8.253305497564371e-06,
"loss": 0.2999,
"step": 4220
},
{
"epoch": 2.943632567849687,
"grad_norm": 15.132637023925781,
"learning_rate": 8.225469728601253e-06,
"loss": 0.3001,
"step": 4230
},
{
"epoch": 2.950591510090466,
"grad_norm": 15.456144332885742,
"learning_rate": 8.197633959638135e-06,
"loss": 0.3072,
"step": 4240
},
{
"epoch": 2.9575504523312457,
"grad_norm": 12.601120948791504,
"learning_rate": 8.169798190675018e-06,
"loss": 0.343,
"step": 4250
},
{
"epoch": 2.964509394572025,
"grad_norm": 18.754928588867188,
"learning_rate": 8.1419624217119e-06,
"loss": 0.3183,
"step": 4260
},
{
"epoch": 2.9714683368128045,
"grad_norm": 12.473878860473633,
"learning_rate": 8.114126652748784e-06,
"loss": 0.3068,
"step": 4270
},
{
"epoch": 2.9784272790535837,
"grad_norm": 15.256598472595215,
"learning_rate": 8.086290883785666e-06,
"loss": 0.3455,
"step": 4280
},
{
"epoch": 2.9853862212943634,
"grad_norm": 9.27231216430664,
"learning_rate": 8.058455114822547e-06,
"loss": 0.352,
"step": 4290
},
{
"epoch": 2.9923451635351426,
"grad_norm": 9.258604049682617,
"learning_rate": 8.03061934585943e-06,
"loss": 0.2213,
"step": 4300
},
{
"epoch": 2.999304105775922,
"grad_norm": 14.93215560913086,
"learning_rate": 8.002783576896313e-06,
"loss": 0.4324,
"step": 4310
},
{
"epoch": 3.0,
"eval_accuracy": 0.816710182767624,
"eval_f1": 0.8145784658491753,
"eval_loss": 0.567512571811676,
"eval_precision": 0.8135975269617428,
"eval_recall": 0.816710182767624,
"eval_runtime": 30.2203,
"eval_samples_per_second": 190.104,
"eval_steps_per_second": 5.956,
"step": 4311
},
{
"epoch": 3.0062630480167014,
"grad_norm": 18.71284294128418,
"learning_rate": 7.974947807933194e-06,
"loss": 0.2542,
"step": 4320
},
{
"epoch": 3.013221990257481,
"grad_norm": 7.149287223815918,
"learning_rate": 7.947112038970077e-06,
"loss": 0.2366,
"step": 4330
},
{
"epoch": 3.0201809324982603,
"grad_norm": 11.145984649658203,
"learning_rate": 7.91927627000696e-06,
"loss": 0.2427,
"step": 4340
},
{
"epoch": 3.0271398747390394,
"grad_norm": 14.64748764038086,
"learning_rate": 7.891440501043842e-06,
"loss": 0.2473,
"step": 4350
},
{
"epoch": 3.034098816979819,
"grad_norm": 13.893207550048828,
"learning_rate": 7.863604732080723e-06,
"loss": 0.2094,
"step": 4360
},
{
"epoch": 3.0410577592205983,
"grad_norm": 22.052799224853516,
"learning_rate": 7.835768963117608e-06,
"loss": 0.2436,
"step": 4370
},
{
"epoch": 3.048016701461378,
"grad_norm": 16.942176818847656,
"learning_rate": 7.807933194154489e-06,
"loss": 0.1888,
"step": 4380
},
{
"epoch": 3.054975643702157,
"grad_norm": 12.624246597290039,
"learning_rate": 7.780097425191372e-06,
"loss": 0.2503,
"step": 4390
},
{
"epoch": 3.0619345859429368,
"grad_norm": 6.612172603607178,
"learning_rate": 7.752261656228254e-06,
"loss": 0.2775,
"step": 4400
},
{
"epoch": 3.068893528183716,
"grad_norm": 7.194397449493408,
"learning_rate": 7.724425887265137e-06,
"loss": 0.2143,
"step": 4410
},
{
"epoch": 3.0758524704244956,
"grad_norm": 15.386282920837402,
"learning_rate": 7.696590118302018e-06,
"loss": 0.2393,
"step": 4420
},
{
"epoch": 3.082811412665275,
"grad_norm": 4.229943752288818,
"learning_rate": 7.668754349338901e-06,
"loss": 0.2499,
"step": 4430
},
{
"epoch": 3.0897703549060545,
"grad_norm": 7.80819845199585,
"learning_rate": 7.640918580375784e-06,
"loss": 0.2578,
"step": 4440
},
{
"epoch": 3.0967292971468336,
"grad_norm": 3.920732259750366,
"learning_rate": 7.6130828114126656e-06,
"loss": 0.2114,
"step": 4450
},
{
"epoch": 3.1036882393876133,
"grad_norm": 16.482385635375977,
"learning_rate": 7.585247042449548e-06,
"loss": 0.1836,
"step": 4460
},
{
"epoch": 3.1106471816283925,
"grad_norm": 10.486527442932129,
"learning_rate": 7.55741127348643e-06,
"loss": 0.2508,
"step": 4470
},
{
"epoch": 3.1176061238691717,
"grad_norm": 9.817858695983887,
"learning_rate": 7.529575504523313e-06,
"loss": 0.2671,
"step": 4480
},
{
"epoch": 3.1245650661099513,
"grad_norm": 7.070506572723389,
"learning_rate": 7.501739735560195e-06,
"loss": 0.2899,
"step": 4490
},
{
"epoch": 3.1315240083507305,
"grad_norm": 11.537872314453125,
"learning_rate": 7.473903966597078e-06,
"loss": 0.2668,
"step": 4500
},
{
"epoch": 3.13848295059151,
"grad_norm": 14.454391479492188,
"learning_rate": 7.44606819763396e-06,
"loss": 0.2487,
"step": 4510
},
{
"epoch": 3.1454418928322894,
"grad_norm": 12.986367225646973,
"learning_rate": 7.418232428670843e-06,
"loss": 0.3061,
"step": 4520
},
{
"epoch": 3.152400835073069,
"grad_norm": 21.419010162353516,
"learning_rate": 7.390396659707725e-06,
"loss": 0.2157,
"step": 4530
},
{
"epoch": 3.159359777313848,
"grad_norm": 19.896608352661133,
"learning_rate": 7.362560890744608e-06,
"loss": 0.3067,
"step": 4540
},
{
"epoch": 3.166318719554628,
"grad_norm": 12.328235626220703,
"learning_rate": 7.33472512178149e-06,
"loss": 0.2303,
"step": 4550
},
{
"epoch": 3.173277661795407,
"grad_norm": 10.288804054260254,
"learning_rate": 7.3068893528183725e-06,
"loss": 0.3152,
"step": 4560
},
{
"epoch": 3.1802366040361867,
"grad_norm": 7.457220077514648,
"learning_rate": 7.2790535838552544e-06,
"loss": 0.2319,
"step": 4570
},
{
"epoch": 3.187195546276966,
"grad_norm": 11.831998825073242,
"learning_rate": 7.251217814892137e-06,
"loss": 0.2369,
"step": 4580
},
{
"epoch": 3.1941544885177455,
"grad_norm": 8.234902381896973,
"learning_rate": 7.223382045929019e-06,
"loss": 0.1987,
"step": 4590
},
{
"epoch": 3.2011134307585247,
"grad_norm": 11.515932083129883,
"learning_rate": 7.195546276965901e-06,
"loss": 0.1955,
"step": 4600
},
{
"epoch": 3.208072372999304,
"grad_norm": 13.247298240661621,
"learning_rate": 7.167710508002785e-06,
"loss": 0.2123,
"step": 4610
},
{
"epoch": 3.2150313152400836,
"grad_norm": 9.564682006835938,
"learning_rate": 7.139874739039666e-06,
"loss": 0.2544,
"step": 4620
},
{
"epoch": 3.2219902574808628,
"grad_norm": 16.233783721923828,
"learning_rate": 7.112038970076549e-06,
"loss": 0.2659,
"step": 4630
},
{
"epoch": 3.2289491997216424,
"grad_norm": 6.909665107727051,
"learning_rate": 7.084203201113431e-06,
"loss": 0.1884,
"step": 4640
},
{
"epoch": 3.2359081419624216,
"grad_norm": 13.52547836303711,
"learning_rate": 7.056367432150314e-06,
"loss": 0.2759,
"step": 4650
},
{
"epoch": 3.2428670842032012,
"grad_norm": 10.246102333068848,
"learning_rate": 7.028531663187196e-06,
"loss": 0.2318,
"step": 4660
},
{
"epoch": 3.2498260264439804,
"grad_norm": 25.54823875427246,
"learning_rate": 7.000695894224079e-06,
"loss": 0.2425,
"step": 4670
},
{
"epoch": 3.25678496868476,
"grad_norm": 10.150367736816406,
"learning_rate": 6.9728601252609605e-06,
"loss": 0.2687,
"step": 4680
},
{
"epoch": 3.2637439109255393,
"grad_norm": 17.207233428955078,
"learning_rate": 6.945024356297843e-06,
"loss": 0.2855,
"step": 4690
},
{
"epoch": 3.270702853166319,
"grad_norm": 8.081562042236328,
"learning_rate": 6.917188587334725e-06,
"loss": 0.2238,
"step": 4700
},
{
"epoch": 3.277661795407098,
"grad_norm": 22.442302703857422,
"learning_rate": 6.889352818371609e-06,
"loss": 0.2874,
"step": 4710
},
{
"epoch": 3.2846207376478773,
"grad_norm": 5.26035213470459,
"learning_rate": 6.861517049408491e-06,
"loss": 0.2347,
"step": 4720
},
{
"epoch": 3.291579679888657,
"grad_norm": 11.35543155670166,
"learning_rate": 6.8336812804453735e-06,
"loss": 0.2517,
"step": 4730
},
{
"epoch": 3.298538622129436,
"grad_norm": 20.361177444458008,
"learning_rate": 6.805845511482255e-06,
"loss": 0.2306,
"step": 4740
},
{
"epoch": 3.305497564370216,
"grad_norm": 21.40257453918457,
"learning_rate": 6.778009742519137e-06,
"loss": 0.245,
"step": 4750
},
{
"epoch": 3.312456506610995,
"grad_norm": 15.335564613342285,
"learning_rate": 6.75017397355602e-06,
"loss": 0.2872,
"step": 4760
},
{
"epoch": 3.3194154488517746,
"grad_norm": 12.894388198852539,
"learning_rate": 6.722338204592902e-06,
"loss": 0.2023,
"step": 4770
},
{
"epoch": 3.326374391092554,
"grad_norm": 9.890000343322754,
"learning_rate": 6.694502435629785e-06,
"loss": 0.2154,
"step": 4780
},
{
"epoch": 3.3333333333333335,
"grad_norm": 9.852010726928711,
"learning_rate": 6.666666666666667e-06,
"loss": 0.2223,
"step": 4790
},
{
"epoch": 3.3402922755741127,
"grad_norm": 12.012703895568848,
"learning_rate": 6.638830897703549e-06,
"loss": 0.1917,
"step": 4800
},
{
"epoch": 3.3472512178148923,
"grad_norm": 6.725717544555664,
"learning_rate": 6.610995128740431e-06,
"loss": 0.2387,
"step": 4810
},
{
"epoch": 3.3542101600556715,
"grad_norm": 11.746057510375977,
"learning_rate": 6.583159359777315e-06,
"loss": 0.2588,
"step": 4820
},
{
"epoch": 3.3611691022964507,
"grad_norm": 15.1249418258667,
"learning_rate": 6.555323590814197e-06,
"loss": 0.2504,
"step": 4830
},
{
"epoch": 3.3681280445372304,
"grad_norm": 43.986305236816406,
"learning_rate": 6.52748782185108e-06,
"loss": 0.2518,
"step": 4840
},
{
"epoch": 3.3750869867780096,
"grad_norm": 6.751053810119629,
"learning_rate": 6.4996520528879615e-06,
"loss": 0.2555,
"step": 4850
},
{
"epoch": 3.382045929018789,
"grad_norm": 11.51275634765625,
"learning_rate": 6.471816283924844e-06,
"loss": 0.2098,
"step": 4860
},
{
"epoch": 3.3890048712595684,
"grad_norm": 28.333683013916016,
"learning_rate": 6.443980514961726e-06,
"loss": 0.2588,
"step": 4870
},
{
"epoch": 3.395963813500348,
"grad_norm": 13.346843719482422,
"learning_rate": 6.416144745998609e-06,
"loss": 0.2254,
"step": 4880
},
{
"epoch": 3.4029227557411272,
"grad_norm": 14.337092399597168,
"learning_rate": 6.388308977035491e-06,
"loss": 0.2666,
"step": 4890
},
{
"epoch": 3.409881697981907,
"grad_norm": 19.120765686035156,
"learning_rate": 6.360473208072373e-06,
"loss": 0.2897,
"step": 4900
},
{
"epoch": 3.416840640222686,
"grad_norm": 9.88152027130127,
"learning_rate": 6.332637439109256e-06,
"loss": 0.272,
"step": 4910
},
{
"epoch": 3.4237995824634657,
"grad_norm": 16.881410598754883,
"learning_rate": 6.304801670146138e-06,
"loss": 0.1939,
"step": 4920
},
{
"epoch": 3.430758524704245,
"grad_norm": 9.80156421661377,
"learning_rate": 6.276965901183021e-06,
"loss": 0.1824,
"step": 4930
},
{
"epoch": 3.437717466945024,
"grad_norm": 13.772383689880371,
"learning_rate": 6.249130132219903e-06,
"loss": 0.2546,
"step": 4940
},
{
"epoch": 3.4446764091858038,
"grad_norm": 15.60239028930664,
"learning_rate": 6.221294363256786e-06,
"loss": 0.287,
"step": 4950
},
{
"epoch": 3.4516353514265834,
"grad_norm": 13.885263442993164,
"learning_rate": 6.193458594293668e-06,
"loss": 0.3121,
"step": 4960
},
{
"epoch": 3.4585942936673626,
"grad_norm": 13.832782745361328,
"learning_rate": 6.16562282533055e-06,
"loss": 0.1614,
"step": 4970
},
{
"epoch": 3.465553235908142,
"grad_norm": 8.264083862304688,
"learning_rate": 6.137787056367432e-06,
"loss": 0.2014,
"step": 4980
},
{
"epoch": 3.4725121781489214,
"grad_norm": 10.630083084106445,
"learning_rate": 6.109951287404315e-06,
"loss": 0.198,
"step": 4990
},
{
"epoch": 3.4794711203897006,
"grad_norm": 12.914116859436035,
"learning_rate": 6.082115518441197e-06,
"loss": 0.27,
"step": 5000
},
{
"epoch": 3.4864300626304803,
"grad_norm": 9.667845726013184,
"learning_rate": 6.0542797494780806e-06,
"loss": 0.2419,
"step": 5010
},
{
"epoch": 3.4933890048712595,
"grad_norm": 12.074315071105957,
"learning_rate": 6.0264439805149625e-06,
"loss": 0.2857,
"step": 5020
},
{
"epoch": 3.500347947112039,
"grad_norm": 15.645792007446289,
"learning_rate": 5.998608211551845e-06,
"loss": 0.2042,
"step": 5030
},
{
"epoch": 3.5073068893528183,
"grad_norm": 9.472585678100586,
"learning_rate": 5.970772442588727e-06,
"loss": 0.1702,
"step": 5040
},
{
"epoch": 3.5142658315935975,
"grad_norm": 11.11557674407959,
"learning_rate": 5.942936673625609e-06,
"loss": 0.24,
"step": 5050
},
{
"epoch": 3.521224773834377,
"grad_norm": 6.724925518035889,
"learning_rate": 5.915100904662492e-06,
"loss": 0.2401,
"step": 5060
},
{
"epoch": 3.528183716075157,
"grad_norm": 32.468055725097656,
"learning_rate": 5.887265135699374e-06,
"loss": 0.1586,
"step": 5070
},
{
"epoch": 3.535142658315936,
"grad_norm": 19.15355110168457,
"learning_rate": 5.8594293667362565e-06,
"loss": 0.2534,
"step": 5080
},
{
"epoch": 3.542101600556715,
"grad_norm": 5.981065273284912,
"learning_rate": 5.831593597773138e-06,
"loss": 0.275,
"step": 5090
},
{
"epoch": 3.549060542797495,
"grad_norm": 13.749540328979492,
"learning_rate": 5.803757828810022e-06,
"loss": 0.2548,
"step": 5100
},
{
"epoch": 3.556019485038274,
"grad_norm": 11.47478199005127,
"learning_rate": 5.775922059846903e-06,
"loss": 0.2089,
"step": 5110
},
{
"epoch": 3.5629784272790537,
"grad_norm": 9.613821029663086,
"learning_rate": 5.748086290883787e-06,
"loss": 0.2008,
"step": 5120
},
{
"epoch": 3.569937369519833,
"grad_norm": 9.990856170654297,
"learning_rate": 5.7202505219206686e-06,
"loss": 0.1772,
"step": 5130
},
{
"epoch": 3.5768963117606125,
"grad_norm": 13.554731369018555,
"learning_rate": 5.692414752957551e-06,
"loss": 0.2308,
"step": 5140
},
{
"epoch": 3.5838552540013917,
"grad_norm": 13.590909004211426,
"learning_rate": 5.664578983994433e-06,
"loss": 0.1847,
"step": 5150
},
{
"epoch": 3.5908141962421714,
"grad_norm": 19.21977996826172,
"learning_rate": 5.636743215031316e-06,
"loss": 0.2994,
"step": 5160
},
{
"epoch": 3.5977731384829506,
"grad_norm": 12.067795753479004,
"learning_rate": 5.608907446068198e-06,
"loss": 0.2212,
"step": 5170
},
{
"epoch": 3.60473208072373,
"grad_norm": 23.037675857543945,
"learning_rate": 5.581071677105081e-06,
"loss": 0.2694,
"step": 5180
},
{
"epoch": 3.6116910229645094,
"grad_norm": 7.532259464263916,
"learning_rate": 5.553235908141963e-06,
"loss": 0.2941,
"step": 5190
},
{
"epoch": 3.6186499652052886,
"grad_norm": 10.377799987792969,
"learning_rate": 5.5254001391788445e-06,
"loss": 0.2152,
"step": 5200
},
{
"epoch": 3.6256089074460682,
"grad_norm": 7.491756916046143,
"learning_rate": 5.497564370215728e-06,
"loss": 0.2326,
"step": 5210
},
{
"epoch": 3.632567849686848,
"grad_norm": 13.305363655090332,
"learning_rate": 5.46972860125261e-06,
"loss": 0.1967,
"step": 5220
},
{
"epoch": 3.639526791927627,
"grad_norm": 8.822273254394531,
"learning_rate": 5.441892832289493e-06,
"loss": 0.2532,
"step": 5230
},
{
"epoch": 3.6464857341684063,
"grad_norm": 22.017900466918945,
"learning_rate": 5.414057063326375e-06,
"loss": 0.2643,
"step": 5240
},
{
"epoch": 3.653444676409186,
"grad_norm": 17.09214210510254,
"learning_rate": 5.3862212943632574e-06,
"loss": 0.2167,
"step": 5250
},
{
"epoch": 3.660403618649965,
"grad_norm": 14.493659973144531,
"learning_rate": 5.358385525400139e-06,
"loss": 0.2321,
"step": 5260
},
{
"epoch": 3.6673625608907447,
"grad_norm": 4.279122829437256,
"learning_rate": 5.330549756437022e-06,
"loss": 0.228,
"step": 5270
},
{
"epoch": 3.674321503131524,
"grad_norm": 4.165134429931641,
"learning_rate": 5.302713987473904e-06,
"loss": 0.2386,
"step": 5280
},
{
"epoch": 3.6812804453723036,
"grad_norm": 15.653182983398438,
"learning_rate": 5.274878218510787e-06,
"loss": 0.2298,
"step": 5290
},
{
"epoch": 3.688239387613083,
"grad_norm": 6.955724239349365,
"learning_rate": 5.247042449547669e-06,
"loss": 0.1807,
"step": 5300
},
{
"epoch": 3.695198329853862,
"grad_norm": 12.580881118774414,
"learning_rate": 5.219206680584552e-06,
"loss": 0.2604,
"step": 5310
},
{
"epoch": 3.7021572720946416,
"grad_norm": 9.050446510314941,
"learning_rate": 5.191370911621434e-06,
"loss": 0.2233,
"step": 5320
},
{
"epoch": 3.7091162143354213,
"grad_norm": 8.741286277770996,
"learning_rate": 5.163535142658317e-06,
"loss": 0.223,
"step": 5330
},
{
"epoch": 3.7160751565762005,
"grad_norm": 5.017666816711426,
"learning_rate": 5.135699373695199e-06,
"loss": 0.1976,
"step": 5340
},
{
"epoch": 3.7230340988169797,
"grad_norm": 15.6959228515625,
"learning_rate": 5.107863604732081e-06,
"loss": 0.2077,
"step": 5350
},
{
"epoch": 3.7299930410577593,
"grad_norm": 12.862638473510742,
"learning_rate": 5.0800278357689635e-06,
"loss": 0.1962,
"step": 5360
},
{
"epoch": 3.7369519832985385,
"grad_norm": 11.374602317810059,
"learning_rate": 5.0521920668058454e-06,
"loss": 0.2101,
"step": 5370
},
{
"epoch": 3.743910925539318,
"grad_norm": 25.180683135986328,
"learning_rate": 5.024356297842728e-06,
"loss": 0.2527,
"step": 5380
},
{
"epoch": 3.7508698677800973,
"grad_norm": 7.092601299285889,
"learning_rate": 4.996520528879611e-06,
"loss": 0.249,
"step": 5390
},
{
"epoch": 3.757828810020877,
"grad_norm": 12.866328239440918,
"learning_rate": 4.968684759916494e-06,
"loss": 0.1986,
"step": 5400
},
{
"epoch": 3.764787752261656,
"grad_norm": 15.909232139587402,
"learning_rate": 4.940848990953376e-06,
"loss": 0.2104,
"step": 5410
},
{
"epoch": 3.7717466945024354,
"grad_norm": 18.9605770111084,
"learning_rate": 4.9130132219902575e-06,
"loss": 0.2601,
"step": 5420
},
{
"epoch": 3.778705636743215,
"grad_norm": 21.599374771118164,
"learning_rate": 4.88517745302714e-06,
"loss": 0.222,
"step": 5430
},
{
"epoch": 3.7856645789839947,
"grad_norm": 16.166671752929688,
"learning_rate": 4.857341684064022e-06,
"loss": 0.2494,
"step": 5440
},
{
"epoch": 3.792623521224774,
"grad_norm": 5.08117151260376,
"learning_rate": 4.829505915100905e-06,
"loss": 0.2836,
"step": 5450
},
{
"epoch": 3.799582463465553,
"grad_norm": 20.023841857910156,
"learning_rate": 4.801670146137788e-06,
"loss": 0.2123,
"step": 5460
},
{
"epoch": 3.8065414057063327,
"grad_norm": 17.623476028442383,
"learning_rate": 4.77383437717467e-06,
"loss": 0.2018,
"step": 5470
},
{
"epoch": 3.813500347947112,
"grad_norm": 17.300357818603516,
"learning_rate": 4.745998608211552e-06,
"loss": 0.2031,
"step": 5480
},
{
"epoch": 3.8204592901878915,
"grad_norm": 19.605348587036133,
"learning_rate": 4.718162839248434e-06,
"loss": 0.1733,
"step": 5490
},
{
"epoch": 3.8274182324286707,
"grad_norm": 13.359166145324707,
"learning_rate": 4.690327070285317e-06,
"loss": 0.2236,
"step": 5500
},
{
"epoch": 3.8343771746694504,
"grad_norm": 22.73190689086914,
"learning_rate": 4.6624913013222e-06,
"loss": 0.2148,
"step": 5510
},
{
"epoch": 3.8413361169102296,
"grad_norm": 14.263452529907227,
"learning_rate": 4.634655532359082e-06,
"loss": 0.3008,
"step": 5520
},
{
"epoch": 3.848295059151009,
"grad_norm": 24.41339111328125,
"learning_rate": 4.6068197633959645e-06,
"loss": 0.2365,
"step": 5530
},
{
"epoch": 3.8552540013917884,
"grad_norm": 10.054245948791504,
"learning_rate": 4.578983994432846e-06,
"loss": 0.2201,
"step": 5540
},
{
"epoch": 3.862212943632568,
"grad_norm": 20.606000900268555,
"learning_rate": 4.551148225469729e-06,
"loss": 0.2901,
"step": 5550
},
{
"epoch": 3.8691718858733473,
"grad_norm": 8.361483573913574,
"learning_rate": 4.523312456506611e-06,
"loss": 0.2133,
"step": 5560
},
{
"epoch": 3.8761308281141265,
"grad_norm": 16.224584579467773,
"learning_rate": 4.495476687543494e-06,
"loss": 0.2648,
"step": 5570
},
{
"epoch": 3.883089770354906,
"grad_norm": 24.251644134521484,
"learning_rate": 4.467640918580376e-06,
"loss": 0.2505,
"step": 5580
},
{
"epoch": 3.8900487125956853,
"grad_norm": 10.059554100036621,
"learning_rate": 4.4398051496172585e-06,
"loss": 0.1715,
"step": 5590
},
{
"epoch": 3.897007654836465,
"grad_norm": 22.54900550842285,
"learning_rate": 4.41196938065414e-06,
"loss": 0.1525,
"step": 5600
},
{
"epoch": 3.903966597077244,
"grad_norm": 5.793692588806152,
"learning_rate": 4.384133611691023e-06,
"loss": 0.2345,
"step": 5610
},
{
"epoch": 3.910925539318024,
"grad_norm": 19.2098445892334,
"learning_rate": 4.356297842727906e-06,
"loss": 0.165,
"step": 5620
},
{
"epoch": 3.917884481558803,
"grad_norm": 11.589962005615234,
"learning_rate": 4.328462073764788e-06,
"loss": 0.2033,
"step": 5630
},
{
"epoch": 3.9248434237995826,
"grad_norm": 13.061795234680176,
"learning_rate": 4.300626304801671e-06,
"loss": 0.2517,
"step": 5640
},
{
"epoch": 3.931802366040362,
"grad_norm": 9.12142276763916,
"learning_rate": 4.272790535838553e-06,
"loss": 0.2068,
"step": 5650
},
{
"epoch": 3.9387613082811415,
"grad_norm": 14.601790428161621,
"learning_rate": 4.244954766875435e-06,
"loss": 0.2864,
"step": 5660
},
{
"epoch": 3.9457202505219207,
"grad_norm": 10.787036895751953,
"learning_rate": 4.217118997912318e-06,
"loss": 0.2286,
"step": 5670
},
{
"epoch": 3.9526791927627,
"grad_norm": 11.121417045593262,
"learning_rate": 4.1892832289492e-06,
"loss": 0.2385,
"step": 5680
},
{
"epoch": 3.9596381350034795,
"grad_norm": 11.553411483764648,
"learning_rate": 4.161447459986083e-06,
"loss": 0.2509,
"step": 5690
},
{
"epoch": 3.966597077244259,
"grad_norm": 12.59765625,
"learning_rate": 4.1336116910229655e-06,
"loss": 0.2478,
"step": 5700
},
{
"epoch": 3.9735560194850383,
"grad_norm": 10.153321266174316,
"learning_rate": 4.105775922059847e-06,
"loss": 0.2149,
"step": 5710
},
{
"epoch": 3.9805149617258175,
"grad_norm": 10.688750267028809,
"learning_rate": 4.077940153096729e-06,
"loss": 0.2423,
"step": 5720
},
{
"epoch": 3.987473903966597,
"grad_norm": 22.212329864501953,
"learning_rate": 4.050104384133612e-06,
"loss": 0.2626,
"step": 5730
},
{
"epoch": 3.9944328462073764,
"grad_norm": 7.783158302307129,
"learning_rate": 4.022268615170494e-06,
"loss": 0.2033,
"step": 5740
},
{
"epoch": 4.0,
"eval_accuracy": 0.8193211488250652,
"eval_f1": 0.8180624199818877,
"eval_loss": 0.6137004494667053,
"eval_precision": 0.8174134517321099,
"eval_recall": 0.8193211488250652,
"eval_runtime": 30.262,
"eval_samples_per_second": 189.842,
"eval_steps_per_second": 5.948,
"step": 5748
}
],
"logging_steps": 10,
"max_steps": 7185,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.2096484874133504e+16,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}