{
"best_global_step": 4839,
"best_metric": 0.43726749573500223,
"best_model_checkpoint": "constellation_one_text/checkpoint-4839",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 4839,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00744301442084044,
"grad_norm": 16.79511070251465,
"learning_rate": 4.5454545454545457e-07,
"loss": 5.104981422424316,
"step": 12
},
{
"epoch": 0.01488602884168088,
"grad_norm": 14.2466402053833,
"learning_rate": 9.50413223140496e-07,
"loss": 4.855861345926921,
"step": 24
},
{
"epoch": 0.02232904326252132,
"grad_norm": 15.325632095336914,
"learning_rate": 1.4462809917355372e-06,
"loss": 4.62240473429362,
"step": 36
},
{
"epoch": 0.02977205768336176,
"grad_norm": 14.659135818481445,
"learning_rate": 1.9421487603305786e-06,
"loss": 4.279359499613444,
"step": 48
},
{
"epoch": 0.037215072104202204,
"grad_norm": 11.83539867401123,
"learning_rate": 2.43801652892562e-06,
"loss": 4.009869893391927,
"step": 60
},
{
"epoch": 0.04465808652504264,
"grad_norm": 12.459957122802734,
"learning_rate": 2.9338842975206615e-06,
"loss": 3.658400217692057,
"step": 72
},
{
"epoch": 0.052101100945883085,
"grad_norm": 10.960445404052734,
"learning_rate": 3.429752066115703e-06,
"loss": 3.3341188430786133,
"step": 84
},
{
"epoch": 0.05954411536672352,
"grad_norm": 10.394844055175781,
"learning_rate": 3.925619834710744e-06,
"loss": 2.9902642567952475,
"step": 96
},
{
"epoch": 0.06698712978756396,
"grad_norm": 10.080375671386719,
"learning_rate": 4.421487603305786e-06,
"loss": 2.7187296549479165,
"step": 108
},
{
"epoch": 0.07443014420840441,
"grad_norm": 11.83609676361084,
"learning_rate": 4.917355371900827e-06,
"loss": 2.4078760147094727,
"step": 120
},
{
"epoch": 0.08187315862924484,
"grad_norm": 20.040725708007812,
"learning_rate": 5.413223140495868e-06,
"loss": 2.1843010584513345,
"step": 132
},
{
"epoch": 0.08931617305008528,
"grad_norm": 10.70347785949707,
"learning_rate": 5.90909090909091e-06,
"loss": 1.9951588312784831,
"step": 144
},
{
"epoch": 0.09675918747092573,
"grad_norm": 13.84825611114502,
"learning_rate": 6.404958677685951e-06,
"loss": 1.7978707949320476,
"step": 156
},
{
"epoch": 0.10420220189176617,
"grad_norm": 8.921030044555664,
"learning_rate": 6.900826446280993e-06,
"loss": 1.6856780052185059,
"step": 168
},
{
"epoch": 0.1116452163126066,
"grad_norm": 7.919989585876465,
"learning_rate": 7.396694214876033e-06,
"loss": 1.497524897257487,
"step": 180
},
{
"epoch": 0.11908823073344704,
"grad_norm": 15.635968208312988,
"learning_rate": 7.892561983471076e-06,
"loss": 1.4976633389790852,
"step": 192
},
{
"epoch": 0.12653124515428749,
"grad_norm": 14.213494300842285,
"learning_rate": 8.388429752066116e-06,
"loss": 1.4405194918314617,
"step": 204
},
{
"epoch": 0.13397425957512793,
"grad_norm": 10.790483474731445,
"learning_rate": 8.884297520661158e-06,
"loss": 1.2696106433868408,
"step": 216
},
{
"epoch": 0.14141727399596837,
"grad_norm": 14.101875305175781,
"learning_rate": 9.3801652892562e-06,
"loss": 1.3300576210021973,
"step": 228
},
{
"epoch": 0.14886028841680882,
"grad_norm": 19.911815643310547,
"learning_rate": 9.876033057851241e-06,
"loss": 1.2497991720835369,
"step": 240
},
{
"epoch": 0.15630330283764926,
"grad_norm": 12.594736099243164,
"learning_rate": 1.0371900826446282e-05,
"loss": 1.20013427734375,
"step": 252
},
{
"epoch": 0.16374631725848968,
"grad_norm": 10.003790855407715,
"learning_rate": 1.0867768595041323e-05,
"loss": 1.1903626918792725,
"step": 264
},
{
"epoch": 0.17118933167933012,
"grad_norm": 19.644290924072266,
"learning_rate": 1.1363636363636366e-05,
"loss": 1.2084464232126872,
"step": 276
},
{
"epoch": 0.17863234610017056,
"grad_norm": 12.33438777923584,
"learning_rate": 1.1859504132231406e-05,
"loss": 1.1396081447601318,
"step": 288
},
{
"epoch": 0.186075360521011,
"grad_norm": 7.845709800720215,
"learning_rate": 1.2355371900826447e-05,
"loss": 1.0346049467722576,
"step": 300
},
{
"epoch": 0.19351837494185145,
"grad_norm": 12.355867385864258,
"learning_rate": 1.2851239669421488e-05,
"loss": 1.0486024220784504,
"step": 312
},
{
"epoch": 0.2009613893626919,
"grad_norm": 9.542502403259277,
"learning_rate": 1.3347107438016531e-05,
"loss": 1.1321392059326172,
"step": 324
},
{
"epoch": 0.20840440378353234,
"grad_norm": 171.94647216796875,
"learning_rate": 1.384297520661157e-05,
"loss": 0.9731620152791342,
"step": 336
},
{
"epoch": 0.21584741820437278,
"grad_norm": 14.012189865112305,
"learning_rate": 1.4338842975206612e-05,
"loss": 0.9310257434844971,
"step": 348
},
{
"epoch": 0.2232904326252132,
"grad_norm": 17.743682861328125,
"learning_rate": 1.4834710743801655e-05,
"loss": 0.9263285795847574,
"step": 360
},
{
"epoch": 0.23073344704605364,
"grad_norm": 29.65188217163086,
"learning_rate": 1.5330578512396693e-05,
"loss": 1.0049312114715576,
"step": 372
},
{
"epoch": 0.23817646146689408,
"grad_norm": 16.46782684326172,
"learning_rate": 1.5826446280991736e-05,
"loss": 1.078270673751831,
"step": 384
},
{
"epoch": 0.24561947588773453,
"grad_norm": 15.282443046569824,
"learning_rate": 1.632231404958678e-05,
"loss": 0.9908095200856527,
"step": 396
},
{
"epoch": 0.25306249030857497,
"grad_norm": 7.152077674865723,
"learning_rate": 1.681818181818182e-05,
"loss": 0.8867685794830322,
"step": 408
},
{
"epoch": 0.2605055047294154,
"grad_norm": 17.630233764648438,
"learning_rate": 1.731404958677686e-05,
"loss": 0.8261091709136963,
"step": 420
},
{
"epoch": 0.26794851915025586,
"grad_norm": 8.756381034851074,
"learning_rate": 1.78099173553719e-05,
"loss": 0.8141599496205648,
"step": 432
},
{
"epoch": 0.2753915335710963,
"grad_norm": 14.227313041687012,
"learning_rate": 1.8305785123966944e-05,
"loss": 0.8025492032368978,
"step": 444
},
{
"epoch": 0.28283454799193675,
"grad_norm": 6.028214931488037,
"learning_rate": 1.8801652892561987e-05,
"loss": 0.827876885732015,
"step": 456
},
{
"epoch": 0.2902775624127772,
"grad_norm": 9.791404724121094,
"learning_rate": 1.9297520661157026e-05,
"loss": 0.8186439673105875,
"step": 468
},
{
"epoch": 0.29772057683361763,
"grad_norm": 19.028491973876953,
"learning_rate": 1.9793388429752066e-05,
"loss": 0.8027651309967041,
"step": 480
},
{
"epoch": 0.3051635912544581,
"grad_norm": 5.418436527252197,
"learning_rate": 1.996785304247991e-05,
"loss": 0.7800490061442057,
"step": 492
},
{
"epoch": 0.3126066056752985,
"grad_norm": 7.598865985870361,
"learning_rate": 1.9912743972445466e-05,
"loss": 0.7126566569010416,
"step": 504
},
{
"epoch": 0.3200496200961389,
"grad_norm": 7.867424011230469,
"learning_rate": 1.9857634902411024e-05,
"loss": 0.6536041895548502,
"step": 516
},
{
"epoch": 0.32749263451697935,
"grad_norm": 10.367350578308105,
"learning_rate": 1.980252583237658e-05,
"loss": 0.8624240557352701,
"step": 528
},
{
"epoch": 0.3349356489378198,
"grad_norm": 6.30031681060791,
"learning_rate": 1.9747416762342138e-05,
"loss": 0.8412895202636719,
"step": 540
},
{
"epoch": 0.34237866335866024,
"grad_norm": 15.809948921203613,
"learning_rate": 1.9692307692307696e-05,
"loss": 0.7370687325795492,
"step": 552
},
{
"epoch": 0.3498216777795007,
"grad_norm": 6.0920491218566895,
"learning_rate": 1.963719862227325e-05,
"loss": 0.7390193144480387,
"step": 564
},
{
"epoch": 0.3572646922003411,
"grad_norm": 11.583715438842773,
"learning_rate": 1.9582089552238807e-05,
"loss": 0.6651956637700399,
"step": 576
},
{
"epoch": 0.36470770662118157,
"grad_norm": 11.411588668823242,
"learning_rate": 1.9526980482204364e-05,
"loss": 0.7644002437591553,
"step": 588
},
{
"epoch": 0.372150721042022,
"grad_norm": 8.31484603881836,
"learning_rate": 1.947187141216992e-05,
"loss": 0.6794478893280029,
"step": 600
},
{
"epoch": 0.37959373546286246,
"grad_norm": 6.703721523284912,
"learning_rate": 1.941676234213548e-05,
"loss": 0.6266262531280518,
"step": 612
},
{
"epoch": 0.3870367498837029,
"grad_norm": 9.479427337646484,
"learning_rate": 1.9361653272101036e-05,
"loss": 0.6851427555084229,
"step": 624
},
{
"epoch": 0.39447976430454335,
"grad_norm": 7.663156032562256,
"learning_rate": 1.9306544202066593e-05,
"loss": 0.6938677628835043,
"step": 636
},
{
"epoch": 0.4019227787253838,
"grad_norm": 4.276080131530762,
"learning_rate": 1.9251435132032147e-05,
"loss": 0.76728622118632,
"step": 648
},
{
"epoch": 0.40936579314622423,
"grad_norm": 11.622859001159668,
"learning_rate": 1.9196326061997705e-05,
"loss": 0.7580918471018473,
"step": 660
},
{
"epoch": 0.4168088075670647,
"grad_norm": 13.203335762023926,
"learning_rate": 1.9141216991963262e-05,
"loss": 0.642679770787557,
"step": 672
},
{
"epoch": 0.4242518219879051,
"grad_norm": 8.963321685791016,
"learning_rate": 1.908610792192882e-05,
"loss": 0.6361099084218343,
"step": 684
},
{
"epoch": 0.43169483640874556,
"grad_norm": 8.1705904006958,
"learning_rate": 1.9030998851894377e-05,
"loss": 0.6898341178894043,
"step": 696
},
{
"epoch": 0.439137850829586,
"grad_norm": 3.9877262115478516,
"learning_rate": 1.8975889781859934e-05,
"loss": 0.6462088028589884,
"step": 708
},
{
"epoch": 0.4465808652504264,
"grad_norm": 12.81478500366211,
"learning_rate": 1.892078071182549e-05,
"loss": 0.6965091228485107,
"step": 720
},
{
"epoch": 0.45402387967126684,
"grad_norm": 7.810659885406494,
"learning_rate": 1.8865671641791045e-05,
"loss": 0.7788422902425131,
"step": 732
},
{
"epoch": 0.4614668940921073,
"grad_norm": 4.958326816558838,
"learning_rate": 1.8810562571756603e-05,
"loss": 0.7460188865661621,
"step": 744
},
{
"epoch": 0.4689099085129477,
"grad_norm": 9.091962814331055,
"learning_rate": 1.875545350172216e-05,
"loss": 0.6937299569447836,
"step": 756
},
{
"epoch": 0.47635292293378817,
"grad_norm": 7.729589939117432,
"learning_rate": 1.8700344431687717e-05,
"loss": 0.6188247601191202,
"step": 768
},
{
"epoch": 0.4837959373546286,
"grad_norm": 8.878933906555176,
"learning_rate": 1.8645235361653275e-05,
"loss": 0.7017858028411865,
"step": 780
},
{
"epoch": 0.49123895177546906,
"grad_norm": 23.914348602294922,
"learning_rate": 1.8590126291618832e-05,
"loss": 0.7923436164855957,
"step": 792
},
{
"epoch": 0.4986819661963095,
"grad_norm": 10.980387687683105,
"learning_rate": 1.853501722158439e-05,
"loss": 0.6881453990936279,
"step": 804
},
{
"epoch": 0.5061249806171499,
"grad_norm": 6.988458156585693,
"learning_rate": 1.8479908151549943e-05,
"loss": 0.683276891708374,
"step": 816
},
{
"epoch": 0.5135679950379903,
"grad_norm": 23.667926788330078,
"learning_rate": 1.84247990815155e-05,
"loss": 0.6124229431152344,
"step": 828
},
{
"epoch": 0.5210110094588308,
"grad_norm": 7.078935623168945,
"learning_rate": 1.8369690011481058e-05,
"loss": 0.7043429215749105,
"step": 840
},
{
"epoch": 0.5284540238796712,
"grad_norm": 9.82224178314209,
"learning_rate": 1.8314580941446615e-05,
"loss": 0.6555114189783732,
"step": 852
},
{
"epoch": 0.5358970383005117,
"grad_norm": 8.077360153198242,
"learning_rate": 1.8259471871412173e-05,
"loss": 0.6555444002151489,
"step": 864
},
{
"epoch": 0.5433400527213521,
"grad_norm": 3.6762046813964844,
"learning_rate": 1.820436280137773e-05,
"loss": 0.636172374089559,
"step": 876
},
{
"epoch": 0.5507830671421926,
"grad_norm": 3.8388607501983643,
"learning_rate": 1.8149253731343287e-05,
"loss": 0.6085333824157715,
"step": 888
},
{
"epoch": 0.558226081563033,
"grad_norm": 3.0353925228118896,
"learning_rate": 1.809414466130884e-05,
"loss": 0.58968718846639,
"step": 900
},
{
"epoch": 0.5656690959838735,
"grad_norm": 6.465055465698242,
"learning_rate": 1.80390355912744e-05,
"loss": 0.6078658103942871,
"step": 912
},
{
"epoch": 0.5731121104047139,
"grad_norm": 5.472475528717041,
"learning_rate": 1.7983926521239956e-05,
"loss": 0.6997927029927572,
"step": 924
},
{
"epoch": 0.5805551248255544,
"grad_norm": 15.40697193145752,
"learning_rate": 1.792881745120551e-05,
"loss": 0.6386371453603109,
"step": 936
},
{
"epoch": 0.5879981392463948,
"grad_norm": 6.439900875091553,
"learning_rate": 1.787370838117107e-05,
"loss": 0.6876135667165121,
"step": 948
},
{
"epoch": 0.5954411536672353,
"grad_norm": 10.793220520019531,
"learning_rate": 1.7818599311136628e-05,
"loss": 0.6237523953119913,
"step": 960
},
{
"epoch": 0.6028841680880757,
"grad_norm": 5.377976417541504,
"learning_rate": 1.7763490241102185e-05,
"loss": 0.614266554514567,
"step": 972
},
{
"epoch": 0.6103271825089162,
"grad_norm": 7.794371604919434,
"learning_rate": 1.770838117106774e-05,
"loss": 0.5918615261713663,
"step": 984
},
{
"epoch": 0.6177701969297565,
"grad_norm": 4.7419867515563965,
"learning_rate": 1.7653272101033296e-05,
"loss": 0.5848552385965983,
"step": 996
},
{
"epoch": 0.625213211350597,
"grad_norm": 14.705470085144043,
"learning_rate": 1.7598163030998854e-05,
"loss": 0.6608580350875854,
"step": 1008
},
{
"epoch": 0.6326562257714374,
"grad_norm": 6.041922092437744,
"learning_rate": 1.754305396096441e-05,
"loss": 0.549665609995524,
"step": 1020
},
{
"epoch": 0.6400992401922778,
"grad_norm": 5.13696813583374,
"learning_rate": 1.7487944890929965e-05,
"loss": 0.7017458279927572,
"step": 1032
},
{
"epoch": 0.6475422546131183,
"grad_norm": 6.016454696655273,
"learning_rate": 1.7432835820895522e-05,
"loss": 0.6309004227320353,
"step": 1044
},
{
"epoch": 0.6549852690339587,
"grad_norm": 9.331708908081055,
"learning_rate": 1.7377726750861083e-05,
"loss": 0.6831174691518148,
"step": 1056
},
{
"epoch": 0.6624282834547992,
"grad_norm": 9.878951072692871,
"learning_rate": 1.7322617680826637e-05,
"loss": 0.6587471961975098,
"step": 1068
},
{
"epoch": 0.6698712978756396,
"grad_norm": 5.033365726470947,
"learning_rate": 1.7267508610792194e-05,
"loss": 0.6370361646016439,
"step": 1080
},
{
"epoch": 0.6773143122964801,
"grad_norm": 18.762298583984375,
"learning_rate": 1.721239954075775e-05,
"loss": 0.5823976198832194,
"step": 1092
},
{
"epoch": 0.6847573267173205,
"grad_norm": 2.940394163131714,
"learning_rate": 1.715729047072331e-05,
"loss": 0.6264007488886515,
"step": 1104
},
{
"epoch": 0.692200341138161,
"grad_norm": 7.621018886566162,
"learning_rate": 1.7102181400688863e-05,
"loss": 0.5824793974558512,
"step": 1116
},
{
"epoch": 0.6996433555590014,
"grad_norm": 3.141854763031006,
"learning_rate": 1.704707233065442e-05,
"loss": 0.5842764774958292,
"step": 1128
},
{
"epoch": 0.7070863699798419,
"grad_norm": 5.849940776824951,
"learning_rate": 1.6991963260619978e-05,
"loss": 0.5304047664006551,
"step": 1140
},
{
"epoch": 0.7145293844006823,
"grad_norm": 7.9883551597595215,
"learning_rate": 1.6936854190585535e-05,
"loss": 0.5599017937978109,
"step": 1152
},
{
"epoch": 0.7219723988215228,
"grad_norm": 11.370931625366211,
"learning_rate": 1.6881745120551092e-05,
"loss": 0.5798830588658651,
"step": 1164
},
{
"epoch": 0.7294154132423631,
"grad_norm": 3.5065290927886963,
"learning_rate": 1.682663605051665e-05,
"loss": 0.6167506376902262,
"step": 1176
},
{
"epoch": 0.7368584276632036,
"grad_norm": 5.930673599243164,
"learning_rate": 1.6771526980482207e-05,
"loss": 0.5873833497365316,
"step": 1188
},
{
"epoch": 0.744301442084044,
"grad_norm": 6.102614402770996,
"learning_rate": 1.671641791044776e-05,
"loss": 0.6477183898289999,
"step": 1200
},
{
"epoch": 0.7517444565048845,
"grad_norm": 4.337888717651367,
"learning_rate": 1.6661308840413318e-05,
"loss": 0.5860347350438436,
"step": 1212
},
{
"epoch": 0.7591874709257249,
"grad_norm": 4.841605186462402,
"learning_rate": 1.6606199770378875e-05,
"loss": 0.6613442897796631,
"step": 1224
},
{
"epoch": 0.7666304853465653,
"grad_norm": 14.614047050476074,
"learning_rate": 1.6551090700344433e-05,
"loss": 0.6218246618906657,
"step": 1236
},
{
"epoch": 0.7740734997674058,
"grad_norm": 8.036581039428711,
"learning_rate": 1.649598163030999e-05,
"loss": 0.5646830002466837,
"step": 1248
},
{
"epoch": 0.7815165141882462,
"grad_norm": 3.943291664123535,
"learning_rate": 1.6440872560275547e-05,
"loss": 0.6018180449803671,
"step": 1260
},
{
"epoch": 0.7889595286090867,
"grad_norm": 12.51102352142334,
"learning_rate": 1.6385763490241105e-05,
"loss": 0.6140671968460083,
"step": 1272
},
{
"epoch": 0.7964025430299271,
"grad_norm": 3.718653678894043,
"learning_rate": 1.633065442020666e-05,
"loss": 0.5359119176864624,
"step": 1284
},
{
"epoch": 0.8038455574507676,
"grad_norm": 2.8353357315063477,
"learning_rate": 1.6275545350172216e-05,
"loss": 0.502113143603007,
"step": 1296
},
{
"epoch": 0.811288571871608,
"grad_norm": 4.345269203186035,
"learning_rate": 1.6220436280137773e-05,
"loss": 0.5975545644760132,
"step": 1308
},
{
"epoch": 0.8187315862924485,
"grad_norm": 6.92914342880249,
"learning_rate": 1.616532721010333e-05,
"loss": 0.6587652762730917,
"step": 1320
},
{
"epoch": 0.8261746007132889,
"grad_norm": 4.188693046569824,
"learning_rate": 1.6110218140068888e-05,
"loss": 0.6142017841339111,
"step": 1332
},
{
"epoch": 0.8336176151341294,
"grad_norm": 9.596400260925293,
"learning_rate": 1.6055109070034445e-05,
"loss": 0.5469466845194498,
"step": 1344
},
{
"epoch": 0.8410606295549697,
"grad_norm": 4.810947895050049,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.5744484265645345,
"step": 1356
},
{
"epoch": 0.8485036439758102,
"grad_norm": 3.5819036960601807,
"learning_rate": 1.5944890929965557e-05,
"loss": 0.5045839150746664,
"step": 1368
},
{
"epoch": 0.8559466583966506,
"grad_norm": 4.879307746887207,
"learning_rate": 1.5889781859931114e-05,
"loss": 0.6669184366861979,
"step": 1380
},
{
"epoch": 0.8633896728174911,
"grad_norm": 6.7210693359375,
"learning_rate": 1.583467278989667e-05,
"loss": 0.5324758291244507,
"step": 1392
},
{
"epoch": 0.8708326872383315,
"grad_norm": 4.653786659240723,
"learning_rate": 1.577956371986223e-05,
"loss": 0.543891986211141,
"step": 1404
},
{
"epoch": 0.878275701659172,
"grad_norm": 6.386638641357422,
"learning_rate": 1.5724454649827786e-05,
"loss": 0.5688877105712891,
"step": 1416
},
{
"epoch": 0.8857187160800124,
"grad_norm": 14.5455322265625,
"learning_rate": 1.5669345579793343e-05,
"loss": 0.5081936915715536,
"step": 1428
},
{
"epoch": 0.8931617305008528,
"grad_norm": 13.621335983276367,
"learning_rate": 1.56142365097589e-05,
"loss": 0.5466565688451132,
"step": 1440
},
{
"epoch": 0.9006047449216933,
"grad_norm": 7.791660308837891,
"learning_rate": 1.5559127439724455e-05,
"loss": 0.5543188651402792,
"step": 1452
},
{
"epoch": 0.9080477593425337,
"grad_norm": 4.039332866668701,
"learning_rate": 1.5504018369690012e-05,
"loss": 0.564227819442749,
"step": 1464
},
{
"epoch": 0.9154907737633742,
"grad_norm": 5.744030475616455,
"learning_rate": 1.544890929965557e-05,
"loss": 0.5645032723744711,
"step": 1476
},
{
"epoch": 0.9229337881842146,
"grad_norm": 7.17244815826416,
"learning_rate": 1.5393800229621126e-05,
"loss": 0.6025459369023641,
"step": 1488
},
{
"epoch": 0.9303768026050551,
"grad_norm": 9.460329055786133,
"learning_rate": 1.5338691159586684e-05,
"loss": 0.5522710482279459,
"step": 1500
},
{
"epoch": 0.9378198170258955,
"grad_norm": 8.257369995117188,
"learning_rate": 1.528358208955224e-05,
"loss": 0.5696142514546713,
"step": 1512
},
{
"epoch": 0.945262831446736,
"grad_norm": 14.734770774841309,
"learning_rate": 1.5228473019517798e-05,
"loss": 0.60454261302948,
"step": 1524
},
{
"epoch": 0.9527058458675763,
"grad_norm": 4.352370738983154,
"learning_rate": 1.5173363949483352e-05,
"loss": 0.48172632853190106,
"step": 1536
},
{
"epoch": 0.9601488602884168,
"grad_norm": 2.388683557510376,
"learning_rate": 1.511825487944891e-05,
"loss": 0.5889216661453247,
"step": 1548
},
{
"epoch": 0.9675918747092572,
"grad_norm": 9.910285949707031,
"learning_rate": 1.5063145809414467e-05,
"loss": 0.5621689558029175,
"step": 1560
},
{
"epoch": 0.9750348891300977,
"grad_norm": 5.445796966552734,
"learning_rate": 1.5008036739380026e-05,
"loss": 0.5526663859685262,
"step": 1572
},
{
"epoch": 0.9824779035509381,
"grad_norm": 5.242825984954834,
"learning_rate": 1.495292766934558e-05,
"loss": 0.5673882563908895,
"step": 1584
},
{
"epoch": 0.9899209179717786,
"grad_norm": 10.1865234375,
"learning_rate": 1.4897818599311137e-05,
"loss": 0.5648102362950643,
"step": 1596
},
{
"epoch": 0.997363932392619,
"grad_norm": 5.388990879058838,
"learning_rate": 1.4842709529276695e-05,
"loss": 0.5376612345377604,
"step": 1608
},
{
"epoch": 1.0,
"eval_f1": 0.4322638779516363,
"eval_loss": 0.13781657814979553,
"eval_precision": 0.3967545697112817,
"eval_recall": 0.4884485429972486,
"eval_runtime": 583.7374,
"eval_samples_per_second": 66.278,
"eval_steps_per_second": 1.382,
"step": 1613
},
{
"epoch": 1.004341758412157,
"grad_norm": 11.303878784179688,
"learning_rate": 1.478760045924225e-05,
"loss": 0.46324888865152997,
"step": 1620
},
{
"epoch": 1.0117847728329974,
"grad_norm": 4.389431476593018,
"learning_rate": 1.4732491389207808e-05,
"loss": 0.48095786571502686,
"step": 1632
},
{
"epoch": 1.0192277872538378,
"grad_norm": 2.298799514770508,
"learning_rate": 1.4677382319173365e-05,
"loss": 0.5406383275985718,
"step": 1644
},
{
"epoch": 1.0266708016746782,
"grad_norm": 4.433741092681885,
"learning_rate": 1.4622273249138922e-05,
"loss": 0.4697510798772176,
"step": 1656
},
{
"epoch": 1.0341138160955188,
"grad_norm": 4.704965591430664,
"learning_rate": 1.4567164179104478e-05,
"loss": 0.5180115699768066,
"step": 1668
},
{
"epoch": 1.0415568305163592,
"grad_norm": 5.16159725189209,
"learning_rate": 1.4512055109070035e-05,
"loss": 0.49386584758758545,
"step": 1680
},
{
"epoch": 1.0489998449371996,
"grad_norm": 2.5488502979278564,
"learning_rate": 1.4456946039035593e-05,
"loss": 0.41516109307607013,
"step": 1692
},
{
"epoch": 1.05644285935804,
"grad_norm": 12.81408405303955,
"learning_rate": 1.4401836969001148e-05,
"loss": 0.5269262790679932,
"step": 1704
},
{
"epoch": 1.0638858737788803,
"grad_norm": 2.8521316051483154,
"learning_rate": 1.4346727898966706e-05,
"loss": 0.45834481716156006,
"step": 1716
},
{
"epoch": 1.071328888199721,
"grad_norm": 5.517307758331299,
"learning_rate": 1.4291618828932263e-05,
"loss": 0.573523203531901,
"step": 1728
},
{
"epoch": 1.0787719026205613,
"grad_norm": 2.4321818351745605,
"learning_rate": 1.423650975889782e-05,
"loss": 0.4625085194905599,
"step": 1740
},
{
"epoch": 1.0862149170414017,
"grad_norm": 15.532980918884277,
"learning_rate": 1.4181400688863376e-05,
"loss": 0.5057009855906168,
"step": 1752
},
{
"epoch": 1.093657931462242,
"grad_norm": 4.501278877258301,
"learning_rate": 1.4126291618828933e-05,
"loss": 0.4823911984761556,
"step": 1764
},
{
"epoch": 1.1011009458830827,
"grad_norm": 6.726215362548828,
"learning_rate": 1.407118254879449e-05,
"loss": 0.42187273502349854,
"step": 1776
},
{
"epoch": 1.108543960303923,
"grad_norm": 14.170055389404297,
"learning_rate": 1.4016073478760046e-05,
"loss": 0.5301618576049805,
"step": 1788
},
{
"epoch": 1.1159869747247635,
"grad_norm": 2.776092767715454,
"learning_rate": 1.3960964408725603e-05,
"loss": 0.4935903151830037,
"step": 1800
},
{
"epoch": 1.1234299891456039,
"grad_norm": 7.334898948669434,
"learning_rate": 1.390585533869116e-05,
"loss": 0.5331637859344482,
"step": 1812
},
{
"epoch": 1.1308730035664445,
"grad_norm": 4.995052337646484,
"learning_rate": 1.3850746268656718e-05,
"loss": 0.4663925568262736,
"step": 1824
},
{
"epoch": 1.1383160179872849,
"grad_norm": 9.281367301940918,
"learning_rate": 1.3795637198622274e-05,
"loss": 0.44923396905263263,
"step": 1836
},
{
"epoch": 1.1457590324081253,
"grad_norm": 5.095090866088867,
"learning_rate": 1.3740528128587831e-05,
"loss": 0.5650514364242554,
"step": 1848
},
{
"epoch": 1.1532020468289657,
"grad_norm": 2.299600839614868,
"learning_rate": 1.3685419058553388e-05,
"loss": 0.48252185185750324,
"step": 1860
},
{
"epoch": 1.1606450612498063,
"grad_norm": 6.702273368835449,
"learning_rate": 1.3630309988518944e-05,
"loss": 0.5192966063817342,
"step": 1872
},
{
"epoch": 1.1680880756706467,
"grad_norm": 10.89989948272705,
"learning_rate": 1.3575200918484501e-05,
"loss": 0.48262282212575275,
"step": 1884
},
{
"epoch": 1.175531090091487,
"grad_norm": 15.075289726257324,
"learning_rate": 1.3520091848450059e-05,
"loss": 0.45538806915283203,
"step": 1896
},
{
"epoch": 1.1829741045123274,
"grad_norm": 3.0880722999572754,
"learning_rate": 1.3464982778415616e-05,
"loss": 0.46872226397196454,
"step": 1908
},
{
"epoch": 1.1904171189331678,
"grad_norm": 8.533724784851074,
"learning_rate": 1.3409873708381172e-05,
"loss": 0.4827297528584798,
"step": 1920
},
{
"epoch": 1.1978601333540084,
"grad_norm": 3.070657968521118,
"learning_rate": 1.3354764638346729e-05,
"loss": 0.48583118120829266,
"step": 1932
},
{
"epoch": 1.2053031477748488,
"grad_norm": 3.7270054817199707,
"learning_rate": 1.3299655568312286e-05,
"loss": 0.505421002705892,
"step": 1944
},
{
"epoch": 1.2127461621956892,
"grad_norm": 9.997303009033203,
"learning_rate": 1.3244546498277842e-05,
"loss": 0.4140005111694336,
"step": 1956
},
{
"epoch": 1.2201891766165298,
"grad_norm": 11.578160285949707,
"learning_rate": 1.31894374282434e-05,
"loss": 0.44274091720581055,
"step": 1968
},
{
"epoch": 1.2276321910373702,
"grad_norm": 9.199183464050293,
"learning_rate": 1.3134328358208957e-05,
"loss": 0.5600036780039469,
"step": 1980
},
{
"epoch": 1.2350752054582106,
"grad_norm": 7.212144374847412,
"learning_rate": 1.3079219288174514e-05,
"loss": 0.494090994199117,
"step": 1992
},
{
"epoch": 1.242518219879051,
"grad_norm": 3.4123635292053223,
"learning_rate": 1.302411021814007e-05,
"loss": 0.4909547170003255,
"step": 2004
},
{
"epoch": 1.2499612342998914,
"grad_norm": 7.941708087921143,
"learning_rate": 1.2969001148105627e-05,
"loss": 0.47832663853963214,
"step": 2016
},
{
"epoch": 1.257404248720732,
"grad_norm": 2.4799387454986572,
"learning_rate": 1.2913892078071184e-05,
"loss": 0.49106045564015705,
"step": 2028
},
{
"epoch": 1.2648472631415724,
"grad_norm": 5.136545658111572,
"learning_rate": 1.2858783008036742e-05,
"loss": 0.4738738536834717,
"step": 2040
},
{
"epoch": 1.2722902775624128,
"grad_norm": 4.9489240646362305,
"learning_rate": 1.2803673938002297e-05,
"loss": 0.4953068097432454,
"step": 2052
},
{
"epoch": 1.2797332919832531,
"grad_norm": 6.822914123535156,
"learning_rate": 1.2748564867967855e-05,
"loss": 0.46026841799418133,
"step": 2064
},
{
"epoch": 1.2871763064040938,
"grad_norm": 6.177013874053955,
"learning_rate": 1.2693455797933412e-05,
"loss": 0.494237224260966,
"step": 2076
},
{
"epoch": 1.2946193208249341,
"grad_norm": 2.4243626594543457,
"learning_rate": 1.2638346727898967e-05,
"loss": 0.5003351370493571,
"step": 2088
},
{
"epoch": 1.3020623352457745,
"grad_norm": 18.99603843688965,
"learning_rate": 1.2583237657864525e-05,
"loss": 0.5109163920084635,
"step": 2100
},
{
"epoch": 1.309505349666615,
"grad_norm": 2.4371707439422607,
"learning_rate": 1.2528128587830082e-05,
"loss": 0.41310568650563556,
"step": 2112
},
{
"epoch": 1.3169483640874553,
"grad_norm": 3.7665302753448486,
"learning_rate": 1.247301951779564e-05,
"loss": 0.45848862330118817,
"step": 2124
},
{
"epoch": 1.324391378508296,
"grad_norm": 12.537642478942871,
"learning_rate": 1.2417910447761195e-05,
"loss": 0.523716410001119,
"step": 2136
},
{
"epoch": 1.3318343929291363,
"grad_norm": 2.882084846496582,
"learning_rate": 1.2362801377726752e-05,
"loss": 0.47608526547749835,
"step": 2148
},
{
"epoch": 1.3392774073499767,
"grad_norm": 2.86336612701416,
"learning_rate": 1.230769230769231e-05,
"loss": 0.4347230593363444,
"step": 2160
},
{
"epoch": 1.3467204217708173,
"grad_norm": 3.1628830432891846,
"learning_rate": 1.2252583237657865e-05,
"loss": 0.46674203872680664,
"step": 2172
},
{
"epoch": 1.3541634361916577,
"grad_norm": 11.767653465270996,
"learning_rate": 1.2197474167623423e-05,
"loss": 0.47306569417317706,
"step": 2184
},
{
"epoch": 1.361606450612498,
"grad_norm": 11.81271743774414,
"learning_rate": 1.214236509758898e-05,
"loss": 0.4672517776489258,
"step": 2196
},
{
"epoch": 1.3690494650333385,
"grad_norm": 3.6157212257385254,
"learning_rate": 1.2087256027554537e-05,
"loss": 0.4465065797170003,
"step": 2208
},
{
"epoch": 1.3764924794541789,
"grad_norm": 3.778449773788452,
"learning_rate": 1.2032146957520093e-05,
"loss": 0.5149937868118286,
"step": 2220
},
{
"epoch": 1.3839354938750195,
"grad_norm": 2.5120906829833984,
"learning_rate": 1.197703788748565e-05,
"loss": 0.45879046122233075,
"step": 2232
},
{
"epoch": 1.3913785082958599,
"grad_norm": 17.704999923706055,
"learning_rate": 1.1921928817451208e-05,
"loss": 0.5167669057846069,
"step": 2244
},
{
"epoch": 1.3988215227167002,
"grad_norm": 11.8012113571167,
"learning_rate": 1.1866819747416762e-05,
"loss": 0.496524175008138,
"step": 2256
},
{
"epoch": 1.4062645371375406,
"grad_norm": 13.236916542053223,
"learning_rate": 1.181171067738232e-05,
"loss": 0.47164463996887207,
"step": 2268
},
{
"epoch": 1.4137075515583812,
"grad_norm": 3.6107146739959717,
"learning_rate": 1.1756601607347878e-05,
"loss": 0.4411802689234416,
"step": 2280
},
{
"epoch": 1.4211505659792216,
"grad_norm": 3.5400538444519043,
"learning_rate": 1.1701492537313435e-05,
"loss": 0.44078512986501056,
"step": 2292
},
{
"epoch": 1.428593580400062,
"grad_norm": 3.386744260787964,
"learning_rate": 1.164638346727899e-05,
"loss": 0.44522058963775635,
"step": 2304
},
{
"epoch": 1.4360365948209024,
"grad_norm": 7.451818466186523,
"learning_rate": 1.1591274397244548e-05,
"loss": 0.4643220106760661,
"step": 2316
},
{
"epoch": 1.4434796092417428,
"grad_norm": 3.741562843322754,
"learning_rate": 1.1536165327210106e-05,
"loss": 0.4557652473449707,
"step": 2328
},
{
"epoch": 1.4509226236625834,
"grad_norm": 2.767171621322632,
"learning_rate": 1.148105625717566e-05,
"loss": 0.4677225748697917,
"step": 2340
},
{
"epoch": 1.4583656380834238,
"grad_norm": 5.696690559387207,
"learning_rate": 1.1425947187141217e-05,
"loss": 0.42428747812906903,
"step": 2352
},
{
"epoch": 1.4658086525042642,
"grad_norm": 6.44115686416626,
"learning_rate": 1.1370838117106774e-05,
"loss": 0.4969560702641805,
"step": 2364
},
{
"epoch": 1.4732516669251048,
"grad_norm": 6.7684831619262695,
"learning_rate": 1.1315729047072333e-05,
"loss": 0.5301390091578165,
"step": 2376
},
{
"epoch": 1.4806946813459452,
"grad_norm": 2.761455774307251,
"learning_rate": 1.1260619977037887e-05,
"loss": 0.4755421082178752,
"step": 2388
},
{
"epoch": 1.4881376957667856,
"grad_norm": 7.615389347076416,
"learning_rate": 1.1205510907003444e-05,
"loss": 0.4676011800765991,
"step": 2400
},
{
"epoch": 1.495580710187626,
"grad_norm": 3.118619680404663,
"learning_rate": 1.1150401836969002e-05,
"loss": 0.4575995206832886,
"step": 2412
},
{
"epoch": 1.5030237246084663,
"grad_norm": 4.179815769195557,
"learning_rate": 1.1095292766934557e-05,
"loss": 0.5326940615971884,
"step": 2424
},
{
"epoch": 1.5104667390293067,
"grad_norm": 3.128330945968628,
"learning_rate": 1.1040183696900115e-05,
"loss": 0.45927361647288006,
"step": 2436
},
{
"epoch": 1.5179097534501473,
"grad_norm": 3.6722943782806396,
"learning_rate": 1.0985074626865672e-05,
"loss": 0.5232657591501871,
"step": 2448
},
{
"epoch": 1.5253527678709877,
"grad_norm": 8.696102142333984,
"learning_rate": 1.092996555683123e-05,
"loss": 0.5253320535024008,
"step": 2460
},
{
"epoch": 1.5327957822918283,
"grad_norm": 6.030095100402832,
"learning_rate": 1.0874856486796785e-05,
"loss": 0.4725768566131592,
"step": 2472
},
{
"epoch": 1.5402387967126687,
"grad_norm": 8.892803192138672,
"learning_rate": 1.0819747416762342e-05,
"loss": 0.44700531164805096,
"step": 2484
},
{
"epoch": 1.5476818111335091,
"grad_norm": 15.271442413330078,
"learning_rate": 1.07646383467279e-05,
"loss": 0.44845902919769287,
"step": 2496
},
{
"epoch": 1.5551248255543495,
"grad_norm": 5.234111785888672,
"learning_rate": 1.0709529276693457e-05,
"loss": 0.5186563730239868,
"step": 2508
},
{
"epoch": 1.5625678399751899,
"grad_norm": 6.541170597076416,
"learning_rate": 1.0654420206659013e-05,
"loss": 0.4690740505854289,
"step": 2520
},
{
"epoch": 1.5700108543960303,
"grad_norm": 2.7548892498016357,
"learning_rate": 1.059931113662457e-05,
"loss": 0.43329620361328125,
"step": 2532
},
{
"epoch": 1.5774538688168709,
"grad_norm": 12.428861618041992,
"learning_rate": 1.0544202066590127e-05,
"loss": 0.43588805198669434,
"step": 2544
},
{
"epoch": 1.5848968832377113,
"grad_norm": 9.76059627532959,
"learning_rate": 1.0489092996555683e-05,
"loss": 0.4283796151479085,
"step": 2556
},
{
"epoch": 1.5923398976585517,
"grad_norm": 10.960260391235352,
"learning_rate": 1.043398392652124e-05,
"loss": 0.4565364519755046,
"step": 2568
},
{
"epoch": 1.5997829120793923,
"grad_norm": 6.568747043609619,
"learning_rate": 1.0378874856486798e-05,
"loss": 0.41670429706573486,
"step": 2580
},
{
"epoch": 1.6072259265002327,
"grad_norm": 2.755124568939209,
"learning_rate": 1.0323765786452355e-05,
"loss": 0.4691346486409505,
"step": 2592
},
{
"epoch": 1.614668940921073,
"grad_norm": 21.070772171020508,
"learning_rate": 1.026865671641791e-05,
"loss": 0.4186259905497233,
"step": 2604
},
{
"epoch": 1.6221119553419134,
"grad_norm": 5.272284507751465,
"learning_rate": 1.0213547646383468e-05,
"loss": 0.4942372639973958,
"step": 2616
},
{
"epoch": 1.6295549697627538,
"grad_norm": 8.858941078186035,
"learning_rate": 1.0158438576349025e-05,
"loss": 0.4842514594395955,
"step": 2628
},
{
"epoch": 1.6369979841835942,
"grad_norm": 4.663693428039551,
"learning_rate": 1.010332950631458e-05,
"loss": 0.49429325262705487,
"step": 2640
},
{
"epoch": 1.6444409986044348,
"grad_norm": 14.864917755126953,
"learning_rate": 1.0048220436280138e-05,
"loss": 0.46838700771331787,
"step": 2652
},
{
"epoch": 1.6518840130252752,
"grad_norm": 2.5411393642425537,
"learning_rate": 9.993111366245695e-06,
"loss": 0.4521595239639282,
"step": 2664
},
{
"epoch": 1.6593270274461158,
"grad_norm": 3.005941152572632,
"learning_rate": 9.938002296211253e-06,
"loss": 0.48365652561187744,
"step": 2676
},
{
"epoch": 1.6667700418669562,
"grad_norm": 5.7398552894592285,
"learning_rate": 9.88289322617681e-06,
"loss": 0.4695123831431071,
"step": 2688
},
{
"epoch": 1.6742130562877966,
"grad_norm": 4.946065902709961,
"learning_rate": 9.827784156142366e-06,
"loss": 0.4761979579925537,
"step": 2700
},
{
"epoch": 1.681656070708637,
"grad_norm": 7.703652858734131,
"learning_rate": 9.772675086107923e-06,
"loss": 0.49780480066935223,
"step": 2712
},
{
"epoch": 1.6890990851294774,
"grad_norm": 8.237687110900879,
"learning_rate": 9.71756601607348e-06,
"loss": 0.4623022476832072,
"step": 2724
},
{
"epoch": 1.6965420995503178,
"grad_norm": 2.87007474899292,
"learning_rate": 9.662456946039036e-06,
"loss": 0.41221630573272705,
"step": 2736
},
{
"epoch": 1.7039851139711584,
"grad_norm": 4.247465133666992,
"learning_rate": 9.607347876004593e-06,
"loss": 0.4721166690190633,
"step": 2748
},
{
"epoch": 1.7114281283919988,
"grad_norm": 4.022077560424805,
"learning_rate": 9.552238805970149e-06,
"loss": 0.47880788644154865,
"step": 2760
},
{
"epoch": 1.7188711428128391,
"grad_norm": 5.686273574829102,
"learning_rate": 9.497129735935708e-06,
"loss": 0.486567219098409,
"step": 2772
},
{
"epoch": 1.7263141572336798,
"grad_norm": 4.733608245849609,
"learning_rate": 9.442020665901264e-06,
"loss": 0.4696682294209798,
"step": 2784
},
{
"epoch": 1.7337571716545201,
"grad_norm": 3.8102357387542725,
"learning_rate": 9.38691159586682e-06,
"loss": 0.4944278796513875,
"step": 2796
},
{
"epoch": 1.7412001860753605,
"grad_norm": 5.343743801116943,
"learning_rate": 9.331802525832377e-06,
"loss": 0.45073699951171875,
"step": 2808
},
{
"epoch": 1.748643200496201,
"grad_norm": 8.939608573913574,
"learning_rate": 9.276693455797934e-06,
"loss": 0.5150019327799479,
"step": 2820
},
{
"epoch": 1.7560862149170413,
"grad_norm": 9.984607696533203,
"learning_rate": 9.221584385763491e-06,
"loss": 0.49051181475321454,
"step": 2832
},
{
"epoch": 1.7635292293378817,
"grad_norm": 4.297845840454102,
"learning_rate": 9.166475315729047e-06,
"loss": 0.43834813435872394,
"step": 2844
},
{
"epoch": 1.7709722437587223,
"grad_norm": 4.738193035125732,
"learning_rate": 9.111366245694604e-06,
"loss": 0.48496174812316895,
"step": 2856
},
{
"epoch": 1.7784152581795627,
"grad_norm": 6.950840473175049,
"learning_rate": 9.056257175660162e-06,
"loss": 0.4803895950317383,
"step": 2868
},
{
"epoch": 1.7858582726004033,
"grad_norm": 2.9567737579345703,
"learning_rate": 9.001148105625719e-06,
"loss": 0.47137478987375897,
"step": 2880
},
{
"epoch": 1.7933012870212437,
"grad_norm": 21.629295349121094,
"learning_rate": 8.946039035591275e-06,
"loss": 0.5382961829503378,
"step": 2892
},
{
"epoch": 1.800744301442084,
"grad_norm": 4.054839611053467,
"learning_rate": 8.890929965556832e-06,
"loss": 0.429937203725179,
"step": 2904
},
{
"epoch": 1.8081873158629245,
"grad_norm": 8.124676704406738,
"learning_rate": 8.83582089552239e-06,
"loss": 0.46343564987182617,
"step": 2916
},
{
"epoch": 1.8156303302837649,
"grad_norm": 6.405475616455078,
"learning_rate": 8.780711825487945e-06,
"loss": 0.47476502259572345,
"step": 2928
},
{
"epoch": 1.8230733447046052,
"grad_norm": 3.4982993602752686,
"learning_rate": 8.725602755453502e-06,
"loss": 0.42661325136820477,
"step": 2940
},
{
"epoch": 1.8305163591254459,
"grad_norm": 5.036385536193848,
"learning_rate": 8.67049368541906e-06,
"loss": 0.42475831508636475,
"step": 2952
},
{
"epoch": 1.8379593735462862,
"grad_norm": 9.453807830810547,
"learning_rate": 8.615384615384617e-06,
"loss": 0.4522843360900879,
"step": 2964
},
{
"epoch": 1.8454023879671266,
"grad_norm": 7.572172164916992,
"learning_rate": 8.560275545350172e-06,
"loss": 0.5405757427215576,
"step": 2976
},
{
"epoch": 1.8528454023879672,
"grad_norm": 3.8509397506713867,
"learning_rate": 8.50516647531573e-06,
"loss": 0.4206368128458659,
"step": 2988
},
{
"epoch": 1.8602884168088076,
"grad_norm": 3.8660781383514404,
"learning_rate": 8.450057405281287e-06,
"loss": 0.4278140465418498,
"step": 3000
},
{
"epoch": 1.867731431229648,
"grad_norm": 13.179638862609863,
"learning_rate": 8.394948335246843e-06,
"loss": 0.45146167278289795,
"step": 3012
},
{
"epoch": 1.8751744456504884,
"grad_norm": 2.5003507137298584,
"learning_rate": 8.3398392652124e-06,
"loss": 0.5010615189870199,
"step": 3024
},
{
"epoch": 1.8826174600713288,
"grad_norm": 6.336158752441406,
"learning_rate": 8.284730195177957e-06,
"loss": 0.48331379890441895,
"step": 3036
},
{
"epoch": 1.8900604744921692,
"grad_norm": 3.9048869609832764,
"learning_rate": 8.229621125143515e-06,
"loss": 0.4964629014333089,
"step": 3048
},
{
"epoch": 1.8975034889130098,
"grad_norm": 4.851749897003174,
"learning_rate": 8.17451205510907e-06,
"loss": 0.4605306386947632,
"step": 3060
},
{
"epoch": 1.9049465033338502,
"grad_norm": 2.5984604358673096,
"learning_rate": 8.119402985074628e-06,
"loss": 0.42377761999766034,
"step": 3072
},
{
"epoch": 1.9123895177546908,
"grad_norm": 14.330255508422852,
"learning_rate": 8.064293915040185e-06,
"loss": 0.4586070378621419,
"step": 3084
},
{
"epoch": 1.9198325321755312,
"grad_norm": 5.363494873046875,
"learning_rate": 8.00918484500574e-06,
"loss": 0.4935295581817627,
"step": 3096
},
{
"epoch": 1.9272755465963716,
"grad_norm": 5.703904151916504,
"learning_rate": 7.954075774971298e-06,
"loss": 0.44021427631378174,
"step": 3108
},
{
"epoch": 1.934718561017212,
"grad_norm": 5.600277423858643,
"learning_rate": 7.898966704936855e-06,
"loss": 0.48560158411661786,
"step": 3120
},
{
"epoch": 1.9421615754380523,
"grad_norm": 11.074832916259766,
"learning_rate": 7.843857634902413e-06,
"loss": 0.4312416712443034,
"step": 3132
},
{
"epoch": 1.9496045898588927,
"grad_norm": 3.4356892108917236,
"learning_rate": 7.788748564867968e-06,
"loss": 0.4442025025685628,
"step": 3144
},
{
"epoch": 1.9570476042797333,
"grad_norm": 3.7474091053009033,
"learning_rate": 7.733639494833526e-06,
"loss": 0.5241368214289347,
"step": 3156
},
{
"epoch": 1.9644906187005737,
"grad_norm": 4.750489234924316,
"learning_rate": 7.678530424799083e-06,
"loss": 0.4401020606358846,
"step": 3168
},
{
"epoch": 1.9719336331214141,
"grad_norm": 22.131851196289062,
"learning_rate": 7.6234213547646386e-06,
"loss": 0.5134913126627604,
"step": 3180
},
{
"epoch": 1.9793766475422547,
"grad_norm": 4.812230587005615,
"learning_rate": 7.568312284730196e-06,
"loss": 0.5479523340861002,
"step": 3192
},
{
"epoch": 1.9868196619630951,
"grad_norm": 6.560222625732422,
"learning_rate": 7.513203214695752e-06,
"loss": 0.4738404353459676,
"step": 3204
},
{
"epoch": 1.9942626763839355,
"grad_norm": 5.240246772766113,
"learning_rate": 7.45809414466131e-06,
"loss": 0.4475013017654419,
"step": 3216
},
{
"epoch": 2.0,
"eval_f1": 0.43079906968624254,
"eval_loss": 0.11952196806669235,
"eval_precision": 0.391528709389682,
"eval_recall": 0.4931553870446119,
"eval_runtime": 585.0453,
"eval_samples_per_second": 66.13,
"eval_steps_per_second": 1.379,
"step": 3226
},
{
"epoch": 2.0012405024034736,
"grad_norm": 4.430677890777588,
"learning_rate": 7.402985074626866e-06,
"loss": 0.4009953737258911,
"step": 3228
},
{
"epoch": 2.008683516824314,
"grad_norm": 10.324471473693848,
"learning_rate": 7.3478760045924235e-06,
"loss": 0.4711928367614746,
"step": 3240
},
{
"epoch": 2.0161265312451544,
"grad_norm": 11.249197006225586,
"learning_rate": 7.29276693455798e-06,
"loss": 0.4341440995534261,
"step": 3252
},
{
"epoch": 2.023569545665995,
"grad_norm": 2.7949812412261963,
"learning_rate": 7.2376578645235365e-06,
"loss": 0.3914073705673218,
"step": 3264
},
{
"epoch": 2.031012560086835,
"grad_norm": 10.501336097717285,
"learning_rate": 7.182548794489094e-06,
"loss": 0.3871670166651408,
"step": 3276
},
{
"epoch": 2.0384555745076756,
"grad_norm": 11.492402076721191,
"learning_rate": 7.12743972445465e-06,
"loss": 0.44295652707417804,
"step": 3288
},
{
"epoch": 2.045898588928516,
"grad_norm": 8.688313484191895,
"learning_rate": 7.072330654420208e-06,
"loss": 0.4092850685119629,
"step": 3300
},
{
"epoch": 2.0533416033493563,
"grad_norm": 5.402098178863525,
"learning_rate": 7.017221584385764e-06,
"loss": 0.41869743665059406,
"step": 3312
},
{
"epoch": 2.0607846177701967,
"grad_norm": 3.6429481506347656,
"learning_rate": 6.962112514351321e-06,
"loss": 0.3916611671447754,
"step": 3324
},
{
"epoch": 2.0682276321910376,
"grad_norm": 4.778937339782715,
"learning_rate": 6.907003444316878e-06,
"loss": 0.3913481632868449,
"step": 3336
},
{
"epoch": 2.075670646611878,
"grad_norm": 4.281859874725342,
"learning_rate": 6.851894374282435e-06,
"loss": 0.380032738049825,
"step": 3348
},
{
"epoch": 2.0831136610327183,
"grad_norm": 7.385513782501221,
"learning_rate": 6.796785304247992e-06,
"loss": 0.3545822699864705,
"step": 3360
},
{
"epoch": 2.0905566754535587,
"grad_norm": 2.9248600006103516,
"learning_rate": 6.741676234213548e-06,
"loss": 0.419588565826416,
"step": 3372
},
{
"epoch": 2.097999689874399,
"grad_norm": 3.0418336391448975,
"learning_rate": 6.6865671641791055e-06,
"loss": 0.4189613262812297,
"step": 3384
},
{
"epoch": 2.1054427042952395,
"grad_norm": 4.628702640533447,
"learning_rate": 6.631458094144662e-06,
"loss": 0.38280495007832843,
"step": 3396
},
{
"epoch": 2.11288571871608,
"grad_norm": 2.931917667388916,
"learning_rate": 6.576349024110219e-06,
"loss": 0.40134119987487793,
"step": 3408
},
{
"epoch": 2.1203287331369203,
"grad_norm": 5.4905853271484375,
"learning_rate": 6.521239954075776e-06,
"loss": 0.3685312271118164,
"step": 3420
},
{
"epoch": 2.1277717475577607,
"grad_norm": 2.9753782749176025,
"learning_rate": 6.466130884041333e-06,
"loss": 0.3878607749938965,
"step": 3432
},
{
"epoch": 2.1352147619786015,
"grad_norm": 7.17921257019043,
"learning_rate": 6.411021814006889e-06,
"loss": 0.41369112332661945,
"step": 3444
},
{
"epoch": 2.142657776399442,
"grad_norm": 13.806902885437012,
"learning_rate": 6.355912743972445e-06,
"loss": 0.43599124749501544,
"step": 3456
},
{
"epoch": 2.1501007908202823,
"grad_norm": 3.4916634559631348,
"learning_rate": 6.3008036739380026e-06,
"loss": 0.3406885862350464,
"step": 3468
},
{
"epoch": 2.1575438052411227,
"grad_norm": 6.193579196929932,
"learning_rate": 6.245694603903559e-06,
"loss": 0.3558163642883301,
"step": 3480
},
{
"epoch": 2.164986819661963,
"grad_norm": 6.37896203994751,
"learning_rate": 6.190585533869116e-06,
"loss": 0.35776766141255695,
"step": 3492
},
{
"epoch": 2.1724298340828034,
"grad_norm": 12.731496810913086,
"learning_rate": 6.135476463834673e-06,
"loss": 0.37972402572631836,
"step": 3504
},
{
"epoch": 2.179872848503644,
"grad_norm": 19.98930549621582,
"learning_rate": 6.08036739380023e-06,
"loss": 0.42111217975616455,
"step": 3516
},
{
"epoch": 2.187315862924484,
"grad_norm": 6.11861515045166,
"learning_rate": 6.025258323765787e-06,
"loss": 0.3672644297281901,
"step": 3528
},
{
"epoch": 2.194758877345325,
"grad_norm": 11.929699897766113,
"learning_rate": 5.970149253731343e-06,
"loss": 0.4023996591567993,
"step": 3540
},
{
"epoch": 2.2022018917661654,
"grad_norm": 17.26346206665039,
"learning_rate": 5.9150401836969005e-06,
"loss": 0.38841597239176434,
"step": 3552
},
{
"epoch": 2.209644906187006,
"grad_norm": 9.183552742004395,
"learning_rate": 5.859931113662457e-06,
"loss": 0.42536401748657227,
"step": 3564
},
{
"epoch": 2.217087920607846,
"grad_norm": 2.3118231296539307,
"learning_rate": 5.804822043628014e-06,
"loss": 0.4157342513402303,
"step": 3576
},
{
"epoch": 2.2245309350286866,
"grad_norm": 6.309724807739258,
"learning_rate": 5.749712973593571e-06,
"loss": 0.4599275191624959,
"step": 3588
},
{
"epoch": 2.231973949449527,
"grad_norm": 2.892469882965088,
"learning_rate": 5.694603903559128e-06,
"loss": 0.441303292910258,
"step": 3600
},
{
"epoch": 2.2394169638703674,
"grad_norm": 6.523403167724609,
"learning_rate": 5.6394948335246846e-06,
"loss": 0.41275028387705487,
"step": 3612
},
{
"epoch": 2.2468599782912078,
"grad_norm": 2.6101267337799072,
"learning_rate": 5.584385763490242e-06,
"loss": 0.41505225499471027,
"step": 3624
},
{
"epoch": 2.2543029927120486,
"grad_norm": 5.343144416809082,
"learning_rate": 5.529276693455798e-06,
"loss": 0.38965781529744464,
"step": 3636
},
{
"epoch": 2.261746007132889,
"grad_norm": 4.3300395011901855,
"learning_rate": 5.474167623421355e-06,
"loss": 0.4278339942296346,
"step": 3648
},
{
"epoch": 2.2691890215537294,
"grad_norm": 5.109958171844482,
"learning_rate": 5.419058553386912e-06,
"loss": 0.366446574529012,
"step": 3660
},
{
"epoch": 2.2766320359745698,
"grad_norm": 3.8399014472961426,
"learning_rate": 5.363949483352469e-06,
"loss": 0.3991047541300456,
"step": 3672
},
{
"epoch": 2.28407505039541,
"grad_norm": 6.625537872314453,
"learning_rate": 5.308840413318026e-06,
"loss": 0.3346426486968994,
"step": 3684
},
{
"epoch": 2.2915180648162505,
"grad_norm": 11.645654678344727,
"learning_rate": 5.2537313432835825e-06,
"loss": 0.3985482454299927,
"step": 3696
},
{
"epoch": 2.298961079237091,
"grad_norm": 5.67885684967041,
"learning_rate": 5.19862227324914e-06,
"loss": 0.3815650939941406,
"step": 3708
},
{
"epoch": 2.3064040936579313,
"grad_norm": 4.548233985900879,
"learning_rate": 5.143513203214696e-06,
"loss": 0.39840646584828693,
"step": 3720
},
{
"epoch": 2.3138471080787717,
"grad_norm": 3.8364691734313965,
"learning_rate": 5.088404133180253e-06,
"loss": 0.4081765413284302,
"step": 3732
},
{
"epoch": 2.3212901224996125,
"grad_norm": 2.5266079902648926,
"learning_rate": 5.03329506314581e-06,
"loss": 0.3613650401433309,
"step": 3744
},
{
"epoch": 2.328733136920453,
"grad_norm": 7.049173831939697,
"learning_rate": 4.9781859931113666e-06,
"loss": 0.4112436771392822,
"step": 3756
},
{
"epoch": 2.3361761513412933,
"grad_norm": 7.23855447769165,
"learning_rate": 4.923076923076924e-06,
"loss": 0.4015626907348633,
"step": 3768
},
{
"epoch": 2.3436191657621337,
"grad_norm": 7.326627731323242,
"learning_rate": 4.86796785304248e-06,
"loss": 0.389956792195638,
"step": 3780
},
{
"epoch": 2.351062180182974,
"grad_norm": 11.426876068115234,
"learning_rate": 4.812858783008037e-06,
"loss": 0.392941157023112,
"step": 3792
},
{
"epoch": 2.3585051946038145,
"grad_norm": 5.058406352996826,
"learning_rate": 4.757749712973594e-06,
"loss": 0.388182799021403,
"step": 3804
},
{
"epoch": 2.365948209024655,
"grad_norm": 7.783097267150879,
"learning_rate": 4.702640642939151e-06,
"loss": 0.4082544247309367,
"step": 3816
},
{
"epoch": 2.3733912234454952,
"grad_norm": 4.8967084884643555,
"learning_rate": 4.647531572904708e-06,
"loss": 0.40780651569366455,
"step": 3828
},
{
"epoch": 2.3808342378663356,
"grad_norm": 5.760252952575684,
"learning_rate": 4.5924225028702645e-06,
"loss": 0.4002196391423543,
"step": 3840
},
{
"epoch": 2.3882772522871765,
"grad_norm": 4.79511022567749,
"learning_rate": 4.537313432835822e-06,
"loss": 0.3828426996866862,
"step": 3852
},
{
"epoch": 2.395720266708017,
"grad_norm": 3.2499914169311523,
"learning_rate": 4.4822043628013774e-06,
"loss": 0.3649975061416626,
"step": 3864
},
{
"epoch": 2.4031632811288572,
"grad_norm": 5.451921463012695,
"learning_rate": 4.427095292766935e-06,
"loss": 0.3998970588048299,
"step": 3876
},
{
"epoch": 2.4106062955496976,
"grad_norm": 3.8105506896972656,
"learning_rate": 4.371986222732491e-06,
"loss": 0.45681726932525635,
"step": 3888
},
{
"epoch": 2.418049309970538,
"grad_norm": 3.690845012664795,
"learning_rate": 4.3168771526980486e-06,
"loss": 0.3797287543614705,
"step": 3900
},
{
"epoch": 2.4254923243913784,
"grad_norm": 12.44582748413086,
"learning_rate": 4.261768082663605e-06,
"loss": 0.47908584276835126,
"step": 3912
},
{
"epoch": 2.432935338812219,
"grad_norm": 3.862395763397217,
"learning_rate": 4.206659012629162e-06,
"loss": 0.4127648671468099,
"step": 3924
},
{
"epoch": 2.4403783532330596,
"grad_norm": 11.71980094909668,
"learning_rate": 4.151549942594719e-06,
"loss": 0.33937788009643555,
"step": 3936
},
{
"epoch": 2.4478213676539,
"grad_norm": 4.254403591156006,
"learning_rate": 4.096440872560276e-06,
"loss": 0.3548990885416667,
"step": 3948
},
{
"epoch": 2.4552643820747404,
"grad_norm": 5.00128173828125,
"learning_rate": 4.041331802525833e-06,
"loss": 0.4270055294036865,
"step": 3960
},
{
"epoch": 2.462707396495581,
"grad_norm": 3.918459892272949,
"learning_rate": 3.986222732491389e-06,
"loss": 0.3760935465494792,
"step": 3972
},
{
"epoch": 2.470150410916421,
"grad_norm": 11.43891716003418,
"learning_rate": 3.9311136624569465e-06,
"loss": 0.4183223644892375,
"step": 3984
},
{
"epoch": 2.4775934253372616,
"grad_norm": 16.374967575073242,
"learning_rate": 3.876004592422503e-06,
"loss": 0.36837557951609295,
"step": 3996
},
{
"epoch": 2.485036439758102,
"grad_norm": 4.490777015686035,
"learning_rate": 3.82089552238806e-06,
"loss": 0.4069160620371501,
"step": 4008
},
{
"epoch": 2.4924794541789423,
"grad_norm": 8.420413970947266,
"learning_rate": 3.7657864523536168e-06,
"loss": 0.4271164337793986,
"step": 4020
},
{
"epoch": 2.4999224685997827,
"grad_norm": 8.309126853942871,
"learning_rate": 3.7106773823191737e-06,
"loss": 0.3547343810399373,
"step": 4032
},
{
"epoch": 2.5073654830206236,
"grad_norm": 14.98065185546875,
"learning_rate": 3.6555683122847306e-06,
"loss": 0.40314682324727374,
"step": 4044
},
{
"epoch": 2.514808497441464,
"grad_norm": 16.558191299438477,
"learning_rate": 3.600459242250287e-06,
"loss": 0.36269084612528485,
"step": 4056
},
{
"epoch": 2.5222515118623043,
"grad_norm": 6.547549724578857,
"learning_rate": 3.545350172215844e-06,
"loss": 0.36424537499745685,
"step": 4068
},
{
"epoch": 2.5296945262831447,
"grad_norm": 4.773808002471924,
"learning_rate": 3.490241102181401e-06,
"loss": 0.37531224886576336,
"step": 4080
},
{
"epoch": 2.537137540703985,
"grad_norm": 4.01258659362793,
"learning_rate": 3.4351320321469578e-06,
"loss": 0.36545733610788983,
"step": 4092
},
{
"epoch": 2.5445805551248255,
"grad_norm": 10.372180938720703,
"learning_rate": 3.3800229621125147e-06,
"loss": 0.4671864112218221,
"step": 4104
},
{
"epoch": 2.552023569545666,
"grad_norm": 3.3598952293395996,
"learning_rate": 3.3249138920780716e-06,
"loss": 0.3458172082901001,
"step": 4116
},
{
"epoch": 2.5594665839665063,
"grad_norm": 11.469687461853027,
"learning_rate": 3.2698048220436285e-06,
"loss": 0.39522536595662433,
"step": 4128
},
{
"epoch": 2.5669095983873467,
"grad_norm": 3.848041534423828,
"learning_rate": 3.2146957520091854e-06,
"loss": 0.41400329271952313,
"step": 4140
},
{
"epoch": 2.5743526128081875,
"grad_norm": 4.791919231414795,
"learning_rate": 3.159586681974742e-06,
"loss": 0.393940011660258,
"step": 4152
},
{
"epoch": 2.581795627229028,
"grad_norm": 21.486618041992188,
"learning_rate": 3.1044776119402988e-06,
"loss": 0.4398730993270874,
"step": 4164
},
{
"epoch": 2.5892386416498683,
"grad_norm": 5.638022422790527,
"learning_rate": 3.0493685419058557e-06,
"loss": 0.3547349770863851,
"step": 4176
},
{
"epoch": 2.5966816560707087,
"grad_norm": 7.414913177490234,
"learning_rate": 2.9942594718714126e-06,
"loss": 0.38705146312713623,
"step": 4188
},
{
"epoch": 2.604124670491549,
"grad_norm": 6.696681976318359,
"learning_rate": 2.9391504018369695e-06,
"loss": 0.36440642674763996,
"step": 4200
},
{
"epoch": 2.6115676849123894,
"grad_norm": 4.02039098739624,
"learning_rate": 2.8840413318025264e-06,
"loss": 0.39015217622121173,
"step": 4212
},
{
"epoch": 2.61901069933323,
"grad_norm": 3.370777130126953,
"learning_rate": 2.8289322617680833e-06,
"loss": 0.4275425275166829,
"step": 4224
},
{
"epoch": 2.6264537137540707,
"grad_norm": 8.47400951385498,
"learning_rate": 2.7738231917336393e-06,
"loss": 0.3559015194574992,
"step": 4236
},
{
"epoch": 2.6338967281749106,
"grad_norm": 11.06500244140625,
"learning_rate": 2.7187141216991963e-06,
"loss": 0.3683815002441406,
"step": 4248
},
{
"epoch": 2.6413397425957514,
"grad_norm": 3.4861528873443604,
"learning_rate": 2.663605051664753e-06,
"loss": 0.44681187470753986,
"step": 4260
},
{
"epoch": 2.648782757016592,
"grad_norm": 10.642603874206543,
"learning_rate": 2.60849598163031e-06,
"loss": 0.4434703588485718,
"step": 4272
},
{
"epoch": 2.656225771437432,
"grad_norm": 2.501110315322876,
"learning_rate": 2.553386911595867e-06,
"loss": 0.3525495131810506,
"step": 4284
},
{
"epoch": 2.6636687858582726,
"grad_norm": 5.691764831542969,
"learning_rate": 2.498277841561424e-06,
"loss": 0.3853313128153483,
"step": 4296
},
{
"epoch": 2.671111800279113,
"grad_norm": 4.1908135414123535,
"learning_rate": 2.4431687715269808e-06,
"loss": 0.38127346833546955,
"step": 4308
},
{
"epoch": 2.6785548146999534,
"grad_norm": 9.538026809692383,
"learning_rate": 2.3880597014925373e-06,
"loss": 0.39995817343393963,
"step": 4320
},
{
"epoch": 2.6859978291207938,
"grad_norm": 8.436595916748047,
"learning_rate": 2.332950631458094e-06,
"loss": 0.3635564645131429,
"step": 4332
},
{
"epoch": 2.6934408435416346,
"grad_norm": 2.5905513763427734,
"learning_rate": 2.277841561423651e-06,
"loss": 0.46339670817057294,
"step": 4344
},
{
"epoch": 2.7008838579624745,
"grad_norm": 6.738951206207275,
"learning_rate": 2.222732491389208e-06,
"loss": 0.3373739719390869,
"step": 4356
},
{
"epoch": 2.7083268723833154,
"grad_norm": 5.625753402709961,
"learning_rate": 2.167623421354765e-06,
"loss": 0.3713107109069824,
"step": 4368
},
{
"epoch": 2.7157698868041558,
"grad_norm": 3.6908581256866455,
"learning_rate": 2.1125143513203218e-06,
"loss": 0.3845006227493286,
"step": 4380
},
{
"epoch": 2.723212901224996,
"grad_norm": 5.123325824737549,
"learning_rate": 2.0574052812858787e-06,
"loss": 0.3693963686625163,
"step": 4392
},
{
"epoch": 2.7306559156458365,
"grad_norm": 5.600500583648682,
"learning_rate": 2.002296211251435e-06,
"loss": 0.4005578358968099,
"step": 4404
},
{
"epoch": 2.738098930066677,
"grad_norm": 4.9075775146484375,
"learning_rate": 1.947187141216992e-06,
"loss": 0.44304617245992023,
"step": 4416
},
{
"epoch": 2.7455419444875173,
"grad_norm": 2.535568952560425,
"learning_rate": 1.892078071182549e-06,
"loss": 0.36018415292104083,
"step": 4428
},
{
"epoch": 2.7529849589083577,
"grad_norm": 3.863154888153076,
"learning_rate": 1.8369690011481059e-06,
"loss": 0.3833086093266805,
"step": 4440
},
{
"epoch": 2.7604279733291985,
"grad_norm": 3.38565731048584,
"learning_rate": 1.7818599311136626e-06,
"loss": 0.36296629905700684,
"step": 4452
},
{
"epoch": 2.767870987750039,
"grad_norm": 3.979094982147217,
"learning_rate": 1.7267508610792195e-06,
"loss": 0.400799036026001,
"step": 4464
},
{
"epoch": 2.7753140021708793,
"grad_norm": 3.6006662845611572,
"learning_rate": 1.6716417910447764e-06,
"loss": 0.38404210408528644,
"step": 4476
},
{
"epoch": 2.7827570165917197,
"grad_norm": 9.927759170532227,
"learning_rate": 1.6165327210103333e-06,
"loss": 0.47922762235005695,
"step": 4488
},
{
"epoch": 2.79020003101256,
"grad_norm": 4.767171859741211,
"learning_rate": 1.5614236509758898e-06,
"loss": 0.40151556332906085,
"step": 4500
},
{
"epoch": 2.7976430454334005,
"grad_norm": 5.649435043334961,
"learning_rate": 1.5063145809414467e-06,
"loss": 0.3603046735127767,
"step": 4512
},
{
"epoch": 2.805086059854241,
"grad_norm": 11.296677589416504,
"learning_rate": 1.4512055109070036e-06,
"loss": 0.38084761301676434,
"step": 4524
},
{
"epoch": 2.8125290742750813,
"grad_norm": 2.71022629737854,
"learning_rate": 1.3960964408725605e-06,
"loss": 0.3726603190104167,
"step": 4536
},
{
"epoch": 2.8199720886959216,
"grad_norm": 3.849479913711548,
"learning_rate": 1.3409873708381172e-06,
"loss": 0.3995700279871623,
"step": 4548
},
{
"epoch": 2.8274151031167625,
"grad_norm": 14.668109893798828,
"learning_rate": 1.285878300803674e-06,
"loss": 0.39227835337320965,
"step": 4560
},
{
"epoch": 2.834858117537603,
"grad_norm": 3.9545083045959473,
"learning_rate": 1.230769230769231e-06,
"loss": 0.42009902000427246,
"step": 4572
},
{
"epoch": 2.8423011319584432,
"grad_norm": 5.8148298263549805,
"learning_rate": 1.1756601607347877e-06,
"loss": 0.39560989538828534,
"step": 4584
},
{
"epoch": 2.8497441463792836,
"grad_norm": 6.249505996704102,
"learning_rate": 1.1205510907003444e-06,
"loss": 0.42494750022888184,
"step": 4596
},
{
"epoch": 2.857187160800124,
"grad_norm": 4.1339921951293945,
"learning_rate": 1.0654420206659013e-06,
"loss": 0.5030697584152222,
"step": 4608
},
{
"epoch": 2.8646301752209644,
"grad_norm": 13.68895435333252,
"learning_rate": 1.0103329506314582e-06,
"loss": 0.36397520701090497,
"step": 4620
},
{
"epoch": 2.872073189641805,
"grad_norm": 2.826042890548706,
"learning_rate": 9.55223880597015e-07,
"loss": 0.3502591848373413,
"step": 4632
},
{
"epoch": 2.8795162040626456,
"grad_norm": 6.833806991577148,
"learning_rate": 9.001148105625718e-07,
"loss": 0.3613890012105306,
"step": 4644
},
{
"epoch": 2.8869592184834856,
"grad_norm": 4.942678451538086,
"learning_rate": 8.450057405281287e-07,
"loss": 0.39194099108378094,
"step": 4656
},
{
"epoch": 2.8944022329043264,
"grad_norm": 4.509676456451416,
"learning_rate": 7.898966704936855e-07,
"loss": 0.351750651995341,
"step": 4668
},
{
"epoch": 2.901845247325167,
"grad_norm": 8.305526733398438,
"learning_rate": 7.347876004592424e-07,
"loss": 0.40360478560129803,
"step": 4680
},
{
"epoch": 2.909288261746007,
"grad_norm": 4.9328765869140625,
"learning_rate": 6.796785304247991e-07,
"loss": 0.33100277185440063,
"step": 4692
},
{
"epoch": 2.9167312761668476,
"grad_norm": 4.945671558380127,
"learning_rate": 6.24569460390356e-07,
"loss": 0.39974749088287354,
"step": 4704
},
{
"epoch": 2.924174290587688,
"grad_norm": 9.925528526306152,
"learning_rate": 5.694603903559128e-07,
"loss": 0.4116141001383464,
"step": 4716
},
{
"epoch": 2.9316173050085284,
"grad_norm": 4.063233375549316,
"learning_rate": 5.143513203214697e-07,
"loss": 0.3659325838088989,
"step": 4728
},
{
"epoch": 2.9390603194293687,
"grad_norm": 3.5343589782714844,
"learning_rate": 4.5924225028702647e-07,
"loss": 0.3983626365661621,
"step": 4740
},
{
"epoch": 2.9465033338502096,
"grad_norm": 6.534095764160156,
"learning_rate": 4.041331802525833e-07,
"loss": 0.393149733543396,
"step": 4752
},
{
"epoch": 2.9539463482710495,
"grad_norm": 3.4787096977233887,
"learning_rate": 3.490241102181401e-07,
"loss": 0.3340187867482503,
"step": 4764
},
{
"epoch": 2.9613893626918903,
"grad_norm": 5.42100191116333,
"learning_rate": 2.939150401836969e-07,
"loss": 0.3814918597539266,
"step": 4776
},
{
"epoch": 2.9688323771127307,
"grad_norm": 4.148738861083984,
"learning_rate": 2.3880597014925377e-07,
"loss": 0.4039960702260335,
"step": 4788
},
{
"epoch": 2.976275391533571,
"grad_norm": 4.3285746574401855,
"learning_rate": 1.836969001148106e-07,
"loss": 0.34236987431844074,
"step": 4800
},
{
"epoch": 2.9837184059544115,
"grad_norm": 2.8112664222717285,
"learning_rate": 1.2858783008036742e-07,
"loss": 0.3349067767461141,
"step": 4812
},
{
"epoch": 2.991161420375252,
"grad_norm": 4.724297523498535,
"learning_rate": 7.347876004592423e-08,
"loss": 0.38507378101348877,
"step": 4824
},
{
"epoch": 2.9986044347960923,
"grad_norm": 7.1218132972717285,
"learning_rate": 1.8369690011481057e-08,
"loss": 0.34174474080403644,
"step": 4836
},
{
"epoch": 3.0,
"eval_f1": 0.43726749573500223,
"eval_loss": 0.12126699090003967,
"eval_precision": 0.4012637195169362,
"eval_recall": 0.4913673269074057,
"eval_runtime": 522.9994,
"eval_samples_per_second": 73.975,
"eval_steps_per_second": 1.543,
"step": 4839
}
],
"logging_steps": 12,
"max_steps": 4839,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.2215684896188826e+17,
"train_batch_size": 48,
"trial_name": null,
"trial_params": null
}