{ "best_global_step": 4839, "best_metric": 0.43726749573500223, "best_model_checkpoint": "constellation_one_text/checkpoint-4839", "epoch": 3.0, "eval_steps": 500, "global_step": 4839, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00744301442084044, "grad_norm": 16.79511070251465, "learning_rate": 4.5454545454545457e-07, "loss": 5.104981422424316, "step": 12 }, { "epoch": 0.01488602884168088, "grad_norm": 14.2466402053833, "learning_rate": 9.50413223140496e-07, "loss": 4.855861345926921, "step": 24 }, { "epoch": 0.02232904326252132, "grad_norm": 15.325632095336914, "learning_rate": 1.4462809917355372e-06, "loss": 4.62240473429362, "step": 36 }, { "epoch": 0.02977205768336176, "grad_norm": 14.659135818481445, "learning_rate": 1.9421487603305786e-06, "loss": 4.279359499613444, "step": 48 }, { "epoch": 0.037215072104202204, "grad_norm": 11.83539867401123, "learning_rate": 2.43801652892562e-06, "loss": 4.009869893391927, "step": 60 }, { "epoch": 0.04465808652504264, "grad_norm": 12.459957122802734, "learning_rate": 2.9338842975206615e-06, "loss": 3.658400217692057, "step": 72 }, { "epoch": 0.052101100945883085, "grad_norm": 10.960445404052734, "learning_rate": 3.429752066115703e-06, "loss": 3.3341188430786133, "step": 84 }, { "epoch": 0.05954411536672352, "grad_norm": 10.394844055175781, "learning_rate": 3.925619834710744e-06, "loss": 2.9902642567952475, "step": 96 }, { "epoch": 0.06698712978756396, "grad_norm": 10.080375671386719, "learning_rate": 4.421487603305786e-06, "loss": 2.7187296549479165, "step": 108 }, { "epoch": 0.07443014420840441, "grad_norm": 11.83609676361084, "learning_rate": 4.917355371900827e-06, "loss": 2.4078760147094727, "step": 120 }, { "epoch": 0.08187315862924484, "grad_norm": 20.040725708007812, "learning_rate": 5.413223140495868e-06, "loss": 2.1843010584513345, "step": 132 }, { "epoch": 0.08931617305008528, "grad_norm": 10.70347785949707, "learning_rate": 5.90909090909091e-06, "loss": 1.9951588312784831, "step": 144 }, { "epoch": 0.09675918747092573, "grad_norm": 13.84825611114502, "learning_rate": 6.404958677685951e-06, "loss": 1.7978707949320476, "step": 156 }, { "epoch": 0.10420220189176617, "grad_norm": 8.921030044555664, "learning_rate": 6.900826446280993e-06, "loss": 1.6856780052185059, "step": 168 }, { "epoch": 0.1116452163126066, "grad_norm": 7.919989585876465, "learning_rate": 7.396694214876033e-06, "loss": 1.497524897257487, "step": 180 }, { "epoch": 0.11908823073344704, "grad_norm": 15.635968208312988, "learning_rate": 7.892561983471076e-06, "loss": 1.4976633389790852, "step": 192 }, { "epoch": 0.12653124515428749, "grad_norm": 14.213494300842285, "learning_rate": 8.388429752066116e-06, "loss": 1.4405194918314617, "step": 204 }, { "epoch": 0.13397425957512793, "grad_norm": 10.790483474731445, "learning_rate": 8.884297520661158e-06, "loss": 1.2696106433868408, "step": 216 }, { "epoch": 0.14141727399596837, "grad_norm": 14.101875305175781, "learning_rate": 9.3801652892562e-06, "loss": 1.3300576210021973, "step": 228 }, { "epoch": 0.14886028841680882, "grad_norm": 19.911815643310547, "learning_rate": 9.876033057851241e-06, "loss": 1.2497991720835369, "step": 240 }, { "epoch": 0.15630330283764926, "grad_norm": 12.594736099243164, "learning_rate": 1.0371900826446282e-05, "loss": 1.20013427734375, "step": 252 }, { "epoch": 0.16374631725848968, "grad_norm": 10.003790855407715, "learning_rate": 1.0867768595041323e-05, "loss": 1.1903626918792725, "step": 264 }, { "epoch": 0.17118933167933012, "grad_norm": 19.644290924072266, "learning_rate": 1.1363636363636366e-05, "loss": 1.2084464232126872, "step": 276 }, { "epoch": 0.17863234610017056, "grad_norm": 12.33438777923584, "learning_rate": 1.1859504132231406e-05, "loss": 1.1396081447601318, "step": 288 }, { "epoch": 0.186075360521011, "grad_norm": 7.845709800720215, "learning_rate": 1.2355371900826447e-05, "loss": 1.0346049467722576, "step": 300 }, { "epoch": 0.19351837494185145, "grad_norm": 12.355867385864258, "learning_rate": 1.2851239669421488e-05, "loss": 1.0486024220784504, "step": 312 }, { "epoch": 0.2009613893626919, "grad_norm": 9.542502403259277, "learning_rate": 1.3347107438016531e-05, "loss": 1.1321392059326172, "step": 324 }, { "epoch": 0.20840440378353234, "grad_norm": 171.94647216796875, "learning_rate": 1.384297520661157e-05, "loss": 0.9731620152791342, "step": 336 }, { "epoch": 0.21584741820437278, "grad_norm": 14.012189865112305, "learning_rate": 1.4338842975206612e-05, "loss": 0.9310257434844971, "step": 348 }, { "epoch": 0.2232904326252132, "grad_norm": 17.743682861328125, "learning_rate": 1.4834710743801655e-05, "loss": 0.9263285795847574, "step": 360 }, { "epoch": 0.23073344704605364, "grad_norm": 29.65188217163086, "learning_rate": 1.5330578512396693e-05, "loss": 1.0049312114715576, "step": 372 }, { "epoch": 0.23817646146689408, "grad_norm": 16.46782684326172, "learning_rate": 1.5826446280991736e-05, "loss": 1.078270673751831, "step": 384 }, { "epoch": 0.24561947588773453, "grad_norm": 15.282443046569824, "learning_rate": 1.632231404958678e-05, "loss": 0.9908095200856527, "step": 396 }, { "epoch": 0.25306249030857497, "grad_norm": 7.152077674865723, "learning_rate": 1.681818181818182e-05, "loss": 0.8867685794830322, "step": 408 }, { "epoch": 0.2605055047294154, "grad_norm": 17.630233764648438, "learning_rate": 1.731404958677686e-05, "loss": 0.8261091709136963, "step": 420 }, { "epoch": 0.26794851915025586, "grad_norm": 8.756381034851074, "learning_rate": 1.78099173553719e-05, "loss": 0.8141599496205648, "step": 432 }, { "epoch": 0.2753915335710963, "grad_norm": 14.227313041687012, "learning_rate": 1.8305785123966944e-05, "loss": 0.8025492032368978, "step": 444 }, { "epoch": 0.28283454799193675, "grad_norm": 6.028214931488037, "learning_rate": 1.8801652892561987e-05, "loss": 0.827876885732015, "step": 456 }, { "epoch": 0.2902775624127772, "grad_norm": 9.791404724121094, "learning_rate": 1.9297520661157026e-05, "loss": 0.8186439673105875, "step": 468 }, { "epoch": 0.29772057683361763, "grad_norm": 19.028491973876953, "learning_rate": 1.9793388429752066e-05, "loss": 0.8027651309967041, "step": 480 }, { "epoch": 0.3051635912544581, "grad_norm": 5.418436527252197, "learning_rate": 1.996785304247991e-05, "loss": 0.7800490061442057, "step": 492 }, { "epoch": 0.3126066056752985, "grad_norm": 7.598865985870361, "learning_rate": 1.9912743972445466e-05, "loss": 0.7126566569010416, "step": 504 }, { "epoch": 0.3200496200961389, "grad_norm": 7.867424011230469, "learning_rate": 1.9857634902411024e-05, "loss": 0.6536041895548502, "step": 516 }, { "epoch": 0.32749263451697935, "grad_norm": 10.367350578308105, "learning_rate": 1.980252583237658e-05, "loss": 0.8624240557352701, "step": 528 }, { "epoch": 0.3349356489378198, "grad_norm": 6.30031681060791, "learning_rate": 1.9747416762342138e-05, "loss": 0.8412895202636719, "step": 540 }, { "epoch": 0.34237866335866024, "grad_norm": 15.809948921203613, "learning_rate": 1.9692307692307696e-05, "loss": 0.7370687325795492, "step": 552 }, { "epoch": 0.3498216777795007, "grad_norm": 6.0920491218566895, "learning_rate": 1.963719862227325e-05, "loss": 0.7390193144480387, "step": 564 }, { "epoch": 0.3572646922003411, "grad_norm": 11.583715438842773, "learning_rate": 1.9582089552238807e-05, "loss": 0.6651956637700399, "step": 576 }, { "epoch": 0.36470770662118157, "grad_norm": 11.411588668823242, "learning_rate": 1.9526980482204364e-05, "loss": 0.7644002437591553, "step": 588 }, { "epoch": 0.372150721042022, "grad_norm": 8.31484603881836, "learning_rate": 1.947187141216992e-05, "loss": 0.6794478893280029, "step": 600 }, { "epoch": 0.37959373546286246, "grad_norm": 6.703721523284912, "learning_rate": 1.941676234213548e-05, "loss": 0.6266262531280518, "step": 612 }, { "epoch": 0.3870367498837029, "grad_norm": 9.479427337646484, "learning_rate": 1.9361653272101036e-05, "loss": 0.6851427555084229, "step": 624 }, { "epoch": 0.39447976430454335, "grad_norm": 7.663156032562256, "learning_rate": 1.9306544202066593e-05, "loss": 0.6938677628835043, "step": 636 }, { "epoch": 0.4019227787253838, "grad_norm": 4.276080131530762, "learning_rate": 1.9251435132032147e-05, "loss": 0.76728622118632, "step": 648 }, { "epoch": 0.40936579314622423, "grad_norm": 11.622859001159668, "learning_rate": 1.9196326061997705e-05, "loss": 0.7580918471018473, "step": 660 }, { "epoch": 0.4168088075670647, "grad_norm": 13.203335762023926, "learning_rate": 1.9141216991963262e-05, "loss": 0.642679770787557, "step": 672 }, { "epoch": 0.4242518219879051, "grad_norm": 8.963321685791016, "learning_rate": 1.908610792192882e-05, "loss": 0.6361099084218343, "step": 684 }, { "epoch": 0.43169483640874556, "grad_norm": 8.1705904006958, "learning_rate": 1.9030998851894377e-05, "loss": 0.6898341178894043, "step": 696 }, { "epoch": 0.439137850829586, "grad_norm": 3.9877262115478516, "learning_rate": 1.8975889781859934e-05, "loss": 0.6462088028589884, "step": 708 }, { "epoch": 0.4465808652504264, "grad_norm": 12.81478500366211, "learning_rate": 1.892078071182549e-05, "loss": 0.6965091228485107, "step": 720 }, { "epoch": 0.45402387967126684, "grad_norm": 7.810659885406494, "learning_rate": 1.8865671641791045e-05, "loss": 0.7788422902425131, "step": 732 }, { "epoch": 0.4614668940921073, "grad_norm": 4.958326816558838, "learning_rate": 1.8810562571756603e-05, "loss": 0.7460188865661621, "step": 744 }, { "epoch": 0.4689099085129477, "grad_norm": 9.091962814331055, "learning_rate": 1.875545350172216e-05, "loss": 0.6937299569447836, "step": 756 }, { "epoch": 0.47635292293378817, "grad_norm": 7.729589939117432, "learning_rate": 1.8700344431687717e-05, "loss": 0.6188247601191202, "step": 768 }, { "epoch": 0.4837959373546286, "grad_norm": 8.878933906555176, "learning_rate": 1.8645235361653275e-05, "loss": 0.7017858028411865, "step": 780 }, { "epoch": 0.49123895177546906, "grad_norm": 23.914348602294922, "learning_rate": 1.8590126291618832e-05, "loss": 0.7923436164855957, "step": 792 }, { "epoch": 0.4986819661963095, "grad_norm": 10.980387687683105, "learning_rate": 1.853501722158439e-05, "loss": 0.6881453990936279, "step": 804 }, { "epoch": 0.5061249806171499, "grad_norm": 6.988458156585693, "learning_rate": 1.8479908151549943e-05, "loss": 0.683276891708374, "step": 816 }, { "epoch": 0.5135679950379903, "grad_norm": 23.667926788330078, "learning_rate": 1.84247990815155e-05, "loss": 0.6124229431152344, "step": 828 }, { "epoch": 0.5210110094588308, "grad_norm": 7.078935623168945, "learning_rate": 1.8369690011481058e-05, "loss": 0.7043429215749105, "step": 840 }, { "epoch": 0.5284540238796712, "grad_norm": 9.82224178314209, "learning_rate": 1.8314580941446615e-05, "loss": 0.6555114189783732, "step": 852 }, { "epoch": 0.5358970383005117, "grad_norm": 8.077360153198242, "learning_rate": 1.8259471871412173e-05, "loss": 0.6555444002151489, "step": 864 }, { "epoch": 0.5433400527213521, "grad_norm": 3.6762046813964844, "learning_rate": 1.820436280137773e-05, "loss": 0.636172374089559, "step": 876 }, { "epoch": 0.5507830671421926, "grad_norm": 3.8388607501983643, "learning_rate": 1.8149253731343287e-05, "loss": 0.6085333824157715, "step": 888 }, { "epoch": 0.558226081563033, "grad_norm": 3.0353925228118896, "learning_rate": 1.809414466130884e-05, "loss": 0.58968718846639, "step": 900 }, { "epoch": 0.5656690959838735, "grad_norm": 6.465055465698242, "learning_rate": 1.80390355912744e-05, "loss": 0.6078658103942871, "step": 912 }, { "epoch": 0.5731121104047139, "grad_norm": 5.472475528717041, "learning_rate": 1.7983926521239956e-05, "loss": 0.6997927029927572, "step": 924 }, { "epoch": 0.5805551248255544, "grad_norm": 15.40697193145752, "learning_rate": 1.792881745120551e-05, "loss": 0.6386371453603109, "step": 936 }, { "epoch": 0.5879981392463948, "grad_norm": 6.439900875091553, "learning_rate": 1.787370838117107e-05, "loss": 0.6876135667165121, "step": 948 }, { "epoch": 0.5954411536672353, "grad_norm": 10.793220520019531, "learning_rate": 1.7818599311136628e-05, "loss": 0.6237523953119913, "step": 960 }, { "epoch": 0.6028841680880757, "grad_norm": 5.377976417541504, "learning_rate": 1.7763490241102185e-05, "loss": 0.614266554514567, "step": 972 }, { "epoch": 0.6103271825089162, "grad_norm": 7.794371604919434, "learning_rate": 1.770838117106774e-05, "loss": 0.5918615261713663, "step": 984 }, { "epoch": 0.6177701969297565, "grad_norm": 4.7419867515563965, "learning_rate": 1.7653272101033296e-05, "loss": 0.5848552385965983, "step": 996 }, { "epoch": 0.625213211350597, "grad_norm": 14.705470085144043, "learning_rate": 1.7598163030998854e-05, "loss": 0.6608580350875854, "step": 1008 }, { "epoch": 0.6326562257714374, "grad_norm": 6.041922092437744, "learning_rate": 1.754305396096441e-05, "loss": 0.549665609995524, "step": 1020 }, { "epoch": 0.6400992401922778, "grad_norm": 5.13696813583374, "learning_rate": 1.7487944890929965e-05, "loss": 0.7017458279927572, "step": 1032 }, { "epoch": 0.6475422546131183, "grad_norm": 6.016454696655273, "learning_rate": 1.7432835820895522e-05, "loss": 0.6309004227320353, "step": 1044 }, { "epoch": 0.6549852690339587, "grad_norm": 9.331708908081055, "learning_rate": 1.7377726750861083e-05, "loss": 0.6831174691518148, "step": 1056 }, { "epoch": 0.6624282834547992, "grad_norm": 9.878951072692871, "learning_rate": 1.7322617680826637e-05, "loss": 0.6587471961975098, "step": 1068 }, { "epoch": 0.6698712978756396, "grad_norm": 5.033365726470947, "learning_rate": 1.7267508610792194e-05, "loss": 0.6370361646016439, "step": 1080 }, { "epoch": 0.6773143122964801, "grad_norm": 18.762298583984375, "learning_rate": 1.721239954075775e-05, "loss": 0.5823976198832194, "step": 1092 }, { "epoch": 0.6847573267173205, "grad_norm": 2.940394163131714, "learning_rate": 1.715729047072331e-05, "loss": 0.6264007488886515, "step": 1104 }, { "epoch": 0.692200341138161, "grad_norm": 7.621018886566162, "learning_rate": 1.7102181400688863e-05, "loss": 0.5824793974558512, "step": 1116 }, { "epoch": 0.6996433555590014, "grad_norm": 3.141854763031006, "learning_rate": 1.704707233065442e-05, "loss": 0.5842764774958292, "step": 1128 }, { "epoch": 0.7070863699798419, "grad_norm": 5.849940776824951, "learning_rate": 1.6991963260619978e-05, "loss": 0.5304047664006551, "step": 1140 }, { "epoch": 0.7145293844006823, "grad_norm": 7.9883551597595215, "learning_rate": 1.6936854190585535e-05, "loss": 0.5599017937978109, "step": 1152 }, { "epoch": 0.7219723988215228, "grad_norm": 11.370931625366211, "learning_rate": 1.6881745120551092e-05, "loss": 0.5798830588658651, "step": 1164 }, { "epoch": 0.7294154132423631, "grad_norm": 3.5065290927886963, "learning_rate": 1.682663605051665e-05, "loss": 0.6167506376902262, "step": 1176 }, { "epoch": 0.7368584276632036, "grad_norm": 5.930673599243164, "learning_rate": 1.6771526980482207e-05, "loss": 0.5873833497365316, "step": 1188 }, { "epoch": 0.744301442084044, "grad_norm": 6.102614402770996, "learning_rate": 1.671641791044776e-05, "loss": 0.6477183898289999, "step": 1200 }, { "epoch": 0.7517444565048845, "grad_norm": 4.337888717651367, "learning_rate": 1.6661308840413318e-05, "loss": 0.5860347350438436, "step": 1212 }, { "epoch": 0.7591874709257249, "grad_norm": 4.841605186462402, "learning_rate": 1.6606199770378875e-05, "loss": 0.6613442897796631, "step": 1224 }, { "epoch": 0.7666304853465653, "grad_norm": 14.614047050476074, "learning_rate": 1.6551090700344433e-05, "loss": 0.6218246618906657, "step": 1236 }, { "epoch": 0.7740734997674058, "grad_norm": 8.036581039428711, "learning_rate": 1.649598163030999e-05, "loss": 0.5646830002466837, "step": 1248 }, { "epoch": 0.7815165141882462, "grad_norm": 3.943291664123535, "learning_rate": 1.6440872560275547e-05, "loss": 0.6018180449803671, "step": 1260 }, { "epoch": 0.7889595286090867, "grad_norm": 12.51102352142334, "learning_rate": 1.6385763490241105e-05, "loss": 0.6140671968460083, "step": 1272 }, { "epoch": 0.7964025430299271, "grad_norm": 3.718653678894043, "learning_rate": 1.633065442020666e-05, "loss": 0.5359119176864624, "step": 1284 }, { "epoch": 0.8038455574507676, "grad_norm": 2.8353357315063477, "learning_rate": 1.6275545350172216e-05, "loss": 0.502113143603007, "step": 1296 }, { "epoch": 0.811288571871608, "grad_norm": 4.345269203186035, "learning_rate": 1.6220436280137773e-05, "loss": 0.5975545644760132, "step": 1308 }, { "epoch": 0.8187315862924485, "grad_norm": 6.92914342880249, "learning_rate": 1.616532721010333e-05, "loss": 0.6587652762730917, "step": 1320 }, { "epoch": 0.8261746007132889, "grad_norm": 4.188693046569824, "learning_rate": 1.6110218140068888e-05, "loss": 0.6142017841339111, "step": 1332 }, { "epoch": 0.8336176151341294, "grad_norm": 9.596400260925293, "learning_rate": 1.6055109070034445e-05, "loss": 0.5469466845194498, "step": 1344 }, { "epoch": 0.8410606295549697, "grad_norm": 4.810947895050049, "learning_rate": 1.6000000000000003e-05, "loss": 0.5744484265645345, "step": 1356 }, { "epoch": 0.8485036439758102, "grad_norm": 3.5819036960601807, "learning_rate": 1.5944890929965557e-05, "loss": 0.5045839150746664, "step": 1368 }, { "epoch": 0.8559466583966506, "grad_norm": 4.879307746887207, "learning_rate": 1.5889781859931114e-05, "loss": 0.6669184366861979, "step": 1380 }, { "epoch": 0.8633896728174911, "grad_norm": 6.7210693359375, "learning_rate": 1.583467278989667e-05, "loss": 0.5324758291244507, "step": 1392 }, { "epoch": 0.8708326872383315, "grad_norm": 4.653786659240723, "learning_rate": 1.577956371986223e-05, "loss": 0.543891986211141, "step": 1404 }, { "epoch": 0.878275701659172, "grad_norm": 6.386638641357422, "learning_rate": 1.5724454649827786e-05, "loss": 0.5688877105712891, "step": 1416 }, { "epoch": 0.8857187160800124, "grad_norm": 14.5455322265625, "learning_rate": 1.5669345579793343e-05, "loss": 0.5081936915715536, "step": 1428 }, { "epoch": 0.8931617305008528, "grad_norm": 13.621335983276367, "learning_rate": 1.56142365097589e-05, "loss": 0.5466565688451132, "step": 1440 }, { "epoch": 0.9006047449216933, "grad_norm": 7.791660308837891, "learning_rate": 1.5559127439724455e-05, "loss": 0.5543188651402792, "step": 1452 }, { "epoch": 0.9080477593425337, "grad_norm": 4.039332866668701, "learning_rate": 1.5504018369690012e-05, "loss": 0.564227819442749, "step": 1464 }, { "epoch": 0.9154907737633742, "grad_norm": 5.744030475616455, "learning_rate": 1.544890929965557e-05, "loss": 0.5645032723744711, "step": 1476 }, { "epoch": 0.9229337881842146, "grad_norm": 7.17244815826416, "learning_rate": 1.5393800229621126e-05, "loss": 0.6025459369023641, "step": 1488 }, { "epoch": 0.9303768026050551, "grad_norm": 9.460329055786133, "learning_rate": 1.5338691159586684e-05, "loss": 0.5522710482279459, "step": 1500 }, { "epoch": 0.9378198170258955, "grad_norm": 8.257369995117188, "learning_rate": 1.528358208955224e-05, "loss": 0.5696142514546713, "step": 1512 }, { "epoch": 0.945262831446736, "grad_norm": 14.734770774841309, "learning_rate": 1.5228473019517798e-05, "loss": 0.60454261302948, "step": 1524 }, { "epoch": 0.9527058458675763, "grad_norm": 4.352370738983154, "learning_rate": 1.5173363949483352e-05, "loss": 0.48172632853190106, "step": 1536 }, { "epoch": 0.9601488602884168, "grad_norm": 2.388683557510376, "learning_rate": 1.511825487944891e-05, "loss": 0.5889216661453247, "step": 1548 }, { "epoch": 0.9675918747092572, "grad_norm": 9.910285949707031, "learning_rate": 1.5063145809414467e-05, "loss": 0.5621689558029175, "step": 1560 }, { "epoch": 0.9750348891300977, "grad_norm": 5.445796966552734, "learning_rate": 1.5008036739380026e-05, "loss": 0.5526663859685262, "step": 1572 }, { "epoch": 0.9824779035509381, "grad_norm": 5.242825984954834, "learning_rate": 1.495292766934558e-05, "loss": 0.5673882563908895, "step": 1584 }, { "epoch": 0.9899209179717786, "grad_norm": 10.1865234375, "learning_rate": 1.4897818599311137e-05, "loss": 0.5648102362950643, "step": 1596 }, { "epoch": 0.997363932392619, "grad_norm": 5.388990879058838, "learning_rate": 1.4842709529276695e-05, "loss": 0.5376612345377604, "step": 1608 }, { "epoch": 1.0, "eval_f1": 0.4322638779516363, "eval_loss": 0.13781657814979553, "eval_precision": 0.3967545697112817, "eval_recall": 0.4884485429972486, "eval_runtime": 583.7374, "eval_samples_per_second": 66.278, "eval_steps_per_second": 1.382, "step": 1613 }, { "epoch": 1.004341758412157, "grad_norm": 11.303878784179688, "learning_rate": 1.478760045924225e-05, "loss": 0.46324888865152997, "step": 1620 }, { "epoch": 1.0117847728329974, "grad_norm": 4.389431476593018, "learning_rate": 1.4732491389207808e-05, "loss": 0.48095786571502686, "step": 1632 }, { "epoch": 1.0192277872538378, "grad_norm": 2.298799514770508, "learning_rate": 1.4677382319173365e-05, "loss": 0.5406383275985718, "step": 1644 }, { "epoch": 1.0266708016746782, "grad_norm": 4.433741092681885, "learning_rate": 1.4622273249138922e-05, "loss": 0.4697510798772176, "step": 1656 }, { "epoch": 1.0341138160955188, "grad_norm": 4.704965591430664, "learning_rate": 1.4567164179104478e-05, "loss": 0.5180115699768066, "step": 1668 }, { "epoch": 1.0415568305163592, "grad_norm": 5.16159725189209, "learning_rate": 1.4512055109070035e-05, "loss": 0.49386584758758545, "step": 1680 }, { "epoch": 1.0489998449371996, "grad_norm": 2.5488502979278564, "learning_rate": 1.4456946039035593e-05, "loss": 0.41516109307607013, "step": 1692 }, { "epoch": 1.05644285935804, "grad_norm": 12.81408405303955, "learning_rate": 1.4401836969001148e-05, "loss": 0.5269262790679932, "step": 1704 }, { "epoch": 1.0638858737788803, "grad_norm": 2.8521316051483154, "learning_rate": 1.4346727898966706e-05, "loss": 0.45834481716156006, "step": 1716 }, { "epoch": 1.071328888199721, "grad_norm": 5.517307758331299, "learning_rate": 1.4291618828932263e-05, "loss": 0.573523203531901, "step": 1728 }, { "epoch": 1.0787719026205613, "grad_norm": 2.4321818351745605, "learning_rate": 1.423650975889782e-05, "loss": 0.4625085194905599, "step": 1740 }, { "epoch": 1.0862149170414017, "grad_norm": 15.532980918884277, "learning_rate": 1.4181400688863376e-05, "loss": 0.5057009855906168, "step": 1752 }, { "epoch": 1.093657931462242, "grad_norm": 4.501278877258301, "learning_rate": 1.4126291618828933e-05, "loss": 0.4823911984761556, "step": 1764 }, { "epoch": 1.1011009458830827, "grad_norm": 6.726215362548828, "learning_rate": 1.407118254879449e-05, "loss": 0.42187273502349854, "step": 1776 }, { "epoch": 1.108543960303923, "grad_norm": 14.170055389404297, "learning_rate": 1.4016073478760046e-05, "loss": 0.5301618576049805, "step": 1788 }, { "epoch": 1.1159869747247635, "grad_norm": 2.776092767715454, "learning_rate": 1.3960964408725603e-05, "loss": 0.4935903151830037, "step": 1800 }, { "epoch": 1.1234299891456039, "grad_norm": 7.334898948669434, "learning_rate": 1.390585533869116e-05, "loss": 0.5331637859344482, "step": 1812 }, { "epoch": 1.1308730035664445, "grad_norm": 4.995052337646484, "learning_rate": 1.3850746268656718e-05, "loss": 0.4663925568262736, "step": 1824 }, { "epoch": 1.1383160179872849, "grad_norm": 9.281367301940918, "learning_rate": 1.3795637198622274e-05, "loss": 0.44923396905263263, "step": 1836 }, { "epoch": 1.1457590324081253, "grad_norm": 5.095090866088867, "learning_rate": 1.3740528128587831e-05, "loss": 0.5650514364242554, "step": 1848 }, { "epoch": 1.1532020468289657, "grad_norm": 2.299600839614868, "learning_rate": 1.3685419058553388e-05, "loss": 0.48252185185750324, "step": 1860 }, { "epoch": 1.1606450612498063, "grad_norm": 6.702273368835449, "learning_rate": 1.3630309988518944e-05, "loss": 0.5192966063817342, "step": 1872 }, { "epoch": 1.1680880756706467, "grad_norm": 10.89989948272705, "learning_rate": 1.3575200918484501e-05, "loss": 0.48262282212575275, "step": 1884 }, { "epoch": 1.175531090091487, "grad_norm": 15.075289726257324, "learning_rate": 1.3520091848450059e-05, "loss": 0.45538806915283203, "step": 1896 }, { "epoch": 1.1829741045123274, "grad_norm": 3.0880722999572754, "learning_rate": 1.3464982778415616e-05, "loss": 0.46872226397196454, "step": 1908 }, { "epoch": 1.1904171189331678, "grad_norm": 8.533724784851074, "learning_rate": 1.3409873708381172e-05, "loss": 0.4827297528584798, "step": 1920 }, { "epoch": 1.1978601333540084, "grad_norm": 3.070657968521118, "learning_rate": 1.3354764638346729e-05, "loss": 0.48583118120829266, "step": 1932 }, { "epoch": 1.2053031477748488, "grad_norm": 3.7270054817199707, "learning_rate": 1.3299655568312286e-05, "loss": 0.505421002705892, "step": 1944 }, { "epoch": 1.2127461621956892, "grad_norm": 9.997303009033203, "learning_rate": 1.3244546498277842e-05, "loss": 0.4140005111694336, "step": 1956 }, { "epoch": 1.2201891766165298, "grad_norm": 11.578160285949707, "learning_rate": 1.31894374282434e-05, "loss": 0.44274091720581055, "step": 1968 }, { "epoch": 1.2276321910373702, "grad_norm": 9.199183464050293, "learning_rate": 1.3134328358208957e-05, "loss": 0.5600036780039469, "step": 1980 }, { "epoch": 1.2350752054582106, "grad_norm": 7.212144374847412, "learning_rate": 1.3079219288174514e-05, "loss": 0.494090994199117, "step": 1992 }, { "epoch": 1.242518219879051, "grad_norm": 3.4123635292053223, "learning_rate": 1.302411021814007e-05, "loss": 0.4909547170003255, "step": 2004 }, { "epoch": 1.2499612342998914, "grad_norm": 7.941708087921143, "learning_rate": 1.2969001148105627e-05, "loss": 0.47832663853963214, "step": 2016 }, { "epoch": 1.257404248720732, "grad_norm": 2.4799387454986572, "learning_rate": 1.2913892078071184e-05, "loss": 0.49106045564015705, "step": 2028 }, { "epoch": 1.2648472631415724, "grad_norm": 5.136545658111572, "learning_rate": 1.2858783008036742e-05, "loss": 0.4738738536834717, "step": 2040 }, { "epoch": 1.2722902775624128, "grad_norm": 4.9489240646362305, "learning_rate": 1.2803673938002297e-05, "loss": 0.4953068097432454, "step": 2052 }, { "epoch": 1.2797332919832531, "grad_norm": 6.822914123535156, "learning_rate": 1.2748564867967855e-05, "loss": 0.46026841799418133, "step": 2064 }, { "epoch": 1.2871763064040938, "grad_norm": 6.177013874053955, "learning_rate": 1.2693455797933412e-05, "loss": 0.494237224260966, "step": 2076 }, { "epoch": 1.2946193208249341, "grad_norm": 2.4243626594543457, "learning_rate": 1.2638346727898967e-05, "loss": 0.5003351370493571, "step": 2088 }, { "epoch": 1.3020623352457745, "grad_norm": 18.99603843688965, "learning_rate": 1.2583237657864525e-05, "loss": 0.5109163920084635, "step": 2100 }, { "epoch": 1.309505349666615, "grad_norm": 2.4371707439422607, "learning_rate": 1.2528128587830082e-05, "loss": 0.41310568650563556, "step": 2112 }, { "epoch": 1.3169483640874553, "grad_norm": 3.7665302753448486, "learning_rate": 1.247301951779564e-05, "loss": 0.45848862330118817, "step": 2124 }, { "epoch": 1.324391378508296, "grad_norm": 12.537642478942871, "learning_rate": 1.2417910447761195e-05, "loss": 0.523716410001119, "step": 2136 }, { "epoch": 1.3318343929291363, "grad_norm": 2.882084846496582, "learning_rate": 1.2362801377726752e-05, "loss": 0.47608526547749835, "step": 2148 }, { "epoch": 1.3392774073499767, "grad_norm": 2.86336612701416, "learning_rate": 1.230769230769231e-05, "loss": 0.4347230593363444, "step": 2160 }, { "epoch": 1.3467204217708173, "grad_norm": 3.1628830432891846, "learning_rate": 1.2252583237657865e-05, "loss": 0.46674203872680664, "step": 2172 }, { "epoch": 1.3541634361916577, "grad_norm": 11.767653465270996, "learning_rate": 1.2197474167623423e-05, "loss": 0.47306569417317706, "step": 2184 }, { "epoch": 1.361606450612498, "grad_norm": 11.81271743774414, "learning_rate": 1.214236509758898e-05, "loss": 0.4672517776489258, "step": 2196 }, { "epoch": 1.3690494650333385, "grad_norm": 3.6157212257385254, "learning_rate": 1.2087256027554537e-05, "loss": 0.4465065797170003, "step": 2208 }, { "epoch": 1.3764924794541789, "grad_norm": 3.778449773788452, "learning_rate": 1.2032146957520093e-05, "loss": 0.5149937868118286, "step": 2220 }, { "epoch": 1.3839354938750195, "grad_norm": 2.5120906829833984, "learning_rate": 1.197703788748565e-05, "loss": 0.45879046122233075, "step": 2232 }, { "epoch": 1.3913785082958599, "grad_norm": 17.704999923706055, "learning_rate": 1.1921928817451208e-05, "loss": 0.5167669057846069, "step": 2244 }, { "epoch": 1.3988215227167002, "grad_norm": 11.8012113571167, "learning_rate": 1.1866819747416762e-05, "loss": 0.496524175008138, "step": 2256 }, { "epoch": 1.4062645371375406, "grad_norm": 13.236916542053223, "learning_rate": 1.181171067738232e-05, "loss": 0.47164463996887207, "step": 2268 }, { "epoch": 1.4137075515583812, "grad_norm": 3.6107146739959717, "learning_rate": 1.1756601607347878e-05, "loss": 0.4411802689234416, "step": 2280 }, { "epoch": 1.4211505659792216, "grad_norm": 3.5400538444519043, "learning_rate": 1.1701492537313435e-05, "loss": 0.44078512986501056, "step": 2292 }, { "epoch": 1.428593580400062, "grad_norm": 3.386744260787964, "learning_rate": 1.164638346727899e-05, "loss": 0.44522058963775635, "step": 2304 }, { "epoch": 1.4360365948209024, "grad_norm": 7.451818466186523, "learning_rate": 1.1591274397244548e-05, "loss": 0.4643220106760661, "step": 2316 }, { "epoch": 1.4434796092417428, "grad_norm": 3.741562843322754, "learning_rate": 1.1536165327210106e-05, "loss": 0.4557652473449707, "step": 2328 }, { "epoch": 1.4509226236625834, "grad_norm": 2.767171621322632, "learning_rate": 1.148105625717566e-05, "loss": 0.4677225748697917, "step": 2340 }, { "epoch": 1.4583656380834238, "grad_norm": 5.696690559387207, "learning_rate": 1.1425947187141217e-05, "loss": 0.42428747812906903, "step": 2352 }, { "epoch": 1.4658086525042642, "grad_norm": 6.44115686416626, "learning_rate": 1.1370838117106774e-05, "loss": 0.4969560702641805, "step": 2364 }, { "epoch": 1.4732516669251048, "grad_norm": 6.7684831619262695, "learning_rate": 1.1315729047072333e-05, "loss": 0.5301390091578165, "step": 2376 }, { "epoch": 1.4806946813459452, "grad_norm": 2.761455774307251, "learning_rate": 1.1260619977037887e-05, "loss": 0.4755421082178752, "step": 2388 }, { "epoch": 1.4881376957667856, "grad_norm": 7.615389347076416, "learning_rate": 1.1205510907003444e-05, "loss": 0.4676011800765991, "step": 2400 }, { "epoch": 1.495580710187626, "grad_norm": 3.118619680404663, "learning_rate": 1.1150401836969002e-05, "loss": 0.4575995206832886, "step": 2412 }, { "epoch": 1.5030237246084663, "grad_norm": 4.179815769195557, "learning_rate": 1.1095292766934557e-05, "loss": 0.5326940615971884, "step": 2424 }, { "epoch": 1.5104667390293067, "grad_norm": 3.128330945968628, "learning_rate": 1.1040183696900115e-05, "loss": 0.45927361647288006, "step": 2436 }, { "epoch": 1.5179097534501473, "grad_norm": 3.6722943782806396, "learning_rate": 1.0985074626865672e-05, "loss": 0.5232657591501871, "step": 2448 }, { "epoch": 1.5253527678709877, "grad_norm": 8.696102142333984, "learning_rate": 1.092996555683123e-05, "loss": 0.5253320535024008, "step": 2460 }, { "epoch": 1.5327957822918283, "grad_norm": 6.030095100402832, "learning_rate": 1.0874856486796785e-05, "loss": 0.4725768566131592, "step": 2472 }, { "epoch": 1.5402387967126687, "grad_norm": 8.892803192138672, "learning_rate": 1.0819747416762342e-05, "loss": 0.44700531164805096, "step": 2484 }, { "epoch": 1.5476818111335091, "grad_norm": 15.271442413330078, "learning_rate": 1.07646383467279e-05, "loss": 0.44845902919769287, "step": 2496 }, { "epoch": 1.5551248255543495, "grad_norm": 5.234111785888672, "learning_rate": 1.0709529276693457e-05, "loss": 0.5186563730239868, "step": 2508 }, { "epoch": 1.5625678399751899, "grad_norm": 6.541170597076416, "learning_rate": 1.0654420206659013e-05, "loss": 0.4690740505854289, "step": 2520 }, { "epoch": 1.5700108543960303, "grad_norm": 2.7548892498016357, "learning_rate": 1.059931113662457e-05, "loss": 0.43329620361328125, "step": 2532 }, { "epoch": 1.5774538688168709, "grad_norm": 12.428861618041992, "learning_rate": 1.0544202066590127e-05, "loss": 0.43588805198669434, "step": 2544 }, { "epoch": 1.5848968832377113, "grad_norm": 9.76059627532959, "learning_rate": 1.0489092996555683e-05, "loss": 0.4283796151479085, "step": 2556 }, { "epoch": 1.5923398976585517, "grad_norm": 10.960260391235352, "learning_rate": 1.043398392652124e-05, "loss": 0.4565364519755046, "step": 2568 }, { "epoch": 1.5997829120793923, "grad_norm": 6.568747043609619, "learning_rate": 1.0378874856486798e-05, "loss": 0.41670429706573486, "step": 2580 }, { "epoch": 1.6072259265002327, "grad_norm": 2.755124568939209, "learning_rate": 1.0323765786452355e-05, "loss": 0.4691346486409505, "step": 2592 }, { "epoch": 1.614668940921073, "grad_norm": 21.070772171020508, "learning_rate": 1.026865671641791e-05, "loss": 0.4186259905497233, "step": 2604 }, { "epoch": 1.6221119553419134, "grad_norm": 5.272284507751465, "learning_rate": 1.0213547646383468e-05, "loss": 0.4942372639973958, "step": 2616 }, { "epoch": 1.6295549697627538, "grad_norm": 8.858941078186035, "learning_rate": 1.0158438576349025e-05, "loss": 0.4842514594395955, "step": 2628 }, { "epoch": 1.6369979841835942, "grad_norm": 4.663693428039551, "learning_rate": 1.010332950631458e-05, "loss": 0.49429325262705487, "step": 2640 }, { "epoch": 1.6444409986044348, "grad_norm": 14.864917755126953, "learning_rate": 1.0048220436280138e-05, "loss": 0.46838700771331787, "step": 2652 }, { "epoch": 1.6518840130252752, "grad_norm": 2.5411393642425537, "learning_rate": 9.993111366245695e-06, "loss": 0.4521595239639282, "step": 2664 }, { "epoch": 1.6593270274461158, "grad_norm": 3.005941152572632, "learning_rate": 9.938002296211253e-06, "loss": 0.48365652561187744, "step": 2676 }, { "epoch": 1.6667700418669562, "grad_norm": 5.7398552894592285, "learning_rate": 9.88289322617681e-06, "loss": 0.4695123831431071, "step": 2688 }, { "epoch": 1.6742130562877966, "grad_norm": 4.946065902709961, "learning_rate": 9.827784156142366e-06, "loss": 0.4761979579925537, "step": 2700 }, { "epoch": 1.681656070708637, "grad_norm": 7.703652858734131, "learning_rate": 9.772675086107923e-06, "loss": 0.49780480066935223, "step": 2712 }, { "epoch": 1.6890990851294774, "grad_norm": 8.237687110900879, "learning_rate": 9.71756601607348e-06, "loss": 0.4623022476832072, "step": 2724 }, { "epoch": 1.6965420995503178, "grad_norm": 2.87007474899292, "learning_rate": 9.662456946039036e-06, "loss": 0.41221630573272705, "step": 2736 }, { "epoch": 1.7039851139711584, "grad_norm": 4.247465133666992, "learning_rate": 9.607347876004593e-06, "loss": 0.4721166690190633, "step": 2748 }, { "epoch": 1.7114281283919988, "grad_norm": 4.022077560424805, "learning_rate": 9.552238805970149e-06, "loss": 0.47880788644154865, "step": 2760 }, { "epoch": 1.7188711428128391, "grad_norm": 5.686273574829102, "learning_rate": 9.497129735935708e-06, "loss": 0.486567219098409, "step": 2772 }, { "epoch": 1.7263141572336798, "grad_norm": 4.733608245849609, "learning_rate": 9.442020665901264e-06, "loss": 0.4696682294209798, "step": 2784 }, { "epoch": 1.7337571716545201, "grad_norm": 3.8102357387542725, "learning_rate": 9.38691159586682e-06, "loss": 0.4944278796513875, "step": 2796 }, { "epoch": 1.7412001860753605, "grad_norm": 5.343743801116943, "learning_rate": 9.331802525832377e-06, "loss": 0.45073699951171875, "step": 2808 }, { "epoch": 1.748643200496201, "grad_norm": 8.939608573913574, "learning_rate": 9.276693455797934e-06, "loss": 0.5150019327799479, "step": 2820 }, { "epoch": 1.7560862149170413, "grad_norm": 9.984607696533203, "learning_rate": 9.221584385763491e-06, "loss": 0.49051181475321454, "step": 2832 }, { "epoch": 1.7635292293378817, "grad_norm": 4.297845840454102, "learning_rate": 9.166475315729047e-06, "loss": 0.43834813435872394, "step": 2844 }, { "epoch": 1.7709722437587223, "grad_norm": 4.738193035125732, "learning_rate": 9.111366245694604e-06, "loss": 0.48496174812316895, "step": 2856 }, { "epoch": 1.7784152581795627, "grad_norm": 6.950840473175049, "learning_rate": 9.056257175660162e-06, "loss": 0.4803895950317383, "step": 2868 }, { "epoch": 1.7858582726004033, "grad_norm": 2.9567737579345703, "learning_rate": 9.001148105625719e-06, "loss": 0.47137478987375897, "step": 2880 }, { "epoch": 1.7933012870212437, "grad_norm": 21.629295349121094, "learning_rate": 8.946039035591275e-06, "loss": 0.5382961829503378, "step": 2892 }, { "epoch": 1.800744301442084, "grad_norm": 4.054839611053467, "learning_rate": 8.890929965556832e-06, "loss": 0.429937203725179, "step": 2904 }, { "epoch": 1.8081873158629245, "grad_norm": 8.124676704406738, "learning_rate": 8.83582089552239e-06, "loss": 0.46343564987182617, "step": 2916 }, { "epoch": 1.8156303302837649, "grad_norm": 6.405475616455078, "learning_rate": 8.780711825487945e-06, "loss": 0.47476502259572345, "step": 2928 }, { "epoch": 1.8230733447046052, "grad_norm": 3.4982993602752686, "learning_rate": 8.725602755453502e-06, "loss": 0.42661325136820477, "step": 2940 }, { "epoch": 1.8305163591254459, "grad_norm": 5.036385536193848, "learning_rate": 8.67049368541906e-06, "loss": 0.42475831508636475, "step": 2952 }, { "epoch": 1.8379593735462862, "grad_norm": 9.453807830810547, "learning_rate": 8.615384615384617e-06, "loss": 0.4522843360900879, "step": 2964 }, { "epoch": 1.8454023879671266, "grad_norm": 7.572172164916992, "learning_rate": 8.560275545350172e-06, "loss": 0.5405757427215576, "step": 2976 }, { "epoch": 1.8528454023879672, "grad_norm": 3.8509397506713867, "learning_rate": 8.50516647531573e-06, "loss": 0.4206368128458659, "step": 2988 }, { "epoch": 1.8602884168088076, "grad_norm": 3.8660781383514404, "learning_rate": 8.450057405281287e-06, "loss": 0.4278140465418498, "step": 3000 }, { "epoch": 1.867731431229648, "grad_norm": 13.179638862609863, "learning_rate": 8.394948335246843e-06, "loss": 0.45146167278289795, "step": 3012 }, { "epoch": 1.8751744456504884, "grad_norm": 2.5003507137298584, "learning_rate": 8.3398392652124e-06, "loss": 0.5010615189870199, "step": 3024 }, { "epoch": 1.8826174600713288, "grad_norm": 6.336158752441406, "learning_rate": 8.284730195177957e-06, "loss": 0.48331379890441895, "step": 3036 }, { "epoch": 1.8900604744921692, "grad_norm": 3.9048869609832764, "learning_rate": 8.229621125143515e-06, "loss": 0.4964629014333089, "step": 3048 }, { "epoch": 1.8975034889130098, "grad_norm": 4.851749897003174, "learning_rate": 8.17451205510907e-06, "loss": 0.4605306386947632, "step": 3060 }, { "epoch": 1.9049465033338502, "grad_norm": 2.5984604358673096, "learning_rate": 8.119402985074628e-06, "loss": 0.42377761999766034, "step": 3072 }, { "epoch": 1.9123895177546908, "grad_norm": 14.330255508422852, "learning_rate": 8.064293915040185e-06, "loss": 0.4586070378621419, "step": 3084 }, { "epoch": 1.9198325321755312, "grad_norm": 5.363494873046875, "learning_rate": 8.00918484500574e-06, "loss": 0.4935295581817627, "step": 3096 }, { "epoch": 1.9272755465963716, "grad_norm": 5.703904151916504, "learning_rate": 7.954075774971298e-06, "loss": 0.44021427631378174, "step": 3108 }, { "epoch": 1.934718561017212, "grad_norm": 5.600277423858643, "learning_rate": 7.898966704936855e-06, "loss": 0.48560158411661786, "step": 3120 }, { "epoch": 1.9421615754380523, "grad_norm": 11.074832916259766, "learning_rate": 7.843857634902413e-06, "loss": 0.4312416712443034, "step": 3132 }, { "epoch": 1.9496045898588927, "grad_norm": 3.4356892108917236, "learning_rate": 7.788748564867968e-06, "loss": 0.4442025025685628, "step": 3144 }, { "epoch": 1.9570476042797333, "grad_norm": 3.7474091053009033, "learning_rate": 7.733639494833526e-06, "loss": 0.5241368214289347, "step": 3156 }, { "epoch": 1.9644906187005737, "grad_norm": 4.750489234924316, "learning_rate": 7.678530424799083e-06, "loss": 0.4401020606358846, "step": 3168 }, { "epoch": 1.9719336331214141, "grad_norm": 22.131851196289062, "learning_rate": 7.6234213547646386e-06, "loss": 0.5134913126627604, "step": 3180 }, { "epoch": 1.9793766475422547, "grad_norm": 4.812230587005615, "learning_rate": 7.568312284730196e-06, "loss": 0.5479523340861002, "step": 3192 }, { "epoch": 1.9868196619630951, "grad_norm": 6.560222625732422, "learning_rate": 7.513203214695752e-06, "loss": 0.4738404353459676, "step": 3204 }, { "epoch": 1.9942626763839355, "grad_norm": 5.240246772766113, "learning_rate": 7.45809414466131e-06, "loss": 0.4475013017654419, "step": 3216 }, { "epoch": 2.0, "eval_f1": 0.43079906968624254, "eval_loss": 0.11952196806669235, "eval_precision": 0.391528709389682, "eval_recall": 0.4931553870446119, "eval_runtime": 585.0453, "eval_samples_per_second": 66.13, "eval_steps_per_second": 1.379, "step": 3226 }, { "epoch": 2.0012405024034736, "grad_norm": 4.430677890777588, "learning_rate": 7.402985074626866e-06, "loss": 0.4009953737258911, "step": 3228 }, { "epoch": 2.008683516824314, "grad_norm": 10.324471473693848, "learning_rate": 7.3478760045924235e-06, "loss": 0.4711928367614746, "step": 3240 }, { "epoch": 2.0161265312451544, "grad_norm": 11.249197006225586, "learning_rate": 7.29276693455798e-06, "loss": 0.4341440995534261, "step": 3252 }, { "epoch": 2.023569545665995, "grad_norm": 2.7949812412261963, "learning_rate": 7.2376578645235365e-06, "loss": 0.3914073705673218, "step": 3264 }, { "epoch": 2.031012560086835, "grad_norm": 10.501336097717285, "learning_rate": 7.182548794489094e-06, "loss": 0.3871670166651408, "step": 3276 }, { "epoch": 2.0384555745076756, "grad_norm": 11.492402076721191, "learning_rate": 7.12743972445465e-06, "loss": 0.44295652707417804, "step": 3288 }, { "epoch": 2.045898588928516, "grad_norm": 8.688313484191895, "learning_rate": 7.072330654420208e-06, "loss": 0.4092850685119629, "step": 3300 }, { "epoch": 2.0533416033493563, "grad_norm": 5.402098178863525, "learning_rate": 7.017221584385764e-06, "loss": 0.41869743665059406, "step": 3312 }, { "epoch": 2.0607846177701967, "grad_norm": 3.6429481506347656, "learning_rate": 6.962112514351321e-06, "loss": 0.3916611671447754, "step": 3324 }, { "epoch": 2.0682276321910376, "grad_norm": 4.778937339782715, "learning_rate": 6.907003444316878e-06, "loss": 0.3913481632868449, "step": 3336 }, { "epoch": 2.075670646611878, "grad_norm": 4.281859874725342, "learning_rate": 6.851894374282435e-06, "loss": 0.380032738049825, "step": 3348 }, { "epoch": 2.0831136610327183, "grad_norm": 7.385513782501221, "learning_rate": 6.796785304247992e-06, "loss": 0.3545822699864705, "step": 3360 }, { "epoch": 2.0905566754535587, "grad_norm": 2.9248600006103516, "learning_rate": 6.741676234213548e-06, "loss": 0.419588565826416, "step": 3372 }, { "epoch": 2.097999689874399, "grad_norm": 3.0418336391448975, "learning_rate": 6.6865671641791055e-06, "loss": 0.4189613262812297, "step": 3384 }, { "epoch": 2.1054427042952395, "grad_norm": 4.628702640533447, "learning_rate": 6.631458094144662e-06, "loss": 0.38280495007832843, "step": 3396 }, { "epoch": 2.11288571871608, "grad_norm": 2.931917667388916, "learning_rate": 6.576349024110219e-06, "loss": 0.40134119987487793, "step": 3408 }, { "epoch": 2.1203287331369203, "grad_norm": 5.4905853271484375, "learning_rate": 6.521239954075776e-06, "loss": 0.3685312271118164, "step": 3420 }, { "epoch": 2.1277717475577607, "grad_norm": 2.9753782749176025, "learning_rate": 6.466130884041333e-06, "loss": 0.3878607749938965, "step": 3432 }, { "epoch": 2.1352147619786015, "grad_norm": 7.17921257019043, "learning_rate": 6.411021814006889e-06, "loss": 0.41369112332661945, "step": 3444 }, { "epoch": 2.142657776399442, "grad_norm": 13.806902885437012, "learning_rate": 6.355912743972445e-06, "loss": 0.43599124749501544, "step": 3456 }, { "epoch": 2.1501007908202823, "grad_norm": 3.4916634559631348, "learning_rate": 6.3008036739380026e-06, "loss": 0.3406885862350464, "step": 3468 }, { "epoch": 2.1575438052411227, "grad_norm": 6.193579196929932, "learning_rate": 6.245694603903559e-06, "loss": 0.3558163642883301, "step": 3480 }, { "epoch": 2.164986819661963, "grad_norm": 6.37896203994751, "learning_rate": 6.190585533869116e-06, "loss": 0.35776766141255695, "step": 3492 }, { "epoch": 2.1724298340828034, "grad_norm": 12.731496810913086, "learning_rate": 6.135476463834673e-06, "loss": 0.37972402572631836, "step": 3504 }, { "epoch": 2.179872848503644, "grad_norm": 19.98930549621582, "learning_rate": 6.08036739380023e-06, "loss": 0.42111217975616455, "step": 3516 }, { "epoch": 2.187315862924484, "grad_norm": 6.11861515045166, "learning_rate": 6.025258323765787e-06, "loss": 0.3672644297281901, "step": 3528 }, { "epoch": 2.194758877345325, "grad_norm": 11.929699897766113, "learning_rate": 5.970149253731343e-06, "loss": 0.4023996591567993, "step": 3540 }, { "epoch": 2.2022018917661654, "grad_norm": 17.26346206665039, "learning_rate": 5.9150401836969005e-06, "loss": 0.38841597239176434, "step": 3552 }, { "epoch": 2.209644906187006, "grad_norm": 9.183552742004395, "learning_rate": 5.859931113662457e-06, "loss": 0.42536401748657227, "step": 3564 }, { "epoch": 2.217087920607846, "grad_norm": 2.3118231296539307, "learning_rate": 5.804822043628014e-06, "loss": 0.4157342513402303, "step": 3576 }, { "epoch": 2.2245309350286866, "grad_norm": 6.309724807739258, "learning_rate": 5.749712973593571e-06, "loss": 0.4599275191624959, "step": 3588 }, { "epoch": 2.231973949449527, "grad_norm": 2.892469882965088, "learning_rate": 5.694603903559128e-06, "loss": 0.441303292910258, "step": 3600 }, { "epoch": 2.2394169638703674, "grad_norm": 6.523403167724609, "learning_rate": 5.6394948335246846e-06, "loss": 0.41275028387705487, "step": 3612 }, { "epoch": 2.2468599782912078, "grad_norm": 2.6101267337799072, "learning_rate": 5.584385763490242e-06, "loss": 0.41505225499471027, "step": 3624 }, { "epoch": 2.2543029927120486, "grad_norm": 5.343144416809082, "learning_rate": 5.529276693455798e-06, "loss": 0.38965781529744464, "step": 3636 }, { "epoch": 2.261746007132889, "grad_norm": 4.3300395011901855, "learning_rate": 5.474167623421355e-06, "loss": 0.4278339942296346, "step": 3648 }, { "epoch": 2.2691890215537294, "grad_norm": 5.109958171844482, "learning_rate": 5.419058553386912e-06, "loss": 0.366446574529012, "step": 3660 }, { "epoch": 2.2766320359745698, "grad_norm": 3.8399014472961426, "learning_rate": 5.363949483352469e-06, "loss": 0.3991047541300456, "step": 3672 }, { "epoch": 2.28407505039541, "grad_norm": 6.625537872314453, "learning_rate": 5.308840413318026e-06, "loss": 0.3346426486968994, "step": 3684 }, { "epoch": 2.2915180648162505, "grad_norm": 11.645654678344727, "learning_rate": 5.2537313432835825e-06, "loss": 0.3985482454299927, "step": 3696 }, { "epoch": 2.298961079237091, "grad_norm": 5.67885684967041, "learning_rate": 5.19862227324914e-06, "loss": 0.3815650939941406, "step": 3708 }, { "epoch": 2.3064040936579313, "grad_norm": 4.548233985900879, "learning_rate": 5.143513203214696e-06, "loss": 0.39840646584828693, "step": 3720 }, { "epoch": 2.3138471080787717, "grad_norm": 3.8364691734313965, "learning_rate": 5.088404133180253e-06, "loss": 0.4081765413284302, "step": 3732 }, { "epoch": 2.3212901224996125, "grad_norm": 2.5266079902648926, "learning_rate": 5.03329506314581e-06, "loss": 0.3613650401433309, "step": 3744 }, { "epoch": 2.328733136920453, "grad_norm": 7.049173831939697, "learning_rate": 4.9781859931113666e-06, "loss": 0.4112436771392822, "step": 3756 }, { "epoch": 2.3361761513412933, "grad_norm": 7.23855447769165, "learning_rate": 4.923076923076924e-06, "loss": 0.4015626907348633, "step": 3768 }, { "epoch": 2.3436191657621337, "grad_norm": 7.326627731323242, "learning_rate": 4.86796785304248e-06, "loss": 0.389956792195638, "step": 3780 }, { "epoch": 2.351062180182974, "grad_norm": 11.426876068115234, "learning_rate": 4.812858783008037e-06, "loss": 0.392941157023112, "step": 3792 }, { "epoch": 2.3585051946038145, "grad_norm": 5.058406352996826, "learning_rate": 4.757749712973594e-06, "loss": 0.388182799021403, "step": 3804 }, { "epoch": 2.365948209024655, "grad_norm": 7.783097267150879, "learning_rate": 4.702640642939151e-06, "loss": 0.4082544247309367, "step": 3816 }, { "epoch": 2.3733912234454952, "grad_norm": 4.8967084884643555, "learning_rate": 4.647531572904708e-06, "loss": 0.40780651569366455, "step": 3828 }, { "epoch": 2.3808342378663356, "grad_norm": 5.760252952575684, "learning_rate": 4.5924225028702645e-06, "loss": 0.4002196391423543, "step": 3840 }, { "epoch": 2.3882772522871765, "grad_norm": 4.79511022567749, "learning_rate": 4.537313432835822e-06, "loss": 0.3828426996866862, "step": 3852 }, { "epoch": 2.395720266708017, "grad_norm": 3.2499914169311523, "learning_rate": 4.4822043628013774e-06, "loss": 0.3649975061416626, "step": 3864 }, { "epoch": 2.4031632811288572, "grad_norm": 5.451921463012695, "learning_rate": 4.427095292766935e-06, "loss": 0.3998970588048299, "step": 3876 }, { "epoch": 2.4106062955496976, "grad_norm": 3.8105506896972656, "learning_rate": 4.371986222732491e-06, "loss": 0.45681726932525635, "step": 3888 }, { "epoch": 2.418049309970538, "grad_norm": 3.690845012664795, "learning_rate": 4.3168771526980486e-06, "loss": 0.3797287543614705, "step": 3900 }, { "epoch": 2.4254923243913784, "grad_norm": 12.44582748413086, "learning_rate": 4.261768082663605e-06, "loss": 0.47908584276835126, "step": 3912 }, { "epoch": 2.432935338812219, "grad_norm": 3.862395763397217, "learning_rate": 4.206659012629162e-06, "loss": 0.4127648671468099, "step": 3924 }, { "epoch": 2.4403783532330596, "grad_norm": 11.71980094909668, "learning_rate": 4.151549942594719e-06, "loss": 0.33937788009643555, "step": 3936 }, { "epoch": 2.4478213676539, "grad_norm": 4.254403591156006, "learning_rate": 4.096440872560276e-06, "loss": 0.3548990885416667, "step": 3948 }, { "epoch": 2.4552643820747404, "grad_norm": 5.00128173828125, "learning_rate": 4.041331802525833e-06, "loss": 0.4270055294036865, "step": 3960 }, { "epoch": 2.462707396495581, "grad_norm": 3.918459892272949, "learning_rate": 3.986222732491389e-06, "loss": 0.3760935465494792, "step": 3972 }, { "epoch": 2.470150410916421, "grad_norm": 11.43891716003418, "learning_rate": 3.9311136624569465e-06, "loss": 0.4183223644892375, "step": 3984 }, { "epoch": 2.4775934253372616, "grad_norm": 16.374967575073242, "learning_rate": 3.876004592422503e-06, "loss": 0.36837557951609295, "step": 3996 }, { "epoch": 2.485036439758102, "grad_norm": 4.490777015686035, "learning_rate": 3.82089552238806e-06, "loss": 0.4069160620371501, "step": 4008 }, { "epoch": 2.4924794541789423, "grad_norm": 8.420413970947266, "learning_rate": 3.7657864523536168e-06, "loss": 0.4271164337793986, "step": 4020 }, { "epoch": 2.4999224685997827, "grad_norm": 8.309126853942871, "learning_rate": 3.7106773823191737e-06, "loss": 0.3547343810399373, "step": 4032 }, { "epoch": 2.5073654830206236, "grad_norm": 14.98065185546875, "learning_rate": 3.6555683122847306e-06, "loss": 0.40314682324727374, "step": 4044 }, { "epoch": 2.514808497441464, "grad_norm": 16.558191299438477, "learning_rate": 3.600459242250287e-06, "loss": 0.36269084612528485, "step": 4056 }, { "epoch": 2.5222515118623043, "grad_norm": 6.547549724578857, "learning_rate": 3.545350172215844e-06, "loss": 0.36424537499745685, "step": 4068 }, { "epoch": 2.5296945262831447, "grad_norm": 4.773808002471924, "learning_rate": 3.490241102181401e-06, "loss": 0.37531224886576336, "step": 4080 }, { "epoch": 2.537137540703985, "grad_norm": 4.01258659362793, "learning_rate": 3.4351320321469578e-06, "loss": 0.36545733610788983, "step": 4092 }, { "epoch": 2.5445805551248255, "grad_norm": 10.372180938720703, "learning_rate": 3.3800229621125147e-06, "loss": 0.4671864112218221, "step": 4104 }, { "epoch": 2.552023569545666, "grad_norm": 3.3598952293395996, "learning_rate": 3.3249138920780716e-06, "loss": 0.3458172082901001, "step": 4116 }, { "epoch": 2.5594665839665063, "grad_norm": 11.469687461853027, "learning_rate": 3.2698048220436285e-06, "loss": 0.39522536595662433, "step": 4128 }, { "epoch": 2.5669095983873467, "grad_norm": 3.848041534423828, "learning_rate": 3.2146957520091854e-06, "loss": 0.41400329271952313, "step": 4140 }, { "epoch": 2.5743526128081875, "grad_norm": 4.791919231414795, "learning_rate": 3.159586681974742e-06, "loss": 0.393940011660258, "step": 4152 }, { "epoch": 2.581795627229028, "grad_norm": 21.486618041992188, "learning_rate": 3.1044776119402988e-06, "loss": 0.4398730993270874, "step": 4164 }, { "epoch": 2.5892386416498683, "grad_norm": 5.638022422790527, "learning_rate": 3.0493685419058557e-06, "loss": 0.3547349770863851, "step": 4176 }, { "epoch": 2.5966816560707087, "grad_norm": 7.414913177490234, "learning_rate": 2.9942594718714126e-06, "loss": 0.38705146312713623, "step": 4188 }, { "epoch": 2.604124670491549, "grad_norm": 6.696681976318359, "learning_rate": 2.9391504018369695e-06, "loss": 0.36440642674763996, "step": 4200 }, { "epoch": 2.6115676849123894, "grad_norm": 4.02039098739624, "learning_rate": 2.8840413318025264e-06, "loss": 0.39015217622121173, "step": 4212 }, { "epoch": 2.61901069933323, "grad_norm": 3.370777130126953, "learning_rate": 2.8289322617680833e-06, "loss": 0.4275425275166829, "step": 4224 }, { "epoch": 2.6264537137540707, "grad_norm": 8.47400951385498, "learning_rate": 2.7738231917336393e-06, "loss": 0.3559015194574992, "step": 4236 }, { "epoch": 2.6338967281749106, "grad_norm": 11.06500244140625, "learning_rate": 2.7187141216991963e-06, "loss": 0.3683815002441406, "step": 4248 }, { "epoch": 2.6413397425957514, "grad_norm": 3.4861528873443604, "learning_rate": 2.663605051664753e-06, "loss": 0.44681187470753986, "step": 4260 }, { "epoch": 2.648782757016592, "grad_norm": 10.642603874206543, "learning_rate": 2.60849598163031e-06, "loss": 0.4434703588485718, "step": 4272 }, { "epoch": 2.656225771437432, "grad_norm": 2.501110315322876, "learning_rate": 2.553386911595867e-06, "loss": 0.3525495131810506, "step": 4284 }, { "epoch": 2.6636687858582726, "grad_norm": 5.691764831542969, "learning_rate": 2.498277841561424e-06, "loss": 0.3853313128153483, "step": 4296 }, { "epoch": 2.671111800279113, "grad_norm": 4.1908135414123535, "learning_rate": 2.4431687715269808e-06, "loss": 0.38127346833546955, "step": 4308 }, { "epoch": 2.6785548146999534, "grad_norm": 9.538026809692383, "learning_rate": 2.3880597014925373e-06, "loss": 0.39995817343393963, "step": 4320 }, { "epoch": 2.6859978291207938, "grad_norm": 8.436595916748047, "learning_rate": 2.332950631458094e-06, "loss": 0.3635564645131429, "step": 4332 }, { "epoch": 2.6934408435416346, "grad_norm": 2.5905513763427734, "learning_rate": 2.277841561423651e-06, "loss": 0.46339670817057294, "step": 4344 }, { "epoch": 2.7008838579624745, "grad_norm": 6.738951206207275, "learning_rate": 2.222732491389208e-06, "loss": 0.3373739719390869, "step": 4356 }, { "epoch": 2.7083268723833154, "grad_norm": 5.625753402709961, "learning_rate": 2.167623421354765e-06, "loss": 0.3713107109069824, "step": 4368 }, { "epoch": 2.7157698868041558, "grad_norm": 3.6908581256866455, "learning_rate": 2.1125143513203218e-06, "loss": 0.3845006227493286, "step": 4380 }, { "epoch": 2.723212901224996, "grad_norm": 5.123325824737549, "learning_rate": 2.0574052812858787e-06, "loss": 0.3693963686625163, "step": 4392 }, { "epoch": 2.7306559156458365, "grad_norm": 5.600500583648682, "learning_rate": 2.002296211251435e-06, "loss": 0.4005578358968099, "step": 4404 }, { "epoch": 2.738098930066677, "grad_norm": 4.9075775146484375, "learning_rate": 1.947187141216992e-06, "loss": 0.44304617245992023, "step": 4416 }, { "epoch": 2.7455419444875173, "grad_norm": 2.535568952560425, "learning_rate": 1.892078071182549e-06, "loss": 0.36018415292104083, "step": 4428 }, { "epoch": 2.7529849589083577, "grad_norm": 3.863154888153076, "learning_rate": 1.8369690011481059e-06, "loss": 0.3833086093266805, "step": 4440 }, { "epoch": 2.7604279733291985, "grad_norm": 3.38565731048584, "learning_rate": 1.7818599311136626e-06, "loss": 0.36296629905700684, "step": 4452 }, { "epoch": 2.767870987750039, "grad_norm": 3.979094982147217, "learning_rate": 1.7267508610792195e-06, "loss": 0.400799036026001, "step": 4464 }, { "epoch": 2.7753140021708793, "grad_norm": 3.6006662845611572, "learning_rate": 1.6716417910447764e-06, "loss": 0.38404210408528644, "step": 4476 }, { "epoch": 2.7827570165917197, "grad_norm": 9.927759170532227, "learning_rate": 1.6165327210103333e-06, "loss": 0.47922762235005695, "step": 4488 }, { "epoch": 2.79020003101256, "grad_norm": 4.767171859741211, "learning_rate": 1.5614236509758898e-06, "loss": 0.40151556332906085, "step": 4500 }, { "epoch": 2.7976430454334005, "grad_norm": 5.649435043334961, "learning_rate": 1.5063145809414467e-06, "loss": 0.3603046735127767, "step": 4512 }, { "epoch": 2.805086059854241, "grad_norm": 11.296677589416504, "learning_rate": 1.4512055109070036e-06, "loss": 0.38084761301676434, "step": 4524 }, { "epoch": 2.8125290742750813, "grad_norm": 2.71022629737854, "learning_rate": 1.3960964408725605e-06, "loss": 0.3726603190104167, "step": 4536 }, { "epoch": 2.8199720886959216, "grad_norm": 3.849479913711548, "learning_rate": 1.3409873708381172e-06, "loss": 0.3995700279871623, "step": 4548 }, { "epoch": 2.8274151031167625, "grad_norm": 14.668109893798828, "learning_rate": 1.285878300803674e-06, "loss": 0.39227835337320965, "step": 4560 }, { "epoch": 2.834858117537603, "grad_norm": 3.9545083045959473, "learning_rate": 1.230769230769231e-06, "loss": 0.42009902000427246, "step": 4572 }, { "epoch": 2.8423011319584432, "grad_norm": 5.8148298263549805, "learning_rate": 1.1756601607347877e-06, "loss": 0.39560989538828534, "step": 4584 }, { "epoch": 2.8497441463792836, "grad_norm": 6.249505996704102, "learning_rate": 1.1205510907003444e-06, "loss": 0.42494750022888184, "step": 4596 }, { "epoch": 2.857187160800124, "grad_norm": 4.1339921951293945, "learning_rate": 1.0654420206659013e-06, "loss": 0.5030697584152222, "step": 4608 }, { "epoch": 2.8646301752209644, "grad_norm": 13.68895435333252, "learning_rate": 1.0103329506314582e-06, "loss": 0.36397520701090497, "step": 4620 }, { "epoch": 2.872073189641805, "grad_norm": 2.826042890548706, "learning_rate": 9.55223880597015e-07, "loss": 0.3502591848373413, "step": 4632 }, { "epoch": 2.8795162040626456, "grad_norm": 6.833806991577148, "learning_rate": 9.001148105625718e-07, "loss": 0.3613890012105306, "step": 4644 }, { "epoch": 2.8869592184834856, "grad_norm": 4.942678451538086, "learning_rate": 8.450057405281287e-07, "loss": 0.39194099108378094, "step": 4656 }, { "epoch": 2.8944022329043264, "grad_norm": 4.509676456451416, "learning_rate": 7.898966704936855e-07, "loss": 0.351750651995341, "step": 4668 }, { "epoch": 2.901845247325167, "grad_norm": 8.305526733398438, "learning_rate": 7.347876004592424e-07, "loss": 0.40360478560129803, "step": 4680 }, { "epoch": 2.909288261746007, "grad_norm": 4.9328765869140625, "learning_rate": 6.796785304247991e-07, "loss": 0.33100277185440063, "step": 4692 }, { "epoch": 2.9167312761668476, "grad_norm": 4.945671558380127, "learning_rate": 6.24569460390356e-07, "loss": 0.39974749088287354, "step": 4704 }, { "epoch": 2.924174290587688, "grad_norm": 9.925528526306152, "learning_rate": 5.694603903559128e-07, "loss": 0.4116141001383464, "step": 4716 }, { "epoch": 2.9316173050085284, "grad_norm": 4.063233375549316, "learning_rate": 5.143513203214697e-07, "loss": 0.3659325838088989, "step": 4728 }, { "epoch": 2.9390603194293687, "grad_norm": 3.5343589782714844, "learning_rate": 4.5924225028702647e-07, "loss": 0.3983626365661621, "step": 4740 }, { "epoch": 2.9465033338502096, "grad_norm": 6.534095764160156, "learning_rate": 4.041331802525833e-07, "loss": 0.393149733543396, "step": 4752 }, { "epoch": 2.9539463482710495, "grad_norm": 3.4787096977233887, "learning_rate": 3.490241102181401e-07, "loss": 0.3340187867482503, "step": 4764 }, { "epoch": 2.9613893626918903, "grad_norm": 5.42100191116333, "learning_rate": 2.939150401836969e-07, "loss": 0.3814918597539266, "step": 4776 }, { "epoch": 2.9688323771127307, "grad_norm": 4.148738861083984, "learning_rate": 2.3880597014925377e-07, "loss": 0.4039960702260335, "step": 4788 }, { "epoch": 2.976275391533571, "grad_norm": 4.3285746574401855, "learning_rate": 1.836969001148106e-07, "loss": 0.34236987431844074, "step": 4800 }, { "epoch": 2.9837184059544115, "grad_norm": 2.8112664222717285, "learning_rate": 1.2858783008036742e-07, "loss": 0.3349067767461141, "step": 4812 }, { "epoch": 2.991161420375252, "grad_norm": 4.724297523498535, "learning_rate": 7.347876004592423e-08, "loss": 0.38507378101348877, "step": 4824 }, { "epoch": 2.9986044347960923, "grad_norm": 7.1218132972717285, "learning_rate": 1.8369690011481057e-08, "loss": 0.34174474080403644, "step": 4836 }, { "epoch": 3.0, "eval_f1": 0.43726749573500223, "eval_loss": 0.12126699090003967, "eval_precision": 0.4012637195169362, "eval_recall": 0.4913673269074057, "eval_runtime": 522.9994, "eval_samples_per_second": 73.975, "eval_steps_per_second": 1.543, "step": 4839 } ], "logging_steps": 12, "max_steps": 4839, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2215684896188826e+17, "train_batch_size": 48, "trial_name": null, "trial_params": null }