| { | |
| "best_global_step": 4839, | |
| "best_metric": 0.43726749573500223, | |
| "best_model_checkpoint": "constellation_one_text/checkpoint-4839", | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 4839, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00744301442084044, | |
| "grad_norm": 16.79511070251465, | |
| "learning_rate": 4.5454545454545457e-07, | |
| "loss": 5.104981422424316, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.01488602884168088, | |
| "grad_norm": 14.2466402053833, | |
| "learning_rate": 9.50413223140496e-07, | |
| "loss": 4.855861345926921, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.02232904326252132, | |
| "grad_norm": 15.325632095336914, | |
| "learning_rate": 1.4462809917355372e-06, | |
| "loss": 4.62240473429362, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.02977205768336176, | |
| "grad_norm": 14.659135818481445, | |
| "learning_rate": 1.9421487603305786e-06, | |
| "loss": 4.279359499613444, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.037215072104202204, | |
| "grad_norm": 11.83539867401123, | |
| "learning_rate": 2.43801652892562e-06, | |
| "loss": 4.009869893391927, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.04465808652504264, | |
| "grad_norm": 12.459957122802734, | |
| "learning_rate": 2.9338842975206615e-06, | |
| "loss": 3.658400217692057, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.052101100945883085, | |
| "grad_norm": 10.960445404052734, | |
| "learning_rate": 3.429752066115703e-06, | |
| "loss": 3.3341188430786133, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.05954411536672352, | |
| "grad_norm": 10.394844055175781, | |
| "learning_rate": 3.925619834710744e-06, | |
| "loss": 2.9902642567952475, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.06698712978756396, | |
| "grad_norm": 10.080375671386719, | |
| "learning_rate": 4.421487603305786e-06, | |
| "loss": 2.7187296549479165, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.07443014420840441, | |
| "grad_norm": 11.83609676361084, | |
| "learning_rate": 4.917355371900827e-06, | |
| "loss": 2.4078760147094727, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.08187315862924484, | |
| "grad_norm": 20.040725708007812, | |
| "learning_rate": 5.413223140495868e-06, | |
| "loss": 2.1843010584513345, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.08931617305008528, | |
| "grad_norm": 10.70347785949707, | |
| "learning_rate": 5.90909090909091e-06, | |
| "loss": 1.9951588312784831, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.09675918747092573, | |
| "grad_norm": 13.84825611114502, | |
| "learning_rate": 6.404958677685951e-06, | |
| "loss": 1.7978707949320476, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.10420220189176617, | |
| "grad_norm": 8.921030044555664, | |
| "learning_rate": 6.900826446280993e-06, | |
| "loss": 1.6856780052185059, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.1116452163126066, | |
| "grad_norm": 7.919989585876465, | |
| "learning_rate": 7.396694214876033e-06, | |
| "loss": 1.497524897257487, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.11908823073344704, | |
| "grad_norm": 15.635968208312988, | |
| "learning_rate": 7.892561983471076e-06, | |
| "loss": 1.4976633389790852, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.12653124515428749, | |
| "grad_norm": 14.213494300842285, | |
| "learning_rate": 8.388429752066116e-06, | |
| "loss": 1.4405194918314617, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.13397425957512793, | |
| "grad_norm": 10.790483474731445, | |
| "learning_rate": 8.884297520661158e-06, | |
| "loss": 1.2696106433868408, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.14141727399596837, | |
| "grad_norm": 14.101875305175781, | |
| "learning_rate": 9.3801652892562e-06, | |
| "loss": 1.3300576210021973, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.14886028841680882, | |
| "grad_norm": 19.911815643310547, | |
| "learning_rate": 9.876033057851241e-06, | |
| "loss": 1.2497991720835369, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.15630330283764926, | |
| "grad_norm": 12.594736099243164, | |
| "learning_rate": 1.0371900826446282e-05, | |
| "loss": 1.20013427734375, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.16374631725848968, | |
| "grad_norm": 10.003790855407715, | |
| "learning_rate": 1.0867768595041323e-05, | |
| "loss": 1.1903626918792725, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.17118933167933012, | |
| "grad_norm": 19.644290924072266, | |
| "learning_rate": 1.1363636363636366e-05, | |
| "loss": 1.2084464232126872, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.17863234610017056, | |
| "grad_norm": 12.33438777923584, | |
| "learning_rate": 1.1859504132231406e-05, | |
| "loss": 1.1396081447601318, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.186075360521011, | |
| "grad_norm": 7.845709800720215, | |
| "learning_rate": 1.2355371900826447e-05, | |
| "loss": 1.0346049467722576, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.19351837494185145, | |
| "grad_norm": 12.355867385864258, | |
| "learning_rate": 1.2851239669421488e-05, | |
| "loss": 1.0486024220784504, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.2009613893626919, | |
| "grad_norm": 9.542502403259277, | |
| "learning_rate": 1.3347107438016531e-05, | |
| "loss": 1.1321392059326172, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.20840440378353234, | |
| "grad_norm": 171.94647216796875, | |
| "learning_rate": 1.384297520661157e-05, | |
| "loss": 0.9731620152791342, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.21584741820437278, | |
| "grad_norm": 14.012189865112305, | |
| "learning_rate": 1.4338842975206612e-05, | |
| "loss": 0.9310257434844971, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.2232904326252132, | |
| "grad_norm": 17.743682861328125, | |
| "learning_rate": 1.4834710743801655e-05, | |
| "loss": 0.9263285795847574, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.23073344704605364, | |
| "grad_norm": 29.65188217163086, | |
| "learning_rate": 1.5330578512396693e-05, | |
| "loss": 1.0049312114715576, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.23817646146689408, | |
| "grad_norm": 16.46782684326172, | |
| "learning_rate": 1.5826446280991736e-05, | |
| "loss": 1.078270673751831, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.24561947588773453, | |
| "grad_norm": 15.282443046569824, | |
| "learning_rate": 1.632231404958678e-05, | |
| "loss": 0.9908095200856527, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.25306249030857497, | |
| "grad_norm": 7.152077674865723, | |
| "learning_rate": 1.681818181818182e-05, | |
| "loss": 0.8867685794830322, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.2605055047294154, | |
| "grad_norm": 17.630233764648438, | |
| "learning_rate": 1.731404958677686e-05, | |
| "loss": 0.8261091709136963, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.26794851915025586, | |
| "grad_norm": 8.756381034851074, | |
| "learning_rate": 1.78099173553719e-05, | |
| "loss": 0.8141599496205648, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.2753915335710963, | |
| "grad_norm": 14.227313041687012, | |
| "learning_rate": 1.8305785123966944e-05, | |
| "loss": 0.8025492032368978, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.28283454799193675, | |
| "grad_norm": 6.028214931488037, | |
| "learning_rate": 1.8801652892561987e-05, | |
| "loss": 0.827876885732015, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.2902775624127772, | |
| "grad_norm": 9.791404724121094, | |
| "learning_rate": 1.9297520661157026e-05, | |
| "loss": 0.8186439673105875, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.29772057683361763, | |
| "grad_norm": 19.028491973876953, | |
| "learning_rate": 1.9793388429752066e-05, | |
| "loss": 0.8027651309967041, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.3051635912544581, | |
| "grad_norm": 5.418436527252197, | |
| "learning_rate": 1.996785304247991e-05, | |
| "loss": 0.7800490061442057, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.3126066056752985, | |
| "grad_norm": 7.598865985870361, | |
| "learning_rate": 1.9912743972445466e-05, | |
| "loss": 0.7126566569010416, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.3200496200961389, | |
| "grad_norm": 7.867424011230469, | |
| "learning_rate": 1.9857634902411024e-05, | |
| "loss": 0.6536041895548502, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.32749263451697935, | |
| "grad_norm": 10.367350578308105, | |
| "learning_rate": 1.980252583237658e-05, | |
| "loss": 0.8624240557352701, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.3349356489378198, | |
| "grad_norm": 6.30031681060791, | |
| "learning_rate": 1.9747416762342138e-05, | |
| "loss": 0.8412895202636719, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.34237866335866024, | |
| "grad_norm": 15.809948921203613, | |
| "learning_rate": 1.9692307692307696e-05, | |
| "loss": 0.7370687325795492, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.3498216777795007, | |
| "grad_norm": 6.0920491218566895, | |
| "learning_rate": 1.963719862227325e-05, | |
| "loss": 0.7390193144480387, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.3572646922003411, | |
| "grad_norm": 11.583715438842773, | |
| "learning_rate": 1.9582089552238807e-05, | |
| "loss": 0.6651956637700399, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.36470770662118157, | |
| "grad_norm": 11.411588668823242, | |
| "learning_rate": 1.9526980482204364e-05, | |
| "loss": 0.7644002437591553, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.372150721042022, | |
| "grad_norm": 8.31484603881836, | |
| "learning_rate": 1.947187141216992e-05, | |
| "loss": 0.6794478893280029, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.37959373546286246, | |
| "grad_norm": 6.703721523284912, | |
| "learning_rate": 1.941676234213548e-05, | |
| "loss": 0.6266262531280518, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.3870367498837029, | |
| "grad_norm": 9.479427337646484, | |
| "learning_rate": 1.9361653272101036e-05, | |
| "loss": 0.6851427555084229, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.39447976430454335, | |
| "grad_norm": 7.663156032562256, | |
| "learning_rate": 1.9306544202066593e-05, | |
| "loss": 0.6938677628835043, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.4019227787253838, | |
| "grad_norm": 4.276080131530762, | |
| "learning_rate": 1.9251435132032147e-05, | |
| "loss": 0.76728622118632, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.40936579314622423, | |
| "grad_norm": 11.622859001159668, | |
| "learning_rate": 1.9196326061997705e-05, | |
| "loss": 0.7580918471018473, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.4168088075670647, | |
| "grad_norm": 13.203335762023926, | |
| "learning_rate": 1.9141216991963262e-05, | |
| "loss": 0.642679770787557, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.4242518219879051, | |
| "grad_norm": 8.963321685791016, | |
| "learning_rate": 1.908610792192882e-05, | |
| "loss": 0.6361099084218343, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.43169483640874556, | |
| "grad_norm": 8.1705904006958, | |
| "learning_rate": 1.9030998851894377e-05, | |
| "loss": 0.6898341178894043, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.439137850829586, | |
| "grad_norm": 3.9877262115478516, | |
| "learning_rate": 1.8975889781859934e-05, | |
| "loss": 0.6462088028589884, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.4465808652504264, | |
| "grad_norm": 12.81478500366211, | |
| "learning_rate": 1.892078071182549e-05, | |
| "loss": 0.6965091228485107, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.45402387967126684, | |
| "grad_norm": 7.810659885406494, | |
| "learning_rate": 1.8865671641791045e-05, | |
| "loss": 0.7788422902425131, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.4614668940921073, | |
| "grad_norm": 4.958326816558838, | |
| "learning_rate": 1.8810562571756603e-05, | |
| "loss": 0.7460188865661621, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.4689099085129477, | |
| "grad_norm": 9.091962814331055, | |
| "learning_rate": 1.875545350172216e-05, | |
| "loss": 0.6937299569447836, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.47635292293378817, | |
| "grad_norm": 7.729589939117432, | |
| "learning_rate": 1.8700344431687717e-05, | |
| "loss": 0.6188247601191202, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.4837959373546286, | |
| "grad_norm": 8.878933906555176, | |
| "learning_rate": 1.8645235361653275e-05, | |
| "loss": 0.7017858028411865, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.49123895177546906, | |
| "grad_norm": 23.914348602294922, | |
| "learning_rate": 1.8590126291618832e-05, | |
| "loss": 0.7923436164855957, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.4986819661963095, | |
| "grad_norm": 10.980387687683105, | |
| "learning_rate": 1.853501722158439e-05, | |
| "loss": 0.6881453990936279, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.5061249806171499, | |
| "grad_norm": 6.988458156585693, | |
| "learning_rate": 1.8479908151549943e-05, | |
| "loss": 0.683276891708374, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.5135679950379903, | |
| "grad_norm": 23.667926788330078, | |
| "learning_rate": 1.84247990815155e-05, | |
| "loss": 0.6124229431152344, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.5210110094588308, | |
| "grad_norm": 7.078935623168945, | |
| "learning_rate": 1.8369690011481058e-05, | |
| "loss": 0.7043429215749105, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.5284540238796712, | |
| "grad_norm": 9.82224178314209, | |
| "learning_rate": 1.8314580941446615e-05, | |
| "loss": 0.6555114189783732, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.5358970383005117, | |
| "grad_norm": 8.077360153198242, | |
| "learning_rate": 1.8259471871412173e-05, | |
| "loss": 0.6555444002151489, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.5433400527213521, | |
| "grad_norm": 3.6762046813964844, | |
| "learning_rate": 1.820436280137773e-05, | |
| "loss": 0.636172374089559, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.5507830671421926, | |
| "grad_norm": 3.8388607501983643, | |
| "learning_rate": 1.8149253731343287e-05, | |
| "loss": 0.6085333824157715, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.558226081563033, | |
| "grad_norm": 3.0353925228118896, | |
| "learning_rate": 1.809414466130884e-05, | |
| "loss": 0.58968718846639, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.5656690959838735, | |
| "grad_norm": 6.465055465698242, | |
| "learning_rate": 1.80390355912744e-05, | |
| "loss": 0.6078658103942871, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.5731121104047139, | |
| "grad_norm": 5.472475528717041, | |
| "learning_rate": 1.7983926521239956e-05, | |
| "loss": 0.6997927029927572, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.5805551248255544, | |
| "grad_norm": 15.40697193145752, | |
| "learning_rate": 1.792881745120551e-05, | |
| "loss": 0.6386371453603109, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.5879981392463948, | |
| "grad_norm": 6.439900875091553, | |
| "learning_rate": 1.787370838117107e-05, | |
| "loss": 0.6876135667165121, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.5954411536672353, | |
| "grad_norm": 10.793220520019531, | |
| "learning_rate": 1.7818599311136628e-05, | |
| "loss": 0.6237523953119913, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.6028841680880757, | |
| "grad_norm": 5.377976417541504, | |
| "learning_rate": 1.7763490241102185e-05, | |
| "loss": 0.614266554514567, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.6103271825089162, | |
| "grad_norm": 7.794371604919434, | |
| "learning_rate": 1.770838117106774e-05, | |
| "loss": 0.5918615261713663, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.6177701969297565, | |
| "grad_norm": 4.7419867515563965, | |
| "learning_rate": 1.7653272101033296e-05, | |
| "loss": 0.5848552385965983, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.625213211350597, | |
| "grad_norm": 14.705470085144043, | |
| "learning_rate": 1.7598163030998854e-05, | |
| "loss": 0.6608580350875854, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.6326562257714374, | |
| "grad_norm": 6.041922092437744, | |
| "learning_rate": 1.754305396096441e-05, | |
| "loss": 0.549665609995524, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.6400992401922778, | |
| "grad_norm": 5.13696813583374, | |
| "learning_rate": 1.7487944890929965e-05, | |
| "loss": 0.7017458279927572, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.6475422546131183, | |
| "grad_norm": 6.016454696655273, | |
| "learning_rate": 1.7432835820895522e-05, | |
| "loss": 0.6309004227320353, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.6549852690339587, | |
| "grad_norm": 9.331708908081055, | |
| "learning_rate": 1.7377726750861083e-05, | |
| "loss": 0.6831174691518148, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.6624282834547992, | |
| "grad_norm": 9.878951072692871, | |
| "learning_rate": 1.7322617680826637e-05, | |
| "loss": 0.6587471961975098, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.6698712978756396, | |
| "grad_norm": 5.033365726470947, | |
| "learning_rate": 1.7267508610792194e-05, | |
| "loss": 0.6370361646016439, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.6773143122964801, | |
| "grad_norm": 18.762298583984375, | |
| "learning_rate": 1.721239954075775e-05, | |
| "loss": 0.5823976198832194, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.6847573267173205, | |
| "grad_norm": 2.940394163131714, | |
| "learning_rate": 1.715729047072331e-05, | |
| "loss": 0.6264007488886515, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.692200341138161, | |
| "grad_norm": 7.621018886566162, | |
| "learning_rate": 1.7102181400688863e-05, | |
| "loss": 0.5824793974558512, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.6996433555590014, | |
| "grad_norm": 3.141854763031006, | |
| "learning_rate": 1.704707233065442e-05, | |
| "loss": 0.5842764774958292, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.7070863699798419, | |
| "grad_norm": 5.849940776824951, | |
| "learning_rate": 1.6991963260619978e-05, | |
| "loss": 0.5304047664006551, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.7145293844006823, | |
| "grad_norm": 7.9883551597595215, | |
| "learning_rate": 1.6936854190585535e-05, | |
| "loss": 0.5599017937978109, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.7219723988215228, | |
| "grad_norm": 11.370931625366211, | |
| "learning_rate": 1.6881745120551092e-05, | |
| "loss": 0.5798830588658651, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.7294154132423631, | |
| "grad_norm": 3.5065290927886963, | |
| "learning_rate": 1.682663605051665e-05, | |
| "loss": 0.6167506376902262, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.7368584276632036, | |
| "grad_norm": 5.930673599243164, | |
| "learning_rate": 1.6771526980482207e-05, | |
| "loss": 0.5873833497365316, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.744301442084044, | |
| "grad_norm": 6.102614402770996, | |
| "learning_rate": 1.671641791044776e-05, | |
| "loss": 0.6477183898289999, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.7517444565048845, | |
| "grad_norm": 4.337888717651367, | |
| "learning_rate": 1.6661308840413318e-05, | |
| "loss": 0.5860347350438436, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.7591874709257249, | |
| "grad_norm": 4.841605186462402, | |
| "learning_rate": 1.6606199770378875e-05, | |
| "loss": 0.6613442897796631, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.7666304853465653, | |
| "grad_norm": 14.614047050476074, | |
| "learning_rate": 1.6551090700344433e-05, | |
| "loss": 0.6218246618906657, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.7740734997674058, | |
| "grad_norm": 8.036581039428711, | |
| "learning_rate": 1.649598163030999e-05, | |
| "loss": 0.5646830002466837, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.7815165141882462, | |
| "grad_norm": 3.943291664123535, | |
| "learning_rate": 1.6440872560275547e-05, | |
| "loss": 0.6018180449803671, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.7889595286090867, | |
| "grad_norm": 12.51102352142334, | |
| "learning_rate": 1.6385763490241105e-05, | |
| "loss": 0.6140671968460083, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.7964025430299271, | |
| "grad_norm": 3.718653678894043, | |
| "learning_rate": 1.633065442020666e-05, | |
| "loss": 0.5359119176864624, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 0.8038455574507676, | |
| "grad_norm": 2.8353357315063477, | |
| "learning_rate": 1.6275545350172216e-05, | |
| "loss": 0.502113143603007, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.811288571871608, | |
| "grad_norm": 4.345269203186035, | |
| "learning_rate": 1.6220436280137773e-05, | |
| "loss": 0.5975545644760132, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.8187315862924485, | |
| "grad_norm": 6.92914342880249, | |
| "learning_rate": 1.616532721010333e-05, | |
| "loss": 0.6587652762730917, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.8261746007132889, | |
| "grad_norm": 4.188693046569824, | |
| "learning_rate": 1.6110218140068888e-05, | |
| "loss": 0.6142017841339111, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 0.8336176151341294, | |
| "grad_norm": 9.596400260925293, | |
| "learning_rate": 1.6055109070034445e-05, | |
| "loss": 0.5469466845194498, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.8410606295549697, | |
| "grad_norm": 4.810947895050049, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 0.5744484265645345, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 0.8485036439758102, | |
| "grad_norm": 3.5819036960601807, | |
| "learning_rate": 1.5944890929965557e-05, | |
| "loss": 0.5045839150746664, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 0.8559466583966506, | |
| "grad_norm": 4.879307746887207, | |
| "learning_rate": 1.5889781859931114e-05, | |
| "loss": 0.6669184366861979, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.8633896728174911, | |
| "grad_norm": 6.7210693359375, | |
| "learning_rate": 1.583467278989667e-05, | |
| "loss": 0.5324758291244507, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 0.8708326872383315, | |
| "grad_norm": 4.653786659240723, | |
| "learning_rate": 1.577956371986223e-05, | |
| "loss": 0.543891986211141, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 0.878275701659172, | |
| "grad_norm": 6.386638641357422, | |
| "learning_rate": 1.5724454649827786e-05, | |
| "loss": 0.5688877105712891, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 0.8857187160800124, | |
| "grad_norm": 14.5455322265625, | |
| "learning_rate": 1.5669345579793343e-05, | |
| "loss": 0.5081936915715536, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 0.8931617305008528, | |
| "grad_norm": 13.621335983276367, | |
| "learning_rate": 1.56142365097589e-05, | |
| "loss": 0.5466565688451132, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.9006047449216933, | |
| "grad_norm": 7.791660308837891, | |
| "learning_rate": 1.5559127439724455e-05, | |
| "loss": 0.5543188651402792, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 0.9080477593425337, | |
| "grad_norm": 4.039332866668701, | |
| "learning_rate": 1.5504018369690012e-05, | |
| "loss": 0.564227819442749, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 0.9154907737633742, | |
| "grad_norm": 5.744030475616455, | |
| "learning_rate": 1.544890929965557e-05, | |
| "loss": 0.5645032723744711, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 0.9229337881842146, | |
| "grad_norm": 7.17244815826416, | |
| "learning_rate": 1.5393800229621126e-05, | |
| "loss": 0.6025459369023641, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 0.9303768026050551, | |
| "grad_norm": 9.460329055786133, | |
| "learning_rate": 1.5338691159586684e-05, | |
| "loss": 0.5522710482279459, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.9378198170258955, | |
| "grad_norm": 8.257369995117188, | |
| "learning_rate": 1.528358208955224e-05, | |
| "loss": 0.5696142514546713, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 0.945262831446736, | |
| "grad_norm": 14.734770774841309, | |
| "learning_rate": 1.5228473019517798e-05, | |
| "loss": 0.60454261302948, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 0.9527058458675763, | |
| "grad_norm": 4.352370738983154, | |
| "learning_rate": 1.5173363949483352e-05, | |
| "loss": 0.48172632853190106, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 0.9601488602884168, | |
| "grad_norm": 2.388683557510376, | |
| "learning_rate": 1.511825487944891e-05, | |
| "loss": 0.5889216661453247, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 0.9675918747092572, | |
| "grad_norm": 9.910285949707031, | |
| "learning_rate": 1.5063145809414467e-05, | |
| "loss": 0.5621689558029175, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.9750348891300977, | |
| "grad_norm": 5.445796966552734, | |
| "learning_rate": 1.5008036739380026e-05, | |
| "loss": 0.5526663859685262, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 0.9824779035509381, | |
| "grad_norm": 5.242825984954834, | |
| "learning_rate": 1.495292766934558e-05, | |
| "loss": 0.5673882563908895, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 0.9899209179717786, | |
| "grad_norm": 10.1865234375, | |
| "learning_rate": 1.4897818599311137e-05, | |
| "loss": 0.5648102362950643, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 0.997363932392619, | |
| "grad_norm": 5.388990879058838, | |
| "learning_rate": 1.4842709529276695e-05, | |
| "loss": 0.5376612345377604, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_f1": 0.4322638779516363, | |
| "eval_loss": 0.13781657814979553, | |
| "eval_precision": 0.3967545697112817, | |
| "eval_recall": 0.4884485429972486, | |
| "eval_runtime": 583.7374, | |
| "eval_samples_per_second": 66.278, | |
| "eval_steps_per_second": 1.382, | |
| "step": 1613 | |
| }, | |
| { | |
| "epoch": 1.004341758412157, | |
| "grad_norm": 11.303878784179688, | |
| "learning_rate": 1.478760045924225e-05, | |
| "loss": 0.46324888865152997, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.0117847728329974, | |
| "grad_norm": 4.389431476593018, | |
| "learning_rate": 1.4732491389207808e-05, | |
| "loss": 0.48095786571502686, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 1.0192277872538378, | |
| "grad_norm": 2.298799514770508, | |
| "learning_rate": 1.4677382319173365e-05, | |
| "loss": 0.5406383275985718, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 1.0266708016746782, | |
| "grad_norm": 4.433741092681885, | |
| "learning_rate": 1.4622273249138922e-05, | |
| "loss": 0.4697510798772176, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 1.0341138160955188, | |
| "grad_norm": 4.704965591430664, | |
| "learning_rate": 1.4567164179104478e-05, | |
| "loss": 0.5180115699768066, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 1.0415568305163592, | |
| "grad_norm": 5.16159725189209, | |
| "learning_rate": 1.4512055109070035e-05, | |
| "loss": 0.49386584758758545, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.0489998449371996, | |
| "grad_norm": 2.5488502979278564, | |
| "learning_rate": 1.4456946039035593e-05, | |
| "loss": 0.41516109307607013, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 1.05644285935804, | |
| "grad_norm": 12.81408405303955, | |
| "learning_rate": 1.4401836969001148e-05, | |
| "loss": 0.5269262790679932, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 1.0638858737788803, | |
| "grad_norm": 2.8521316051483154, | |
| "learning_rate": 1.4346727898966706e-05, | |
| "loss": 0.45834481716156006, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 1.071328888199721, | |
| "grad_norm": 5.517307758331299, | |
| "learning_rate": 1.4291618828932263e-05, | |
| "loss": 0.573523203531901, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 1.0787719026205613, | |
| "grad_norm": 2.4321818351745605, | |
| "learning_rate": 1.423650975889782e-05, | |
| "loss": 0.4625085194905599, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.0862149170414017, | |
| "grad_norm": 15.532980918884277, | |
| "learning_rate": 1.4181400688863376e-05, | |
| "loss": 0.5057009855906168, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 1.093657931462242, | |
| "grad_norm": 4.501278877258301, | |
| "learning_rate": 1.4126291618828933e-05, | |
| "loss": 0.4823911984761556, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 1.1011009458830827, | |
| "grad_norm": 6.726215362548828, | |
| "learning_rate": 1.407118254879449e-05, | |
| "loss": 0.42187273502349854, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 1.108543960303923, | |
| "grad_norm": 14.170055389404297, | |
| "learning_rate": 1.4016073478760046e-05, | |
| "loss": 0.5301618576049805, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 1.1159869747247635, | |
| "grad_norm": 2.776092767715454, | |
| "learning_rate": 1.3960964408725603e-05, | |
| "loss": 0.4935903151830037, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.1234299891456039, | |
| "grad_norm": 7.334898948669434, | |
| "learning_rate": 1.390585533869116e-05, | |
| "loss": 0.5331637859344482, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 1.1308730035664445, | |
| "grad_norm": 4.995052337646484, | |
| "learning_rate": 1.3850746268656718e-05, | |
| "loss": 0.4663925568262736, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 1.1383160179872849, | |
| "grad_norm": 9.281367301940918, | |
| "learning_rate": 1.3795637198622274e-05, | |
| "loss": 0.44923396905263263, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 1.1457590324081253, | |
| "grad_norm": 5.095090866088867, | |
| "learning_rate": 1.3740528128587831e-05, | |
| "loss": 0.5650514364242554, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 1.1532020468289657, | |
| "grad_norm": 2.299600839614868, | |
| "learning_rate": 1.3685419058553388e-05, | |
| "loss": 0.48252185185750324, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.1606450612498063, | |
| "grad_norm": 6.702273368835449, | |
| "learning_rate": 1.3630309988518944e-05, | |
| "loss": 0.5192966063817342, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 1.1680880756706467, | |
| "grad_norm": 10.89989948272705, | |
| "learning_rate": 1.3575200918484501e-05, | |
| "loss": 0.48262282212575275, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 1.175531090091487, | |
| "grad_norm": 15.075289726257324, | |
| "learning_rate": 1.3520091848450059e-05, | |
| "loss": 0.45538806915283203, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 1.1829741045123274, | |
| "grad_norm": 3.0880722999572754, | |
| "learning_rate": 1.3464982778415616e-05, | |
| "loss": 0.46872226397196454, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 1.1904171189331678, | |
| "grad_norm": 8.533724784851074, | |
| "learning_rate": 1.3409873708381172e-05, | |
| "loss": 0.4827297528584798, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.1978601333540084, | |
| "grad_norm": 3.070657968521118, | |
| "learning_rate": 1.3354764638346729e-05, | |
| "loss": 0.48583118120829266, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 1.2053031477748488, | |
| "grad_norm": 3.7270054817199707, | |
| "learning_rate": 1.3299655568312286e-05, | |
| "loss": 0.505421002705892, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 1.2127461621956892, | |
| "grad_norm": 9.997303009033203, | |
| "learning_rate": 1.3244546498277842e-05, | |
| "loss": 0.4140005111694336, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 1.2201891766165298, | |
| "grad_norm": 11.578160285949707, | |
| "learning_rate": 1.31894374282434e-05, | |
| "loss": 0.44274091720581055, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 1.2276321910373702, | |
| "grad_norm": 9.199183464050293, | |
| "learning_rate": 1.3134328358208957e-05, | |
| "loss": 0.5600036780039469, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.2350752054582106, | |
| "grad_norm": 7.212144374847412, | |
| "learning_rate": 1.3079219288174514e-05, | |
| "loss": 0.494090994199117, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 1.242518219879051, | |
| "grad_norm": 3.4123635292053223, | |
| "learning_rate": 1.302411021814007e-05, | |
| "loss": 0.4909547170003255, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 1.2499612342998914, | |
| "grad_norm": 7.941708087921143, | |
| "learning_rate": 1.2969001148105627e-05, | |
| "loss": 0.47832663853963214, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 1.257404248720732, | |
| "grad_norm": 2.4799387454986572, | |
| "learning_rate": 1.2913892078071184e-05, | |
| "loss": 0.49106045564015705, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 1.2648472631415724, | |
| "grad_norm": 5.136545658111572, | |
| "learning_rate": 1.2858783008036742e-05, | |
| "loss": 0.4738738536834717, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.2722902775624128, | |
| "grad_norm": 4.9489240646362305, | |
| "learning_rate": 1.2803673938002297e-05, | |
| "loss": 0.4953068097432454, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 1.2797332919832531, | |
| "grad_norm": 6.822914123535156, | |
| "learning_rate": 1.2748564867967855e-05, | |
| "loss": 0.46026841799418133, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 1.2871763064040938, | |
| "grad_norm": 6.177013874053955, | |
| "learning_rate": 1.2693455797933412e-05, | |
| "loss": 0.494237224260966, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 1.2946193208249341, | |
| "grad_norm": 2.4243626594543457, | |
| "learning_rate": 1.2638346727898967e-05, | |
| "loss": 0.5003351370493571, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 1.3020623352457745, | |
| "grad_norm": 18.99603843688965, | |
| "learning_rate": 1.2583237657864525e-05, | |
| "loss": 0.5109163920084635, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.309505349666615, | |
| "grad_norm": 2.4371707439422607, | |
| "learning_rate": 1.2528128587830082e-05, | |
| "loss": 0.41310568650563556, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 1.3169483640874553, | |
| "grad_norm": 3.7665302753448486, | |
| "learning_rate": 1.247301951779564e-05, | |
| "loss": 0.45848862330118817, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 1.324391378508296, | |
| "grad_norm": 12.537642478942871, | |
| "learning_rate": 1.2417910447761195e-05, | |
| "loss": 0.523716410001119, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 1.3318343929291363, | |
| "grad_norm": 2.882084846496582, | |
| "learning_rate": 1.2362801377726752e-05, | |
| "loss": 0.47608526547749835, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 1.3392774073499767, | |
| "grad_norm": 2.86336612701416, | |
| "learning_rate": 1.230769230769231e-05, | |
| "loss": 0.4347230593363444, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.3467204217708173, | |
| "grad_norm": 3.1628830432891846, | |
| "learning_rate": 1.2252583237657865e-05, | |
| "loss": 0.46674203872680664, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 1.3541634361916577, | |
| "grad_norm": 11.767653465270996, | |
| "learning_rate": 1.2197474167623423e-05, | |
| "loss": 0.47306569417317706, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 1.361606450612498, | |
| "grad_norm": 11.81271743774414, | |
| "learning_rate": 1.214236509758898e-05, | |
| "loss": 0.4672517776489258, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 1.3690494650333385, | |
| "grad_norm": 3.6157212257385254, | |
| "learning_rate": 1.2087256027554537e-05, | |
| "loss": 0.4465065797170003, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 1.3764924794541789, | |
| "grad_norm": 3.778449773788452, | |
| "learning_rate": 1.2032146957520093e-05, | |
| "loss": 0.5149937868118286, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.3839354938750195, | |
| "grad_norm": 2.5120906829833984, | |
| "learning_rate": 1.197703788748565e-05, | |
| "loss": 0.45879046122233075, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 1.3913785082958599, | |
| "grad_norm": 17.704999923706055, | |
| "learning_rate": 1.1921928817451208e-05, | |
| "loss": 0.5167669057846069, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 1.3988215227167002, | |
| "grad_norm": 11.8012113571167, | |
| "learning_rate": 1.1866819747416762e-05, | |
| "loss": 0.496524175008138, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 1.4062645371375406, | |
| "grad_norm": 13.236916542053223, | |
| "learning_rate": 1.181171067738232e-05, | |
| "loss": 0.47164463996887207, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 1.4137075515583812, | |
| "grad_norm": 3.6107146739959717, | |
| "learning_rate": 1.1756601607347878e-05, | |
| "loss": 0.4411802689234416, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.4211505659792216, | |
| "grad_norm": 3.5400538444519043, | |
| "learning_rate": 1.1701492537313435e-05, | |
| "loss": 0.44078512986501056, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 1.428593580400062, | |
| "grad_norm": 3.386744260787964, | |
| "learning_rate": 1.164638346727899e-05, | |
| "loss": 0.44522058963775635, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 1.4360365948209024, | |
| "grad_norm": 7.451818466186523, | |
| "learning_rate": 1.1591274397244548e-05, | |
| "loss": 0.4643220106760661, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 1.4434796092417428, | |
| "grad_norm": 3.741562843322754, | |
| "learning_rate": 1.1536165327210106e-05, | |
| "loss": 0.4557652473449707, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 1.4509226236625834, | |
| "grad_norm": 2.767171621322632, | |
| "learning_rate": 1.148105625717566e-05, | |
| "loss": 0.4677225748697917, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.4583656380834238, | |
| "grad_norm": 5.696690559387207, | |
| "learning_rate": 1.1425947187141217e-05, | |
| "loss": 0.42428747812906903, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 1.4658086525042642, | |
| "grad_norm": 6.44115686416626, | |
| "learning_rate": 1.1370838117106774e-05, | |
| "loss": 0.4969560702641805, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 1.4732516669251048, | |
| "grad_norm": 6.7684831619262695, | |
| "learning_rate": 1.1315729047072333e-05, | |
| "loss": 0.5301390091578165, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 1.4806946813459452, | |
| "grad_norm": 2.761455774307251, | |
| "learning_rate": 1.1260619977037887e-05, | |
| "loss": 0.4755421082178752, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 1.4881376957667856, | |
| "grad_norm": 7.615389347076416, | |
| "learning_rate": 1.1205510907003444e-05, | |
| "loss": 0.4676011800765991, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.495580710187626, | |
| "grad_norm": 3.118619680404663, | |
| "learning_rate": 1.1150401836969002e-05, | |
| "loss": 0.4575995206832886, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 1.5030237246084663, | |
| "grad_norm": 4.179815769195557, | |
| "learning_rate": 1.1095292766934557e-05, | |
| "loss": 0.5326940615971884, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 1.5104667390293067, | |
| "grad_norm": 3.128330945968628, | |
| "learning_rate": 1.1040183696900115e-05, | |
| "loss": 0.45927361647288006, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 1.5179097534501473, | |
| "grad_norm": 3.6722943782806396, | |
| "learning_rate": 1.0985074626865672e-05, | |
| "loss": 0.5232657591501871, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 1.5253527678709877, | |
| "grad_norm": 8.696102142333984, | |
| "learning_rate": 1.092996555683123e-05, | |
| "loss": 0.5253320535024008, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.5327957822918283, | |
| "grad_norm": 6.030095100402832, | |
| "learning_rate": 1.0874856486796785e-05, | |
| "loss": 0.4725768566131592, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 1.5402387967126687, | |
| "grad_norm": 8.892803192138672, | |
| "learning_rate": 1.0819747416762342e-05, | |
| "loss": 0.44700531164805096, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 1.5476818111335091, | |
| "grad_norm": 15.271442413330078, | |
| "learning_rate": 1.07646383467279e-05, | |
| "loss": 0.44845902919769287, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 1.5551248255543495, | |
| "grad_norm": 5.234111785888672, | |
| "learning_rate": 1.0709529276693457e-05, | |
| "loss": 0.5186563730239868, | |
| "step": 2508 | |
| }, | |
| { | |
| "epoch": 1.5625678399751899, | |
| "grad_norm": 6.541170597076416, | |
| "learning_rate": 1.0654420206659013e-05, | |
| "loss": 0.4690740505854289, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.5700108543960303, | |
| "grad_norm": 2.7548892498016357, | |
| "learning_rate": 1.059931113662457e-05, | |
| "loss": 0.43329620361328125, | |
| "step": 2532 | |
| }, | |
| { | |
| "epoch": 1.5774538688168709, | |
| "grad_norm": 12.428861618041992, | |
| "learning_rate": 1.0544202066590127e-05, | |
| "loss": 0.43588805198669434, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 1.5848968832377113, | |
| "grad_norm": 9.76059627532959, | |
| "learning_rate": 1.0489092996555683e-05, | |
| "loss": 0.4283796151479085, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 1.5923398976585517, | |
| "grad_norm": 10.960260391235352, | |
| "learning_rate": 1.043398392652124e-05, | |
| "loss": 0.4565364519755046, | |
| "step": 2568 | |
| }, | |
| { | |
| "epoch": 1.5997829120793923, | |
| "grad_norm": 6.568747043609619, | |
| "learning_rate": 1.0378874856486798e-05, | |
| "loss": 0.41670429706573486, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.6072259265002327, | |
| "grad_norm": 2.755124568939209, | |
| "learning_rate": 1.0323765786452355e-05, | |
| "loss": 0.4691346486409505, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 1.614668940921073, | |
| "grad_norm": 21.070772171020508, | |
| "learning_rate": 1.026865671641791e-05, | |
| "loss": 0.4186259905497233, | |
| "step": 2604 | |
| }, | |
| { | |
| "epoch": 1.6221119553419134, | |
| "grad_norm": 5.272284507751465, | |
| "learning_rate": 1.0213547646383468e-05, | |
| "loss": 0.4942372639973958, | |
| "step": 2616 | |
| }, | |
| { | |
| "epoch": 1.6295549697627538, | |
| "grad_norm": 8.858941078186035, | |
| "learning_rate": 1.0158438576349025e-05, | |
| "loss": 0.4842514594395955, | |
| "step": 2628 | |
| }, | |
| { | |
| "epoch": 1.6369979841835942, | |
| "grad_norm": 4.663693428039551, | |
| "learning_rate": 1.010332950631458e-05, | |
| "loss": 0.49429325262705487, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.6444409986044348, | |
| "grad_norm": 14.864917755126953, | |
| "learning_rate": 1.0048220436280138e-05, | |
| "loss": 0.46838700771331787, | |
| "step": 2652 | |
| }, | |
| { | |
| "epoch": 1.6518840130252752, | |
| "grad_norm": 2.5411393642425537, | |
| "learning_rate": 9.993111366245695e-06, | |
| "loss": 0.4521595239639282, | |
| "step": 2664 | |
| }, | |
| { | |
| "epoch": 1.6593270274461158, | |
| "grad_norm": 3.005941152572632, | |
| "learning_rate": 9.938002296211253e-06, | |
| "loss": 0.48365652561187744, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 1.6667700418669562, | |
| "grad_norm": 5.7398552894592285, | |
| "learning_rate": 9.88289322617681e-06, | |
| "loss": 0.4695123831431071, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 1.6742130562877966, | |
| "grad_norm": 4.946065902709961, | |
| "learning_rate": 9.827784156142366e-06, | |
| "loss": 0.4761979579925537, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.681656070708637, | |
| "grad_norm": 7.703652858734131, | |
| "learning_rate": 9.772675086107923e-06, | |
| "loss": 0.49780480066935223, | |
| "step": 2712 | |
| }, | |
| { | |
| "epoch": 1.6890990851294774, | |
| "grad_norm": 8.237687110900879, | |
| "learning_rate": 9.71756601607348e-06, | |
| "loss": 0.4623022476832072, | |
| "step": 2724 | |
| }, | |
| { | |
| "epoch": 1.6965420995503178, | |
| "grad_norm": 2.87007474899292, | |
| "learning_rate": 9.662456946039036e-06, | |
| "loss": 0.41221630573272705, | |
| "step": 2736 | |
| }, | |
| { | |
| "epoch": 1.7039851139711584, | |
| "grad_norm": 4.247465133666992, | |
| "learning_rate": 9.607347876004593e-06, | |
| "loss": 0.4721166690190633, | |
| "step": 2748 | |
| }, | |
| { | |
| "epoch": 1.7114281283919988, | |
| "grad_norm": 4.022077560424805, | |
| "learning_rate": 9.552238805970149e-06, | |
| "loss": 0.47880788644154865, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.7188711428128391, | |
| "grad_norm": 5.686273574829102, | |
| "learning_rate": 9.497129735935708e-06, | |
| "loss": 0.486567219098409, | |
| "step": 2772 | |
| }, | |
| { | |
| "epoch": 1.7263141572336798, | |
| "grad_norm": 4.733608245849609, | |
| "learning_rate": 9.442020665901264e-06, | |
| "loss": 0.4696682294209798, | |
| "step": 2784 | |
| }, | |
| { | |
| "epoch": 1.7337571716545201, | |
| "grad_norm": 3.8102357387542725, | |
| "learning_rate": 9.38691159586682e-06, | |
| "loss": 0.4944278796513875, | |
| "step": 2796 | |
| }, | |
| { | |
| "epoch": 1.7412001860753605, | |
| "grad_norm": 5.343743801116943, | |
| "learning_rate": 9.331802525832377e-06, | |
| "loss": 0.45073699951171875, | |
| "step": 2808 | |
| }, | |
| { | |
| "epoch": 1.748643200496201, | |
| "grad_norm": 8.939608573913574, | |
| "learning_rate": 9.276693455797934e-06, | |
| "loss": 0.5150019327799479, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.7560862149170413, | |
| "grad_norm": 9.984607696533203, | |
| "learning_rate": 9.221584385763491e-06, | |
| "loss": 0.49051181475321454, | |
| "step": 2832 | |
| }, | |
| { | |
| "epoch": 1.7635292293378817, | |
| "grad_norm": 4.297845840454102, | |
| "learning_rate": 9.166475315729047e-06, | |
| "loss": 0.43834813435872394, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 1.7709722437587223, | |
| "grad_norm": 4.738193035125732, | |
| "learning_rate": 9.111366245694604e-06, | |
| "loss": 0.48496174812316895, | |
| "step": 2856 | |
| }, | |
| { | |
| "epoch": 1.7784152581795627, | |
| "grad_norm": 6.950840473175049, | |
| "learning_rate": 9.056257175660162e-06, | |
| "loss": 0.4803895950317383, | |
| "step": 2868 | |
| }, | |
| { | |
| "epoch": 1.7858582726004033, | |
| "grad_norm": 2.9567737579345703, | |
| "learning_rate": 9.001148105625719e-06, | |
| "loss": 0.47137478987375897, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.7933012870212437, | |
| "grad_norm": 21.629295349121094, | |
| "learning_rate": 8.946039035591275e-06, | |
| "loss": 0.5382961829503378, | |
| "step": 2892 | |
| }, | |
| { | |
| "epoch": 1.800744301442084, | |
| "grad_norm": 4.054839611053467, | |
| "learning_rate": 8.890929965556832e-06, | |
| "loss": 0.429937203725179, | |
| "step": 2904 | |
| }, | |
| { | |
| "epoch": 1.8081873158629245, | |
| "grad_norm": 8.124676704406738, | |
| "learning_rate": 8.83582089552239e-06, | |
| "loss": 0.46343564987182617, | |
| "step": 2916 | |
| }, | |
| { | |
| "epoch": 1.8156303302837649, | |
| "grad_norm": 6.405475616455078, | |
| "learning_rate": 8.780711825487945e-06, | |
| "loss": 0.47476502259572345, | |
| "step": 2928 | |
| }, | |
| { | |
| "epoch": 1.8230733447046052, | |
| "grad_norm": 3.4982993602752686, | |
| "learning_rate": 8.725602755453502e-06, | |
| "loss": 0.42661325136820477, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.8305163591254459, | |
| "grad_norm": 5.036385536193848, | |
| "learning_rate": 8.67049368541906e-06, | |
| "loss": 0.42475831508636475, | |
| "step": 2952 | |
| }, | |
| { | |
| "epoch": 1.8379593735462862, | |
| "grad_norm": 9.453807830810547, | |
| "learning_rate": 8.615384615384617e-06, | |
| "loss": 0.4522843360900879, | |
| "step": 2964 | |
| }, | |
| { | |
| "epoch": 1.8454023879671266, | |
| "grad_norm": 7.572172164916992, | |
| "learning_rate": 8.560275545350172e-06, | |
| "loss": 0.5405757427215576, | |
| "step": 2976 | |
| }, | |
| { | |
| "epoch": 1.8528454023879672, | |
| "grad_norm": 3.8509397506713867, | |
| "learning_rate": 8.50516647531573e-06, | |
| "loss": 0.4206368128458659, | |
| "step": 2988 | |
| }, | |
| { | |
| "epoch": 1.8602884168088076, | |
| "grad_norm": 3.8660781383514404, | |
| "learning_rate": 8.450057405281287e-06, | |
| "loss": 0.4278140465418498, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.867731431229648, | |
| "grad_norm": 13.179638862609863, | |
| "learning_rate": 8.394948335246843e-06, | |
| "loss": 0.45146167278289795, | |
| "step": 3012 | |
| }, | |
| { | |
| "epoch": 1.8751744456504884, | |
| "grad_norm": 2.5003507137298584, | |
| "learning_rate": 8.3398392652124e-06, | |
| "loss": 0.5010615189870199, | |
| "step": 3024 | |
| }, | |
| { | |
| "epoch": 1.8826174600713288, | |
| "grad_norm": 6.336158752441406, | |
| "learning_rate": 8.284730195177957e-06, | |
| "loss": 0.48331379890441895, | |
| "step": 3036 | |
| }, | |
| { | |
| "epoch": 1.8900604744921692, | |
| "grad_norm": 3.9048869609832764, | |
| "learning_rate": 8.229621125143515e-06, | |
| "loss": 0.4964629014333089, | |
| "step": 3048 | |
| }, | |
| { | |
| "epoch": 1.8975034889130098, | |
| "grad_norm": 4.851749897003174, | |
| "learning_rate": 8.17451205510907e-06, | |
| "loss": 0.4605306386947632, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.9049465033338502, | |
| "grad_norm": 2.5984604358673096, | |
| "learning_rate": 8.119402985074628e-06, | |
| "loss": 0.42377761999766034, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 1.9123895177546908, | |
| "grad_norm": 14.330255508422852, | |
| "learning_rate": 8.064293915040185e-06, | |
| "loss": 0.4586070378621419, | |
| "step": 3084 | |
| }, | |
| { | |
| "epoch": 1.9198325321755312, | |
| "grad_norm": 5.363494873046875, | |
| "learning_rate": 8.00918484500574e-06, | |
| "loss": 0.4935295581817627, | |
| "step": 3096 | |
| }, | |
| { | |
| "epoch": 1.9272755465963716, | |
| "grad_norm": 5.703904151916504, | |
| "learning_rate": 7.954075774971298e-06, | |
| "loss": 0.44021427631378174, | |
| "step": 3108 | |
| }, | |
| { | |
| "epoch": 1.934718561017212, | |
| "grad_norm": 5.600277423858643, | |
| "learning_rate": 7.898966704936855e-06, | |
| "loss": 0.48560158411661786, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.9421615754380523, | |
| "grad_norm": 11.074832916259766, | |
| "learning_rate": 7.843857634902413e-06, | |
| "loss": 0.4312416712443034, | |
| "step": 3132 | |
| }, | |
| { | |
| "epoch": 1.9496045898588927, | |
| "grad_norm": 3.4356892108917236, | |
| "learning_rate": 7.788748564867968e-06, | |
| "loss": 0.4442025025685628, | |
| "step": 3144 | |
| }, | |
| { | |
| "epoch": 1.9570476042797333, | |
| "grad_norm": 3.7474091053009033, | |
| "learning_rate": 7.733639494833526e-06, | |
| "loss": 0.5241368214289347, | |
| "step": 3156 | |
| }, | |
| { | |
| "epoch": 1.9644906187005737, | |
| "grad_norm": 4.750489234924316, | |
| "learning_rate": 7.678530424799083e-06, | |
| "loss": 0.4401020606358846, | |
| "step": 3168 | |
| }, | |
| { | |
| "epoch": 1.9719336331214141, | |
| "grad_norm": 22.131851196289062, | |
| "learning_rate": 7.6234213547646386e-06, | |
| "loss": 0.5134913126627604, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.9793766475422547, | |
| "grad_norm": 4.812230587005615, | |
| "learning_rate": 7.568312284730196e-06, | |
| "loss": 0.5479523340861002, | |
| "step": 3192 | |
| }, | |
| { | |
| "epoch": 1.9868196619630951, | |
| "grad_norm": 6.560222625732422, | |
| "learning_rate": 7.513203214695752e-06, | |
| "loss": 0.4738404353459676, | |
| "step": 3204 | |
| }, | |
| { | |
| "epoch": 1.9942626763839355, | |
| "grad_norm": 5.240246772766113, | |
| "learning_rate": 7.45809414466131e-06, | |
| "loss": 0.4475013017654419, | |
| "step": 3216 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_f1": 0.43079906968624254, | |
| "eval_loss": 0.11952196806669235, | |
| "eval_precision": 0.391528709389682, | |
| "eval_recall": 0.4931553870446119, | |
| "eval_runtime": 585.0453, | |
| "eval_samples_per_second": 66.13, | |
| "eval_steps_per_second": 1.379, | |
| "step": 3226 | |
| }, | |
| { | |
| "epoch": 2.0012405024034736, | |
| "grad_norm": 4.430677890777588, | |
| "learning_rate": 7.402985074626866e-06, | |
| "loss": 0.4009953737258911, | |
| "step": 3228 | |
| }, | |
| { | |
| "epoch": 2.008683516824314, | |
| "grad_norm": 10.324471473693848, | |
| "learning_rate": 7.3478760045924235e-06, | |
| "loss": 0.4711928367614746, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 2.0161265312451544, | |
| "grad_norm": 11.249197006225586, | |
| "learning_rate": 7.29276693455798e-06, | |
| "loss": 0.4341440995534261, | |
| "step": 3252 | |
| }, | |
| { | |
| "epoch": 2.023569545665995, | |
| "grad_norm": 2.7949812412261963, | |
| "learning_rate": 7.2376578645235365e-06, | |
| "loss": 0.3914073705673218, | |
| "step": 3264 | |
| }, | |
| { | |
| "epoch": 2.031012560086835, | |
| "grad_norm": 10.501336097717285, | |
| "learning_rate": 7.182548794489094e-06, | |
| "loss": 0.3871670166651408, | |
| "step": 3276 | |
| }, | |
| { | |
| "epoch": 2.0384555745076756, | |
| "grad_norm": 11.492402076721191, | |
| "learning_rate": 7.12743972445465e-06, | |
| "loss": 0.44295652707417804, | |
| "step": 3288 | |
| }, | |
| { | |
| "epoch": 2.045898588928516, | |
| "grad_norm": 8.688313484191895, | |
| "learning_rate": 7.072330654420208e-06, | |
| "loss": 0.4092850685119629, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.0533416033493563, | |
| "grad_norm": 5.402098178863525, | |
| "learning_rate": 7.017221584385764e-06, | |
| "loss": 0.41869743665059406, | |
| "step": 3312 | |
| }, | |
| { | |
| "epoch": 2.0607846177701967, | |
| "grad_norm": 3.6429481506347656, | |
| "learning_rate": 6.962112514351321e-06, | |
| "loss": 0.3916611671447754, | |
| "step": 3324 | |
| }, | |
| { | |
| "epoch": 2.0682276321910376, | |
| "grad_norm": 4.778937339782715, | |
| "learning_rate": 6.907003444316878e-06, | |
| "loss": 0.3913481632868449, | |
| "step": 3336 | |
| }, | |
| { | |
| "epoch": 2.075670646611878, | |
| "grad_norm": 4.281859874725342, | |
| "learning_rate": 6.851894374282435e-06, | |
| "loss": 0.380032738049825, | |
| "step": 3348 | |
| }, | |
| { | |
| "epoch": 2.0831136610327183, | |
| "grad_norm": 7.385513782501221, | |
| "learning_rate": 6.796785304247992e-06, | |
| "loss": 0.3545822699864705, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 2.0905566754535587, | |
| "grad_norm": 2.9248600006103516, | |
| "learning_rate": 6.741676234213548e-06, | |
| "loss": 0.419588565826416, | |
| "step": 3372 | |
| }, | |
| { | |
| "epoch": 2.097999689874399, | |
| "grad_norm": 3.0418336391448975, | |
| "learning_rate": 6.6865671641791055e-06, | |
| "loss": 0.4189613262812297, | |
| "step": 3384 | |
| }, | |
| { | |
| "epoch": 2.1054427042952395, | |
| "grad_norm": 4.628702640533447, | |
| "learning_rate": 6.631458094144662e-06, | |
| "loss": 0.38280495007832843, | |
| "step": 3396 | |
| }, | |
| { | |
| "epoch": 2.11288571871608, | |
| "grad_norm": 2.931917667388916, | |
| "learning_rate": 6.576349024110219e-06, | |
| "loss": 0.40134119987487793, | |
| "step": 3408 | |
| }, | |
| { | |
| "epoch": 2.1203287331369203, | |
| "grad_norm": 5.4905853271484375, | |
| "learning_rate": 6.521239954075776e-06, | |
| "loss": 0.3685312271118164, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 2.1277717475577607, | |
| "grad_norm": 2.9753782749176025, | |
| "learning_rate": 6.466130884041333e-06, | |
| "loss": 0.3878607749938965, | |
| "step": 3432 | |
| }, | |
| { | |
| "epoch": 2.1352147619786015, | |
| "grad_norm": 7.17921257019043, | |
| "learning_rate": 6.411021814006889e-06, | |
| "loss": 0.41369112332661945, | |
| "step": 3444 | |
| }, | |
| { | |
| "epoch": 2.142657776399442, | |
| "grad_norm": 13.806902885437012, | |
| "learning_rate": 6.355912743972445e-06, | |
| "loss": 0.43599124749501544, | |
| "step": 3456 | |
| }, | |
| { | |
| "epoch": 2.1501007908202823, | |
| "grad_norm": 3.4916634559631348, | |
| "learning_rate": 6.3008036739380026e-06, | |
| "loss": 0.3406885862350464, | |
| "step": 3468 | |
| }, | |
| { | |
| "epoch": 2.1575438052411227, | |
| "grad_norm": 6.193579196929932, | |
| "learning_rate": 6.245694603903559e-06, | |
| "loss": 0.3558163642883301, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 2.164986819661963, | |
| "grad_norm": 6.37896203994751, | |
| "learning_rate": 6.190585533869116e-06, | |
| "loss": 0.35776766141255695, | |
| "step": 3492 | |
| }, | |
| { | |
| "epoch": 2.1724298340828034, | |
| "grad_norm": 12.731496810913086, | |
| "learning_rate": 6.135476463834673e-06, | |
| "loss": 0.37972402572631836, | |
| "step": 3504 | |
| }, | |
| { | |
| "epoch": 2.179872848503644, | |
| "grad_norm": 19.98930549621582, | |
| "learning_rate": 6.08036739380023e-06, | |
| "loss": 0.42111217975616455, | |
| "step": 3516 | |
| }, | |
| { | |
| "epoch": 2.187315862924484, | |
| "grad_norm": 6.11861515045166, | |
| "learning_rate": 6.025258323765787e-06, | |
| "loss": 0.3672644297281901, | |
| "step": 3528 | |
| }, | |
| { | |
| "epoch": 2.194758877345325, | |
| "grad_norm": 11.929699897766113, | |
| "learning_rate": 5.970149253731343e-06, | |
| "loss": 0.4023996591567993, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 2.2022018917661654, | |
| "grad_norm": 17.26346206665039, | |
| "learning_rate": 5.9150401836969005e-06, | |
| "loss": 0.38841597239176434, | |
| "step": 3552 | |
| }, | |
| { | |
| "epoch": 2.209644906187006, | |
| "grad_norm": 9.183552742004395, | |
| "learning_rate": 5.859931113662457e-06, | |
| "loss": 0.42536401748657227, | |
| "step": 3564 | |
| }, | |
| { | |
| "epoch": 2.217087920607846, | |
| "grad_norm": 2.3118231296539307, | |
| "learning_rate": 5.804822043628014e-06, | |
| "loss": 0.4157342513402303, | |
| "step": 3576 | |
| }, | |
| { | |
| "epoch": 2.2245309350286866, | |
| "grad_norm": 6.309724807739258, | |
| "learning_rate": 5.749712973593571e-06, | |
| "loss": 0.4599275191624959, | |
| "step": 3588 | |
| }, | |
| { | |
| "epoch": 2.231973949449527, | |
| "grad_norm": 2.892469882965088, | |
| "learning_rate": 5.694603903559128e-06, | |
| "loss": 0.441303292910258, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.2394169638703674, | |
| "grad_norm": 6.523403167724609, | |
| "learning_rate": 5.6394948335246846e-06, | |
| "loss": 0.41275028387705487, | |
| "step": 3612 | |
| }, | |
| { | |
| "epoch": 2.2468599782912078, | |
| "grad_norm": 2.6101267337799072, | |
| "learning_rate": 5.584385763490242e-06, | |
| "loss": 0.41505225499471027, | |
| "step": 3624 | |
| }, | |
| { | |
| "epoch": 2.2543029927120486, | |
| "grad_norm": 5.343144416809082, | |
| "learning_rate": 5.529276693455798e-06, | |
| "loss": 0.38965781529744464, | |
| "step": 3636 | |
| }, | |
| { | |
| "epoch": 2.261746007132889, | |
| "grad_norm": 4.3300395011901855, | |
| "learning_rate": 5.474167623421355e-06, | |
| "loss": 0.4278339942296346, | |
| "step": 3648 | |
| }, | |
| { | |
| "epoch": 2.2691890215537294, | |
| "grad_norm": 5.109958171844482, | |
| "learning_rate": 5.419058553386912e-06, | |
| "loss": 0.366446574529012, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 2.2766320359745698, | |
| "grad_norm": 3.8399014472961426, | |
| "learning_rate": 5.363949483352469e-06, | |
| "loss": 0.3991047541300456, | |
| "step": 3672 | |
| }, | |
| { | |
| "epoch": 2.28407505039541, | |
| "grad_norm": 6.625537872314453, | |
| "learning_rate": 5.308840413318026e-06, | |
| "loss": 0.3346426486968994, | |
| "step": 3684 | |
| }, | |
| { | |
| "epoch": 2.2915180648162505, | |
| "grad_norm": 11.645654678344727, | |
| "learning_rate": 5.2537313432835825e-06, | |
| "loss": 0.3985482454299927, | |
| "step": 3696 | |
| }, | |
| { | |
| "epoch": 2.298961079237091, | |
| "grad_norm": 5.67885684967041, | |
| "learning_rate": 5.19862227324914e-06, | |
| "loss": 0.3815650939941406, | |
| "step": 3708 | |
| }, | |
| { | |
| "epoch": 2.3064040936579313, | |
| "grad_norm": 4.548233985900879, | |
| "learning_rate": 5.143513203214696e-06, | |
| "loss": 0.39840646584828693, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 2.3138471080787717, | |
| "grad_norm": 3.8364691734313965, | |
| "learning_rate": 5.088404133180253e-06, | |
| "loss": 0.4081765413284302, | |
| "step": 3732 | |
| }, | |
| { | |
| "epoch": 2.3212901224996125, | |
| "grad_norm": 2.5266079902648926, | |
| "learning_rate": 5.03329506314581e-06, | |
| "loss": 0.3613650401433309, | |
| "step": 3744 | |
| }, | |
| { | |
| "epoch": 2.328733136920453, | |
| "grad_norm": 7.049173831939697, | |
| "learning_rate": 4.9781859931113666e-06, | |
| "loss": 0.4112436771392822, | |
| "step": 3756 | |
| }, | |
| { | |
| "epoch": 2.3361761513412933, | |
| "grad_norm": 7.23855447769165, | |
| "learning_rate": 4.923076923076924e-06, | |
| "loss": 0.4015626907348633, | |
| "step": 3768 | |
| }, | |
| { | |
| "epoch": 2.3436191657621337, | |
| "grad_norm": 7.326627731323242, | |
| "learning_rate": 4.86796785304248e-06, | |
| "loss": 0.389956792195638, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 2.351062180182974, | |
| "grad_norm": 11.426876068115234, | |
| "learning_rate": 4.812858783008037e-06, | |
| "loss": 0.392941157023112, | |
| "step": 3792 | |
| }, | |
| { | |
| "epoch": 2.3585051946038145, | |
| "grad_norm": 5.058406352996826, | |
| "learning_rate": 4.757749712973594e-06, | |
| "loss": 0.388182799021403, | |
| "step": 3804 | |
| }, | |
| { | |
| "epoch": 2.365948209024655, | |
| "grad_norm": 7.783097267150879, | |
| "learning_rate": 4.702640642939151e-06, | |
| "loss": 0.4082544247309367, | |
| "step": 3816 | |
| }, | |
| { | |
| "epoch": 2.3733912234454952, | |
| "grad_norm": 4.8967084884643555, | |
| "learning_rate": 4.647531572904708e-06, | |
| "loss": 0.40780651569366455, | |
| "step": 3828 | |
| }, | |
| { | |
| "epoch": 2.3808342378663356, | |
| "grad_norm": 5.760252952575684, | |
| "learning_rate": 4.5924225028702645e-06, | |
| "loss": 0.4002196391423543, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 2.3882772522871765, | |
| "grad_norm": 4.79511022567749, | |
| "learning_rate": 4.537313432835822e-06, | |
| "loss": 0.3828426996866862, | |
| "step": 3852 | |
| }, | |
| { | |
| "epoch": 2.395720266708017, | |
| "grad_norm": 3.2499914169311523, | |
| "learning_rate": 4.4822043628013774e-06, | |
| "loss": 0.3649975061416626, | |
| "step": 3864 | |
| }, | |
| { | |
| "epoch": 2.4031632811288572, | |
| "grad_norm": 5.451921463012695, | |
| "learning_rate": 4.427095292766935e-06, | |
| "loss": 0.3998970588048299, | |
| "step": 3876 | |
| }, | |
| { | |
| "epoch": 2.4106062955496976, | |
| "grad_norm": 3.8105506896972656, | |
| "learning_rate": 4.371986222732491e-06, | |
| "loss": 0.45681726932525635, | |
| "step": 3888 | |
| }, | |
| { | |
| "epoch": 2.418049309970538, | |
| "grad_norm": 3.690845012664795, | |
| "learning_rate": 4.3168771526980486e-06, | |
| "loss": 0.3797287543614705, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.4254923243913784, | |
| "grad_norm": 12.44582748413086, | |
| "learning_rate": 4.261768082663605e-06, | |
| "loss": 0.47908584276835126, | |
| "step": 3912 | |
| }, | |
| { | |
| "epoch": 2.432935338812219, | |
| "grad_norm": 3.862395763397217, | |
| "learning_rate": 4.206659012629162e-06, | |
| "loss": 0.4127648671468099, | |
| "step": 3924 | |
| }, | |
| { | |
| "epoch": 2.4403783532330596, | |
| "grad_norm": 11.71980094909668, | |
| "learning_rate": 4.151549942594719e-06, | |
| "loss": 0.33937788009643555, | |
| "step": 3936 | |
| }, | |
| { | |
| "epoch": 2.4478213676539, | |
| "grad_norm": 4.254403591156006, | |
| "learning_rate": 4.096440872560276e-06, | |
| "loss": 0.3548990885416667, | |
| "step": 3948 | |
| }, | |
| { | |
| "epoch": 2.4552643820747404, | |
| "grad_norm": 5.00128173828125, | |
| "learning_rate": 4.041331802525833e-06, | |
| "loss": 0.4270055294036865, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 2.462707396495581, | |
| "grad_norm": 3.918459892272949, | |
| "learning_rate": 3.986222732491389e-06, | |
| "loss": 0.3760935465494792, | |
| "step": 3972 | |
| }, | |
| { | |
| "epoch": 2.470150410916421, | |
| "grad_norm": 11.43891716003418, | |
| "learning_rate": 3.9311136624569465e-06, | |
| "loss": 0.4183223644892375, | |
| "step": 3984 | |
| }, | |
| { | |
| "epoch": 2.4775934253372616, | |
| "grad_norm": 16.374967575073242, | |
| "learning_rate": 3.876004592422503e-06, | |
| "loss": 0.36837557951609295, | |
| "step": 3996 | |
| }, | |
| { | |
| "epoch": 2.485036439758102, | |
| "grad_norm": 4.490777015686035, | |
| "learning_rate": 3.82089552238806e-06, | |
| "loss": 0.4069160620371501, | |
| "step": 4008 | |
| }, | |
| { | |
| "epoch": 2.4924794541789423, | |
| "grad_norm": 8.420413970947266, | |
| "learning_rate": 3.7657864523536168e-06, | |
| "loss": 0.4271164337793986, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 2.4999224685997827, | |
| "grad_norm": 8.309126853942871, | |
| "learning_rate": 3.7106773823191737e-06, | |
| "loss": 0.3547343810399373, | |
| "step": 4032 | |
| }, | |
| { | |
| "epoch": 2.5073654830206236, | |
| "grad_norm": 14.98065185546875, | |
| "learning_rate": 3.6555683122847306e-06, | |
| "loss": 0.40314682324727374, | |
| "step": 4044 | |
| }, | |
| { | |
| "epoch": 2.514808497441464, | |
| "grad_norm": 16.558191299438477, | |
| "learning_rate": 3.600459242250287e-06, | |
| "loss": 0.36269084612528485, | |
| "step": 4056 | |
| }, | |
| { | |
| "epoch": 2.5222515118623043, | |
| "grad_norm": 6.547549724578857, | |
| "learning_rate": 3.545350172215844e-06, | |
| "loss": 0.36424537499745685, | |
| "step": 4068 | |
| }, | |
| { | |
| "epoch": 2.5296945262831447, | |
| "grad_norm": 4.773808002471924, | |
| "learning_rate": 3.490241102181401e-06, | |
| "loss": 0.37531224886576336, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 2.537137540703985, | |
| "grad_norm": 4.01258659362793, | |
| "learning_rate": 3.4351320321469578e-06, | |
| "loss": 0.36545733610788983, | |
| "step": 4092 | |
| }, | |
| { | |
| "epoch": 2.5445805551248255, | |
| "grad_norm": 10.372180938720703, | |
| "learning_rate": 3.3800229621125147e-06, | |
| "loss": 0.4671864112218221, | |
| "step": 4104 | |
| }, | |
| { | |
| "epoch": 2.552023569545666, | |
| "grad_norm": 3.3598952293395996, | |
| "learning_rate": 3.3249138920780716e-06, | |
| "loss": 0.3458172082901001, | |
| "step": 4116 | |
| }, | |
| { | |
| "epoch": 2.5594665839665063, | |
| "grad_norm": 11.469687461853027, | |
| "learning_rate": 3.2698048220436285e-06, | |
| "loss": 0.39522536595662433, | |
| "step": 4128 | |
| }, | |
| { | |
| "epoch": 2.5669095983873467, | |
| "grad_norm": 3.848041534423828, | |
| "learning_rate": 3.2146957520091854e-06, | |
| "loss": 0.41400329271952313, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 2.5743526128081875, | |
| "grad_norm": 4.791919231414795, | |
| "learning_rate": 3.159586681974742e-06, | |
| "loss": 0.393940011660258, | |
| "step": 4152 | |
| }, | |
| { | |
| "epoch": 2.581795627229028, | |
| "grad_norm": 21.486618041992188, | |
| "learning_rate": 3.1044776119402988e-06, | |
| "loss": 0.4398730993270874, | |
| "step": 4164 | |
| }, | |
| { | |
| "epoch": 2.5892386416498683, | |
| "grad_norm": 5.638022422790527, | |
| "learning_rate": 3.0493685419058557e-06, | |
| "loss": 0.3547349770863851, | |
| "step": 4176 | |
| }, | |
| { | |
| "epoch": 2.5966816560707087, | |
| "grad_norm": 7.414913177490234, | |
| "learning_rate": 2.9942594718714126e-06, | |
| "loss": 0.38705146312713623, | |
| "step": 4188 | |
| }, | |
| { | |
| "epoch": 2.604124670491549, | |
| "grad_norm": 6.696681976318359, | |
| "learning_rate": 2.9391504018369695e-06, | |
| "loss": 0.36440642674763996, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.6115676849123894, | |
| "grad_norm": 4.02039098739624, | |
| "learning_rate": 2.8840413318025264e-06, | |
| "loss": 0.39015217622121173, | |
| "step": 4212 | |
| }, | |
| { | |
| "epoch": 2.61901069933323, | |
| "grad_norm": 3.370777130126953, | |
| "learning_rate": 2.8289322617680833e-06, | |
| "loss": 0.4275425275166829, | |
| "step": 4224 | |
| }, | |
| { | |
| "epoch": 2.6264537137540707, | |
| "grad_norm": 8.47400951385498, | |
| "learning_rate": 2.7738231917336393e-06, | |
| "loss": 0.3559015194574992, | |
| "step": 4236 | |
| }, | |
| { | |
| "epoch": 2.6338967281749106, | |
| "grad_norm": 11.06500244140625, | |
| "learning_rate": 2.7187141216991963e-06, | |
| "loss": 0.3683815002441406, | |
| "step": 4248 | |
| }, | |
| { | |
| "epoch": 2.6413397425957514, | |
| "grad_norm": 3.4861528873443604, | |
| "learning_rate": 2.663605051664753e-06, | |
| "loss": 0.44681187470753986, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 2.648782757016592, | |
| "grad_norm": 10.642603874206543, | |
| "learning_rate": 2.60849598163031e-06, | |
| "loss": 0.4434703588485718, | |
| "step": 4272 | |
| }, | |
| { | |
| "epoch": 2.656225771437432, | |
| "grad_norm": 2.501110315322876, | |
| "learning_rate": 2.553386911595867e-06, | |
| "loss": 0.3525495131810506, | |
| "step": 4284 | |
| }, | |
| { | |
| "epoch": 2.6636687858582726, | |
| "grad_norm": 5.691764831542969, | |
| "learning_rate": 2.498277841561424e-06, | |
| "loss": 0.3853313128153483, | |
| "step": 4296 | |
| }, | |
| { | |
| "epoch": 2.671111800279113, | |
| "grad_norm": 4.1908135414123535, | |
| "learning_rate": 2.4431687715269808e-06, | |
| "loss": 0.38127346833546955, | |
| "step": 4308 | |
| }, | |
| { | |
| "epoch": 2.6785548146999534, | |
| "grad_norm": 9.538026809692383, | |
| "learning_rate": 2.3880597014925373e-06, | |
| "loss": 0.39995817343393963, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 2.6859978291207938, | |
| "grad_norm": 8.436595916748047, | |
| "learning_rate": 2.332950631458094e-06, | |
| "loss": 0.3635564645131429, | |
| "step": 4332 | |
| }, | |
| { | |
| "epoch": 2.6934408435416346, | |
| "grad_norm": 2.5905513763427734, | |
| "learning_rate": 2.277841561423651e-06, | |
| "loss": 0.46339670817057294, | |
| "step": 4344 | |
| }, | |
| { | |
| "epoch": 2.7008838579624745, | |
| "grad_norm": 6.738951206207275, | |
| "learning_rate": 2.222732491389208e-06, | |
| "loss": 0.3373739719390869, | |
| "step": 4356 | |
| }, | |
| { | |
| "epoch": 2.7083268723833154, | |
| "grad_norm": 5.625753402709961, | |
| "learning_rate": 2.167623421354765e-06, | |
| "loss": 0.3713107109069824, | |
| "step": 4368 | |
| }, | |
| { | |
| "epoch": 2.7157698868041558, | |
| "grad_norm": 3.6908581256866455, | |
| "learning_rate": 2.1125143513203218e-06, | |
| "loss": 0.3845006227493286, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 2.723212901224996, | |
| "grad_norm": 5.123325824737549, | |
| "learning_rate": 2.0574052812858787e-06, | |
| "loss": 0.3693963686625163, | |
| "step": 4392 | |
| }, | |
| { | |
| "epoch": 2.7306559156458365, | |
| "grad_norm": 5.600500583648682, | |
| "learning_rate": 2.002296211251435e-06, | |
| "loss": 0.4005578358968099, | |
| "step": 4404 | |
| }, | |
| { | |
| "epoch": 2.738098930066677, | |
| "grad_norm": 4.9075775146484375, | |
| "learning_rate": 1.947187141216992e-06, | |
| "loss": 0.44304617245992023, | |
| "step": 4416 | |
| }, | |
| { | |
| "epoch": 2.7455419444875173, | |
| "grad_norm": 2.535568952560425, | |
| "learning_rate": 1.892078071182549e-06, | |
| "loss": 0.36018415292104083, | |
| "step": 4428 | |
| }, | |
| { | |
| "epoch": 2.7529849589083577, | |
| "grad_norm": 3.863154888153076, | |
| "learning_rate": 1.8369690011481059e-06, | |
| "loss": 0.3833086093266805, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 2.7604279733291985, | |
| "grad_norm": 3.38565731048584, | |
| "learning_rate": 1.7818599311136626e-06, | |
| "loss": 0.36296629905700684, | |
| "step": 4452 | |
| }, | |
| { | |
| "epoch": 2.767870987750039, | |
| "grad_norm": 3.979094982147217, | |
| "learning_rate": 1.7267508610792195e-06, | |
| "loss": 0.400799036026001, | |
| "step": 4464 | |
| }, | |
| { | |
| "epoch": 2.7753140021708793, | |
| "grad_norm": 3.6006662845611572, | |
| "learning_rate": 1.6716417910447764e-06, | |
| "loss": 0.38404210408528644, | |
| "step": 4476 | |
| }, | |
| { | |
| "epoch": 2.7827570165917197, | |
| "grad_norm": 9.927759170532227, | |
| "learning_rate": 1.6165327210103333e-06, | |
| "loss": 0.47922762235005695, | |
| "step": 4488 | |
| }, | |
| { | |
| "epoch": 2.79020003101256, | |
| "grad_norm": 4.767171859741211, | |
| "learning_rate": 1.5614236509758898e-06, | |
| "loss": 0.40151556332906085, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.7976430454334005, | |
| "grad_norm": 5.649435043334961, | |
| "learning_rate": 1.5063145809414467e-06, | |
| "loss": 0.3603046735127767, | |
| "step": 4512 | |
| }, | |
| { | |
| "epoch": 2.805086059854241, | |
| "grad_norm": 11.296677589416504, | |
| "learning_rate": 1.4512055109070036e-06, | |
| "loss": 0.38084761301676434, | |
| "step": 4524 | |
| }, | |
| { | |
| "epoch": 2.8125290742750813, | |
| "grad_norm": 2.71022629737854, | |
| "learning_rate": 1.3960964408725605e-06, | |
| "loss": 0.3726603190104167, | |
| "step": 4536 | |
| }, | |
| { | |
| "epoch": 2.8199720886959216, | |
| "grad_norm": 3.849479913711548, | |
| "learning_rate": 1.3409873708381172e-06, | |
| "loss": 0.3995700279871623, | |
| "step": 4548 | |
| }, | |
| { | |
| "epoch": 2.8274151031167625, | |
| "grad_norm": 14.668109893798828, | |
| "learning_rate": 1.285878300803674e-06, | |
| "loss": 0.39227835337320965, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 2.834858117537603, | |
| "grad_norm": 3.9545083045959473, | |
| "learning_rate": 1.230769230769231e-06, | |
| "loss": 0.42009902000427246, | |
| "step": 4572 | |
| }, | |
| { | |
| "epoch": 2.8423011319584432, | |
| "grad_norm": 5.8148298263549805, | |
| "learning_rate": 1.1756601607347877e-06, | |
| "loss": 0.39560989538828534, | |
| "step": 4584 | |
| }, | |
| { | |
| "epoch": 2.8497441463792836, | |
| "grad_norm": 6.249505996704102, | |
| "learning_rate": 1.1205510907003444e-06, | |
| "loss": 0.42494750022888184, | |
| "step": 4596 | |
| }, | |
| { | |
| "epoch": 2.857187160800124, | |
| "grad_norm": 4.1339921951293945, | |
| "learning_rate": 1.0654420206659013e-06, | |
| "loss": 0.5030697584152222, | |
| "step": 4608 | |
| }, | |
| { | |
| "epoch": 2.8646301752209644, | |
| "grad_norm": 13.68895435333252, | |
| "learning_rate": 1.0103329506314582e-06, | |
| "loss": 0.36397520701090497, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 2.872073189641805, | |
| "grad_norm": 2.826042890548706, | |
| "learning_rate": 9.55223880597015e-07, | |
| "loss": 0.3502591848373413, | |
| "step": 4632 | |
| }, | |
| { | |
| "epoch": 2.8795162040626456, | |
| "grad_norm": 6.833806991577148, | |
| "learning_rate": 9.001148105625718e-07, | |
| "loss": 0.3613890012105306, | |
| "step": 4644 | |
| }, | |
| { | |
| "epoch": 2.8869592184834856, | |
| "grad_norm": 4.942678451538086, | |
| "learning_rate": 8.450057405281287e-07, | |
| "loss": 0.39194099108378094, | |
| "step": 4656 | |
| }, | |
| { | |
| "epoch": 2.8944022329043264, | |
| "grad_norm": 4.509676456451416, | |
| "learning_rate": 7.898966704936855e-07, | |
| "loss": 0.351750651995341, | |
| "step": 4668 | |
| }, | |
| { | |
| "epoch": 2.901845247325167, | |
| "grad_norm": 8.305526733398438, | |
| "learning_rate": 7.347876004592424e-07, | |
| "loss": 0.40360478560129803, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 2.909288261746007, | |
| "grad_norm": 4.9328765869140625, | |
| "learning_rate": 6.796785304247991e-07, | |
| "loss": 0.33100277185440063, | |
| "step": 4692 | |
| }, | |
| { | |
| "epoch": 2.9167312761668476, | |
| "grad_norm": 4.945671558380127, | |
| "learning_rate": 6.24569460390356e-07, | |
| "loss": 0.39974749088287354, | |
| "step": 4704 | |
| }, | |
| { | |
| "epoch": 2.924174290587688, | |
| "grad_norm": 9.925528526306152, | |
| "learning_rate": 5.694603903559128e-07, | |
| "loss": 0.4116141001383464, | |
| "step": 4716 | |
| }, | |
| { | |
| "epoch": 2.9316173050085284, | |
| "grad_norm": 4.063233375549316, | |
| "learning_rate": 5.143513203214697e-07, | |
| "loss": 0.3659325838088989, | |
| "step": 4728 | |
| }, | |
| { | |
| "epoch": 2.9390603194293687, | |
| "grad_norm": 3.5343589782714844, | |
| "learning_rate": 4.5924225028702647e-07, | |
| "loss": 0.3983626365661621, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 2.9465033338502096, | |
| "grad_norm": 6.534095764160156, | |
| "learning_rate": 4.041331802525833e-07, | |
| "loss": 0.393149733543396, | |
| "step": 4752 | |
| }, | |
| { | |
| "epoch": 2.9539463482710495, | |
| "grad_norm": 3.4787096977233887, | |
| "learning_rate": 3.490241102181401e-07, | |
| "loss": 0.3340187867482503, | |
| "step": 4764 | |
| }, | |
| { | |
| "epoch": 2.9613893626918903, | |
| "grad_norm": 5.42100191116333, | |
| "learning_rate": 2.939150401836969e-07, | |
| "loss": 0.3814918597539266, | |
| "step": 4776 | |
| }, | |
| { | |
| "epoch": 2.9688323771127307, | |
| "grad_norm": 4.148738861083984, | |
| "learning_rate": 2.3880597014925377e-07, | |
| "loss": 0.4039960702260335, | |
| "step": 4788 | |
| }, | |
| { | |
| "epoch": 2.976275391533571, | |
| "grad_norm": 4.3285746574401855, | |
| "learning_rate": 1.836969001148106e-07, | |
| "loss": 0.34236987431844074, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.9837184059544115, | |
| "grad_norm": 2.8112664222717285, | |
| "learning_rate": 1.2858783008036742e-07, | |
| "loss": 0.3349067767461141, | |
| "step": 4812 | |
| }, | |
| { | |
| "epoch": 2.991161420375252, | |
| "grad_norm": 4.724297523498535, | |
| "learning_rate": 7.347876004592423e-08, | |
| "loss": 0.38507378101348877, | |
| "step": 4824 | |
| }, | |
| { | |
| "epoch": 2.9986044347960923, | |
| "grad_norm": 7.1218132972717285, | |
| "learning_rate": 1.8369690011481057e-08, | |
| "loss": 0.34174474080403644, | |
| "step": 4836 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_f1": 0.43726749573500223, | |
| "eval_loss": 0.12126699090003967, | |
| "eval_precision": 0.4012637195169362, | |
| "eval_recall": 0.4913673269074057, | |
| "eval_runtime": 522.9994, | |
| "eval_samples_per_second": 73.975, | |
| "eval_steps_per_second": 1.543, | |
| "step": 4839 | |
| } | |
| ], | |
| "logging_steps": 12, | |
| "max_steps": 4839, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.2215684896188826e+17, | |
| "train_batch_size": 48, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |